From 18ccd2ac8e105d67c7dc9c78baa60e9519c8b7ae Mon Sep 17 00:00:00 2001
From: Wouter Peters <wouter.peters@wur.nl>
Date: Tue, 28 Aug 2012 07:35:14 +0000
Subject: [PATCH] A new routine has been added that summarizes the X2
 statistics for all time steps, and for all sites. The result is an html table
 in the analysis/summary folder.

---
 da/analysis/summarize_obs.py | 155 ++++++++++++++++++++++++++++++++++-
 1 file changed, 152 insertions(+), 3 deletions(-)

diff --git a/da/analysis/summarize_obs.py b/da/analysis/summarize_obs.py
index 07bccf1f..0d62c247 100755
--- a/da/analysis/summarize_obs.py
+++ b/da/analysis/summarize_obs.py
@@ -192,6 +192,150 @@ def SummarizeObs(DaCycle,printfmt='html'):
 
     logging.info("File written with summary: %s" % saveas)
 
+def SummarizeStats(DaCycle):
+    """
+    Summarize the statistics of the observations for this cycle
+    This includes X2 statistics, RMSD, and others for both forecast and
+    final fluxes
+    """
+    import string
+
+    sumdir=os.path.join(DaCycle['dir.analysis'],'summary')
+    if not os.path.exists(sumdir):
+        logging.info( "Creating new directory "+sumdir )
+        os.makedirs(sumdir)
+
+    # get forecast data from optimizer.ddddd.nc
+
+    startdate                       = DaCycle['time.start'] 
+    DaCycle['time.sample.stamp']    = "%s"%(startdate.strftime("%Y%m%d"),)
+    infile                          = os.path.join(DaCycle['dir.output'],'optimizer.%s.nc'%DaCycle['time.sample.stamp'])
+
+    if not os.path.exists(infile):
+        logging.error("File not found: %s"%infile)
+        raise IOError
+
+    f = io.CT_CDF(infile,'read')
+    sites = f.GetVariable('sitecode')
+    y0  = f.GetVariable('observed')*1e6
+    hx  = f.GetVariable('modelsamplesmean_prior')*1e6
+    dF  = f.GetVariable('modelsamplesdeviations_prior')*1e6
+    HPHTR = f.GetVariable('totalmolefractionvariance').diagonal()*1e6*1e6
+    R     = f.GetVariable('modeldatamismatchvariance').diagonal()*1e6*1e6
+    flags = f.GetVariable('flag')
+    f.close()
+
+    HPHT  = dF.dot(np.transpose(dF)).diagonal()/(dF.shape[1]-1.0)
+    rejected = (flags == 2.0)
+
+    sitecodes = [string.join(s.compressed(),'').strip() for s in sites]
+
+
+    # calculate X2 per observation for this time step
+
+    x2=[]
+    for i,site in enumerate(sitecodes):
+        
+        x2.append((y0[i]-hx[i])**2/HPHTR[i] )
+
+    x2=np.ma.masked_where(HPHTR == 0.0,x2)
+
+    # calculate X2 per site
+    saveas=os.path.join(sumdir,'x2_table_%s.html'%DaCycle['time.sample.stamp'] )
+    logging.info("Writing HTML tables for this cycle (%s)"%saveas)
+    f=open(saveas,'w')
+    txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
+    f.write(txt)
+    txt="<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
+    f.write(txt)
+    tablehead = \
+          "<TR>\n <TH> Site code </TH> \
+               <TH> N<sub>obs</sub> </TH>  \
+               <TH> N<sub>rejected</sub> </TH>  \
+               <TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH>  \
+               <TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH>  \
+               <TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
+               <TH> X2 </TH> \n \
+           </TR>\n"
+
+    fmt=  """<TR> \n \
+            <TD>%s</TD>\
+            <TD>%d</TD>\
+            <TD>%d</TD>\
+            <TD>%+5.2f</TD>\
+            <TD>%+5.2f</TD>\
+            <TD>%+5.2f&plusmn;%5.2f</TD>\
+            <TD>%5.2f</TD>\n \
+           </TR>\n"""
+
+    f.write(tablehead)
+
+    set_sites = set(sitecodes)
+    set_sites = np.sort(list(set_sites))
+
+    for i,site in enumerate(set_sites):
+        sel = [i for i,s in enumerate(sitecodes) if s == site]
+        ss=(site, len(sel),rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]), np.sqrt(HPHT.take(sel).mean()), (hx-y0).take(sel).mean(),(hx-y0).take(sel).std(), x2.take(sel).mean(),)
+        #print site,sel,x2.take(sel)
+
+        f.write(fmt%ss)
+        if (i+1)%15 == 0:
+            f.write(tablehead)
+
+    txt="\n</table>"
+    f.write(txt)
+    f.close()
+
+    # Now summarize for each site across time steps
+
+    if not DaCycle['time.start'] >= dt.datetime(2008,12,29):
+        return
+
+    logging.info("Writing HTML tables for each site")
+    for site in set_sites:
+        saveas=os.path.join(sumdir,'%s_x2.html'%site)
+        f=open(saveas,'w')
+        logging.debug(saveas)
+        txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
+        f.write(txt)
+        txt="<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
+        f.write(txt)
+        tablehead = \
+              "<TR>\n <TH> From File </TH> \
+                   <TH> Site </TH>  \
+                   <TH> N<sub>obs</sub> </TH>  \
+                   <TH> N<sub>rejected</sub> </TH>  \
+                   <TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH>  \
+                   <TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH>  \
+                   <TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
+                   <TH> X2 </TH> \n \
+               </TR>\n"
+        f.write(tablehead)
+
+        files = os.listdir(sumdir)
+        x2_files = [file for file in files if file.startswith('x2')]
+        for htmlfile in x2_files:
+            lines = grep(site,os.path.join(sumdir,htmlfile))
+            for line in lines:
+                f.write('<TR>\n')
+                f.write('<TD>'+htmlfile+'</TD>')
+                f.write(line+'\n')
+                f.write('</TR>\n')
+
+        txt="\n</table>"
+        f.write(txt)
+        f.close()
+
+import re
+def grep(pattern,file):
+    fileObj = open(file,'r')
+    r=[]
+    linenumber=0
+    for line in fileObj:
+        if re.search(pattern,line):
+            r.append(line)
+    return r
+
 # main body if called as script
 
 if __name__ == '__main__':    # started as script
@@ -204,16 +348,21 @@ if __name__ == '__main__':    # started as script
 
     logging.root.setLevel(logging.DEBUG)
 
-    DaCycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack.rc'})
+    DaCycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'})
     DaCycle.Initialize()
     DaCycle.ParseTimes()
 
-    DaSystem    = CtDaSystem('../rc/carbontracker_ct09_op.rc')
+    DaSystem    = CtDaSystem('../rc/carbontracker_ct09_opf.rc')
     DaSystem.Initialize()
 
     DaCycle.DaSystem    = DaSystem
 
-    q=SummarizeObs(DaCycle)
+    #q=SummarizeObs(DaCycle)
+
+    while DaCycle['time.start'] < DaCycle['time.finish']:
+
+        q=SummarizeStats(DaCycle)
+        DaCycle.AdvanceCycleTimes()
 
     sys.exit(0)
 
-- 
GitLab