From 18ccd2ac8e105d67c7dc9c78baa60e9519c8b7ae Mon Sep 17 00:00:00 2001 From: Wouter Peters <wouter.peters@wur.nl> Date: Tue, 28 Aug 2012 07:35:14 +0000 Subject: [PATCH] A new routine has been added that summarizes the X2 statistics for all time steps, and for all sites. The result is an html table in the analysis/summary folder. --- da/analysis/summarize_obs.py | 155 ++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 3 deletions(-) diff --git a/da/analysis/summarize_obs.py b/da/analysis/summarize_obs.py index 07bccf1f..0d62c247 100755 --- a/da/analysis/summarize_obs.py +++ b/da/analysis/summarize_obs.py @@ -192,6 +192,150 @@ def SummarizeObs(DaCycle,printfmt='html'): logging.info("File written with summary: %s" % saveas) +def SummarizeStats(DaCycle): + """ + Summarize the statistics of the observations for this cycle + This includes X2 statistics, RMSD, and others for both forecast and + final fluxes + """ + import string + + sumdir=os.path.join(DaCycle['dir.analysis'],'summary') + if not os.path.exists(sumdir): + logging.info( "Creating new directory "+sumdir ) + os.makedirs(sumdir) + + # get forecast data from optimizer.ddddd.nc + + startdate = DaCycle['time.start'] + DaCycle['time.sample.stamp'] = "%s"%(startdate.strftime("%Y%m%d"),) + infile = os.path.join(DaCycle['dir.output'],'optimizer.%s.nc'%DaCycle['time.sample.stamp']) + + if not os.path.exists(infile): + logging.error("File not found: %s"%infile) + raise IOError + + f = io.CT_CDF(infile,'read') + sites = f.GetVariable('sitecode') + y0 = f.GetVariable('observed')*1e6 + hx = f.GetVariable('modelsamplesmean_prior')*1e6 + dF = f.GetVariable('modelsamplesdeviations_prior')*1e6 + HPHTR = f.GetVariable('totalmolefractionvariance').diagonal()*1e6*1e6 + R = f.GetVariable('modeldatamismatchvariance').diagonal()*1e6*1e6 + flags = f.GetVariable('flag') + f.close() + + HPHT = dF.dot(np.transpose(dF)).diagonal()/(dF.shape[1]-1.0) + rejected = (flags == 2.0) + + sitecodes = [string.join(s.compressed(),'').strip() for s in sites] + + + # calculate X2 per observation for this time step + + x2=[] + for i,site in enumerate(sitecodes): + + x2.append((y0[i]-hx[i])**2/HPHTR[i] ) + + x2=np.ma.masked_where(HPHTR == 0.0,x2) + + # calculate X2 per site + saveas=os.path.join(sumdir,'x2_table_%s.html'%DaCycle['time.sample.stamp'] ) + logging.info("Writing HTML tables for this cycle (%s)"%saveas) + f=open(saveas,'w') + txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n" + f.write(txt) + txt="<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n" + f.write(txt) + tablehead = \ + "<TR>\n <TH> Site code </TH> \ + <TH> N<sub>obs</sub> </TH> \ + <TH> N<sub>rejected</sub> </TH> \ + <TH> √R (μmol mol<sup>-1</sup>) </TH> \ + <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> \ + <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n \ + <TH> X2 </TH> \n \ + </TR>\n" + + fmt= """<TR> \n \ + <TD>%s</TD>\ + <TD>%d</TD>\ + <TD>%d</TD>\ + <TD>%+5.2f</TD>\ + <TD>%+5.2f</TD>\ + <TD>%+5.2f±%5.2f</TD>\ + <TD>%5.2f</TD>\n \ + </TR>\n""" + + f.write(tablehead) + + set_sites = set(sitecodes) + set_sites = np.sort(list(set_sites)) + + for i,site in enumerate(set_sites): + sel = [i for i,s in enumerate(sitecodes) if s == site] + ss=(site, len(sel),rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]), np.sqrt(HPHT.take(sel).mean()), (hx-y0).take(sel).mean(),(hx-y0).take(sel).std(), x2.take(sel).mean(),) + #print site,sel,x2.take(sel) + + f.write(fmt%ss) + if (i+1)%15 == 0: + f.write(tablehead) + + txt="\n</table>" + f.write(txt) + f.close() + + # Now summarize for each site across time steps + + if not DaCycle['time.start'] >= dt.datetime(2008,12,29): + return + + logging.info("Writing HTML tables for each site") + for site in set_sites: + saveas=os.path.join(sumdir,'%s_x2.html'%site) + f=open(saveas,'w') + logging.debug(saveas) + txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n" + f.write(txt) + txt="<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n" + f.write(txt) + tablehead = \ + "<TR>\n <TH> From File </TH> \ + <TH> Site </TH> \ + <TH> N<sub>obs</sub> </TH> \ + <TH> N<sub>rejected</sub> </TH> \ + <TH> √R (μmol mol<sup>-1</sup>) </TH> \ + <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> \ + <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n \ + <TH> X2 </TH> \n \ + </TR>\n" + f.write(tablehead) + + files = os.listdir(sumdir) + x2_files = [file for file in files if file.startswith('x2')] + for htmlfile in x2_files: + lines = grep(site,os.path.join(sumdir,htmlfile)) + for line in lines: + f.write('<TR>\n') + f.write('<TD>'+htmlfile+'</TD>') + f.write(line+'\n') + f.write('</TR>\n') + + txt="\n</table>" + f.write(txt) + f.close() + +import re +def grep(pattern,file): + fileObj = open(file,'r') + r=[] + linenumber=0 + for line in fileObj: + if re.search(pattern,line): + r.append(line) + return r + # main body if called as script if __name__ == '__main__': # started as script @@ -204,16 +348,21 @@ if __name__ == '__main__': # started as script logging.root.setLevel(logging.DEBUG) - DaCycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack.rc'}) + DaCycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'}) DaCycle.Initialize() DaCycle.ParseTimes() - DaSystem = CtDaSystem('../rc/carbontracker_ct09_op.rc') + DaSystem = CtDaSystem('../rc/carbontracker_ct09_opf.rc') DaSystem.Initialize() DaCycle.DaSystem = DaSystem - q=SummarizeObs(DaCycle) + #q=SummarizeObs(DaCycle) + + while DaCycle['time.start'] < DaCycle['time.finish']: + + q=SummarizeStats(DaCycle) + DaCycle.AdvanceCycleTimes() sys.exit(0) -- GitLab