summarize_obs.py 11.7 KB
Newer Older
karolina's avatar
karolina committed
1
2
3
4
5
6
7
8
#!/usr/bin/env python
import sys
sys.path.append('../../')
import os
import numpy as np
import string
import datetime as dt
import logging
9
import re
karolina's avatar
karolina committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import da.tools.io4 as io

fontsize = 10

def nice_lat(cls):
    #
    # Convert latitude from decimal to cardinal
    #
    if cls > 0:
        h = 'N'
    else:
        h = 'S'
  
    dec, deg = np.math.modf(cls)

    return string.strip('%2d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))

def nice_lon(cls):
    #
    # Convert longitude from decimal to cardinal
    #
    if cls > 0:
        h = 'E'
    else:
        h = 'W'
  
    dec, deg = np.math.modf(cls)

    return string.strip('%3d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))

def nice_alt(cls):
    #
    # Reformat elevation or altitude
    #
    return string.strip('%10.1f masl' % round(cls, -1))


47
def summarize_obs(dacycle, printfmt='html'):
karolina's avatar
karolina committed
48
49
50
51
52
53
54
    """***************************************************************************************
    Call example:

    python summarize_obs.py 

    Option printfmt    : [tex,scr,html] print summary table in latex, terminal, or html format 

55
    Other options are all those needed to create a dacycle object
karolina's avatar
karolina committed
56
57
58
59
60

    OR:

    call directly from a python script as:

61
    q=summarize_obs(dacycle,printfmt='html')
karolina's avatar
karolina committed
62
63
64

    ***************************************************************************************"""

65
    sumdir = os.path.join(dacycle['dir.analysis'], 'summary')
karolina's avatar
karolina committed
66
67
68
69
    if not os.path.exists(sumdir):
        logging.info("Creating new directory " + sumdir)
        os.makedirs(sumdir)

70
    mrdir = os.path.join(dacycle['dir.analysis'], 'data_molefractions')
karolina's avatar
karolina committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
    if not os.path.exists(mrdir):
        logging.error("Input directory does not exist (%s), exiting... " % mrdir)
        return None

    mrfiles = os.listdir(mrdir)
    infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')]

    if printfmt == 'tex': 
        print '\\begin{tabular*}{\\textheight}{l l l l r r r r}'
        print 'Code &  Name & Lat, Lon, Elev & Lab &  N (flagged) & $\\sqrt{R}$  &Inn \\XS &Bias\\\\'
        print '\hline\\\\ \n\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] '
        fmt = '%8s  & ' + ' %55s  & ' + '%20s &' + '%6s &' + ' %4d (%d)  & ' + ' %5.2f  & ' + ' %5.2f & ' + '%+5.2f  \\\\'
    elif printfmt == 'html':
        tablehead = \
              "<TR>\n <TH> Site code </TH> \
                   <TH> Sampling Type </TH> \
                   <TH> Lab. </TH> \
                   <TH> Country </TH> \
                   <TH> Lat, Lon, Elev. (m ASL) </TH> \
                   <TH> No. Obs. Avail. </TH>  \
                   <TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH>  \
                   <TH> &#8730;HPH  (&mu;mol mol<sup>-1</sup>) </TH> \
                   <TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \
                   <TH> H(x)-y (JJAS) (&mu;mol mol<sup>-1</sup>) </TH>  \
                   <TH> H(x)-y (NDJFMA) (&mu;mol mol<sup>-1</sup>) </TH> \n \
               </TR>\n"

        fmt = """<TR> \n \
                <TD><a href='javascript:LoadCO2Tseries("%s")'>%s </a></TD>\
                <TD>%s</TD>\
                <TD>%s</TD>\
                <TD>%40s</TD>\
                <TD>%s</TD>\
                <TD>%d</TD>\
                <TD>%+5.2f</TD>\
                <TD>%+5.2f</TD>\
                <TD>%+5.2f&plusmn;%5.2f</TD>\
                <TD>%+5.2f&plusmn;%5.2f</TD>\
                <TD>%+5.2f&plusmn;%5.2f</TD>\n \
               </TR>\n"""
    elif printfmt == 'scr':
        print 'Code   Site     NObs flagged  R  Inn X2'
        fmt = '%8s ' + ' %55s  %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f'

    table = []
116

karolina's avatar
karolina committed
117
118
119
120
    for infile in infiles:

            #logging.debug( infile )
            f = io.CT_CDF(infile, 'read')
121
122
123
124
125
            date = f.get_variable('time')
            obs = f.get_variable('value') * 1e6
            mdm = f.get_variable('modeldatamismatch') * 1e6
            simulated = f.get_variable('modelsamplesmean_forecast') * 1e6
            simulated_std = f.get_variable('modelsamplesstandarddeviation_forecast') * 1e6
karolina's avatar
karolina committed
126
127
128
129
130
131
132
133
134
135
136
137

            pydates = [dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]

            summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9] ]
            winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4] ]

            diff = ((simulated - obs).mean())
            diffsummer = ((simulated - obs).take(summer).mean())
            diffwinter = ((simulated - obs).take(winter).mean())
            diffstd = ((simulated - obs).std())
            diffsummerstd = ((simulated - obs).take(summer).std())
            diffwinterstd = ((simulated - obs).take(winter).std())
138
            
karolina's avatar
karolina committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
            location = nice_lat(f.site_latitude) + ', ' + nice_lon(f.site_longitude) + ', ' + nice_alt(f.site_elevation)

            if printfmt == 'html':
                ss = (f.site_code.upper(),
                    f.site_code.upper(),
                    f.dataset_project,
                    f.lab_abbr,
                    f.site_country,
                    location,
                    len(np.ma.compressed(mdm)),
                    mdm.mean(),
                    np.sqrt((simulated_std ** 2).mean()),
                    diff, diffstd,
                    diffsummer, diffsummerstd,
                    diffwinter, diffwinterstd)
            
            table.append(ss)
            f.close()

    if printfmt == 'tex':
        saveas = os.path.join(sumdir, 'site_table.tex')
        f = open(saveas, 'w')
    elif printfmt == 'html':
        saveas = os.path.join(sumdir, 'site_table.html')
        f = open(saveas, 'w')
        txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
        f.write(txt)
        txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
        f.write(txt)

    f.write(tablehead)

    for i, ss in enumerate(table):

        f.write(fmt % ss)
        if (i + 1) % 15 == 0:
            f.write(tablehead)

    if printfmt == 'tex': 
        f.write('\cline{2-8}\\\\')
        f.write('\hline \\\\')
        f.write('\end{tabular*}')
    else:
        txt = "\n</table>"
        f.write(txt)
    f.close()

    logging.info("File written with summary: %s" % saveas)

188
def summarize_stats(dacycle):
karolina's avatar
karolina committed
189
190
191
192
193
194
195
    """
    Summarize the statistics of the observations for this cycle
    This includes X2 statistics, RMSD, and others for both forecast and
    final fluxes
    """
    

196
    sumdir = os.path.join(dacycle['dir.analysis'], 'summary')
karolina's avatar
karolina committed
197
198
199
200
201
202
    if not os.path.exists(sumdir):
        logging.info("Creating new directory " + sumdir)
        os.makedirs(sumdir)

    # get forecast data from optimizer.ddddd.nc

203
204
205
    startdate = dacycle['time.start'] 
    dacycle['time.sample.stamp'] = "%s" % (startdate.strftime("%Y%m%d"),)
    infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % dacycle['time.sample.stamp'])
karolina's avatar
karolina committed
206
207
208
209
210
211

    if not os.path.exists(infile):
        logging.error("File not found: %s" % infile)
        raise IOError

    f = io.CT_CDF(infile, 'read')
212
213
214
215
216
217
218
    sites = f.get_variable('sitecode')
    y0 = f.get_variable('observed') * 1e6
    hx = f.get_variable('modelsamplesmean_prior') * 1e6
    dF = f.get_variable('modelsamplesdeviations_prior') * 1e6
    HPHTR = f.get_variable('totalmolefractionvariance').diagonal() * 1e6 * 1e6
    R = f.get_variable('modeldatamismatchvariance').diagonal() * 1e6 * 1e6
    flags = f.get_variable('flag')
karolina's avatar
karolina committed
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
    f.close()

    HPHT = dF.dot(np.transpose(dF)).diagonal() / (dF.shape[1] - 1.0)
    rejected = (flags == 2.0)

    sitecodes = [string.join(s.compressed(), '').strip() for s in sites]


    # calculate X2 per observation for this time step

    x2 = []
    for i, site in enumerate(sitecodes):
        
        x2.append((y0[i] - hx[i]) ** 2 / HPHTR[i])

    x2 = np.ma.masked_where(HPHTR == 0.0, x2)

    # calculate X2 per site
237
    saveas = os.path.join(sumdir, 'x2_table_%s.html' % dacycle['time.sample.stamp'])
karolina's avatar
karolina committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
    logging.info("Writing HTML tables for this cycle (%s)" % saveas)
    f = open(saveas, 'w')
    txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
    f.write(txt)
    txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
    f.write(txt)
    tablehead = \
          "<TR>\n <TH> Site code </TH> \
               <TH> N<sub>obs</sub> </TH>  \
               <TH> N<sub>rejected</sub> </TH>  \
               <TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH>  \
               <TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH>  \
               <TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
               <TH> X2 </TH> \n \
           </TR>\n"

    fmt = """<TR> \n \
            <TD>%s</TD>\
            <TD>%d</TD>\
            <TD>%d</TD>\
            <TD>%+5.2f</TD>\
            <TD>%+5.2f</TD>\
            <TD>%+5.2f&plusmn;%5.2f</TD>\
            <TD>%5.2f</TD>\n \
           </TR>\n"""

    f.write(tablehead)

    set_sites = set(sitecodes)
    set_sites = np.sort(list(set_sites))

    for i, site in enumerate(set_sites):
        sel = [i for i, s in enumerate(sitecodes) if s == site]
        ss = (site, len(sel), rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]), np.sqrt(HPHT.take(sel).mean()), (hx - y0).take(sel).mean(), (hx - y0).take(sel).std(), x2.take(sel).mean(),)
        #print site,sel,x2.take(sel)

        f.write(fmt % ss)
        if (i + 1) % 15 == 0:
            f.write(tablehead)

    txt = "\n</table>"
    f.write(txt)
    f.close()

    # Now summarize for each site across time steps

284
    if not dacycle['time.start'] >= dt.datetime(2008, 12, 29):
karolina's avatar
karolina committed
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
        return

    logging.info("Writing HTML tables for each site")
    for site in set_sites:
        saveas = os.path.join(sumdir, '%s_x2.html' % site)
        f = open(saveas, 'w')
        logging.debug(saveas)
        txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
        f.write(txt)
        txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
        f.write(txt)
        tablehead = \
              "<TR>\n <TH> From File </TH> \
                   <TH> Site </TH>  \
                   <TH> N<sub>obs</sub> </TH>  \
                   <TH> N<sub>rejected</sub> </TH>  \
                   <TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH>  \
                   <TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH>  \
                   <TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
                   <TH> X2 </TH> \n \
               </TR>\n"
        f.write(tablehead)

        files = os.listdir(sumdir)
309
        x2_files = [fil for fil in files if fil.startswith('x2')]
karolina's avatar
karolina committed
310
311
312
313
314
315
316
317
318
319
320
321
        for htmlfile in x2_files:
            lines = grep(site, os.path.join(sumdir, htmlfile))
            for line in lines:
                f.write('<TR>\n')
                f.write('<TD>' + htmlfile + '</TD>')
                f.write(line + '\n')
                f.write('</TR>\n')

        txt = "\n</table>"
        f.write(txt)
        f.close()

322
323
324

def grep(pattern, fil):
    fileObj = open(fil, 'r')
karolina's avatar
karolina committed
325
326
327
328
329
330
331
332
333
334
    r = []
    for line in fileObj:
        if re.search(pattern, line):
            r.append(line)
    return r

# main body if called as script

if __name__ == '__main__':    # started as script
    from da.tools.initexit import CycleControl
335
    from da.carbondioxide.dasystem import CO2DaSystem 
karolina's avatar
karolina committed
336
337
338
339
340

    sys.path.append('../../')

    logging.root.setLevel(logging.DEBUG)

341
    dacycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'})
342
    dacycle.setup()
343
    dacycle.parse_times()
karolina's avatar
karolina committed
344

345
    dasystem = CO2DaSystem('../rc/carbontracker_ct09_opf.rc')
346
    dasystem.setup()
karolina's avatar
karolina committed
347

348
    dacycle.dasystem = dasystem
karolina's avatar
karolina committed
349

350
    q = summarize_obs(dacycle)
karolina's avatar
karolina committed
351

352
353
354
    while dacycle['time.start'] < dacycle['time.finish']:
        q = summarize_stats(dacycle)
        dacycle.advance_cycle_times()
karolina's avatar
karolina committed
355
356
357
358

    sys.exit(0)