obspack_geocarbon.py 20.8 KB
Newer Older
karolina's avatar
karolina committed
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env python
# obs.py

"""
Author : peters 

Revision History:
File created on 28 Jul 2010.

"""
import os
import sys
import logging
14
15
16
17
        
import datetime as dtm
from string import strip
from numpy import array, logical_and
karolina's avatar
karolina committed
18
19
20
21
22
23
sys.path.append(os.getcwd())
sys.path.append('../../')

identifier = 'CarbonTracker CO2 mixing ratios'
version = '0.0'

24
from da.baseclasses.obs import Observations
25
26
import da.tools.io4 as io
import da.tools.rc as rc
karolina's avatar
karolina committed
27
28
################### Begin Class ObsPackObservations ###################

29
class ObsPackObservations(Observations):
karolina's avatar
karolina committed
30
31
    """ an object that holds data + methods and attributes needed to manipulate mixing ratio values """

32
    def setup(self, dacycle):
karolina's avatar
karolina committed
33

34
35
        self.startdate = dacycle['time.sample.start']
        self.enddate = dacycle['time.sample.end']
karolina's avatar
karolina committed
36

37
38
        op_id = dacycle.dasystem['obspack.input.id']
        op_dir = dacycle.dasystem['obspack.input.dir']
karolina's avatar
karolina committed
39
40
41
42
43
44

        if not os.path.exists(op_dir):
            msg = 'Could not find  the required ObsPack distribution (%s) ' % op_dir
            logging.error(msg)
            raise IOError, msg
        else:
karolina's avatar
karolina committed
45
46
            self.obspack_dir = op_dir
            self.obspack_id = op_id
karolina's avatar
karolina committed
47
48
49
50
51
52
53
54
55
56
57
58
59

        self.datalist = []

    def add_observations(self):
        """ Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
      
            The ObsPack mixing ratio files are provided as time series per site with all dates in sequence. 
            We will loop over all site files in the ObsPackage, and subset each to our needs
            
        """

        # Step 1: Read list of available site files in package

karolina's avatar
karolina committed
60
        infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,))
karolina's avatar
karolina committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
        f = open(infile, 'r')
        lines = f.readlines()
        f.close()

        ncfilelist = []
        for line in lines:
            if line.startswith('#'): continue # header

            items = line.split()
            #ncfile, lab , start_date, stop_date, data_comparison = items[0:5]
            ncfile, lab , start_date, stop_date, data_comparison= line[:105].split()


            ncfilelist += [ncfile]

        logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist))

        for ncfile in ncfilelist:

karolina's avatar
karolina committed
80
            infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc')
81
            ncf = io.ct_read(infile, 'read')
82
            idates = ncf.get_variable('time_components')
karolina's avatar
karolina committed
83
84
85
86
87
88
            dates = array([dtm.datetime(*d) for d in idates])

            subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0]

            dates = dates.take(subselect, axis=0)
            
89
90
            obspacknum = ncf.get_variable('obspack_num').take(subselect)  # or should we propagate obs_num which is not unique across datasets??
            obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
karolina's avatar
karolina committed
91
92
93
            obspackid = [s.tostring().lower() for s in obspackid]
            obspackid = map(strip, obspackid)
            datasetname = ncfile  # use full name of dataset to propagate for clarity
94
95
96
97
            lats = ncf.get_variable('latitude').take(subselect, axis=0)
            lons = ncf.get_variable('longitude').take(subselect, axis=0)
            alts = ncf.get_variable('altitude').take(subselect, axis=0)
            obs = ncf.get_variable('value').take(subselect, axis=0)
98
            species = ncf.get_attribute('dataset_parameter')
99
            flags = ncf.get_variable('obs_flag').take(subselect, axis=0)
karolina's avatar
karolina committed
100
101
102
103
104
105
106
            ncf.close()

            for n in range(len(dates)): 
                self.datalist.append(MixingRatioSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile))

            logging.debug("Added %d observations from file (%s) to the Data list" % (len(dates), ncfile)) 

107
        logging.info("Observations list now holds %d values" % len(self.datalist))
karolina's avatar
karolina committed
108
109
110
111
112
113
114
115
116
117
118
119

    def add_simulations(self, filename, silent=False):
        """ Adds model simulated values to the mixing ratio objects """


        if not os.path.exists(filename):
            msg = "Sample output filename for observations could not be found : %s" % filename 
            logging.error(msg)
            logging.error("Did the sampling step succeed?")
            logging.error("...exiting")
            raise IOError, msg

120
        ncf = io.ct_read(filename, method='read')
121
122
        ids = ncf.get_variable('obs_num')
        simulated = ncf.get_variable('flask')
karolina's avatar
karolina committed
123
124
125
        ncf.close()
        logging.info("Successfully read data from model sample file (%s)" % filename)

126
        obs_ids = self.getvalues('id').tolist()
karolina's avatar
karolina committed
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
        ids = map(int, ids)

        missing_samples = []

        for idx, val in zip(ids, simulated): 
            if idx in obs_ids:
                index = obs_ids.index(idx)

                self.datalist[index].simulated = val  # in mol/mol
            else:     
                missing_samples.append(idx)

        if not silent and missing_samples != []:
            logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...')
            #msg = '%s'%missing_samples ; logging.warning(msg)

        logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))

145
    def write_sample_coords(self, obsinputfile):
karolina's avatar
karolina committed
146
147
148
149
150
151
152
153
        """ 
            Write the information needed by the observation operator to a file. Return the filename that was written for later use

        """

        f = io.CT_CDF(obsinputfile, method='create')
        logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)

154
155
156
157
        dimid = f.add_dim('obs', len(self.datalist))
        dim200char = f.add_dim('string_of200chars', 200)
        dim10char = f.add_dim('string_of10chars', 10)
        dimcalcomp = f.add_dim('calendar_components', 6)
karolina's avatar
karolina committed
158
159
160

        if len(self.datalist) == 0:
            f.close()
161
            #return obsinputfile
karolina's avatar
karolina committed
162

karolina's avatar
karolina committed
163
        for key, value in self.site_move.iteritems():
karolina's avatar
karolina committed
164
            msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value 
165
            f.add_attribute(key, msg)
karolina's avatar
karolina committed
166
167
168
169
170
171
172
173
174
175
176

        data = self.getvalues('id')

        savedict = io.std_savedict.copy() 
        savedict['name'] = "obs_num"
        savedict['dtype'] = "int"
        savedict['long_name'] = "Unique_Dataset_observation_index_number"
        savedict['units'] = ""
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
177
        f.add_data(savedict)
karolina's avatar
karolina committed
178
179
180
181
182
183
184
185
186
187
188
189

        data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ]

        savedict = io.std_savedict.copy() 
        savedict['dtype'] = "int"
        savedict['name'] = "date_components"
        savedict['units'] = "integer components of UTC date/time"
        savedict['dims'] = dimid + dimcalcomp
        savedict['values'] = data
        savedict['missing_value'] = -9
        savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
        savedict['order'] = "year, month, day, hour, minute, second"
190
        f.add_data(savedict)
karolina's avatar
karolina committed
191
192
193
194
195
196
197
198
199

        data = self.getvalues('lat')

        savedict = io.std_savedict.copy() 
        savedict['name'] = "latitude"
        savedict['units'] = "degrees_north"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
200
        f.add_data(savedict)
karolina's avatar
karolina committed
201
202
203
204
205
206
207
208
209

        data = self.getvalues('lon')

        savedict = io.std_savedict.copy() 
        savedict['name'] = "longitude"
        savedict['units'] = "degrees_east"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
210
        f.add_data(savedict)
karolina's avatar
karolina committed
211
212
213
214
215
216
217
218
219

        data = self.getvalues('height')

        savedict = io.std_savedict.copy() 
        savedict['name'] = "altitude"
        savedict['units'] = "meters_above_sea_level"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -999.9
220
        f.add_data(savedict)
karolina's avatar
karolina committed
221
222
223
224
225
226
227
228
229
230

        data = self.getvalues('samplingstrategy')

        savedict = io.std_savedict.copy() 
        savedict['dtype'] = "int"
        savedict['name'] = "sampling_strategy"
        savedict['units'] = "NA"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['missing_value'] = -9
231
        f.add_data(savedict)
karolina's avatar
karolina committed
232
233
234
235
236
237
238
239
240
241

        data = self.getvalues('evn')

        savedict = io.std_savedict.copy() 
        savedict['dtype'] = "char"
        savedict['name'] = "obs_id"
        savedict['units'] = "ObsPack datapoint identifier"
        savedict['dims'] = dimid + dim200char
        savedict['values'] = data
        savedict['missing_value'] = '!'
242
        f.add_data(savedict)
karolina's avatar
karolina committed
243
244
245
246
247
248

        f.close()

        logging.debug("Successfully wrote data to obs file")
        logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile)

249
        
karolina's avatar
karolina committed
250

251
    def add_model_data_mismatch(self, filename):
karolina's avatar
karolina committed
252
253
254
255
256
257
258
259
        """ 
            Get the model-data mismatch values for this cycle.

                (1) Open a sites_weights file
                (2) Parse the data
                (3) Compare site list against data
                (4) Take care of double sites, etc

260
        """    
karolina's avatar
karolina committed
261
262
263
264
265
266

        if not os.path.exists(filename):
            msg = 'Could not find  the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            raise IOError, msg
        else:
karolina's avatar
karolina committed
267
            self.sites_file = filename
karolina's avatar
karolina committed
268

karolina's avatar
karolina committed
269
        sites_weights = rc.read(self.sites_file)
karolina's avatar
karolina committed
270

karolina's avatar
karolina committed
271
272
273
        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])
karolina's avatar
karolina committed
274
275
276
277
278

        logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
        logging.warning('Model-data mismatch scaling factor     : %f ' % self.global_R_scaling)
        logging.debug('Model-data mismatch site categories    : %d ' % self.n_site_categories)
   
karolina's avatar
karolina committed
279
        cats = [k for k in sites_weights.keys() if 'site.category' in k] 
karolina's avatar
karolina committed
280

karolina's avatar
karolina committed
281
        site_categories = {}
karolina's avatar
karolina committed
282
        for key in cats:
karolina's avatar
karolina committed
283
            name, error, may_localize, may_reject = sites_weights[key].split(';')
karolina's avatar
karolina committed
284
285
286
287
            name = name.strip().lower()
            error = float(error)
            may_reject = ("TRUE" in may_reject.upper())
            may_localize = ("TRUE" in may_localize.upper())
karolina's avatar
karolina committed
288
            site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject}
karolina's avatar
karolina committed
289

karolina's avatar
karolina committed
290
291
292
        site_info = {}
        site_move = {}
        site_hourly = {}   # option added to include only certain hours of the day (for e.g. PAL) IvdL
293
        site_incalt = {} # option to increase sampling altitude for sites specified in sites and weights file 
karolina's avatar
karolina committed
294
        for key, value in sites_weights.iteritems():
karolina's avatar
karolina committed
295
296
297
298
            if 'co2_' in key or 'sf6' in key:  # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP
                sitename, sitecategory = key, value
                sitename = sitename.strip()
                sitecategory = sitecategory.split()[0].strip().lower()
karolina's avatar
karolina committed
299
                site_info[sitename] = site_categories[sitecategory]
karolina's avatar
karolina committed
300
301
            if 'site.move' in key:
                identifier, latmove, lonmove = value.split(';')
karolina's avatar
karolina committed
302
                site_move[identifier.strip()] = (float(latmove), float(lonmove))
karolina's avatar
karolina committed
303
304
            if 'site.hourly' in key:
                identifier, hourfrom, hourto = value.split(';')
karolina's avatar
karolina committed
305
                site_hourly[identifier.strip()] = (int(hourfrom), int(hourto))
306
307
308
309
            if 'site.incalt' in key:
                identifier, incalt = value.split(';')
                site_incalt[identifier.strip()] = (int(incalt))
                logging.debug('test')
karolina's avatar
karolina committed
310
311
312
313
314
315
316
317
318
319

        for obs in self.datalist:  # loop over all available data points

            obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set by script
            obs.flag = 99  # default is do-not-use , until explicitly set by script
            exclude_hourly = False # default is that hourly values are not included

            identifier = obs.code
            species, site, method, lab, datasetnr = identifier.split('_')

karolina's avatar
karolina committed
320
321
322
            if site_info.has_key(identifier):
                if site_hourly.has_key(identifier):
                    hourf, hourt = site_hourly[identifier]
karolina's avatar
karolina committed
323
                    if int(obs.xdate.hour) >= hourf and int(obs.xdate.hour) <= hourt:
324
                        logging.warning("Observations in hourly dataset INCLUDED, while sampling time %s was between %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
karolina's avatar
karolina committed
325
326
327
                    else:
                        logging.warning("Observation in hourly dataset EXCLUDED, while sampling time %s was outside %s:00-%s:00"%(obs.xdate.time(),hourf,hourt))
                        exclude_hourly = True
karolina's avatar
karolina committed
328
                if site_info[identifier]['category'] == 'do-not-use' or exclude_hourly:
karolina's avatar
karolina committed
329
                    logging.warning("Observation found (%s, %d), but not used in assimilation !!!" % (identifier, obs.id))
karolina's avatar
karolina committed
330
331
332
                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
karolina's avatar
karolina committed
333
334
335
                    obs.flag = 99
                else:
                    logging.debug("Observation found (%s, %d)" % (identifier, obs.id))
karolina's avatar
karolina committed
336
337
338
                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
karolina's avatar
karolina committed
339
340
341
                    obs.flag = 0

            else:
karolina's avatar
karolina committed
342
                logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s)  !!!" % (identifier, obs.id, self.sites_file))
karolina's avatar
karolina committed
343

karolina's avatar
karolina committed
344
            if site_move.has_key(identifier):
karolina's avatar
karolina committed
345

karolina's avatar
karolina committed
346
                movelat, movelon = site_move[identifier]
karolina's avatar
karolina committed
347
348
349
350
351
                obs.lat = obs.lat + movelat
                obs.lon = obs.lon + movelon

                logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon))

352
353
354
355
356
357
358
359
            if site_incalt.has_key(identifier):

                incalt = site_incalt[identifier]
                obs.height = obs.height + incalt

                logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt))


karolina's avatar
karolina committed
360
        # Add site_info dictionary to the Observations object for future use
karolina's avatar
karolina committed
361

karolina's avatar
karolina committed
362
363
364
        self.site_info = site_info
        self.site_move = site_move
        self.site_hourly = site_hourly
365
        self.site_incalt = site_incalt
karolina's avatar
karolina committed
366
367
368

        logging.debug("Added Model Data Mismatch to all samples ")

369
    def write_sample_auxiliary(self, auxoutputfile):
karolina's avatar
karolina committed
370
        """ 
371
            Write selected information contained in the Observations object to a file. 
karolina's avatar
karolina committed
372
373
374

        """

375
376
        f = io.CT_CDF(auxoutputfile, method='create')
        logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile)
karolina's avatar
karolina committed
377

378
379
380
381
        dimid = f.add_dim('obs', len(self.datalist))
        dim200char = f.add_dim('string_of200chars', 200)
        dim10char = f.add_dim('string_of10chars', 10)
        dimcalcomp = f.add_dim('calendar_components', 6)
karolina's avatar
karolina committed
382
383
384

        if len(self.datalist) == 0:
            f.close()
385
            #return outfile
karolina's avatar
karolina committed
386

karolina's avatar
karolina committed
387
        for key, value in self.site_move.iteritems():
karolina's avatar
karolina committed
388
            msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value 
389
            f.add_attribute(key, msg)
karolina's avatar
karolina committed
390
391
392
393
394
395
396
397
398
399
400

        data = self.getvalues('id')

        savedict = io.std_savedict.copy() 
        savedict['name'] = "obs_num"
        savedict['dtype'] = "int"
        savedict['long_name'] = "Unique_Dataset_observation_index_number"
        savedict['units'] = ""
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
401
        f.add_data(savedict)
karolina's avatar
karolina committed
402
403
404
405
406
407
408
409
410
411
412
413

        data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')]

        savedict = io.std_savedict.copy() 
        savedict['dtype'] = "int"
        savedict['name'] = "date_components"
        savedict['units'] = "integer components of UTC date/time"
        savedict['dims'] = dimid + dimcalcomp
        savedict['values'] = data
        savedict['missing_value'] = -9
        savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
        savedict['order'] = "year, month, day, hour, minute, second"
414
        f.add_data(savedict)
karolina's avatar
karolina committed
415
416
417
418
419
420
421
422
423
424

        data = self.getvalues('obs')

        savedict = io.std_savedict.copy()
        savedict['name'] = "observed"
        savedict['long_name'] = "observedvalues"
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Observations used in optimization'
425
        f.add_data(savedict)
karolina's avatar
karolina committed
426
427
428
429
430
431
432
433
434
435

        data = self.getvalues('mdm')

        savedict = io.std_savedict.copy()
        savedict['name'] = "modeldatamismatch"
        savedict['long_name'] = "modeldatamismatch"
        savedict['units'] = "[mol mol-1]"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
436
        f.add_data(savedict)
karolina's avatar
karolina committed
437
438
439

        data = self.getvalues('simulated') 

440
        dimmembers = f.add_dim('members', data.shape[1])
karolina's avatar
karolina committed
441
442
443
444
445
446
447
448

        savedict = io.std_savedict.copy()
        savedict['name'] = "modelsamples"
        savedict['long_name'] = "modelsamples for all ensemble members"
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimid + dimmembers
        savedict['values'] = data.tolist()
        savedict['comment'] = 'simulated mixing ratios based on optimized state vector'
449
        f.add_data(savedict)
karolina's avatar
karolina committed
450
451
452
453
454
455
456
457
458
459

        data = self.getvalues('fromfile') 

        savedict = io.std_savedict.copy()
        savedict['name'] = "inputfilename"
        savedict['long_name'] = "name of file where original obs data was taken from"
        savedict['dtype'] = "char"
        savedict['dims'] = dimid + dim200char
        savedict['values'] = data
        savedict['missing_value'] = '!'
460
        f.add_data(savedict)
karolina's avatar
karolina committed
461
462
463

        f.close()

464
        logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile)
karolina's avatar
karolina committed
465

466
        #return outfile
karolina's avatar
karolina committed
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510



################### End Class CtObservations ###################



################### Begin Class MixingRatioSample ###################

class MixingRatioSample(object):
    """ 
        Holds the data that defines a Mixing Ratio Sample in the data assimilation framework. Sor far, this includes all
        attributes listed below in the __init__ method. One can additionally make more types of data, or make new
        objects for specific projects.

    """

    def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'):
        self.code = code.strip()      # dataset identifier, i.e., co2_lef_tower_insitu_1_99
        self.xdate = xdate             # Date of obs
        self.obs = obs               # Value observed
        self.simulated = simulated         # Value simulated by model
        self.resid = resid             # Mixing ratio residuals
        self.hphr = hphr              # Mixing ratio prior uncertainty from fluxes and (HPH) and model data mismatch (R)
        self.mdm = mdm               # Model data mismatch
        self.may_localize = True           # Whether sample may be localized in optimizer
        self.may_reject = True              # Whether sample may be rejected if outside threshold
        self.flag = flag              # Flag
        self.height = height            # Sample height in masl
        self.lat = lat               # Sample lat
        self.lon = lon               # Sample lon
        self.id = idx               # Obspack ID within distrution (integer), e.g., 82536
        self.evn = evn               # Obspack Number within distrution (string), e.g., obspack_co2_1_PROTOTYPE_v0.9.2_2012-07-26_99_82536
        self.sdev = sdev              # standard deviation of ensemble
        self.masl = True              # Sample is in Meters Above Sea Level
        self.mag = not self.masl     # Sample is in Meters Above Ground
        self.species = species.strip()
        self.samplingstrategy = samplingstrategy
        self.fromfile = fromfile   # netcdf filename inside ObsPack distribution, to write back later

################### End Class MixingRatioSample ###################


if __name__ == "__main__":
511
    pass
karolina's avatar
karolina committed
512
513
514