From e37d6346996888be00bbb46aa17551edf206b9ef Mon Sep 17 00:00:00 2001 From: Wouter Peters <wouter.peters@wur.nl> Date: Fri, 16 Mar 2012 15:54:37 +0000 Subject: [PATCH] now a working version, but still needs tweaks based on final ObsPack design --- da/ct/obspack.py | 214 ++++++++++++++++++++--------------------------- 1 file changed, 92 insertions(+), 122 deletions(-) diff --git a/da/ct/obspack.py b/da/ct/obspack.py index f9f758c1..68ae4ceb 100755 --- a/da/ct/obspack.py +++ b/da/ct/obspack.py @@ -19,9 +19,9 @@ version = '0.0' from da.baseclasses.obs import Observation -################### Begin Class CtObservations ################### +################### Begin Class ObsPackObservations ################### -class CtObservations(Observation): +class ObsPackObservations(Observation): """ an object that holds data + methods and attributes needed to manipulate mixing ratio values """ def getid(self): @@ -39,18 +39,15 @@ class CtObservations(Observation): self.enddate = self.DaCycle['time.sample.end'] DaSystem = self.DaCycle.DaSystem - sfname = DaSystem['obs.input.fname'] + op_id = DaSystem['obspack.input.id'] + op_dir = DaSystem['obspack.input.dir'] - if sfname.endswith('.nc'): - filename = os.path.join(DaSystem['obs.input.dir'],sfname) - else: - filename = os.path.join(DaSystem['obs.input.dir'],sfname+'.'+self.startdate.strftime('%Y%m%d')+'.nc') - - if not os.path.exists(filename): - msg = 'Could not find the required observation input file (%s) ' % filename ; logging.error(msg) + if not os.path.exists(op_dir): + msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir ; logging.error(msg) raise IOError,msg else: - self.ObsFilename = filename + self.ObsPackDir = op_dir + self.ObsPackId = op_id self.Data = MixingRatioList([]) @@ -59,58 +56,68 @@ class CtObservations(Observation): def AddObs(self): """ Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file - The CarbonTracker mixing ratio files are provided as one long list of obs for all possible dates. So we can - either: - - (1) read all, and the subselect the data we will use in the rest of this cycle - (2) Use nco to make a subset of the data + The ObsPack mixing ratio files are provided as time series per site with all dates in sequence. + We will loop over all site files in the ObsPackage, and subset each to our needs - For now, we will stick with option (1) - """ import da.tools.io4 as io import datetime as dtm from string import strip, join from numpy import array, logical_and - ncf = io.CT_Read(self.ObsFilename,'read') - idates = ncf.GetVariable('date_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect,axis=0) - - ids = ncf.GetVariable('id').take(subselect,axis=0) - evn = ncf.GetVariable('eventnumber').take(subselect,axis=0) - evn = [s.tostring().lower() for s in evn] - evn = map(strip,evn) - sites = ncf.GetVariable('site').take(subselect,axis=0) - sites = [s.tostring().lower() for s in sites] - sites = map(strip,sites) - lats = ncf.GetVariable('lat').take(subselect,axis=0) - lons = ncf.GetVariable('lon').take(subselect,axis=0) - alts = ncf.GetVariable('alt').take(subselect,axis=0) - obs = ncf.GetVariable('obs').take(subselect,axis=0) - species = ncf.GetVariable('species').take(subselect,axis=0) - species = [s.tostring().lower() for s in species] - species = map(strip,species) - date = ncf.GetVariable('date').take(subselect,axis=0) - strategy = ncf.GetVariable('sampling_strategy').take(subselect,axis=0) - flags = ncf.GetVariable('NOAA_QC_flags').take(subselect,axis=0) - flags = [s.tostring().lower() for s in flags] - flags = map(strip,flags) - flags = [int(f == '...') for f in flags] - dummy = ncf.close() + # Step 1: Read list of available site files in package + + infile = os.path.join(self.ObsPackDir,'summary','%s_dataset_summary.txt'%(self.ObsPackId,)) + f = open(infile,'r') + lines = f.readlines() + dummy = f.close() + + ncfilelist = [] + for line in lines: + if line.startswith('#'): continue # header + + ncfile, lab , dist, start_date, stop_date, data_comparison = line.split() - msg = "Successfully read data from obs file (%s)"%self.ObsFilename ; logging.debug(msg) + ncfilelist += [ncfile] - - for n in range(len(dates)): + msg = "ObsPack dataset info read, proceeding with %d netcdf files"%(len(ncfilelist),) ; logging.debug(msg) - self.Data.append( MixingRatioSample(ids[n],dates[n],sites[n],obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],evn[n],species[n],strategy[n],0.0) ) + for ncfile in ncfilelist: - msg = "Added %d observations to the Data list" % len(dates) ; logging.debug(msg) + infile = os.path.join(self.ObsPackDir,'data','nc',ncfile) + ncf = io.CT_Read(infile,'read') + idates = ncf.GetVariable('time_components') + dates = array([dtm.datetime(*d) for d in idates]) + + subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] + + dates = dates.take(subselect,axis=0) + + ids = ncf.GetVariable('obspack_num').take(subselect) # or should we propagate obs_num which is not unique across datasets?? + evn = ncf.GetVariable('obspack_id').take(subselect,axis=0) + evn = [s.tostring().lower() for s in evn] + evn = map(strip,evn) + site = ncf.GetAttribute('site_code') + lats = ncf.GetVariable('latitude').take(subselect,axis=0) + lons = ncf.GetVariable('longitude').take(subselect,axis=0) + alts = ncf.GetVariable('altitude').take(subselect,axis=0) + obs = ncf.GetVariable('value').take(subselect,axis=0) + species = ncf.GetAttribute('dataset_parameter') + #strategy = ncf.GetVariable('sampling_strategy').take(subselect,axis=0) + strategy = 1 + flags = ncf.GetVariable('qc_flag').take(subselect,axis=0) + flags = [s.tostring().lower() for s in flags] + flags = map(strip,flags) + flags = [int(f == '...') for f in flags] + dummy = ncf.close() + + for n in range(len(dates)): + + self.Data.append( MixingRatioSample(ids[n],dates[n],site,obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],evn[n],species,strategy,0.0, ncfile) ) + + msg = "Added %d observations from file (%s) to the Data list" % (len(dates),ncfile,) ; logging.debug(msg) + + msg = "Observation list now holds %d values" % (len(self.Data),) ; logging.info(msg) def AddSimulations(self,filename,silent=True): """ Adds model simulated values to the mixing ratio objects """ @@ -124,7 +131,7 @@ class CtObservations(Observation): raise IOError,msg ncf = io.CT_Read(filename,method='read') - ids = ncf.GetVariable('id') + ids = ncf.GetVariable('obs_num') simulated = ncf.GetVariable('flask') dummy = ncf.close() msg = "Successfully read data from model sample file (%s)"%filename ; logging.info(msg) @@ -169,33 +176,21 @@ class CtObservations(Observation): f = io.CT_CDF(obsinputfile,method='create') msg = 'Creating new observations file for ObservationOperator (%s)' % obsinputfile ; logging.debug(msg) - dimid = f.AddDim('id',len(self.Data)) - dim30char = f.AddDim('string_of30chars',30) - dim24char = f.AddDim('string_of24chars',24) + dimid = f.AddDim('obs',len(self.Data)) + dim50char = f.AddDim('string_of50chars',50) dim10char = f.AddDim('string_of10chars',10) dimcalcomp = f.AddDim('calendar_components',6) data = self.Data.getvalues('id') savedict = io.std_savedict.copy() - savedict['name'] = "id" + savedict['name'] = "obs_num" savedict['dtype'] = "int" - savedict['long_name'] = "identification number" - savedict['units'] = "NA" + savedict['long_name'] = "Unique_Dataset_observation_index_number" + savedict['units'] = "" savedict['dims'] = dimid savedict['values'] = data.tolist() - savedict['comment'] = "This is a unique identifier for each preprocessed observation used in CarbonTracker" - dummy = f.AddData(savedict) - - data = [ToDectime(d) for d in self.Data.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['name'] = "decimal_date" - savedict['units'] = "years" - savedict['dims'] = dimid - savedict['values'] = data - savedict['missing_value'] = -1.e34 - savedict['_FillValue'] = -1.e34 + savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." dummy = f.AddData(savedict) data = [[d.year,d.month,d.day,d.hour,d.minute,d.second] for d in self.Data.getvalues('xdate') ] @@ -203,7 +198,7 @@ class CtObservations(Observation): savedict = io.std_savedict.copy() savedict['dtype'] = "int" savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date" + savedict['units'] = "integer components of UTC date/time" savedict['dims'] = dimid+dimcalcomp savedict['values'] = data savedict['missing_value'] = -9 @@ -215,7 +210,7 @@ class CtObservations(Observation): data = self.Data.getvalues('lat') savedict = io.std_savedict.copy() - savedict['name'] = "lat" + savedict['name'] = "latitude" savedict['units'] = "degrees_north" savedict['dims'] = dimid savedict['values'] = data.tolist() @@ -226,7 +221,7 @@ class CtObservations(Observation): data = self.Data.getvalues('lon') savedict = io.std_savedict.copy() - savedict['name'] = "lon" + savedict['name'] = "longitude" savedict['units'] = "degrees_east" savedict['dims'] = dimid savedict['values'] = data.tolist() @@ -237,7 +232,7 @@ class CtObservations(Observation): data = self.Data.getvalues('height') savedict = io.std_savedict.copy() - savedict['name'] = "alt" + savedict['name'] = "altitude" savedict['units'] = "meters_above_sea_level" savedict['dims'] = dimid savedict['values'] = data.tolist() @@ -257,47 +252,17 @@ class CtObservations(Observation): savedict['_FillValue'] = -9 dummy = f.AddData(savedict) - try: - - data = self.Data.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "eventnumber" - savedict['units'] = "NOAA database identifier" - savedict['dims'] = dimid+dim30char - savedict['values'] = data - savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) - - - data = self.Data.getvalues('species') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "species" - savedict['units'] = "chemical_species_name" - savedict['dims'] = dimid+dim10char - savedict['values'] = data - savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) + data = self.Data.getvalues('evn') - data = self.Data.getvalues('code') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "site" - savedict['units'] = "site_identifier" - savedict['dims'] = dimid+dim24char - savedict['values'] = data - savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) - - except: - msg = "Character arrays 'species' and 'sites' were not written by the io module" ; logging.warning (msg) + savedict = io.std_savedict.copy() + savedict['dtype'] = "char" + savedict['name'] = "obs_id" + savedict['units'] = "NOAA database identifier" + savedict['dims'] = dimid+dim50char + savedict['values'] = data + savedict['missing_value'] = '-' + savedict['_FillValue'] = '-' + dummy = f.AddData(savedict) dummy = f.close() @@ -368,13 +333,17 @@ class CtObservations(Observation): obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - if SiteInfo.has_key(obs.code): - msg = "Observation found (%s)" % obs.code ; logging.debug(msg) - obs.mdm = SiteInfo[obs.code]['error'] - obs.may_localize = SiteInfo[obs.code]['may_localize'] - obs.may_reject = SiteInfo[obs.code]['may_reject'] + species, site, method, lab, nr = obs.fromfile.split('_') + + identifier = "%s_%02d_%s"%(site,int(lab),method,) + + if SiteInfo.has_key(identifier): + msg = "Observation found (%s, %s)" % (obs.code, identifier,) ; logging.debug(msg) + obs.mdm = SiteInfo[identifier]['error'] + obs.may_localize = SiteInfo[identifier]['may_localize'] + obs.may_reject = SiteInfo[identifier]['may_reject'] else: - msg= "Observation NOT found (%s), please check sites.rc file (%s) !!!" % (obs.code, self.SitesFile,) ; logging.warning(msg) + msg= "Observation NOT found (%s, %s), please check sites.rc file (%s) !!!" % (obs.code, identifier, self.SitesFile,) ; logging.warning(msg) obs.flag = 99 # Add SiteInfo dictionary to the Observation object for future use @@ -396,7 +365,7 @@ class MixingRatioSample(object): """ - def __init__(self,id,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',species='co2',samplingstrategy=1,sdev=0.0): + def __init__(self,id,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',species='co2',samplingstrategy=1,sdev=0.0,fromfile='none.nc'): self.code = code.strip() # Site code self.xdate = xdate # Date of obs @@ -418,6 +387,7 @@ class MixingRatioSample(object): self.mag = not self.masl # Sample is in Meters Above Ground self.species = species.strip() self.samplingstrategy = samplingstrategy + self.fromfile = fromfile # netcdf filename inside ObsPack distribution, to write back later def __str__(self): day=self.xdate.strftime('%Y-%m-%d %H:%M:%S') -- GitLab