Commit e37d6346 authored by Peters, Wouter's avatar Peters, Wouter
Browse files

now a working version, but still needs tweaks based on final ObsPack design

parent da6931b9
......@@ -19,9 +19,9 @@ version = '0.0'
from da.baseclasses.obs import Observation
################### Begin Class CtObservations ###################
################### Begin Class ObsPackObservations ###################
class CtObservations(Observation):
class ObsPackObservations(Observation):
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def getid(self):
......@@ -39,18 +39,15 @@ class CtObservations(Observation):
self.enddate = self.DaCycle['time.sample.end']
DaSystem = self.DaCycle.DaSystem
sfname = DaSystem['obs.input.fname']
op_id = DaSystem['obspack.input.id']
op_dir = DaSystem['obspack.input.dir']
if sfname.endswith('.nc'):
filename = os.path.join(DaSystem['obs.input.dir'],sfname)
else:
filename = os.path.join(DaSystem['obs.input.dir'],sfname+'.'+self.startdate.strftime('%Y%m%d')+'.nc')
if not os.path.exists(filename):
msg = 'Could not find the required observation input file (%s) ' % filename ; logging.error(msg)
if not os.path.exists(op_dir):
msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir ; logging.error(msg)
raise IOError,msg
else:
self.ObsFilename = filename
self.ObsPackDir = op_dir
self.ObsPackId = op_id
self.Data = MixingRatioList([])
......@@ -59,58 +56,68 @@ class CtObservations(Observation):
def AddObs(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
The CarbonTracker mixing ratio files are provided as one long list of obs for all possible dates. So we can
either:
(1) read all, and the subselect the data we will use in the rest of this cycle
(2) Use nco to make a subset of the data
The ObsPack mixing ratio files are provided as time series per site with all dates in sequence.
We will loop over all site files in the ObsPackage, and subset each to our needs
For now, we will stick with option (1)
"""
import da.tools.io4 as io
import datetime as dtm
from string import strip, join
from numpy import array, logical_and
ncf = io.CT_Read(self.ObsFilename,'read')
idates = ncf.GetVariable('date_components')
dates = array([dtm.datetime(*d) for d in idates])
subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0]
dates = dates.take(subselect,axis=0)
ids = ncf.GetVariable('id').take(subselect,axis=0)
evn = ncf.GetVariable('eventnumber').take(subselect,axis=0)
evn = [s.tostring().lower() for s in evn]
evn = map(strip,evn)
sites = ncf.GetVariable('site').take(subselect,axis=0)
sites = [s.tostring().lower() for s in sites]
sites = map(strip,sites)
lats = ncf.GetVariable('lat').take(subselect,axis=0)
lons = ncf.GetVariable('lon').take(subselect,axis=0)
alts = ncf.GetVariable('alt').take(subselect,axis=0)
obs = ncf.GetVariable('obs').take(subselect,axis=0)
species = ncf.GetVariable('species').take(subselect,axis=0)
species = [s.tostring().lower() for s in species]
species = map(strip,species)
date = ncf.GetVariable('date').take(subselect,axis=0)
strategy = ncf.GetVariable('sampling_strategy').take(subselect,axis=0)
flags = ncf.GetVariable('NOAA_QC_flags').take(subselect,axis=0)
flags = [s.tostring().lower() for s in flags]
flags = map(strip,flags)
flags = [int(f == '...') for f in flags]
dummy = ncf.close()
# Step 1: Read list of available site files in package
infile = os.path.join(self.ObsPackDir,'summary','%s_dataset_summary.txt'%(self.ObsPackId,))
f = open(infile,'r')
lines = f.readlines()
dummy = f.close()
ncfilelist = []
for line in lines:
if line.startswith('#'): continue # header
ncfile, lab , dist, start_date, stop_date, data_comparison = line.split()
msg = "Successfully read data from obs file (%s)"%self.ObsFilename ; logging.debug(msg)
ncfilelist += [ncfile]
for n in range(len(dates)):
msg = "ObsPack dataset info read, proceeding with %d netcdf files"%(len(ncfilelist),) ; logging.debug(msg)
self.Data.append( MixingRatioSample(ids[n],dates[n],sites[n],obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],evn[n],species[n],strategy[n],0.0) )
for ncfile in ncfilelist:
msg = "Added %d observations to the Data list" % len(dates) ; logging.debug(msg)
infile = os.path.join(self.ObsPackDir,'data','nc',ncfile)
ncf = io.CT_Read(infile,'read')
idates = ncf.GetVariable('time_components')
dates = array([dtm.datetime(*d) for d in idates])
subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0]
dates = dates.take(subselect,axis=0)
ids = ncf.GetVariable('obspack_num').take(subselect) # or should we propagate obs_num which is not unique across datasets??
evn = ncf.GetVariable('obspack_id').take(subselect,axis=0)
evn = [s.tostring().lower() for s in evn]
evn = map(strip,evn)
site = ncf.GetAttribute('site_code')
lats = ncf.GetVariable('latitude').take(subselect,axis=0)
lons = ncf.GetVariable('longitude').take(subselect,axis=0)
alts = ncf.GetVariable('altitude').take(subselect,axis=0)
obs = ncf.GetVariable('value').take(subselect,axis=0)
species = ncf.GetAttribute('dataset_parameter')
#strategy = ncf.GetVariable('sampling_strategy').take(subselect,axis=0)
strategy = 1
flags = ncf.GetVariable('qc_flag').take(subselect,axis=0)
flags = [s.tostring().lower() for s in flags]
flags = map(strip,flags)
flags = [int(f == '...') for f in flags]
dummy = ncf.close()
for n in range(len(dates)):
self.Data.append( MixingRatioSample(ids[n],dates[n],site,obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],evn[n],species,strategy,0.0, ncfile) )
msg = "Added %d observations from file (%s) to the Data list" % (len(dates),ncfile,) ; logging.debug(msg)
msg = "Observation list now holds %d values" % (len(self.Data),) ; logging.info(msg)
def AddSimulations(self,filename,silent=True):
""" Adds model simulated values to the mixing ratio objects """
......@@ -124,7 +131,7 @@ class CtObservations(Observation):
raise IOError,msg
ncf = io.CT_Read(filename,method='read')
ids = ncf.GetVariable('id')
ids = ncf.GetVariable('obs_num')
simulated = ncf.GetVariable('flask')
dummy = ncf.close()
msg = "Successfully read data from model sample file (%s)"%filename ; logging.info(msg)
......@@ -169,33 +176,21 @@ class CtObservations(Observation):
f = io.CT_CDF(obsinputfile,method='create')
msg = 'Creating new observations file for ObservationOperator (%s)' % obsinputfile ; logging.debug(msg)
dimid = f.AddDim('id',len(self.Data))
dim30char = f.AddDim('string_of30chars',30)
dim24char = f.AddDim('string_of24chars',24)
dimid = f.AddDim('obs',len(self.Data))
dim50char = f.AddDim('string_of50chars',50)
dim10char = f.AddDim('string_of10chars',10)
dimcalcomp = f.AddDim('calendar_components',6)
data = self.Data.getvalues('id')
savedict = io.std_savedict.copy()
savedict['name'] = "id"
savedict['name'] = "obs_num"
savedict['dtype'] = "int"
savedict['long_name'] = "identification number"
savedict['units'] = "NA"
savedict['long_name'] = "Unique_Dataset_observation_index_number"
savedict['units'] = ""
savedict['dims'] = dimid
savedict['values'] = data.tolist()
savedict['comment'] = "This is a unique identifier for each preprocessed observation used in CarbonTracker"
dummy = f.AddData(savedict)
data = [ToDectime(d) for d in self.Data.getvalues('xdate') ]
savedict = io.std_savedict.copy()
savedict['name'] = "decimal_date"
savedict['units'] = "years"
savedict['dims'] = dimid
savedict['values'] = data
savedict['missing_value'] = -1.e34
savedict['_FillValue'] = -1.e34
savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
dummy = f.AddData(savedict)
data = [[d.year,d.month,d.day,d.hour,d.minute,d.second] for d in self.Data.getvalues('xdate') ]
......@@ -203,7 +198,7 @@ class CtObservations(Observation):
savedict = io.std_savedict.copy()
savedict['dtype'] = "int"
savedict['name'] = "date_components"
savedict['units'] = "integer components of UTC date"
savedict['units'] = "integer components of UTC date/time"
savedict['dims'] = dimid+dimcalcomp
savedict['values'] = data
savedict['missing_value'] = -9
......@@ -215,7 +210,7 @@ class CtObservations(Observation):
data = self.Data.getvalues('lat')
savedict = io.std_savedict.copy()
savedict['name'] = "lat"
savedict['name'] = "latitude"
savedict['units'] = "degrees_north"
savedict['dims'] = dimid
savedict['values'] = data.tolist()
......@@ -226,7 +221,7 @@ class CtObservations(Observation):
data = self.Data.getvalues('lon')
savedict = io.std_savedict.copy()
savedict['name'] = "lon"
savedict['name'] = "longitude"
savedict['units'] = "degrees_east"
savedict['dims'] = dimid
savedict['values'] = data.tolist()
......@@ -237,7 +232,7 @@ class CtObservations(Observation):
data = self.Data.getvalues('height')
savedict = io.std_savedict.copy()
savedict['name'] = "alt"
savedict['name'] = "altitude"
savedict['units'] = "meters_above_sea_level"
savedict['dims'] = dimid
savedict['values'] = data.tolist()
......@@ -257,47 +252,17 @@ class CtObservations(Observation):
savedict['_FillValue'] = -9
dummy = f.AddData(savedict)
try:
data = self.Data.getvalues('evn')
savedict = io.std_savedict.copy()
savedict['dtype'] = "char"
savedict['name'] = "eventnumber"
savedict['units'] = "NOAA database identifier"
savedict['dims'] = dimid+dim30char
savedict['values'] = data
savedict['missing_value'] = '-'
savedict['_FillValue'] = '-'
dummy = f.AddData(savedict)
data = self.Data.getvalues('species')
savedict = io.std_savedict.copy()
savedict['dtype'] = "char"
savedict['name'] = "species"
savedict['units'] = "chemical_species_name"
savedict['dims'] = dimid+dim10char
savedict['values'] = data
savedict['missing_value'] = '-'
savedict['_FillValue'] = '-'
dummy = f.AddData(savedict)
data = self.Data.getvalues('evn')
data = self.Data.getvalues('code')
savedict = io.std_savedict.copy()
savedict['dtype'] = "char"
savedict['name'] = "site"
savedict['units'] = "site_identifier"
savedict['dims'] = dimid+dim24char
savedict['values'] = data
savedict['missing_value'] = '-'
savedict['_FillValue'] = '-'
dummy = f.AddData(savedict)
except:
msg = "Character arrays 'species' and 'sites' were not written by the io module" ; logging.warning (msg)
savedict = io.std_savedict.copy()
savedict['dtype'] = "char"
savedict['name'] = "obs_id"
savedict['units'] = "NOAA database identifier"
savedict['dims'] = dimid+dim50char
savedict['values'] = data
savedict['missing_value'] = '-'
savedict['_FillValue'] = '-'
dummy = f.AddData(savedict)
dummy = f.close()
......@@ -368,13 +333,17 @@ class CtObservations(Observation):
obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script
if SiteInfo.has_key(obs.code):
msg = "Observation found (%s)" % obs.code ; logging.debug(msg)
obs.mdm = SiteInfo[obs.code]['error']
obs.may_localize = SiteInfo[obs.code]['may_localize']
obs.may_reject = SiteInfo[obs.code]['may_reject']
species, site, method, lab, nr = obs.fromfile.split('_')
identifier = "%s_%02d_%s"%(site,int(lab),method,)
if SiteInfo.has_key(identifier):
msg = "Observation found (%s, %s)" % (obs.code, identifier,) ; logging.debug(msg)
obs.mdm = SiteInfo[identifier]['error']
obs.may_localize = SiteInfo[identifier]['may_localize']
obs.may_reject = SiteInfo[identifier]['may_reject']
else:
msg= "Observation NOT found (%s), please check sites.rc file (%s) !!!" % (obs.code, self.SitesFile,) ; logging.warning(msg)
msg= "Observation NOT found (%s, %s), please check sites.rc file (%s) !!!" % (obs.code, identifier, self.SitesFile,) ; logging.warning(msg)
obs.flag = 99
# Add SiteInfo dictionary to the Observation object for future use
......@@ -396,7 +365,7 @@ class MixingRatioSample(object):
"""
def __init__(self,id,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',species='co2',samplingstrategy=1,sdev=0.0):
def __init__(self,id,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',species='co2',samplingstrategy=1,sdev=0.0,fromfile='none.nc'):
self.code = code.strip() # Site code
self.xdate = xdate # Date of obs
......@@ -418,6 +387,7 @@ class MixingRatioSample(object):
self.mag = not self.masl # Sample is in Meters Above Ground
self.species = species.strip()
self.samplingstrategy = samplingstrategy
self.fromfile = fromfile # netcdf filename inside ObsPack distribution, to write back later
def __str__(self):
day=self.xdate.strftime('%Y-%m-%d %H:%M:%S')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment