Commit 9a038509 authored by karolina's avatar karolina
Browse files

a lot of minor / formatting changes

performed actions discussed in email #2
removed unused __str__ functions
a bit of ordering of the pipeline.py
parent 8726d7a6
......@@ -54,14 +54,6 @@ class DaSystem(dict):
logging.debug("Data Assimilation System initialized: %s" % self.Identifier)
def __str__(self):
msg = "===============================================================\n"
msg += "DA System Info rc-file is %s\n" % self.RcFileName
msg += "==============================================================="
return msg
def LoadRc(self, RcFileName):
"""
This method loads a DA System Info rc-file with settings for this simulation
......
......@@ -51,7 +51,7 @@ class Observation(object):
"""
self.Identifier = identifier
self.Version = version
self.Data = ObservationList([]) # Initialize with an empty list of obs
self.datalist = [] # Initialize with an empty list of obs
# The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can
# be added at a later moment.
......@@ -63,8 +63,8 @@ class Observation(object):
logging.info('Observation object initialized: %s' % self.Identifier)
def __str__(self):
return "This is a %s object, version %s" % (self.Identifier, self.Version)
def getlength(self):
return len(self.datalist)
def Initialize(self):
""" Perform all steps needed to start working with observational data, this can include moving data, concatenating files,
......@@ -93,23 +93,15 @@ class Observation(object):
"""
Write the information needed by the observation operator to a file. Return the filename that was written for later use
"""
################### End Class Observations ###################
################### Begin Class ObservationList ###################
class ObservationList(list):
""" This is a special type of list that holds observed sample objects. It has methods to extract data from such a list easily """
def getvalues(self, name, constructor=array):
result = constructor([getattr(o, name) for o in self])
result = constructor([getattr(o, name) for o in self.datalist])
if isinstance(result, ndarray):
return result.squeeze()
else:
return result
################### End Class ObservationList ###################
################### End Class Observations ###################
......@@ -52,9 +52,6 @@ class ObservationOperator(object):
def get_initial_data(self):
""" This method places all initial data needed by an ObservationOperator in the proper folder for the model """
def __str__(self):
return "This is a %s object, version %s" % (self.Identifier, self.Version)
def Initialize(self):
""" Perform all steps necessary to start the observation operator through a simple Run() call """
......
......@@ -160,7 +160,7 @@ class Optimizer(object):
outdir = DaCycle['dir.diagnostics']
filename = os.path.join(outdir, 'optimizer.%s.nc' % DaCycle['time.start'].strftime('%Y%m%d'))
DaCycle.OutputFileList += (filename,)
DaCycle.OutputFileList.append(filename)
# Open or create file
......@@ -182,139 +182,117 @@ class Optimizer(object):
# Add data, first the ones that are written both before and after the optimization
data = self.x
savedict = io.std_savedict.copy()
savedict['name'] = "statevectormean_%s" % type
savedict['long_name'] = "full_statevector_mean_%s" % type
savedict['units'] = "unitless"
savedict['dims'] = dimstate
savedict['values'] = data.tolist()
savedict['values'] = self.x.tolist()
savedict['comment'] = 'Full %s state vector mean ' % type
f.AddData(savedict)
data = self.X_prime
savedict = io.std_savedict.copy()
savedict['name'] = "statevectordeviations_%s" % type
savedict['long_name'] = "full_statevector_deviations_%s" % type
savedict['units'] = "unitless"
savedict['dims'] = dimstate + dimmembers
savedict['values'] = data.tolist()
savedict['values'] = self.X_prime.tolist()
savedict['comment'] = 'Full state vector %s deviations as resulting from the optimizer' % type
f.AddData(savedict)
data = self.Hx
savedict = io.std_savedict.copy()
savedict['name'] = "modelsamplesmean_%s" % type
savedict['long_name'] = "modelsamplesforecastmean_%s" % type
savedict['units'] = "mol mol-1"
savedict['dims'] = dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.Hx.tolist()
savedict['comment'] = '%s mean mixing ratios based on %s state vector' % (type, type,)
f.AddData(savedict)
data = self.HX_prime
savedict = io.std_savedict.copy()
savedict['name'] = "modelsamplesdeviations_%s" % type
savedict['long_name'] = "modelsamplesforecastdeviations_%s" % type
savedict['units'] = "mol mol-1"
savedict['dims'] = dimobs + dimmembers
savedict['values'] = data.tolist()
savedict['values'] = self.HX_prime.tolist()
savedict['comment'] = '%s mixing ratio deviations based on %s state vector' % (type, type,)
f.AddData(savedict)
# Continue with prior only data
if type == 'prior':
data = self.sitecode
savedict = io.std_savedict.copy()
savedict['name'] = "sitecode"
savedict['long_name'] = "site code propagated from observation file"
savedict['dtype'] = "char"
savedict['dims'] = dimobs + dim200char
savedict['values'] = data
savedict['values'] = self.sitecode
savedict['missing_value'] = '!'
f.AddData(savedict)
data = self.obs
savedict = io.std_savedict.copy()
savedict['name'] = "observed"
savedict['long_name'] = "observedvalues"
savedict['units'] = "mol mol-1"
savedict['dims'] = dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.obs.tolist()
savedict['comment'] = 'Observations used in optimization'
f.AddData(savedict)
data = self.obs_ids
savedict = io.std_savedict.copy()
savedict['name'] = "obspack_num"
savedict['dtype'] = "int64"
savedict['long_name'] = "Unique_ObsPack_observation_number"
savedict['units'] = ""
savedict['dims'] = dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.obs_ids.tolist()
savedict['comment'] = 'Unique observation number across the entire ObsPack distribution'
f.AddData(savedict)
data = self.R
savedict = io.std_savedict.copy()
savedict['name'] = "modeldatamismatchvariance"
savedict['long_name'] = "modeldatamismatch variance"
savedict['units'] = "[mol mol-1]^2"
savedict['dims'] = dimobs + dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.R.tolist()
savedict['comment'] = 'Variance of mole fractions resulting from model-data mismatch'
f.AddData(savedict)
# Continue with posterior only data
elif type == 'optimized':
data = self.HPHR
savedict = io.std_savedict.copy()
savedict['name'] = "totalmolefractionvariance"
savedict['long_name'] = "totalmolefractionvariance"
savedict['units'] = "[mol mol-1]^2"
savedict['dims'] = dimobs + dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.HPHR.tolist()
savedict['comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
f.AddData(savedict)
data = self.flags
savedict = io.std_savedict.copy()
savedict['name'] = "flag"
savedict['long_name'] = "flag_for_obs_model"
savedict['units'] = "None"
savedict['dims'] = dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.flags.tolist()
savedict['comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
f.AddData(savedict)
data = self.KG
savedict = io.std_savedict.copy()
savedict['name'] = "kalmangainmatrix"
savedict['long_name'] = "kalmangainmatrix"
savedict['units'] = "unitless molefraction-1"
savedict['dims'] = dimstate + dimobs
savedict['values'] = data.tolist()
savedict['values'] = self.KG.tolist()
savedict['comment'] = 'Kalman gain matrix of all obs and state vector elements'
#dummy = f.AddData(savedict)
f.close()
logging.debug('Diagnostics file closed')
return None
def serial_minimum_least_squares(self):
""" Make minimum least squares solution by looping over obs"""
......@@ -432,7 +410,6 @@ if __name__ == "__main__":
from da.tools.initexit import StartLogger
from da.tools.initexit import CycleControl
from da.ct.statevector import CtStateVector, PrepareState
from da.ct.obs import CtObservations
import numpy as np
......@@ -445,7 +422,6 @@ if __name__ == "__main__":
DaCycle.Initialize()
print DaCycle
StateVector = PrepareState(DaCycle)
samples = CtObservations(DaCycle.DaSystem, datetime.datetime(2005, 3, 5))
samples.add_observations()
......@@ -458,7 +434,7 @@ if __name__ == "__main__":
int(DaCycle.DaSystem['nparameters']),
nobs,)
opt = CtOptimizer(dims)
opt.state_to_matrix(StateVector)
opt.MinimumLeastSquares()
opt.matrix_to_state(StateVector)
# opt = CtOptimizer(dims)
# opt.state_to_matrix(StateVector)
# opt.MinimumLeastSquares()
# opt.matrix_to_state(StateVector)
......@@ -44,10 +44,6 @@ class PlatForm(object):
logging.debug('%s object initialized' % self.Identifier)
logging.debug('%s version: %s' % (self.Identifier, self.Version))
def __str__(self):
return self.Version
def ReturnBlockingFlag(self):
return ""
......
......@@ -63,9 +63,6 @@ class EnsembleMember(object):
self.membernumber = membernumber # the member number
self.ParameterValues = None # Parameter values of this member
def __str__(self):
return "%03d" % self.membernumber
################### End Class EnsembleMember ###################
################### Begin Class StateVector ###################
......@@ -94,9 +91,6 @@ class StateVector(object):
Option (1) is invoked using method :meth:`~da.baseclasses.statevector.StateVector.ReadFromFile`.
Option (2) consists of a call to method :meth:`~da.baseclasses.statevector.StateVector.make_new_ensemble`
Each option makes use of the same call to :meth:`~da.baseclasses.statevector.StateVector.GetNewMember`, to create a
data container to be filled: the :class:`~da.baseclasses.statevector.EnsembleMember`.
Once the StateVector object has been filled with data, it is used in the pipeline and a few more methods are
invoked from there:
* :meth:`~da.baseclasses.statevector.StateVector.propagate`, to advance the StateVector from t=t to t=t+1
......@@ -108,7 +102,6 @@ class StateVector(object):
.. automethod:: da.baseclasses.statevector.StateVector.ReadFromFile
.. automethod:: da.baseclasses.statevector.StateVector.WriteToFile
.. automethod:: da.baseclasses.statevector.StateVector.make_new_ensemble
.. automethod:: da.baseclasses.statevector.StateVector.GetNewMember
.. automethod:: da.baseclasses.statevector.StateVector.propagate
.. automethod:: da.baseclasses.statevector.StateVector.write_members_to_file
......@@ -206,9 +199,6 @@ class StateVector(object):
self.make_species_mask()
def __str__(self):
return "This is a base class derived state vector object"
def make_species_mask(self):
"""
......@@ -233,7 +223,6 @@ class StateVector(object):
logging.debug(" -> %s" % k)
def make_new_ensemble(self, lag, covariancematrix=None):
"""
:param lag: an integer indicating the time step in the lag order
......@@ -277,7 +266,7 @@ class StateVector(object):
# Create the first ensemble member with a deviation of 0.0 and add to list
newmember = self.GetNewMember(0)
newmember = EnsembleMember(0)
newmember.ParameterValues = newmean.flatten() # no deviations
self.EnsembleMembers[lag - 1].append(newmember)
......@@ -286,22 +275,12 @@ class StateVector(object):
for member in range(1, self.nmembers):
rands = np.random.randn(self.nparams)
newmember = self.GetNewMember(member)
newmember = EnsembleMember(member)
newmember.ParameterValues = np.dot(C, rands) + newmean
self.EnsembleMembers[lag - 1].append(newmember)
logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, lag))
def GetNewMember(self, memberno):
"""
:param memberno: an integer indicating the ensemble member number
:rtype: an empty ensemblemember object
Return an ensemblemember object
"""
return EnsembleMember(memberno)
def propagate(self):
"""
......@@ -416,7 +395,7 @@ class StateVector(object):
logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1))
for m in range(self.nmembers):
NewMember = self.GetNewMember(m)
NewMember = EnsembleMember(m)
NewMember.ParameterValues = EnsembleMembers[n, m, :].flatten() + MeanState[n] # add the mean to the deviations to hold the full parameter values
self.EnsembleMembers[n].append(NewMember)
......
......@@ -25,21 +25,16 @@ from da.baseclasses.obs import Observation
class CtObservations(Observation):
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def getlength(self):
return len(self.Data)
def Initialize(self):
self.startdate = self.DaCycle['time.sample.start']
self.enddate = self.DaCycle['time.sample.end']
DaSystem = self.DaCycle.DaSystem
sfname = DaSystem['obs.input.fname']
sfname = self.DaCycle.DaSystem['obs.input.fname']
if sfname.endswith('.nc'):
filename = os.path.join(DaSystem['obs.input.dir'], sfname)
filename = os.path.join(self.DaCycle.DaSystem['obs.input.dir'], sfname)
else:
filename = os.path.join(DaSystem['obs.input.dir'], sfname + '.' + self.startdate.strftime('%Y%m%d') + '.nc')
filename = os.path.join(self.DaCycle.DaSystem['obs.input.dir'], sfname + '.' + self.startdate.strftime('%Y%m%d') + '.nc')
if not os.path.exists(filename):
msg = 'Could not find the required observation input file (%s) ' % filename
......@@ -47,7 +42,7 @@ class CtObservations(Observation):
raise IOError, msg
else:
self.ObsFilename = filename
self.Data = MixingRatioList([])
self.datalist = []
def add_observations(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
......@@ -99,7 +94,7 @@ class CtObservations(Observation):
logging.debug("Successfully read data from obs file (%s)" % self.ObsFilename)
for n in range(len(dates)):
self.Data.append(MixingRatioSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.ObsFilename))
self.datalist.append(MixingRatioSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.ObsFilename))
logging.debug("Added %d observations to the Data list" % len(dates))
def add_simulations(self, filename, silent=True):
......@@ -120,7 +115,7 @@ class CtObservations(Observation):
ncf.close()
logging.info("Successfully read data from model sample file (%s)" % filename)
obs_ids = self.Data.getvalues('id')
obs_ids = self.getvalues('id')
obs_ids = obs_ids.tolist()
ids = map(int, ids)
......@@ -131,7 +126,7 @@ class CtObservations(Observation):
if idx in obs_ids:
index = obs_ids.index(idx)
#print id,val,val.shape
self.Data[index].simulated = val
self.datalist[index].simulated = val
else:
missing_samples.append(idx)
......@@ -153,15 +148,15 @@ class CtObservations(Observation):
f = io.CT_CDF(obsinputfile, method='create')
logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)
dimid = f.AddDim('obs', len(self.Data))
dimid = f.AddDim('obs', len(self.datalist))
dim200char = f.AddDim('string_of200chars', 200)
dimcalcomp = f.AddDim('calendar_components', 6)
if len(self.Data) == 0:
if len(self.datalist) == 0:
f.close()
return obsinputfile
data = self.Data.getvalues('id')
data = self.getvalues('id')
savedict = io.std_savedict.copy()
savedict['name'] = "obs_num"
......@@ -173,7 +168,7 @@ class CtObservations(Observation):
savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
f.AddData(savedict)
data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.Data.getvalues('xdate') ]
data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ]
savedict = io.std_savedict.copy()
savedict['dtype'] = "int"
......@@ -186,7 +181,7 @@ class CtObservations(Observation):
savedict['order'] = "year, month, day, hour, minute, second"
f.AddData(savedict)
data = self.Data.getvalues('lat')
data = self.getvalues('lat')
savedict = io.std_savedict.copy()
savedict['name'] = "latitude"
......@@ -196,7 +191,7 @@ class CtObservations(Observation):
savedict['missing_value'] = -999.9
f.AddData(savedict)
data = self.Data.getvalues('lon')
data = self.getvalues('lon')
savedict = io.std_savedict.copy()
savedict['name'] = "longitude"
......@@ -206,7 +201,7 @@ class CtObservations(Observation):
savedict['missing_value'] = -999.9
f.AddData(savedict)
data = self.Data.getvalues('height')
data = self.getvalues('height')
savedict = io.std_savedict.copy()
savedict['name'] = "altitude"
......@@ -216,7 +211,7 @@ class CtObservations(Observation):
savedict['missing_value'] = -999.9
f.AddData(savedict)
data = self.Data.getvalues('samplingstrategy')
data = self.getvalues('samplingstrategy')
savedict = io.std_savedict.copy()
savedict['dtype'] = "int"
......@@ -227,7 +222,7 @@ class CtObservations(Observation):
savedict['missing_value'] = -9
f.AddData(savedict)
data = self.Data.getvalues('evn')
data = self.getvalues('evn')
savedict = io.std_savedict.copy()
savedict['dtype'] = "char"
......@@ -275,11 +270,11 @@ class CtObservations(Observation):
self.n_sites_active = int(SitesWeights['n.sites.active'])
self.n_sites_moved = int(SitesWeights['n.sites.moved'])
logging.debug('Model-data mismatch rejection threshold: %d ' % (self.rejection_threshold))
logging.debug('Model-data mismatch scaling factor : %f ' % (self.global_R_scaling))
logging.debug('Model-data mismatch site categories : %d ' % (self.n_site_categories))
logging.debug('Model-data mismatch active sites : %d ' % (self.n_sites_active))
logging.debug('Model-data mismatch moved sites : %d ' % (self.n_sites_moved))
logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
logging.debug('Model-data mismatch scaling factor : %f ' % self.global_R_scaling)
logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories)
logging.debug('Model-data mismatch active sites : %d ' % self.n_sites_active)
logging.debug('Model-data mismatch moved sites : %d ' % self.n_sites_moved)
cats = [k for k in SitesWeights.keys() if 'site.category' in k]
......@@ -304,7 +299,7 @@ class CtObservations(Observation):
SiteInfo[sitename] = SiteCategories[sitecategory]
#print sitename,SiteInfo[sitename]
for obs in self.Data:
for obs in self.datalist:
obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script
if SiteInfo.has_key(obs.code):
logging.debug("Observation found (%s)" % obs.code)
......@@ -331,15 +326,15 @@ class CtObservations(Observation):
f = io.CT_CDF(outfile, method='create')
logging.debug('Creating new Sample output file for postprocessing (%s)' % outfile)
dimid = f.AddDim('obs', len(self.Data))
dimid = f.AddDim('obs', len(self.datalist))
dim200char = f.AddDim('string_of200chars', 200)
dimcalcomp = f.AddDim('calendar_components', 6)
if len(self.Data) == 0:
if len(self.datalist) == 0:
f.close()
return outfile
data = self.Data.getvalues('id')
data = self.getvalues('id')
savedict = io.std_savedict.copy()
savedict['name'] = "obs_num"
......@@ -351,7 +346,7 @@ class CtObservations(Observation):
savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
f.AddData(savedict)
data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.Data.getvalues('xdate') ]
data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ]
savedict = io.std_savedict.copy()
savedict['dtype'] = "int"
......@@ -364,7 +359,7 @@ class CtObservations(Observation):
savedict['order'] = "year, month, day, hour, minute, second"
f.AddData(savedict)
data = self.Data.getvalues('obs')
data = self.getvalues('obs')
savedict = io.std_savedict.copy()
savedict['name'] = "observed"
......@@ -375,7 +370,7 @@ class CtObservations(Observation):
savedict['comment'] = 'Observations used in optimization'
f.AddData(savedict)
data = self.Data.getvalues('mdm')
data = self.getvalues('mdm')
savedict = io.std_savedict.copy()
savedict['name'] = "modeldatamismatch"
......@@ -386,7 +381,7 @@ class CtObservations(Observation):
savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
f.AddData(savedict)
data = self.Data.getvalues('simulated')
data = self.getvalues('simulated')
dimmembers = f.AddDim('members', data.shape[1])
......@@ -399,7 +394,7 @@ class CtObservations(Observation):
savedict['comment'] = 'simulated mixing ratios based on optimized state vector'
f.AddData(savedict)
data = self.Data.getvalues('fromfile')
data = self.getvalues('fromfile')
savedict = io.std_savedict.copy()
savedict['name'] = "inputfilename"
......@@ -410,7 +405,7 @@ class CtObservations(Observation):
savedict['missing_value'] = '!'
f.AddData(savedict)
data = self.Data.getvalues('code')
data = self.getvalues('code')
savedict = io.std_savedict.copy()
savedict['name'] = "sitecode"
......@@ -441,72 +436,36 @@ class MixingRatioSample(object):
objects for specific projects.
"""
def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'):
self.code = code.strip() # Site code
self.xdate = xdate # Date of obs
self.obs = obs # Value observed
self.simulated = simulated # Value simulated by model
self.resid = resid # Mixing ratio residuals
self.hphr = hphr # Mixing ratio prior uncertainty from fluxes and (HPH) and model data mismatch (R)
self.mdm = mdm # Model data mismatch
self.may_localize = True # Whether sample may be localized in optimizer
self.may_reject = True # Whether sample may be rejected if outside threshold
self.flag = flag # Flag
self.height = height # Sample height
self.lat = lat # Sample lat
self.lon = lon # Sample lon
self.id = idx # ID number
self.evn = evn # Event number
self.sdev = sdev # standard deviation of ensemble
self.masl = True # Sample is in Meters Above Sea Level
self.mag = not self.masl # Sample is in Meters Above Ground
self.species = species.strip()
self.samplingstrategy = samplingstrategy
self.fromfile = fromfile # netcdf filename inside observation distribution, to write back later