Commit 98c72a88 authored by karolina's avatar karolina
Browse files

removed variables as discussed in the first email

made cleanup in tm5_obsoperator
changed functions names - part 1
removed empty Validate function in obs, as well as its call
removed empty Initialize function in dasystem, as well as its call in pipeline (it's still called in some main functions)
parent abd55518
......@@ -75,12 +75,6 @@ class DaSystem(dict):
logging.debug("DA System Info rc-file (%s) loaded successfully" % self.RcFileName)
def Initialize(self):
"""
Initialize the object.
"""
def Validate(self):
"""
Validate the contents of the rc-file given a dictionary of required keys
......
......@@ -9,7 +9,7 @@ Revision History:
File created on 28 Jul 2010.
.. autoclass:: da.baseclasses.obs.Observation
:members: Initialize, Validate, AddObs, AddSimulations, AddModelDataMismatch, WriteSampleInfo
:members: Initialize, Validate, add_observations, add_simulations, add_model_data_mismatch, write_sample_info
.. autoclass:: da.baseclasses.obs.ObservationList
:members: __init__
......@@ -20,7 +20,7 @@ import logging
from numpy import array, ndarray
identifier = 'Observation baseclass'
version = '0.0'
version = '0.0'
################### Begin Class Observation ###################
......@@ -35,23 +35,23 @@ class Observation(object):
Upon initialization of the class, an object is created that holds no actual data, but has a placeholder attribute `self.Data`
which is an empty list of type :class:`~da.baseclasses.obs.ObservationList`. An ObservationList object is created when the
method :meth:`~da.baseclasses.obs.Observation.AddObs` is invoked in the pipeline.
method :meth:`~da.baseclasses.obs.Observation.add_observations` is invoked in the pipeline.
From the list of observations, a file is written by method
:meth:`~da.baseclasses.obs.Observation.WriteSampleInfo`
:meth:`~da.baseclasses.obs.Observation.write_sample_info`
with the sample info needed by the
:class:`~da.baseclasses.observationoperator.ObservationOperator` object. The values returned after sampling
are finally added by :meth:`~da.baseclasses.obs.Observation.AddSimulations`
are finally added by :meth:`~da.baseclasses.obs.Observation.add_simulations`
"""
def __init__(self, DaCycle = None):
def __init__(self, DaCycle=None):
"""
create an object with an identifier, version, and an empty ObservationList
"""
self.Identifier = self.getid()
self.Version = self.getversion()
self.Data = ObservationList([]) # Initialize with an empty list of obs
self.Version = self.getversion()
self.Data = ObservationList([]) # Initialize with an empty list of obs
# The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can
# be added at a later moment.
......@@ -61,7 +61,7 @@ class Observation(object):
else:
self.DaCycle = {}
logging.info('Observation object initialized: %s'% self.Identifier)
logging.info('Observation object initialized: %s' % self.Identifier)
def getid(self):
"""
......@@ -76,19 +76,14 @@ class Observation(object):
return identifier
def __str__(self):
return "This is a %s object, version %s" % (self.Identifier,self.Version)
return "This is a %s object, version %s" % (self.Identifier, self.Version)
def Initialize(self):
""" Perform all steps needed to start working with observational data, this can include moving data, concatenating files,
selecting datasets, etc.
"""
def Validate(self):
""" Make sure that data needed for the ObservationOperator (such as observation input lists, or parameter files)
are present.
"""
def AddObs(self):
def add_observations(self):
"""
Add actual observation data to the Observation object. This is in a form of an
:class:`~da.baseclasses.obs.ObservationList` that is contained in self.Data. The
......@@ -97,17 +92,17 @@ class Observation(object):
"""
def AddSimulations(self):
def add_simulations(self):
""" Add the simulation data to the Observation object.
"""
def AddModelDataMismatch(self ):
def add_model_data_mismatch(self):
"""
Get the model-data mismatch values for this cycle.
"""
def WriteSampleInfo(self ):
def write_sample_info(self):
"""
Write the information needed by the observation operator to a file. Return the filename that was written for later use
"""
......@@ -121,10 +116,10 @@ class Observation(object):
class ObservationList(list):
""" This is a special type of list that holds observed sample objects. It has methods to extract data from such a list easily """
def getvalues(self,name,constructor=array):
def getvalues(self, name, constructor=array):
result = constructor([getattr(o,name) for o in self])
if isinstance(result,ndarray):
result = constructor([getattr(o, name) for o in self])
if isinstance(result, ndarray):
return result.squeeze()
else:
return result
......
......@@ -54,7 +54,7 @@ class ObservationOperator(object):
def getversion(self):
return version
def GetInitialData(self):
def get_initial_data(self):
""" This method places all initial data needed by an ObservationOperator in the proper folder for the model """
def __str__(self):
......
......@@ -45,11 +45,11 @@ class Optimizer(object):
self.nmembers = dims[1]
self.nparams = dims[2]
self.nobs = dims[3]
self.CreateMatrices()
self.create_matrices()
return None
def CreateMatrices(self):
def create_matrices(self):
""" Create Matrix space needed in optimization routine """
import numpy as np
......@@ -86,11 +86,10 @@ class Optimizer(object):
# Kalman Gain matrix
self.KG = np.zeros((self.nlag * self.nparams, self.nobs,), float)
def StateToMatrix(self, StateVector):
def state_to_matrix(self, StateVector):
allsites = [] # collect all obs for n=1,..,nlag
allobs = [] # collect all obs for n=1,..,nlag
allmdm = [] # collect all mdm for n=1,..,nlag
allsamples = [] # collect all model samples for n=1,..,nlag
allids = [] # collect all model samples for n=1,..,nlag
allreject = [] # collect all model samples for n=1,..,nlag
alllocalize = [] # collect all model samples for n=1,..,nlag
......@@ -140,7 +139,7 @@ class Optimizer(object):
for i, mdm in enumerate(allmdm):
self.R[i, i] = mdm ** 2
def MatrixToState(self, StateVector):
def matrix_to_state(self, StateVector):
for n in range(self.nlag):
members = StateVector.EnsembleMembers[n]
for m, mem in enumerate(members):
......@@ -149,7 +148,7 @@ class Optimizer(object):
StateVector.isOptimized = True
logging.debug('Returning optimized data to the StateVector, setting "StateVector.isOptimized = True" ')
def WriteDiagnostics(self, DaCycle, StateVector, type='prior'):
def write_diagnostics(self, DaCycle, StateVector, type='prior'):
"""
Open a NetCDF file and write diagnostic output from optimization process:
......@@ -324,12 +323,9 @@ class Optimizer(object):
return None
def SerialMinimumLeastSquares(self):
def serial_minimum_least_squares(self):
""" Make minimum least squares solution by looping over obs"""
import numpy as np
import numpy.linalg as la
tvalue = 1.97591
for n in range(self.nobs):
......@@ -339,7 +335,6 @@ class Optimizer(object):
logging.debug('Skipping observation (%s,%s) because of flag value %d' % (self.sitecode[n], self.obs_ids[n], self.flags[n]))
continue
# Screen for outliers greather than 3x model-data mismatch, only apply if obs may be rejected
res = self.obs[n] - self.Hx[n]
......@@ -357,10 +352,10 @@ class Optimizer(object):
self.KG[:, n] = PHt / self.HPHR[n, n]
if self.may_localize[n]:
self.Localize(n)
self.localize(n)
logging.debug('Localized observation %s' % self.obs_ids[n])
else:
logging.debug('Not allowed to Localize observation %s' % self.obs_ids[n])
logging.debug('Not allowed to localize observation %s' % self.obs_ids[n])
alpha = np.double(1.0) / (np.double(1.0) + np.sqrt((self.R[n, n]) / self.HPHR[n, n]))
......@@ -385,7 +380,7 @@ class Optimizer(object):
self.HX_prime[m, :] = self.HX_prime[m, :] - alpha * fac * self.HX_prime[n, :]
def BulkMinimumLeastSquares(self):
def bulk_minimum_least_squares(self):
""" Make minimum least squares solution by solving matrix equations"""
import numpy as np
import numpy.linalg as la
......@@ -398,7 +393,7 @@ class Optimizer(object):
self.KG[:, :] = np.dot(HPb, la.inv(self.HPHR)) # K = HP/(HPH+R)
for n in range(self.nobs):
self.Localize(n)
self.localize(n)
self.x[:] = self.x + np.dot(self.KG, self.obs - self.Hx) # xa = xp + K (y-Hx)
......@@ -413,7 +408,6 @@ class Optimizer(object):
Kw = np.dot(part1, part2) # K~
self.X_prime[:, :] = np.dot(I, self.X_prime) - np.dot(Kw, self.HX_prime) # HX' = I - K~ * HX'
P_opt = np.dot(self.X_prime, np.transpose(self.X_prime)) / (self.nmembers - 1)
# Now do the adjustments of the modeled mixing ratios using the linearized ensemble. These are not strictly needed but can be used
# for diagnosis.
......@@ -425,13 +419,13 @@ class Optimizer(object):
logging.info('Minimum Least Squares solution was calculated, returning')
def SetLocalization(self):
def set_localization(self):
""" determine which localization to use """
self.localization = True
self.localizetype = "None"
logging.info("Current localization option is set to %s" % self.localizetype)
def Localize(self, n):
def localize(self, n):
""" localize the Kalman Gain matrix """
pass
......@@ -461,8 +455,8 @@ if __name__ == "__main__":
StateVector = PrepareState(DaCycle)
samples = CtObservations(DaCycle.DaSystem, datetime.datetime(2005, 3, 5))
samples.AddObs()
samples.AddSimulations('/Users/peters/tmp/test_da/output/20050305/samples.000.nc')
samples.add_observations()
samples.add_simulations('/Users/peters/tmp/test_da/output/20050305/samples.000.nc')
nobs = len(samples.Data)
......@@ -472,6 +466,6 @@ if __name__ == "__main__":
nobs,)
opt = CtOptimizer(dims)
opt.StateToMatrix(StateVector)
opt.state_to_matrix(StateVector)
opt.MinimumLeastSquares()
opt.MatrixToState(StateVector)
opt.matrix_to_state(StateVector)
......@@ -92,14 +92,14 @@ class StateVector(object):
2. By creating the data through a set of method calls
Option (1) is invoked using method :meth:`~da.baseclasses.statevector.StateVector.ReadFromFile`.
Option (2) consists of a call to method :meth:`~da.baseclasses.statevector.StateVector.MakeNewEnsemble`
Option (2) consists of a call to method :meth:`~da.baseclasses.statevector.StateVector.make_new_ensemble`
Each option makes use of the same call to :meth:`~da.baseclasses.statevector.StateVector.GetNewMember`, to create a
data container to be filled: the :class:`~da.baseclasses.statevector.EnsembleMember`.
Once the StateVector object has been filled with data, it is used in the pipeline and a few more methods are
invoked from there:
* :meth:`~da.baseclasses.statevector.StateVector.Propagate`, to advance the StateVector from t=t to t=t+1
* :meth:`~da.baseclasses.statevector.StateVector.propagate`, to advance the StateVector from t=t to t=t+1
* :meth:`~da.baseclasses.statevector.StateVector.WriteToFile`, to write the StateVector to a NetCDF file for later use
The methods are described below:
......@@ -107,10 +107,10 @@ class StateVector(object):
.. automethod:: da.baseclasses.statevector.StateVector.Initialize
.. automethod:: da.baseclasses.statevector.StateVector.ReadFromFile
.. automethod:: da.baseclasses.statevector.StateVector.WriteToFile
.. automethod:: da.baseclasses.statevector.StateVector.MakeNewEnsemble
.. automethod:: da.baseclasses.statevector.StateVector.make_new_ensemble
.. automethod:: da.baseclasses.statevector.StateVector.GetNewMember
.. automethod:: da.baseclasses.statevector.StateVector.Propagate
.. automethod:: da.baseclasses.statevector.StateVector.WriteMembersToFile
.. automethod:: da.baseclasses.statevector.StateVector.propagate
.. automethod:: da.baseclasses.statevector.StateVector.write_members_to_file
Finally, the StateVector can be mapped to a gridded array, or to a vector of TransCom regions, using:
......@@ -150,14 +150,12 @@ class StateVector(object):
An example is given below.
"""
dims = (int(self.DaCycle['time.nlag']),
int(self.DaCycle['da.optimizer.nmembers']),
int(self.DaCycle.DaSystem['nparameters']),
)
self.nlag = dims[0]
self.nmembers = dims[1]
self.nparams = dims[2]
self.nlag = int(self.DaCycle['time.nlag'])
self.nmembers = int(self.DaCycle['da.optimizer.nmembers'])
self.nparams = int(self.DaCycle.DaSystem['nparameters'])
self.nobs = 0
self.isOptimized = False
self.ObsToAssimmilate = () # empty containter to hold observations to assimilate later on
......@@ -212,12 +210,12 @@ class StateVector(object):
# Create a mask for species/unknowns
self.MakeSpeciesMask()
self.make_species_mask()
def __str__(self):
return "This is a base class derived state vector object"
def MakeSpeciesMask(self):
def make_species_mask(self):
"""
......@@ -242,7 +240,7 @@ class StateVector(object):
def MakeNewEnsemble(self, lag, covariancematrix=None):
def make_new_ensemble(self, lag, covariancematrix=None):
"""
:param lag: an integer indicating the time step in the lag order
:param covariancematrix: a matrix to draw random values from
......@@ -264,7 +262,7 @@ class StateVector(object):
# Make a cholesky decomposition of the covariance matrix
U, s, Vh = np.linalg.svd(covariancematrix)
_, s, _ = np.linalg.svd(covariancematrix)
dof = np.sum(s) ** 2 / sum(s ** 2)
C = np.linalg.cholesky(covariancematrix)
......@@ -274,30 +272,29 @@ class StateVector(object):
# Create mean values
NewMean = np.ones(self.nparams, float) # standard value for a new time step is 1.0
newmean = np.ones(self.nparams, float) # standard value for a new time step is 1.0
# If this is not the start of the filter, average previous two optimized steps into the mix
if lag == self.nlag and self.nlag >= 3:
NewMean += self.EnsembleMembers[lag - 2][0].ParameterValues + \
newmean += self.EnsembleMembers[lag - 2][0].ParameterValues + \
self.EnsembleMembers[lag - 3][0].ParameterValues
NewMean = NewMean / 3.0
newmean = newmean / 3.0
# Create the first ensemble member with a deviation of 0.0 and add to list
NewMember = self.GetNewMember(0)
NewMember.ParameterValues = NewMean.flatten() # no deviations
self.EnsembleMembers[lag - 1].append(NewMember)
newmember = self.GetNewMember(0)
newmember.ParameterValues = newmean.flatten() # no deviations
self.EnsembleMembers[lag - 1].append(newmember)
# Create members 1:nmembers and add to EnsembleMembers list
for member in range(1, self.nmembers):
randstate = np.random.get_state()
rands = np.random.randn(self.nparams)
NewMember = self.GetNewMember(member)
NewMember.ParameterValues = np.dot(C, rands) + NewMean
self.EnsembleMembers[lag - 1].append(NewMember)
newmember = self.GetNewMember(member)
newmember.ParameterValues = np.dot(C, rands) + newmean
self.EnsembleMembers[lag - 1].append(newmember)
logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, lag))
......@@ -312,7 +309,7 @@ class StateVector(object):
return EnsembleMember(memberno)
def Propagate(self):
def propagate(self):
"""
:rtype: None
......@@ -332,8 +329,8 @@ class StateVector(object):
# And now create a new time step of mean + members for n=nlag
date = self.DaCycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(self.DaCycle['time.cycle']))
cov = self.GetCovariance(date)
self.MakeNewEnsemble(self.nlag, cov)
cov = self.get_covariance(date)
self.make_new_ensemble(self.nlag, cov)
logging.info('The state vector has been propagated by one cycle')
......@@ -431,7 +428,7 @@ class StateVector(object):
logging.info('Successfully read the State Vector from file (%s) ' % filename)
def WriteMembersToFile(self, lag):
def write_members_to_file(self, lag):
"""
:param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag]
:rtype: None
......@@ -443,7 +440,7 @@ class StateVector(object):
This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object.
.. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you
can simply inherit from the StateVector baseclass and overwrite this WriteMembersToFile function.
can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function.
"""
......@@ -651,12 +648,12 @@ if __name__ == "__main__":
)
StateVector = StateVector(dims)
StateVector.MakeNewEnsemble(lag=1)
StateVector.make_new_ensemble(lag=1)
members = StateVector.EnsembleMembers[0]
members[0].WriteToFile(DaCycle['dir.input'])
StateVector.Propagate()
StateVector.propagate()
savedir = DaCycle['dir.output']
filename = os.path.join(savedir, 'savestate.nc')
......
......@@ -55,7 +55,7 @@ class CtObservations(Observation):
self.ObsFilename = filename
self.Data = MixingRatioList([])
def AddObs(self):
def add_observations(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
The CarbonTracker mixing ratio files are provided as one long list of obs for all possible dates. So we can
......@@ -95,7 +95,6 @@ class CtObservations(Observation):
species = ncf.GetVariable('species').take(subselect, axis=0)
species = [s.tostring().lower() for s in species]
species = map(strip, species)
date = ncf.GetVariable('date').take(subselect, axis=0)
strategy = ncf.GetVariable('sampling_strategy').take(subselect, axis=0)
flags = ncf.GetVariable('NOAA_QC_flags').take(subselect, axis=0)
flags = [s.tostring().lower() for s in flags]
......@@ -109,7 +108,7 @@ class CtObservations(Observation):
self.Data.append(MixingRatioSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.ObsFilename))
logging.debug("Added %d observations to the Data list" % len(dates))
def AddSimulations(self, filename, silent=True):
def add_simulations(self, filename, silent=True):
""" Adds model simulated values to the mixing ratio objects """
import da.tools.io4 as io
......@@ -148,7 +147,7 @@ class CtObservations(Observation):
logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))
def WriteSampleInfo(self):
def write_sample_info(self):
"""
Write the information needed by the observation operator to a file. Return the filename that was written for later use
......@@ -162,7 +161,6 @@ class CtObservations(Observation):
dimid = f.AddDim('obs', len(self.Data))
dim200char = f.AddDim('string_of200chars', 200)
dim10char = f.AddDim('string_of10chars', 10)
dimcalcomp = f.AddDim('calendar_components', 6)
if len(self.Data) == 0:
......@@ -254,7 +252,7 @@ class CtObservations(Observation):
return obsinputfile
def AddModelDataMismatch(self):
def add_model_data_mismatch(self):
"""
Get the model-data mismatch values for this cycle.
......@@ -327,7 +325,7 @@ class CtObservations(Observation):
self.SiteInfo = SiteInfo
def WriteObsToFile(self):
def write_obs_to_file(self):
"""
Write selected information contained in the Observation object to a file.
......@@ -341,7 +339,6 @@ class CtObservations(Observation):
dimid = f.AddDim('obs', len(self.Data))
dim200char = f.AddDim('string_of200chars', 200)
dim10char = f.AddDim('string_of10chars', 10)
dimcalcomp = f.AddDim('calendar_components', 6)
if len(self.Data) == 0:
......@@ -525,6 +522,6 @@ if __name__ == "__main__":
obs.Initialize()
obs.Validate()
obs.AddObs()
obs.add_observations()
print(obs.Data.getvalues('obs'))
......@@ -53,7 +53,7 @@ class ObsPackObservations(Observation):
self.Data = MixingRatioList([])
def AddObs(self):
def add_observations(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
The ObsPack mixing ratio files are provided as time series per site with all dates in sequence.
......@@ -118,7 +118,7 @@ class ObsPackObservations(Observation):
logging.info("Observation list now holds %d values" % len(self.Data))
def AddSimulations(self, filename, silent=False):
def add_simulations(self, filename, silent=False):
""" Adds model simulated values to the mixing ratio objects """
import da.tools.io4 as io
......@@ -157,7 +157,7 @@ class ObsPackObservations(Observation):
logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))
def WriteSampleInfo(self):
def write_sample_info(self):
"""
Write the information needed by the observation operator to a file. Return the filename that was written for later use
......@@ -264,7 +264,7 @@ class ObsPackObservations(Observation):
return obsinputfile
def AddModelDataMismatch(self):
def add_model_data_mismatch(self):
"""
Get the model-data mismatch values for this cycle.
......@@ -351,7 +351,7 @@ class ObsPackObservations(Observation):
self.SiteInfo = SiteInfo
def WriteObsToFile(self):
def write_obs_to_file(self):
"""
Write selected information contained in the Observation object to a file.
......@@ -549,8 +549,8 @@ if __name__ == "__main__":
obs.Initialize()
obs.Validate()
obs.AddObs()
obs.AddModelDataMismatch()
obs.add_observations()
obs.add_model_data_mismatch()
print(obs.Data.getvalues('obs'))
print(obs.Data.getvalues('mdm'))
......
......@@ -54,7 +54,7 @@ class ObsPackObservations(Observation):
return None
def AddObs(self):
def add_observations(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file
The ObsPack mixing ratio files are provided as time series per site with all dates in sequence.
......@@ -115,7 +115,7 @@ class ObsPackObservations(Observation):
logging.info("Observation list now holds %d values" % len(self.Data))
def AddSimulations(self, filename, silent=False):
def add_simulations(self, filename, silent=False):
""" Adds model simulated values to the mixing ratio objects """
import da.tools.io4 as io
......@@ -154,7 +154,7 @@ class ObsPackObservations(Observation):
logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))
def WriteSampleInfo(self):
def write_sample_info(self):
"""
Write the information needed by the observation operator to a file. Return the filename that was written for later use
......@@ -263,7 +263,7 @@ class ObsPackObservations(Observation):
return obsinputfile
def AddModelDataMismatch(self):
def add_model_data_mismatch(self):
"""
Get the model-data mismatch values for this cycle.
......@@ -360,7 +360,7 @@ class ObsPackObservations(Observation):
logging.debug("Added Model Data Mismatch to all samples ")
def WriteObsToFile(self):
def write_obs_to_file(self):
"""
Write selected information contained in the Observation object to a file.
......@@ -561,8 +561,8 @@ if __name__ == "__main__":
obs.Initialize()
obs.Validate()
obs.AddObs()
obs.AddModelDataMismatch()
obs.add_observations()
obs.add_model_data_mismatch()