Commit b0b1a255 authored by karolina


got rid of the getid and getversion functions (replaced with direct references to the module-level identifier and version where used)
did a cleanup of dagridded/statevector
parent 98c72a88
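
For context, a minimal sketch of the pattern this commit applies throughout (the class names here are hypothetical; the real classes appear in the diff below): the trivial getid/getversion accessors are dropped and the constructor reads the module-level identifier and version constants directly.

identifier = 'Example object'   # module-level constants, as in each affected module
version = '0.0'

# before this commit: each class carried one-line getters for the constants
class ExampleBefore(object):
    def __init__(self):
        self.Identifier = self.getid()
        self.Version = self.getversion()
    def getid(self):
        return identifier
    def getversion(self):
        return version

# after this commit: direct references, the getters can be deleted
class ExampleAfter(object):
    def __init__(self):
        self.Identifier = identifier
        self.Version = version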
@@ -49,8 +49,8 @@ class Observation(object):
"""
create an object with an identifier, version, and an empty ObservationList
"""
self.Identifier = self.getid()
self.Version = self.getversion()
self.Identifier = identifier
self.Version = version
self.Data = ObservationList([]) # Initialize with an empty list of obs
# The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can
@@ -63,18 +63,6 @@ class Observation(object):
logging.info('Observation object initialized: %s' % self.Identifier)
def getid(self):
"""
Return the identifier of the Observation Object, used for logging purposed
"""
return identifier
def getversion(self):
"""
Return the version of the Observation Object, used for logging purposed
"""
return identifier
def __str__(self):
return "This is a %s object, version %s" % (self.Identifier, self.Version)
@@ -99,7 +87,6 @@ class Observation(object):
def add_model_data_mismatch(self):
"""
Get the model-data mismatch values for this cycle.
"""
def write_sample_info(self):
@@ -30,8 +30,8 @@ class ObservationOperator(object):
def __init__(self, RcFileName, DaCycle=None):
""" The instance of an ObservationOperator is application dependent """
self.Identifier = self.getid()
self.Version = self.getversion()
self.Identifier = identifier
self.Version = version
self.RestartFileList = []
self.outputdir = None # Needed for opening the samples.nc files created
@@ -48,11 +48,6 @@ class ObservationOperator(object):
else:
self.DaCycle = {}
def getid(self):
return identifier
def getversion(self):
return version
def get_initial_data(self):
""" This method places all initial data needed by an ObservationOperator in the proper folder for the model """
@@ -29,16 +29,11 @@ class Optimizer(object):
"""
def __init__(self):
self.Identifier = self.getid()
self.Version = self.getversion()
self.Identifier = identifier
self.Version = version
logging.info('Optimizer object initialized: %s' % self.Identifier)
def getid(self):
return identifier
def getversion(self):
return version
def Initialize(self, dims):
self.nlag = dims[0]
@@ -122,8 +122,8 @@ class StateVector(object):
"""
def __init__(self, DaCycle=None):
self.Identifier = self.getid()
self.Version = self.getversion()
self.Identifier = identifier
self.Version = version
# The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can
# be added at a later moment.
@@ -134,12 +134,6 @@ class StateVector(object):
self.DaCycle = {}
logging.info('Statevector object initialized: %s' % self.Identifier)
def getid(self):
return identifier
def getversion(self):
return version
def Initialize(self):
"""
Initialize the object by specifying the dimensions.
@@ -25,12 +25,6 @@ from da.baseclasses.obs import Observation
class CtObservations(Observation):
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def getid(self):
return identifier
def getversion(self):
return version
def getlength(self):
return len(self.Data)
@@ -25,12 +25,6 @@ from da.baseclasses.obs import Observation
class ObsPackObservations(Observation):
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def getid(self):
return identifier
def getversion(self):
return version
def getlength(self):
return len(self.Data)
@@ -24,12 +24,6 @@ from da.baseclasses.obs import Observation
class ObsPackObservations(Observation):
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def getid(self):
return identifier
def getversion(self):
return version
def getlength(self):
return len(self.Data)
@@ -28,11 +28,6 @@ class CtOptimizer(Optimizer):
All other methods are inherited from the base class Optimizer.
"""
def getid(self):
return identifier
def getversion(self):
return version
def set_localization(self, loctype='None'):
""" determine which localization to use """
@@ -25,12 +25,6 @@ version = '0.0'
class CtStateVector(StateVector):
""" This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """
def getid(self):
return identifier
def getversion(self):
return version
def get_covariance(self, date):
""" Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
@@ -21,20 +21,14 @@ from da.baseclasses.statevector import EnsembleMember, StateVector
import numpy as np
identifier = 'CarbonTracker Gridded Statevector '
version = '0.0'
version = '0.0'
################### Begin Class CtStateVector ###################
class CtGriddedStateVector(StateVector):
""" This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """
def getid(self):
return identifier
def getversion(self):
return version
def get_covariance(self,date):
def get_covariance(self, date):
""" Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
@@ -50,51 +44,48 @@ class CtGriddedStateVector(StateVector):
file_ocn_cov = self.DaCycle.DaSystem['ocn.covariance']
cov_files = os.listdir(self.DaCycle.DaSystem['bio.cov.dir'])
cov_files = os.listdir(self.DaCycle.DaSystem['bio.cov.dir'])
cov_files = [os.path.join(self.DaCycle.DaSystem['bio.cov.dir'],f) for f in cov_files if self.DaCycle.DaSystem['bio.cov.prefix'] in f]
cov_files = [os.path.join(self.DaCycle.DaSystem['bio.cov.dir'], f) for f in cov_files if self.DaCycle.DaSystem['bio.cov.prefix'] in f]
msg = "Found %d covariances to use for biosphere" % len(cov_files) ; logging.debug(msg)
logging.debug("Found %d covariances to use for biosphere" % len(cov_files))
# replace YYYY.MM in the ocean covariance file string
file_ocn_cov = file_ocn_cov.replace('2000.01',date.strftime('%Y.%m'))
file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m'))
cov_files.append(file_ocn_cov)
covariancematrixlist = []
for file in cov_files:
if not os.path.exists(file):
msg = "Cannot find the specified file %s" % file ; logging.error(msg)
raise IOError,msg
msg = "Cannot find the specified file %s" % file
logging.error(msg)
raise IOError, msg
else:
logging.debug("Using covariance file: %s" % file)
msg = "Using covariance file: %s" % file ; logging.debug(msg)
f = io.CT_Read(file,'read')
f = io.CT_Read(file, 'read')
if 'pco2' in file:
cov_ocn = f.GetVariable('CORMAT')
cov = cov_ocn
cov_ocn = f.GetVariable('CORMAT')
cov = cov_ocn
else:
cov = f.GetVariable('covariance')
cov = f.GetVariable('covariance')
#cov_sf = 10.0/np.sqrt(cov.diagonal().sum()) # this scaling factor makes the total variance close to the value of a single ecoregion
cov_sf = 20.0/np.sqrt(cov.diagonal().sum()) # this scaling factor makes the total variance close to the value of a single ecoregion
cov = cov*cov_sf
dummy = f.close()
cov_sf = 20.0 / np.sqrt(cov.diagonal().sum()) # this scaling factor makes the total variance close to the value of a single ecoregion
cov = cov * cov_sf
f.close()
covariancematrixlist.append(cov)
dummy = logging.debug("Succesfully closed files after retrieving prior covariance matrices")
logging.debug("Succesfully closed files after retrieving prior covariance matrices")
# Once we have the matrices, we can start to make the full covariance matrix, and then decompose it
return covariancematrixlist
def make_new_ensemble(self,lag, covariancematrixlist = [None]):
def make_new_ensemble(self, lag, covariancematrixlist=[None]):
"""
:param lag: an integer indicating the time step in the lag order
:param covariancematrix: a list of matrices specifying the covariance distribution to draw from
@@ -114,8 +105,8 @@ class CtGriddedStateVector(StateVector):
except:
pass
if not isinstance(covariancematrixlist,list):
msg = "The covariance matrix or matrices must be passed as a list of array objects, exiting..." ; logging.error(msg)
if not isinstance(covariancematrixlist, list):
logging.error("The covariance matrix or matrices must be passed as a list of array objects, exiting..." )
raise ValueError
# Check dimensions of covariance matrix list, must add up to nparams
@@ -126,82 +117,81 @@ class CtGriddedStateVector(StateVector):
dims += matrix.shape[0]
if dims != self.nparams:
msg = "The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..."%(dims,self.nparams) ; logging.error(msg)
logging.error("The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..." % (dims, self.nparams))
raise ValueError
# Loop over list if identity matrices and create a matrix of (nparams,nmembers) with the deviations
istart = 0
istop = 0
dof = 0.0
dev_matrix = np.zeros((self.nparams,self.nmembers,),'float')
randstate = np.random.get_state()
istart = 0
istop = 0
dof = 0.0
dev_matrix = np.zeros((self.nparams, self.nmembers,), 'float')
randstate = np.random.get_state()
for matrix in covariancematrixlist:
# Make a cholesky decomposition of the covariance matrix
U,s,Vh = np.linalg.svd(matrix)
dof += np.sum(s)**2/sum(s**2)
U, s, Vh = np.linalg.svd(matrix)
dof += np.sum(s) ** 2 / sum(s ** 2)
try:
C = np.linalg.cholesky(matrix)
except np.linalg.linalg.LinAlgError,err:
msg = 'Cholesky decomposition has failed ' ; logging.error(msg)
msg = 'For a matrix of dimensions: %d'%matrix.shape[0] ; logging.error(msg)
C = np.linalg.cholesky(matrix)
except np.linalg.linalg.LinAlgError, err:
logging.error('Cholesky decomposition has failed ')
logging.error('For a matrix of dimensions: %d' % matrix.shape[0])
logging.debug(err)
raise np.linalg.linalg.LinAlgError
# Draw nmembers instances of this distribution
npoints = matrix.shape[0]
npoints = matrix.shape[0]
istop = istop+npoints
istop = istop + npoints
for member in range(1,self.nmembers):
rands = np.random.randn(npoints)
deviations = np.dot(C,rands)
dev_matrix[istart:istop,member-1] = deviations
dev_matrix[istop,member-1] = 1.e-10*np.random.randn()
for member in range(1, self.nmembers):
rands = np.random.randn(npoints)
deviations = np.dot(C, rands)
dev_matrix[istart:istop, member - 1] = deviations
dev_matrix[istop, member - 1] = 1.e-10 * np.random.randn()
#cov2 = np.dot(dev_matrix[istart:istop,:],np.transpose(dev_matrix[istart:istop,:])) / (self.nmembers-1)
#print matrix.sum(),cov2.sum(),abs(matrix.diagonal()-cov2.diagonal()).max(), matrix.shape,cov2.shape
istart = istart + npoints
istart = istart + npoints
msg = 'Successfully constructed a deviation matrix from covariance structure' ; logging.debug(msg)
msg = 'Appr. degrees of freedom in full covariance matrix is %s'%(int(dof)) ; logging.info(msg)
logging.debug('Successfully constructed a deviation matrix from covariance structure')
logging.info('Appr. degrees of freedom in full covariance matrix is %s' % (int(dof)))
# Now fill the ensemble members with the deviations we have just created
# Create mean values
NewMean = np.ones(self.nparams,float) # standard value for a new time step is 1.0
NewMean = np.ones(self.nparams, float) # standard value for a new time step is 1.0
# If this is not the start of the filter, average previous two optimized steps into the mix
if lag == self.nlag and self.nlag >= 3:
NewMean += self.EnsembleMembers[lag-2][0].ParameterValues + \
self.EnsembleMembers[lag-3][0].ParameterValues
NewMean = NewMean/3.0
NewMean += self.EnsembleMembers[lag - 2][0].ParameterValues + \
self.EnsembleMembers[lag - 3][0].ParameterValues
NewMean = NewMean / 3.0
# Create the first ensemble member with a deviation of 0.0 and add to list
NewMember = self.GetNewMember(0)
NewMember.ParameterValues = NewMean.flatten() # no deviations
dummy = self.EnsembleMembers[lag-1].append(NewMember)
NewMember = self.GetNewMember(0)
NewMember.ParameterValues = NewMean.flatten() # no deviations
self.EnsembleMembers[lag - 1].append(NewMember)
# Create members 1:nmembers and add to EnsembleMembers list
for member in range(1,self.nmembers):
NewMember = self.GetNewMember(member)
NewMember.ParameterValues = dev_matrix[:,member-1] + NewMean
dummy = self.EnsembleMembers[lag-1].append(NewMember)
for member in range(1, self.nmembers):
NewMember = self.GetNewMember(member)
NewMember.ParameterValues = dev_matrix[:, member - 1] + NewMean
self.EnsembleMembers[lag - 1].append(NewMember)
msg = '%d new ensemble members were added to the state vector # %d'%(self.nmembers,lag) ; logging.debug(msg)
logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, lag))
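
As an aside, a minimal standalone sketch (hypothetical helper name and shapes, not the project's API) of the deviation draw that make_new_ensemble performs above: each block covariance matrix is Cholesky-decomposed and multiplied by standard-normal vectors, so the stacked deviations approximately reproduce the prescribed block-diagonal covariance.

import numpy as np

def draw_deviations(covariancematrixlist, nmembers):
    # total state dimension is the sum of the block sizes
    nparams = sum(m.shape[0] for m in covariancematrixlist)
    dev_matrix = np.zeros((nparams, nmembers), 'float')
    istart = 0
    for matrix in covariancematrixlist:
        C = np.linalg.cholesky(matrix)   # raises LinAlgError if the block is not positive definite
        npoints = matrix.shape[0]
        for member in range(1, nmembers):
            rands = np.random.randn(npoints)
            dev_matrix[istart:istart + npoints, member - 1] = np.dot(C, rands)
        istart += npoints
    # only columns 0..nmembers-2 are filled, since the first ensemble member above carries no deviation
    return dev_matrix

# example: two small diagonal blocks, 10 ensemble members
devs = draw_deviations([np.eye(3), 2.0 * np.eye(2)], 10)
print(devs.shape)   # (5, 10)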
@@ -226,24 +216,24 @@ if __name__ == "__main__":
opts = ['-v']
args = {'rc':'../../dagridded.rc'}
StartLogger(level = logging.DEBUG)
StartLogger(level=logging.DEBUG)
DaCycle = CycleControl(opts,args)
DaCycle = CycleControl(opts, args)
DaSystem = CtGriddedDaSystem('../../da/rc/carbontrackergridded.rc')
DaSystem = CtGriddedDaSystem('../../da/rc/carbontrackergridded.rc')
StateVector = CtGriddedStateVector()
DaSystem.Initialize()
DaSystem.Validate()
DaCycle.DaSystem = DaSystem
DaCycle.DaSystem = DaSystem
DaCycle.Initialize()
StateVector.DaCycle = DaCycle # also embed object in StateVector so it can access cycle information for I/O etc
dummy = StateVector.Initialize()
dummy = StateVector.Initialize()
for n in range(1):
cov = StateVector.get_covariance(DaCycle['time.start'])
dummy = StateVector.make_new_ensemble(n+1,cov)
cov = StateVector.get_covariance(DaCycle['time.start'])
dummy = StateVector.make_new_ensemble(n + 1, cov)
#StateVector.propagate()
@@ -252,24 +242,24 @@ if __name__ == "__main__":
#dummy = StateVector.WriteToFile(filename)
#StateVector.ReadFromFile(filename)
StateVector.StateToTC(fluxvector=np.ones(StateVector.nparams) )
StateVector.StateToTC(fluxvector=np.ones(StateVector.nparams))
sys.exit(2)
members = StateVector.EnsembleMembers[-1]
members = StateVector.EnsembleMembers[-1]
for mem in members[0:1]:
mem.ParameterValues = np.arange(StateVector.nparams)+1.0
data = StateVector.VectorToGrid(vectordata=mem.ParameterValues)
params = StateVector.VectorToGrid(griddata=data, reverse=True, method = "minval")
mem.ParameterValues = np.arange(StateVector.nparams) + 1.0
data = StateVector.VectorToGrid(vectordata=mem.ParameterValues)
params = StateVector.VectorToGrid(griddata=data, reverse=True, method="minval")
tcparams = StateVector.VectorToTC(mem.ParameterValues)
tcparams = StateVector.VectorToTC(mem.ParameterValues)
print (StateVector.gridmap-data).min()
print (StateVector.gridmap-data).max()
print (mem.ParameterValues-params).min()
print (mem.ParameterValues-params).max()
print (StateVector.gridmap - data).min()
print (StateVector.gridmap - data).max()
print (mem.ParameterValues - params).min()
print (mem.ParameterValues - params).max()
@@ -60,8 +60,8 @@ class TM5ObservationOperator(ObservationOperator):
def __init__(self, RcFileName, DaCycle=None):
""" The instance of an TMObservationOperator is application dependent """
self.Identifier = self.getid() # the identifier gives the model name
self.Version = self.getversion() # the model version used
self.Identifier = identifier # the identifier gives the model name
self.Version = version # the model version used
self.RestartFileList = []
self.OutputFileList = ()
self.RcFileType = 'None'
@@ -160,12 +160,6 @@ class TM5ObservationOperator(ObservationOperator):
else:
logging.info('Compilation successful, continuing')
def getid(self):
return identifier
def getversion(self):
return version
def PrepareRun(self):
"""
Prepare a forward model TM5 run, this consists of: