From acf26a2bcd1e26faa64455e60fdc72bd1c15b6da Mon Sep 17 00:00:00 2001 From: karolina <amvdw95@gmail.com> Date: Thu, 26 Apr 2012 08:13:44 +0000 Subject: [PATCH] general cleanup, mostly changed the "dummy" stuff and 2-command logging --- da/baseclasses/dasystem.py | 37 +- da/baseclasses/obs.py | 22 +- da/baseclasses/observationoperator.py | 16 +- da/baseclasses/optimizer.py | 479 ++++++++++++-------------- da/baseclasses/platform.py | 100 +++--- da/baseclasses/statevector.py | 380 ++++++++++---------- da/ct/dasystem.py | 10 +- da/ct/obs.py | 453 ++++++++++++------------ da/ct/optimizer.py | 39 +-- da/ct/statevector.py | 77 ++--- da/examples/das.py | 35 +- da/tm5/observationoperator.py | 380 ++++++++++---------- da/tools/initexit.py | 410 ++++++++++------------ da/tools/io.py | 218 ++++++------ da/tools/io4.py | 8 +- da/tools/pipeline.py | 275 +++++++-------- 16 files changed, 1364 insertions(+), 1575 deletions(-) diff --git a/da/baseclasses/dasystem.py b/da/baseclasses/dasystem.py index 59a0b9d..881551b 100755 --- a/da/baseclasses/dasystem.py +++ b/da/baseclasses/dasystem.py @@ -46,17 +46,16 @@ class DaSystem(dict): Information on the data assimilation system used. This is normally an rc-file with settings. """ - def __init__(self,rcfilename): + def __init__(self, rcfilename): """ Initialization occurs from passed rc-file name, items in the rc-file will be added to the dictionary """ - self.Identifier = 'CarbonTracker CO2' # the identifier gives the platform name - + self.Identifier = 'CarbonTracker CO2' # the identifier gives the platform name self.LoadRc(rcfilename) - msg = 'Data Assimilation System initialized: %s'%self.Identifier ; logging.debug(msg) + logging.debug('Data Assimilation System initialized: %s' % self.Identifier) def __str__(self): @@ -71,48 +70,44 @@ class DaSystem(dict): return "" - def LoadRc(self,RcFileName): + def LoadRc(self, RcFileName): """ This method loads a DA System Info rc-file with settings for this simulation """ import da.tools.rc as rc - for k,v in rc.read(RcFileName).iteritems(): - self[k] = v - self.RcFileName = RcFileName - self.DaRcLoaded = True + for k, v in rc.read(RcFileName).iteritems(): + self[k] = v + self.RcFileName = RcFileName + self.DaRcLoaded = True - msg = 'DA System Info rc-file (%s) loaded successfully'%self.RcFileName ; logging.debug(msg) + logging.debug('DA System Info rc-file (%s) loaded successfully' % self.RcFileName) - return True - def Initialize(self ): + def Initialize(self): """ Initialize the object. 
""" - def Validate(self ): + def Validate(self): """ Validate the contents of the rc-file given a dictionary of required keys """ - needed_rc_items={} + needed_rc_items = {} - for k,v in self.iteritems(): + for k, v in self.iteritems(): if v == 'True' : self[k] = True if v == 'False': self[k] = False for key in needed_rc_items: - if not self.has_key(key): - status,msg = ( False,'Missing a required value in rc-file : %s' % key) + msg = 'Missing a required value in rc-file : %s' % key logging.error(msg) - raise IOError,msg - - status,msg = ( True,'DA System Info settings have been validated succesfully' ) ; logging.debug(msg) + raise IOError, msg + logging.debug('DA System Info settings have been validated succesfully') - return None ################### End Class DaSystem ################### diff --git a/da/baseclasses/obs.py b/da/baseclasses/obs.py index aad0736..1dec17d 100755 --- a/da/baseclasses/obs.py +++ b/da/baseclasses/obs.py @@ -22,7 +22,7 @@ import datetime from numpy import array, ndarray identifier = 'Observation baseclass' -version = '0.0' +version = '0.0' ################### Begin Class Observation ################### @@ -47,13 +47,13 @@ class Observation(object): """ - def __init__(self, DaCycle = None): + def __init__(self, DaCycle=None): """ create an object with an identifier, version, and an empty ObservationList """ self.Identifier = self.getid() - self.Version = self.getversion() - self.Data = ObservationList([]) # Initialize with an empty list of obs + self.Version = self.getversion() + self.Data = ObservationList([]) # Initialize with an empty list of obs # The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can # be added at a later moment. @@ -63,7 +63,7 @@ class Observation(object): else: self.DaCycle = {} - msg = 'Observation object initialized: %s'%self.Identifier ; logging.info(msg) + logging.info('Observation object initialized: %s' % self.Identifier) def getid(self): """ @@ -79,7 +79,7 @@ class Observation(object): def __str__(self): """ Prints a list of Observation objects""" - return "This is a %s object, version %s"%(self.Identifier,self.Version) + return "This is a %s object, version %s" % (self.Identifier, self.Version) def Initialize(self): """ Perform all steps needed to start working with observational data, this can include moving data, concatenating files, @@ -104,13 +104,13 @@ class Observation(object): """ Add the simulation data to the Observation object. """ - def AddModelDataMismatch(self ): + def AddModelDataMismatch(self): """ Get the model-data mismatch values for this cycle. """ - def WriteSampleInfo(self ): + def WriteSampleInfo(self): """ Write the information needed by the observation operator to a file. Return the filename that was written for later use """ @@ -124,10 +124,10 @@ class Observation(object): class ObservationList(list): """ This is a special type of list that holds observed sample objects. 
It has methods to extract data from such a list easily """

-    def getvalues(self,name,constructor=array):
+    def getvalues(self, name, constructor=array):

-        result = constructor([getattr(o,name) for o in self])
-        if isinstance(result,ndarray):
+        result = constructor([getattr(o, name) for o in self])
+        if isinstance(result, ndarray):
             return result.squeeze()
         else:
             return result

diff --git a/da/baseclasses/observationoperator.py b/da/baseclasses/observationoperator.py
index c175f37..a5e9cb6 100755
--- a/da/baseclasses/observationoperator.py
+++ b/da/baseclasses/observationoperator.py
@@ -15,8 +15,8 @@ import sys
 import logging
 import datetime

-identifier = 'GeneralObservationOperator'
-version = '0.0'
+identifier = 'GeneralObservationOperator'
+version = '0.0'

 ################### Begin Class ObservationOperator ###################

 class ObservationOperator(object):
     """

-    def __init__(self, RcFileName,DaCycle = None):
+    def __init__(self, RcFileName, DaCycle=None):
         """ The instance of an ObservationOperator is application dependent """
         self.Identifier = self.getid()
-        self.Version = self.getversion()
+        self.Version = self.getversion()
         self.RestartFileList = []
         self.outputdir = None # Needed for opening the samples.nc files created

-        dummy = self.LoadRc(RcFilename)   # load the specified rc-file
-        dummy = self.ValidateRc()         # validate the contents
+        self.LoadRc(RcFileName)   # load the specified rc-file
+        self.ValidateRc()         # validate the contents

-        msg = 'Observation Operator object initialized: %s'%self.Identifier ; logging.info(msg)
+        logging.info('Observation Operator object initialized: %s' % self.Identifier)

         # The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can
         # be added at a later moment.
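
As an aside, the LoadRc/ValidateRc pair used by DaSystem above and by ObservationOperator here follows one pattern: parse a plain-text rc-file of key/value settings into the dict-like object, coerce string booleans, and fail loudly on missing required keys. A minimal self-contained sketch of that pattern (the real parser lives in da.tools.rc; the '!' comment convention and the needed_rc_items values are assumptions for illustration only):

import logging

def read_rc(filename):
    # Parse 'key : value' lines into a dict; '!' is assumed to start a comment.
    settings = {}
    for line in open(filename):
        line = line.split('!')[0].strip()
        if ':' in line:
            key, value = line.split(':', 1)
            settings[key.strip()] = value.strip()
    return settings

class MiniRcObject(dict):
    # Dict-backed settings object mirroring the DaSystem/ObservationOperator pattern.

    needed_rc_items = ['obs.input.dir', 'obs.input.fname']   # hypothetical required keys

    def LoadRc(self, RcFileName):
        self.update(read_rc(RcFileName))
        self.RcFileName = RcFileName

    def ValidateRc(self):
        for k, v in self.items():              # map string booleans to real booleans
            if v == 'True':  self[k] = True
            if v == 'False': self[k] = False
        for key in self.needed_rc_items:       # every required key must be present
            if key not in self:
                msg = 'Missing a required value in rc-file : %s' % key
                logging.error(msg)
                raise IOError(msg)
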
@@ -63,7 +63,7 @@ class ObservationOperator(object): return None def __str__(self): - return "This is a %s object, version %s"%(self.Identifier,self.Version) + return "This is a %s object, version %s" % (self.Identifier, self.Version) def Initialize(self): """ Perform all steps necessary to start the observation operator through a simple Run() call """ diff --git a/da/baseclasses/optimizer.py b/da/baseclasses/optimizer.py index c8fa4b3..cd9d2ef 100755 --- a/da/baseclasses/optimizer.py +++ b/da/baseclasses/optimizer.py @@ -14,9 +14,12 @@ import os import sys import logging import datetime +import numpy as np +import numpy.linalg as la + identifier = 'Optimizer baseclass' -version = '0.0' +version = '0.0' ################### Begin Class Optimizer ################### @@ -30,9 +33,9 @@ class Optimizer(object): def __init__(self): self.Identifier = self.getid() - self.Version = self.getversion() + self.Version = self.getversion() - msg = 'Optimizer object initialized: %s'%self.Identifier ; logging.info(msg) + logging.info('Optimizer object initialized: %s' % self.Identifier) def getid(self): return identifier @@ -41,120 +44,106 @@ class Optimizer(object): return version def Initialize(self, dims): - - self.nlag = dims[0] - self.nmembers = dims[1] - self.nparams = dims[2] - self.nobs = dims[3] + self.nlag = dims[0] + self.nmembers = dims[1] + self.nparams = dims[2] + self.nobs = dims[3] self.CreateMatrices() - return None def CreateMatrices(self): """ Create Matrix space needed in optimization routine """ - import numpy as np - + # mean state [X] - self.x = np.zeros( (self.nlag*self.nparams,), float) + self.x = np.zeros((self.nlag * self.nparams,), float) # deviations from mean state [X'] - self.X_prime = np.zeros( (self.nlag*self.nparams,self.nmembers,), float) + self.X_prime = np.zeros((self.nlag * self.nparams, self.nmembers,), float) # mean state, transported to observation space [ H(X) ] - self.Hx = np.zeros( (self.nobs,), float) + self.Hx = np.zeros((self.nobs,), float) # deviations from mean state, transported to observation space [ H(X') ] - self.HX_prime = np.zeros( (self.nobs,self.nmembers), float) + self.HX_prime = np.zeros((self.nobs, self.nmembers), float) # observations - self.obs = np.zeros( (self.nobs,), float) + self.obs = np.zeros((self.nobs,), float) # covariance of observations - self.R = np.zeros( (self.nobs,self.nobs,), float) + self.R = np.zeros((self.nobs, self.nobs,), float) # localization of obs - self.may_localize = np.zeros(self.nobs,bool) + self.may_localize = np.zeros(self.nobs, bool) # rejection of obs - self.may_reject = np.zeros(self.nobs,bool) + self.may_reject = np.zeros(self.nobs, bool) # flags of obs - self.flags = np.zeros(self.nobs,int) + self.flags = np.zeros(self.nobs, int) # Total covariance of fluxes and obs in units of obs [H P H^t + R] - self.HPHR = np.zeros( (self.nobs,self.nobs,), float) + self.HPHR = np.zeros((self.nobs, self.nobs,), float) # Kalman Gain matrix - self.KG = np.zeros( (self.nlag*self.nparams,self.nobs,), float) + self.KG = np.zeros((self.nlag * self.nparams, self.nobs,), float) - def StateToMatrix(self,StateVector): - import numpy as np + def StateToMatrix(self, StateVector): try: import matplotlib.pyplot as plt except: pass - allobs=[] # collect all obs for n=1,..,nlag - allmdm=[] # collect all mdm for n=1,..,nlag - allsamples=[] # collect all model samples for n=1,..,nlag - allreject=[] # collect all model samples for n=1,..,nlag - alllocalize=[] # collect all model samples for n=1,..,nlag - allflags=[] # collect all model samples 
for n=1,..,nlag - allmemsamples=None # collect all members model samples for n=1,..,nlag + allobs = [] # collect all obs for n=1,..,nlag + allmdm = [] # collect all mdm for n=1,..,nlag + allsamples = [] # collect all model samples for n=1,..,nlag + allreject = [] # collect all model samples for n=1,..,nlag + alllocalize = [] # collect all model samples for n=1,..,nlag + allflags = [] # collect all model samples for n=1,..,nlag + allmemsamples = None # collect all members model samples for n=1,..,nlag for n in range(self.nlag): - - members = StateVector.EnsembleMembers[n] - self.x[n*self.nparams:(n+1)*self.nparams] = members[0].ParameterValues - self.X_prime[n*self.nparams:(n+1)*self.nparams,:] = np.transpose(np.array([m.ParameterValues for m in members])) + members = StateVector.EnsembleMembers[n] + self.x[n * self.nparams:(n + 1) * self.nparams] = members[0].ParameterValues + self.X_prime[n * self.nparams:(n + 1) * self.nparams, :] = np.transpose(np.array([m.ParameterValues for m in members])) if members[0].ModelSample != None: + self.rejection_threshold = members[0].ModelSample.rejection_threshold - self.rejection_threshold = members[0].ModelSample.rejection_threshold - - allreject.extend( members[0].ModelSample.Data.getvalues('may_reject') ) - alllocalize.extend( members[0].ModelSample.Data.getvalues('may_localize') ) - allflags.extend( members[0].ModelSample.Data.getvalues('flag') ) + allreject.extend(members[0].ModelSample.Data.getvalues('may_reject')) + alllocalize.extend(members[0].ModelSample.Data.getvalues('may_localize')) + allflags.extend(members[0].ModelSample.Data.getvalues('flag')) - allobs.extend( members[0].ModelSample.Data.getvalues('obs') ) - allsamples.extend( members[0].ModelSample.Data.getvalues('simulated') ) - allmdm.extend( members[0].ModelSample.Data.getvalues('mdm') ) + allobs.extend(members[0].ModelSample.Data.getvalues('obs')) + allsamples.extend(members[0].ModelSample.Data.getvalues('simulated')) + allmdm.extend(members[0].ModelSample.Data.getvalues('mdm')) - memsamples=[] + memsamples = [] for mem in members: - memsamples.append( mem.ModelSample.Data.getvalues('simulated') ) + memsamples.append(mem.ModelSample.Data.getvalues('simulated')) if allmemsamples == None : allmemsamples = np.array(memsamples) else: - allmemsamples = np.concatenate((allmemsamples,np.array(memsamples)),axis=1) - + allmemsamples = np.concatenate((allmemsamples, np.array(memsamples)), axis=1) - self.HX_prime[:,:] = np.transpose(allmemsamples) + self.HX_prime[:, :] = np.transpose(allmemsamples) - self.obs[:] = np.array(allobs) - self.Hx[:] = np.array(allsamples) + self.obs[:] = np.array(allobs) + self.Hx[:] = np.array(allsamples) - self.may_reject[:] = np.array(allreject) - self.may_localize[:] = np.array(alllocalize) - self.flags[:] = np.array(allflags) + self.may_reject[:] = np.array(allreject) + self.may_localize[:] = np.array(alllocalize) + self.flags[:] = np.array(allflags) + self.X_prime = self.X_prime - self.x[:, np.newaxis] # make into a deviation matrix + self.HX_prime = self.HX_prime - self.Hx[:, np.newaxis] # make a deviation matrix - self.X_prime = self.X_prime - self.x[:,np.newaxis] # make into a deviation matrix - self.HX_prime = self.HX_prime - self.Hx[:,np.newaxis] # make a deviation matrix + for i, mdm in enumerate(allmdm): + self.R[i, i] = mdm ** 2 + - for i,mdm in enumerate(allmdm): - - self.R[i,i] = mdm**2 - - return None - - def MatrixToState(self,StateVector): - import numpy as np - + def MatrixToState(self, StateVector): for n in range(self.nlag): - - members = 
StateVector.EnsembleMembers[n] - for m,mem in enumerate(members): - members[m].ParameterValues[:] = self.X_prime[n*self.nparams:(n+1)*self.nparams,m] + self.x[n*self.nparams:(n+1)*self.nparams] + members = StateVector.EnsembleMembers[n] + for m, mem in enumerate(members): + members[m].ParameterValues[:] = self.X_prime[n * self.nparams:(n + 1) * self.nparams, m] + self.x[n * self.nparams:(n + 1) * self.nparams] StateVector.isOptimized = True - msg = 'Returning optimized data to the StateVector, setting "StateVector.isOptimized = True" ' ; logging.debug(msg) - return None + logging.debug('Returning optimized data to the StateVector, setting "StateVector.isOptimized = True" ') - def WriteDiagnostics(self,DaCycle, StateVector,type='prior'): + def WriteDiagnostics(self, DaCycle, StateVector, type='prior'): """ Open a NetCDF file and write diagnostic output from optimization process: @@ -171,255 +160,224 @@ class Optimizer(object): import da.tools.io4 as io #import da.tools.io as io - outdir = DaCycle['dir.diagnostics'] - filename = os.path.join(outdir,'optimizer.%s.nc'% DaCycle['time.start'].strftime('%Y%m%d') ) - DaCycle.OutputFileList += ( filename, ) + filename = os.path.join(DaCycle['dir.diagnostics'], 'optimizer.%s.nc' % DaCycle['time.start'].strftime('%Y%m%d')) + + #LU wtf w sumie? + DaCycle.OutputFileList += (filename,) # Open or create file if type == 'prior': - f = io.CT_CDF(filename,method='create') - msg = 'Creating new diagnostics file for optimizer (%s)' % filename ; logging.debug(msg) + f = io.CT_CDF(filename, method='create') + logging.debug('Creating new diagnostics file for optimizer (%s)' % filename) elif type == 'optimized': - f = io.CT_CDF(filename,method='write') - msg = 'Opening existing diagnostics file for optimizer (%s)' % filename ; logging.debug(msg) + f = io.CT_CDF(filename, method='write') + logging.debug('Opening existing diagnostics file for optimizer (%s)' % filename) # Add dimensions - dimparams = f.AddParamsDim(self.nparams) - dimmembers = f.AddMembersDim(self.nmembers) - dimlag = f.AddLagDim(self.nlag, unlimited=False) - dimobs = f.AddObsDim(self.nobs) - dimstate = f.AddDim('nstate',self.nparams*self.nlag) + dimparams = f.AddParamsDim(self.nparams) + dimmembers = f.AddMembersDim(self.nmembers) + dimlag = f.AddLagDim(self.nlag, unlimited=False) + dimobs = f.AddObsDim(self.nobs) + dimstate = f.AddDim('nstate', self.nparams * self.nlag) # Add data, first the ones that are written both before and after the optimization - - data = self.x - - savedict = io.std_savedict.copy() - savedict['name'] = "statevectormean_%s" % type - savedict['long_name'] = "full_statevector_mean_%s" % type - savedict['units'] = "unitless" - savedict['dims'] = dimstate - savedict['values'] = data.tolist() - savedict['comment'] = 'Full %s state vector mean '% type - dummy = f.AddData(savedict) - - data = self.X_prime - - savedict = io.std_savedict.copy() - savedict['name'] = "statevectordeviations_%s" % type - savedict['long_name'] = "full_statevector_deviations_%s" % type - savedict['units'] = "unitless" - savedict['dims'] = dimstate+dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'Full state vector %s deviations as resulting from the optimizer'% type - dummy = f.AddData(savedict) - - data = self.Hx - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamplesmean_%s"%type - savedict['long_name'] = "modelsamplesforecastmean_%s" %type - savedict['units'] = "mol mol-1" - savedict['dims'] = dimobs - savedict['values'] = data.tolist() - savedict['comment'] = 
'%s mean mixing ratios based on %s state vector'% (type,type,) - dummy = f.AddData(savedict) - - data = self.HX_prime - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamplesdeviations_%s"% type - savedict['long_name'] = "modelsamplesforecastdeviations_%s"%type - savedict['units'] = "mol mol-1" - savedict['dims'] = dimobs+dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = '%s mixing ratio deviations based on %s state vector'% (type,type,) - dummy = f.AddData(savedict) - - # Continue with prior only data + # x, X_prime, Hx, HX_prime + + savedict = io.std_savedict.copy() + savedict['name'] = "statevectormean_%s" % type + savedict['long_name'] = "full_statevector_mean_%s" % type + savedict['units'] = "unitless" + savedict['dims'] = dimstate + savedict['values'] = self.x.tolist() + savedict['comment'] = 'Full %s state vector mean ' % type + f.AddData(savedict) + + savedict = io.std_savedict.copy() + savedict['name'] = "statevectordeviations_%s" % type + savedict['long_name'] = "full_statevector_deviations_%s" % type + savedict['units'] = "unitless" + savedict['dims'] = dimstate + dimmembers + savedict['values'] = self.X_prime.tolist() + savedict['comment'] = 'Full state vector %s deviations as resulting from the optimizer' % type + f.AddData(savedict) + + savedict = io.std_savedict.copy() + savedict['name'] = "modelsamplesmean_%s" % type + savedict['long_name'] = "modelsamplesforecastmean_%s" % type + savedict['units'] = "mol mol-1" + savedict['dims'] = dimobs + savedict['values'] = self.Hx.tolist() + savedict['comment'] = '%s mean mixing ratios based on %s state vector' % (type, type,) + f.AddData(savedict) + + savedict = io.std_savedict.copy() + savedict['name'] = "modelsamplesdeviations_%s" % type + savedict['long_name'] = "modelsamplesforecastdeviations_%s" % type + savedict['units'] = "mol mol-1" + savedict['dims'] = dimobs + dimmembers + savedict['values'] = self.HX_prime.tolist() + savedict['comment'] = '%s mixing ratio deviations based on %s state vector' % (type, type,) + f.AddData(savedict) + + # Continue with prior only data: obs, R if type == 'prior': - - data = self.obs - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimobs - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - dummy = f.AddData(savedict) - - - data = self.R - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]^2" - savedict['dims'] = dimobs+dimobs - savedict['values'] = data.tolist() - savedict['comment'] = 'Variance of mole fractions resulting from model-data mismatch' - dummy = f.AddData(savedict) - - # Continue with posterior only data + savedict = io.std_savedict.copy() + savedict['name'] = "observed" + savedict['long_name'] = "observedvalues" + savedict['units'] = "mol mol-1" + savedict['dims'] = dimobs + savedict['values'] = self.obs.tolist() + savedict['comment'] = 'Observations used in optimization' + f.AddData(savedict) + + savedict = io.std_savedict.copy() + savedict['name'] = "modeldatamismatch" + savedict['long_name'] = "modeldatamismatch" + savedict['units'] = "[mol mol-1]^2" + savedict['dims'] = dimobs + dimobs + savedict['values'] = self.R.tolist() + savedict['comment'] = 'Variance of mole fractions resulting from model-data mismatch' + f.AddData(savedict) + + # Continue with posterior only data: 
HPHR, flags, KG
         elif type == 'optimized':
-
-            data = self.HPHR
-
-            savedict = io.std_savedict.copy()
-            savedict['name'] = "molefractionvariance"
-            savedict['long_name'] = "molefractionvariance"
-            savedict['units'] = "[mol mol-1]^2"
-            savedict['dims'] = dimobs+dimobs
-            savedict['values'] = data.tolist()
-            savedict['comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
-            dummy = f.AddData(savedict)
-
-            data = self.flags
-
-            savedict = io.std_savedict.copy()
-            savedict['name'] = "flag"
-            savedict['long_name'] = "flag_for_obs_model"
-            savedict['units'] = "None"
-            savedict['dims'] = dimobs
-            savedict['values'] = data.tolist()
-            savedict['comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
-            dummy = f.AddData(savedict)
-
-            data = self.KG
-
-            savedict = io.std_savedict.copy()
-            savedict['name'] = "kalmangainmatrix"
-            savedict['long_name'] = "kalmangainmatrix"
-            savedict['units'] = "unitless molefraction-1"
-            savedict['dims'] = dimstate+dimobs
-            savedict['values'] = data.tolist()
-            savedict['comment'] = 'Kalman gain matrix of all obs and state vector elements'
-            dummy = f.AddData(savedict)
-
-
-        return None
-
+            savedict = io.std_savedict.copy()
+            savedict['name'] = "molefractionvariance"
+            savedict['long_name'] = "molefractionvariance"
+            savedict['units'] = "[mol mol-1]^2"
+            savedict['dims'] = dimobs + dimobs
+            savedict['values'] = self.HPHR.tolist()
+            savedict['comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
+            f.AddData(savedict)
+
+            savedict = io.std_savedict.copy()
+            savedict['name'] = "flag"
+            savedict['long_name'] = "flag_for_obs_model"
+            savedict['units'] = "None"
+            savedict['dims'] = dimobs
+            savedict['values'] = self.flags.tolist()
+            savedict['comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
+            f.AddData(savedict)
+
+            savedict = io.std_savedict.copy()
+            savedict['name'] = "kalmangainmatrix"
+            savedict['long_name'] = "kalmangainmatrix"
+            savedict['units'] = "unitless molefraction-1"
+            savedict['dims'] = dimstate + dimobs
+            savedict['values'] = self.KG.tolist()
+            savedict['comment'] = 'Kalman gain matrix of all obs and state vector elements'
+            f.AddData(savedict)
+#LU not sure, but it should be like that
+        f.close()
+
     def SerialMinimumLeastSquares(self):
         """ Make minimum least squares solution by looping over obs"""
-        import numpy as np
-        import numpy.linalg as la
-
-        tvalue=1.97591
+        tvalue = 1.97591

         for n in range(self.nobs):
-
             # Screen for flagged observations (for instance site not found, or no sample written from model)
-
             if self.flags[n] != 0:
-                msg = 'Skipping observation %d because of flag value %d'%(n,self.flags[n]) ; logging.debug(msg)
+                logging.debug('Skipping observation %d because of flag value %d' % (n, self.flags[n]))
                 continue

-            # Screen for outliers greather than 3x model-data mismatch, only apply if obs may be rejected
-
-            res = self.obs[n]-self.Hx[n]
-
+            # Screen for outliers greater than 3x model-data mismatch, only apply if obs may be rejected
+            res = self.obs[n] - self.Hx[n]
+
             if self.may_reject[n]:
-                threshold = self.rejection_threshold*np.sqrt(self.R[n,n])
+                threshold = self.rejection_threshold * np.sqrt(self.R[n, n])
                 if np.abs(res) > threshold:
-                    msg = 'Rejecting observation %d because residual (%f) exceeds threshold (%f)'%(n,res,threshold) ; logging.debug(msg)
+                    logging.debug('Rejecting observation %d because residual (%f) exceeds threshold (%f)' % (n, res, threshold))
-                    self.flags[n] == 2
+                    self.flags[n] = 2
                     continue

-            PHt = 
1./(self.nmembers-1)*np.dot(self.X_prime,self.HX_prime[n,:]) - self.HPHR[n,n] = 1./(self.nmembers-1)*(self.HX_prime[n,:]*self.HX_prime[n,:]).sum()+self.R[n,n] + PHt = 1. / (self.nmembers - 1) * np.dot(self.X_prime, self.HX_prime[n, :]) + self.HPHR[n, n] = 1. / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[n, :]).sum() + self.R[n, n] - self.KG[:,n] = PHt/self.HPHR[n,n] + self.KG[:, n] = PHt / self.HPHR[n, n] if self.may_localize[n]: - dummy = self.Localize(n) - msg = 'Localized observation %d'%(n,) ; logging.debug(msg) + dummy = self.Localize(n) + logging.debug('Localized observation %d' % (n,)) else: - msg = 'Not allowed to Localize observation %d'%(n,) ; logging.debug(msg) + logging.debug('Not allowed to Localize observation %d' % (n,)) - alpha = np.double(1.0)/(np.double(1.0)+np.sqrt( (self.R[n,n])/self.HPHR[n,n] ) ) + alpha = np.double(1.0) / (np.double(1.0) + np.sqrt((self.R[n, n]) / self.HPHR[n, n])) - self.x[:] = self.x + self.KG[:,n]*res + self.x[:] = self.x + self.KG[:, n] * res for r in range(self.nmembers): - self.X_prime[:,r] = self.X_prime[:,r]-alpha*self.KG[:,n]*(self.HX_prime[n,r]) + self.X_prime[:, r] = self.X_prime[:, r] - alpha * self.KG[:, n] * (self.HX_prime[n, r]) #WP !!!! Very important to first do all obervations from n=1 through the end, and only then update 1,...,n. The current observation #WP should always be updated last because it features in the loop of the adjustments !!!! - for m in range(n+1,self.nobs): - res = self.obs[n]-self.Hx[n] - fac = 1.0/(self.nmembers-1)*(self.HX_prime[n,:]*self.HX_prime[m,:]).sum()/self.HPHR[n,n] - self.Hx[m] = self.Hx[m] + fac * res - self.HX_prime[m,:] = self.HX_prime[m,:] - alpha*fac*self.HX_prime[n,:] + for m in range(n + 1, self.nobs): + res = self.obs[n] - self.Hx[n] + fac = 1.0 / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[m, :]).sum() / self.HPHR[n, n] + self.Hx[m] = self.Hx[m] + fac * res + self.HX_prime[m, :] = self.HX_prime[m, :] - alpha * fac * self.HX_prime[n, :] - for m in range(1,n+1): - res = self.obs[n]-self.Hx[n] - fac = 1.0/(self.nmembers-1)*(self.HX_prime[n,:]*self.HX_prime[m,:]).sum()/self.HPHR[n,n] - self.Hx[m] = self.Hx[m] + fac * res - self.HX_prime[m,:] = self.HX_prime[m,:] - alpha*fac*self.HX_prime[n,:] + for m in range(1, n + 1): + res = self.obs[n] - self.Hx[n] + fac = 1.0 / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[m, :]).sum() / self.HPHR[n, n] + self.Hx[m] = self.Hx[m] + fac * res + self.HX_prime[m, :] = self.HX_prime[m, :] - alpha * fac * self.HX_prime[n, :] def BulkMinimumLeastSquares(self): """ Make minimum least squares solution by solving matrix equations""" - import numpy as np - import numpy.linalg as la - # Create full solution, first calculate the mean of the posterior analysis - HPH = np.dot(self.HX_prime,np.transpose(self.HX_prime))/(self.nmembers-1) # HPH = 1/N * HX' * (HX')^T - self.HPHR[:,:] = HPH+self.R # HPHR = HPH + R - HPb = np.dot(self.X_prime,np.transpose(self.HX_prime))/(self.nmembers-1) # HP = 1/N X' * (HX')^T - self.KG[:,:] = np.dot(HPb,la.inv(self.HPHR)) # K = HP/(HPH+R) + HPH = np.dot(self.HX_prime, np.transpose(self.HX_prime)) / (self.nmembers - 1) # HPH = 1/N * HX' * (HX')^T + self.HPHR[:, :] = HPH + self.R # HPHR = HPH + R + HPb = np.dot(self.X_prime, np.transpose(self.HX_prime)) / (self.nmembers - 1) # HP = 1/N X' * (HX')^T + self.KG[:, :] = np.dot(HPb, la.inv(self.HPHR)) # K = HP/(HPH+R) for n in range(self.nobs): - dummy = self.Localize(n) + dummy = self.Localize(n) - self.x[:] = self.x + np.dot(self.KG,self.obs-self.Hx) # xa = 
xp + K (y-Hx) + self.x[:] = self.x + np.dot(self.KG, self.obs - self.Hx) # xa = xp + K (y-Hx) # And next make the updated ensemble deviations. Note that we calculate P by using the full equation (10) at once, and # not in a serial update fashion as described in Whitaker and Hamill. # For the current problem with limited N_obs this is easier, or at least more straightforward to do. - I = np.identity(self.nlag*self.nparams) - sHPHR = la.cholesky(self.HPHR) # square root of HPH+R - part1 = np.dot(HPb,np.transpose(la.inv(sHPHR))) # HP(sqrt(HPH+R))^-1 - part2 = la.inv(sHPHR+np.sqrt(self.R)) # (sqrt(HPH+R)+sqrt(R))^-1 - Kw = np.dot(part1,part2) # K~ - self.X_prime[:,:] = np.dot(I,self.X_prime)-np.dot(Kw,self.HX_prime) # HX' = I - K~ * HX' + I = np.identity(self.nlag * self.nparams) + sHPHR = la.cholesky(self.HPHR) # square root of HPH+R + part1 = np.dot(HPb, np.transpose(la.inv(sHPHR))) # HP(sqrt(HPH+R))^-1 + part2 = la.inv(sHPHR + np.sqrt(self.R)) # (sqrt(HPH+R)+sqrt(R))^-1 + Kw = np.dot(part1, part2) # K~ + self.X_prime[:, :] = np.dot(I, self.X_prime) - np.dot(Kw, self.HX_prime) # HX' = I - K~ * HX' - P_opt = np.dot(self.X_prime,np.transpose(self.X_prime))/(self.nmembers-1) + P_opt = np.dot(self.X_prime, np.transpose(self.X_prime)) / (self.nmembers - 1) # Now do the adjustments of the modeled mixing ratios using the linearized ensemble. These are not strictly needed but can be used # for diagnosis. - part3 = np.dot(HPH,np.transpose(la.inv(sHPHR))) # HPH(sqrt(HPH+R))^-1 - Kw = np.dot(part3,part2) # K~ - self.Hx[:] = self.Hx + np.dot(np.dot(HPH,la.inv(self.HPHR)),self.obs-self.Hx) # Hx = Hx+ HPH/HPH+R (y-Hx) - self.HX_prime[:,:] = self.HX_prime-np.dot(Kw,self.HX_prime) # HX' = HX'- K~ * HX' + part3 = np.dot(HPH, np.transpose(la.inv(sHPHR))) # HPH(sqrt(HPH+R))^-1 + Kw = np.dot(part3, part2) # K~ + self.Hx[:] = self.Hx + np.dot(np.dot(HPH, la.inv(self.HPHR)), self.obs - self.Hx) # Hx = Hx+ HPH/HPH+R (y-Hx) + self.HX_prime[:, :] = self.HX_prime - np.dot(Kw, self.HX_prime) # HX' = HX'- K~ * HX' - msg = 'Minimum Least Squares solution was calculated, returning' ; logging.info(msg) + logging.info('Minimum Least Squares solution was calculated, returning') - return None def SetLocalization(self): """ determine which localization to use """ self.localization = True - self.localizetype = "None" - - msg = "Current localization option is set to %s"%self.localizetype ; logging.info(msg) + self.localizetype = "None" + + logging.info("Current localization option is set to %s" % self.localizetype) - def Localize(self,n): + def Localize(self, n): """ localize the Kalman Gain matrix """ - import numpy as np - + + #LU whaat? sth is left for then? 
or just a baseclass if not self.localization: return return @@ -443,31 +401,28 @@ if __name__ == "__main__": import da.tools.rc as rc opts = ['-v'] - args = {'rc':'../../da.rc','logfile':'da_initexit.log','jobrcfilename':'test.rc'} + args = {'rc':'../../da.rc', 'logfile':'da_initexit.log', 'jobrcfilename':'test.rc'} StartLogger() - DaCycle = CycleControl(opts,args) + DaCycle = CycleControl(opts, args) DaCycle.Initialize() print DaCycle StateVector = PrepareState(DaCycle) - samples = CtObservations(DaCycle.DaSystem,datetime.datetime(2005,3,5)) - dummy = samples.AddObs() - dummy = samples.AddSimulations('/Users/peters/tmp/test_da/output/20050305/samples.000.nc') + samples = CtObservations(DaCycle.DaSystem, datetime.datetime(2005, 3, 5)) + samples.AddObs() + samples.AddSimulations('/Users/peters/tmp/test_da/output/20050305/samples.000.nc') - nobs = len(samples.Data) - dims = ( int(DaCycle['time.nlag']), + nobs = len(samples.Data) + dims = (int(DaCycle['time.nlag']), int(DaCycle['da.optimizer.nmembers']), int(DaCycle.DaSystem['nparameters']), - nobs, ) + nobs,) opt = CtOptimizer(dims) - opt.StateToMatrix(StateVector) - opt.MinimumLeastSquares() - opt.MatrixToState(StateVector) diff --git a/da/baseclasses/platform.py b/da/baseclasses/platform.py index 44a1f29..93f8279 100755 --- a/da/baseclasses/platform.py +++ b/da/baseclasses/platform.py @@ -26,7 +26,7 @@ import os import logging import subprocess -std_joboptions={'jobname':'test','jobaccount':'co2','jobnodes':'nserial 1','jobshell':'/bin/sh','depends':'','jobtime':'00:30:00'} +std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobnodes':'nserial 1', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'00:30:00'} class PlatForm(object): """ @@ -39,18 +39,17 @@ class PlatForm(object): computer/user requires their own PlatForm object modifications, the init function is usually overwritten in the specific implementation of this class """ - self.Identifier = 'iPad' # the identifier gives the plaform name - self.Version = '1.0' # the platform version used + self.Identifier = 'iPad' # the identifier gives the plaform name + self.Version = '1.0' # the platform version used - msg1 = '%s object initialized'%self.Identifier ; logging.debug(msg1) - msg2 = '%s version: %s'%(self.Identifier,self.Version) ; logging.debug(msg2) + msg1 = '%s object initialized' % self.Identifier ; logging.debug(msg1) + msg2 = '%s version: %s' % (self.Identifier, self.Version) ; logging.debug(msg2) def __str__(self): - return self.Version - def GetJobTemplate(self,joboptions={},block=False): + def GetJobTemplate(self, joboptions={}, block=False): """ Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. The job template should return the preamble of a job that can be submitted to a queue on your platform, @@ -68,56 +67,49 @@ class PlatForm(object): job until the submitted job in this template has been completed fully. 
""" - template = """## \n"""+ \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n"""+ \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """+ \ - """## \n"""+ \ - """ \n"""+ \ - """#$ jobname \n"""+ \ - """#$ jobaccount \n"""+ \ - """#$ jobnodes \n"""+ \ - """#$ jobtime \n"""+ \ + template = """## \n""" + \ + """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ + """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ + """## \n""" + \ + """ \n""" + \ + """#$ jobname \n""" + \ + """#$ jobaccount \n""" + \ + """#$ jobnodes \n""" + \ + """#$ jobtime \n""" + \ """#$ jobshell \n """ if 'depends' in joboptions: template += """#$ -hold_jid depends \n""" # First replace from passed dictionary - for k,v in joboptions.iteritems(): + for k, v in joboptions.iteritems(): while k in template: - template = template.replace(k,v) + template = template.replace(k, v) # Fill remaining values with std_options - for k,v in std_joboptions.iteritems(): + for k, v in std_joboptions.iteritems(): while k in template: - template = template.replace(k,v) + template = template.replace(k, v) return template def GetMyID(self): """ Return the process ID, or job ID of the current process or job""" - return os.getpid() - def WriteJob(self,jobfile,template, jobid): + def WriteJob(self, jobfile, template, jobid): """ This method writes a jobfile to the exec dir and makes it executable (mod 477) """ + file = open(jobfile, 'w') + file.write(template) + file.close() + os.chmod(jobfile, 477) - # - # Done, write jobfile - # - f = open(jobfile,'w') - dummy = f.write(template) - dummy = f.close() - dummy = os.chmod(jobfile,477) - - msg = "A job file was created (%s)"%jobfile ; logging.debug(msg) - - return None + logging.debug("A job file was created (%s)" % jobfile) - def SubmitJob(self,jobfile,joblog=None,block=False): + def SubmitJob(self, jobfile, joblog=None, block=False): """ :param jobfile: a string with the filename of a jobfile to run :param joblog: a string with the filename of a logfile to write run output to @@ -128,8 +120,8 @@ class PlatForm(object): """ from string import join - cmd = ["sh",jobfile] - msg = "A new task will be started (%s)"%cmd ; logging.info(msg) + cmd = ["sh", jobfile] + logging.info("A new task will be started (%s)" % cmd) if block: jobid = subprocess.call(cmd) else: @@ -137,34 +129,30 @@ class PlatForm(object): # info ... 
infotext = [] - infotext.append( '\n' ) - infotext.append( 'Summary:\n' ) - infotext.append( '\n' ) - infotext.append( 'job script : %s\n' % jobfile ) - infotext.append( 'job log : %s\n' % joblog ) - infotext.append( '\n') - infotext.append( 'To manage this process:\n' ) - infotext.append( '\n' ) - infotext.append( ' # kill process:\n' ) - infotext.append( ' kill %i\n' % jobid ) - infotext.append( ' \n' ) - infotext.append( '\n' ) + infotext.append('\n') + infotext.append('Summary:\n') + infotext.append('\n') + infotext.append('job script : %s\n' % jobfile) + infotext.append('job log : %s\n' % joblog) + infotext.append('\n') + infotext.append('To manage this process:\n') + infotext.append('\n') + infotext.append(' # kill process:\n') + infotext.append(' kill %i\n' % jobid) + infotext.append(' \n') + infotext.append('\n') # write to log: - for line in infotext : logging.info( line.strip() ) + for line in infotext : logging.info(line.strip()) - return 0 - def KillJob(self,jobid): + def KillJob(self, jobid): """ This method kills a running job """ - return None - def StatJob(self,jobid): + def StatJob(self, jobid): """ This method gets the status of a running job """ import subprocess - - output = subprocess.Popen(['qstat',jobid], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) - + output = subprocess.Popen(['qstat', jobid], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) return output diff --git a/da/baseclasses/statevector.py b/da/baseclasses/statevector.py index a8ac4d0..4c0e4a5 100755 --- a/da/baseclasses/statevector.py +++ b/da/baseclasses/statevector.py @@ -29,7 +29,7 @@ import da.tools.io4 as io import datetime identifier = 'Baseclass Statevector ' -version = '0.0' +version = '0.0' ################### Begin Class EnsembleMember ################### @@ -62,12 +62,12 @@ class EnsembleMember(object): """ - self.membernumber = membernumber # the member number - self.ParameterValues = None # Parameter values of this member - self.ModelSample = None # Model Sampled Parameter values of this member + self.membernumber = membernumber # the member number + self.ParameterValues = None # Parameter values of this member + self.ModelSample = None # Model Sampled Parameter values of this member def __str__(self): - return "%03d"%self.membernumber + return "%03d" % self.membernumber ################### End Class EnsembleMember ################### @@ -124,9 +124,9 @@ class StateVector(object): """ - def __init__(self, DaCycle = None): + def __init__(self, DaCycle=None): self.Identifier = self.getid() - self.Version = self.getversion() + self.Version = self.getversion() # The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can # be added at a later moment. @@ -136,8 +136,7 @@ class StateVector(object): else: self.DaCycle = {} - - msg = 'Statevector object initialized: %s'%self.Identifier ; logging.info(msg) + logging.info('Statevector object initialized: %s' % self.Identifier) def getid(self): return identifier @@ -155,26 +154,22 @@ class StateVector(object): An example is given below. 
""" - import da.tools.io4 as io - import numpy as np - - - dims = ( int(self.DaCycle['time.nlag']), + dims = (int(self.DaCycle['time.nlag']), int(self.DaCycle['da.optimizer.nmembers']), int(self.DaCycle.DaSystem['nparameters']), ) - self.nlag = dims[0] - self.nmembers = dims[1] - self.nparams = dims[2] - self.nobs = 0 - self.isOptimized = False + self.nlag = dims[0] + self.nmembers = dims[1] + self.nparams = dims[2] + self.nobs = 0 + self.isOptimized = False # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. - self.EnsembleMembers = range(self.nlag) + self.EnsembleMembers = range(self.nlag) for n in range(self.nlag): self.EnsembleMembers[n] = [] @@ -183,47 +178,47 @@ class StateVector(object): # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. - mapfile = os.path.join(self.DaCycle.DaSystem['regionsfile']) - ncf = io.CT_Read(mapfile,'read') - self.gridmap = ncf.GetVariable('regions') - self.tcmap = ncf.GetVariable('transcom_regions') - dummy = ncf.close() + mapfile = os.path.join(self.DaCycle.DaSystem['regionsfile']) + ncf = io.CT_Read(mapfile, 'read') + self.gridmap = ncf.GetVariable('regions') + self.tcmap = ncf.GetVariable('transcom_regions') + ncf.close() - msg = "A TransCom map on 1x1 degree was read from file %s"%self.DaCycle.DaSystem['regionsfile'] ; logging.debug(msg) - msg = "A parameter map on 1x1 degree was read from file %s"%self.DaCycle.DaSystem['regionsfile'] ; logging.debug(msg) + logging.debug("A TransCom map on 1x1 degree was read from file %s" % self.DaCycle.DaSystem['regionsfile']) + logging.debug("A parameter map on 1x1 degree was read from file %s" % self.DaCycle.DaSystem['regionsfile']) # Create a dictionary for state <-> gridded map conversions - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1,nparams+1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0])>0: - self.griddict[r]=sel + nparams = self.gridmap.max() + self.griddict = {} + for r in range(1, nparams + 1): + sel = (self.gridmap.flat == r).nonzero() + if len(sel[0]) > 0: + self.griddict[r] = sel - msg = "A dictionary to map grids to states and vice versa was created" ; logging.debug(msg) + logging.debug("A dictionary to map grids to states and vice versa was created") # Create a matrix for state <-> TransCom conversions - self.tcmatrix = np.zeros((self.nparams,23,),'float') + self.tcmatrix = np.zeros((self.nparams, 23,), 'float') - for r in range(1,self.nparams+1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0])<1: + for r in range(1, self.nparams + 1): + sel = (self.gridmap.flat == r).nonzero() + if len(sel[0]) < 1: continue else: n_tc = set(self.tcmap.flatten().take(sel[0])) if len(n_tc) > 1: - msg = "Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r,n_tc) ;logging.error(msg) + msg = "Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc) ;logging.error(msg) raise ValueError - self.tcmatrix[r-1,n_tc.pop()-1]= 1.0 + self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0 msg = "A matrix to map states to TransCom regions and vice versa was created" ; logging.debug(msg) def 
__str__(self): return "This is a base class derived state vector object" - def MakeNewEnsemble(self,lag, covariancematrix = None): + def MakeNewEnsemble(self, lag, covariancematrix=None): """ :param lag: an integer indicating the time step in the lag order :param covariancematrix: a matrix to draw random values from @@ -240,49 +235,49 @@ class StateVector(object): """ if covariancematrix == None: - covariancematrix=np.identity(self.nparams) + covariancematrix = np.identity(self.nparams) # Make a cholesky decomposition of the covariance matrix - U,s,Vh = np.linalg.svd(covariancematrix) - dof = np.sum(s)**2/sum(s**2) - C = np.linalg.cholesky(covariancematrix) + U, s, Vh = np.linalg.svd(covariancematrix) + dof = np.sum(s) ** 2 / sum(s ** 2) + C = np.linalg.cholesky(covariancematrix) - msg = 'Cholesky decomposition has succeeded ' ; logging.debug(msg) - msg = 'Appr. degrees of freedom in covariance matrix is %s'%(int(dof)) ; logging.info(msg) + logging.debug('Cholesky decomposition has succeeded ') + logging.debug('Appr. degrees of freedom in covariance matrix is %s' % (int(dof))) # Create mean values - NewMean = np.ones(self.nparams,float) # standard value for a new time step is 1.0 + NewMean = np.ones(self.nparams, float) # standard value for a new time step is 1.0 # If this is not the start of the filter, average previous two optimized steps into the mix if lag == self.nlag and self.nlag >= 3: - NewMean += self.EnsembleMembers[lag-2][0].ParameterValues + \ - self.EnsembleMembers[lag-3][0].ParameterValues - NewMean = NewMean/3.0 + NewMean += self.EnsembleMembers[lag - 2][0].ParameterValues + \ + self.EnsembleMembers[lag - 3][0].ParameterValues + NewMean = NewMean / 3.0 # Create the first ensemble member with a deviation of 0.0 and add to list - NewMember = self.GetNewMember(0) - NewMember.ParameterValues = NewMean.flatten() # no deviations - dummy = self.EnsembleMembers[lag-1].append(NewMember) + NewMember = self.GetNewMember(0) + NewMember.ParameterValues = NewMean.flatten() # no deviations + self.EnsembleMembers[lag - 1].append(NewMember) # Create members 1:nmembers and add to EnsembleMembers list - for member in range(1,self.nmembers): - randstate = np.random.get_state() - rands = np.random.randn(self.nparams) + for member in range(1, self.nmembers): + randstate = np.random.get_state() + rands = np.random.randn(self.nparams) - NewMember = self.GetNewMember(member) - NewMember.ParameterValues = np.dot(C,rands) + NewMean - dummy = self.EnsembleMembers[lag-1].append(NewMember) + NewMember = self.GetNewMember(member) + NewMember.ParameterValues = np.dot(C, rands) + NewMean + self.EnsembleMembers[lag - 1].append(NewMember) - msg = '%d new ensemble members were added to the state vector # %d'%(self.nmembers,lag) ; logging.debug(msg) + logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, lag)) - def GetNewMember(self,memberno): + def GetNewMember(self, memberno): """ :param memberno: an integer indicating the ensemble member number :rtype: an empty ensemblemember object @@ -308,22 +303,19 @@ class StateVector(object): # Remove State Vector n=1 by simply "popping" it from the list and appending a new empty list at the front. 
This empty list will # hold the new ensemble for the new cycle - dummy = self.EnsembleMembers.pop(0) - - dummy = self.EnsembleMembers.append([]) + self.EnsembleMembers.pop(0) + self.EnsembleMembers.append([]) # And now create a new time step of mean + members for n=nlag - date = self.DaCycle['time.start']+timedelta(days = (self.nlag-0.5)* int(self.DaCycle['time.cycle'])) - cov = self.GetCovariance(date) - dummy = self.MakeNewEnsemble(self.nlag,cov) + date = self.DaCycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(self.DaCycle['time.cycle'])) + cov = self.GetCovariance(date) + self.MakeNewEnsemble(self.nlag, cov) - msg = 'The state vector has been propagated by one cycle ' ; logging.info(msg) + logging.info('The state vector has been propagated by one cycle ') - return None - - def WriteToFile(self,filename): + def WriteToFile(self, filename): """ :param filename: the full filename for the output NetCDF file :rtype: None @@ -340,49 +332,47 @@ class StateVector(object): """ #import da.tools.io4 as io #import da.tools.io as io - import numpy as np + if not self.isOptimized: - f = io.CT_CDF(filename,method='create') - msg = 'Creating new StateVector output file (%s)' % filename ; logging.debug(msg) - qual = 'prior' + ncfile = io.CT_CDF(filename, method='create') + logging.debug('Creating new StateVector output file (%s)' % filename) + qual = 'prior' else: - f = io.CT_CDF(filename,method='write') - msg = 'Opening existing StateVector output file (%s)' % filename ; logging.debug(msg) - qual = 'opt' + ncfile = io.CT_CDF(filename, method='write') + logging.debug('Opening existing StateVector output file (%s)' % filename) + qual = 'opt' - dimparams = f.AddParamsDim(self.nparams) - dimmembers = f.AddMembersDim(self.nmembers) - dimlag = f.AddLagDim(self.nlag,unlimited=True) + dimparams = ncfile.AddParamsDim(self.nparams) + dimmembers = ncfile.AddMembersDim(self.nmembers) + dimlag = ncfile.AddLagDim(self.nlag, unlimited=True) for n in range(self.nlag): - - members = self.EnsembleMembers[n] - MeanState = members[0].ParameterValues - - savedict = f.StandardVar(varname='meanstate_%s'%qual) - savedict['dims'] = dimlag+dimparams - savedict['values'] = MeanState - savedict['count'] = n - savedict['comment'] = 'this represents the mean of the ensemble' - dummy = f.AddData(savedict) - - members = self.EnsembleMembers[n] - devs = np.asarray([m.ParameterValues.flatten() for m in members]) - data = devs- np.asarray(MeanState) - - savedict = f.StandardVar(varname='ensemblestate_%s'%qual) - savedict['dims'] = dimlag+dimmembers+dimparams - savedict['values'] = data - savedict['count'] = n - savedict['comment'] = 'this represents deviations from the mean of the ensemble' - dummy = f.AddData(savedict) - - dummy = f.close() - - msg = 'Successfully wrote the State Vector to file (%s) ' % (filename,) ; logging.info(msg) - - def ReadFromFile(self,filename,qual='opt'): + members = self.EnsembleMembers[n] + MeanState = members[0].ParameterValues + + savedict = ncfile.StandardVar(varname='meanstate_%s' % qual) + savedict['dims'] = dimlag + dimparams + savedict['values'] = MeanState + savedict['count'] = n + savedict['comment'] = 'this represents the mean of the ensemble' + ncfile.AddData(savedict) + + members = self.EnsembleMembers[n] + devs = np.asarray([m.ParameterValues.flatten() for m in members]) + data = devs - np.asarray(MeanState) + + savedict = ncfile.StandardVar(varname='ensemblestate_%s' % qual) + savedict['dims'] = dimlag + dimmembers + dimparams + savedict['values'] = data + savedict['count'] = n + 
savedict['comment'] = 'this represents deviations from the mean of the ensemble' + ncfile.AddData(savedict) + + ncfile.close() + logging.info('Successfully wrote the State Vector to file (%s) ' % filename) + + def ReadFromFile(self, filename, qual='opt'): """ :param filename: the full filename for the input NetCDF file :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file @@ -401,26 +391,23 @@ class StateVector(object): """ - #import da.tools.io as io - import numpy as np - - f = io.CT_Read(filename,'read') - MeanState = f.GetVariable('statevectormean_'+qual) - EnsembleMembers = f.GetVariable('statevectorensemble_'+qual) - dummy = f.close() + f = io.CT_Read(filename, 'read') + MeanState = f.GetVariable('statevectormean_' + qual) + EnsembleMembers = f.GetVariable('statevectorensemble_' + qual) + f.close() for n in range(self.nlag): if not self.EnsembleMembers[n] == []: self.EnsembleMembers[n] = [] - msg = 'Existing ensemble for lag=%d was removed to make place for newly read data'%(n+1,) ; logging.warning(msg) + logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1,)) for m in range(self.nmembers): - NewMember = self.GetNewMember(m) - NewMember.ParameterValues = EnsembleMembers[n,m,:].flatten() + MeanState[n] # add the mean to the deviations to hold the full parameter values - dummy = self.EnsembleMembers[n].append(NewMember) + NewMember = self.GetNewMember(m) + NewMember.ParameterValues = EnsembleMembers[n, m, :].flatten() + MeanState[n] # add the mean to the deviations to hold the full parameter values + self.EnsembleMembers[n].append(NewMember) - msg = 'Successfully read the State Vector from file (%s) ' % (filename,) ; logging.info(msg) + logging.info('Successfully read the State Vector from file (%s) ' % filename) def WriteMembersToFile(self, lag): """ @@ -444,45 +431,42 @@ class StateVector(object): #import da.tools.io as io #import da.tools.io4 as io - outdir = self.DaCycle['dir.input'] - - members = self.EnsembleMembers[lag-1] + outdir = self.DaCycle['dir.input'] + members = self.EnsembleMembers[lag - 1] for mem in members: + filename = os.path.join(outdir, 'parameters.%03d.nc' % mem.membernumber) + ncf = io.CT_CDF(filename, method='create') + dimparams = ncf.AddParamsDim(self.nparams) + dimgrid = ncf.AddLatLonDim() + + data = mem.ParameterValues + + savedict = io.std_savedict.copy() + savedict['name'] = "parametervalues" + savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber + savedict['units'] = "unitless" + savedict['dims'] = dimparams + savedict['values'] = data + savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber + ncf.AddData(savedict) + + griddata = self.VectorToGrid(vectordata=data) + + savedict = io.std_savedict.copy() + savedict['name'] = "parametermap" + savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber + savedict['units'] = "unitless" + savedict['dims'] = dimgrid + savedict['values'] = griddata.tolist() + savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber + ncf.AddData(savedict) + + ncf.close() - filename = os.path.join(outdir,'parameters.%03d.nc'% mem.membernumber) - ncf = io.CT_CDF(filename,method='create') - dimparams = ncf.AddParamsDim(self.nparams) - dimgrid = ncf.AddLatLonDim() - - data = mem.ParameterValues - - savedict = io.std_savedict.copy() - savedict['name'] = "parametervalues" - savedict['long_name'] = 
"parameter_values_for_member_%d"%mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimparams - savedict['values'] = data - savedict['comment'] = 'These are parameter values to use for member %d'%mem.membernumber - dummy = ncf.AddData(savedict) - - griddata = self.VectorToGrid(vectordata=data) - - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap" - savedict['long_name'] = "parametermap_for_member_%d"%mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata.tolist() - savedict['comment'] = 'These are gridded parameter values to use for member %d'%mem.membernumber - dummy = ncf.AddData(savedict) - - dummy = ncf.close() - - msg = 'Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber,filename,) ; logging.debug(msg) + logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename)) - def GridToVector(self,griddata=None, method = 'avg'): + def GridToVector(self, griddata=None, method='avg'): """ Map gridded data onto a vector of length (nparams,) @@ -507,24 +491,24 @@ class StateVector(object): """ - methods = ['avg','sum','minval'] + methods = ['avg', 'sum', 'minval'] if method not in methods: - msg = "To put data from a map into the statevector, please specify the method to use (%s)" % methods ; logging.error(msg) + logging.error("To put data from a map into the statevector, please specify the method to use (%s)" % methods) raise ValueError - result=np.zeros((self.nparams,),float) - for k,v in self.griddict.iteritems(): + result = np.zeros(self.nparams, float) + for k, v in self.griddict.iteritems(): #print k,k-1,result.shape, v if method == "avg": - result[k-1]=griddata.take(v).mean() + result[k - 1] = griddata.take(v).mean() elif method == "sum" : - result[k-1]=griddata.take(v).sum() + result[k - 1] = griddata.take(v).sum() elif method == "minval" : - result[k-1]=griddata.take(v).min() + result[k - 1] = griddata.take(v).min() return result # Note that the result is returned, but not yet placed in the member.ParameterValues attrtibute! - def VectorToGrid(self,vectordata=None): + def VectorToGrid(self, vectordata=None): """ Map vector elements to a map or vice cersa @@ -541,14 +525,14 @@ class StateVector(object): """ - result=np.zeros(self.gridmap.shape,float) - for k,v in self.griddict.iteritems(): + result = np.zeros(self.gridmap.shape, float) + for k, v in self.griddict.iteritems(): #print k,v - result.put(v,vectordata[k-1]) + result.put(v, vectordata[k - 1]) return result - def VectorToTC(self,vectordata,cov=False): + def VectorToTC(self, vectordata, cov=False): """ project Vector onto TransCom regions @@ -556,14 +540,13 @@ class StateVector(object): :param cov: a Boolean to specify whether the input dataset is a vector (mean), or a matrix (covariance) :rtype: ndarray: an array of size (23,) (cov:F) or of size (23,23,) (cov:T) """ - M = self.tcmatrix if cov: - return np.dot(np.transpose(M), np.dot(vectordata,M) ) + return np.dot(np.transpose(M), np.dot(vectordata, M)) else: - return np.dot(vectordata.squeeze(),M) + return np.dot(vectordata.squeeze(), M) - def StateToGrid(self, fluxvector = None, lag=1): + def StateToGrid(self, fluxvector=None, lag=1): """ Transforms the StateVector information (mean + covariance) to a 1x1 degree grid. 
@@ -580,23 +563,22 @@ class StateVector(object): if fluxvector == None: fluxvector = np.ones(self.nparams) - ensemble = self.EnsembleMembers[lag-1] - - ensemblemean =ensemble[0].ParameterValues + ensemble = self.EnsembleMembers[lag - 1] + ensemblemean = ensemble[0].ParameterValues # First transform the mean - gridmean = self.VectorToGrid(vectordata = ensemblemean*fluxvector) + gridmean = self.VectorToGrid(vectordata=ensemblemean * fluxvector) # And now the covariance, first create covariance matrix (!), and then multiply - deviations = np.array([mem.ParameterValues*fluxvector-ensemblemean for mem in ensemble]) - variance = deviations.std(axis=0)**2 - gridvar = self.VectorToGrid(variance) + deviations = np.array([mem.ParameterValues * fluxvector - ensemblemean for mem in ensemble]) + variance = deviations.std(axis=0) ** 2 + gridvar = self.VectorToGrid(variance) - return (gridmean,gridvar,) + return (gridmean, gridvar) - def StateToTC(self, fluxvector = None, lag=1): + def StateToTC(self, fluxvector=None, lag=1): """ Transforms the StateVector information (mean + covariance) to the TransCom regions. @@ -606,21 +588,20 @@ class StateVector(object): """ - ensemble = self.EnsembleMembers[lag-1] - - ensemblemean =ensemble[0].ParameterValues + ensemble = self.EnsembleMembers[lag - 1] + ensemblemean = ensemble[0].ParameterValues # First transform the mean - mean = self.VectorToTC(vectordata = ensemble[0].ParameterValues*fluxvector) + mean = self.VectorToTC(vectordata=ensemble[0].ParameterValues * fluxvector) # And now the covariance, first create covariance matrix (!), and then multiply - deviations = np.array([mem.ParameterValues*fluxvector-ensemblemean for mem in ensemble]) - covariance = np.dot(np.transpose(deviations),deviations)/(self.nmembers-1) - cov = self.VectorToTC(covariance,cov=True) + deviations = np.array([mem.ParameterValues * fluxvector - ensemblemean for mem in ensemble]) + covariance = np.dot(np.transpose(deviations), deviations) / (self.nmembers - 1) + cov = self.VectorToTC(covariance, cov=True) - return (mean,cov,) + return (mean, cov) ################### End Class StateVector ################### @@ -637,40 +618,39 @@ if __name__ == "__main__": import da.tools.rc as rc opts = ['-v'] - args = {'rc':'../../da.rc','logfile':'da_initexit.log','jobrcfilename':'test.rc'} + args = {'rc':'../../da.rc', 'logfile':'da_initexit.log', 'jobrcfilename':'test.rc'} StartLogger() - DaCycle = CycleControl(opts,args) + DaCycle = CycleControl(opts, args) - dummy = EnsembleMember(0) - print dummy + print EnsembleMember(0) + DaCycle.Initialize() print DaCycle - dims = ( int(DaCycle['time.nlag']), + dims = (int(DaCycle['time.nlag']), int(DaCycle['da.optimizer.nmembers']), int(DaCycle.DaSystem['nparameters']), ) StateVector = StateVector(dims) - StateVector.MakeNewEnsemble(lag=1) - members = StateVector.EnsembleMembers[0] + members = StateVector.EnsembleMembers[0] members[0].WriteToFile(DaCycle['dir.input']) StateVector.Propagate() - savedir = DaCycle['dir.output'] - filename = os.path.join(savedir,'savestate.nc') + savedir = DaCycle['dir.output'] + filename = os.path.join(savedir, 'savestate.nc') StateVector.WriteToFile(filename) - savedir = DaCycle['dir.output'] - filename = os.path.join(savedir,'savestate.nc') + savedir = DaCycle['dir.output'] + filename = os.path.join(savedir, 'savestate.nc') StateVector.ReadFromFile(filename) diff --git a/da/ct/dasystem.py b/da/ct/dasystem.py index 947f30f..12e66a9 100755 --- a/da/ct/dasystem.py +++ b/da/ct/dasystem.py @@ -36,20 +36,18 @@ class 
CtDaSystem(DaSystem): 'regtype'] - for k,v in self.iteritems(): + for k, v in self.iteritems(): if v == 'True' : self[k] = True if v == 'False': self[k] = False for key in needed_rc_items: - if not self.has_key(key): - status,msg = ( False,'Missing a required value in rc-file : %s' % key) + msg = 'Missing a required value in rc-file : %s' % key logging.error(msg) - raise IOError,msg + raise IOError, msg - status,msg = ( True,'DA System Info settings have been validated succesfully' ) ; logging.debug(msg) + logging.debug('DA System Info settings have been validated succesfully') - return None ################### End Class CtDaSystem ################### diff --git a/da/ct/obs.py b/da/ct/obs.py index f9f758c..702984c 100755 --- a/da/ct/obs.py +++ b/da/ct/obs.py @@ -12,6 +12,7 @@ import os import sys import logging import datetime + sys.path.append(os.getcwd()) identifier = 'CarbonTracker CO2 mixing ratios' @@ -36,25 +37,25 @@ class CtObservations(Observation): def Initialize(self): self.startdate = self.DaCycle['time.sample.start'] - self.enddate = self.DaCycle['time.sample.end'] - DaSystem = self.DaCycle.DaSystem + self.enddate = self.DaCycle['time.sample.end'] + DaSystem = self.DaCycle.DaSystem - sfname = DaSystem['obs.input.fname'] + sfname = DaSystem['obs.input.fname'] if sfname.endswith('.nc'): - filename = os.path.join(DaSystem['obs.input.dir'],sfname) + filename = os.path.join(DaSystem['obs.input.dir'], sfname) else: - filename = os.path.join(DaSystem['obs.input.dir'],sfname+'.'+self.startdate.strftime('%Y%m%d')+'.nc') + filename = os.path.join(DaSystem['obs.input.dir'], sfname + '.' + self.startdate.strftime('%Y%m%d') + '.nc') if not os.path.exists(filename): - msg = 'Could not find the required observation input file (%s) ' % filename ; logging.error(msg) - raise IOError,msg + msg = 'Could not find the required observation input file (%s) ' % filename + logging.error(msg) + raise IOError, msg else: self.ObsFilename = filename self.Data = MixingRatioList([]) - return None def AddObs(self): """ Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file @@ -73,86 +74,81 @@ class CtObservations(Observation): from string import strip, join from numpy import array, logical_and - ncf = io.CT_Read(self.ObsFilename,'read') - idates = ncf.GetVariable('date_components') - dates = array([dtm.datetime(*d) for d in idates]) + ncf = io.CT_Read(self.ObsFilename, 'read') + idates = ncf.GetVariable('date_components') + dates = array([dtm.datetime(*d) for d in idates]) - subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] + subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] - dates = dates.take(subselect,axis=0) + dates = dates.take(subselect, axis=0) - ids = ncf.GetVariable('id').take(subselect,axis=0) - evn = ncf.GetVariable('eventnumber').take(subselect,axis=0) - evn = [s.tostring().lower() for s in evn] - evn = map(strip,evn) - sites = ncf.GetVariable('site').take(subselect,axis=0) - sites = [s.tostring().lower() for s in sites] - sites = map(strip,sites) - lats = ncf.GetVariable('lat').take(subselect,axis=0) - lons = ncf.GetVariable('lon').take(subselect,axis=0) - alts = ncf.GetVariable('alt').take(subselect,axis=0) - obs = ncf.GetVariable('obs').take(subselect,axis=0) - species = ncf.GetVariable('species').take(subselect,axis=0) - species = [s.tostring().lower() for s in species] - species = map(strip,species) - date = ncf.GetVariable('date').take(subselect,axis=0) - strategy = 
ncf.GetVariable('sampling_strategy').take(subselect,axis=0) - flags = ncf.GetVariable('NOAA_QC_flags').take(subselect,axis=0) - flags = [s.tostring().lower() for s in flags] - flags = map(strip,flags) - flags = [int(f == '...') for f in flags] - dummy = ncf.close() - - msg = "Successfully read data from obs file (%s)"%self.ObsFilename ; logging.debug(msg) - - - for n in range(len(dates)): + ids = ncf.GetVariable('id').take(subselect, axis=0) + evn = ncf.GetVariable('eventnumber').take(subselect, axis=0) + evn = [s.tostring().lower() for s in evn] + evn = map(strip, evn) + sites = ncf.GetVariable('site').take(subselect, axis=0) + sites = [s.tostring().lower() for s in sites] + sites = map(strip, sites) + lats = ncf.GetVariable('lat').take(subselect, axis=0) + lons = ncf.GetVariable('lon').take(subselect, axis=0) + alts = ncf.GetVariable('alt').take(subselect, axis=0) + obs = ncf.GetVariable('obs').take(subselect, axis=0) + species = ncf.GetVariable('species').take(subselect, axis=0) + species = [s.tostring().lower() for s in species] + species = map(strip, species) + date = ncf.GetVariable('date').take(subselect, axis=0) + strategy = ncf.GetVariable('sampling_strategy').take(subselect, axis=0) + flags = ncf.GetVariable('NOAA_QC_flags').take(subselect, axis=0) + flags = [s.tostring().lower() for s in flags] + flags = map(strip, flags) + flags = [int(f == '...') for f in flags] + ncf.close() + + logging.debug("Successfully read data from obs file (%s)" % self.ObsFilename) - self.Data.append( MixingRatioSample(ids[n],dates[n],sites[n],obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],evn[n],species[n],strategy[n],0.0) ) + for n in range(len(dates)): + self.Data.append(MixingRatioSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0)) - msg = "Added %d observations to the Data list" % len(dates) ; logging.debug(msg) + logging.debug("Added %d observations to the Data list" % len(dates)) - def AddSimulations(self,filename,silent=True): + def AddSimulations(self, filename, silent=True): """ Adds model simulated values to the mixing ratio objects """ import da.tools.io4 as io if not os.path.exists(filename): - msge = "Sample output filename for observations could not be found : %s" % filename ; logging.error(msge) - msg = "Did the sampling step succeed?" 
; logging.error(msg) - msg = "...exiting" ; logging.error(msge) - raise IOError,msg + msg = "Sample output filename for observations could not be found : %s" % filename + logging.error(msg) + logging.error("Did the sampling step succeed?") + logging.error("...exiting") + raise IOError, msg - ncf = io.CT_Read(filename,method='read') - ids = ncf.GetVariable('id') - simulated = ncf.GetVariable('flask') - dummy = ncf.close() - msg = "Successfully read data from model sample file (%s)"%filename ; logging.info(msg) + ncf = io.CT_Read(filename, method='read') + ids = ncf.GetVariable('id') + simulated = ncf.GetVariable('flask') + ncf.close() + logging.info("Successfully read data from model sample file (%s)" % filename) - obs_ids = self.Data.getvalues('id') + obs_ids = self.Data.getvalues('id') - obs_ids = obs_ids.tolist() - ids = map(int,ids) + obs_ids = obs_ids.tolist() + ids = map(int, ids) missing_samples = [] - for id,val in zip(ids,simulated): - + for id, val in zip(ids, simulated): if id in obs_ids: - index = obs_ids.index(id) #print id,val,val.shape - dummy = self.Data[index].simulated = val*1e6 # to umol/mol - + self.Data[index].simulated = val * 1e6 # to umol/mol else: - missing_samples.append(id) if not silent and missing_samples != []: - msg = 'Model samples were found that did not match any ID in the observation list. Skipping them...' ; logging.warning(msg) + logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...') #msg = '%s'%missing_samples ; logging.warning(msg) - msg = "Added %d simulated values to the Data list" % (len(ids)-len(missing_samples) ) ; logging.debug(msg) + logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) def WriteSampleInfo(self): """ @@ -164,150 +160,150 @@ class CtObservations(Observation): import da.tools.io4 as io from da.tools.general import ToDectime - obsinputfile = os.path.join(self.DaCycle['dir.input'],'observations_%s.nc'% self.DaCycle['time.sample.stamp']) + obsinputfile = os.path.join(self.DaCycle['dir.input'], 'observations_%s.nc' % self.DaCycle['time.sample.stamp']) - f = io.CT_CDF(obsinputfile,method='create') - msg = 'Creating new observations file for ObservationOperator (%s)' % obsinputfile ; logging.debug(msg) + f = io.CT_CDF(obsinputfile, method='create') + logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - dimid = f.AddDim('id',len(self.Data)) - dim30char = f.AddDim('string_of30chars',30) - dim24char = f.AddDim('string_of24chars',24) - dim10char = f.AddDim('string_of10chars',10) - dimcalcomp = f.AddDim('calendar_components',6) + dimid = f.AddDim('id', len(self.Data)) + dim30char = f.AddDim('string_of30chars', 30) + dim24char = f.AddDim('string_of24chars', 24) + dim10char = f.AddDim('string_of10chars', 10) + dimcalcomp = f.AddDim('calendar_components', 6) - data = self.Data.getvalues('id') + data = self.Data.getvalues('id') - savedict = io.std_savedict.copy() - savedict['name'] = "id" - savedict['dtype'] = "int" - savedict['long_name'] = "identification number" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "This is a unique identifier for each preprocessed observation used in CarbonTracker" - dummy = f.AddData(savedict) + savedict = io.std_savedict.copy() + savedict['name'] = "id" + savedict['dtype'] = "int" + savedict['long_name'] = "identification number" + savedict['units'] = "NA" + savedict['dims'] = dimid + savedict['values'] = 
data.tolist() + savedict['comment'] = "This is a unique identifier for each preprocessed observation used in CarbonTracker" + f.AddData(savedict) - data = [ToDectime(d) for d in self.Data.getvalues('xdate') ] + data = [ToDectime(d) for d in self.Data.getvalues('xdate') ] - savedict = io.std_savedict.copy() - savedict['name'] = "decimal_date" - savedict['units'] = "years" - savedict['dims'] = dimid - savedict['values'] = data + savedict = io.std_savedict.copy() + savedict['name'] = "decimal_date" + savedict['units'] = "years" + savedict['dims'] = dimid + savedict['values'] = data savedict['missing_value'] = -1.e34 - savedict['_FillValue'] = -1.e34 - dummy = f.AddData(savedict) + savedict['_FillValue'] = -1.e34 + f.AddData(savedict) - data = [[d.year,d.month,d.day,d.hour,d.minute,d.second] for d in self.Data.getvalues('xdate') ] + data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.Data.getvalues('xdate') ] - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date" - savedict['dims'] = dimid+dimcalcomp - savedict['values'] = data + savedict = io.std_savedict.copy() + savedict['dtype'] = "int" + savedict['name'] = "date_components" + savedict['units'] = "integer components of UTC date" + savedict['dims'] = dimid + dimcalcomp + savedict['values'] = data savedict['missing_value'] = -9 - savedict['_FillValue'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." - savedict['order'] = "year, month, day, hour, minute, second" - dummy = f.AddData(savedict) - - data = self.Data.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "lat" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() + savedict['_FillValue'] = -9 + savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
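
Note (illustration, not part of the patch): every variable in this writer follows the same recipe, copy io.std_savedict, fill the name/units/dims/values slots plus the missing_value/_FillValue pair, then hand the dictionary to f.AddData(). A compact sketch of that pattern with a hypothetical write_var helper (io4 and std_savedict are the codebase's own modules, so this only runs inside it):

    import da.tools.io4 as io

    def write_var(f, name, units, dims, values, fill, dtype='double'):
        # hypothetical helper: one call per variable instead of one repeated block each
        savedict = io.std_savedict.copy()
        savedict.update({'name': name, 'units': units, 'dims': dims,
                         'values': values, 'dtype': dtype,
                         'missing_value': fill, '_FillValue': fill})
        f.AddData(savedict)
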
+ savedict['order'] = "year, month, day, hour, minute, second" + f.AddData(savedict) + + data = self.Data.getvalues('lat') + + savedict = io.std_savedict.copy() + savedict['name'] = "lat" + savedict['units'] = "degrees_north" + savedict['dims'] = dimid + savedict['values'] = data.tolist() savedict['missing_value'] = -999.9 - savedict['_FillValue'] = -999.9 - dummy = f.AddData(savedict) + savedict['_FillValue'] = -999.9 + f.AddData(savedict) - data = self.Data.getvalues('lon') + data = self.Data.getvalues('lon') - savedict = io.std_savedict.copy() - savedict['name'] = "lon" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() + savedict = io.std_savedict.copy() + savedict['name'] = "lon" + savedict['units'] = "degrees_east" + savedict['dims'] = dimid + savedict['values'] = data.tolist() savedict['missing_value'] = -999.9 - savedict['_FillValue'] = -999.9 - dummy = f.AddData(savedict) + savedict['_FillValue'] = -999.9 + f.AddData(savedict) - data = self.Data.getvalues('height') + data = self.Data.getvalues('height') - savedict = io.std_savedict.copy() - savedict['name'] = "alt" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() + savedict = io.std_savedict.copy() + savedict['name'] = "alt" + savedict['units'] = "meters_above_sea_level" + savedict['dims'] = dimid + savedict['values'] = data.tolist() savedict['missing_value'] = -999.9 - savedict['_FillValue'] = -999.9 - dummy = f.AddData(savedict) + savedict['_FillValue'] = -999.9 + f.AddData(savedict) - data = self.Data.getvalues('samplingstrategy') + data = self.Data.getvalues('samplingstrategy') - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() + savedict = io.std_savedict.copy() + savedict['dtype'] = "int" + savedict['name'] = "sampling_strategy" + savedict['units'] = "NA" + savedict['dims'] = dimid + savedict['values'] = data.tolist() savedict['missing_value'] = -9 - savedict['_FillValue'] = -9 - dummy = f.AddData(savedict) + savedict['_FillValue'] = -9 + f.AddData(savedict) try: data = self.Data.getvalues('evn') - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "eventnumber" - savedict['units'] = "NOAA database identifier" - savedict['dims'] = dimid+dim30char - savedict['values'] = data + savedict = io.std_savedict.copy() + savedict['dtype'] = "char" + savedict['name'] = "eventnumber" + savedict['units'] = "NOAA database identifier" + savedict['dims'] = dimid + dim30char + savedict['values'] = data savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) + savedict['_FillValue'] = '-' + f.AddData(savedict) data = self.Data.getvalues('species') - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "species" - savedict['units'] = "chemical_species_name" - savedict['dims'] = dimid+dim10char - savedict['values'] = data + savedict = io.std_savedict.copy() + savedict['dtype'] = "char" + savedict['name'] = "species" + savedict['units'] = "chemical_species_name" + savedict['dims'] = dimid + dim10char + savedict['values'] = data savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) + savedict['_FillValue'] = '-' + f.AddData(savedict) data = self.Data.getvalues('code') - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = 
"site" - savedict['units'] = "site_identifier" - savedict['dims'] = dimid+dim24char - savedict['values'] = data + savedict = io.std_savedict.copy() + savedict['dtype'] = "char" + savedict['name'] = "site" + savedict['units'] = "site_identifier" + savedict['dims'] = dimid + dim24char + savedict['values'] = data savedict['missing_value'] = '-' - savedict['_FillValue'] = '-' - dummy = f.AddData(savedict) + savedict['_FillValue'] = '-' + f.AddData(savedict) except: - msg = "Character arrays 'species' and 'sites' were not written by the io module" ; logging.warning (msg) + logging.warning("Character arrays 'species' and 'sites' were not written by the io module") - dummy = f.close() + f.close() - msg = "Successfully wrote data to obs file" ;logging.debug(msg) - msg = "Sample input file for obs operator now in place [%s]"%obsinputfile ; logging.info(msg) + logging.debug("Successfully wrote data to obs file") + logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) return obsinputfile - def AddModelDataMismatch(self ): + def AddModelDataMismatch(self): """ Get the model-data mismatch values for this cycle. @@ -319,38 +315,39 @@ class CtObservations(Observation): """ import da.tools.rc as rc - filename = self.DaCycle.DaSystem['obs.sites.rc'] + filename = self.DaCycle.DaSystem['obs.sites.rc'] if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s) ' % filename ; logging.error(msg) - raise IOError,msg + msg = 'Could not find the required sites.rc input file (%s) ' % filename + logging.error(msg) + raise IOError, msg else: - self.SitesFile = filename + self.SitesFile = filename - SitesWeights = rc.read(self.SitesFile) + SitesWeights = rc.read(self.SitesFile) self.rejection_threshold = int(SitesWeights['obs.rejection.threshold']) - self.global_R_scaling = float(SitesWeights['global.R.scaling']) - self.n_site_categories = int(SitesWeights['n.site.categories']) - self.n_sites_active = int(SitesWeights['n.sites.active']) - self.n_sites_moved = int(SitesWeights['n.sites.moved']) - - msg = 'Model-data mismatch rejection threshold: %d ' % (self.rejection_threshold) ; logging.debug(msg) - msg = 'Model-data mismatch scaling factor : %f ' % (self.global_R_scaling) ; logging.debug(msg) - msg = 'Model-data mismatch site categories : %d ' % (self.n_site_categories) ; logging.debug(msg) - msg = 'Model-data mismatch active sites : %d ' % (self.n_sites_active) ; logging.debug(msg) - msg = 'Model-data mismatch moved sites : %d ' % (self.n_sites_moved) ; logging.debug(msg) + self.global_R_scaling = float(SitesWeights['global.R.scaling']) + self.n_site_categories = int(SitesWeights['n.site.categories']) + self.n_sites_active = int(SitesWeights['n.sites.active']) + self.n_sites_moved = int(SitesWeights['n.sites.moved']) + + logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) + logging.debug('Model-data mismatch scaling factor : %f ' % self.global_R_scaling) + logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) + logging.debug('Model-data mismatch active sites : %d ' % self.n_sites_active) + logging.debug('Model-data mismatch moved sites : %d ' % self.n_sites_moved) - cats = [k for k in SitesWeights.keys() if 'site.category' in k] + cats = [k for k in SitesWeights.keys() if 'site.category' in k] - SiteCategories={} + SiteCategories = {} for key in cats: - name,error,may_localize,may_reject = SitesWeights[key].split(';') - name = name.strip().lower() - error = float(error) - may_localize 
= bool(may_localize) - may_reject = bool(may_reject) - SiteCategories[name] = {'error':error,'may_localize':may_localize,'may_reject':may_reject} + name, error, may_localize, may_reject = SitesWeights[key].split(';') + name = name.strip().lower() + error = float(error) + may_localize = bool(may_localize) + may_reject = bool(may_reject) + SiteCategories[name] = {'error':error, 'may_localize':may_localize, 'may_reject':may_reject} #print name,SiteCategories[name] @@ -358,10 +355,10 @@ class CtObservations(Observation): SiteInfo = {} for key in active: - sitename, sitecategory = SitesWeights[key].split(';') - sitename = sitename.strip().lower() - sitecategory = sitecategory.strip().lower() - SiteInfo[sitename] = SiteCategories[sitecategory] + sitename, sitecategory = SitesWeights[key].split(';') + sitename = sitename.strip().lower() + sitecategory = sitecategory.strip().lower() + SiteInfo[sitename] = SiteCategories[sitecategory] #print sitename,SiteInfo[sitename] for obs in self.Data: @@ -369,13 +366,13 @@ class CtObservations(Observation): obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script if SiteInfo.has_key(obs.code): - msg = "Observation found (%s)" % obs.code ; logging.debug(msg) - obs.mdm = SiteInfo[obs.code]['error'] - obs.may_localize = SiteInfo[obs.code]['may_localize'] - obs.may_reject = SiteInfo[obs.code]['may_reject'] + logging.debug("Observation found (%s)" % obs.code) + obs.mdm = SiteInfo[obs.code]['error'] + obs.may_localize = SiteInfo[obs.code]['may_localize'] + obs.may_reject = SiteInfo[obs.code]['may_reject'] else: - msg= "Observation NOT found (%s), please check sites.rc file (%s) !!!" % (obs.code, self.SitesFile,) ; logging.warning(msg) - obs.flag = 99 + logging.warning("Observation NOT found (%s), please check sites.rc file (%s) !!!" 
% (obs.code, self.SitesFile,)) + obs.flag = 99 # Add SiteInfo dictionary to the Observation object for future use @@ -396,32 +393,32 @@ class MixingRatioSample(object): """ - def __init__(self,id,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',species='co2',samplingstrategy=1,sdev=0.0): + def __init__(self, id, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0): - self.code = code.strip() # Site code - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mixing ratio residuals - self.hphr = hphr # Mixing ratio prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer + self.code = code.strip() # Site code + self.xdate = xdate # Date of obs + self.obs = obs # Value observed + self.simulated = simulated # Value simulated by model + self.resid = resid # Mixing ratio residuals + self.hphr = hphr # Mixing ratio prior uncertainty from fluxes and (HPH) and model data mismatch (R) + self.mdm = mdm # Model data mismatch + self.may_localize = True # Whether sample may be localized in optimizer self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = id # ID number - self.evn = evn # Event number - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() + self.flag = flag # Flag + self.height = height # Sample height + self.lat = lat # Sample lat + self.lon = lon # Sample lon + self.id = id # ID number + self.evn = evn # Event number + self.sdev = sdev # standard deviation of ensemble + self.masl = True # Sample is in Meters Above Sea Level + self.mag = not self.masl # Sample is in Meters Above Ground + self.species = species.strip() self.samplingstrategy = samplingstrategy def __str__(self): - day=self.xdate.strftime('%Y-%m-%d %H:%M:%S') - return ' '.join(map(str,[self.code,day,self.obs,self.flag,self.id])) + day = self.xdate.strftime('%Y-%m-%d %H:%M:%S') + return ' '.join(map(str, [self.code, day, self.obs, self.flag, self.id])) ################### End Class MixingRatioSample ################### @@ -431,10 +428,10 @@ class MixingRatioList(list): """ This is a special type of list that holds MixingRatioSample objects. 
         It has methods to extract data from such a list easily """
     from numpy import array, ndarray
 
-    def getvalues(self,name,constructor=array):
+    def getvalues(self, name, constructor=array):
         from numpy import ndarray
-        result = constructor([getattr(o,name) for o in self])
-        if isinstance(result,ndarray):
+        result = constructor([getattr(o, name) for o in self])
+        if isinstance(result, ndarray):
             return result.squeeze()
         else:
             return result
@@ -442,11 +439,11 @@ class MixingRatioList(list):
         return MixingRatioSample([o for o in self if o.flag == 0])
     def flagged(self):
         return MixingRatioSample([o for o in self if o.flag != 0])
-    def selectsite(self,site='mlo'):
-        l=[o for o in self if o.code == site]
+    def selectsite(self, site='mlo'):
+        l = [o for o in self if o.code == site]
         return MixingRatioSample(l)
-    def selecthours(self,hours=range(1,24)):
-        l=[o for o in self if o.xdate.hour in hours]
+    def selecthours(self, hours=range(1, 24)):
+        l = [o for o in self if o.xdate.hour in hours]
         return MixingRatioSample(l)
 
 ################### End Class MixingRatioList ###################
 
 if __name__ == "__main__":
 
     from da.tools.pipeline import JobStart
     from datetime import datetime
     import logging
-    import sys,os
+    import sys, os
 
     sys.path.append(os.getcwd())
 
@@ -465,9 +462,9 @@ if __name__ == "__main__":
 
     obs = CtObservations()
 
-    DaCycle = JobStart(['-v'],{'rc':'da.rc'})
-    DaCycle['time.sample.start'] = datetime(2000,1,1)
-    DaCycle['time.sample.end'] = datetime(2000,1,2)
+    DaCycle = JobStart(['-v'], {'rc':'da.rc'})
+    DaCycle['time.sample.start'] = datetime(2000, 1, 1)
+    DaCycle['time.sample.end'] = datetime(2000, 1, 2)
 
     obs.Initialize()
     obs.Validate()
diff --git a/da/ct/optimizer.py b/da/ct/optimizer.py
index cd4bb68..c5d7783 100755
--- a/da/ct/optimizer.py
+++ b/da/ct/optimizer.py
@@ -11,9 +11,11 @@ File created on 28 Jul 2010.
import os import sys -sys.path.append(os.getcwd()) import logging import datetime +import numpy as np +sys.path.append(os.getcwd()) + from da.baseclasses.optimizer import Optimizer @@ -36,7 +38,7 @@ class CtOptimizer(Optimizer): return version - def SetLocalization(self,type='None'): + def SetLocalization(self, type='None'): """ determine which localization to use """ if type == 'CT2007': @@ -46,54 +48,47 @@ class CtOptimizer(Optimizer): self.localization = False self.localizetype = 'None' - msg = "Current localization option is set to %s"%self.localizetype ; logging.info(msg) + logging.info("Current localization option is set to %s" % self.localizetype) - def Localize(self,n): + def Localize(self, n): """ localize the Kalman Gain matrix """ - import numpy as np - if not self.localization: return if self.localizetype == 'CT2007': - tvalue=1.97591 - if np.sqrt(self.R[n,n]) >= 1.5: - for r in range(self.nlag*self.nparams): - corr=np.corrcoef(self.HX_prime[n,:],self.X_prime[r,:].squeeze())[0,1] - prob=corr/np.sqrt((1.0-corr**2)/(self.nmembers-2)) + tvalue = 1.97591 + if np.sqrt(self.R[n, n]) >= 1.5: + for r in range(self.nlag * self.nparams): + corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1] + prob = corr / np.sqrt((1.0 - corr ** 2) / (self.nmembers - 2)) if abs(prob) < tvalue: - self.KG[r,n]=0.0 + self.KG[r, n] = 0.0 ################### End Class CtOptimizer ################### - - - if __name__ == "__main__": - import os - import sys from da.tools.initexit import StartLogger from da.tools.pipeline import JobStart sys.path.append(os.getcwd()) opts = ['-v'] - args = {'rc':'da.rc','logfile':'da_initexit.log','jobrcfilename':'test.rc'} + args = {'rc':'da.rc', 'logfile':'da_initexit.log', 'jobrcfilename':'test.rc'} StartLogger() - DaCycle = JobStart(opts,args) + DaCycle = JobStart(opts, args) DaCycle.Initialize() opt = CtOptimizer() - nobs = 100 - dims = ( int(DaCycle['time.nlag']), + nobs = 100 + dims = (int(DaCycle['time.nlag']), int(DaCycle['da.optimizer.nmembers']), int(DaCycle.DaSystem['nparameters']), - nobs, ) + nobs,) opt.Initialize(dims) opt.SetLocalization(type='CT2007') diff --git a/da/ct/statevector.py b/da/ct/statevector.py index 511b4a9..a1e6aa5 100755 --- a/da/ct/statevector.py +++ b/da/ct/statevector.py @@ -11,15 +11,17 @@ File created on 28 Jul 2010. import os import sys -sys.path.append(os.getcwd()) - import logging import datetime -from da.baseclasses.statevector import EnsembleMember, StateVector import numpy as np +import da.tools.io4 as io + +sys.path.append(os.getcwd()) +from da.baseclasses.statevector import EnsembleMember, StateVector + identifier = 'CarbonTracker Statevector ' -version = '0.0' +version = '0.0' ################### Begin Class CtStateVector ################### @@ -32,13 +34,13 @@ class CtStateVector(StateVector): def getversion(self): return version - def GetCovariance(self,date): + def GetCovariance(self, date): """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. 
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] """ - import da.tools.io4 as io + try: import matplotlib.pyplot as plt except: @@ -51,39 +53,37 @@ class CtStateVector(StateVector): # replace YYYY.MM in the ocean covariance file string - file_ocn_cov = file_ocn_cov.replace('2000.01',date.strftime('%Y.%m')) - - for file in [file_ocn_cov,file_bio_cov]: + file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m')) + for file in [file_ocn_cov, file_bio_cov]: if not os.path.exists(file): - - msg = "Cannot find the specified file %s" % file ; logging.error(msg) - raise IOError,msg + msg = "Cannot find the specified file %s" % file + logging.error(msg) + raise IOError, msg else: + logging.info("Using covariance file: %s" % file) - msg = "Using covariance file: %s" % file ; logging.info(msg) - - f_ocn = io.CT_Read(file_ocn_cov,'read') - f_bio = io.CT_Read(file_bio_cov,'read') + f_ocn = io.CT_Read(file_ocn_cov, 'read') + f_bio = io.CT_Read(file_bio_cov, 'read') cov_ocn = f_ocn.GetVariable('CORMAT') cov_bio = f_bio.GetVariable('qprior') - dummy = f_ocn.close() - dummy = f_bio.close() + f_ocn.close() + f_bio.close() - dummy = logging.debug("Succesfully closed files after retrieving prior covariance matrices") + logging.debug("Succesfully closed files after retrieving prior covariance matrices") # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it - fullcov = np.zeros((self.nparams,self.nparams),float) + fullcov = np.zeros((self.nparams, self.nparams), float) - nocn = cov_ocn.shape[0] - nbio = cov_bio.shape[0] + nocn = cov_ocn.shape[0] + nbio = cov_bio.shape[0] - fullcov[0:nbio,0:nbio] = cov_bio - fullcov[nbio:nbio+nocn,nbio:nbio+nocn] = 0.16 *np.dot(cov_ocn,cov_ocn) - fullcov[nocn+nbio,nocn+nbio] = 1.e-10 + fullcov[0:nbio, 0:nbio] = cov_bio + fullcov[nbio:nbio + nocn, nbio:nbio + nocn] = 0.16 * np.dot(cov_ocn, cov_ocn) + fullcov[nocn + nbio, nocn + nbio] = 1.e-10 try: @@ -91,7 +91,7 @@ class CtStateVector(StateVector): plt.colorbar() plt.savefig('fullcovariancematrix.png') plt.close('all') - dummy = logging.debug("Covariance matrix visualized for inspection") + logging.debug("Covariance matrix visualized for inspection") except: pass @@ -103,37 +103,30 @@ class CtStateVector(StateVector): if __name__ == "__main__": - import os - import sys + from da.tools.initexit import StartLogger from da.tools.pipeline import JobStart - import numpy as np + sys.path.append(os.getcwd()) opts = ['-v'] - args = {'rc':'da.rc','logfile':'da_initexit.log','jobrcfilename':'test.rc'} + args = {'rc':'da.rc', 'logfile':'da_initexit.log', 'jobrcfilename':'test.rc'} StartLogger() - DaCycle = JobStart(opts,args) - + DaCycle = JobStart(opts, args) DaCycle.Initialize() - StateVector = CtStateVector() - StateVector.Initialize() for n in range(dims[0]): - cov = StateVector.GetCovariance() - dummy = StateVector.MakeNewEnsemble(n+1,cov) + cov = StateVector.GetCovariance() + StateVector.MakeNewEnsemble(n + 1, cov) StateVector.Propagate() - - savedir = DaCycle['dir.output'] - filename = os.path.join(savedir,'savestate.nc') - - dummy = StateVector.WriteToFile() - + + filename = os.path.join(DaCycle['dir.output'], 'savestate.nc') + StateVector.WriteToFile() StateVector.ReadFromFile(filename) diff --git a/da/examples/das.py b/da/examples/das.py index 59fcdcf..6e61874 100755 --- a/da/examples/das.py +++ b/da/examples/das.py @@ -7,7 +7,8 @@ import sys import os import logging -dummy = sys.path.append(os.getcwd()) + +sys.path.append(os.getcwd()) 
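
Note (illustration, not part of the patch): the das.py driver being cleaned up here shows the commit's headline change, collapsing the old two-statement logging idiom into a single call. A minimal before/after sketch using only the standard logging module:

    import logging
    logging.basicConfig(level=logging.DEBUG)

    identifier = 'CarbonTracker CO2'

    # old idiom: build the message, bind it to a throwaway name, then log it
    msg = 'system initialized: %s' % identifier ; logging.debug(msg)

    # new idiom: format directly inside the logging call, no intermediate name
    logging.debug('system initialized: %s' % identifier)
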
 #################################################################################################
 # Next, import the tools needed to initialize a data assimilation cycle
 #################################################################################################
 
@@ -21,9 +22,9 @@ from da.tools.initexit import ParseOptions
 
 #################################################################################################
 # Parse and validate the command line options, start logging
 #################################################################################################
 
-dummy = StartLogger()
-opts, args = ParseOptions()
-opts,args = ValidateOptsArgs(opts,args)
+StartLogger()
+opts, args = ParseOptions()
+opts, args = ValidateOptsArgs(opts, args)
 
 #################################################################################################
 # Create the Cycle Control object for this job
 #################################################################################################
 
@@ -31,7 +32,7 @@ opts,args = ValidateOptsArgs(opts,args)
 
 from da.tools.initexit import CycleControl
 
-DaCycle = CycleControl(opts,args)
+DaCycle = CycleControl(opts, args)
 
 ###########################################################################################
 ### IMPORT THE APPLICATION SPECIFIC MODULES HERE, TO BE PASSED INTO THE MAIN PIPELINE!!! ##
 ###########################################################################################
 
@@ -45,39 +46,39 @@ from da.ct.obs import CtObservations
 
 from da.tm5.observationoperator import TM5ObservationOperator
 from da.ct.optimizer import CtOptimizer
 
-PlatForm = MaunaloaPlatForm()
-DaSystem = CtDaSystem(DaCycle['da.system.rc'])
+PlatForm = MaunaloaPlatForm()
+DaSystem = CtDaSystem(DaCycle['da.system.rc'])
 ObsOperator = TM5ObservationOperator(DaCycle['da.obsoperator.rc'])
-Samples = CtObservations()
+Samples = CtObservations()
 StateVector = CtStateVector()
-Optimizer = CtOptimizer()
+Optimizer = CtOptimizer()
 
 ##########################################################################################
 ################### ENTER THE PIPELINE WITH THE OBJECTS PASSED BY THE USER ###############
 ##########################################################################################
 
-from da.tools.pipeline import header,footer
+from da.tools.pipeline import header, footer
 
-msg = header+"Entering Pipeline "+footer ; logging.info(msg)
+logging.info(header + "Entering Pipeline " + footer)
 
-EnsembleSmootherPipeline(DaCycle,PlatForm, DaSystem, Samples,StateVector,ObsOperator,Optimizer)
+EnsembleSmootherPipeline(DaCycle, PlatForm, DaSystem, Samples, StateVector, ObsOperator, Optimizer)
 
 ##########################################################################################
 ################### All done, extra stuff can be added next, such as analysis
 ##########################################################################################
 
-msg = header+"Starting analysis"+footer ; logging.info(msg)
+logging.info(header + "Starting analysis" + footer)
 
 from da.analysis.expand_fluxes import SaveWeeklyAvg1x1Data
 from da.analysis.expand_fluxes import SaveWeeklyAvgStateData
 from da.analysis.expand_fluxes import SaveWeeklyAvgTCData
 
-savedas = SaveWeeklyAvg1x1Data(DaCycle, StateVector)
-savedas = SaveWeeklyAvgStateData(DaCycle, StateVector)
-savedas = SaveWeeklyAvgTCData(DaCycle, StateVector)
-
+SaveWeeklyAvg1x1Data(DaCycle, StateVector)
+SaveWeeklyAvgStateData(DaCycle, StateVector)
+SaveWeeklyAvgTCData(DaCycle, StateVector)
+#LU is that thing necessary?
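
Note (illustration, not part of the patch): the other recurring cleanup in this file drops assignments whose results were never used (dummy = ..., savedas = ...). In Python an expression statement already discards its value, so the bare call is equivalent; a quick self-contained check:

    import os, sys

    dummy = sys.path.append(os.getcwd())   # old style: the name is always bound to None
    assert dummy is None

    sys.path.append(os.getcwd())           # new style: identical effect, no dead binding
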
sys.exit(0) diff --git a/da/tm5/observationoperator.py b/da/tm5/observationoperator.py index 5784d19..42a8ef6 100755 --- a/da/tm5/observationoperator.py +++ b/da/tm5/observationoperator.py @@ -27,10 +27,10 @@ import shutil sys.path.append(os.getcwd()) -identifier = 'TM5' -version = 'release 3.0' -mpi_shell_filename = 'tm5_mpi_wrapper' -mpi_shell_location = 'da/bin/' +identifier = 'TM5' +version = 'release 3.0' +mpi_shell_filename = 'tm5_mpi_wrapper' +mpi_shell_location = 'da/bin/' ################### Begin Class TM5 ################### @@ -57,17 +57,17 @@ class TM5ObservationOperator(ObservationOperator): """ - def __init__(self,RcFileName, DaCycle=None): + def __init__(self, RcFileName, DaCycle=None): """ The instance of an TMObservationOperator is application dependent """ - self.Identifier = self.getid() # the identifier gives the model name - self.Version = self.getversion() # the model version used + self.Identifier = self.getid() # the identifier gives the model name + self.Version = self.getversion() # the model version used self.RestartFileList = [] self.OutputFileList = () - self.RcFileType = 'None' - self.outputdir = None # Needed for opening the samples.nc files created + self.RcFileType = 'None' + self.outputdir = None # Needed for opening the samples.nc files created - dummy = self.LoadRc(RcFileName) # load the specified rc-file - dummy = self.ValidateRc() # validate the contents + self.LoadRc(RcFileName) # load the specified rc-file + self.ValidateRc() # validate the contents # The following code allows the object to be initialized with a DaCycle object already present. Otherwise, it can # be added at a later moment. @@ -77,7 +77,7 @@ class TM5ObservationOperator(ObservationOperator): else: self.DaCycle = {} - msg = 'Observation Operator initialized: %s (%s)'%(self.Identifier, self.Version,) ; logging.info(msg) + logging.info('Observation Operator initialized: %s (%s)' % (self.Identifier, self.Version)) def getid(self): return identifier @@ -85,7 +85,7 @@ class TM5ObservationOperator(ObservationOperator): def getversion(self): return version - def Initialize(self ): + def Initialize(self): """ Prepare a forward model TM5 run, this consists of: @@ -100,15 +100,15 @@ class TM5ObservationOperator(ObservationOperator): # First reload the original tm5.rc file to get unmodified settings - dummy = self.LoadRc(self.RcFileName) # load the specified rc-file - dummy = self.ValidateRc() # validate the contents + self.LoadRc(self.RcFileName) # load the specified rc-file + self.ValidateRc() # validate the contents # Create a link from TM5 to the rundirectory of the das system - sourcedir = self.tm_settings[self.rundirkey] - targetdir = os.path.join(self.DaCycle['dir.exec'],'tm5') - dummy = CreateLinks(sourcedir,targetdir) - self.outputdir = self.tm_settings[self.outputdirkey] + sourcedir = self.tm_settings[self.rundirkey] + targetdir = os.path.join(self.DaCycle['dir.exec'], 'tm5') + CreateLinks(sourcedir, targetdir) + self.outputdir = self.tm_settings[self.outputdirkey] self.DaCycle['dir.exec.tm5'] = targetdir @@ -120,7 +120,7 @@ class TM5ObservationOperator(ObservationOperator): 'jobstep.timerange.start' : self.DaCycle['time.sample.start'] , 'jobstep.timerange.end' : self.DaCycle['time.sample.end'] , 'ct.params.input.dir' : self.DaCycle['dir.input'] , - 'ct.params.input.file' : os.path.join(self.DaCycle['dir.input'],'parameters') , + 'ct.params.input.file' : os.path.join(self.DaCycle['dir.input'], 'parameters') , 'output.flask.infile' : self.DaCycle['ObsOperator.inputfile'] } @@ -132,32 
+132,27 @@ class TM5ObservationOperator(ObservationOperator): # If neither one is true, simply take the istart value from the tm5.rc file that was read self.ModifyRC(NewItems) - self.WriteRc() - #self.WriteRunRc() No longer needed with pycasso ! - - return 0 - - def LoadRc(self,RcFileName): + def LoadRc(self, RcFileName): """ This method loads a TM5 rc-file with settings for this simulation """ import da.tools.rcn as rc - self.tm_settings = rc.read(RcFileName) - self.RcFileName = RcFileName - self.Tm5RcLoaded = True + self.tm_settings = rc.read(RcFileName) + self.RcFileName = RcFileName + self.Tm5RcLoaded = True if 'my.source.dirs' in self.tm_settings.keys(): self.RcFileType = 'pycasso' else: self.RcfIleType = 'pre-pycasso' - msg = 'TM5 rc-file loaded successfully' ; logging.debug(msg) + logging.debug('TM5 rc-file loaded successfully') + - return True def ValidateRc(self): """ @@ -168,27 +163,27 @@ class TM5ObservationOperator(ObservationOperator): if self.RcFileType == 'pycasso': - self.projectkey = 'my.project.dir' - self.rundirkey = 'my.run.dir' - self.outputdirkey = 'output.dir' - self.savedirkey = 'restart.write.dir' - self.timestartkey = 'timerange.start' - self.timefinalkey = 'timerange.end' - self.timelengthkey = 'jobstep.length' - self.istartkey = 'istart' - self.restartvalue = 33 + self.projectkey = 'my.project.dir' + self.rundirkey = 'my.run.dir' + self.outputdirkey = 'output.dir' + self.savedirkey = 'restart.write.dir' + self.timestartkey = 'timerange.start' + self.timefinalkey = 'timerange.end' + self.timelengthkey = 'jobstep.length' + self.istartkey = 'istart' + self.restartvalue = 33 else: - self.projectkey = 'runid' - self.rundirkey = 'rundir' - self.outputdirkey = 'outputdir' - self.savedirkey = 'savedir' - self.timestartkey = 'time.start' - self.timefinalkey = 'time.final' - self.timelengthkey = 'time.break.nday' - self.istartkey = 'istart' - self.restartvalue = 3 + self.projectkey = 'runid' + self.rundirkey = 'rundir' + self.outputdirkey = 'outputdir' + self.savedirkey = 'savedir' + self.timestartkey = 'time.start' + self.timefinalkey = 'time.final' + self.timelengthkey = 'time.break.nday' + self.istartkey = 'istart' + self.restartvalue = 3 needed_rc_items = [ self.projectkey , @@ -201,32 +196,31 @@ class TM5ObservationOperator(ObservationOperator): self.istartkey ] - for k,v in self.tm_settings.iteritems(): + for k, v in self.tm_settings.iteritems(): if v == 'True' : self.tm_settings[k] = True if v == 'False': self.tm_settings[k] = False if 'date' in k : self.tm_settings[k] = ToDatetime(v) if 'time.start' in k : - self.tm_settings[k] = ToDatetime(v,fmt='TM5') + self.tm_settings[k] = ToDatetime(v, fmt='TM5') if 'time.final' in k : - self.tm_settings[k] = ToDatetime(v,fmt='TM5') + self.tm_settings[k] = ToDatetime(v, fmt='TM5') if 'timerange.start' in k : - self.tm_settings[k] = ToDatetime(v) + self.tm_settings[k] = ToDatetime(v) if 'timerange.end' in k : - self.tm_settings[k] = ToDatetime(v) + self.tm_settings[k] = ToDatetime(v) for key in needed_rc_items: - if not self.tm_settings.has_key(key): - status,msg = ( False,'Missing a required value in rc-file : %s' % key) + status, msg = (False, 'Missing a required value in rc-file : %s' % key) logging.error(msg) - raise IOError,msg - - status,msg = ( True,'rc-file has been validated succesfully' ) ; logging.debug(msg) + raise IOError, msg + status = True + logging.debug('rc-file has been validated succesfully') - def ModifyRC(self,NewValues): + def ModifyRC(self, NewValues): """ Modify parts of the tm5 settings, for instance 
to give control of file locations to the DA shell instead of to the tm5.rc script. @@ -235,19 +229,19 @@ class TM5ObservationOperator(ObservationOperator): """ - for k,v in NewValues.iteritems(): + for k, v in NewValues.iteritems(): if self.tm_settings.has_key(k): # keep previous value - v_orig= self.tm_settings[k] + v_orig = self.tm_settings[k] #replace with new self.tm_settings[k] = v #replace all instances of old with new, but only if it concerns a name of a path!!! if os.path.exists(str(v)): - for k_old,v_old in self.tm_settings.iteritems(): - if not isinstance(v_old,str): + for k_old, v_old in self.tm_settings.iteritems(): + if not isinstance(v_old, str): continue if str(v_orig) in str(v_old): - v_new = str(v_old).replace(str(v_orig),str(v)) + v_new = str(v_old).replace(str(v_orig), str(v)) self.tm_settings[k_old] = v_new logging.debug('Replaced tm5 rc-item %s ' % k) @@ -262,89 +256,81 @@ class TM5ObservationOperator(ObservationOperator): Write the rc-file settings to a tm5.rc file in the rundir """ import da.tools.rc as rc - - tm5rcfilename = os.path.join(self.tm_settings[self.rundirkey],'tm5.rc') - - dummy = rc.write(tm5rcfilename,self.tm_settings) - - msg = "Modified tm5.rc written (%s)" % tm5rcfilename ;logging.debug(msg) - + + tm5rcfilename = os.path.join(self.tm_settings[self.rundirkey], 'tm5.rc') + rc.write(tm5rcfilename, self.tm_settings) + logging.debug("Modified tm5.rc written (%s)" % tm5rcfilename) + def WriteRunRc(self): """ Create the tm5-runtime.rc file which is read by initexit.F90 to control time loop and restart from save files """ import da.tools.rc as rc - tm5rcfilename = os.path.join(self.tm_settings[self.rundirkey],'tm5_runtime.rc') - - dummy = rc.write(tm5rcfilename,self.tm_settings) - - rc_runtm5={} + tm5rcfilename = os.path.join(self.tm_settings[self.rundirkey], 'tm5_runtime.rc') + rc.write(tm5rcfilename, self.tm_settings) - rc_runtm5['year1'] = self.tm_settings[self.timestartkey].year + rc_runtm5 = {} + rc_runtm5['year1'] = self.tm_settings[self.timestartkey].year rc_runtm5['month1'] = self.tm_settings[self.timestartkey].month - rc_runtm5['day1'] = self.tm_settings[self.timestartkey].day - rc_runtm5['hour1'] = self.tm_settings[self.timestartkey].hour - rc_runtm5['minu1'] = 0 - rc_runtm5['sec1'] = 0 + rc_runtm5['day1'] = self.tm_settings[self.timestartkey].day + rc_runtm5['hour1'] = self.tm_settings[self.timestartkey].hour + rc_runtm5['minu1'] = 0 + rc_runtm5['sec1'] = 0 - rc_runtm5['year2'] = self.tm_settings[self.timefinalkey].year + rc_runtm5['year2'] = self.tm_settings[self.timefinalkey].year rc_runtm5['month2'] = self.tm_settings[self.timefinalkey].month - rc_runtm5['day2'] = self.tm_settings[self.timefinalkey].day - rc_runtm5['hour2'] = self.tm_settings[self.timefinalkey].hour - rc_runtm5['minu2'] = 0 - rc_runtm5['sec2'] = 0 + rc_runtm5['day2'] = self.tm_settings[self.timefinalkey].day + rc_runtm5['hour2'] = self.tm_settings[self.timefinalkey].hour + rc_runtm5['minu2'] = 0 + rc_runtm5['sec2'] = 0 - rc_runtm5[self.istartkey] = self.tm_settings[self.istartkey] - rc_runtm5[self.savedirkey] = self.tm_settings[self.savedirkey] + rc_runtm5[self.istartkey] = self.tm_settings[self.istartkey] + rc_runtm5[self.savedirkey] = self.tm_settings[self.savedirkey] rc_runtm5[self.outputdirkey] = self.tm_settings[self.outputdirkey] - rc.write(tm5rcfilename,rc_runtm5) - - msg = "Modified tm5_runtime.rc written (%s)" % tm5rcfilename ;logging.debug(msg) + rc.write(tm5rcfilename, rc_runtm5) + logging.debug("Modified tm5_runtime.rc written (%s)" % tm5rcfilename) def 
ValidateInput(self): """ Make sure that parameter files are written to the TM5 inputdir, and that observation lists are present """ - datadir = self.tm_settings['ct.params.input.dir'] + datadir = self.tm_settings['ct.params.input.dir'] if not os.path.exists(datadir): - msg = "The specified input directory for the TM5 model to read from does not exist (%s), exiting..."%datadir ; logging.error(msg) - raise IOError,msg + msg = "The specified input directory for the TM5 model to read from does not exist (%s), exiting..." % datadir ; logging.error(msg) + raise IOError, msg datafiles = os.listdir(datadir) - - obsfile = self.DaCycle['ObsOperator.inputfile'] + obsfile = self.DaCycle['ObsOperator.inputfile'] if not os.path.exists(obsfile): - msg = "The specified obs input file for the TM5 model to read from does not exist (%s), exiting..."%obsfile ; logging.error(msg) - raise IOError,msg + msg = "The specified obs input file for the TM5 model to read from does not exist (%s), exiting..." % obsfile ; logging.error(msg) + raise IOError, msg for n in range(int(self.DaCycle['da.optimizer.nmembers'])): - paramfile = 'parameters.%03d.nc'%n + paramfile = 'parameters.%03d.nc' % n if paramfile not in datafiles: - msg = "The specified parameter input file for the TM5 model to read from does not exist (%s), exiting..."%paramfile ; logging.error(msg) - raise IOError,msg + msg = "The specified parameter input file for the TM5 model to read from does not exist (%s), exiting..." % paramfile ; logging.error(msg) + raise IOError, msg # Next, make sure there is an actual model version compiled and ready to execute - targetdir = os.path.join(self.tm_settings[self.rundirkey]) + targetdir = os.path.join(self.tm_settings[self.rundirkey]) if self.RcFileType == 'pycasso': - self.Tm5Executable = os.path.join(targetdir,self.tm_settings['my.basename']+'.x') + self.Tm5Executable = os.path.join(targetdir, self.tm_settings['my.basename'] + '.x') else: - self.Tm5Executable = os.path.join(targetdir,'tm5.x') + self.Tm5Executable = os.path.join(targetdir, 'tm5.x') if not os.path.exists(self.Tm5Executable): - msg = "Required TM5 executable was not found %s"%self.Tm5Executable ; logging.error(msg) - msg = "Please compile the model with the specified rc-file and the regular TM5 scripts first" ; logging.error(msg) + logging.error("Required TM5 executable was not found %s" % self.Tm5Executable) + logging.error("Please compile the model with the specified rc-file and the regular TM5 scripts first") raise IOError - return 0 - def GetInitialData(self): """ This method places all initial data needed by an ObservationOperator in the proper folder for the model. For TM5, this means copying the save_*.hdf* files to the dir.save directory from which TM5 will read initial @@ -357,36 +343,31 @@ class TM5ObservationOperator(ObservationOperator): """ - msg = "Moving TM5 model restart data from the restart/current directory to the TM5 save dir" ; logging.debug(msg) + logging.debug("Moving TM5 model restart data from the restart/current directory to the TM5 save dir") # First get the restart data for TM5 from the current restart dir of the filter - sourcedir = self.DaCycle['dir.restart.current'] - targetdir = self.tm_settings[self.savedirkey] + sourcedir = self.DaCycle['dir.restart.current'] + targetdir = self.tm_settings[self.savedirkey] for file in os.listdir(sourcedir): - - file = os.path.join(sourcedir,file) - + #LU a lil mess + file = os.path.join(sourcedir, file) if os.path.isdir(file): # skip dirs - - msg = " [skip] .... 
%s " % file ; logging.debug(msg) + logging.debug(" [skip] .... %s " % file) continue #if not file.startswith('save_'): if not file.startswith('TM5_restart'): - msg = " [skip] .... %s " % file ; logging.debug(msg) + logging.debug(" [skip] .... %s " % file) continue # all okay, copy file - msg = " [copy] .... %s " % file ; logging.debug(msg) - dummy = shutil.copy(file,file.replace(sourcedir,targetdir) ) - - msg = "All restart data have been copied from the restart/current directory to the TM5 save dir" ; logging.debug(msg) + logging.debug(" [copy] .... %s " % file) + shutil.copy(file, file.replace(sourcedir, targetdir)) - - return None + logging.debug("All restart data have been copied from the restart/current directory to the TM5 save dir") def Run(self): @@ -419,7 +400,7 @@ class TM5ObservationOperator(ObservationOperator): code = self.TM5_With_N_tracers() if code == 0: - logging.info('Finished model executable succesfully (%s)'%code) + logging.info('Finished model executable succesfully (%s)' % code) self.Status = 'Success' else: logging.error('Error in model executable return code: %s ' % code) @@ -432,56 +413,55 @@ class TM5ObservationOperator(ObservationOperator): return code - def TM5_under_mpirun(self ): + def TM5_under_mpirun(self): """ Method handles the case where a shell runs an MPI process that forks into N TM5 model instances """ from string import join import datetime - DaPlatForm = self.DaCycle.DaPlatForm - - targetdir = os.path.join(self.tm_settings[self.rundirkey]) + DaPlatForm = self.DaCycle.DaPlatForm + targetdir = os.path.join(self.tm_settings[self.rundirkey]) - if not os.path.exists(os.path.join(mpi_shell_location,mpi_shell_filename) ): - msg = "Cannot find the mpi_shell wrapper needed for completion (%s) in (%s)"% (mpi_shell_filename,mpi_shell_location) ; logging.error(msg) - msg = "Please see the %s/readme_wrapper.txt file for instructions to compile it"%mpi_shell_location ; logging.error(msg) + if not os.path.exists(os.path.join(mpi_shell_location, mpi_shell_filename)): + logging.error("Cannot find the mpi_shell wrapper needed for completion (%s) in (%s)" % (mpi_shell_filename, mpi_shell_location)) + logging.error("Please see the %s/readme_wrapper.txt file for instructions to compile it" % mpi_shell_location) raise IOError - shutil.copy(os.path.join(mpi_shell_location,mpi_shell_filename) ,os.path.join(targetdir,mpi_shell_filename) ) + shutil.copy(os.path.join(mpi_shell_location, mpi_shell_filename) , os.path.join(targetdir, mpi_shell_filename)) # Go to executable directory and start the subprocess, using a new logfile - dummy = os.chdir(targetdir) - msg = 'Changing directory to %s ' % targetdir ;logging.debug(msg) + os.chdir(targetdir) + logging.debug('Changing directory to %s ' % targetdir) # Remove the tm5.ok file from a previous run, placed back only if a successful TM5 run is executed - okfile = 'tm5.ok' + okfile = 'tm5.ok' if os.path.exists(okfile): - dummy = os.remove(okfile) + os.remove(okfile) - nprocesses = self.DaCycle['da.optimizer.nmembers'] - jobparams = {'jobname':'tm5', - 'jobnodes':'ncomp %d'%int(nprocesses), + nprocesses = self.DaCycle['da.optimizer.nmembers'] + jobparams = {'jobname':'tm5', + 'jobnodes':'ncomp %d' % int(nprocesses), 'jobtime':'00:30:00', 'joblog':os.path.join(self.DaCycle['dir.jobs']) } # file ID and names - jobid = 'tm5' - targetdir = os.path.join(self.DaCycle['dir.exec']) - jobfile = os.path.join(targetdir,'jb.%s.jb'%jobid) - logfile = jobfile.replace('.jb','.log') + jobid = 'tm5' + targetdir = 
os.path.join(self.DaCycle['dir.exec']) + jobfile = os.path.join(targetdir, 'jb.%s.jb' % jobid) + logfile = jobfile.replace('.jb', '.log') - template = DaPlatForm.GetJobTemplate(jobparams,block=True) - template += 'cd %s\n'%targetdir - template += 'mpirun -np %d %s ./tm5.x\n'%(int(nprocesses),mpi_shell_filename,) + template = DaPlatForm.GetJobTemplate(jobparams, block=True) + template += 'cd %s\n' % targetdir + template += 'mpirun -np %d %s ./tm5.x\n' % (int(nprocesses), mpi_shell_filename,) - dummy = DaPlatForm.WriteJob(jobfile,template,jobid) + DaPlatForm.WriteJob(jobfile, template, jobid) - msg = 'Submitting job at %s'%datetime.datetime.now() ; logging.info(msg) - code = DaPlatForm.SubmitJob(jobfile,joblog=jobfile) - msg = 'Resuming job at %s'%datetime.datetime.now() ; logging.info(msg) - + logging.info('Submitting job at %s' % datetime.datetime.now()) + code = DaPlatForm.SubmitJob(jobfile, joblog=jobfile) + logging.info('Resuming job at %s' % datetime.datetime.now()) + #LU the value for code has changed and is changed further. senseless. if not os.path.exists(okfile): code = -1 else: @@ -495,43 +475,43 @@ class TM5ObservationOperator(ObservationOperator): from string import join import datetime - DaPlatForm = self.DaCycle.DaPlatForm + DaPlatForm = self.DaCycle.DaPlatForm - targetdir = os.path.join(self.tm_settings[self.rundirkey]) + targetdir = os.path.join(self.tm_settings[self.rundirkey]) # Go to executable directory and start the subprocess, using a new logfile - dummy = os.chdir(targetdir) - msg = 'Changing directory to %s ' % targetdir ;logging.debug(msg) + os.chdir(targetdir) + logging.debug('Changing directory to %s ' % targetdir) # Remove the tm5.ok file from a previous run, placed back only if a successful TM5 run is executed - okfile = 'tm5.ok' + okfile = 'tm5.ok' if os.path.exists(okfile): - dummy = os.remove(okfile) + os.remove(okfile) # file ID and names - jobid = 'tm5' - jobfile = os.path.join(targetdir,'jb.%s.jb'%jobid) - logfile = jobfile.replace('.jb','.log') + jobid = 'tm5' + jobfile = os.path.join(targetdir, 'jb.%s.jb' % jobid) + logfile = jobfile.replace('.jb', '.log') - nprocesses = int(self.DaCycle['da.optimizer.nmembers'])/5 # Note that we assign 5 tracers to each processor, this seems good for TM5 - jobparams = {'jobname':'tm5', - 'jobnodes':'ncomp %d'%int(nprocesses), + nprocesses = int(self.DaCycle['da.optimizer.nmembers']) / 5 # Note that we assign 5 tracers to each processor, this seems good for TM5 + jobparams = {'jobname':'tm5', + 'jobnodes':'ncomp %d' % int(nprocesses), 'jobtime':'00:30:00', 'joblog':os.path.join(self.DaCycle['dir.jobs']) } - template = DaPlatForm.GetJobTemplate(jobparams,block=True) - template += 'cd %s\n'%targetdir - template += '%s -np %d %s tm5.rc\n'%(self.tm_settings['mpirun.command'],int(nprocesses),self.Tm5Executable,) + template = DaPlatForm.GetJobTemplate(jobparams, block=True) + template += 'cd %s\n' % targetdir + template += '%s -np %d %s tm5.rc\n' % (self.tm_settings['mpirun.command'], int(nprocesses), self.Tm5Executable,) - dummy = DaPlatForm.WriteJob(jobfile,template,jobid) + DaPlatForm.WriteJob(jobfile, template, jobid) - msg = 'Submitting job at %s'%datetime.datetime.now() ; logging.info(msg) - code = DaPlatForm.SubmitJob(jobfile,block=True) - msg = 'Resuming job at %s'%datetime.datetime.now() ; logging.info(msg) - + logging.info('Submitting job at %s' % datetime.datetime.now()) + code = DaPlatForm.SubmitJob(jobfile, block=True) + logging.info('Resuming job at %s' % datetime.datetime.now()) + #LU same here! 
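
Note (illustration, not part of the patch): both TM5 run methods finish identically, and the #LU remarks point out that the code value returned by SubmitJob is immediately overwritten here, so in practice only the presence of the tm5.ok sentinel decides success. A minimal sketch of that sentinel pattern, under the same assumption that TM5 touches the file only on a clean finish:

    import os

    def exit_code_from_sentinel(okfile='tm5.ok'):
        # the scheduler's return value is ignored; the sentinel file written
        # by the model itself is treated as the ground truth
        return 0 if os.path.exists(okfile) else -1
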
if not os.path.exists(okfile): code = -1 else: @@ -548,12 +528,12 @@ class TM5ObservationOperator(ObservationOperator): Note 2: also adding the weekly mean flux output to the OutputFileList for later collection """ - sourcedir = os.path.join(self.tm_settings[self.savedirkey]) - filter = ['%s'%self.tm_settings[self.timefinalkey].strftime('%Y%m%d')] + sourcedir = os.path.join(self.tm_settings[self.savedirkey]) + filter = ['%s' % self.tm_settings[self.timefinalkey].strftime('%Y%m%d')] - msg = "Creating a new list of TM5 restart data" ; logging.debug(msg) - msg = " from directory: %s " % sourcedir ; logging.debug(msg) - msg = " with filter: %s " % filter ; logging.debug(msg) + logging.debug("Creating a new list of TM5 restart data") + logging.debug(" from directory: %s " % sourcedir) + logging.debug(" with filter: %s " % filter) # Start from empty lists for each TM5 run. Note that these "private" lists from the obs operator are later on appended to the system @@ -562,13 +542,11 @@ class TM5ObservationOperator(ObservationOperator): self.RestartFileList = [] for file in os.listdir(sourcedir): - - file = os.path.join(sourcedir,file) - + file = os.path.join(sourcedir, file) if os.path.isdir(file): # skip dirs skip = True elif filter == []: # copy all - skip= False + skip = False else: # check filter skip = True # default skip for f in filter: @@ -577,34 +555,35 @@ class TM5ObservationOperator(ObservationOperator): break if skip: - msg = " [skip] .... %s " % file ; logging.debug(msg) + logging.debug(" [skip] .... %s " % file) continue - dummy = self.RestartFileList.append( file ) - msg = " [added to restart list] .... %s " % file ; logging.debug(msg) + self.RestartFileList.append(file) + logging.debug(" [added to restart list] .... %s " % file) + + sourcedir = os.path.join(self.tm_settings[self.outputdirkey]) + sd_ed = self.DaCycle['time.sample.stamp'] + filter = ['flask_%s' % sd_ed, 'flux1x1_%s' % sd_ed] - sourcedir = os.path.join(self.tm_settings[self.outputdirkey]) - sd_ed = self.DaCycle['time.sample.stamp'] - filter = ['flask_%s'%sd_ed,'flux1x1_%s'%sd_ed] + logging.debug("Creating a new list of TM5 output data to collect") + logging.debug(" from directory: %s " % sourcedir) + logging.debug(" with filter: %s " % filter) - msg = "Creating a new list of TM5 output data to collect" ; logging.debug(msg) - msg = " from directory: %s " % sourcedir ; logging.debug(msg) - msg = " with filter: %s " % filter ; logging.debug(msg) # Start from empty lists for each TM5 run. Note that these "private" lists from the obs operator are later on appended to the system # lists - self.OutputFileList = [] + self.OutputFileList = [] for file in os.listdir(sourcedir): - file = os.path.join(sourcedir,file) + file = os.path.join(sourcedir, file) if os.path.isdir(file): # skip dirs skip = True elif filter == []: # copy all - skip= False + skip = False else: # check filter skip = True # default skip for f in filter: @@ -613,11 +592,11 @@ class TM5ObservationOperator(ObservationOperator): break if skip: - msg = " [skip] .... %s " % file ; logging.debug(msg) + logging.debug(" [skip] .... %s " % file) continue - dummy = self.OutputFileList.append( file ) - msg = " [added to output list] .... %s " % file ; logging.debug(msg) + self.OutputFileList.append(file) + logging.debug(" [added to output list] .... 
%s " % file) ################### End Class TM5 ################### @@ -628,19 +607,18 @@ if __name__ == "__main__": from da.tools.initexit import StartLogger from da.tools.pipeline import JobStart import datetime as dtm - import sys,os sys.path.append(os.getcwd()) StartLogger() - DaCycle = JobStart([],{'rc':'da.rc'}) + DaCycle = JobStart([], {'rc':'da.rc'}) DaCycle.Initialize() - DaCycle['time.sample.start'] = dtm.datetime(2000,1,1) - DaCycle['time.sample.end'] = dtm.datetime(2000,1,2) + DaCycle['time.sample.start'] = dtm.datetime(2000, 1, 1) + DaCycle['time.sample.end'] = dtm.datetime(2000, 1, 2) DaCycle['time.sample.window'] = 0 - tm=TM5ObservationOperator() + tm = TM5ObservationOperator() tm.Initialize() tm.Run() tm.SaveData() diff --git a/da/tools/initexit.py b/da/tools/initexit.py index 613f6ca..8bea911 100755 --- a/da/tools/initexit.py +++ b/da/tools/initexit.py @@ -55,7 +55,7 @@ Other functions in the module initexit that are related to the control of a DA c """ -needed_da_items=[ +needed_da_items = [ 'time.start', 'time.finish', 'time.nlag', @@ -83,7 +83,7 @@ class CycleControl(dict): This object controls the CTDAS system flow and functionality. """ - def __init__(self,opts=[],args={}): + def __init__(self, opts=[], args={}): """ The CycleControl object is instantiated with a set of options and arguments. The list of arguments must contain the name of an existing ``rc-file``. @@ -101,13 +101,13 @@ class CycleControl(dict): # Add some useful variables to the rc-file dictionary - self['jobrcfilename'] = self.RcFileName - self['dir.da_submit'] = os.getcwd() + self['jobrcfilename'] = self.RcFileName + self['dir.da_submit'] = os.getcwd() self['da.crash.recover'] = '-r' in opts - self['verbose'] = '-v' in opts - self.DaSystem = None # to be filled later - self.RestartFileList = [] # List of files needed for restart, to be extended later - self.OutputFileList = [] # List of files needed for output, to be extended later + self['verbose'] = '-v' in opts + self.DaSystem = None # to be filled later + self.RestartFileList = [] # List of files needed for restart, to be extended later + self.OutputFileList = [] # List of files needed for output, to be extended later def __str__(self): """ @@ -124,21 +124,20 @@ class CycleControl(dict): return "" - def LoadRc(self,RcFileName): + def LoadRc(self, RcFileName): """ This method loads a DA Cycle rc-file with settings for this simulation """ import da.tools.rc as rc rcdata = rc.read(RcFileName) - for k,v in rcdata.iteritems(): + for k, v in rcdata.iteritems(): self[k] = v - self.RcFileName = RcFileName - self.DaRcLoaded = True + self.RcFileName = RcFileName + self.DaRcLoaded = True - msg = 'DA Cycle rc-file (%s) loaded successfully'%self.RcFileName ; logging.info(msg) + logging.info('DA Cycle rc-file (%s) loaded successfully' % self.RcFileName) - return True def ValidateRC(self): @@ -148,10 +147,10 @@ class CycleControl(dict): """ from da.tools.general import ToDatetime - for k,v in self.iteritems(): - if v in ['True','true', 't','T', 'y', 'yes']: + for k, v in self.iteritems(): + if v in ['True', 'true', 't', 'T', 'y', 'yes']: self[k] = True - if v in ['False','false', 'f','F', 'n', 'no']: + if v in ['False', 'false', 'f', 'F', 'n', 'no']: self[k] = False if 'date' in k : self[k] = ToDatetime(v) if 'time.start' in k : @@ -162,19 +161,17 @@ class CycleControl(dict): self[k] = ToDatetime(v) for key in needed_da_items: - if not self.has_key(key): - status,msge = ( False,'Missing a required value in rc-file : %s' % key) + msge = 'Missing a required value 
in rc-file : %s' % key
             logging.error(msge)
-            msg = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ' ; logging.error(msg)
-            msg = 'Please note the update on Dec 02 2011 where rc-file names for DaSystem and ' ; logging.error(msg)
-            msg = 'are from now on specified in the main rc-file (see da/rc/da.rc for example)' ; logging.error(msg)
-            msg = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ' ; logging.error(msg)
-            raise IOError,msge
+            logging.error('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ')
+            logging.error('Please note the update on Dec 02 2011 where rc-file names for DaSystem and ')
+            logging.error('are from now on specified in the main rc-file (see da/rc/da.rc for example)')
+            logging.error('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ')
+            raise IOError, msge
 
-        status,msg = ( True,'DA Cycle settings have been validated succesfully' ) ; logging.debug(msg)
+        logging.debug('DA Cycle settings have been validated successfully')
 
-        return None
 
     def ParseTimes(self):
         """
         Parse time related parameters into datetime objects for later use
         """
         from da.tools.general import AdvanceTime
 
         startdate = self['time.start']
         finaldate = self['time.finish']
 
         if finaldate <= startdate:
-            msg = 'The start date (%s) is not greater than the end date (%s), please revise'%(startdate.strftime('%Y%m%d'),finaldate.strftime('%Y%m%d'))
-            logging.error(msg)
+            logging.error('The final date (%s) is not later than the start date (%s), please revise' % (finaldate.strftime('%Y%m%d'), startdate.strftime('%Y%m%d')))
             raise ValueError
 #
         cyclelength = self['time.cycle']                  # get time step
@@ -197,30 +193,29 @@ class CycleControl(dict):
         if cyclelength == 'infinite':
             enddate = finaldate
         else:
-            enddate = AdvanceTime(startdate,cyclelength)
+            enddate = AdvanceTime(startdate, cyclelength)
 
-        dt = enddate-startdate
+        dt = enddate - startdate
 #
         if enddate > finaldate:  # do not run beyond finaldate
             enddate = finaldate
 
-        self['time.start'] = startdate
-        self['time.end'] = enddate
-        self['time.finish'] = finaldate
-        self['cyclelength'] = dt
+        self['time.start'] = startdate
+        self['time.end'] = enddate
+        self['time.finish'] = finaldate
+        self['cyclelength'] = dt
 
-        msg = "===============================================================" ; logging.info(msg)
-        msg = "DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
-        msg = "DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
-        msg = "DA Cycle final date is %s" % finaldate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
-        msg = "DA Cycle cycle length is %s" % cyclelength ; logging.info(msg)
-        msg = "DA Cycle restart is %s" % str(self['time.restart']) ; logging.info(msg)
-        msg = "===============================================================" ; logging.info(msg)
+        logging.info("===============================================================")
+        logging.info("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
+        logging.info("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
+        logging.info("DA Cycle final date is %s" % finaldate.strftime('%Y-%m-%d %H:%M'))
+        logging.info("DA Cycle cycle length is %s" % cyclelength)
+        logging.info("DA Cycle restart is %s" % str(self['time.restart']))
+        logging.info("===============================================================")
 
-        return None
 
-    def SetSampleTimes(self,lag):
+    def SetSampleTimes(self, lag):
         """ Set the times over which a sampling interval will loop, depending on the lag. 
Note that lag falls in the interval [0,nlag-1] @@ -228,13 +223,13 @@ class CycleControl(dict): import copy # Start from cycle times - self['time.sample.start'] = copy.deepcopy(self['time.start']) - self['time.sample.end'] = copy.deepcopy(self['time.end']) + self['time.sample.start'] = copy.deepcopy(self['time.start']) + self['time.sample.end'] = copy.deepcopy(self['time.end']) # Now advance depending on lag for l in range(lag): - dummy = self.AdvanceSampleTimes() + self.AdvanceSampleTimes() return None @@ -244,40 +239,23 @@ class CycleControl(dict): """ from da.tools.general import AdvanceTime - startdate = self['time.sample.start'] - enddate = self['time.sample.end'] - cyclelength = self['cyclelength'] - - startdate = AdvanceTime(startdate,cyclelength.days) - enddate = AdvanceTime(enddate,cyclelength.days) - - self['time.sample.start'] = startdate - self['time.sample.end'] = enddate + self['time.sample.start'] = AdvanceTime(self['time.sample.start'], self['cyclelength'].days) + self['time.sample.end'] = AdvanceTime(self['time.sample.end'], self['cyclelength'].days) - return None def AdvanceCycleTimes(self): """ Advance cycle start and end time by one cycle interval """ from da.tools.general import AdvanceTime + + self['time.start'] = start = AdvanceTime(self['time.start'], self['cyclelength'].days) + self['time.end'] = AdvanceTime(self['time.end'], self['cyclelength'].days) + self['dir.output'] = os.path.join(self['dir.da_run'], 'output', start.strftime('%Y%m%d')) - startdate = self['time.start'] - enddate = self['time.end'] - cyclelength = self['cyclelength'] - - startdate = AdvanceTime(startdate,cyclelength.days) - enddate = AdvanceTime(enddate,cyclelength.days) - - filtertime = startdate.strftime('%Y%m%d') - self['dir.output'] = os.path.join(self['dir.da_run'],'output',filtertime) - - self['time.start'] = startdate - self['time.end'] = enddate - return None - def RandomSeed(self,action='read'): + def RandomSeed(self, action='read'): """ Get the randomseed and save it, or read the random seed and set it. 
The seed is currently stored in a python :mod:`pickle` file, residing in the ``exec`` directory @@ -286,29 +264,28 @@ class CycleControl(dict): import cPickle import numpy as np - filename = os.path.join(self['dir.exec'],'randomseed.pickle') + filename = os.path.join(self['dir.exec'], 'randomseed.pickle') if action == 'write': - f = open(filename,'wb') - seed = np.random.get_state() - dummy = cPickle.dump (seed,f,-1) - dummy = f.close() + f = open(filename, 'wb') + seed = np.random.get_state() + cPickle.dump(seed, f, -1) + f.close() - msg = "Saved the random seed generator values to file" + msg = "Saved the random seed generator values to file" if action == 'read': - f = open(filename,'rb') - seed = cPickle.load(f) - dummy = np.random.set_state(seed) - dummy = f.close() + f = open(filename, 'rb') + seed = cPickle.load(f) + np.random.set_state(seed) + f.close() - msg = "Retrieved the random seed generator values from file" + msg = "Retrieved the random seed generator values from file" logging.info(msg) - dummy = self.RestartFileList.extend([filename]) - - msg = "Added the randomseed.pickle file to the RestartFileList" ; logging.debug(msg) + self.RestartFileList.append(filename) + logging.debug("Added the randomseed.pickle file to the RestartFileList") return None @@ -353,39 +330,27 @@ class CycleControl(dict): # case 1: A recover from a previous crash, this is signaled by flag "-r" # if self['da.crash.recover']: - - msg = "Recovering simulation from data in: %s" % self['dir.da_run'] ; logging.info(msg) - - dummy = self.SetupFileStructure() - - dummy = self.RecoverRun() - - dummy = self.RandomSeed('read') + logging.info("Recovering simulation from data in: %s" % self['dir.da_run']) + + self.SetupFileStructure() + self.RecoverRun() + self.RandomSeed('read') # # case 2: A continuation, this is signaled by rc-item time.restart = True # elif self['time.restart']: - - msg = "Restarting filter from previous step" ; logging.info(msg) - - dummy = self.SetupFileStructure() - - dummy = self.RandomSeed('read') + logging.info("Restarting filter from previous step") + + self.SetupFileStructure() + self.RandomSeed('read') # # case 3: A fresh start, this is signaled by rc-item time.restart = False # elif not self['time.restart']: - msg = "First time step in filter sequence" ; logging.info(msg) - - dummy = self.SetupFileStructure() - + logging.info("First time step in filter sequence") + self.SetupFileStructure() # expand jobrcfilename to include exec dir from now on. 
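As a concrete illustration of that expansion (the paths are hypothetical values, not taken from a real run): with 'dir.exec' set to '/scratch/da_run/exec' and a jobrcfilename of './da.rc', the one-liner in the change below evaluates as

    os.path.join('/scratch/da_run/exec', os.path.split('./da.rc')[-1])
    # -> '/scratch/da_run/exec/da.rc'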
- 
-        # First strip current leading path from filename
- 
-        strippedname = os.path.split(self['jobrcfilename'])[-1]
- 
-        self['jobrcfilename'] = os.path.join(self['dir.exec'],strippedname)
+        self['jobrcfilename'] = os.path.join(self['dir.exec'], os.path.split(self['jobrcfilename'])[-1])
 
         self.ParseTimes()
 
@@ -421,17 +386,17 @@ class CycleControl(dict):
 
         # Create the run directory for this DA job, including I/O structure
 
-        filtertime = self['time.start'].strftime('%Y%m%d')
+        filtertime = self['time.start'].strftime('%Y%m%d')
 
-        self['dir.exec'] = os.path.join(self['dir.da_run'],'exec')
-        self['dir.input'] = os.path.join(self['dir.da_run'],'input')
-        self['dir.output'] = os.path.join(self['dir.da_run'],'output',filtertime)
-        self['dir.diagnostics'] = os.path.join(self['dir.da_run'],'diagnostics')
-        self['dir.analysis'] = os.path.join(self['dir.da_run'],'analysis')
-        self['dir.jobs'] = os.path.join(self['dir.da_run'],'jobs')
-        self['dir.restart'] = os.path.join(self['dir.da_run'],'restart')
-        self['dir.restart.current'] = os.path.join(self['dir.restart'],'current')
-        self['dir.restart.oneago'] = os.path.join(self['dir.restart'],'one-ago')
+        self['dir.exec'] = os.path.join(self['dir.da_run'], 'exec')
+        self['dir.input'] = os.path.join(self['dir.da_run'], 'input')
+        self['dir.output'] = os.path.join(self['dir.da_run'], 'output', filtertime)
+        self['dir.diagnostics'] = os.path.join(self['dir.da_run'], 'diagnostics')
+        self['dir.analysis'] = os.path.join(self['dir.da_run'], 'analysis')
+        self['dir.jobs'] = os.path.join(self['dir.da_run'], 'jobs')
+        self['dir.restart'] = os.path.join(self['dir.da_run'], 'restart')
+        self['dir.restart.current'] = os.path.join(self['dir.restart'], 'current')
+        self['dir.restart.oneago'] = os.path.join(self['dir.restart'], 'one-ago')
 
         CreateDirs(self['dir.da_run'])
         CreateDirs(os.path.join(self['dir.exec']))
@@ -444,7 +409,7 @@ class CycleControl(dict):
         CreateDirs(os.path.join(self['dir.restart.current']))
         CreateDirs(os.path.join(self['dir.restart.oneago']))
 
-        msg = 'Succesfully created the file structure for the assimilation job' ; logging.info(msg)
+        logging.info('Successfully created the file structure for the assimilation job')
 
 
     def RecoverRun(self):
@@ -463,32 +428,29 @@ class CycleControl(dict):
 
         # Replace rc-items with those from the crashed run's last rc-file (now in restart.current dir)
 
-        file_rc_rec = os.path.join(self['dir.restart.current'],'da_runtime.rc')
-        rc_rec = rc.read(file_rc_rec)
+        file_rc_rec = os.path.join(self['dir.restart.current'], 'da_runtime.rc')
+        rc_rec = rc.read(file_rc_rec)
 
-        for k,v in rc_rec.iteritems():
+        for k, v in rc_rec.iteritems():
             self[k] = v
 
         self.ValidateRC()
 
-        msg = "Replaced rc-items.... " ; logging.debug(msg)
-        msg = "Next cycle start date is %s" % self['time.start'] ; logging.debug(msg)
+        logging.debug("Replaced rc-items.... 
") + logging.debug("Next cycle start date is %s" % self['time.start']) # Copy randomseed.pickle file to exec dir - source = os.path.join(self['dir.restart.current'],'randomseed.pickle') - dest = os.path.join(self['dir.exec'],'randomseed.pickle') - dummy = shutil.copy(source,dest) + source = os.path.join(self['dir.restart.current'], 'randomseed.pickle') + dest = os.path.join(self['dir.exec'], 'randomseed.pickle') + shutil.copy(source, dest) - msg = "Replaced randomseed file with previous cycles' last values" ; logging.debug(msg) + logging.debug("Replaced randomseed file with previous cycles' last values") # Re-create the output dir for this time step, if needed - filtertime = self['time.start'].strftime('%Y%m%d') - self['dir.output'] = os.path.join(self['dir.da_run'],'output',filtertime) - dummy = CreateDirs(os.path.join(self['dir.output'])) - - return None + self['dir.output'] = os.path.join(self['dir.da_run'], 'output', self['time.start'].strftime('%Y%m%d')) + CreateDirs(os.path.join(self['dir.output'])) def Finalize(self): """ @@ -503,12 +465,12 @@ class CycleControl(dict): """ - dummy = self.RandomSeed('write') - dummy = self.WriteNewRCfile() - dummy = self.MoveRestartData(io_option='store') # Move restart data from current to one-ago - dummy = self.CollectRestartData() # Collect restart data for next cycle into a clean restart/current folder - dummy = self.CollectOutput() # Collect restart data for next cycle into a clean restart/current folder - dummy = self.SubmitNextCycle() + self.RandomSeed('write') + self.WriteNewRCfile() + self.MoveRestartData(io_option='store') # Move restart data from current to one-ago + self.CollectRestartData() # Collect restart data for next cycle into a clean restart/current folder + self.CollectOutput() # Collect restart data for next cycle into a clean restart/current folder + self.SubmitNextCycle() def CollectOutput(self): """ Collect files that are part of the requested output for this cycle. This function allows users to add files @@ -521,23 +483,22 @@ class CycleControl(dict): """ from da.tools.general import CreateDirs - targetdir = os.path.join(self['dir.output']) + targetdir = os.path.join(self['dir.output']) - CreateDirs(os.path.join(targetdir) ) + CreateDirs(os.path.join(targetdir)) - msg = "Collecting the required output data" ; logging.info(msg) - msg = " to directory: %s " % targetdir ; logging.debug(msg) + logging.info("Collecting the required output data") + logging.debug(" to directory: %s " % targetdir) for file in set(self.OutputFileList): - if os.path.isdir(file): # skip dirs continue if not os.path.exists(file): # skip dirs - msg = " [not found] .... %s " % file ; logging.warning(msg) + logging.warning(" [not found] .... %s " % file) continue - msg = " [copy] .... %s " % file ; logging.debug(msg) - dummy = shutil.copy(file,file.replace(os.path.split(file)[0],targetdir) ) + logging.debug(" [copy] .... 
%s " % file) + shutil.copy(file, file.replace(os.path.split(file)[0], targetdir)) @@ -566,24 +527,23 @@ class CycleControl(dict): """ from da.tools.general import CreateDirs - targetdir = os.path.join(self['dir.restart.current']) - - msg = "Purging the current restart directory before collecting new data" ; logging.info(msg) - - CreateDirs(os.path.join(targetdir),forceclean=True) + targetdir = os.path.join(self['dir.restart.current']) + + logging.info("Purging the current restart directory before collecting new data") + + CreateDirs(os.path.join(targetdir), forceclean=True) - msg = "Collecting the required restart data" ; logging.info(msg) - msg = " to directory: %s " % targetdir ; logging.debug(msg) + logging.info("Collecting the required restart data") + logging.debug(" to directory: %s " % targetdir) for file in set(self.RestartFileList): - if os.path.isdir(file): # skip dirs continue if not os.path.exists(file): # skip dirs - msg = " [not found] .... %s " % file ; logging.warning(msg) + logging.warning(" [not found] .... %s " % file) else: - msg = " [copy] .... %s " % file ; logging.debug(msg) - dummy = shutil.copy(file,file.replace(os.path.split(file)[0],targetdir) ) + logging.debug(" [copy] .... %s " % file) + shutil.copy(file, file.replace(os.path.split(file)[0], targetdir)) def MoveRestartData(self, io_option='restore'): @@ -600,8 +560,8 @@ class CycleControl(dict): """ from da.tools.general import CreateDirs - if io_option not in ['store','restore']: - raise ValueError,'Invalid option specified for io_option (%s)' % io_option + if io_option not in ['store', 'restore']: + raise ValueError, 'Invalid option specified for io_option (%s)' % io_option if io_option == 'store': @@ -616,29 +576,23 @@ class CycleControl(dict): # If "store" is requested, recreate target dir, cleaning the contents if io_option == 'store': - CreateDirs(os.path.join(targetdir),forceclean=True) - - msg = "Performing a %s of data" % (io_option) ; logging.debug(msg) - msg = " from directory: %s " % sourcedir ; logging.debug(msg) - msg = " to directory: %s " % targetdir ; logging.debug(msg) + CreateDirs(os.path.join(targetdir), forceclean=True) + logging.debug("Performing a %s of data" % io_option) + logging.debug(" from directory: %s " % sourcedir) + logging.debug(" to directory: %s " % targetdir) for file in os.listdir(sourcedir): - - file = os.path.join(sourcedir,file) + file = os.path.join(sourcedir, file) if not os.path.exists(file): - msg = "Cannot find requested file to move: %s " % file ; logging.debug(msg) + logging.debug("Cannot find requested file to move: %s " % file) sys.exit(2) - - if os.path.isdir(file): # skip dirs - - msg = " [skip] .... %s " % file ; logging.debug(msg) + logging.debug(" [skip] .... %s " % file) continue else: - - msg = " [copy] .... %s " % file ; logging.debug(msg) - dummy = shutil.copy(file,file.replace(sourcedir,targetdir) ) + logging.debug(" [copy] .... 
%s " % file) + shutil.copy(file, file.replace(sourcedir, targetdir)) # def WriteNewRCfile(self): @@ -655,31 +609,31 @@ class CycleControl(dict): # We make a copy of the current DaCycle object, and modify the start + end dates and restart value - newDaCycle = copy.deepcopy(self) - dummy = newDaCycle.AdvanceCycleTimes() - newDaCycle['time.restart'] = True + newDaCycle = copy.deepcopy(self) + newDaCycle.AdvanceCycleTimes() + newDaCycle['time.restart'] = True # Create the name of the rc-file that will hold this new input, and write it - fname = os.path.join(self['dir.exec'],'da_runtime.rc') # current exec dir holds next rc file - dummy = rc.write(fname,newDaCycle) - msg = 'Wrote new da_runtime.rc (%s) to exec dir'%fname ; logging.debug(msg) + fname = os.path.join(self['dir.exec'], 'da_runtime.rc') # current exec dir holds next rc file + rc.write(fname, newDaCycle) + logging.debug('Wrote new da_runtime.rc (%s) to exec dir' % fname) # The rest is info needed for a system restart, so it modifies the current DaCycle object (self) - self['da.restart.fname'] = fname # needed for next job template - dummy = self.RestartFileList.extend([fname]) # current restart list holds next rc file name + self['da.restart.fname'] = fname # needed for next job template + self.RestartFileList.extend([fname]) # current restart list holds next rc file name - msg = 'Added da_runtime.rc to the RestartFileList for later collection' ; logging.debug(msg) + logging.debug('Added da_runtime.rc to the RestartFileList for later collection') - def WriteRC(self,fname): + def WriteRC(self, fname): """ Write RC file after each process to reflect updated info """ import da.tools.rc as rc - dummy = rc.write(fname,self) - msg = 'Wrote expanded rc-file (%s)'%(fname) ; logging.debug(msg) - return None + rc.write(fname, self) + logging.debug('Wrote expanded rc-file (%s)' % (fname)) + def SubmitNextCycle(self): """ @@ -701,27 +655,27 @@ class CycleControl(dict): if self['time.end'] < self['time.finish']: # file ID and names - jobid = self['time.end'].strftime('%Y%m%d') - targetdir = os.path.join(self['dir.exec']) - jobfile = os.path.join(targetdir,'jb.%s.jb'%jobid) - logfile = jobfile.replace('.jb','.log') + jobid = self['time.end'].strftime('%Y%m%d') + targetdir = os.path.join(self['dir.exec']) + jobfile = os.path.join(targetdir, 'jb.%s.jb' % jobid) + logfile = jobfile.replace('.jb', '.log') # Template and commands for job - jobparams = {'jobname':"j.%s"%jobid,'jobtime':'01:30:00'} - template = DaPlatForm.GetJobTemplate(jobparams) - execcommand = os.path.join(self['dir.da_submit'],sys.argv[0]) - template += 'python %s rc=%s %s >& %s' % (execcommand,self['da.restart.fname'],join(self.opts,''),logfile,) + jobparams = {'jobname':"j.%s" % jobid, 'jobtime':'01:30:00'} + template = DaPlatForm.GetJobTemplate(jobparams) + execcommand = os.path.join(self['dir.da_submit'], sys.argv[0]) + template += 'python %s rc=%s %s >& %s' % (execcommand, self['da.restart.fname'], join(self.opts, ''), logfile,) # write and submit - dummy = DaPlatForm.WriteJob(jobfile,template,jobid) - jobid = DaPlatForm.SubmitJob(jobfile,joblog=logfile) + DaPlatForm.WriteJob(jobfile, template, jobid) + DaPlatForm.SubmitJob(jobfile, joblog=logfile) else: logging.info('Final date reached, no new cycle started') - return None - def SubmitSubStep(self,stepname): + + def SubmitSubStep(self, stepname): """ Submit the next substep of a DA cycle, this consists of * getting a job template as returned by :meth:`~da.tools.baseclasses.platform.GetJobTemplate` @@ -734,33 +688,31 @@ class 
CycleControl(dict):
     import os
     from string import join
 
-        DaPlatForm = self.DaPlatForm
-        jobparams = {'jobname':'das.%s'%stepname}
-        template = DaPlatForm.GetJobTemplate(jobparams)
-        template += 'cd %s\n'%os.getcwd()
-        template += '%s rc=%s process=%s %s' % (sys.argv[0],self['jobrcfilename'],stepname,join(self.opts,''),)
-        jobfile = DaPlatForm.WriteJob(self,template,stepname)
-        jobid = DaPlatForm.SubmitJob(jobfile)
-
-        return None
+        DaPlatForm = self.DaPlatForm
+        jobparams = {'jobname':'das.%s' % stepname}
+        template = DaPlatForm.GetJobTemplate(jobparams)
+        template += 'cd %s\n' % os.getcwd()
+        template += '%s rc=%s process=%s %s' % (sys.argv[0], self['jobrcfilename'], stepname, join(self.opts, ''),)
+        jobfile = DaPlatForm.WriteJob(self, template, stepname)
+        jobid = DaPlatForm.SubmitJob(jobfile)
+#LU cool
 
     def CleanUpCycle(self):
         """
         Nothing to do here for now
         """
 
-
+#LU I don't get this at all
 def StartLogger(level=logging.INFO):
     """ start the logging of messages to screen"""
 
 # start the logging basic configuration by setting up a log file
 
-    logging.basicConfig(level = level,
-                        format = ' [%(levelname)-7s] (%(asctime)s) py-%(module)-20s : %(message)s',
-                        datefmt = '%Y-%m-%d %H:%M:%S')
+    logging.basicConfig(level=level,
+                        format=' [%(levelname)-7s] (%(asctime)s) py-%(module)-20s : %(message)s',
+                        datefmt='%Y-%m-%d %H:%M:%S')
 
 def ParseOptions():
     """
@@ -781,16 +733,16 @@ def ParseOptions():
 
     # Parse keywords, the only option accepted so far is the "-h" flag for help
 
-    opts=[]
-    args=[]
+    opts = []
+    args = []
 
     try:
         opts, args = getopt.gnu_getopt(sys.argv[1:], "-hrv")
     except getopt.GetoptError, msg:
-        logging.error('%s'%msg)
+        logging.error('%s' % msg)
         sys.exit(2)
 
     for options in opts:
-        options=options[0].lower()
+        options = options[0].lower()
         if options == '-h':
             print ""
             print helptext
@@ -799,16 +751,16 @@ def ParseOptions():
             logging.info('-r flag specified on command line: recovering from crash')
         if options == '-v':
             logging.info('-v flag specified on command line: extra verbose output')
-            dummy = logging.root.setLevel(logging.DEBUG)
+            logging.root.setLevel(logging.DEBUG)
 
     if opts:
-        optslist=[item[0] for item in opts]
+        optslist = [item[0] for item in opts]
     else:
-        optslist=[]
+        optslist = []
 
     # Parse arguments and return as dictionary
 
-    arguments={}
+    arguments = {}
     for item in args:
 
         #item=item.lower()
 
         if '=' in item:
             key, arg = item.split('=')
         else:
-            logging.error('%s'%'Argument passed without description (%s)' % item)
-            raise getopt.GetoptError,arg
+            logging.error('Argument passed without description (%s)' % item)
+            raise getopt.GetoptError, arg
 
-        arguments[key]=arg
+        arguments[key] = arg
 
     return optslist, arguments
 
-def ValidateOptsArgs(opts,args):
+def ValidateOptsArgs(opts, args):
     """
     Validate the options and arguments passed from the command line before starting the cycle. The validation consists of checking for the presence of an argument "rc", and the existence of the specified rc-file.
 
     """
 
     if not args.has_key("rc"):
         msg = "There is no rc-file specified on the command line. 
Please use rc=yourfile.rc" ; logging.error(msg) - raise IOError,msg + raise IOError, msg elif not os.path.exists(args['rc']): msg = "The specified rc-file (%s) does not exist " % args['rc'] ; logging.error(msg) - raise IOError,msg + raise IOError, msg # WP not needed anymore #if not args.has_key('process'): @@ -847,13 +799,13 @@ def ValidateOptsArgs(opts,args): # msg = "The specified process (%s) is not valid"%args['process'] ; logging.error(msg) # raise IOError,msg - return opts,args + return opts, args if __name__ == "__main__": sys.path.append('../../') - opts,args = ParseOptions() + opts, args = ParseOptions() print opts print args diff --git a/da/tools/io.py b/da/tools/io.py index 9bb8f09..9f38c83 100755 --- a/da/tools/io.py +++ b/da/tools/io.py @@ -13,15 +13,15 @@ import datetime as dt from numpy import array import os -disclaimer = "This data belongs to the CarbonTracker project" -email = "wouter.peters@wur.nl" -url = "http://carbontracker.wur.nl" +disclaimer = "This data belongs to the CarbonTracker project" +email = "wouter.peters@wur.nl" +url = "http://carbontracker.wur.nl" institution = "Wageningen University and Research Center" -source = "CarbonTracker release 2.0" +source = "CarbonTracker release 2.0" conventions = "CF-1.1" -historytext = 'Created on '+dt.datetime.now().strftime('%B %d, %Y')+' by %s'%os.environ['USER'] +historytext = 'Created on ' + dt.datetime.now().strftime('%B %d, %Y') + ' by %s' % os.environ['USER'] -std_savedict={'name':'unknown','values':None,'dims':None,'units':'','long_name':'','_FillValue':float(-999.),'comment':''} +std_savedict = {'name':'unknown', 'values':None, 'dims':None, 'units':'', 'long_name':'', '_FillValue':float(-999.), 'comment':''} class CT_CDF(object): """ @@ -51,73 +51,69 @@ class CT_CDF(object): close() : close the file """ - def __init__(self,filename,method='create',*arguments): + def __init__(self, filename, method='create', *arguments): import Nio - import os - if method not in ['read','write','create']: + if method not in ['read', 'write', 'create']: raise ValueError, 'Method %s is not defined for a CarbonTracker NetCDF file object' % method - - if method == 'read': - self.file = Nio.open_file(filename,mode='r',*arguments) + self.file = Nio.open_file(filename, mode='r', *arguments) elif method == 'write': - self.file = Nio.open_file(filename,mode=method,*arguments) + self.file = Nio.open_file(filename, mode=method, *arguments) self.AddCTHeader() elif method == 'create': if os.path.exists(filename): os.remove(filename) - self.file = Nio.open_file(filename,mode=method,*arguments) + self.file = Nio.open_file(filename, mode=method, *arguments) self.AddCTHeader() - if method != 'read': self.AddCTHeader() def AddCTHeader(self): # - setattr(self.file,'Institution',institution) - setattr(self.file,'Contact',email) - setattr(self.file,'URL',url) - setattr(self.file,'Source',source) - setattr(self.file,'Convention',conventions) - setattr(self.file,'Disclaimer',disclaimer) - setattr(self,'History',historytext) + setattr(self.file, 'Institution', institution) + setattr(self.file, 'Contact', email) + setattr(self.file, 'URL', url) + setattr(self.file, 'Source', source) + setattr(self.file, 'Convention', conventions) + setattr(self.file, 'Disclaimer', disclaimer) + setattr(self, 'History', historytext) - def AddParamsDim(self,nparams): + def AddParamsDim(self, nparams): - dimparams=self.file.create_dimension('nparameters',nparams) + dimparams = self.file.create_dimension('nparameters', nparams) return ('nparameters',) - def 
AddMembersDim(self,nmembers):
+    def AddMembersDim(self, nmembers):
 
-        dimmembers=self.file.create_dimension('nmembers',nmembers)
+        dimmembers = self.file.create_dimension('nmembers', nmembers)
 
         return ('nmembers',)
 
-    def AddLagDim(self,nlag=0,unlimited=True):
+    def AddLagDim(self, nlag=0, unlimited=True):
 
         if unlimited:
-            dimlag =self.file.create_dimension('nlag',None)
+            dimlag = self.file.create_dimension('nlag', None)
         else:
-            dimlag=self.file.create_dimension('nlag',nlag)
-
+            dimlag = self.file.create_dimension('nlag', nlag)
+#LU what is the point of this?
         dimlag = ('nlag',)
 
         # Also create the variable of the same name so it can be queried for length
-        var = self.file.create_variable('nlag','i',dimlag)
+        var = self.file.create_variable('nlag', 'i', dimlag)
 
         return ('nlag',)
 
-    def AddObsDim(self,nobs):
+    def AddObsDim(self, nobs):
 
-        dimobs=self.file.create_dimension('nobs',nobs)
+        dimobs = self.file.create_dimension('nobs', nobs)
 
         return ('nobs',)
 
-    def AddLatLonDim(self,istart=0,iend=360,jstart=0,jend=180):
+    def AddLatLonDim(self, istart=0, iend=360, jstart=0, jend=180):
         """
         Add dimensions + data for latitude and longitude values of a rectangular 1x1 grid, the scope of the arrays can be limited by
         using the istart,...,jend integers
 
         """
 
         from numpy import arange, float64
 
-        if 'latitude' in self.file.dimensions.keys(): return ('latitude','longitude',) # already exists
+        if 'latitude' in self.file.dimensions.keys(): return ('latitude', 'longitude',)  # already exists
 
-        lons=-180+arange(360)*1.0+0.5
-        lats=-90+arange(180)*1.0+0.5
+        lons = -180 + arange(360) * 1.0 + 0.5
+        lats = -90 + arange(180) * 1.0 + 0.5
 #
-        lats=lats[jstart:jend]
-        lons=lons[istart:iend]
+        lats = lats[jstart:jend]
+        lons = lons[istart:iend]
 #
-        dimlon=self.file.create_dimension('longitude',lons.shape[0])
-        dimlon=('longitude',)
-        dimlat=self.file.create_dimension('latitude',lats.shape[0])
-        dimlat=('latitude',)
+        dimlon = self.file.create_dimension('longitude', lons.shape[0])
+        dimlon = ('longitude',)
+        dimlat = self.file.create_dimension('latitude', lats.shape[0])
+        dimlat = ('latitude',)
 
-        savedict=self.StandardVar(varname='latitude')
-        savedict['values']=lats.tolist()
-        savedict['actual_range']=(float(lats[0]),float(lats[-1]))
-        savedict['dims']=dimlat
+        savedict = self.StandardVar(varname='latitude')
+        savedict['values'] = lats.tolist()
+        savedict['actual_range'] = (float(lats[0]), float(lats[-1]))
+        savedict['dims'] = dimlat
         self.AddData(savedict)
 
-        savedict=self.StandardVar(varname='longitude')
-        savedict['values']=lons.tolist()
-        savedict['actual_range']=(float(lons[0]),float(lons[-1]))
-        savedict['dims']=dimlon
+        savedict = self.StandardVar(varname='longitude')
+        savedict['values'] = lons.tolist()
+        savedict['actual_range'] = (float(lons[0]), float(lons[-1]))
+        savedict['dims'] = dimlon
         self.AddData(savedict)
 
-        return dimlat+dimlon
+        return dimlat + dimlon
 
-    def AddDateDim(self,ndate=0,unlimited=True):
+    def AddDateDim(self, ndate=0, unlimited=True):
         """ Add a date dimension, give it the unlimited length if requested """
 
         if unlimited:
-            dimdate =self.file.create_dimension('date',None)
+            dimdate = self.file.create_dimension('date', None)
         else:
-            dimdate=self.file.create_dimension('date',ndate)
+            dimdate = self.file.create_dimension('date', ndate)
 
         dimdate = ('date',)
 
         # Also create the variable of the same name so it can be queried for length
-        var = self.file.create_variable('date','d',dimdate)
+        var = self.file.create_variable('date', 'd', dimdate)
 
         return dimdate
 
@@ -174,19 +170,19 
@@ class CT_CDF(object):
 
         if 'yyyymmddhhmmss' in self.file.dimensions.keys():
             pass
         else:
-            dummy = self.file.create_dimension('yyyymmddhhmmss',6) # already exists
+            self.file.create_dimension('yyyymmddhhmmss', 6)
 
         dimdateformat = ('yyyymmddhhmmss',)
         return dimdateformat
 
-    def AddDim(self,dimname,dimsize):
+    def AddDim(self, dimname, dimsize):
 
         if dimname in self.file.dimensions.keys():
             pass
         else:
-            newdim = self.file.create_dimension(dimname,dimsize)
+            newdim = self.file.create_dimension(dimname, dimsize)
         return (dimname,)
 
-    def has_date(self,dd):
+    def has_date(self, dd):
         """ Check if a passed date (dd) is already present in the dates included in a file """
 
         if self.file.variables['date'].shape[0] > 0:
@@ -197,11 +193,11 @@ class CT_CDF(object):
         else:
             return False
 
-    def GetVariable(self,varname):
+    def GetVariable(self, varname):
         """ get variable from ncf file"""
         return self.file.variables[varname][:]
 
-    def StandardVar(self,varname):
+    def StandardVar(self, varname):
         """ return properties of standard variables """
         import standardvariables
 
@@ -216,16 +212,16 @@ class CT_CDF(object):
 
         """ Get the index and name of the unlimited dimension"""
 
         try:
-            index = self.file.dimensions.values().index(None)
+            index = self.file.dimensions.values().index(None)
             unlimname = self.file.dimensions.keys()[index]
-            unlimlen = self.file.variables[unlimname].shape[0]
+            unlimlen = self.file.variables[unlimname].shape[0]
         except:
             unlimlen = -1
             unlimname = 'None'
 
-        return (unlimname,unlimlen,)
+        return (unlimname, unlimlen,)
 
-    def AddData(self,datadict,silent=True):
+    def AddData(self, datadict, silent=True):
         """
         Add data to the Nio file object. This is achieved by passing a data dictionary to this routine that holds all metadata plus
         the real data values. For example:
@@ -268,12 +264,12 @@ class CT_CDF(object):
 
         # Make sure the passed dictionary has proper values for the important entries
 
         if None == datadict['values']:
-            raise IOError,"The passed data dictionary does not contain values, please provide data to write"
+            raise IOError, "The passed data dictionary does not contain values, please provide data to write"
         if None == datadict['dims']:
-            raise IOError,"The passed data dictionary does not contain valid dimensions, please provide names of dimensions for the dataset"
+            raise IOError, "The passed data dictionary does not contain valid dimensions, please provide names of dimensions for the dataset"
 
-        if not isinstance(datadict['values'],(list,np.ndarray,)):
-            raise IOError,"Please pass a list or array to the AddData() method, not a %s object"%datadict['values'].__class__
+        if not isinstance(datadict['values'], (list, np.ndarray,)):
+            raise IOError, "Please pass a list or array to the AddData() method, not a %s object" % datadict['values'].__class__
 
         # First, try to get the requested dataset number to place next in file. Note that this is an attribute of the
         # savedict and has to be specified by the user. 
If not specified, the 'next' dataset is number 0 @@ -281,11 +277,11 @@ class CT_CDF(object): try: next = datadict['count'] except: - next=0 + next = 0 # Check if the requested variable already exists in the file, if so, get a variable handle directly - existing_vars=self.file.variables.keys() + existing_vars = self.file.variables.keys() if datadict['name'] in existing_vars: @@ -294,31 +290,31 @@ class CT_CDF(object): # If the variable name is new, create a new dataset else: - if not silent: print 'Creating new dataset: '+datadict['name'] + if not silent: print 'Creating new dataset: ' + datadict['name'] if datadict.has_key('dtype'): # datatype is part of the data dictionary passed if datadict['dtype'] == 'int': - var = self.file.create_variable(datadict['name'],'i',datadict['dims']) + var = self.file.create_variable(datadict['name'], 'i', datadict['dims']) elif datadict['dtype'] == 'char': - var = self.file.create_variable(datadict['name'],'s1',datadict['dims']) + var = self.file.create_variable(datadict['name'], 's1', datadict['dims']) elif datadict['dtype'] == 'double': - var = self.file.create_variable(datadict['name'],'d',datadict['dims']) + var = self.file.create_variable(datadict['name'], 'd', datadict['dims']) else: - var = self.file.create_variable(datadict['name'],'f',datadict['dims']) + var = self.file.create_variable(datadict['name'], 'f', datadict['dims']) else: - var = self.file.create_variable(datadict['name'],'d',datadict['dims']) # default is 'double' + var = self.file.create_variable(datadict['name'], 'd', datadict['dims']) # default is 'double' # All other keys in the datadict are assumed to be variable attributes, except for a few reserved ones - for k,v in datadict.iteritems(): - if k not in ['name','dims','values','_FillValue','count']: - setattr(var,k,v) + for k, v in datadict.iteritems(): + if k not in ['name', 'dims', 'values', '_FillValue', 'count']: + setattr(var, k, v) # Now that the variable instance is returned, start to add the data. 
First figure out if we are dealing with an unlimited dimension
 
-        unlimname,unlimlen = self.inquire_unlimited()
+        unlimname, unlimlen = self.inquire_unlimited()
 
         has_unlimited_dim = (unlimname != 'None')
 
         # If so, check if the first dimension of the passed dataset corresponds to the unlimited dimension
 
@@ -330,7 +326,7 @@ class CT_CDF(object):
             if len(datadict['values']) == 1:
                 var[next] = datadict['values'][0]
             else:
-                var[next,:]=datadict['values']
+                var[next, :] = datadict['values']
 
         # Add the data, note the special occasion for passing only one value (a record)
 
@@ -338,21 +334,21 @@ class CT_CDF(object):
             if len(datadict['values']) == 1:
                 var.assign_value(datadict['values'][0])
             else:
-                var[:]=datadict['values']
+                var[:] = datadict['values']
 
     def close(self):
         """ close the file object """
 
         return self.file.close()
 
-def CreateDirs(rundat,dirname):
+def CreateDirs(rundat, dirname):
 
-    dirname=os.path.join(rundat.outputdir,dirname)
+    dirname = os.path.join(rundat.outputdir, dirname)
     if not os.path.exists(dirname):
-        print "Creating new output directory "+dirname
+        print "Creating new output directory " + dirname
         os.makedirs(dirname)
     else:
-        print 'Writing files to directory: %s'%(dirname,)
+        print 'Writing files to directory: %s' % (dirname,)
 
     return dirname
 
@@ -371,35 +367,35 @@ if __name__ == '__main__':
     except:
         pass
 
-    ncf=CT_CDF('test.nc','w')
-    dimgrid=ncf.AddLatLonDim()
-    dimdate=ncf.AddDateDim(unlimited=True)
+    ncf = CT_CDF('test.nc', 'w')
+    dimgrid = ncf.AddLatLonDim()
+    dimdate = ncf.AddDateDim(unlimited=True)
     #dimlag=ncf.AddLagDim(unlimited=True)
     #dimidate=ncf.AddDateDimFormat()
 
-    unlimname,unlimlen = ncf.inquire_unlimited()
+    unlimname, unlimlen = ncf.inquire_unlimited()
 
-    if unlimlen < 0: unlimlen=0
+    if unlimlen < 0: unlimlen = 0
 
     for n in range(100):
 
-        savedict=ncf.StandardVar(varname='testarray')
-        savedict['dims'] = ('date','latitude',)
-        savedict['count'] = unlimlen+n
-        savedict['values'] = np.arange(180)+n*1.5
-        ncf.AddData(savedict,silent=False)
-
-        savedict=ncf.StandardVar(varname='testarray2')
-        savedict['dims'] = ('date','latitude',)
-        savedict['count'] = unlimlen+n
-        savedict['values'] = np.arange(180)-n*1.5
-        ncf.AddData(savedict,silent=False)
-
-        savedict=ncf.StandardVar(varname='date')
-        savedict['values']= [100.0+n]
-        savedict['count'] = unlimlen+n
-        savedict['dims']=dimdate
-        ncf.AddData(savedict,silent=False)
+        savedict = ncf.StandardVar(varname='testarray')
+        savedict['dims'] = ('date', 'latitude',)
+        savedict['count'] = unlimlen + n
+        savedict['values'] = np.arange(180) + n * 1.5
+        ncf.AddData(savedict, silent=False)
+
+        savedict = ncf.StandardVar(varname='testarray2')
+        savedict['dims'] = ('date', 'latitude',)
+        savedict['count'] = unlimlen + n
+        savedict['values'] = np.arange(180) - n * 1.5
+        ncf.AddData(savedict, silent=False)
+
+        savedict = ncf.StandardVar(varname='date')
+        savedict['values'] = [100.0 + n]
+        savedict['count'] = unlimlen + n
+        savedict['dims'] = dimdate
+        ncf.AddData(savedict, silent=False)
 
     #savedict=ncf.StandardVar(varname='testfail')
     #savedict['values']=range(5)
diff --git a/da/tools/io4.py b/da/tools/io4.py
index ebd8e41..aa7a629 100755
--- a/da/tools/io4.py
+++ b/da/tools/io4.py
@@ -54,8 +54,8 @@ class CT_CDF(netCDF4.Dataset):
         try:
             super(CT_CDF,self).__init__(filename, 'r')
         except RuntimeError:
-            msg = 'Requested file not found for opening: %s'%filename ; logging.error(msg)
-            msg = "Exiting" ; logging.info(msg)
+            logging.error('Requested file not found for opening: %s'%filename)
+            logging.info("Exiting")
             sys.exit(2)
     elif method == 'write':
         #print 
'Adding to existing file' @@ -362,8 +362,8 @@ class CT_HDF(hdf.SD): try: super(CT_HDF,self).__init__(filename) except hdf.HDF4Error: - msg = 'Requested file not found for opening: %s'%filename ; logging.error(msg) - msg = "Exiting" ; logging.info(msg) + logging.error('Requested file not found for opening: %s'%filename) + logging.info("Exiting") sys.exit(2) def GetVariable(self,varname): diff --git a/da/tools/pipeline.py b/da/tools/pipeline.py index eec4daf..8e12470 100755 --- a/da/tools/pipeline.py +++ b/da/tools/pipeline.py @@ -16,12 +16,13 @@ import os import sys import shutil import datetime +import copy import da.tools.rc as rc -header = '\n\n *************************************** ' -footer = ' *************************************** \n ' +header = '\n\n *************************************** ' +footer = ' *************************************** \n ' -def EnsembleSmootherPipeline(DaCycle,PlatForm, DaSystem, Samples,StateVector,ObsOperator,Optimizer): +def EnsembleSmootherPipeline(DaCycle, PlatForm, DaSystem, Samples, StateVector, ObsOperator, Optimizer): """ The main point of entry for the pipeline """ # Append current working dir to path @@ -30,57 +31,51 @@ def EnsembleSmootherPipeline(DaCycle,PlatForm, DaSystem, Samples,StateVector,Obs # Import methods and classes contained in this package - from da.tools.general import StoreData,RestoreData + from da.tools.general import StoreData, RestoreData - msg = header+"Initializing current cycle"+footer ; logging.info(msg) + logging.info(header + "Initializing current cycle" + footer) - dummy = JobStart(DaCycle, DaSystem, PlatForm, StateVector, Samples, ObsOperator) + JobStart(DaCycle, DaSystem, PlatForm, StateVector, Samples, ObsOperator) - msg = header+"starting PrepareState"+footer ; logging.info(msg) + logging.info(header + "starting PrepareState" + footer) - dummy = PrepareState(DaCycle, StateVector) + PrepareState(DaCycle, StateVector) - msg = header+"starting SampleState"+footer ; logging.info(msg) + logging.info(header + "starting SampleState" + footer) - dummy = SampleState(DaCycle,Samples,StateVector,ObsOperator) + SampleState(DaCycle, Samples, StateVector, ObsOperator) - msg = header+"starting Invert"+footer ; logging.info(msg) + logging.info(header + "starting Invert" + footer) - dummy = Invert(DaCycle, StateVector, Optimizer) + Invert(DaCycle, StateVector, Optimizer) - msg = header+"starting Advance"+footer ; logging.info(msg) + logging.info(header + "starting Advance" + footer) - dummy = Advance(DaCycle, Samples, StateVector, ObsOperator) - - msg = header+"starting SaveAndSubmit"+footer ; logging.info(msg) - - dummy = SaveAndSubmit(DaCycle, StateVector) - - msg = "Cycle finished...exiting pipeline" ; logging.info(msg) + Advance(DaCycle, Samples, StateVector, ObsOperator) + + logging.info(header + "starting SaveAndSubmit" + footer) + + SaveAndSubmit(DaCycle, StateVector) + + logging.info("Cycle finished...exiting pipeline") #################################################################################################### def JobStart(DaCycle, DaSystem, DaPlatForm, StateVector, Samples, ObsOperator): """ Set up the job specific directory structure and create an expanded rc-file """ - dummy = DaSystem.Initialize() - - dummy = DaSystem.Validate() - - DaCycle.DaSystem = DaSystem - - DaCycle.DaPlatForm = DaPlatForm + DaSystem.Initialize() + DaSystem.Validate() + + DaCycle.DaSystem = DaSystem + DaCycle.DaPlatForm = DaPlatForm - dummy = DaCycle.Initialize() + DaCycle.Initialize() StateVector.DaCycle = DaCycle # also embed object in 
StateVector so it can access cycle information for I/O etc
-
-    Samples.DaCycle = DaCycle # also embed object in Samples object so it can access cycle information for I/O etc
-
+    Samples.DaCycle = DaCycle      # also embed object in Samples object so it can access cycle information for I/O etc
     ObsOperator.DaCycle = DaCycle  # also embed object in ObsOperator object so it can access cycle information for I/O etc
 
-    return None
-
 
 def PrepareState(DaCycle, StateVector):
     """ Set up the input data for the forward model: obs and parameters/fluxes"""
 
@@ -90,42 +85,34 @@ def PrepareState(DaCycle, StateVector):
     # with new values for each week. After we have constructed the StateVector, it will be propagated by one cycle length so it is ready to be used
     # in the current cycle
 
-    dummy = StateVector.Initialize()
+    StateVector.Initialize()
 
     if not DaCycle['time.restart']:
 
         # Fill each week from n=1 to n=nlag with a new ensemble
-
-        nlag = StateVector.nlag
-        for n in range(0,nlag):
-            date = DaCycle['time.start']+datetime.timedelta(days = (n+0.5)* int(DaCycle['time.cycle']))
-            cov = StateVector.GetCovariance(date)
-            dummy = StateVector.MakeNewEnsemble(n+1,cov)
+        for n in range(0, StateVector.nlag):
+            date = DaCycle['time.start'] + datetime.timedelta(days=(n + 0.5) * int(DaCycle['time.cycle']))
+            cov = StateVector.GetCovariance(date)
+            StateVector.MakeNewEnsemble(n + 1, cov)
 
     else:
 
         # Read the StateVector data from file
 
-        savedir = DaCycle['dir.restart.current']
-        filtertime = DaCycle['time.start'].strftime('%Y%m%d')
-        filename = os.path.join(savedir,'savestate.nc')
-
+        #filtertime = DaCycle['time.start'].strftime('%Y%m%d')
+        filename = os.path.join(DaCycle['dir.restart.current'], 'savestate.nc')
         StateVector.ReadFromFile(filename) # by default will read "opt"(imized) variables, and then propagate
 
     # Now propagate the ensemble by one cycle to prepare for the current cycle
-
-    dummy = StateVector.Propagate()
+    StateVector.Propagate()
 
     # Finally, also write the StateVector to a file so that we can always access the a-priori information
 
-    savedir = DaCycle['dir.output']
-    filename = os.path.join(savedir,'savestate.nc')
+    filename = os.path.join(DaCycle['dir.output'], 'savestate.nc')
+    StateVector.WriteToFile(filename) # write prior info because StateVector.Isoptimized == False for now
 
-    dummy = StateVector.WriteToFile(filename) # write prior info because StateVector.Isoptimized == False for now
-
-    return None
 
-def SampleState(DaCycle,Samples,StateVector, ObservationOperator):
+def SampleState(DaCycle, Samples, StateVector, ObservationOperator):
     """ Sample the filter state for the inversion """
 
@@ -137,178 +124,154 @@ def SampleState(DaCycle,Samples,StateVector, ObservationOperator):
 
     #dummy = DaCycle.MoveSaveData(io_option='store',save_option='partial',filter=[])
     #msg = "All restart data have been copied to the save/tmp directory for future use" ; logging.debug(msg)
 
-    nlag = int(DaCycle['time.nlag'])
-    msg = "Sampling model will be run over %d cycles"% nlag ; logging.info(msg)
+    nlag = int(DaCycle['time.nlag'])
+    logging.info("Sampling model will be run over %d cycles" % nlag)
 
     for lag in range(nlag):
-
-        msg = header+".....Ensemble Kalman Filter at lag %d"% (int(lag)+1,) ; logging.info(msg)
-
-        dummy = SampleOneCycle(DaCycle,Samples,StateVector, ObservationOperator,lag)
-
+#LU wouldn't it be enough to write this differently here, without the one-element tuple and the parentheses? and in general?
+#LU this should not be int(lag) but just lag
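Both notes are fair: lag comes straight from range(nlag), so the int() cast is a no-op, and a single % substitution does not need a one-element tuple. A minimal equivalent of the call that follows, as an editorial sketch rather than part of the committed patch:

    logging.info(header + ".....Ensemble Kalman Filter at lag %d" % (lag + 1))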
+    logging.info(header + ".....Ensemble Kalman Filter at lag %d" % (int(lag) + 1,))
+    SampleOneCycle(DaCycle, Samples, StateVector, ObservationOperator, lag)
 
     # Optionally, post-processing of the model output can be added that deals for instance with
     # sub-sampling of time series, vertical averaging, etc.
 
-    return None
 
-def SampleOneCycle(DaCycle,Samples,StateVector, ObservationOperator,lag):
+def SampleOneCycle(DaCycle, Samples, StateVector, ObservationOperator, lag):
     """ Perform all actions needed to sample one cycle """
 
-    import copy
-
+    
     # First set up the information for time start and time end of this sample
-
+#LU for the first one in SampleOneCycle, we take the initial data from the obs operator
     if lag == 0:
-        dummy = ObservationOperator.GetInitialData()
+        ObservationOperator.GetInitialData()
+#LU and what does this one do??
+    DaCycle.SetSampleTimes(lag)
 
-    dummy = DaCycle.SetSampleTimes(lag)
+    startdate = DaCycle['time.sample.start']
+    enddate = DaCycle['time.sample.end']
 
-    startdate = DaCycle['time.sample.start']
-    enddate = DaCycle['time.sample.end']
-    DaCycle['time.sample.window'] = lag
-    DaCycle['time.sample.stamp'] = "%s_%s"%(startdate.strftime("%Y%m%d%H"),enddate.strftime("%Y%m%d%H"),)
+    DaCycle['time.sample.window'] = lag
+    DaCycle['time.sample.stamp'] = "%s_%s" % (startdate.strftime("%Y%m%d%H"), enddate.strftime("%Y%m%d%H"))
 
-    msg = "New simulation interval set : " ; logging.info(msg)
-    msg = "                  start date : %s " % startdate.strftime('%F %H:%M') ; logging.info(msg)
-    msg = "                  end date : %s " % enddate.strftime('%F %H:%M') ; logging.info(msg)
-    msg = "                  file stamp: %s " % DaCycle['time.sample.stamp'] ; logging.info(msg)
+    logging.info("New simulation interval set : ")
+    logging.info("                  start date : %s " % startdate.strftime('%F %H:%M'))
+    logging.info("                  end date : %s " % enddate.strftime('%F %H:%M'))
+    logging.info("                  file stamp: %s " % DaCycle['time.sample.stamp'])
 
     # Implement something that writes the ensemble member parameter info to file, or manipulates them further into the
     # type of info needed in your transport model
 
-    dummy = StateVector.WriteMembersToFile(lag+1)
-
-    dummy = Samples.Initialize()
-
-    dummy = Samples.Validate()
+    StateVector.WriteMembersToFile(lag + 1)
+    Samples.Initialize()
+    Samples.Validate()
+    Samples.AddObs()
 
-    dummy = Samples.AddObs()
-
-    filename = Samples.WriteSampleInfo()
+    filename = Samples.WriteSampleInfo()
 
     # Write filename to DaCycle, and to output collection list
 
     DaCycle['ObsOperator.inputfile'] = filename
-
-    DaCycle.OutputFileList.extend([filename])
-    msg = "Appended Obs filename to DaCycle for collection " ; logging.debug(msg)
+    DaCycle.OutputFileList.append(filename)
+    logging.debug("Appended Obs filename to DaCycle for collection ")
 
     # Run the observation operator
 
-    dummy = RunForecastModel(DaCycle,ObservationOperator)
+    RunForecastModel(DaCycle, ObservationOperator)
 
     # Add the observation operator restart+output files to the general Output+RestartFileList, only after 'Advance'
-
+#LU again: if lag == 0
+#LU so this means that if we have lag = 0 the restartfilelist is taken from the obs operator
+#LU the output too, and earlier the inputfilename from the obs operator was added as well... so the input is always part of the output
     if lag == 0:
-        DaCycle.RestartFileList.extend( ObservationOperator.RestartFileList )
-        DaCycle.OutputFileList.extend(ObservationOperator.OutputFileList )
-        msg = "Appended ObsOperator restart and output file lists to DaCycle for collection " ; logging.debug(msg)
+        DaCycle.RestartFileList.extend(ObservationOperator.RestartFileList)
+        DaCycle.OutputFileList.extend(ObservationOperator.OutputFileList)
+        logging.debug("Appended ObsOperator restart and output file lists to DaCycle for collection ")
 
     # Add model-data mismatch to all samples, this *might* use output from the ensemble in the future??
 
-    dummy = Samples.AddModelDataMismatch()
-
-    msg = "Added Model Data Mismatch to all samples " ; logging.debug(msg)
+    Samples.AddModelDataMismatch()
+    logging.debug("Added Model Data Mismatch to all samples ")
 
     # Read forecast model samples that were written to NetCDF files by each member, also add the obs to the statevector
     # Note that obs will only be added to the statevector if either this is the first step (restart=False), or lag==nlag
-
-    if lag == int(DaCycle['time.nlag'])-1 or DaCycle['time.restart']==False :
+#LU if it is the last one, then we will add the simulations.
+    if lag == int(DaCycle['time.nlag']) - 1 or DaCycle['time.restart'] == False:
 
         # We retrieve all model samples from one output file written by the ObsOperator. If the ObsOperator creates
         # one file per member, some logic needs to be included to merge all files!!!
 
-        filename = os.path.join(ObservationOperator.outputdir,'flask_%s.nc'%DaCycle['time.sample.stamp'])
-        dummy = Samples.AddSimulations(filename)
+        filename = os.path.join(ObservationOperator.outputdir, 'flask_%s.nc' % DaCycle['time.sample.stamp'])
+        Samples.AddSimulations(filename)
 
         # Give each member a model sample by first copying all samples, and then selecting the data for Member #n
 
         members = StateVector.EnsembleMembers[lag]
-        for n,Member in enumerate(members):
-
-            Member.ModelSample = copy.deepcopy(Samples)
-
+        for n, Member in enumerate(members):
+#LU is it necessary to do it like that??
+#LU first every member gets the same sample set, and then...
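...and then each member keeps only its own column. To make the deepcopy-then-subselect step below concrete, a small standalone sketch with stand-in names and sizes (MockSample and nmembers = 3 are hypothetical, not CTDAS code):

    import copy
    import numpy as np

    class MockSample(object):
        def __init__(self, simulated):
            self.simulated = simulated            # one simulated value per ensemble member

    nmembers = 3
    samples = [MockSample(np.arange(nmembers, dtype=float))]   # one observation, three members
    for n in range(nmembers):
        member_copy = copy.deepcopy(samples)      # every member first receives the full set...
        for sim in member_copy:
            sim.simulated = sim.simulated[n]      # ...then keeps only its own column n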
+        StateVector.nobs += Samples.getlength()
+        logging.debug("Added samples from the observation operator to each member of the StateVector")
+        logging.debug("StateVector now carries %d samples" % StateVector.nobs)
-        StateVector.nobs += Samples.getlength()
-        msg = "Added samples from the observation operator to each member of the StateVector" ; logging.debug(msg)
-
-        msg = "StateVector now carries %d samples"%StateVector.nobs ; logging.debug(msg)
-
-    return None
-
-def Invert(DaCycle, StateVector, Optimizer ):
+def Invert(DaCycle, StateVector, Optimizer):
     """ Perform the inverse calculation """
 
-    dims = ( int(DaCycle['time.nlag']),
+    dims = (int(DaCycle['time.nlag']),
             int(DaCycle['da.optimizer.nmembers']),
             int(DaCycle.DaSystem['nparameters']),
-            StateVector.nobs, )
+            StateVector.nobs,)
 
-    dummy = Optimizer.Initialize(dims)
-
-    dummy = Optimizer.StateToMatrix(StateVector )
-
-    dummy = Optimizer.WriteDiagnostics(DaCycle,StateVector,type='prior')
-
-    dummy = Optimizer.SetLocalization('None')
+    Optimizer.Initialize(dims)
+    Optimizer.StateToMatrix(StateVector)
+    Optimizer.WriteDiagnostics(DaCycle, StateVector, type='prior')
+    Optimizer.SetLocalization('None')
 
     if not DaCycle.DaSystem.has_key('opt.algorithm'):
-        msg = "There was no minimum least squares algorithm specified in the DA System rc file (key : opt.algorithm)" ; logging.info(msg)
-        msg = "...using serial algorithm as default..." ; logging.info(msg)
-        dummy = Optimizer.SerialMinimumLeastSquares()
+        logging.info("There was no minimum least squares algorithm specified in the DA System rc file (key : opt.algorithm)")
+        logging.info("...using serial algorithm as default...")
+        Optimizer.SerialMinimumLeastSquares()
     elif DaCycle.DaSystem['opt.algorithm'] == 'serial':
-        msg = "Using the serial minimum least squares algorithm to solve ENKF equations" ; logging.info(msg)
-        dummy = Optimizer.SerialMinimumLeastSquares()
+        logging.info("Using the serial minimum least squares algorithm to solve ENKF equations")
+        Optimizer.SerialMinimumLeastSquares()
     elif DaCycle.DaSystem['opt.algorithm'] == 'bulk':
-        msg = "Using the bulk minimum least squares algorithm to solve ENKF equations" ; logging.info(msg)
-        dummy = Optimizer.BulkMinimumLeastSquares()
+        logging.info("Using the bulk minimum least squares algorithm to solve ENKF equations")
+        Optimizer.BulkMinimumLeastSquares()
+#LU wouldn't an else branch that raises an error be useful here?
 
-    dummy = Optimizer.MatrixToState(StateVector )
-
-    dummy = Optimizer.WriteDiagnostics(DaCycle,StateVector,type='optimized')
+    Optimizer.MatrixToState(StateVector)
+    Optimizer.WriteDiagnostics(DaCycle, StateVector, type='optimized')
 
     StateVector.isOptimized = True
 
-    return None
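The review question above is a fair one: with the current if/elif chain, an unrecognized opt.algorithm value falls through silently and no solver runs at all. A possible guard, sketched here as a suggestion only (the ChooseAlgorithm helper is hypothetical and not part of this patch):

    import logging

    def ChooseAlgorithm(Optimizer, algorithm):
        # dispatch on the opt.algorithm rc key and fail loudly on unknown values
        if algorithm == 'serial':
            Optimizer.SerialMinimumLeastSquares()
        elif algorithm == 'bulk':
            Optimizer.BulkMinimumLeastSquares()
        else:
            msg = "Unknown opt.algorithm value in DA System rc file : %s" % algorithm
            logging.error(msg)
            raise ValueError(msg)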
-def Advance( DaCycle, Samples, StateVector, ObservationOperator):
+def Advance(DaCycle, Samples, StateVector, ObservationOperator):
     """ Advance the filter state to the next step """
 
     # This is the advance of the modeled CO2 state. Optionally, routines can be added to advance the state vector (mean + covariance)
 
     # Then, restore model state from the start of the filter
 
-    msg = "Sampling model will be run over 1 cycle" ; logging.info(msg)
-
-    SampleOneCycle(DaCycle,Samples,StateVector, ObservationOperator,0)
-
-    return None
+    logging.info("Sampling model will be run over 1 cycle")
+    SampleOneCycle(DaCycle, Samples, StateVector, ObservationOperator, 0)
 
-def SaveAndSubmit( DaCycle, StateVector):
+def SaveAndSubmit(DaCycle, StateVector):
     """ Save the model state and submit the next job """
 
-    savedir = DaCycle['dir.output']
-    filename = os.path.join(savedir,'savestate.nc')
-
-    dummy = StateVector.WriteToFile(filename)
-
-    DaCycle.RestartFileList.extend( [filename] )  # write optimized info because StateVector.Isoptimized == False for now
-
-    dummy = DaCycle.Finalize()
-
-    return None
+    filename = os.path.join(DaCycle['dir.output'], 'savestate.nc')
+
+    StateVector.WriteToFile(filename)
+    DaCycle.RestartFileList.append(filename)  # write optimized info because StateVector.isOptimized == False for now
+    DaCycle.Finalize()
 
-def RunForecastModel(DaCycle,ObsOperator):
+def RunForecastModel(DaCycle, ObsOperator):
     """Prepare and execute a forecast step using an external Fortran model. Note that the flavor of model used is determined
        in the very first line, where the import statement of module "model" depends on a setting in your da.rc file. After
        that choice, the module written specifically for a particular
@@ -324,15 +287,13 @@ def RunForecastModel(DaCycle,ObsOperator):
        submitting a string of jobs to the queue, or simply spawning a subprocess, or ...
     """
 
-    status = ObsOperator.Initialize()
-
-    status = ObsOperator.ValidateInput()
-
-    status = ObsOperator.Run()
-
-    dummy = ObsOperator.SaveData()
-
-    return status
+    ObsOperator.Initialize()
+#LU does this function actually exist?
+    ObsOperator.ValidateInput()
+    ObsOperator.Run()
+    ObsOperator.SaveData()
-- 
GitLab
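A closing note on RunForecastModel: with the unchecked status returns removed, the four calls now rely on exceptions to signal failure. A caller that wants one clear log line around the whole sequence could wrap it as below (a sketch only; the wrapper name is invented and nothing here is part of the patch):

    import logging

    def RunForecastModelChecked(DaCycle, ObsOperator):
        # run the four forecast steps and log context before re-raising any failure
        try:
            ObsOperator.Initialize()
            ObsOperator.ValidateInput()
            ObsOperator.Run()
            ObsOperator.SaveData()
        except Exception:
            logging.exception("Forecast model step failed for interval %s" % DaCycle['time.sample.stamp'])
            raise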