Commit 8f88ead7 authored by Peters, Wouter's avatar Peters, Wouter
Browse files

Slowly getting closer to the new design. Observations and StateVector are now...

Slowly getting closer to the new design. Observations and StateVector are now class based, but objects are getting more elaborate each time. One remaining problem is the relation between the StateVector, the ensemble members, and the observations. These are all linked but no object of one type can easily contain the other. 

For instance, does a model mixing ratio sample belong to an observation object because that is what it represents and is derived from? Or is it part of an ensemble member object because each member creates one sample set? And does a member object belong to the state vector and if so, how does it play together with the ohter time steps in the filter? Are these all separate StateVectoir objects or is there only one StateVector with nlag sets of data??

The issue with nlag is not nicely resolved now.

Next task is to build the optimizer such that it takes all info from the different objects and performs a LSQM.
parent cfc6bb67
...@@ -12,6 +12,7 @@ import os ...@@ -12,6 +12,7 @@ import os
import sys import sys
import rc import rc
import logging import logging
import datetime
identifier = 'CarbonTracker CO2' identifier = 'CarbonTracker CO2'
...@@ -23,131 +24,256 @@ needed_rc_items = ['obs.input.dir', ...@@ -23,131 +24,256 @@ needed_rc_items = ['obs.input.dir',
'deltaco2.prefix', 'deltaco2.prefix',
'regtype'] 'regtype']
def MakeResiduals(rc_da_shell): ################### Begin Class DaInfo ###################
""" Method makes residuals of CO2 mixing ratios from the observed and sampled values """
import pyhdf.SD as HDF
# Read observations NetCDF file class DaInfo():
""" Information on the data assimilation system used. For CarbonTracker, this is an rc-file with settings.
The settings list includes at least the values above, in needed_rc_items.
"""
ObsFileName = os.path.join(rc_da_shell['dir.output'],rc_da_shell['obs.full.filename']) def __init__(self,rcfilename):
ncf = HDF.SD(ObsFileName) """
obsdates = ncf.select('decimal_date').get() Initialization occurs from passed rc-file name
dummy = ncf.end() """
msg = "Successfully read data from obs file (%s)"%ObsFileName ; logging.debug(msg)
# Read samples NetCDF file self.LoadRc(rcfilename)
self.ValidateRC()
SamplesFileName = ObsFileName.replace('observations','samples').replace('input','output') def __str__(self):
ncf = HDF.SD(SamplesFileName) """
sampdates = ncf.select('decimal_date').get() String representation of a DaInfo object
dummy = ncf.end() """
msg = "Successfully read data from model sample file (%s)"%SamplesFileName ; logging.debug(msg)
if (obsdates==sampdates).all(): msg = "===============================================================" ; print msg
pass msg = "DA System Info rc-file is %s" % self.RcFileName ; print msg
msg = "===============================================================" ; print msg
else: return ""
msg = "Dates in the observation file and samples file are not equal, exiting..." ; logging.error(msg)
def LoadRc(self,RcFileName):
"""
This method loads a DA System Info rc-file with settings for this simulation
"""
self.da_settings = rc.read(RcFileName)
self.RcFileName = RcFileName
self.DaRcLoaded = True
msg = 'DA System Info rc-file (%s) loaded successfully'%self.RcFileName ; logging.info(msg)
return True
def ValidateRC(self):
"""
Validate the contents of the rc-file given a dictionary of required keys
"""
for k,v in self.da_settings.iteritems():
if v == 'True' : self.da_settings[k] = True
if v == 'False': self.da_settings[k] = False
for key in needed_rc_items:
if not self.da_settings.has_key(key):
status,msg = ( False,'Missing a required value in rc-file : %s' % key)
logging.error(msg)
raise IOError,msg raise IOError,msg
msg = "Residuals created, continuing" ; logging.info(msg) status,msg = ( True,'DA System Info settings have been validated succesfully' ) ; logging.debug(msg)
return None return None
################### End Class DaInfo ###################
def PrepareObs(rc_da_shell,rc_da_system,type='forecast'): ################### Begin Class MixingRatioSample ###################
""" PrepareObs """
import pycdf as CDF class MixingRatioSample():
import shutil """
Holds the data that defines a Mixing Ratio Sample in the data assimilation framework. Sor far, this includes all
attributes listed below in the __init__ method. One can additionally make more types of data, or make new
objects for specific projects.
"""
def __init__(self,xdate,code='XXX',obs=0.0,simulated=0.0,resid=0.0,hphr=0.0,mdm=0.0,flag=0,height=0.0,lat=-999.,lon=-999.,evn='0000',sdev=0.0):
saveas = os.path.join(rc_da_shell['dir.input'],'observations.nc') self.code = code.strip() # Site code
# ncf = CDF.CDF(saveas,CDF.NC.WRITE|CDF.NC.TRUNC|CDF.NC.CREATE) self.xdate = xdate # Date of obs
# ncf.CreationDate = datetime.datetime.now().strftime('%Y %B %d') self.obs = obs # Value observed
# ncf.CreationUser = os.environ['USER'] self.simulated = simulated # Value simulated by model
# ncf.CreationDir = rc_da_shell['dir.da_submit'] self.resid = resid # Mixing ratio residuals
# ncf.CreationScript = sys.argv[0] self.hphr = hphr # Mixing ratio prior uncertainty from fluxes and (HPH) and model data mismatch (R)
# ncf.close() self.mdm = mdm # Model data mismatch
self.flag = flag # Flag
self.height = height # Sample height
self.lat = lat # Sample lat
self.lon = lon # Sample lon
self.id = evn # Event number
self.sdev = sdev # standard deviation of ensemble
self.masl = True # Sample is in Meters Above Sea Level
self.mag = not self.masl # Sample is in Meters Above Ground
# msg = 'Created a new observation input file (%s) ' % saveas ; logging.info(msg) def __str__(self):
day=self.xdate.strftime('%Y-%m-%d %H:%M:%S')
return ' '.join(map(str,[self.code,day,self.obs,self.flag,self.id]))
# For carbontracker the observations already come in netcdf format thanks to Andy and Ken. We will therefore simply merge ################### End Class MixingRatioSample ###################
# the observations.nc file with the pre-cooked obs files
################### Begin Class MixingRatioList ###################
# Select observation input file for this time step class MixingRatioList(list):
#sfname = rc_da_system['obs.input.fname']+'.%s'%(rc_da_shell['startdate'].strftime('%Y%m%d') )+'.nc' """ This is a special type of list that holds MixingRatioSample objects. It has methods to extract data from such a list easily """
sfname = rc_da_system['obs.input.fname']+'.%s'%('20050305')+'.nc' from numpy import array
def getvalues(self,name,constructor=array):
return constructor([getattr(o,name) for o in self])
def unflagged(self):
return MixingRatioSample([o for o in self if o.flag == 0])
def flagged(self):
return MixingRatioSample([o for o in self if o.flag != 0])
def selectsite(self,site='mlo'):
l=[o for o in self if o.code == site]
return MixingRatioSample(l)
def selecthours(self,hours=range(1,24)):
l=[o for o in self if o.xdate.hour in hours]
return MixingRatioSample(l)
################### End Class MixingRatioList ###################
################### Begin Class CtObservations ###################
class CtObservations():
""" an object that holds data + methods and attributes needed to manipulate mixing ratio values """
def __init__(self,DaInfo):
sfname = DaInfo.da_settings['obs.input.fname']+'.%s'%('20050305')+'.nc'
msg = 'This filename is hardcoded but needs replacement' ; logging.warning(msg) msg = 'This filename is hardcoded but needs replacement' ; logging.warning(msg)
filename = os.path.join(rc_da_system['obs.input.dir'],sfname) filename = os.path.join(DaInfo.da_settings['obs.input.dir'],sfname)
if not os.path.exists(filename): if not os.path.exists(filename):
msg = 'Could not find the required observation input file (%s) ' % filename ; logging.error(msg) msg = 'Could not find the required observation input file (%s) ' % filename ; logging.error(msg)
raise IOError,msg raise IOError,msg
else: else:
self.ObsFilename = filename
dummy = shutil.copy2(filename,saveas) self.MrData = None
msg = 'Successfully copied file (%s) to (%s) ' % (filename,saveas) ; logging.info(msg)
rc_da_shell['obs.full.filename'] = saveas def __str__(self):
""" Prints a list of MixingRatioSample objects"""
return self.ObsFilename
return rc_da_shell def AddObs(self):
""" Returns a MixingRatioList holding individual MixingRatioSample objects for all obs in a file"""
import pyhdf.SD as HDF
import datetime as dtm
from string import strip, join
ncf = HDF.SD(self.ObsFilename)
idates = ncf.select('date_components').get()
ids = ncf.select('id').get()
sites = ncf.select('site').get()
sites = [join(s,'').lower() for s in sites]
sites = map(strip,sites)
lats = ncf.select('lat').get()
lons = ncf.select('lon').get()
alts = ncf.select('alt').get()
obs = ncf.select('obs').get()
species = ncf.select('species').get()
date = ncf.select('date').get()
window = ncf.select('sampling_strategy').get()
flags = ncf.select('NOAA_QC_flags').get()
flags = [join(s,'') for s in flags]
flags = map(strip,flags)
dummy = ncf.end()
msg = "Successfully read data from obs file (%s)"%self.ObsFilename ; logging.debug(msg)
def PrepareEnsemble(rc_da_shell ): dates = [dtm.datetime(*d) for d in idates]
"""
Prepare the ensemble of parameters needed by the forecast model. There are two possible pathways: if self.MrData == None:
self.MrData = MixingRatioList([])
(A) Construct a brand new ensemble from a covariance matrix for n in range(len(dates)):
self.MrData.append( MixingRatioSample(dates[n],sites[n],obs[n],0.0,0.0,0.0,0.0,flags[n],alts[n],lats[n],lons[n],ids[n],0.0) )
(B) Get an existing ensemble and propagate it msg = "Added %d observations to the MrData list" % len(dates) ; logging.info(msg)
def AddSimulations(self, filename):
""" Adds model simulated values to the mixing ratio objects """
The steps for procedure A are: import pyhdf.SD as HDF
(A1) Construct the covariance matrix
(A2) Make a Cholesky decomposition
(A3) Prepare a set of [nmembers] input parameters for each forecast member
The steps for procedure B are: ncf = HDF.SD(filename)
ids = ncf.select('id').get()
simulated = ncf.select('sampled_mole_frac').get()*1e6
dummy = ncf.end()
msg = "Successfully read data from model sample file (%s)"%filename ; logging.debug(msg)
(B1) Get existing parameter values for each member obs_ids = self.MrData.getvalues('id')
(B2) Run them through a forecast model
for n in range(len(ids)):
Both procedures finish with writing the parameter values to a NetCDF file try:
index = obs_ids.index(ids[n])
except:
msg = 'A model sample was found that did not match any ID in the existing observation list. Skipping...' ; logging.warning(msg)
continue
dummy = self.MrData[index].simulated = simulated[n]
""" msg = "Added %d simulated values to the MrData list" % len(ids) ; logging.info(msg)
# Get the da system specific rc-items, note that they've been validated already by FillObs
rc_da_system = rc.read(rc_da_shell['da.system.rc'],silent = True) ################### End Class CtObservations ###################
nlag = int(rc_da_shell['time.nlag']) ################### Begin Class CtMember ###################
for n in range(nlag): class CtMember():
"""
An ensemble member for CarbonTracker. This consists of
* parameter values
* a CtObservations object with sampled mixing ratios
* file names to write/read the data for this member
"""
if n == nlag: def __init__(self, membernumber):
self.membernumber = membernumber # the member number
self.randomstate = (None,) # the state of the random number generator needed to reproduce the random sequence
self.ModelSamples = None # CtObservations object that holds model samples for this member
self.ParameterValues = None # Parameter values of this member
self.ParameterOutputFile = None # File to which the member info should be written
self.SampleInputFile = None # File from which the sample info can be read
# Route A: make a new ensemble def __str__(self):
return "%03d"%self.membernumber
Xprime,Qprior = MakeNewEnsemble(rc_da_shell, rc_da_system)
else: ################### End Class CtMember ###################
# Route B: Propagate existing ensemble ################### Begin Class CtStateVector ###################
Xprime,Qprior = [0,0] class CtStateVector():
""" an object that holds data + methods and attributes needed to manipulate state vector values """
def __init__(self,CycleInfo,DaInfo):
self.CycleInfo = CycleInfo
self.DaInfo = DaInfo
self.nlag = int(CycleInfo.da_settings['time.nlag'])
self.nmembers = int(CycleInfo.da_settings['forecast.nmembers'])
self.nparams = int(DaInfo.da_settings['nparameters'])
self.isOptimized = False
return Xprime, Qprior self.EnsembleMembers = range(self.nlag)
for n in range(self.nlag):
self.EnsembleMembers[n] = []
def __str__(self):
return "This is a CarbonTracker state vector object"
def MakeNewEnsemble(rc_da_shell, rc_da_system): def MakeNewEnsemble(self,nlag):
""" Make a new ensemble from specified matrices """ """ Make a new ensemble from specified matrices """
import pyhdf.SD as hdf import pyhdf.SD as hdf
...@@ -156,8 +282,8 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system): ...@@ -156,8 +282,8 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system):
# Get the needed matrices from the specified covariance files # Get the needed matrices from the specified covariance files
file_ocn_cov = rc_da_system['ocn.covariance'] file_ocn_cov = self.DaInfo.da_settings['ocn.covariance']
file_bio_cov = rc_da_system['bio.covariance'] file_bio_cov = self.DaInfo.da_settings['bio.covariance']
for file in [file_ocn_cov,file_bio_cov]: for file in [file_ocn_cov,file_bio_cov]:
...@@ -182,10 +308,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system): ...@@ -182,10 +308,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system):
# Once we have the matrices, we can start to make the full covariance matrix, and then decompose it # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it
nparams = int(rc_da_system['nparameters']) fullcov = np.zeros((self.nparams,self.nparams),float)
nmembers = int(rc_da_shell['forecast.nmembers'])
fullcov = np.zeros((nparams,nparams),float)
nocn = cov_ocn.shape[0] nocn = cov_ocn.shape[0]
nbio = cov_bio.shape[0] nbio = cov_bio.shape[0]
...@@ -195,7 +318,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system): ...@@ -195,7 +318,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system):
# Visually inspect full covariance matrix if verbose # Visually inspect full covariance matrix if verbose
if rc_da_shell['verbose']: if self.CycleInfo.da_settings['verbose']:
plt.imshow(fullcov) plt.imshow(fullcov)
plt.colorbar() plt.colorbar()
...@@ -212,7 +335,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system): ...@@ -212,7 +335,7 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system):
msg = 'Cholesky decomposition has succeeded offline' ; logging.info(msg) msg = 'Cholesky decomposition has succeeded offline' ; logging.info(msg)
msg = 'Appr. degrees of freedom in covariance matrix is %s'%(int(dof)) ; logging.info(msg) msg = 'Appr. degrees of freedom in covariance matrix is %s'%(int(dof)) ; logging.info(msg)
if rc_da_shell['verbose']: if self.CycleInfo.da_settings['verbose']:
plt.plot(s) plt.plot(s)
plt.savefig('eigenvaluespectrum.png') plt.savefig('eigenvaluespectrum.png')
...@@ -220,50 +343,183 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system): ...@@ -220,50 +343,183 @@ def MakeNewEnsemble(rc_da_shell, rc_da_system):
# Now create a set of random instances of the given covariance structure # Now create a set of random instances of the given covariance structure
# First see the random number generator with a fixed value from the rc-file, this is to make reproducible results dummy = np.random.seed()
dummy = np.random.seed(int(rc_da_system['random.seed'])) # mean does not hold data yet
# Structure to hold the data NewMember = CtMember(0)
NewMember.ParameterValues = np.ones((self.nparams,self.nmembers),float)
dummy = self.EnsembleMembers[nlag].append(NewMember)
parameter_deviations = np.zeros((nparams,nmembers),float) # This is array X' # Create members 1:nmembers and add to EnsembleMembers list
for member in range(nmembers): for member in range(1,self.nmembers):
rands = np.random.randn(nparams) randstate = np.random.get_state()
parameter_deviations[:,member] = np.dot(C,rands) rands = np.random.randn(self.nparams)
if rc_da_shell['verbose']: NewMember = CtMember(member)
NewMember.ParameterValues = np.dot(C,rands)
NewMember.randomstate = randstate
dummy = self.EnsembleMembers[nlag].append(NewMember)
msg = '%d new ensemble members were added to the state vector'%(self.nmembers) ; logging.info(msg)
plt.hist(parameter_deviations[20,:],20)
plt.savefig('par20histogram.png')
plt.close('all')
return parameter_deviations, fullcov ################### End Class CtStateVector ###################
################### Begin Class CtOptimizer ###################
class CtOptimizer():
"""
This creates an instance of a CarbonTracker optimization object. It handles the minimum least squares optimization
of the CT state vector given a set of CT sample objects. Two routines will be implemented: one where the optimization
is sequential and one where it is the equivalent matrix solution. The choice can be made based on considerations of speed
and efficiency.
"""
def __init__(self, CycleInfo, DaInfo):
self.nlag = int(CycleInfo.da_settings['time.nlag'])
self.nmembers = int(CycleInfo.da_settings['forecast.nmembers'])
self.nparams = int(DaInfo.da_settings['nparameters'])
self.CreateMatrices()
return None
def CreateMatrices(self):
""" Create Matrix space needed in optimization routine """
import numpy as np
self.X_prior = np.array( (self.nlag*self.nparams,), 'float')
self.X_prior_prime = np.array( (self.nmembers,self.nlag*self.nparams,), 'float')
def FillMatrices(self,StateVector):
return None
################### End Class CtOptimizer ###################
################# Stand alone methods that manipulate the objects above ##############
def PrepareObs(CycleInfo,type='forecast'):
""" PrepareObs """
import shutil
# First, we make a CarbonTracker Observation object which holds all the information related to the mixing ratios going in
# and coming out of the model. This object also holds the observations themselves, and the simulated value + statistics
DaSystem = DaInfo(CycleInfo.da_settings['da.system.rc'])
samples = CtObservations(DaSystem)
dummy = samples.AddObs()
#
# Next should follow a section where the observations that are read are further processes, and written to a NetCDF file for
# the transport model to use for x,y,z,t smapling.
#
# For carbontracker the observations already come in netcdf format thanks to Andy and Ken. We will therefore simply copy
# the observations.nc file to the pre-cooked obs files
#
filename= samples.ObsFilename
saveas = os.path.join(CycleInfo.da_settings['dir.input'],'observations.nc')
dummy = shutil.copy2(filename,saveas)
msg = 'Successfully copied file (%s) to (%s) ' % (filename,saveas) ; logging.info(msg)
return samples
def PrepareState(CycleInfo):
"""
Prepare the ensemble of parameters needed by the forecast model. There are two possible pathways:
(A) Construct a brand new ensemble from a covariance matrix
(B) Get an existing ensemble and propagate it
The steps for procedure A are:
(A1) Construct the covariance matrix
(A2) Make a Cholesky decomposition
(A3) Prepare a set of [nmembers] input parameters for each forecast member
The steps for procedure B are:
(B1) Get existing parameter values for each member
(B2) Run them through a forecast model
Both procedures finish with writing the parameter values to a NetCDF file
"""
# Get the da system specific rc-items, note that they've been validated already by FillObs
DaSystem = DaInfo(CycleInfo.da_settings['da.system.rc'])
StateVector = CtStateVector(CycleInfo,DaSystem)
nlag = int(CycleInfo.da_settings['time.nlag'])
for n in range(1,nlag+1):
if n == nlag:
# Route A: make a new ensemble
dummy = StateVector.MakeNewEnsemble(n-1)
else:
# Route B: Propagate existing ensemble
Xprime,Qprior = [0,0]
return StateVector
if __name__ == "__main__": if __name__ == "__main__":
import rc
import os import os
import logging import sys
from tools_da import StartLogger
from da_initexit import CycleControl
opts = ['-v']
args = {'rc':'da.rc','logfile':'da_initexit.log','jobrcfilename':'test.rc'}
StartLogger()
DaCycle = CycleControl(opts,args)
Da_Info = DaInfo('carbontracker.rc')
dummy = CtMember(0)
print dummy
DaCycle.Initialize()
print DaCycle