Commit b4e4a62a authored by Auke van der Woude

initialising commit

parent fb8f88ad
#!/usr/bin/env python
# control.py
"""
Author : peters
Revision History:
File created on 26 Aug 2010.
Adapted by super004 on 26 Jan 2017.
"""
import logging
################### Begin Class CO2DaSystem ###################
from da.baseclasses.dasystem import DaSystem
class CO2DaSystem(DaSystem):
""" Information on the data assimilation system used. This is normally an rc-file with settings.
"""
def validate(self):
"""
validate the contents of the rc-file given a dictionary of required keys
"""
needed_rc_items = ['obs.input.id',
'obs.input.nr',
'obs.spec.nr',
'obs.cat.nr',
'nparameters',
'random.seed',
'emis.pparam',
'ff.covariance',
'obs.bgswitch',
'obs.background',
'emis.input.spatial',
'emis.input.tempobs',
'emis.input.tempprior',
'emis.paramfile',
'emis.paramfile2',
'run.emisflag',
'run.emisflagens',
'run.obsflag']
        for k, v in self.items():
            if v == 'True':
                self[k] = True
            if v == 'False':
                self[k] = False
        for key in needed_rc_items:
            if key not in self:
                logging.warning('Missing a required value in rc-file : %s' % key)
        logging.debug('DA System Info settings have been validated successfully')
################### End Class CO2DaSystem ###################
if __name__ == "__main__":
pass
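    # Minimal self-contained illustration of the coercion validate() applies to the
    # rc settings (a plain dict stands in for the rc-file contents here):
    settings = {'run.emisflag': 'False', 'run.emisflagens': 'True'}
    for k, v in settings.items():
        if v == 'True':
            settings[k] = True
        if v == 'False':
            settings[k] = False
    print(settings)   # {'run.emisflag': False, 'run.emisflagens': True}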
#!/usr/bin/env python
# stilt_tools.py
"""
Author : I. Super
Revision History:
Newly developed code, September 2017
This module holds an emission model that prepares the emission files used by the observation operator
and creates pseudo-data.
"""
import shutil
import os
import logging
import datetime as dtm
import numpy as np
from numpy import array, logical_and
import da.tools.io4 as io
import math
import da.tools.rc as rc
from da.tools.general import create_dirs, to_datetime
identifier = 'EmissionModel ensemble '
version = '1.0'
################### Begin Class Emission model ###################
class EmisModel(object):
def __init__(self, dacycle=None):
        if dacycle is not None:
            self.dacycle = dacycle
        else:
            self.dacycle = {}
def setup(self, dacycle):
self.dacycle = dacycle
self.startdate = self.dacycle['time.fxstart']
self.enddate = self.dacycle['time.finish']
self.emisdir = dacycle.dasystem['datadir']
self.proxyfile = dacycle.dasystem['emis.input.spatial']
self.tempfileo = dacycle.dasystem['emis.input.tempobs']
self.tempfilep = dacycle.dasystem['emis.input.tempprior']
self.btime = int(dacycle.dasystem['run.backtime'])
self.obsfile = dacycle.dasystem['obs.input.id']
self.nrspc = int(dacycle.dasystem['obs.spec.nr'])
self.nrcat = int(dacycle.dasystem['obs.cat.nr'])
self.nparams = int(dacycle.dasystem['nparameters'])
self.nmembers = int(dacycle['da.optimizer.nmembers'])
self.pparam = dacycle.dasystem['emis.pparam']
self.paramfile = dacycle.dasystem['emis.paramfile']
#self.paramfile2 = dacycle.dasystem['emis.paramfile2']
def get_emis(self, dacycle, psdo):
"""set up emission information for pseudo-obs (psdo=1) and ensemble runs (psdo=0)"""
if psdo==1:
priorparam=os.path.join(self.emisdir,self.pparam)
f = io.ct_read(priorparam, 'read')
self.prm = f.get_variable('true_values')[:self.nparams]
f.close()
self.get_spatial(dacycle, n=999, infile=os.path.join(self.emisdir, self.paramfile))
self.get_temporal(dacycle, n=999, psdo=1)
elif psdo==0:
self.timestartkey = self.dacycle['time.sample.start']
self.timefinishkey = self.dacycle['time.sample.end']
for j in range(self.nmembers):
#first remove old files, then create new emission files per ensemble member
if self.startdate == self.timestartkey:
file = os.path.join(dacycle.dasystem['datadir'],'temporal_data_%03d.nc'%j)
try:
os.remove(file)
except OSError:
pass
prmfile=os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%j)
f = io.ct_read(prmfile, 'read')
self.prm = f.get_variable('parametervalues')
f.close()
self.get_spatial(dacycle, n=j, infile=os.path.join(self.emisdir, self.paramfile))
self.get_temporal(dacycle, n=j, psdo=0)
def get_totals(self, infile=None):
"""gather all required data and calculate total emissions per sector in kg/yr"""
        yremis = np.zeros((self.nrcat, self.nrspc))
self.spatial_var = []
self.spatial_inc = []
self.temporal_var = []
self.temporal_inc = []
self.ops_sector = []
### Read in parameter values for the emission functions and ratios to calculate total emissions
f = open(infile, 'r')
lines = f.readlines()
f.close()
ct = 0
for line in lines:
dum=line.split(",")
if dum[0]=='#':
continue
else:
id = int(dum[0])
### If id == 0 this (sub)sector is treated only by WRF-STILT; if id takes another value the local sources are treated by OPS
if id != 0:
self.ops_sector.append(ct)
inc = int(dum[2])
### If inc == 999 this parameter is not in the state vector and will not be optimized; otherwise inc refers to the
### position of this parameter in the state vector
if inc!=999:
EC = float(dum[1])*self.prm[inc]
else:
EC = float(dum[1])
inc = int(dum[4])
if inc!=999:
EF = float(dum[3])*self.prm[inc]
else:
EF = float(dum[3])
inc = int(dum[6])
if inc!=999:
AD = float(dum[5])*self.prm[inc]
else:
AD = float(dum[5])
inc = int(dum[8])
if inc!=999:
fr = float(dum[7])*self.prm[inc]
else:
fr = float(dum[7])
### emission = energy consumption per activity x emission factor x activity
### fr can be used to divide sectoral emissions over several subsectors (e.g. to split road traffic into cars and HDV)
ems = EC*EF*AD*fr
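                ### Worked example with hypothetical numbers: EC = 0.8 (energy use per unit of
                ### activity), EF = 56.1 (kg CO2 per unit of energy), AD = 1e6 (activity data)
                ### and fr = 0.4 give ems = 0.8 * 56.1 * 1e6 * 0.4 ~ 1.8e7 kg CO2/yr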
### Now we calculated emissions of CO2. To get emissions of other trace gases we multiply with an emission ratio
for s in range(self.nrspc):
inc = int(dum[15+s*3])
if inc!=999:
rat = float(dum[13+s*3])*self.prm[inc]
else:
rat = float(dum[13+s*3])
### Some emission ratios have lognormal uncertainty distributions (label 'l')
if dum[14+s*3]=='n':
yremis[ct,s] = ems*rat
elif dum[14+s*3]=='l':
yremis[ct,s] = ems*np.exp(rat)
ct = ct + 1
### Here we list the spatial and temporal variables that are part of the state vector for use in the get_spatial and get_temporal functions
self.spatial_var.append(dum[9])
self.spatial_inc.append(int(dum[10]))
self.temporal_var.append(dum[11])
self.temporal_inc.append(int(dum[12]))
logging.debug("Successfully calculated total emissions")
return yremis
def get_spatial(self, dacycle, n, infile=None):
"""read in proxy data used for spatial distribution of the gridded emissions, disaggregate yearly totals for the area"""
yremis=self.get_totals(infile)
# read in species information
infile = os.path.join(self.emisdir, self.obsfile)
f = open(infile, 'r')
lines = f.readlines()
f.close()
M_mass = []
spname = []
for line in lines:
dum=line.split(",")
if dum[0]=='#':
continue
else:
M_mass.append(float(dum[6])*1e-9) #kg/micromole
spname.append(dum[5]) #name of the species
sec_year = 8760.*3600. #seconds in a year
arcor = 1e6 #km2 -> m2
conv = np.array(M_mass)*sec_year*arcor # to convert emissions in kg/km2/yr to micromole/m2/s
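        ### e.g. for CO2 (44 g/mol, so M_mass = 4.4e-8 kg/micromole):
        ### conv = 4.4e-8 * 3.1536e7 * 1e6 ~ 1.39e6, i.e. 1 kg/km2/yr ~ 7.2e-7 micromole/m2/s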
#read in proxy data for spatial disaggregation
infile = os.path.join(self.emisdir, self.proxyfile)
prx = io.ct_read(infile, method='read')
sp_distr = []
for c in range(self.nrcat):
sp_distr.append(prx.get_variable(self.spatial_var[c]))
sp_distr=np.array(sp_distr)
prx.close()
### create output file
prior_file = os.path.join(self.emisdir, 'prior_spatial_%03d.nc'%n)
f = io.CT_CDF(prior_file, method='create')
dimid = f.add_dim('ncat', self.nrcat)
dimid2 = f.add_dim('ops', 3)
dimlon = f.add_dim('lon', sp_distr.shape[1])
dimlat = f.add_dim('lat', sp_distr.shape[2])
#loop over all tracers
for s in range(self.nrspc):
# determine which fraction of the emissions are local and are treated by OPS
datalistOPS = []
opsfr = [0.317, 0.317, 0.267]
for j in range(len(self.ops_sector)):
OPSemis = yremis[self.ops_sector[j],s] * opsfr[j] * 1E3 / sec_year
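                ### kg/yr -> g/s: multiply by 1e3 (g per kg) and divide by the seconds in a year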
datalistOPS.append(OPSemis)
savedict = io.std_savedict.copy()
savedict['name'] = "OPStotals_%s"%spname[s]
savedict['long_name'] = "total OPS emissions"
savedict['units'] = "g/s"
savedict['dims'] = dimid2
savedict['values'] = datalistOPS
f.add_data(savedict)
datalist = []
ct = 0
for c in range(self.nrcat):
inc = self.spatial_inc[c]
if inc!=999:
                    ### note that the scaling factor is applied to the gridded spatial distribution,
                    ### such that it also affects the total emissions in this area
distr = sp_distr[c]*self.prm[inc]
else:
distr = sp_distr[c]
if c in self.ops_sector:
emis_spatial = (yremis[c,s]*(1-opsfr[ct])) / conv[s] * distr
ct = ct + 1
else:
emis_spatial = yremis[c,s] / conv[s] * distr
datalist.append(emis_spatial)
savedict = io.std_savedict.copy()
savedict['name'] = spname[s]
savedict['long_name'] = "Spatially distributed emissions"
savedict['units'] = "micromole/m2/s"
savedict['dims'] = dimid + dimlon + dimlat
savedict['values'] = datalist
f.add_data(savedict)
f.close()
logging.debug("Successfully wrote data to prior spatial distribution file (%s)" % prior_file)
def get_temporal(self, dacycle, n, psdo):
"""read in time profiles used for temporal distribution of the emissions"""
### For pseudo-observation (psdo==1) or when no time profiles need to be optimized the profiles are simply read from the
### input file and copied to another file.
### Otherwise create a new file per ensemble member at t=0 and update the profiles for each time step
if psdo==0 and min(self.temporal_inc)<999:
ensfile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%n)
if os.path.exists(ensfile) == False:
dumfile = os.path.join(self.emisdir, self.tempfilep)
shutil.copy2(dumfile,ensfile)
tpr = io.ct_read(ensfile, method='read')
itimes = tpr.get_variable('Times')
times = array([dtm.datetime(int(''.join(d[:4])),int(''.join(d[5:7])),int(''.join(d[8:10])),int(''.join(d[11:13])),int(''.join(d[14:16]))) for d in itimes])
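            ### 'Times' is assumed to hold character arrays like '2017-01-15_06:00'; the slicing
            ### above extracts the year, month, day, hour and minute fields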
subselect = logical_and(times >= self.timestartkey , times <= self.timefinishkey).nonzero()[0]
datlist = times.take(subselect, axis=0)
### The time profiles should always cover at least one full year
start_date = dtm.datetime(self.timestartkey.year,1,1,0,0) #first time included
end_date = dtm.datetime(self.timestartkey.year,12,31,23,0) #last time included
dt = dtm.timedelta(0,3600)
dum = start_date
times=[]
while dum<=end_date:
times.append(dum)
dum=dum+dt
times=np.array(times)
stidx = np.where(times==self.timestartkey)[0][0]
stidx2 = np.where(times==self.startdate)[0][0]
edidx = np.where(times==self.timefinishkey)[0][0]
dumsel = np.where((times<self.startdate)|(times>self.timefinishkey))[0]
""" Time profiles should, for a full year, always have an average value of 1.0. Therefore, a new method has been developed
to optimize time profiles such that we comply with this and the time profiles do not affect the total emissions.
For this purpose we apply the scaling factor (statevector) to the period covered in this cycle. The time profile for all dates
outside this period are scaled equally such that the time profile remains its average value of 1.0. Except previously updated
dates (from previous cycles) are maintained (they are already optimized!)."""
profiles = []
for c in range(self.nrcat):
if self.temporal_inc[c]!=999:
f_orig = tpr.get_variable(self.temporal_var[c])
f_sel = tpr.get_variable(self.temporal_var[c]).take(subselect, axis=0)
f_new = f_sel*self.prm[self.temporal_inc[c]]
                    dumsum = np.array(f_orig[dumsel]).sum()
                    f_orig[:stidx2] = f_orig[:stidx2] - (f_orig[:stidx2] / dumsum) * (f_new.sum() - f_sel.sum())
                    f_orig[edidx+1:] = f_orig[edidx+1:] - (f_orig[edidx+1:] / dumsum) * (f_new.sum() - f_sel.sum())
                    f_orig[stidx:edidx+1] = f_new
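                    ### Bookkeeping check: the extra mass added to the optimized window,
                    ### f_new.sum() - f_sel.sum(), is removed proportionally from the hours in
                    ### dumsel, so the profile keeps its annual sum and thus its mean of 1.0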
profiles.append(f_orig)
tpr.close()
f = io.CT_CDF(ensfile, method='write')
ct=0
for c in range(self.nrcat):
if self.temporal_inc[c]!=999:
f.variables[self.temporal_var[c]][:] = profiles[ct]
ct=ct+1
f.close()
### Now read in the correct profiles, select the correct time period and write the profiles into one file per ensemble member
if psdo==1:
infile = os.path.join(self.emisdir, self.tempfileo)
elif psdo==0 and min(self.temporal_inc)==999:
infile = os.path.join(self.emisdir, self.tempfilep)
elif psdo==0 and min(self.temporal_inc)<999:
infile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%n)
tpr = io.ct_read(infile, method='read')
itimes = tpr.get_variable('Times')
times = array([dtm.datetime(int(''.join(d[:4])),int(''.join(d[5:7])),int(''.join(d[8:10])),int(''.join(d[11:13])),int(''.join(d[14:16]))) for d in itimes])
if psdo == 1:
            startdum = dtm.datetime(self.startdate.year, self.startdate.month, self.startdate.day, 1, 0) - dtm.timedelta(days=1)
subselect = logical_and(times >= startdum, times <= self.enddate).nonzero()[0]
else:
dum = self.timestartkey - dtm.timedelta(0,self.btime*3600)
if dum.hour != 0:
startdum = dtm.datetime(dum.year,dum.month,dum.day,1,0)
else:
                startdum = dtm.datetime(dum.year, dum.month, dum.day, 1, 0) - dtm.timedelta(days=1)
subselect = logical_and(times >= startdum , times <= self.timefinishkey).nonzero()[0]
datlist = times.take(subselect, axis=0)
profiles=[]
for c in range(self.nrcat):
f_orig = tpr.get_variable(self.temporal_var[c]).take(subselect, axis=0)
profiles.append(f_orig)
tpr.close()
profiles=np.array(profiles)
prior_file = os.path.join(self.emisdir, 'prior_temporal_%03d.nc'%n)
f = io.CT_CDF(prior_file, method='create')
dimtime = f.add_dim('Times', len(datlist))
dum=[]
for c in range(self.nrcat):
if self.temporal_var[c] in dum:
continue
else:
savedict = io.std_savedict.copy()
savedict['name'] = self.temporal_var[c]
savedict['long_name'] = "Temporal distribution"
savedict['units'] = ""
savedict['dims'] = dimtime
savedict['values'] = profiles[c,:]
f.add_data(savedict)
dum.append(self.temporal_var[c])
f.close()
logging.debug("Successfully wrote data to prior temporal distribution file (%s)" % prior_file)
################### End Class Emission model ###################
if __name__ == "__main__":
pass
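    # Hedged usage sketch (assumes a configured dacycle object carrying the rc-file
    # settings listed in CO2DaSystem.validate(); not run as part of the pipeline):
    #
    #   em = EmisModel()
    #   em.setup(dacycle)
    #   em.get_emis(dacycle, psdo=0)   # per member: prior_spatial_%03d.nc and prior_temporal_%03d.nc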
#!/usr/bin/env python
# optimizer.py
"""
.. module:: optimizer
.. moduleauthor:: Wouter Peters
Revision History:
File created on 28 Jul 2010.
"""
import os
import logging
import numpy as np
import numpy.linalg as la
import da.tools.io4 as io
identifier = 'Optimizer CO2'
version = '0.0'
from da.baseclasses.optimizer import Optimizer
################### Begin Class CO2Optimizer ###################
class CO2Optimizer(Optimizer):
"""
    This creates an instance of an optimization object. It handles the least-squares optimization
    of the state vector given a set of sample objects. Two routines are implemented: a sequential
    solution and the equivalent matrix solution; the choice between them can be made based on
    considerations of speed and efficiency.
"""
def setup(self, dims):
self.nlag = dims[0]
self.nmembers = dims[1]
self.nparams = dims[2]
self.nobs = dims[3]
self.nrloc = dims[4]
self.nrspc = dims[5]
self.inputdir = dims[6]
        #self.specfile = dims[7]  # note: the 'multitracer' localization in localize() below reads self.specfile
self.create_matrices()
def set_localization(self, loctype='None'):
""" determine which localization to use """
if loctype == 'CT2007':
self.localization = True
self.localizetype = 'CT2007'
#T-test values for two-tailed student's T-test using 95% confidence interval for some options of nmembers
if self.nmembers == 50:
self.tvalue = 2.0086
elif self.nmembers == 100:
self.tvalue = 1.9840
elif self.nmembers == 150:
self.tvalue = 1.97591
elif self.nmembers == 200:
self.tvalue = 1.9719
else: self.tvalue = 0
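            # These match the two-tailed 95% t-values for df = nmembers (e.g.
            # scipy.stats.t.ppf(0.975, 50) ~ 2.0086), while the significance test in
            # localize() below uses nmembers - 2 degrees of freedom in the denominator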
elif loctype == 'multitracer':
self.localization = True
self.localizetype = 'multitracer'
elif loctype == 'multitracer2':
self.localization = True
self.localizetype = 'multitracer2'
else:
self.localization = False
self.localizetype = 'None'
logging.info("Current localization option is set to %s" % self.localizetype)
def localize(self, n):
""" localize the Kalman Gain matrix """
if not self.localization:
            logging.debug('Observation %i not localized' % self.obs_ids[n])
return
if self.localizetype == 'CT2007':
count_localized = 0
for r in range(self.nlag * self.nparams):
corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
prob = corr / np.sqrt((1.000000001 - corr ** 2) / (self.nmembers - 2))
if abs(prob) < self.tvalue:
self.KG[r] = 0.0
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
if self.localizetype == 'multitracer':
count_localized = 0
###read emission ratios for source type related to each parameter
infile = os.path.join(self.inputdir, self.specfile)
f = open(infile, 'r')
lines = f.readlines()
f.close()
speclist = []
speclist.append('CO2')
emr = []
for line in lines:
dum=line.split(",")
if dum[0]=='#' or dum[0]=='CO2' or dum[0]=='SNAP':
continue
else:
sp = dum[0]
emrs = []
for k in range(self.nparams):
emrs.append(float(dum[k+1]))
speclist.append(sp)
emr.append(emrs)
emr=np.array(emr)
            ###find obs and model value for this time step and species; calculate differences and ratios
            lnd = self.nobs // (self.nrspc * self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [k - i for k in range(self.nrspc)]
Xobs = []
Xmod = []
Robs = []
Rmod = []
for i in range(self.nrspc):
Xobs.append(self.obs[n+lnd*idx[i]])
Xmod.append(self.Hx[n+lnd*idx[i]])
if i>0:
Robs.append(Xobs[i]/Xobs[0])
Rmod.append(Xmod[i]/Xmod[0])
Xobs = np.array(Xobs)
Xmod = np.array(Xmod)
Robs = np.array(Robs)
Rmod = np.array(Rmod)
dX = Xmod - Xobs
dR = abs(Rmod - Robs)
            flg = 1
            if Xobs[0] > 1.:   #and self.species[n] == 'CO2'
                flg = 0
                for i in range(self.nrspc):
                    if Xobs[i] == 0. or Xmod[i] == 0.:
                        flg = 1
###old version
# for i in range(self.nrspc):
# if dX[i]>0:
# flg.append(1)
# elif dX[i]<0:
# flg.append(0)
# else:
# flg.append(np.nan)
            ### This routine determines which source types are likely to cause the model-data mismatch, based on the following
            ### principle: if the modeled CO:CO2 ratio is higher than the observed ratio, we either overestimate emissions with a
            ### high CO:CO2 ratio or underestimate emissions with a low CO:CO2 ratio; if the CO concentration itself is
            ### overestimated, the first option (too high emissions) is the more likely one. We only use information from a
            ### species if the model-data mismatch in the ratio exceeds 5% of the observed ratio
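            ### Example: if both CO2 and CO are overestimated (dX > 0) and the modeled CO:CO2 ratio
            ### exceeds the observed one, sectors whose CO:CO2 emission ratio lies above the observed
            ### ratio are the likely culprits, and only their parameters keep a nonzero Kalman gain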
dums = []
dum = []
tst1 = []
if flg == 0:
for i in range(self.nrspc-1):
if dX[0]>0:
if dX[i+1]>0:
if Rmod[i]>Robs[i]:
tst1.append(1)
elif Rmod[i]<Robs[i]:
tst1.append(-1)
elif dX[i+1]<0:
if Rmod[i]>Robs[i]:
tst1.append(0)
elif Rmod[i]<Robs[i]:
tst1.append(2)
elif dX[0]<0:
if dX[i+1]>0:
if Rmod[i]>Robs[i]:
tst1.append(2)
elif Rmod[i]<Robs[i]:
tst1.append(0)
elif dX[i+1]<0:
if Rmod[i]>Robs[i]:
tst1.append(-1)
elif Rmod[i]<Robs[i]:
tst1.append(1)
if 2 in tst1:
dums1=[]
dums2=[]
for i in range(self.nrspc-1):
for k in range(len(emr[i])):
if emr[i,k]<Robs[i]:
dums1.append(k)
if emr[i,k]>Robs[i]:
dums2.append(k)
for j in range(self.nparams):
ct = dums1.count(j)
ctc = dums2.count(j)
if ct == (self.nrspc - 1) or ctc == (self.nrspc - 1):
### all requirements are met, so do not localize
dum.append(j)
else:
for i in range(self.nrspc-1):
if tst1[i] == 1:
for k in range(len(emr[i])):
if emr[i,k]>Robs[i]:
dums.append(k)
elif tst1[i] == -1:
for k in range(len(emr[i])):
if emr[i,k]<Robs[i]:
dums.append(k)
for j in range(self.nparams):
ct = dums.count(j)
if ct == (self.nrspc - 1):
### all requirements are met, so do not localize
dum.append(j)
###old version
# if sum(flg) == 0 or sum(flg) == 4:
### if this is not the case, it is likely a mixture of over- and underestimation, which we can't specify; so no localization applied
# for i in range(self.nrspc-1):
# if dR[i]>0.05*Robs[i]:
# if (Rmod[i]>Robs[i] and dX[i+1]>0) or (Rmod[i]<Robs[i] and dX[i+1]<0):
# tst1.append(1.)
# elif (Rmod[i]<Robs[i] and dX[i+1]>0) or (Rmod[i]>Robs[i] and dX[i+1]<0):
# tst1.append(-1.)
# else:
# tst1.append(0)
# for i in range(self.nrspc-1):
# if tst1[i] == 1:
# for k in range(len(emr[i])):
# if emr[i,k]>Robs[i]:
# dums.append(k)
# elif tst1[i] == -1:
# for k in range(len(emr[i])):
# if emr[i,k]<Robs[i]:
# dums.append(k)
# for j in range(self.nparams):
# ct = dums.count(j)
# if ct == (self.nrspc - 1):
### all requirements are met, so do not localize
# dum.append(j)
if len(dum) > 0:
### what to do when we can't attribute model-data mismatch? update all parameters or set them all to zero?? (adapt dum)
for r in range(self.nlag * self.nparams):
if r in dum:
continue
else:
self.KG[r] = 0.0
self.test_localize[r] = self.test_localize[r] + 1
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
if self.localizetype == 'multitracer2':
            ### This routine uses stricter rules for source attribution, comparing the observed
            ### concentration ratios to the emission ratios per source type
count_localized = 0
            ###find obs and model value for this time step and species; calculate differences and ratios
            ### (note: speclist is built in the 'multitracer' branch above and is assumed to be available here)
            lnd = self.nobs // (self.nrspc * self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [k - i for k in range(self.nrspc)]
Xobs = []
Robs = []
for i in range(self.nrspc):
Xobs.append(self.obs[n+lnd*idx[i]])
if i>0:
Robs.append(Xobs[i]/Xobs[0])
            dum = []
            if Robs[2] > 0.1:
                if Robs[1] < 1. and Robs[0] < 1.:
                    dum.append(4)
                elif Robs[2] > 2.5 and Robs[1] > 8. and Robs[0] > 3.:
                    dum.append(8)
            elif Robs[0] > 1. and Robs[2] < 0.1:
                if Robs[0] < 4. and Robs[1] < 1.:
                    dum.append(2)
                    dum.append(3)
                elif Robs[0] > 7. and Robs[1] > 1.5:
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
                elif Robs[0] > 3.5 and Robs[1] < 2.5:
                    dum.append(2)
                    dum.append(3)
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
            elif Robs[0] < 0.6 and Robs[1] < 0.6:
                dum.append(0)
                dum.append(1)
if len(dum) > 0 and len(dum) < self.nparams:
for r in range(self.nlag * self.nparams):
if r in dum:
continue
else:
self.KG[r] = 0.0
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
################### End Class CO2Optimizer ###################
if __name__ == "__main__":
pass
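    # Self-contained sketch of the CT2007 significance-based localization used in
    # localize() above (synthetic ensemble deviations; numbers are illustrative only):
    import numpy as np
    rng = np.random.RandomState(0)
    nmembers, nstate = 50, 10
    HX_prime = rng.randn(nmembers)          # sampled mixing-ratio deviations for one observation
    X_prime = rng.randn(nstate, nmembers)   # parameter deviations for each state element
    tvalue = 2.0086                         # two-tailed 95% t-value used for 50 members
    KG = np.ones(nstate)                    # stand-in Kalman gain column
    for r in range(nstate):
        corr = np.corrcoef(HX_prime, X_prime[r, :])[0, 1]
        prob = corr / np.sqrt((1.000000001 - corr ** 2) / (nmembers - 2))
        if abs(prob) < tvalue:
            KG[r] = 0.0                     # localize: zero the gain for insignificant correlations
    print('%d of %d gain elements localized' % ((KG == 0).sum(), nstate))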
#!/usr/bin/env python
# ct_statevector_tools.py
"""
.. module:: statevector
.. moduleauthor:: Wouter Peters
Revision History:
File created on 28 Jul 2010.
Adapted by super004 on 26 Jan 2017.
The module statevector implements the data structure and methods needed to work with state vectors (a set of unknown parameters to be optimized by a DA system) of different lengths, types, and configurations. Two baseclasses together form a generic framework:
* :class:`~da.baseclasses.statevector.StateVector`
* :class:`~da.baseclasses.statevector.EnsembleMember`
As usual, specific implementations of StateVector objects are done through inheritance from these baseclasses. For an example of
designing your own StateVector subclass, see :ref:`tut_chapter5`.
.. autoclass:: da.baseclasses.statevector.StateVector
.. autoclass:: da.baseclasses.statevector.EnsembleMember
"""
import os
import sys
sys.path.append(os.getcwd())
import logging
import numpy as np
from da.baseclasses.statevector import StateVector, EnsembleMember
from da.tools.general import create_dirs, to_datetime
import datetime as dtm
import da.tools.io4 as io
identifier = 'CarbonTracker Statevector '
version = '0.0'
################### Begin Class CO2StateVector ###################
class CO2StateVector(StateVector):
def __init__(self, dacycle=None):
        if dacycle is not None:
            self.dacycle = dacycle
        else:
            self.dacycle = {}
def setup(self, dacycle):
"""
setup the object by specifying the dimensions.
        There are two major requirements for each statevector that you want to build:
(1) is that the statevector can map itself onto a regular grid
(2) is that the statevector can map itself (mean+covariance) onto TransCom regions
An example is given below.
"""
self.dacycle = dacycle
self.nlag = int(self.dacycle['time.nlag'])
self.nmembers = int(self.dacycle['da.optimizer.nmembers'])
self.nparams = int(self.dacycle.dasystem['nparameters'])
self.obsdir = self.dacycle.dasystem['datadir']
self.pparam = self.dacycle.dasystem['emis.pparam']
self.covm = self.dacycle.dasystem['ff.covariance']
self.prop = int(self.dacycle.dasystem['run.propscheme'])
self.nobs = 0
        self.obs_to_assimilate = ()  # empty container to hold observations to assimilate later on
# These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist
# of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread.
        self.ensemble_members = [[] for _ in range(self.nlag)]
        # In the baseclass a gridded mask file at 1x1 degrees specifies which parameter member maps onto
        # each gridbox. Here each parameter simply maps onto one element of a 1-D array; from this map, a
        # dictionary is created that allows a reverse look-up so that we can map parameters to a grid.
## Initialise an array with an element for each parameter to optimise
self.gridmap = np.arange(1,self.nparams+1,1)
# Create a dictionary for state <-> gridded map conversions
nparams = self.gridmap.max()
self.griddict = {}
for r in range(1, int(nparams) + 1):
sel = (self.gridmap.flat == r).nonzero()
if len(sel[0]) > 0:
self.griddict[r] = sel
logging.debug("A dictionary to map grids to states and vice versa was created")
# Create a mask for species/unknowns
self.make_species_mask()
def get_covariance(self, date, dacycle):
file=os.path.join(self.obsdir,self.covm)
f = io.ct_read(file, 'read')
covmatrix = f.get_variable('covariances')[:self.nparams,:self.nparams]
f.close()
return covmatrix
def write_members_to_file(self, lag, outdir,endswith='.nc'):
"""
:param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag]
:param: outdir: Directory where to write files
:param: endswith: Optional label to add to the filename, default is simply .nc
:rtype: None
Write ensemble member information to a NetCDF file for later use. The standard output filename is
*parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location
        is the `dir.input` of the dacycle object. In this implementation the output file holds a single
        dataset called `parametervalues`, of dimension `nparameters`.
This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object.
.. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you
can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function.
"""
# These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was
# to do the import already at the start of the module, not just in this method.
#import da.tools.io as io
#import da.tools.io4 as io
members = self.ensemble_members[lag]
for mem in members:
filename = os.path.join(outdir, 'parameters.%03d%s' % (mem.membernumber, endswith))
ncf = io.CT_CDF(filename, method='create')
dimparams = ncf.add_params_dim(self.nparams)
data = mem.param_values
savedict = io.std_savedict.copy()
savedict['name'] = "parametervalues"
savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimparams
savedict['values'] = data
savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
ncf.close()
logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename))
def make_new_ensemble(self, dacycle, lag, covariancematrix=None):
"""
:param lag: an integer indicating the time step in the lag order
:param covariancematrix: a matrix to draw random values from
:rtype: None
Make a new ensemble, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
        The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is
        used to draw ensemble members from. If this argument is not passed it will be substituted with an
        identity matrix of the same dimensions.
"""
self.seed = int(dacycle.dasystem['random.seed'])
if self.seed != 0:
np.random.seed(self.seed)
sds = np.random.randint(1,10000,20)
else:
sds = np.random.randint(1,10000,20)
sid = (dacycle['time.start'] - dacycle['time.fxstart']).days
np.random.seed(sds[sid])
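            # note: sds holds 20 seeds, so this daily index assumes at most 20 days between
            # time.fxstart and the current cycle's time.start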
enssds = np.random.randint(1,10000,self.nmembers)
#option 1: start each cycle with the same prior values (makes several independent estimates)
if self.prop == 1 or dacycle['time.restart']==False:
file=os.path.join(self.obsdir,self.pparam)
f = io.ct_read(file, 'read')
prmval = f.get_variable('prior_values')[:self.nparams]
f.close()
#option 2: propagate optimized parameter values, but not the covariance matrix
elif self.prop == 2:
            selectdate = dacycle['time.start'] - dtm.timedelta(1)
            file = os.path.join(dacycle['dir.da_run'], 'output', selectdate.strftime('%Y%m%d'), 'optimizer.%s.nc' % selectdate.strftime('%Y%m%d'))
f = io.ct_read(file, 'read')
prmval = f.get_variable('statevectormean_optimized')[:]
f.close()
elif self.prop == 3:
#option 3: start each cycle with the parameter values and uncertainties of the previous cycle (optimized)
            selectdate = dacycle['time.start'] - dtm.timedelta(1)
            file = os.path.join(dacycle['dir.da_run'], 'output', selectdate.strftime('%Y%m%d'), 'optimizer.%s.nc' % selectdate.strftime('%Y%m%d'))
f = io.ct_read(file, 'read')
prmval = f.get_variable('statevectormean_optimized')[:]
devs = f.get_variable('statevectordeviations_optimized')[:]
f.close()
covariancematrix = (np.dot(devs,devs.T)/(devs.shape[1]-1))
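            # devs has shape [nparams, nmembers]; dot(devs, devs.T)/(nmembers-1) is the sample
            # covariance of the optimized ensemble, propagated here as the new prior covariance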
# Check dimensions of covariance matrix list, must add up to nparams
dims = covariancematrix.shape[0]
if dims != self.nparams:
logging.error("The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..." % (dims, self.nparams))
raise ValueError
# Make a cholesky decomposition of the covariance matrix
try:
_, s, _ = np.linalg.svd(covariancematrix)
except:
s = np.linalg.svd(covariancematrix, full_matrices=1, compute_uv=0) #Cartesius fix
dof = np.sum(s) ** 2 / sum(s ** 2)
C = np.linalg.cholesky(covariancematrix)
logging.debug('Cholesky decomposition has succeeded ')
logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof)))
# Create mean values
        newmean = np.ones(self.nparams, float) * prmval  # prior mean parameter values for the new time step
# If this is not the start of the filter, average previous two optimized steps into the mix
if lag == self.nlag - 1 and self.nlag >= 3:
newmean += self.ensemble_members[lag - 1][0].param_values + \
self.ensemble_members[lag - 2][0].param_values
newmean = newmean / 3.0
# Create the first ensemble member with a deviation of 0.0 and add to list
newmember = EnsembleMember(0)
newmember.param_values = newmean.flatten() # no deviations
self.ensemble_members[lag].append(newmember)
# Create members 1:nmembers and add to ensemble_members list
for member in range(1, self.nmembers):
np.random.seed(enssds[member])
rands = np.random.randn(self.nparams)
# logging.debug('rands are %f, %f, %f, %f, %f'%(rands[0],rands[1],rands[2],rands[3],rands[4]))
newmember = EnsembleMember(member)
newmember.param_values = np.dot(C, rands) + newmean
self.ensemble_members[lag].append(newmember)
logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, (lag + 1)))
################### End Class CO2StateVector ###################
if __name__ == "__main__":
pass
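    # Self-contained sketch of the ensemble draw in make_new_ensemble() above: members are
    # mean + C.dot(z) with C the Cholesky factor of the covariance and z ~ N(0, I), so their
    # sample covariance approaches the prescribed matrix (toy 2x2 example):
    import numpy as np
    rng = np.random.RandomState(4385)        # same fixed-seed idea as random.seed in the rc-file
    cov = np.array([[1.0, 0.5], [0.5, 2.0]])
    C = np.linalg.cholesky(cov)
    draws = np.array([C.dot(rng.randn(2)) for _ in range(5000)])
    print(np.cov(draws.T))                   # ~ cov for a large ensemble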
!!! Info for the CarbonTracker data assimilation system
datadir : /Storage/CO2/super004/STILT_model/Data
! list of all observation sites
obs.input.id : obsfiles.csv
! number of observation sites included; number of species included and to be used in inversion
obs.input.nr : 7
obs.spec.nr : 4
! number of emission categories defined in the emission model
obs.cat.nr : 14
! For Rdam obs
obs.sites.rc : ${datadir}/sites_weights.rc
! number of parameters
nparameters : 44
! set fixed seed for random number generator, or use 0 if you want to use any random seed
random.seed : 4385
!file with prior estimate of scaling factors (statevector) and covariances
emis.pparam : param_values.nc
ff.covariance : covariances.nc
!file with emission model parameter values
emis.paramfile : emis_parameters.csv
! switch (1=on/0=off) and input data for background CO2 and CO concentrations
obs.bgswitch : 1
obs.background : ${datadir}/background.nc
! input data for emission model
emis.input.spatial : spatial_data.nc
emis.input.tempobs : temporal_data.nc
emis.input.tempprior : temporal_data.nc
! overwrite existing prior/ensemble emission files + pseudo-data (0: keep existing files; 1: create new files)
run.emisflag : 0
run.emisflagens : 1
run.obsflag : 0
! back trajectory time of STILT footprints, also applied to OPS (in hours)
run.backtime : 6
! choose propagation scheme:
! 1: no propagation, start each cycle with the same prior parameter values and covariance matrix
! 2: propagation of optimized parameter values, but not of the covariance matrix
! 3: propagation of both optimized parameter values and covariance matrix
run.propscheme : 3