Skip to content
Snippets Groups Projects
Commit 3d76c114 authored by Ingrid Luijkx's avatar Ingrid Luijkx
Browse files

Deleted duplicate of methane_2lambdas

parent 71f118dc
Branches
No related tags found
No related merge requests found
#!/usr/bin/env python
# expand_fluxes.py
import sys
sys.path.append('../../')
import os
from datetime import datetime, timedelta
import logging
import numpy as np
from da.tools.general import date2num, num2date
import da.methane.io4 as io
from da.methane.analysis.tools_regions import globarea,state_to_grid
from da.tools.general import create_dirs
"""
Author: aki
Revision History:
File created on April 2016 by Aki T.
"""
def save_weekly_avg_1x1_data(dacycle, statevector):
"""
Function creates a NetCDF file with output on 1x1 degree grid. It uses the flux data written by the
:class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param statevector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
"""
#
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
nlag = statevector.nlag
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
#
# Region bio_land and fossil_land
#
region_land_file = dacycle.dasystem['regionsfile']
nfcr = io.CT_CDF(region_land_file, 'read')
logging.info('region land file read: %s' %region_land_file)
bio_land = nfcr.variables['bio_land'][:]
fossil_land = nfcr.variables['fossil_land'][:]
#logging.debug("bio_land %s" %bio_land[25,205])
#logging.debug("ff_land %s" %fossil_land[25,205])
#
# Create or open NetCDF output file
#
logging.debug("Create NetCDF output file: flux_1x1.%s.nc" % startdate.strftime('%Y-%m-%d'))
saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
ncf = io.CT_CDF(saveas, 'write')
#
# Create dimensions and lat/lon grid
#
dimgrid = ncf.add_latlon_dim()
dimensemble = ncf.add_dim('members', statevector.nmembers)
dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker fluxes')
setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))
file = io.ct_read(filename, 'read')
bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux']))
ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux']))
fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux']))
term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
file.close()
next = ncf.inq_unlimlen()[0]
# Start adding datasets from here on, both prior and posterior datasets for bio and ocn
for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date..
#
if prior:
logging.debug("prior")
qual_short = 'prior'
for n in range(nlag, 0, -1):
priordate = enddate - timedelta(dt.days * n)
savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
if os.path.exists(filename):
statevector.read_from_file(filename, qual=qual_short)
gridmean, gridensemble = statevector.state_to_grid(lag=n)
# Replace the mean statevector by all ones (assumed priors)
gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,))
logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n))
break
else:
logging.debug("opt")
qual_short = 'opt'
savedir = dacycle['dir.output']
filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
statevector.read_from_file(filename, qual=qual_short)
gridmean, gridensemble = statevector.state_to_grid(lag=1)
logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
logging.debug("gridmean %s" %gridmean[25,205])
logging.debug("bio land %s" %bio_land[25,205])
w = np.where(bio_land == 0)
s = gridmean.shape
gridmean_bio = np.zeros(shape=s)
gridmean_bio[:,:] = gridmean[:,:]
gridmean_bio[w] = 1.0
biomapped = bio * gridmean_bio
s = gridensemble.shape
gridensemble_bio = np.zeros(shape=s)
gridensemble_bio[:,:,:] = gridensemble[:,:,:]
gridensemble_bio[:, w[0],w[1]] = 0.0
biovarmapped = bio * gridensemble_bio
w = np.where(fossil_land == 0)
gridmean_fossil = gridmean
gridmean_fossil[w] = 1.0
fossilmapped = fossil * gridmean_fossil
gridensemble_fossil = gridensemble
gridensemble_fossil[:, w[0],w[1]] = 0.0
fossilvarmapped = fossil * gridensemble_fossil
logging.debug("gridmean %s" %gridmean[25,205])
logging.debug("gridmean bio %s" %gridmean_bio[25,205])
logging.debug("gridmean ff %s" %gridmean_fossil[25,205])
#
#
# For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write
#
savedict = ncf.standard_var(varname='bio_flux_' + qual_short)
savedict['values'] = biomapped.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='fossil_flux_' + qual_short)
savedict['values'] = fossilmapped.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#print biovarmapped.shape
savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
savedict['values'] = biovarmapped.tolist()
savedict['dims'] = dimdate + dimensemble + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short)
savedict['values'] = fossilvarmapped.tolist()
savedict['dims'] = dimdate + dimensemble + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
# End prior/posterior block
savedict = ncf.standard_var(varname='fire_flux_imp')
savedict['values'] = fire.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='ocn_flux_imp')
savedict['values'] = ocean.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var(varname='term_flux_imp')
savedict['values'] = term.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#area = globarea()
#savedict = ncf.standard_var(varname='cell_area')
#savedict['values'] = area.tolist()
#savedict['dims'] = dimgrid
#ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='date')
savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0
savedict['dims'] = dimdate
savedict['count'] = next
ncf.add_data(savedict)
sys.stdout.write('.')
sys.stdout.flush()
#
# Done, close the new NetCDF file
#
ncf.close()
#
# Return the full name of the NetCDF file so it can be processed by the next routine
#
logging.info("Gridded weekly average fluxes now written")
return saveas
def save_weekly_avg_state_data(dacycle, statevector):
"""
Function creates a NetCDF file with output for all parameters. It uses the flux data written by the
:class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param statevector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
"""
logging.debug('start: save weekly avg state data')
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
nlag = statevector.nlag
area = globarea()
vectorarea = statevector.grid2vector(griddata=area, method='sum')
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
#
# Create or open NetCDF output file
#
saveas = os.path.join(dirname, 'statefluxes.nc')
ncf = io.CT_CDF(saveas, 'write')
logging.debug('save weekly avg state data to file %s' %saveas)
#
# Create dimensions and lat/lon grid
#
dimregs = ncf.add_dim('nparameters', statevector.nparams)
dimmembers = ncf.add_dim('nmembers', statevector.nmembers)
dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker fluxes')
setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
next = ncf.inq_unlimlen()[0]
logging.debug('Writing of data for date %s: file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))
file = io.ct_read(filename, 'read')
bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux']))
ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux']))
fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux']))
term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
file.close()
next = ncf.inq_unlimlen()[0]
vectorbio = statevector.grid2vector(griddata=bio * area, method='sum')
vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum')
vectorfire = statevector.grid2vector(griddata=fire * area, method='sum')
vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum')
vectorterm = statevector.grid2vector(griddata=term * area, method='sum')
# Start adding datasets from here on, both prior and posterior datasets for bio and ocn
for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date..
#
if prior:
qual_short = 'prior'
for n in range(nlag, 0, -1):
priordate = enddate - timedelta(dt.days * n)
savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
if os.path.exists(filename):
statevector.read_from_file(filename, qual=qual_short)
# Replace the mean statevector by all ones (assumed priors)
statemean = np.ones((statevector.nparams,))
choicelag = n
logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag))
break
else:
qual_short = 'opt'
savedir = dacycle['dir.output']
filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
statevector.read_from_file(filename)
choicelag = 1
statemean = statevector.ensemble_members[choicelag - 1][0].param_values
logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
data = statemean * vectorbio # units of mole region-1 s-1
savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short)
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#
members = statevector.ensemble_members[choicelag - 1]
deviations = np.array([mem.param_values * vectorbio for mem in members])
deviations = deviations - deviations[0, :]
savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
savedict['values'] = deviations.tolist()
savedict['dims'] = dimdate + dimmembers + dimregs
savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var('unknown')
savedict['name'] = 'bio_flux_%s_std' % qual_short
savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short
savedict['values'] = deviations.std(axis=0)
savedict['dims'] = dimdate + dimregs
savedict['comment'] = "This is the standard deviation on each parameter"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
data = statemean * vectorfossil # units of mole region-1 s-1
savedict = ncf.standard_var(varname='fossil_flux_%s' % qual_short)
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#
deviations = np.array([mem.param_values * vectorfossil for mem in members])
deviations = deviations - deviations[0, :]
savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short)
savedict['values'] = deviations.tolist()
savedict['dims'] = dimdate + dimmembers + dimregs
savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var('unknown')
savedict['name'] = 'fossil_flux_%s_std' % qual_short
savedict['long_name'] = 'Fossil flux standard deviation, %s' % qual_short
savedict['values'] = deviations.std(axis=0)
savedict['dims'] = dimdate + dimregs
savedict['comment'] = "This is the standard deviation on each parameter"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
data = vectorfire
savedict = ncf.standard_var(varname='fire_flux_imp')
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
data = vectorterm
savedict = ncf.standard_var(varname='term_flux_imp')
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
data = vectorocn
savedict = ncf.standard_var(varname='ocn_flux_imp')
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var(varname='date')
savedict['values'] = ncfdate
savedict['dims'] = dimdate
savedict['count'] = next
ncf.add_data(savedict)
sys.stdout.write('.')
sys.stdout.flush()
#
# Done, close the new NetCDF file
#
ncf.close()
#
# Return the full name of the NetCDF file so it can be processed by the next routine
#
logging.info("Vector weekly average fluxes now written")
return saveas
#!/usr/bin/env python
# ct_statevector_tools.py
"""
Author : aki
Revision History:
File created on 18 Nov 2013.
"""
import os
import sys
sys.path.append(os.getcwd())
import logging
import numpy as np
from da.baseclasses.statevector import StateVector, EnsembleMember
from datetime import timedelta
import da.methane.io4 as io
identifier = 'CarbonTracker Statevector '
version = '0.0'
################### Begin Class CH4StateVector ###################
class MethaneStateVector(StateVector):
""" This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """
def setup(self, dacycle):
"""
setup the object by specifying the dimensions.
There are two major requirements for each statvector that you want to build:
(1) is that the statevector can map itself onto a regular grid
(2) is that the statevector can map itself (mean+covariance) onto TransCom regions
An example is given below.
"""
self.nlag = int(dacycle['time.nlag'])
self.nmembers = int(dacycle['da.optimizer.nmembers'])
#self.distribution = dacycle['da.distribution']
self.nobs = 0
self.obs_to_assimilate = () # empty containter to hold observations to assimilate later on
# These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist
# of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread.
self.ensemble_members = range(self.nlag)
for n in range(self.nlag):
self.ensemble_members[n] = []
# This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember
# that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid.
mapfile = os.path.join(dacycle.dasystem['regionsfile'])
ncf = io.ct_read(mapfile, 'read')
self.gridmap = ncf.get_variable('regions')
self.tcmap = ncf.get_variable('transcom_regions')
self.nparams = int(self.gridmap.max())
self.nparams_bio = int(ncf.get_variable('nparams_bio'))
ncf.close()
logging.info ("Regional information read from file %s" % dacycle.dasystem['regionsfile'])
logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile'])
logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile'])
# Create a dictionary for state <-> gridded map conversions
nparams = self.gridmap.max()
self.griddict = {}
for r in range(1, int(nparams) + 1):
sel = (self.gridmap.flat == r).nonzero()
if len(sel[0]) > 0:
self.griddict[r] = sel
logging.debug("A dictionary to map grids to states and vice versa was created")
# Create a matrix for state <-> TransCom conversions
self.tcmatrix = np.zeros((self.nparams, int(self.tcmap.max())), 'float')
for r in range(1, self.nparams + 1):
sel = (self.gridmap.flat == r).nonzero()
if len(sel[0]) < 1:
continue
else:
n_tc = set(self.tcmap.flatten().take(sel[0]))
if len(n_tc) > 1:
logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc))
raise ValueError
self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0
logging.debug("A matrix to map states to TransCom regions and vice versa was created")
# Create a mask for species/unknowns
self.make_species_mask()
def make_species_mask(self):
self.speciesdict = {'ch4': np.ones(self.nparams)}
logging.debug("A species mask was created, only the following species are recognized in this system:")
for k in self.speciesdict.keys():
logging.debug(" -> %s" % k)
def get_covariance(self, date, dacycle):
""" Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
"""
try:
import matplotlib.pyplot as plt
except:
pass
#----Arbitray covariance matrix ----#
#fullcov = np.zeros((self.nparams, self.nparams), float)
#for i in xrange(self.nparams):
# #fullcov[i,i] = 1. # Identity matrix
# fullcov[i,i] = 0.08
#fullcov[self.nparams-1,self.nparams-1] = 1.e-10
#---- Covariance read from file -----#
cov_file = dacycle.dasystem['covfile']
nfcr = io.CT_CDF(cov_file, 'read')
logging.info('covariance read from file: %s' %cov_file)
fullcov = nfcr.variables['covariance_matrix'][:]
# try:
# plt.imshow(fullcov)
# plt.colorbar()
# plt.savefig('fullcovariancematrix.png')
# plt.close('all')
# logging.debug("Covariance matrix visualized for inspection")
# except:
# pass
return fullcov
def read_from_legacy_file(self, filename, qual='opt'):
"""
:param filename: the full filename for the input NetCDF file
:param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file
:rtype: None
Read the StateVector information from a NetCDF file and put in a StateVector object
In principle the input file will have only one four datasets inside
called:
* `meanstate_prior`, dimensions [nlag, nparamaters]
* `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters]
* `meanstate_opt`, dimensions [nlag, nparamaters]
* `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters]
This NetCDF information can be written to file using
:meth:`~da.baseclasses.statevector.StateVector.write_to_file`
"""
f = io.ct_read(filename, 'read')
for n in range(self.nlag):
if qual == 'opt':
meanstate = f.get_variable('xac_%02d' % (n + 1))
EnsembleMembers = f.get_variable('adX_%02d' % (n + 1))
elif qual == 'prior':
meanstate = f.get_variable('xpc_%02d' % (n + 1))
EnsembleMembers = f.get_variable('pdX_%02d' % (n + 1))
if not self.ensemble_members[n] == []:
self.ensemble_members[n] = []
logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1))
for m in range(self.nmembers):
newmember = EnsembleMember(m)
newmember.param_values = EnsembleMembers[m, :].flatten() + meanstate # add the mean to the deviations to hold the full parameter values
self.ensemble_members[n].append(newmember)
f.close()
logging.info('Successfully read the State Vector from file (%s) ' % filename)
def write_members_to_file(self, lag, outdir, dacycle):
"""
:param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag]
:rtype: None
Write ensemble member information to a NetCDF file for later use. The standard output filename is
*parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location
is the `dir.input` of the dacycle object. In principle the output file will have only two datasets inside
called `parametervalues` which is of dimensions `nparameters` and `parametermap` which is of dimensions (180,360).
This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object.
.. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you
can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function.
"""
# These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was
# to do the import already at the start of the module, not just in this method.
#import da.tools.io as io
#import da.tools.io4 as io
members = self.ensemble_members[lag]
#--- regionfile ---#
region_land_file = dacycle.dasystem['regionsfile']
nfcr = io.CT_CDF(region_land_file, 'read')
logging.debug('region land file read: %s' %region_land_file)
bio_land = nfcr.variables['bio_land'][:]
fossil_land = nfcr.variables['fossil_land'][:]
for mem in members:
filename = os.path.join(outdir, 'parameters.%03d.nc' % mem.membernumber)
ncf = io.CT_CDF(filename, method='create')
dimparams = ncf.add_params_dim(self.nparams)
dimgrid = ncf.add_latlon_dim()
data = mem.param_values
savedict = io.std_savedict.copy()
savedict['name'] = "parametervalues"
savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimparams
savedict['values'] = data
savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
# #--- bio ---#
# dimparams_bio = ncf.add_params_dim(self.nparams_bio)
# data_bio = data[0:self.nparams_bio]
#
# savedict = io.std_savedict.copy()
# savedict['name'] = "parametervalues_bio"
# savedict['long_name'] = "parameter_bio_values_for_member_%d" % mem.membernumber
# savedict['units'] = "unitless"
# savedict['dims'] = dimparams_bio
# savedict['values'] = data_bio
# savedict['comment'] = 'These are parameter bio values to use for member %d' % mem.membernumber
# ncf.add_data(savedict)
#
# #--- fossil ---#
# nparams_fossil = self.nparams - self.nparams_bio
# dimparams_fossil = ncf.add_params_dim(nparams_fossil)
# data_fossil = data[self.nparams_bio:nparams_fossil]
#
# savedict = io.std_savedict.copy()
# savedict['name'] = "parametervalues_ff"
# savedict['long_name'] = "parameter_ff_values_for_member_%d" % mem.membernumber
# savedict['units'] = "unitless"
# savedict['dims'] = dimparams_fossil
# savedict['values'] = data_fossil
# savedict['comment'] = 'These are parameter fossil values to use for member %d' % mem.membernumber
# ncf.add_data(savedict)
#--- All parameters, gridded ---#
griddata = self.vector2grid(vectordata=data)
savedict = io.std_savedict.copy()
savedict['name'] = "parametermap"
savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimgrid
savedict['values'] = griddata.tolist()
savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
#--- bio parameters, gridded ---#
griddata_bio = griddata[:]
w = np.where(bio_land==0)
griddata_bio[w] = 1.0
savedict = io.std_savedict.copy()
savedict['name'] = "parametermap_bio"
savedict['long_name'] = "parametermap_bio_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimgrid
savedict['values'] = griddata_bio.tolist()
savedict['comment'] = 'These are gridded parameter bio values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
#--- All parameters, gridded ---#
griddata = self.vector2grid(vectordata=data)
griddata_fossil = griddata[:]
w = np.where(fossil_land==0)
griddata_fossil[w] = 1.0
savedict = io.std_savedict.copy()
savedict['name'] = "parametermap_ff"
savedict['long_name'] = "parametermap_ff_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimgrid
savedict['values'] = griddata_fossil.tolist()
savedict['comment'] = 'These are gridded parameter fossil values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
ncf.close()
logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename))
# def write_to_file(self, filename, qual):
# """
# :param filename: the full filename for the output NetCDF file
# :rtype: None
#
# Write the StateVector information to a NetCDF file for later use.
# In principle the output file will have only one two datasets inside
# called:
# * `meanstate`, dimensions [nlag, nparamaters]
# * `ensemblestate`, dimensions [nlag,nmembers, nparameters]
#
# This NetCDF information can be read back into a StateVector object using
# :meth:`~da.baseclasses.statevector.StateVector.read_from_file`
#
# """
# #import da.tools.io4 as io
# #import da.tools.io as io
#
# if qual == 'prior':
# f = io.CT_CDF(filename, method='create')
# logging.debug('Creating new StateVector output file (%s)' % filename)
# #qual = 'prior'
# else:
# f = io.CT_CDF(filename, method='write')
# logging.debug('Opening existing StateVector output file (%s)' % filename)
# #qual = 'opt'
#
# dimparams = f.add_params_dim(self.nparams)
# dimmembers = f.add_members_dim(self.nmembers)
# dimlag = f.add_lag_dim(self.nlag, unlimited=True)
#
# for n in range(self.nlag):
# members = self.ensemble_members[n]
# mean_state = members[0].param_values
#
# savedict = f.standard_var(varname='meanstate_%s' % qual)
# savedict['dims'] = dimlag + dimparams
# savedict['values'] = mean_state
# savedict['count'] = n
# savedict['comment'] = 'this represents the mean of the ensemble'
# f.add_data(savedict)
#
# savedict = f.standard_var(varname='meanstate_bio_%s' % qual)
# savedict['dims'] = dimlag + dimparams_bio
# savedict['values'] = mean_state_bio
# savedict['count'] = n
# savedict['comment'] = 'this represents the mean of the ensemble'
# f.add_data(savedict)
#
# savedict = f.standard_var(varname='meanstate_fossil_%s' % qual)
# savedict['dims'] = dimlag + dimparams_fossil
# savedict['values'] = mean_state_fossil
# savedict['count'] = n
# savedict['comment'] = 'this represents the mean of the ensemble'
# f.add_data(savedict)
#
# members = self.ensemble_members[n]
# devs = np.asarray([m.param_values.flatten() for m in members])
# data = devs - np.asarray(mean_state)
#
# savedict = f.standard_var(varname='ensemblestate_%s' % qual)
# savedict['dims'] = dimlag + dimmembers + dimparams
# savedict['values'] = data
# savedict['count'] = n
# savedict['comment'] = 'this represents deviations from the mean of the ensemble'
# f.add_data(savedict)
# f.close()
#
# logging.info('Successfully wrote the State Vector to file (%s) ' % filename)
def make_new_ensemble(self, lag, dacycle, covariancematrix=None):
"""
:param lag: an integer indicating the time step in the lag order
:param covariancematrix: a matrix to draw random values from
:rtype: None
Make a new ensemble, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is
used to draw ensemblemembers from. If this argument is not passed it will be substituted with an
identity matrix of the same dimensions.
"""
if covariancematrix == None:
covariancematrix = np.identity(self.nparams)
# Make a cholesky decomposition of the covariance matrix
_, s, _ = np.linalg.svd(covariancematrix)
dof = np.sum(s) ** 2 / sum(s ** 2)
C = np.linalg.cholesky(covariancematrix)
logging.debug('Cholesky decomposition has succeeded ')
logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof)))
# Create mean values
newmean = np.ones(self.nparams, float) # standard value for a new time step is 1.0
# If this is not the start of the filter, average previous two optimized steps into the mix
if lag == self.nlag - 1 and self.nlag >= 3:
newmean += self.ensemble_members[lag - 1][0].param_values + \
self.ensemble_members[lag - 2][0].param_values
newmean = newmean / 3.0
# Create the first ensemble member with a deviation of 0.0 and add to list
newmember = EnsembleMember(0)
newmember.param_values = newmean.flatten() # no deviations
self.ensemble_members[lag].append(newmember)
# Create members 1:nmembers and add to ensemble_members list
#dist = self.distribution
for member in range(1, self.nmembers):
# if dist == 'normal':
# rands = np.random.randn(self.nparams)
# elif dist == 'gamma11':
# rands = np.random.exponential(1,size=self.nparams) - 1 #substruct mean=1.
# elif dist == 'lognormal':
# rands = np.random.lognormal(0,0.5,size=self.nparams) - 1 #substruct median=1.
# else:
# logging.error('Distribution (%s)to generate from not known' %dist)
# sys.exit(2)
rands = np.random.randn(self.nparams)
newmember = EnsembleMember(member)
newmember.param_values = np.dot(C, rands) + newmean
self.ensemble_members[lag].append(newmember)
logging.info('%d new ensemble members generated from %s were added to the state vector # %d'\
#%(self.nmembers,dist, (lag + 1)))
%(self.nmembers,'normal', (lag + 1)))
def propagate(self, dacycle):
"""
:rtype: None
Propagate the parameter values in the StateVector to the next cycle. This means a shift by one cycle
step for all states that will
be optimized once more, and the creation of a new ensemble for the time step that just
comes in for the first time (step=nlag).
In the future, this routine can incorporate a formal propagation of the statevector.
"""
# Remove State Vector n=1 by simply "popping" it from the list and appending a new empty list at the front. This empty list will
# hold the new ensemble for the new cycle
self.ensemble_members.pop(0)
self.ensemble_members.append([])
# And now create a new time step of mean + members for n=nlag
date = dacycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(dacycle['time.cycle']))
cov = self.get_covariance(date, dacycle)
self.make_new_ensemble(self.nlag - 1, dacycle, cov)
logging.info('The state vector has been propagated by one cycle')
################### End Class MethaneStateVector ###################
if __name__ == "__main__":
pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment