diff --git a/da/methane_2lambdas/__init__.py b/da/methane_2lambdas/__init__.py deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/da/methane_2lambdas/analysis/__init__.py b/da/methane_2lambdas/analysis/__init__.py deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/da/methane_2lambdas/analysis/expand_fluxes.py b/da/methane_2lambdas/analysis/expand_fluxes.py deleted file mode 100755 index 0af128caa2404cdef7c7a2cd9ae1653ac8a5a6df..0000000000000000000000000000000000000000 --- a/da/methane_2lambdas/analysis/expand_fluxes.py +++ /dev/null @@ -1,473 +0,0 @@ -#!/usr/bin/env python -# expand_fluxes.py -import sys -sys.path.append('../../') -import os -from datetime import datetime, timedelta - -import logging -import numpy as np -from da.tools.general import date2num, num2date -import da.methane.io4 as io -from da.methane.analysis.tools_regions import globarea,state_to_grid -from da.tools.general import create_dirs - - - -""" -Author: aki - -Revision History: -File created on April 2016 by Aki T. - -""" - -def save_weekly_avg_1x1_data(dacycle, statevector): - """ - Function creates a NetCDF file with output on 1x1 degree grid. It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Region bio_land and fossil_land -# - - region_land_file = dacycle.dasystem['regionsfile'] - nfcr = io.CT_CDF(region_land_file, 'read') - logging.info('region land file read: %s' %region_land_file) - bio_land = nfcr.variables['bio_land'][:] - fossil_land = nfcr.variables['fossil_land'][:] - #logging.debug("bio_land %s" %bio_land[25,205]) - #logging.debug("ff_land %s" %fossil_land[25,205]) - -# -# Create or open NetCDF output file -# - logging.debug("Create NetCDF output file: flux_1x1.%s.nc" % startdate.strftime('%Y-%m-%d')) - saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d')) - ncf = io.CT_CDF(saveas, 'write') - -# -# Create dimensions and lat/lon grid -# - dimgrid = ncf.add_latlon_dim() - dimensemble = ncf.add_dim('members', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - -# -# if not, process this cycle. Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux'])) - term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - file.close() - - next = ncf.inq_unlimlen()[0] - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. -# - - if prior: - logging.debug("prior") - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = enddate - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=n) - -# Replace the mean statevector by all ones (assumed priors) - - gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,)) - - logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n)) - break - else: - logging.debug("opt") - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=1) - - logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - logging.debug("gridmean %s" %gridmean[25,205]) - logging.debug("bio land %s" %bio_land[25,205]) - w = np.where(bio_land == 0) - s = gridmean.shape - gridmean_bio = np.zeros(shape=s) - gridmean_bio[:,:] = gridmean[:,:] - gridmean_bio[w] = 1.0 - biomapped = bio * gridmean_bio - s = gridensemble.shape - gridensemble_bio = np.zeros(shape=s) - gridensemble_bio[:,:,:] = gridensemble[:,:,:] - gridensemble_bio[:, w[0],w[1]] = 0.0 - biovarmapped = bio * gridensemble_bio - - w = np.where(fossil_land == 0) - gridmean_fossil = gridmean - gridmean_fossil[w] = 1.0 - fossilmapped = fossil * gridmean_fossil - gridensemble_fossil = gridensemble - gridensemble_fossil[:, w[0],w[1]] = 0.0 - fossilvarmapped = fossil * gridensemble_fossil - - logging.debug("gridmean %s" %gridmean[25,205]) - logging.debug("gridmean bio %s" %gridmean_bio[25,205]) - logging.debug("gridmean ff %s" %gridmean_fossil[25,205]) -# -# -# For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write -# - savedict = ncf.standard_var(varname='bio_flux_' + qual_short) - savedict['values'] = biomapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='fossil_flux_' + qual_short) - savedict['values'] = fossilmapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - #print biovarmapped.shape - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - savedict['values'] = biovarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short) - savedict['values'] = fossilvarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - # End prior/posterior block - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = fire.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='ocn_flux_imp') - savedict['values'] = ocean.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname='term_flux_imp') - savedict['values'] = term.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - #area = globarea() - #savedict = ncf.standard_var(varname='cell_area') - #savedict['values'] = area.tolist() - #savedict['dims'] = dimgrid - #ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='date') - savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0 - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Gridded weekly average fluxes now written") - - return saveas - -def save_weekly_avg_state_data(dacycle, statevector): - """ - Function creates a NetCDF file with output for all parameters. It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ - logging.debug('start: save weekly avg state data') - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - area = globarea() - vectorarea = statevector.grid2vector(griddata=area, method='sum') - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Create or open NetCDF output file -# - saveas = os.path.join(dirname, 'statefluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - - logging.debug('save weekly avg state data to file %s' %saveas) -# -# Create dimensions and lat/lon grid -# - dimregs = ncf.add_dim('nparameters', statevector.nparams) - dimmembers = ncf.add_dim('nmembers', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - next = ncf.inq_unlimlen()[0] - logging.debug('Writing of data for date %s: file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - -# -# if not, process this cycle. Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux'])) - term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - file.close() - - next = ncf.inq_unlimlen()[0] - - vectorbio = statevector.grid2vector(griddata=bio * area, method='sum') - vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum') - vectorfire = statevector.grid2vector(griddata=fire * area, method='sum') - vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum') - vectorterm = statevector.grid2vector(griddata=term * area, method='sum') - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. -# - if prior: - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = enddate - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) -# Replace the mean statevector by all ones (assumed priors) - statemean = np.ones((statevector.nparams,)) - choicelag = n - logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag)) - break - else: - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename) - choicelag = 1 - statemean = statevector.ensemble_members[choicelag - 1][0].param_values - logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - data = statemean * vectorbio # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. -# -# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values -# which are assumed 1.0 in the prior always. -# - - members = statevector.ensemble_members[choicelag - 1] - deviations = np.array([mem.param_values * vectorbio for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'bio_flux_%s_std' % qual_short - savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = statemean * vectorfossil # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='fossil_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. -# -# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values -# which are assumed 1.0 in the prior always. -# - - deviations = np.array([mem.param_values * vectorfossil for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short) - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'fossil_flux_%s_std' % qual_short - savedict['long_name'] = 'Fossil flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorfire - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorterm - - savedict = ncf.standard_var(varname='term_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorocn - - savedict = ncf.standard_var(varname='ocn_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Vector weekly average fluxes now written") - - return saveas - - diff --git a/da/methane_2lambdas/statevector.py b/da/methane_2lambdas/statevector.py deleted file mode 100755 index b64c6330af011a51b8897ea380fb8def95f01c6c..0000000000000000000000000000000000000000 --- a/da/methane_2lambdas/statevector.py +++ /dev/null @@ -1,483 +0,0 @@ -#!/usr/bin/env python -# ct_statevector_tools.py - -""" -Author : aki - -Revision History: -File created on 18 Nov 2013. - -""" - -import os -import sys -sys.path.append(os.getcwd()) - -import logging -import numpy as np -from da.baseclasses.statevector import StateVector, EnsembleMember - -from datetime import timedelta -import da.methane.io4 as io - -identifier = 'CarbonTracker Statevector ' -version = '0.0' - -################### Begin Class CH4StateVector ################### - -class MethaneStateVector(StateVector): - """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """ - - def setup(self, dacycle): - """ - setup the object by specifying the dimensions. - There are two major requirements for each statvector that you want to build: - - (1) is that the statevector can map itself onto a regular grid - (2) is that the statevector can map itself (mean+covariance) onto TransCom regions - - An example is given below. - """ - - self.nlag = int(dacycle['time.nlag']) - self.nmembers = int(dacycle['da.optimizer.nmembers']) - #self.distribution = dacycle['da.distribution'] - self.nobs = 0 - - self.obs_to_assimilate = () # empty containter to hold observations to assimilate later on - - # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist - # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. - - self.ensemble_members = range(self.nlag) - - for n in range(self.nlag): - self.ensemble_members[n] = [] - - - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember - # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. - - mapfile = os.path.join(dacycle.dasystem['regionsfile']) - ncf = io.ct_read(mapfile, 'read') - self.gridmap = ncf.get_variable('regions') - self.tcmap = ncf.get_variable('transcom_regions') - self.nparams = int(self.gridmap.max()) - self.nparams_bio = int(ncf.get_variable('nparams_bio')) - ncf.close() - - logging.info ("Regional information read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - - # Create a dictionary for state <-> gridded map conversions - - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) > 0: - self.griddict[r] = sel - - logging.debug("A dictionary to map grids to states and vice versa was created") - - # Create a matrix for state <-> TransCom conversions - - self.tcmatrix = np.zeros((self.nparams, int(self.tcmap.max())), 'float') - - for r in range(1, self.nparams + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) < 1: - continue - else: - n_tc = set(self.tcmap.flatten().take(sel[0])) - if len(n_tc) > 1: - logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc)) - raise ValueError - self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0 - - logging.debug("A matrix to map states to TransCom regions and vice versa was created") - - # Create a mask for species/unknowns - - self.make_species_mask() - - - def make_species_mask(self): - - self.speciesdict = {'ch4': np.ones(self.nparams)} - logging.debug("A species mask was created, only the following species are recognized in this system:") - for k in self.speciesdict.keys(): - logging.debug(" -> %s" % k) - - def get_covariance(self, date, dacycle): - """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - """ - try: - import matplotlib.pyplot as plt - except: - pass - - #----Arbitray covariance matrix ----# - #fullcov = np.zeros((self.nparams, self.nparams), float) - - #for i in xrange(self.nparams): - # #fullcov[i,i] = 1. # Identity matrix - # fullcov[i,i] = 0.08 - #fullcov[self.nparams-1,self.nparams-1] = 1.e-10 - - #---- Covariance read from file -----# - cov_file = dacycle.dasystem['covfile'] - nfcr = io.CT_CDF(cov_file, 'read') - logging.info('covariance read from file: %s' %cov_file) - fullcov = nfcr.variables['covariance_matrix'][:] - -# try: -# plt.imshow(fullcov) -# plt.colorbar() -# plt.savefig('fullcovariancematrix.png') -# plt.close('all') -# logging.debug("Covariance matrix visualized for inspection") -# except: -# pass - - return fullcov - - def read_from_legacy_file(self, filename, qual='opt'): - """ - :param filename: the full filename for the input NetCDF file - :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file - :rtype: None - - Read the StateVector information from a NetCDF file and put in a StateVector object - In principle the input file will have only one four datasets inside - called: - * `meanstate_prior`, dimensions [nlag, nparamaters] - * `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters] - * `meanstate_opt`, dimensions [nlag, nparamaters] - * `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters] - - This NetCDF information can be written to file using - :meth:`~da.baseclasses.statevector.StateVector.write_to_file` - - """ - - f = io.ct_read(filename, 'read') - - for n in range(self.nlag): - if qual == 'opt': - meanstate = f.get_variable('xac_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('adX_%02d' % (n + 1)) - - elif qual == 'prior': - meanstate = f.get_variable('xpc_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('pdX_%02d' % (n + 1)) - - if not self.ensemble_members[n] == []: - self.ensemble_members[n] = [] - logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1)) - - for m in range(self.nmembers): - newmember = EnsembleMember(m) - newmember.param_values = EnsembleMembers[m, :].flatten() + meanstate # add the mean to the deviations to hold the full parameter values - self.ensemble_members[n].append(newmember) - - - f.close() - - logging.info('Successfully read the State Vector from file (%s) ' % filename) - - def write_members_to_file(self, lag, outdir, dacycle): - """ - :param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag] - :rtype: None - - Write ensemble member information to a NetCDF file for later use. The standard output filename is - *parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location - is the `dir.input` of the dacycle object. In principle the output file will have only two datasets inside - called `parametervalues` which is of dimensions `nparameters` and `parametermap` which is of dimensions (180,360). - This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object. - - .. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you - can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function. - - """ - - # These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was - # to do the import already at the start of the module, not just in this method. - - #import da.tools.io as io - #import da.tools.io4 as io - - members = self.ensemble_members[lag] - - #--- regionfile ---# - region_land_file = dacycle.dasystem['regionsfile'] - nfcr = io.CT_CDF(region_land_file, 'read') - logging.debug('region land file read: %s' %region_land_file) - bio_land = nfcr.variables['bio_land'][:] - fossil_land = nfcr.variables['fossil_land'][:] - - - for mem in members: - filename = os.path.join(outdir, 'parameters.%03d.nc' % mem.membernumber) - ncf = io.CT_CDF(filename, method='create') - dimparams = ncf.add_params_dim(self.nparams) - dimgrid = ncf.add_latlon_dim() - - data = mem.param_values - - savedict = io.std_savedict.copy() - savedict['name'] = "parametervalues" - savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimparams - savedict['values'] = data - savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - -# #--- bio ---# -# dimparams_bio = ncf.add_params_dim(self.nparams_bio) -# data_bio = data[0:self.nparams_bio] -# -# savedict = io.std_savedict.copy() -# savedict['name'] = "parametervalues_bio" -# savedict['long_name'] = "parameter_bio_values_for_member_%d" % mem.membernumber -# savedict['units'] = "unitless" -# savedict['dims'] = dimparams_bio -# savedict['values'] = data_bio -# savedict['comment'] = 'These are parameter bio values to use for member %d' % mem.membernumber -# ncf.add_data(savedict) -# -# #--- fossil ---# -# nparams_fossil = self.nparams - self.nparams_bio -# dimparams_fossil = ncf.add_params_dim(nparams_fossil) -# data_fossil = data[self.nparams_bio:nparams_fossil] -# -# savedict = io.std_savedict.copy() -# savedict['name'] = "parametervalues_ff" -# savedict['long_name'] = "parameter_ff_values_for_member_%d" % mem.membernumber -# savedict['units'] = "unitless" -# savedict['dims'] = dimparams_fossil -# savedict['values'] = data_fossil -# savedict['comment'] = 'These are parameter fossil values to use for member %d' % mem.membernumber -# ncf.add_data(savedict) - - #--- All parameters, gridded ---# - griddata = self.vector2grid(vectordata=data) - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap" - savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata.tolist() - savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - #--- bio parameters, gridded ---# - griddata_bio = griddata[:] - w = np.where(bio_land==0) - griddata_bio[w] = 1.0 - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap_bio" - savedict['long_name'] = "parametermap_bio_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata_bio.tolist() - savedict['comment'] = 'These are gridded parameter bio values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - #--- All parameters, gridded ---# - griddata = self.vector2grid(vectordata=data) - griddata_fossil = griddata[:] - w = np.where(fossil_land==0) - griddata_fossil[w] = 1.0 - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap_ff" - savedict['long_name'] = "parametermap_ff_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata_fossil.tolist() - savedict['comment'] = 'These are gridded parameter fossil values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - ncf.close() - - logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename)) - -# def write_to_file(self, filename, qual): -# """ -# :param filename: the full filename for the output NetCDF file -# :rtype: None -# -# Write the StateVector information to a NetCDF file for later use. -# In principle the output file will have only one two datasets inside -# called: -# * `meanstate`, dimensions [nlag, nparamaters] -# * `ensemblestate`, dimensions [nlag,nmembers, nparameters] -# -# This NetCDF information can be read back into a StateVector object using -# :meth:`~da.baseclasses.statevector.StateVector.read_from_file` -# -# """ -# #import da.tools.io4 as io -# #import da.tools.io as io -# -# if qual == 'prior': -# f = io.CT_CDF(filename, method='create') -# logging.debug('Creating new StateVector output file (%s)' % filename) -# #qual = 'prior' -# else: -# f = io.CT_CDF(filename, method='write') -# logging.debug('Opening existing StateVector output file (%s)' % filename) -# #qual = 'opt' -# -# dimparams = f.add_params_dim(self.nparams) -# dimmembers = f.add_members_dim(self.nmembers) -# dimlag = f.add_lag_dim(self.nlag, unlimited=True) -# -# for n in range(self.nlag): -# members = self.ensemble_members[n] -# mean_state = members[0].param_values -# -# savedict = f.standard_var(varname='meanstate_%s' % qual) -# savedict['dims'] = dimlag + dimparams -# savedict['values'] = mean_state -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# savedict = f.standard_var(varname='meanstate_bio_%s' % qual) -# savedict['dims'] = dimlag + dimparams_bio -# savedict['values'] = mean_state_bio -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# savedict = f.standard_var(varname='meanstate_fossil_%s' % qual) -# savedict['dims'] = dimlag + dimparams_fossil -# savedict['values'] = mean_state_fossil -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# members = self.ensemble_members[n] -# devs = np.asarray([m.param_values.flatten() for m in members]) -# data = devs - np.asarray(mean_state) -# -# savedict = f.standard_var(varname='ensemblestate_%s' % qual) -# savedict['dims'] = dimlag + dimmembers + dimparams -# savedict['values'] = data -# savedict['count'] = n -# savedict['comment'] = 'this represents deviations from the mean of the ensemble' -# f.add_data(savedict) -# f.close() -# -# logging.info('Successfully wrote the State Vector to file (%s) ' % filename) - - - def make_new_ensemble(self, lag, dacycle, covariancematrix=None): - """ - :param lag: an integer indicating the time step in the lag order - :param covariancematrix: a matrix to draw random values from - :rtype: None - - Make a new ensemble, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - - The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is - used to draw ensemblemembers from. If this argument is not passed it will be substituted with an - identity matrix of the same dimensions. - - """ - - if covariancematrix == None: - covariancematrix = np.identity(self.nparams) - - # Make a cholesky decomposition of the covariance matrix - - - _, s, _ = np.linalg.svd(covariancematrix) - dof = np.sum(s) ** 2 / sum(s ** 2) - C = np.linalg.cholesky(covariancematrix) - - logging.debug('Cholesky decomposition has succeeded ') - logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof))) - - - # Create mean values - - newmean = np.ones(self.nparams, float) # standard value for a new time step is 1.0 - - # If this is not the start of the filter, average previous two optimized steps into the mix - - if lag == self.nlag - 1 and self.nlag >= 3: - newmean += self.ensemble_members[lag - 1][0].param_values + \ - self.ensemble_members[lag - 2][0].param_values - newmean = newmean / 3.0 - - # Create the first ensemble member with a deviation of 0.0 and add to list - - newmember = EnsembleMember(0) - newmember.param_values = newmean.flatten() # no deviations - self.ensemble_members[lag].append(newmember) - - # Create members 1:nmembers and add to ensemble_members list - #dist = self.distribution - - for member in range(1, self.nmembers): - # if dist == 'normal': - # rands = np.random.randn(self.nparams) - # elif dist == 'gamma11': - # rands = np.random.exponential(1,size=self.nparams) - 1 #substruct mean=1. - # elif dist == 'lognormal': - # rands = np.random.lognormal(0,0.5,size=self.nparams) - 1 #substruct median=1. - # else: - # logging.error('Distribution (%s)to generate from not known' %dist) - # sys.exit(2) - - rands = np.random.randn(self.nparams) - newmember = EnsembleMember(member) - newmember.param_values = np.dot(C, rands) + newmean - self.ensemble_members[lag].append(newmember) - - logging.info('%d new ensemble members generated from %s were added to the state vector # %d'\ - #%(self.nmembers,dist, (lag + 1))) - %(self.nmembers,'normal', (lag + 1))) - - def propagate(self, dacycle): - """ - :rtype: None - - Propagate the parameter values in the StateVector to the next cycle. This means a shift by one cycle - step for all states that will - be optimized once more, and the creation of a new ensemble for the time step that just - comes in for the first time (step=nlag). - In the future, this routine can incorporate a formal propagation of the statevector. - - """ - - # Remove State Vector n=1 by simply "popping" it from the list and appending a new empty list at the front. This empty list will - # hold the new ensemble for the new cycle - - self.ensemble_members.pop(0) - self.ensemble_members.append([]) - - # And now create a new time step of mean + members for n=nlag - date = dacycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(dacycle['time.cycle'])) - cov = self.get_covariance(date, dacycle) - self.make_new_ensemble(self.nlag - 1, dacycle, cov) - - logging.info('The state vector has been propagated by one cycle') - -################### End Class MethaneStateVector ################### - - -if __name__ == "__main__": - pass