diff --git a/.gitignore b/.gitignore
index c74b61496ef78a2f757661002757d1d2d049cb65..a5c826fe991a357dd91fa2dff3c6dfd1c736e214 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,12 @@
 *.py
 *.rc
 *.jb
-/da/*/__pycache__
-/da/*/*.pyc
+joblog
+# Exclude the logfiles
+*.log
+# Exclude files created by the slurm system
+slurm*.out
+# Exclude compiled files
+da/**/__pycache__/
+da/**/*.pyc
+#da/**/*.bak
diff --git a/.project b/.project
deleted file mode 100644
index e7a10c149e51dc88ac08d6b9122bec7dc38addd1..0000000000000000000000000000000000000000
--- a/.project
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>ctdas_light_refactor_new</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.python.pydev.PyDevBuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.python.pydev.pythonNature</nature>
- </natures>
-</projectDescription>
diff --git a/.pydevproject b/.pydevproject
deleted file mode 100644
index d2dbf6b18c95c95979481baf6c8192e09e375788..0000000000000000000000000000000000000000
--- a/.pydevproject
+++ /dev/null
@@ -1,10 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<?eclipse-pydev version="1.0"?>
-
-<pydev_project>
-<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
-<path>/ctdas_light_refactor_new</path>
-</pydev_pathproperty>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
-</pydev_project>
diff --git a/da/__pycache__/__init__.cpython-37.pyc b/da/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index f4c07d50f2545418c97a2c5feebcb6ba199a5303..0000000000000000000000000000000000000000
Binary files a/da/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/da/analysis/country_regiondict.pickle b/da/analysis/country_regiondict.pickle
deleted file mode 100644
index 4e6182c4914dc1d1fd750ea6aef8711f7ec239ce..0000000000000000000000000000000000000000
Binary files a/da/analysis/country_regiondict.pickle and /dev/null differ
diff --git a/da/analysis/expand_fluxes.py.bak b/da/analysis/expand_fluxes.py.bak
deleted file mode 100755
index aa91400c81c1a1e8bd95eedc3588329fda581b97..0000000000000000000000000000000000000000
--- a/da/analysis/expand_fluxes.py.bak
+++ /dev/null
@@ -1,1137 +0,0 @@
-"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
-Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
-updates of the code. See also: http://www.carbontracker.eu.
-
-This program is free software: you can redistribute it and/or modify it under the
-terms of the GNU General Public License as published by the Free Software Foundation,
-version 3. This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with this
-program. 
If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# expand_fluxes.py -import sys -import os -sys.path.append('../../') -rootdir = os.getcwd().split('da/')[0] -analysisdir = os.path.join(rootdir, 'da/analysis') -from datetime import datetime, timedelta - -import logging -import numpy as np -from da.tools.general import date2num, num2date -import da.tools.io4 as io -from da.analysis.tools_regions import globarea, state_to_grid -from da.tools.general import create_dirs -from da.analysis.tools_country import countryinfo # needed here -from da.analysis.tools_transcom import transcommask, ExtendedTCRegions -import netCDF4 as cdf - -import da.analysis.tools_transcom as tc -import da.analysis.tools_country as ct -import da.analysis.tools_time as timetools - - - -""" -Author: Wouter Peters (Wouter.Peters@noaa.gov) - -Revision History: -File created on 21 Ocotber 2008. - -""" - -def proceed_dialog(txt, yes=['y', 'yes'], all=['a', 'all', 'yes-to-all']): - """ function to ask whether to proceed or not """ - response = raw_input(txt) - if response.lower() in yes: - return 1 - if response.lower() in all: - return 2 - return 0 - -def save_weekly_avg_1x1_data(dacycle, statevector): - """ - Function creates a NetCDF file with output on 1x1 degree grid. It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Create or open NetCDF output file -# - saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d')) - ncf = io.CT_CDF(saveas, 'write') - -# -# Create dimensions and lat/lon grid -# - dimgrid = ncf.add_latlon_dim() - dimensemble = ncf.add_dim('members', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - -# -# if not, process this cycle. 
Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.co2.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.co2.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys(): - sam = True - biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux'])) - firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux'])) - else: sam = False - file.close() - - if sam: - bio = bio + biosam - fire = fire + firesam - - next = ncf.inq_unlimlen()[0] - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. -# - - if prior: - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = startdate + n*dt - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=n) - -# Replace the mean statevector by all ones (assumed priors) - - gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,)) - - logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n)) - break - else: - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=1) - - logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - print gridensemble.shape, bio.shape, gridmean.shape - biomapped = bio * gridmean - oceanmapped = ocean * gridmean - biovarmapped = bio * gridensemble - oceanvarmapped = ocean * gridensemble - -# -# -# For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write -# - savedict = ncf.standard_var(varname='bio_flux_' + qual_short) - savedict['values'] = biomapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='ocn_flux_' + qual_short) - savedict['values'] = oceanmapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - print biovarmapped.shape - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - savedict['values'] = biovarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = 
ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short) - savedict['values'] = oceanvarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - # End prior/posterior block - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = fire.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='fossil_flux_imp') - savedict['values'] = fossil.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - area = globarea() - savedict = ncf.standard_var(varname='cell_area') - savedict['values'] = area.tolist() - savedict['dims'] = dimgrid - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='date') - savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0 - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Gridded weekly average fluxes now written") - - return saveas - -def save_weekly_avg_state_data(dacycle, statevector): - """ - Function creates a NetCDF file with output for all parameters. It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ - - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - area = globarea() - vectorarea = statevector.grid2vector(griddata=area, method='sum') - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Create or open NetCDF output file -# - saveas = os.path.join(dirname, 'statefluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - -# -# Create dimensions and lat/lon grid -# - dimregs = ncf.add_dim('nparameters', statevector.nparams) - dimmembers = ncf.add_dim('nmembers', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - next = ncf.inq_unlimlen()[0] - -# -# if not, process this cycle. 
Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.co2.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.co2.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys(): - sam = True - biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux'])) - firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux'])) - else: sam = False - file.close() - - if sam: - bio = bio + biosam - fire = fire + firesam - - next = ncf.inq_unlimlen()[0] - - vectorbio = statevector.grid2vector(griddata=bio * area, method='sum') - vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum') - vectorfire = statevector.grid2vector(griddata=fire * area, method='sum') - vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum') - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. -# - if prior: - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = enddate - timedelta(dt.days * n) - priordate = startdate + n*dt - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) -# Replace the mean statevector by all ones (assumed priors) - statemean = np.ones((statevector.nparams,)) - choicelag = n - logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag)) - break - else: - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename) - choicelag = 1 - statemean = statevector.ensemble_members[choicelag - 1][0].param_values - logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - data = statemean * vectorbio # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. 
-# -# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values -# which are assumed 1.0 in the prior always. -# - - members = statevector.ensemble_members[choicelag - 1] - deviations = np.array([mem.param_values * vectorbio for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'bio_flux_%s_std' % qual_short - savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = statemean * vectorocn # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='ocn_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. -# -# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values -# which are assumed 1.0 in the prior always. 
-# - - deviations = np.array([mem.param_values * vectorocn for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short) - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'ocn_flux_%s_std' % qual_short - savedict['long_name'] = 'Ocean flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorfire - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorfossil - - savedict = ncf.standard_var(varname='fossil_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Vector weekly average fluxes now written") - - return saveas - - -def save_weekly_avg_tc_data(dacycle, statevector): - """ - Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the - function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected - onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - - This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve - these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete - statevector in units of mol/box/s which we then turn into TC fluxes and covariances. 
- """ - -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - # Write/Create NetCDF output file - # - saveas = os.path.join(dirname, 'tcfluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimregs = ncf.add_region_dim(type='tc') -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes') - setattr(ncf, 'node_offset', 1) - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - - # Get input data - - area = globarea() - - infile = os.path.join(dacycle['dir.analysis'], 'data_state_weekly', 'statefluxes.nc') - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile) - return None - - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making TC fluxes ") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making TC fluxes") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the flux_1x1 file - - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - - data = ncf_in.get_variable(vname)[index] - - if vname in ['latitude','longitude', 'date', 'idate'] or 'std' in vname: - continue - elif 'ensemble' in vname: - tcdata = [] - for member in data: - tcdata.append(statevector.vector2tc(vectordata=member)) - - tcdata = np.array(tcdata) - try: - cov = tcdata.transpose().dot(tcdata) / (statevector.nmembers - 1) - except: - cov = np.dot(tcdata.transpose(), tcdata) / (statevector.nmembers - 1) # Huygens fix - - #print vname,cov.sum() - - tcdata = cov - - savedict = ncf.standard_var(varname=vname.replace('ensemble', 'cov')) - savedict['units'] = '[mol/region/s]**2' - savedict['dims'] = dimdate + dimregs + dimregs - - else: - - tcdata = statevector.vector2tc(vectordata=data) # vector to TC - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - savedict['count'] = index - savedict['values'] = tcdata - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("TransCom weekly average fluxes now written") - - return saveas - -def save_weekly_avg_ext_tc_data(dacycle): - 
""" Function SaveTCDataExt saves surface flux data to NetCDF files for extended TransCom regions - - *** Inputs *** - rundat : a RunInfo object - - *** Outputs *** - NetCDF file containing n-hourly global surface fluxes per TransCom region - - *** Example *** - ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """ - - -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - # Write/Create NetCDF output file - # - saveas = os.path.join(dirname, 'tc_extfluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimregs = ncf.add_region_dim(type='tc_ext') -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes') - setattr(ncf, 'node_offset', 1) - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - infile = os.path.join(dacycle['dir.analysis'], 'data_tc_weekly', 'tcfluxes.nc') - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile) - return None - - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making extended TC fluxes") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making extended TC fluxes ") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the tcfluxes.nc file - - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - - data = ncf_in.get_variable(vname)[index] - - if vname == 'latitude': continue - elif vname == 'longitude': continue - elif vname == 'date': continue - elif vname == 'idate': continue - elif 'cov' in vname: - - tcdata = ExtendedTCRegions(data, cov=True) - - savedict = ncf.standard_var(varname=vname) - savedict['units'] = '[mol/region/s]**2' - savedict['dims'] = dimdate + dimregs + dimregs - - else: - - tcdata = ExtendedTCRegions(data, cov=False) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - savedict['count'] = index - savedict['values'] = tcdata - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("TransCom weekly average extended fluxes now written") - - return saveas - -def 
save_weekly_avg_agg_data(dacycle, region_aggregate='olson'): - """ - Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the - function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected - onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param StateVector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - - This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve - these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete - statevector in units of mol/box/s which we then turn into TC fluxes and covariances. - """ - -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_%s_weekly' % region_aggregate)) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - logging.debug("Aggregating 1x1 fluxes to %s totals" % region_aggregate) - - - # Write/Create NetCDF output file - # - saveas = os.path.join(dirname, '%s_fluxes.%s.nc' % (region_aggregate, startdate.strftime('%Y-%m-%d'))) - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimgrid = ncf.add_latlon_dim() # for mask -# -# Select regions to aggregate to -# - - if region_aggregate == "olson": - regionmask = tc.olson240mask - dimname = 'olson' - dimregs = ncf.add_dim(dimname, regionmask.max()) - - regionnames = [] - for i in range(11): - for j in range(19): - regionnames.append("%s_%s" % (tc.transnams[i], tc.olsonnams[j],)) - regionnames.extend(tc.oifnams) - xform = False - - for i, name in enumerate(regionnames): - lab = 'Aggregate_Region_%03d' % (i + 1,) - setattr(ncf, lab, name) - - elif region_aggregate == "olson_extended": - regionmask = tc.olson_ext_mask - dimname = 'olson_ext' - dimregs = ncf.add_dim(dimname, regionmask.max()) - xform = False - - for i, name in enumerate(tc.olsonextnams): - lab = 'Aggreate_Region_%03d'%(i+1) - setattr(ncf, lab, name) - - elif region_aggregate == "transcom": - regionmask = tc.transcommask - dimname = 'tc' - dimregs = ncf.add_region_dim(type='tc') - xform = False - - elif region_aggregate == "transcom_extended": - regionmask = tc.transcommask - dimname = 'tc_ext' - dimregs = ncf.add_region_dim(type='tc_ext') - xform = True - - elif region_aggregate == "amazon": - regfile = cdf.Dataset(os.path.join(analysisdir,'amazon_mask.nc')) - regionmask = regfile.variables['regionmask'][:] - regfile.close() - dimname = 'amazon' - dimregs = ncf.add_dim(dimname, regionmask.max()) - xform = False - - elif region_aggregate == "country": - - xform = False - countrydict = ct.get_countrydict() - selected = ['Russia', 'Canada', 'China', 'United States', 'EU27', 'Brazil', 'Australia', 'India'] #,'G8','UNFCCC_annex1','UNFCCC_annex2'] - regionmask = np.zeros((180, 360,), 'float') - - for i, name in enumerate(selected): - lab = 'Country_%03d' % (i + 1,) - setattr(ncf, lab, name) - - if name == 'EU27': - namelist = ct.EU27 - elif name == 'EU25': - namelist = ct.EU25 - elif name == 
'G8': - namelist = ct.G8 - elif name == 'UNFCCC_annex1': - namelist = ct.annex1 - elif name == 'UNFCCC_annex2': - namelist = ct.annex2 - else: - namelist = [name] - - for countryname in namelist: - try: - country = countrydict[countryname] - regionmask.put(country.gridnr, i + 1) - except: - continue - - dimname = 'country' - dimregs = ncf.add_dim(dimname, regionmask.max()) - - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - # - # set title and tell GMT that we are using "pixel registration" - # - setattr(ncf, 'Title', 'CTDAS Aggregated fluxes') - setattr(ncf, 'node_offset', 1) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'regionmask' - savedict['comment'] = 'numerical mask used to aggregate 1x1 flux fields, each integer 0,...,N is one region aggregated' - savedict['values'] = regionmask.tolist() - savedict['units'] = '-' - savedict['dims'] = dimgrid - savedict['count'] = 0 - ncf.add_data(savedict) - - # Get input data from 1x1 degree flux files - - area = globarea() - - infile = os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d')) - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile) - return None - - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making TC fluxes ") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making TC fluxes ") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the statevector file - - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - if vname == 'latitude': continue - elif vname == 'longitude': continue - elif vname == 'date': continue - elif vname == 'idate': continue - elif 'std' in vname: continue - elif 'ensemble' in vname: - - data = ncf_in.get_variable(vname)[index] - - dimensemble = ncf.add_dim('members', data.shape[0]) - - regiondata = [] - for member in data: - aggdata = state_to_grid(member * area, regionmask, reverse=True, mapname=region_aggregate) - regiondata.append(aggdata) - - regiondata = np.array(regiondata) - try: - regioncov = regiondata.transpose().dot(regiondata) / (data.shape[0] - 1) - except: - regioncov = np.dot(regiondata.transpose(), regiondata) / (data.shape[0] - 1) # Huygens fix - - if xform: - regiondata = ExtendedTCRegions(regiondata,cov=False) - regioncov = ExtendedTCRegions(regioncov,cov=True) - - savedict = ncf.standard_var(varname=vname) - savedict['name'] = vname.replace('ensemble','covariance') - savedict['units'] = '[mol/region/s]^2' - savedict['dims'] = dimdate + dimregs + dimregs - savedict['count'] = index - savedict['values'] = 
regioncov - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname=vname) - savedict['name'] = vname - savedict['units'] = 'mol/region/s' - savedict['dims'] = dimdate + dimensemble + dimregs - - - elif 'flux' in vname: - - data = ncf_in.get_variable(vname)[index] - - regiondata = state_to_grid(data * area, regionmask, reverse=True, mapname=region_aggregate) - - if xform: - regiondata = ExtendedTCRegions(regiondata) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - else: - - data = ncf_in.get_variable(vname)[:] - regiondata = state_to_grid(data, regionmask, reverse=True, mapname=region_aggregate) - if xform: - regiondata = ExtendedTCRegions(regiondata) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - - savedict['count'] = index - savedict['values'] = regiondata - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("%s aggregated weekly average fluxes now written" % dimname) - - return saveas - -def save_time_avg_data(dacycle, infile, avg='monthly'): - """ Function saves time mean surface flux data to NetCDF files - - *** Inputs *** - rundat : a RunInfo object - - *** Outputs *** - daily NetCDF file containing 1-hourly global surface fluxes at 1x1 degree - - *** Example *** - ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """ - - if 'weekly' in infile: - intime = 'weekly' - if 'monthly' in infile: - intime = 'monthly' - if 'yearly' in infile: - intime = 'yearly' - - dirname, filename = os.path.split(infile) - outdir = create_dirs(os.path.join(dacycle['dir.analysis'], dirname.replace(intime, avg))) - - dectime0 = date2num(datetime(2000, 1, 1)) - -# Create NetCDF output file -# - saveas = os.path.join(outdir, filename) - ncf = io.CT_CDF(saveas, 'create') - dimdate = ncf.add_date_dim() -# -# Open input file specified from the command line -# - if not os.path.exists(infile): - logging.error("Needed input file (%s) not found. Please create this first:" % infile) - logging.error("returning...") - return None - else: - pass - - file = io.ct_read(infile, 'read') - datasets = file.variables.keys() - date = file.get_variable('date') - globatts = file.ncattrs() - - for att in globatts: - attval = file.getncattr(att) - if not att in ncf.ncattrs(): - ncf.setncattr(att, attval) - - - time = [datetime(2000, 1, 1) + timedelta(days=d) for d in date] - -# loop over datasets in infile, skip idate and date as we will make new time axis for the averaged data - - for sds in ['date'] + datasets: - -# get original data - - data = file.get_variable(sds) - varatts = file.variables[sds].ncattrs() - vardims = file.variables[sds].dimensions -# -# Depending on dims of input dataset, create dims for output dataset. Note that we add the new dimdate now. 
-# - - for d in vardims: - if 'date' in d: - continue - if d in ncf.dimensions.keys(): - pass - else: - dim = ncf.createDimension(d, size=len(file.dimensions[d])) - - savedict = ncf.standard_var(sds) - savedict['name'] = sds - savedict['dims'] = vardims - savedict['units'] = file.variables[sds].units - savedict['long_name'] = file.variables[sds].long_name - savedict['comment'] = file.variables[sds].comment - savedict['standard_name'] = file.variables[sds].standard_name - savedict['count'] = 0 - - if not 'date' in vardims: - savedict['values'] = data - ncf.add_data(savedict) - else: - - if avg == 'monthly': - time_avg, data_avg = timetools.monthly_avg(time, data) - elif avg == 'seasonal': - time_avg, data_avg = timetools.season_avg(time, data) - elif avg == 'yearly': - time_avg, data_avg = timetools.yearly_avg(time, data) - elif avg == 'longterm': - time_avg, data_avg = timetools.longterm_avg(time, data) - time_avg = [time_avg] - data_avg = [data_avg] - else: - raise ValueError, 'Averaging (%s) does not exist' % avg - - count = -1 - for dd, data in zip(time_avg, data_avg): - count = count + 1 - if sds == 'date': - savedict['values'] = date2num(dd) - dectime0 - else: - savedict['values'] = data - savedict['count'] = count - ncf.add_data(savedict, silent=True) - - sys.stdout.write('.') - - sys.stdout.write('\n') - sys.stdout.flush() - -# end NetCDF file access - file.close() - ncf.close() - - logging.info("------------------- Finished time averaging---------------------------------") - - return saveas - -if __name__ == "__main__": - from da.tools.initexit import CycleControl - from da.carbondioxide.dasystem import CO2DaSystem - from da.carbondioxide.statevector import CO2StateVector - - sys.path.append('../../') - - logging.root.setLevel(logging.DEBUG) - - dacycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'}) - dasystem = CO2DaSystem('../rc/carbontracker_ct09_opfnew.rc') - dacycle.dasystem = dasystem - dacycle.setup() - dacycle.parse_times() - - - - statevector = CO2StateVector() - statevector.setup(dacycle) - - while dacycle['time.start'] < dacycle['time.finish']: - save_weekly_avg_1x1_data(dacycle, statevector) - save_weekly_avg_state_data(dacycle, statevector) - save_weekly_avg_tc_data(dacycle, statevector) - save_weekly_avg_ext_tc_data(dacycle) - save_weekly_avg_agg_data(dacycle, region_aggregate='olson') - save_weekly_avg_agg_data(dacycle, region_aggregate='olson_extended') - save_weekly_avg_agg_data(dacycle, region_aggregate='transcom') - save_weekly_avg_agg_data(dacycle, region_aggregate='transcom_extended') - save_weekly_avg_agg_data(dacycle, region_aggregate='country') - save_weekly_avg_agg_data(dacycle, region_aggregate='amazon') - - dacycle.advance_cycle_times() - - statevector = None # free memory - - sys.exit(0) - diff --git a/da/analysis/map_to_tc.pickle b/da/analysis/map_to_tc.pickle deleted file mode 100644 index ccea75a3084680e9e29f56401cee418c6c991ee8..0000000000000000000000000000000000000000 Binary files a/da/analysis/map_to_tc.pickle and /dev/null differ diff --git a/da/analysis/merge_ctdas_runs.py.bak b/da/analysis/merge_ctdas_runs.py.bak deleted file mode 100755 index 6df2d429ded912e0c4060edc22c7da9f04b8f095..0000000000000000000000000000000000000000 --- a/da/analysis/merge_ctdas_runs.py.bak +++ /dev/null @@ -1,104 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. 
See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# merge_ctdas_runs.py - -""" -Author : peters - -Revision History: -File created on 14 Jul 2014. - -This scrip merges the analysis directory from multiple projects into one new folder. -It steps over existing analysis output files from weekly means, and then averages these to daily/monthy/yearly values. - -""" - -import datetime as dt -import os -import sys -import shutil -import time_avg_fluxes as tma - -basedir = '/Storage/CO2/ingrid/' -basedir2 = '/Storage/CO2/peters/' -targetproject = 'geocarbon-ei-sibcasa-gfed4-zoom-gridded-combined-convec-20011230-20130101' -targetdir = os.path.join(basedir2,targetproject) - -sources = { - '2000-01-01 through 2011-12-31': os.path.join(basedir,'carbontracker','geocarbon-ei-sibcasa-gfed4-zoom-gridded-convec-combined'), - '2012-01-01 through 2012-12-31': os.path.join(basedir2,'geocarbon-ei-sibcasa-gfed4-zoom-gridded-convec-20111231-20140101'), - } - -dirs = ['flux1x1','transcom','country','olson'] - -dacycle = {} -dacycle['time.start'] = dt.datetime(2000,12,30) -dacycle['time.end'] = dt.datetime(2013,1,1) -dacycle['cyclelength'] = dt.timedelta(days=7) -dacycle['dir.analysis'] = os.path.join(targetdir,'analysis') - - -if __name__ == "__main__": - - if not os.path.exists(targetdir): - os.makedirs(targetdir) - if not os.path.exists(os.path.join(targetdir,'analysis')): - os.makedirs(os.path.join(targetdir,'analysis') ) - for nam in dirs: - if not os.path.exists(os.path.join(targetdir,'analysis','data_%s_weekly'%nam)): - os.makedirs(os.path.join(targetdir,'analysis','data_%s_weekly'%nam) ) - - timedirs=[] - for ss,vv in sources.iteritems(): - sds,eds = ss.split(' through ') - sd = dt.datetime.strptime(sds,'%Y-%m-%d') - ed = dt.datetime.strptime(eds,'%Y-%m-%d') - timedirs.append([sd,ed,vv]) - print sd,ed, vv - - while dacycle['time.start'] < dacycle['time.end']: - - # copy the weekly flux1x1 file from the original dir to the new project dir - - for td in timedirs: - if dacycle['time.start'] >= td[0] and dacycle['time.start'] <= td[1]: - indir=td[2] - - # Now time avg new fluxes - - infile = os.path.join(indir,'analysis','data_flux1x1_weekly','flux_1x1.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) ) - #print os.path.exists(infile),infile - shutil.copy(infile,infile.replace(indir,targetdir) ) - tma.time_avg(dacycle,avg='flux1x1') - - infile = os.path.join(indir,'analysis','data_transcom_weekly','transcom_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) ) - #print os.path.exists(infile),infile - shutil.copy(infile,infile.replace(indir,targetdir) ) - tma.time_avg(dacycle,avg='transcom') - - infile = os.path.join(indir,'analysis','data_olson_weekly','olson_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) ) - #print os.path.exists(infile),infile - shutil.copy(infile,infile.replace(indir,targetdir) ) - tma.time_avg(dacycle,avg='olson') - - infile = 
os.path.join(indir,'analysis','data_country_weekly','country_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) ) - #print os.path.exists(infile),infile - shutil.copy(infile,infile.replace(indir,targetdir) ) - tma.time_avg(dacycle,avg='country') - - dacycle['time.start'] += dacycle['cyclelength'] - - - - diff --git a/da/analysis/olson_regiondict.pickle b/da/analysis/olson_regiondict.pickle deleted file mode 100644 index 768812c86020ba9d4d465e3bcd49e705060778ec..0000000000000000000000000000000000000000 Binary files a/da/analysis/olson_regiondict.pickle and /dev/null differ diff --git a/da/analysis/siteseries.py.bak b/da/analysis/siteseries.py.bak deleted file mode 100755 index 7512574ca9daff09a090e9ddfe8e53031871869f..0000000000000000000000000000000000000000 --- a/da/analysis/siteseries.py.bak +++ /dev/null @@ -1,974 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# siteseries_c.py - -""" -Author : peters - -Revision History: -File created on 23 Dec 2008. - -""" -import sys -sys.path.append('../../') - -import matplotlib -matplotlib.use('pdf') - -import sys -import os -from da.tools.general import create_dirs -import matplotlib.dates as pltdt -import matplotlib.pyplot as plt -from matplotlib.mlab import normpdf -from matplotlib.font_manager import FontProperties -import numpy as np -import datetime as dt -import da.tools.io4 as io -import logging -import copy -from da.analysis.summarize_obs import nice_lon, nice_lat, nice_alt -from PIL import Image -import urllib2 -import StringIO - -""" - General data needed to set up proper aces inside a figure instance -""" - -ts_x1 = 0.125 -ts_x2 = 0.825 -ts_xspan = ts_x2 - ts_x1 - -ts_y1 = 0.18 -ts_y2 = 0.72 -ts_yspan = ts_y2 - ts_y1 - -markersize = 2 -fontsize = 16 - -small_logos = ['EC','RUG','LBNL-ARM','NIES-MRI','NIWA','ECN'] - -""" - Three routines that provide loops around the number of sites. 
These also create the rundirs and such and make sure the - figures that were created are stamped and saved properly -""" - -def site_timeseries(analysisdir,option='final'): - """*************************************************************************************** - Call example: - - "***************************************************************************************""" - # - # Repeat all options to user - # - # - # Create directories if needed - # - - mrdir = os.path.join(analysisdir, 'timeseries_molefractions') - if not os.path.exists(mrdir): - create_dirs(mrdir) - # - # Make a dictionary of available sites and the NetCDF file names associated with them - # - sitelist = os.listdir(os.path.join(analysisdir, 'data_molefractions')) - sitelist = [f for f in sitelist if f.endswith('.nc')] - # - # Loop over site and sitefiles - # - for sitefile in sitelist: - # - # Create filename and extract site codes for the sites that had day/night data separated - # - if not 'co2_poc' in sitefile: continue - - filename = os.path.join(analysisdir, 'data_molefractions', sitefile) - saveas = os.path.join(mrdir, sitefile[:-3] + '_timeseries') - - if not os.path.exists(saveas+'.pdf'): - logging.debug('Making timeseries figures for %s: ' % sitefile) - # - # Create a figure instance to hold plot - # - fig = plt.figure(1, figsize=(15, 7,))#,frameon=False) - # - # Make plot - # - fig = timevssite_new(fig, filename) - # - # Save image - # - fig.savefig(saveas+'.pdf', dpi=100) - fig.savefig(saveas+'.png', dpi=50) - fig.savefig(saveas+'.large.png', dpi=150) - # - plt.close(fig) - # - # Residuals next - # - saveas = os.path.join(mrdir, sitefile[:-3] + '_residuals') - if not os.path.exists(saveas+'.pdf'): - logging.debug('Making residuals figures for %s: ' % sitefile) - # - # Create a figure instance to hold plot - # - fig = plt.figure(1, figsize=(15, 7,))#,frameon=False) - # - # Make plot - # - fig = residuals_new(fig, filename, option) - # - # Save image - # - fig.savefig(saveas+'.pdf', dpi=100) - fig.savefig(saveas+'.png', dpi=50) - fig.savefig(saveas+'.large.png', dpi=150) - # - # next in loop over sites - # - plt.close(fig) - # - # histograms next - # - saveas = os.path.join(mrdir, sitefile[:-3] + '_histograms') - if not os.path.exists(saveas+'.pdf'): - logging.debug('Making histograms figures for %s: ' % sitefile) - # - # Create a figure instance to hold plot - # - fig = plt.figure(1, figsize=(15, 7,))#,frameon=False) - # - # Make plot - # - fig = timehistograms_new(fig, filename, option) - # - # Save image - # - fig.savefig(saveas+'.pdf', dpi=100) - fig.savefig(saveas+'.png', dpi=50) - fig.savefig(saveas+'.large.png', dpi=150) - # - # next in loop over sites - # - plt.close(fig) - # - # html table next - # - saveas = os.path.join(mrdir, sitefile[:-3] + '.html') - if not os.path.exists(saveas): - logging.debug('Making html info table for %s' % sitefile) - f = io.CT_CDF(filename, 'read') - with open(saveas, "wt") as fout: - with open("sitetable.html", "rt") as fin: - for lineout in fin: - lineout = lineout.replace('site_name',f.site_name) - if 'site_country' in lineout: - if 'site_country' in f.ncattrs(): - lineout = lineout.replace('site_country',f.site_country) - else: lineout = lineout.replace('site_country','Multiple') - if abs(f.site_latitude) > 9999: - lineout = lineout.replace('site_latitude','Variable') - lineout = lineout.replace('site_longitude','Variable') - else: - lineout = lineout.replace('site_latitude',nice_lat(f.site_latitude,'html')) - lineout = 
lineout.replace('site_longitude',nice_lon(f.site_longitude,'html')) - if abs(f.site_latitude) > 9999 and not 'shipboard' in sitefile: - lineout = lineout.replace('site_elevation','Variable') - lineout = lineout.replace('intake_height','Variable') - else: - lineout = lineout.replace('site_elevation',str(f.site_elevation)) - lineout = lineout.replace('intake_height',str(f.variables['intake_height'][:].max())) - lineout = lineout.replace('site_map',str(f.dataset_map)) - lineout = lineout.replace('lab_1_abbr',f.lab_1_abbr) - lineout = lineout.replace('lab_1_name',f.lab_1_name) - lineout = lineout.replace('lab_1_country',f.lab_1_country) - lineout = lineout.replace('lab_1_provider',f.provider_1_name) - if 'lab_1_url' in lineout: - if 'lab_1_url' in f.ncattrs(): - lineout = lineout.replace('lab_1_url',f.lab_1_url) - else: lineout = '' - lineout = lineout.replace('lab_1_logo',f.lab_1_logo) - lineout = lineout.replace('dataset_selection',f.dataset_selection_tag) - lineout = lineout.replace('dataset_project',f.dataset_project) - lineout = lineout.replace('dataset_calibration_scale',f.dataset_calibration_scale) - if f.variables['modeldatamismatch'][:].max() > 0.0009: - lineout = lineout.replace('assimilated','No') - else: lineout = lineout.replace('assimilated','Yes') - fout.write(lineout) - f.close() - -def timehistograms_new(fig, infile, option='final'): - """ - This routine makes two side-by-side histograms representing summer and winter PDFs of the residuals. It uses the special - x-axis and y-axis definitions from above. Note that currently, the PDFs are based on forecast-observed CO2, and not on - optimized-observed CO2. - """ - - fontsize = 17 - # - # Get data - # - f = io.CT_CDF(infile, 'read') - species = f.dataset_parameter - if species == 'co2': - molefac=1e6 - units = '$\mu$mol mol$^{-1}$' - species = "CO$_2$" - if species == 'co2c13': - molefac=1.0 - units = 'permil' - species = "$\delta^{13}$C" - - date = f.get_variable('time') - obs = f.get_variable('value') * molefac - mdm = f.get_variable('modeldatamismatch') * molefac - hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac - if option == 'final': - simulated = f.get_variable('modelsamplesmean') * molefac - if option == 'forecast': - simulated = f.get_variable('modelsamplesmean_forecast') * molefac - flags = f.get_variable('flag_forecast') - mdm_fig = mdm.compress(flags == 0).mean() - - if 'site_country' in f.ncattrs(): - longsitestring = f.site_name + ', ' + f.site_country - else: longsitestring = f.site_name - if abs(f.site_latitude) > 9999: - location = 'Variable' - else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation) - - SDSInfo = {} - for k in f.ncattrs(): - SDSInfo[k] = f.getncattr(k) - - f.close() - - pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]) - - sampled = (np.ma.getmaskarray(simulated) == False) - - if len(sampled.nonzero()[0]) < 2: - logging.warning("Too few simulated values found, continuing...") - return fig - - simulated = simulated.compress(sampled) - obs = obs.compress(sampled) - pydates = pydates.compress(sampled) - mdm = mdm.compress(sampled) - hphtr = hphtr.compress(sampled) - flags = flags.compress(sampled) - - residual = simulated - obs - if option == 'final': - chisquared = (residual ** 2) / mdm - elif option == 'forecast': - chisquared = (residual ** 2) / hphtr - - rejected = (flags == 2.0) - notused = (flags == 99.0) - - #if notused.all(): - # return fig - 
#else: - obslabel = 'Residual' - - sd = pydates[0] - ed = pydates[-1] - - summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9] ] # JJAS - winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4] ] # NDJFMA - - # Create two side-by-side axes, turn off their frame - - ax1 = fig.add_axes([0.05, 0.18, 0.4, 0.7]) - ax2 = fig.add_axes([0.55, 0.18, 0.4, 0.7]) - - - # Loop simultaneously over ax1/ax2 and summer/winter values - - for ax, sel in zip([ax1, ax2], [summer, winter]): - - if not np.array(sel).any(): continue - - # Subselect data for winter/summer - - sel_obs = obs.take(sel) - sel_fc = simulated.take(sel) - sel_hqhr = hphtr.take(sel) - sel_mdm = mdm.take(sel) - sel_flags = flags.take(sel) - sel_rej = rejected.take(sel) - - # Calculate residual and chi squared values - - if SDSInfo['site_code'] == 'POC': - sel_obs = sel_obs.compress(sel_fc > -9000) - sel_mdm = sel_mdm.compress(sel_fc > -9000) - sel_flags = sel_flags.compress(sel_fc > -9000) - sel_fc = sel_fc.compress(sel_fc > -9000) - - #res = sel_fc - sel_obs - if option == 'final': - if mdm.mean() < 900: - res = sel_fc.compress(sel_flags == 0) - sel_obs.compress(sel_flags == 0) - chi = res / np.sqrt(sel_mdm.compress(sel_flags == 0)) - nr_obs = sel_obs.compress(sel_flags == 0).shape[0] - else: - res = sel_fc.compress(sel_flags != 2) - sel_obs.compress(sel_flags != 2) - chi = res / np.sqrt(sel_mdm.compress(sel_flags != 2)) - nr_obs = sel_obs.compress(sel_flags != 2).shape[0] - elif option == 'forecast': - res = sel_fc - sel_obs - chi = res / np.sqrt(sel_hqhr) - - # Get a scaling factor for the x-axis range. Now we will include 5 standard deviations - - sc = res.std() - print 'sc',sc - # If there is too little data for a reasonable PDF, skip to the next value in the loop - - if res.shape[0] < 10: continue - - # make a histogram plot of the residuals with a minimum of 10 bins, and maximum of N/10 bins, normalize the PDF to an area of 1.0 - - n, bins, patches = ax.hist(res, max(res.shape[0] / 10, 10), normed=1) - - # Change the colors on the bars - - p = plt.setp(patches, 'facecolor', 'tan', 'edgecolor', 'tan', label='None') - - # Create two normal distributions for the line plots over the interval of the x-axis - - bins = np.arange(-5 * sc, 5 * sc, 0.1) - n = normpdf(bins, res.mean(), res.std()) - l = ax.plot(bins, n, 'b-', linewidth=2) # plot the PDF of the histogram in blue - n = normpdf(bins, 0.0 , sel_mdm[0]) - l = ax.plot(bins, n, 'g-', linewidth=2) # plot the PDF of the model-data-mismatch in green - # - # Add a legend, not as a legend object but simply as text labels - # - if option == 'final': - strX = '' - elif option == 'forecast': - strX = 'Inn. ' - if chi.mean() != chi.mean() or mdm.mean() < 900: - labs = [ - '%.2f $\pm$ %.2f' % (res.mean(), res.std()) , \ - 'N = %d' % nr_obs, \ - '%s$\chi^2$= %.2f'%(strX, (chi**2).mean()) - ] - else: - labs = [ - '%.2f $\pm$ %.2f' % (res.mean(), res.std()) , \ - 'N = %d' % sel_obs.shape[0] - ] - - # print the above labels onto the figure. 
Note that I use relative coordinates for their position by specifying the transform=ax.transAxes - - for i, l in enumerate(labs): - ax.text(0.75, 0.9 - 0.07 * i, l, transform=ax.transAxes, fontsize=fontsize, horizontalalignment='center', color='blue') - # - # Set Tick Font Size on x and y labels - # - #dummy = [lab.set_fontsize(20) for lab in ax.get_xticklabels()] - #dummy = [lab.set_fontsize(20) for lab in ax.get_yticklabels()] - - # set limits on x-axis and get limits on y-axis to determine the position of the x-axis labels (offset keyword to make_yaxis) - - ax.set_xlim(-5 * sc, 5 * sc) - - ax.spines['left'].set_position(('data', 0)) - ax.spines['right'].set_color('none') - ax.spines['right'].axis.set_ticks([]) - ax.spines['bottom'].set_position(('data', 0)) - ax.spines['top'].set_color('none') - #ax.spines['left'].set_smart_bounds(True) - #ax.spines['bottom'].set_smart_bounds(True) - ax.spines['left'].set_linewidth(1.5) - ax.spines['bottom'].set_linewidth(1.5) - ax.spines['bottom'].set_position(('outward', 10)) - - matplotlib.rcParams.update({'font.size': 18}) - ax.xaxis.set_ticks_position('bottom') - - ax.xaxis.labelpad = -5 - ax.set_xlabel('[%s]'%units,size=16) - - # - # All custom titles and auxiliary info are placed onto the figure directly (fig.text) in relative coordinates - # - fig.text(0.5, 0.02, 'Simulated - Observed %s [%s]\nData from %s to %s' %(species,units,pydates[0].strftime('%d-%b-%Y'), pydates[-1].strftime('%d-%b-%Y')), horizontalalignment='center', fontsize=fontsize) - fig.text(0.5, 0.35, 'model-data\nmismatch:\n%.2f %s' % (mdm_fig, units), horizontalalignment='center', fontsize=fontsize, color='green') - #fig.text(0.5, 0.35, 'model-data\nmismatch:\n%.2f %s' % (sel_mdm.mean(), units), horizontalalignment='center', fontsize=fontsize, color='green') - fig.text(0.12, 0.75, 'NH Summer\n(Jun-Sep)', horizontalalignment='center', fontsize=fontsize) - fig.text(0.62, 0.75, 'NH Winter\n(Nov-Apr)', horizontalalignment='center', fontsize=fontsize) - # - # Title - # - - plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 4) - - # - # Add info to plot - # - font0= FontProperties(size=14,style='italic',weight='bold') - txt='CarbonTracker Europe\n $\copyright$ Wageningen University' - clr='green' - fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr ) - - #now = dt.datetime.today() - #str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y') - #fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5') - #str1 = 'data provided by %s'%SDSInfo['provider_1_name'] - #fig.text(0.12,0.16,str1,fontsize=0.8*fontsize,color='0.75') - - try: - img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read() - except: - logging.warning("No logo found for this program, continuing...") - return fig - - im = Image.open(StringIO.StringIO(img)) - height = im.size[1] - width = im.size[0] - - # We need a float array between 0-1, rather than - # a uint8 array between 0-255 - im = np.array(im).astype(np.float)[::-1, :] / 255 - - # With newer (1.0) versions of matplotlib, you can - # use the "zorder" kwarg to make the image overlay - # the plot, rather than hide behind it... (e.g. 
zorder=10) - if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2 - else: scalingf = 1 - ax3 = fig.add_axes([0.47-0.05*scalingf, 0.65, 0.15*scalingf, 0.15*scalingf * height / width]) - ax3.axis('off') - ax3.imshow(im, interpolation='None') - - return fig - -def timevssite_new(fig, infile): - fontsize = 17 - # - # Get data - # - f = io.CT_CDF(infile, 'read') - species = f.dataset_parameter - if species == 'co2': - molefac=1e6 - units = '$\mu$mol mol$^{-1}$' - species = "CO$_2$" - if species == 'co2c13': - molefac=1.0 - units = 'permil' - species = "$\delta^{13}$C" - date = f.get_variable('time') - obs = f.get_variable('value') * molefac - mdm = f.get_variable('modeldatamismatch') * molefac - simulated = f.get_variable('modelsamplesmean') * molefac - flags = f.get_variable('flag_forecast') - - if 'site_country' in f.ncattrs(): - longsitestring = f.site_name + ', ' + f.site_country - else: longsitestring = f.site_name - if abs(f.site_latitude) > 9999: - location = 'Variable' - else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation) - - SDSInfo = {} - for k in f.ncattrs(): - SDSInfo[k] = f.getncattr(k) - - f.close() - - pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]) - sampled = (np.ma.getmaskarray(simulated) == False) - - if len(sampled.nonzero()[0]) < 2: - logging.warning("Too few simulated values found, continuing...") - return fig - - simulated = simulated.compress(sampled) - obs = obs.compress(sampled) - pydates = pydates.compress(sampled) - mdm = mdm.compress(sampled) - flags = flags.compress(sampled) - - residual = simulated - obs - - assimilated = (flags == 0.0) - rejected = (flags == 2.0) - notused = (flags == 99.0) - - sd = pydates[0] - ed = pydates[-1] - - ax1 = fig.add_axes([0.1, 0.12, 0.7, 0.75]) - ax2 = fig.add_axes([0.85, 0.12, 0.12, 0.75]) - - ax1.spines['right'].set_color('none') - ax1.spines['top'].set_color('none') - ax1.spines['left'].set_linewidth(1.5) - ax1.spines['bottom'].set_linewidth(1.5) - ax1.spines['left'].set_position(('outward', 10)) - ax1.spines['bottom'].set_position(('outward', 10)) - - ax2.spines['right'].set_color('none') - ax2.spines['top'].set_color('none') - ax2.spines['left'].set_linewidth(1.5) - ax2.spines['bottom'].set_linewidth(1.5) - ax2.spines['left'].set_position(('outward', 10)) - ax2.spines['bottom'].set_position(('outward', 10)) - - markersize = 8 - fontsize = 16 - - # - # Plot observations - # - if assimilated.any(): - p1 = ax1.plot(pydates.compress(assimilated), obs.compress(assimilated), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='k', label='Observed (assimilated)', markersize=markersize) - - if notused.any(): - p2 = ax1.plot(pydates.compress(notused), obs.compress(notused), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='tan', label='Observed (not assimilated)', markersize=markersize) - # - # Add the simulated values - # - q = ax1.plot(pydates, simulated, marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', \ - markeredgecolor='lightblue', label='Simulated', markersize=markersize) - # - # Add the rejected values if available - # - if rejected.any(): - r = ax1.plot(pydates.compress(rejected), simulated.compress(rejected), marker='s', markeredgewidth=1, markerfacecolor='r', markeredgecolor='r', linestyle='None', label='Model Rejected (N=%d)' % len(pydates.compress(rejected)), markersize=markersize) - - # - # Set up x 
axis labels - # - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()] - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()] - # - # Location and format of xticks - # - ax1.xaxis.set_major_locator(pltdt.MonthLocator([7],bymonthday=7)) - ax1.xaxis.set_major_formatter(pltdt.DateFormatter('%Y')) - # - # Legend - # - leg = ax1.legend(prop=FontProperties(size=(0.75 * fontsize)), borderpad=0.1, loc='upper left') - #leg.get_frame().set_visible(False) - leg.set_zorder(20) - leg.get_frame().set_color('1.0') - dummy = [lab.set_fontsize(16) for lab in leg.get_texts()] - # - # include grid - # - ax1.grid(True, ls='-', color='0.75', axis='y') - ax1.autoscale(enable=True, axis='y', tight=False) - #ax1.set_ylim(obs.min()-3*residual.std(),obs.max()+5*residual.std()) - #ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1))) - ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1))) - #ax1.set_ylim(360,430) #LUT - - ym = ax1.get_ylim() - ymin=ym[0] ; ymax =ym[1] - for yr in range(sd.year,ed.year+1,2): - x1=dt.datetime(yr,1,1) - x2=dt.datetime(yr+1,1,1) - ax1.fill([x1,x2,x2,x1],[ymin,ymin,ymax,ymax],color='0.9',zorder=1) - - ax1.set_ylim(ymin,ymax) - # - # - # Set Tick Font Size - # - #matplotlib.rcParams.update({'font.size': 30}) - ax1.xaxis.set_ticks_position('bottom') - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()] - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()] - - #xtitle='Time' - #ax1.set_xlabel(xtitle, fontsize=fontsize) # label x axis - ax1.set_ylabel(r"%s [%s]"% (species,units), fontsize=fontsize + 5) # label y-axis - - # - # Axes 2 - # - if mdm.mean() < 900: - residual = residual.compress(flags == 0) - else: residual = residual.compress(flags != 2) - if SDSInfo['site_code'] == 'POC': residual = residual.compress(simulated > -9000) - offset = 0.0 - n, bins, patches = ax2.hist(residual, max(residual.shape[0] / 15, 15), normed=1, orientation='horizontal') - p = plt.setp(patches, 'facecolor', 'tan' , 'edgecolor', 'tan', label='None', alpha=0.25) - - # Create normal distributions for the line plots over the interval of the x-axis - sc = residual.std() - bins = np.arange(-4 * sc, 4 * sc, 0.1) - n = normpdf(bins, residual.mean(), residual.std()) - l = ax2.plot(n, bins, linestyle='-', color='lightblue', linewidth=1) # plot the PDF of the histogram in blue - - dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_xticklabels()] - dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_yticklabels()] - labs = [ - '%+.2f $\pm$ %.2f\nN=%d' % (residual.mean(), residual.std(), residual.shape[0],) - ] - # print the above labels onto the figure. 
Note that I use relative coordinates for their position by specifying the transform=ax.transAxes - - ax2.text(0.6, 0.01 + offset, labs[0], transform=ax2.transAxes, fontsize=1.1 * fontsize, horizontalalignment='center', color='k') - offset += -0.05 - - ax2.set_ylim(-6 * sc, 6 * sc) - - ax2.spines['left'].set_position(('axes', 0.0)) - ax2.spines['right'].set_color('none') - ax2.spines['bottom'].axis.set_ticks([]) - ax2.spines['bottom'].set_position(('axes', 0.5)) - - ax2.spines['top'].set_color('none') - ax2.spines['left'].set_smart_bounds(True) - ax2.spines['bottom'].set_smart_bounds(True) - ax2.spines['left'].set_linewidth(1.5) - ax2.spines['bottom'].set_linewidth(1.5) - ax2.spines['bottom'].set_position(('outward', 10)) - - matplotlib.rcParams.update({'font.size': 18}) - ax2.yaxis.set_ticks_position('left') - ax2.xaxis.set_ticklabels([]) - - #ax2.set_ylabel(r"CO$_2$ [ppm]", fontsize=fontsize) # label y-axis - #ax2.set_xlabel("frequency", fontsize=fontsize) # label x-axis - #ax2.grid(True, axis='y') - ax2.grid(True, ls='-', color='0.75', axis='y') - - # - # Title - # - - plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 5) - - # - # Add info to plot - # - font0= FontProperties(size=14,style='italic',weight='bold') - txt='CarbonTracker Europe\n $\copyright$ Wageningen University' - clr='green' - fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr ) - - #now = dt.datetime.today() - #str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y') - #fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5') - #str1 = 'data provided by %s' % SDSInfo['provider_1_name'] - #fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75') - - try: - img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read() - except: - logging.warning("No logo found for this program, continuing...") - return fig - - im = Image.open(StringIO.StringIO(img)) - height = im.size[1] - width = im.size[0] - - # We need a float array between 0-1, rather than - # a uint8 array between 0-255 - im = np.array(im).astype(np.float)[::-1, :] / 255 - - # With newer (1.0) versions of matplotlib, you can - # use the "zorder" kwarg to make the image overlay - # the plot, rather than hide behind it... (e.g. 
zorder=10) - if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2 - else: scalingf = 1 - ax3 = fig.add_axes([0.85-0.15*scalingf, 0.125, 0.15*scalingf, 0.15*scalingf * height / width]) - ax3.axis('off') - ax3.imshow(im, interpolation='None') - - return fig - -def residuals_new(fig, infile, option): - - fontsize = 17 - # - # Get data - # - f = io.CT_CDF(infile, 'read') - species = f.dataset_parameter - if species == 'co2': - molefac=1e6 - units = '$\mu$mol mol$^{-1}$' - species = "CO$_2$" - if species == 'co2c13': - molefac=1.0 - units = 'permil' - species = "$\delta^{13}$C" - date = f.get_variable('time') - obs = f.get_variable('value') * molefac - mdm = f.get_variable('modeldatamismatch') * molefac - if option == 'final': - simulated = f.get_variable('modelsamplesmean') * molefac - if option == 'forecast': - simulated = f.get_variable('modelsamplesmean_forecast') * molefac - hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac - flags = f.get_variable('flag_forecast') - - if 'site_country' in f.ncattrs(): - longsitestring = f.site_name + ', ' + f.site_country - else: longsitestring = f.site_name - if abs(f.site_latitude) > 9999: - location = 'Variable' - else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation) - - SDSInfo = {} - for k in f.ncattrs(): - SDSInfo[k] = f.getncattr(k) - - f.close() - - pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]) - sampled = (np.ma.getmaskarray(simulated) == False) - - if len(sampled.nonzero()[0]) < 2: - logging.warning("Too few simulated values found, continuing...") - return fig - - simulated = simulated.compress(sampled) - obs = obs.compress(sampled) - pydates = pydates.compress(sampled) - mdm = mdm.compress(sampled) - hphtr = hphtr.compress(sampled) - flags = flags.compress(sampled) - - assimilated = (flags == 0.0) - rejected = (flags == 2.0) - notused = (flags == 99.0) - - residual = simulated - obs - - sd = pydates[0] - ed = pydates[-1] - - ax1 = fig.add_axes([0.1, 0.12, 0.7, 0.75]) - ax2 = fig.add_axes([0.85, 0.12, 0.12, 0.75]) - - ax1.spines['right'].set_color('none') - ax1.spines['top'].set_color('none') - ax1.spines['left'].set_linewidth(1.5) - ax1.spines['bottom'].set_linewidth(1.5) - ax1.spines['left'].set_position(('outward', 10)) - ax1.spines['bottom'].set_position(('outward', 10)) - - ax2.spines['right'].set_color('none') - ax2.spines['top'].set_color('none') - ax2.spines['left'].set_linewidth(1.5) - ax2.spines['bottom'].set_linewidth(1.5) - ax2.spines['left'].set_position(('outward', 10)) - ax2.spines['bottom'].set_position(('outward', 10)) - - markersize = 8 - fontsize = 16 - # - # Plot observations - # - if assimilated.any(): - p1 = ax1.plot(pydates.compress(assimilated), residual.compress(assimilated), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='k', label='Residual (assimilated)' , markersize=markersize,zorder=9) - if notused.any(): - p2 = ax1.plot(pydates.compress(notused), residual.compress(notused), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='tan', label='Residual (not assimilated)', markersize=markersize,zorder=8) - # - # Add the model-data mismatch - # - mdm_fill = mdm.compress(assimilated).mean() - q = ax1.fill_between(pydates, mdm_fill, -1.0 * mdm_fill, label='model-data mismatch', color='tan', alpha=0.25, zorder=5) - # - # Add the rejected values if available - # - if rejected.any(): - r = 
ax1.plot(pydates.compress(rejected), residual.compress(rejected), marker='s', markeredgewidth=1, markeredgecolor='red', markerfacecolor='red', linestyle='None', label='Model Rejected (N=%d)' % len(pydates.compress(rejected)), markersize=markersize,zorder=10) - - # - # Axes 2 - # - if option == 'final': - if mdm.mean() < 900: - residual = simulated.compress(flags == 0) - obs.compress(flags == 0) - pydates = pydates.compress(flags == 0) - mdm = mdm.compress(flags == 0) - else: - residual = simulated.compress(flags != 2) - obs.compress(flags != 2) - pydates = pydates.compress(flags != 2) - mdm = mdm.compress(flags != 2) - chisquared = (residual ** 2) / mdm - elif option == 'forecast': - chisquared = (residual ** 2) / hphtr - offset = 0.0 - - if SDSInfo['site_code'] == 'POC': residual = residual.compress(simulated > -9000) - - n, bins, patches = ax2.hist(residual, max(residual.shape[0] / 15, 15), normed=1, orientation='horizontal') - p = plt.setp(patches, 'facecolor', 'tan' , 'edgecolor', 'tan', label='None', alpha=0.25) - - # Create normal distributions for the line plots over the interval of the x-axis - - sc = residual.std() - bins = np.arange(-4 * sc, 4 * sc, 0.1) - n = normpdf(bins, residual.mean(), residual.std()) - l = ax2.plot(n, bins, linestyle='-', color='lightblue', linewidth=1) # plot the PDF of the histogram in blue - - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_xticklabels()] - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_yticklabels()] - if option == 'final': - strX = '' - elif option == 'forecast': - strX = 'Inn. ' - if chisquared.mean() != chisquared.mean() or mdm.mean() < 900: - labs = [ - '%+.2f $\pm$ %.2f\nN=%d\n%s $\chi^2$ = %5.2f'%(residual.mean(), residual.std(), residual.shape[0], strX, chisquared.mean(),) - ] - else: - labs = [ - '%+.2f $\pm$ %.2f\nN=%d'%(residual.mean(), residual.std(), residual.shape[0],) - ] - - # print the above labels onto the figure. 
Note that I use relative coordinates for their position by specifying the transform=ax.transAxes - - ax2.text(0.6, 0.01 + offset, labs[0], transform=ax2.transAxes, fontsize=1.1 * fontsize, horizontalalignment='center', color='k') - offset += -0.05 - - ax2.set_ylim(-6 * sc, 6 * sc) - - ax2.spines['left'].set_position(('axes', 0.0)) - ax2.spines['right'].set_color('none') - ax2.spines['bottom'].axis.set_ticks([]) - ax2.spines['bottom'].set_position(('axes', 0.5)) - - ax2.spines['top'].set_color('none') - ax2.spines['left'].set_smart_bounds(True) - ax2.spines['bottom'].set_smart_bounds(True) - ax2.spines['left'].set_linewidth(1.5) - ax2.spines['bottom'].set_linewidth(1.5) - ax2.spines['bottom'].set_position(('outward', 10)) - - ax2.yaxis.set_ticks_position('left') - ax2.xaxis.set_ticklabels([]) - - #ax2.set_ylabel(r"CO$_2$ [ppm]", fontsize=fontsize) # label y-axis - #ax2.set_xlabel("frequency", fontsize=fontsize) # label x-axis - ax2.grid(True, ls='-', color='0.75', axis='y') - - # - # Set up x axis labels - # - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()] - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()] - # - # Location and format of xticks - # - ax1.xaxis.set_major_locator(pltdt.MonthLocator([7],bymonthday=7)) - ax1.xaxis.set_major_formatter(pltdt.DateFormatter('%Y')) - # - # Legend - # - leg = ax1.legend(prop=FontProperties(size=(0.75 * fontsize)), borderpad=0.1, loc='upper left') - #leg.get_frame().set_visible(False) - leg.set_zorder(20) - leg.get_frame().set_color('1.0') - dummy = [lab.set_fontsize(16) for lab in leg.get_texts()] - # - # include grid - # - ax1.grid(True, ls='-', color='0.75', axis='y') - ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1))) - - ax1.set_ylim(-6 * sc, 6 * sc) - ym = ax1.get_ylim() - ymin=ym[0] ; ymax =ym[1] - for yr in range(sd.year,ed.year+1,2): - x1=dt.datetime(yr,1,1) - x2=dt.datetime(yr+1,1,1) - ax1.fill([x1,x2,x2,x1],[ymin,ymin,ymax,ymax],color='0.9',zorder=1) - - #ax1.set_ylim(ymin,ymax) - # - # - # Set Tick Font Size - # - matplotlib.rcParams.update({'font.size': 18}) - ax1.xaxis.set_ticks_position('bottom') - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()] - #dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()] - - #xtitle='Time' - #ax1.set_xlabel(xtitle, fontsize=fontsize) # label x axis - ax1.set_ylabel(r"%s [%s]"%(species,units), fontsize=fontsize+5) # label y-axis - # - # Title - # - - plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 5) - - # - # Add info to plot - # - font0= FontProperties(size=14,style='italic',weight='bold') - txt='CarbonTracker Europe\n $\copyright$ Wageningen University' - clr='green' - fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr ) - - #now = dt.datetime.today() - #str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y') - #fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5') - #str1 = 'data provided by %s' % SDSInfo['provider_1_name'] - #fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75') - - try: - img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read() - except: - logging.warning("No logo found for this program, continuing...") - return fig - - im = Image.open(StringIO.StringIO(img)) - height = im.size[1] - width = im.size[0] - - # We need a 
float array between 0-1, rather than - # a uint8 array between 0-255 - im = np.array(im).astype(np.float)[::-1, :] / 255 - - # With newer (1.0) versions of matplotlib, you can - # use the "zorder" kwarg to make the image overlay - # the plot, rather than hide behind it... (e.g. zorder=10) - if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2 - else: scalingf = 1 - ax3 = fig.add_axes([0.85-0.15*scalingf, 0.125, 0.15*scalingf, 0.15*scalingf * height / width]) - ax3.axis('off') - ax3.imshow(im, interpolation='None') - - return fig - - -# main body if called as script - -if __name__ == '__main__': # started as script - - sys.path.append('../../') - - logging.root.setLevel(logging.DEBUG) - - analysisdir = "/Users/ingrid/mnt/promise/CO2/ingrid/carbontracker/cartesius/gcp2-combined/analysis/" - site_timeseries(analysisdir,option='final') - - sys.exit(0) - - diff --git a/da/analysis/summarize_obs.py.bak b/da/analysis/summarize_obs.py.bak deleted file mode 100755 index 5b13c804ffa72a74da739edd8afac41cafbac970..0000000000000000000000000000000000000000 --- a/da/analysis/summarize_obs.py.bak +++ /dev/null @@ -1,545 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. 
If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -import sys -sys.path.append('../../') -import os -import numpy as np -import string -import datetime as dt -import logging -import re -import da.tools.io4 as io - -fontsize = 10 - -def nice_lat(cls,format='html'): - # - # Convert latitude from decimal to cardinal - # - if cls > 0: - h = 'N' - else: - h = 'S' - - dec, deg = np.math.modf(cls) - - #return string.strip('%2d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - if format == 'python': - return string.strip('%3d$^\circ$%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - if format == 'html': - return string.strip('%3d°%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - -def nice_lon(cls,format='html'): - # - # Convert longitude from decimal to cardinal - # - if cls > 0: - h = 'E' - else: - h = 'W' - - dec, deg = np.math.modf(cls) - - #return string.strip('%3d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - if format == 'python': - return string.strip('%3d$^\circ$%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - if format == 'html': - return string.strip('%3d°%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h)) - -def nice_alt(cls): - # - # Reformat elevation or altitude - # - #return string.strip('%10.1f masl' % round(cls, -1)) - return string.strip('%i masl' %cls) - - -def summarize_obs(analysisdir, printfmt='html'): - """*************************************************************************************** - Call example: - - python summarize_obs.py - - Option printfmt : [tex,scr,html] print summary table in latex, terminal, or html format - - Other options are all those needed to create a dacycle object - - OR: - - call directly from a python script as: - - q=summarize_obs(dacycle,printfmt='html') - - ***************************************************************************************""" - - sumdir = os.path.join(analysisdir, 'summary') - if not os.path.exists(sumdir): - logging.info("Creating new directory " + sumdir) - os.makedirs(sumdir) - - mrdir = os.path.join(analysisdir, 'data_molefractions') - if not os.path.exists(mrdir): - logging.error("Input directory does not exist (%s), exiting... " % mrdir) - return None - - mrfiles = os.listdir(mrdir) - infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')] - - if printfmt == 'tex': - print '\\begin{tabular*}{\\textheight}{l l l l r r r r}' - print 'Code & Name & Lat, Lon, Elev & Lab & N (flagged) & $\\sqrt{R}$ &Inn \\XS &Bias\\\\' - print '\hline\\\\ \n\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] ' - fmt = '%8s & ' + ' %55s & ' + '%20s &' + '%6s &' + ' %4d (%d) & ' + ' %5.2f & ' + ' %5.2f & ' + '%+5.2f \\\\' - elif printfmt == 'html': - tablehead = \ - "<TR>\n <TH> Site code </TH> \ - <TH> Sampling Type </TH> \ - <TH> Lab. </TH> \ - <TH> Country </TH> \ - <TH> Lat, Lon, Elev. (m ASL) </TH> \ - <TH> No. Obs. Available </TH> \ - <TH> No. Obs. Assimilated </TH> \ - <TH> √R (μmol mol<sup>-1</sup>) </TH> \ - <TH> √HPH (μmol mol<sup>-1</sup>) </TH> \ - <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \ - <TH> H(x)-y (JJAS) (μmol mol<sup>-1</sup>) </TH> \ - <TH> H(x)-y (NDJFMA) (μmol mol<sup>-1</sup>) </TH> \ - <TH> Inn. 
Χ<sup>2</sup></TH> \ - <TH> Site code </TH>\n \ - </TR>\n" - - fmt = """<TR> \n \ - <TD><a href='javascript:LoadCO2Tseries("%s")'>%s </a></TD>\ - <TD>%s</TD>\ - <TD>%s</TD>\ - <TD>%40s</TD>\ - <TD>%s</TD>\ - <TD>%d</TD>\ - <TD>%d</TD>\ - <TD>%+5.2f</TD>\ - <TD>%+5.2f</TD>\ - <TD>%+5.2f±%5.2f</TD>\ - <TD>%+5.2f±%5.2f</TD>\ - <TD>%+5.2f±%5.2f</TD>\ - <TD bgcolor=%s>%+5.2f</TD>\ - <TD>%s</TD>\n \ - </TR>\n""" - elif printfmt == 'scr': - print 'Code Site NObs flagged R Inn X2' - fmt = '%8s ' + ' %55s %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f' - - table = [] - - for infile in infiles: - #if not 'mlo_surface-insitu' in infile: continue - #if not 'poc' in infile: continue - logging.debug( infile ) - f = io.CT_CDF(infile, 'read') - date = f.get_variable('time') - obs = f.get_variable('value') * 1e6 - mdm = f.get_variable('modeldatamismatch') * 1e6 - simulated_fc = f.get_variable('modelsamplesmean_forecast') * 1e6 - simulated = f.get_variable('modelsamplesmean') * 1e6 - simulated_std = f.get_variable('modelsamplesstandarddeviation_forecast') * 1e6 - hphtr = f.get_variable('totalmolefractionvariance_forecast') * 1e6 * 1e6 - flag = f.get_variable('flag_forecast') - obs_avail = len(np.ma.compressed(mdm)) - - pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date]) - - sampled = (np.ma.getmaskarray(simulated) == False) - - pydates = pydates.compress(sampled) - simulated = simulated.compress(sampled) - simulated_fc = simulated_fc.compress(sampled) - obs = obs.compress(sampled) - mdm = mdm.compress(sampled) - hphtr = hphtr.compress(sampled) - flag = flag.compress(sampled) - - if f.site_code.upper() == 'POC': - pydates = pydates.compress(simulated > -9000) - simulated_fc = simulated_fc.compress(simulated > -9000) - obs = obs.compress(simulated > -9000) - mdm = mdm.compress(simulated > -9000) - hphtr = hphtr.compress(simulated > -9000) - flag = flag.compress(simulated > -9000) - simulated = simulated.compress(simulated > -9000) - - if mdm.mean() > 900: - pydates = pydates.compress(flag != 2) - simulated_fc = simulated_fc.compress(flag != 2) - simulated = simulated.compress(flag != 2) - obs = obs.compress(flag != 2) - mdm = mdm.compress(flag != 2) - hphtr = hphtr.compress(flag != 2) - obs_assim = 0 - else: - pydates = pydates.compress(flag == 0) - simulated_fc = simulated_fc.compress(flag == 0) - simulated = simulated.compress(flag == 0) - obs = obs.compress(flag == 0) - mdm = mdm.compress(flag == 0) - hphtr = hphtr.compress(flag == 0) - obs_assim = len(np.ma.compressed(mdm)) - - summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9] ] - winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4] ] - - diff = ((simulated - obs).mean()) - diffsummer = ((simulated - obs).take(summer).mean()) - diffwinter = ((simulated - obs).take(winter).mean()) - diffstd = ((simulated - obs).std()) - diffsummerstd = ((simulated - obs).take(summer).std()) - diffwinterstd = ((simulated - obs).take(winter).std()) - #chi_summer = ((simulated - obs)**2/mdm).take(summer).mean() - #chi_winter = ((simulated - obs)**2/mdm).take(winter).mean() - #n_summer = simulated.take(summer).shape[0] - #n_winter = simulated.take(winter).shape[0] - #print 'summer: %0.2f, %0.2f, %0.2f, %i'%(diffsummer,diffsummerstd,chi_summer,n_summer) - #print 'winter: %0.2f, %0.2f, %0.2f, %i'%(diffwinter,diffwinterstd,chi_winter,n_winter) - chi_sq = -99 - if mdm.mean() < 900: - chi_sq = ((simulated_fc - obs)**2/hphtr).mean() - #chi_sq = ((simulated - obs)**2/mdm).mean() - if mdm.mean() > 
900: - chi_clr = '#EEEEEE' - elif chi_sq > 1.2: - chi_clr = '#ff0000' - elif chi_sq < 0.5: - chi_clr = '#ff7f00' - else: chi_clr = '#00cc00' - - if abs(f.site_latitude) > 9999: - location = 'Variable' - else:location = nice_lat(f.site_latitude,'html') + ', ' + nice_lon(f.site_longitude,'html') + ', ' + nice_alt(f.site_elevation) - if 'site_country' in f.ncattrs(): - country = f.site_country - else: country = 'Multiple' - - if printfmt == 'html': - ss = (f.dataset_name[4:], - f.site_code.upper(), - f.dataset_project, - f.lab_1_abbr, - country, - location, - obs_avail, - obs_assim, - mdm.mean(), - np.sqrt((simulated_std ** 2).mean()), - diff, diffstd, - diffsummer, diffsummerstd, - diffwinter, diffwinterstd, - chi_clr, chi_sq, - f.site_code.upper()) - - table.append(ss) - f.close() - - if printfmt == 'tex': - saveas = os.path.join(sumdir, 'site_table.tex') - f = open(saveas, 'w') - elif printfmt == 'html': - saveas = os.path.join(sumdir, 'site_table.html') - f = open(saveas, 'w') - txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n" - f.write(txt) - txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n" - f.write(txt) - - f.write(tablehead) - - for i, ss in enumerate(table): - - f.write(fmt % ss) - if (i + 1) % 15 == 0: - f.write(tablehead) - - if printfmt == 'tex': - f.write('\cline{2-8}\\\\') - f.write('\hline \\\\') - f.write('\end{tabular*}') - else: - txt = "\n</table>" - f.write(txt) - f.close() - - logging.info("File written with summary: %s" % saveas) - -def make_map(analysisdir): #makes a map of amount of assimilated observations per site - import netCDF4 as cdf - import matplotlib.pyplot as plt - import matplotlib - from maptools import * - from matplotlib.font_manager import FontProperties - - sumdir = os.path.join(analysisdir, 'summary') - if not os.path.exists(sumdir): - logging.info("Creating new directory " + sumdir) - os.makedirs(sumdir) - - mrdir = os.path.join(analysisdir, 'data_molefractions') - if not os.path.exists(mrdir): - logging.error("Input directory does not exist (%s), exiting... " % mrdir) - return None - - mrfiles = os.listdir(mrdir) - infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')] - - lats=[] - lons=[] - labs=[] - nobs=[] - for files in infiles: - f=cdf.Dataset(files) - if f.variables['modeldatamismatch'][:].max() < 0.001: - sim = f.variables['modelsamplesmean'][:] - flag = f.variables['flag_forecast'][:] - sim = sim.compress(flag != 2) - sampled = (np.ma.getmaskarray(sim) == False) - sim = sim.compress(sampled) - lats.append(f.site_latitude) - lons.append(f.site_longitude) - labs.append(f.site_code) - nobs.append(len(sim)) - f.close() - - lats = np.array(lats) - lons = np.array(lons) - labs = np.array(labs) - nobs = np.array(nobs) - - saveas = os.path.join(sumdir, 'networkmap') - logging.info("Making map: %s" % saveas) - - fig = plt.figure(1,figsize=(20,12)) - ax = fig.add_axes([0.05,0.1,0.9,0.8]) - #m,nx,ny = select_map('Global Cylinder') - m,nx,ny = select_map('Europe Conformal') - m.drawcountries() - m.drawcoastlines() - parallels = arange(-90.,91,30.) - m.drawparallels(parallels,color='grey',linewidth=0.5,dashes=[1,0.001],labels=[1,0,0,1],fontsize=16) - meridians = arange(-180.,181.,60.) 
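The site-table code above colour-codes the innovation chi-squared column with fixed thresholds; restated here as a small stand-alone helper (a sketch, not part of the original file), using the same hex colours and cut-offs:

def chi_squared_colour(chi_sq, mdm_mean):
    # grey for sites that are not assimilated (mdm set to values > 900)
    if mdm_mean > 900:
        return '#EEEEEE'
    # red when residuals are larger than the assumed errors allow
    if chi_sq > 1.2:
        return '#ff0000'
    # orange when residuals are suspiciously small
    if chi_sq < 0.5:
        return '#ff7f00'
    # green when the statistics are consistent with the assumed errors
    return '#00cc00'

print(chi_squared_colour(0.95, 0.25))   # '#00cc00'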
- m.drawmeridians(meridians,color='grey',linewidth=0.5,dashes=[1,0.001],labels=[1,0,0,1],fontsize=16) - - #for lon,lat,name,n in zip(lons,lats,names,nobs): - count = 0 - for i in range(len(lats)): - if nobs[i] < 250: - n = 0 - c = 'blue' - elif nobs[i] < 500: - n = 1 - c = 'green' - elif nobs[i] < 750: - n = 2 - c = 'orange' - elif nobs[i] < 1000: - n = 3 - c = 'brown' - else: - n = 4 - c = 'red' - if lons[i] > -900: - x,y = m(lons[i],lats[i]) - ax.plot(x,y,'o',color=c,markersize=12+1.5*n)#,markeredgecolor='k',markeredgewidth=2) - #ax.annotate(labs[i],xy=m(lons[i],lats[i]),xycoords='data',fontweight='bold') - else: - x,y = m(169,87-count) - ax.plot(x,y,'o',color=c,markersize=12+1.5*n) - ax.annotate(labs[i],xy=m(172,86-count),xycoords='data',fontweight='bold') - count = count + 4 - - fig.text(0.15,0.945,u'\u2022',fontsize=35,color='blue') - fig.text(0.16,0.95,': N<250',fontsize=24,color='blue') - fig.text(0.30,0.94,u'\u2022',fontsize=40,color='green') - fig.text(0.31,0.95,': N<500',fontsize=24,color='green') - fig.text(0.45,0.94,u'\u2022',fontsize=45,color='orange') - fig.text(0.46,0.95,': N<750',fontsize=24,color='orange') - fig.text(0.60,0.939,u'\u2022',fontsize=50,color='brown') - fig.text(0.61,0.95,': N<1000',fontsize=24,color='brown') - fig.text(0.75,0.938,u'\u2022',fontsize=55,color='red') - fig.text(0.765,0.95,': N>1000',fontsize=24,color='red') - ax.set_title('Assimilated observations',fontsize=24) - - font0= FontProperties(size=15,style='italic',weight='bold') - txt='CarbonTracker Europe\n $\copyright$ Wageningen University' - clr='green' - fig.text(0.82,0.01,txt,ha='left',font_properties = font0, color=clr ) - saveas=os.path.join(sumdir,'networkmap.png') - fig.savefig(saveas,dpi=200) - saveas=os.path.join(sumdir,'networkmap.large.png') - fig.savefig(saveas,dpi=300) - close(fig) - -def summarize_stats(dacycle): - """ - Summarize the statistics of the observations for this cycle - This includes X2 statistics, RMSD, and others for both forecast and - final fluxes - """ - - - sumdir = os.path.join(dacycle['dir.analysis'], 'summary') - if not os.path.exists(sumdir): - logging.info("Creating new directory " + sumdir) - os.makedirs(sumdir) - - # get forecast data from optimizer.ddddd.nc - - startdate = dacycle['time.start'] - dacycle['time.sample.stamp'] = "%s" % (startdate.strftime("%Y%m%d"),) - infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % dacycle['time.sample.stamp']) - - if not os.path.exists(infile): - logging.error("File not found: %s" % infile) - raise IOError - - f = io.CT_CDF(infile, 'read') - sites = f.get_variable('sitecode') - y0 = f.get_variable('observed') * 1e6 - hx = f.get_variable('modelsamplesmean_prior') * 1e6 - dF = f.get_variable('modelsamplesdeviations_prior') * 1e6 - HPHTR = f.get_variable('totalmolefractionvariance').diagonal() * 1e6 * 1e6 - R = f.get_variable('modeldatamismatchvariance').diagonal() * 1e6 * 1e6 - flags = f.get_variable('flag') - f.close() - - HPHT = dF.dot(np.transpose(dF)).diagonal() / (dF.shape[1] - 1.0) - rejected = (flags == 2.0) - - sitecodes = [string.join(s.compressed(), '').strip() for s in sites] - - - # calculate X2 per observation for this time step - - x2 = [] - for i, site in enumerate(sitecodes): - - x2.append((y0[i] - hx[i]) ** 2 / HPHTR[i]) - - x2 = np.ma.masked_where(HPHTR == 0.0, x2) - - # calculate X2 per site - saveas = os.path.join(sumdir, 'x2_table_%s.html' % dacycle['time.sample.stamp']) - logging.info("Writing HTML tables for this cycle (%s)" % saveas) - f = open(saveas, 'w') - txt = "<meta 
http-equiv='content-type' content='text/html;charset=utf-8' />\n" - f.write(txt) - txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n" - f.write(txt) - tablehead = \ - "<TR>\n <TH> Site code </TH> \ - <TH> N<sub>obs</sub> </TH> \ - <TH> N<sub>rejected</sub> </TH> \ - <TH> √R (μmol mol<sup>-1</sup>) </TH> \ - <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> \ - <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n \ - <TH> X2 </TH> \n \ - </TR>\n" - - fmt = """<TR> \n \ - <TD>%s</TD>\ - <TD>%d</TD>\ - <TD>%d</TD>\ - <TD>%+5.2f</TD>\ - <TD>%+5.2f</TD>\ - <TD>%+5.2f±%5.2f</TD>\ - <TD>%5.2f</TD>\n \ - </TR>\n""" - - f.write(tablehead) - - set_sites = set(sitecodes) - set_sites = np.sort(list(set_sites)) - - for i, site in enumerate(set_sites): - sel = [i for i, s in enumerate(sitecodes) if s == site] - ss = (site, len(sel), rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]), np.sqrt(HPHT.take(sel).mean()), (hx - y0).take(sel).mean(), (hx - y0).take(sel).std(), x2.take(sel).mean(),) - #print site,sel,x2.take(sel) - - f.write(fmt % ss) - if (i + 1) % 15 == 0: - f.write(tablehead) - - txt = "\n</table>" - f.write(txt) - f.close() - - # Now summarize for each site across time steps - - if not dacycle['time.start'] >= dt.datetime(2008, 12, 29): - return - - logging.info("Writing HTML tables for each site") - for site in set_sites: - saveas = os.path.join(sumdir, '%s_x2.html' % site) - f = open(saveas, 'w') - logging.debug(saveas) - txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n" - f.write(txt) - txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n" - f.write(txt) - tablehead = \ - "<TR>\n <TH> From File </TH> \ - <TH> Site </TH> \ - <TH> N<sub>obs</sub> </TH> \ - <TH> N<sub>rejected</sub> </TH> \ - <TH> √R (μmol mol<sup>-1</sup>) </TH> \ - <TH> √HPH<sup>T</sup> (μmol mol<sup>-1</sup>) </TH> \ - <TH> H(x)-y (μmol mol<sup>-1</sup>) </TH> \n \ - <TH> X2 </TH> \n \ - </TR>\n" - f.write(tablehead) - - files = os.listdir(sumdir) - x2_files = [fil for fil in files if fil.startswith('x2')] - for htmlfile in x2_files: - lines = grep(site, os.path.join(sumdir, htmlfile)) - for line in lines: - f.write('<TR>\n') - f.write('<TD>' + htmlfile + '</TD>') - f.write(line + '\n') - f.write('</TR>\n') - - txt = "\n</table>" - f.write(txt) - f.close() - - -def grep(pattern, fil): - fileObj = open(fil, 'r') - r = [] - for line in fileObj: - if re.search(pattern, line): - r.append(line) - return r - -# main body if called as script - -if __name__ == '__main__': # started as script - - sys.path.append('../../') - - logging.root.setLevel(logging.DEBUG) - analysisdir = "/Users/ingrid/mnt/promise/CO2/ingrid/carbontracker/cartesius/gcp2-combined/analysis/" - - summarize_obs(analysisdir) - #make_map(analysisdir) - - sys.exit(0) - - diff --git a/da/analysis/time_avg_fluxes.py.bak b/da/analysis/time_avg_fluxes.py.bak deleted file mode 100755 index 0d3a0146491350f6639e548c1b4d2ae47aa610d9..0000000000000000000000000000000000000000 --- a/da/analysis/time_avg_fluxes.py.bak +++ /dev/null @@ -1,255 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. 
This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# time_avg_fluxes.py - -""" -Author : peters - -Revision History: -File created on 20 Dec 2012. - -""" -import sys -sys.path.append('../../') -import os -import sys -import shutil -from dateutil.relativedelta import relativedelta -import datetime -import subprocess - -def time_avg(dacycle,avg='transcom'): - """ Function to create a set of averaged files in a folder, needed to make longer term means """ - - if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: - raise IOError,'Choice of averaging invalid' - - analysisdir = dacycle['dir.analysis'] - - if not os.path.exists(analysisdir): - raise IOError,'analysis dir requested (%s) does not exist, exiting...'%analysisdir - - daily_avg(dacycle,avg) - - monthly_avg(dacycle,avg) - - yearly_avg(dacycle,avg) - - longterm_avg(dacycle,avg) - -def new_month(dacycle): - """ check whether we just entered a new month""" - - this_month = dacycle['time.start'].month - prev_month = (dacycle['time.start']-dacycle['cyclelength']).month - - return (this_month != prev_month) - -def new_year(dacycle): - """ check whether we just entered a new year""" - - this_year = dacycle['time.start'].year - prev_year = (dacycle['time.start']-dacycle['cyclelength']).year - - return (this_year != prev_year) - -def daily_avg(dacycle,avg): - """ Function to create a set of daily files in a folder, needed to make longer term means """ - - if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: - raise IOError,'Choice of averaging invalid' - - analysisdir = dacycle['dir.analysis'] - weekdir = os.path.join(analysisdir , 'data_%s_weekly'%avg) - daydir = os.path.join(analysisdir , 'data_%s_daily'%avg) - - if not os.path.exists(daydir): - print "Creating new output directory " + daydir - os.makedirs(daydir) - - files = os.listdir(weekdir) - files = [f for f in files if '-' in f and f.endswith('.nc')] - - fileinfo = {} - for filename in files: - date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d') - fileinfo[filename] = date - - dt = dacycle['cyclelength'] - - for k,v in fileinfo.iteritems(): - cycle_file = os.path.join(weekdir,k) - for i in range(abs(dt.days)): - daily_file = os.path.join(daydir,'%s_fluxes.%s.nc'%(avg,(v+datetime.timedelta(days=i)).strftime('%Y-%m-%d'))) - if not os.path.lexists(daily_file): - os.symlink(cycle_file,daily_file) - #print daily_file,cycle_file - -def monthly_avg(dacycle,avg): - """ Function to average a set of files in a folder from daily to monthly means """ - - if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: - raise IOError,'Choice of averaging invalid' - - analysisdir = dacycle['dir.analysis'] - - daydir = os.path.join(analysisdir , 'data_%s_daily'%avg) - monthdir = os.path.join(analysisdir,'data_%s_monthly'%avg) - - if not os.path.exists(monthdir): - print "Creating new output directory " + monthdir - os.makedirs(monthdir) - - - files = os.listdir(daydir) # get daily files - files = [f for f in files if '-' in f and f.endswith('.nc')] - - if len(files) < 28: - print 'No month is yet complete, skipping monthly average' - 
return - - fileinfo = {} - for filename in files: # parse date from each of them - date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d') - fileinfo[filename] = date - - years = [d.year for d in fileinfo.values()] # get actual years - months = set([d.month for d in fileinfo.values()]) # get actual months - - sd = datetime.datetime(min(years),1,1) - ed = datetime.datetime(max(years)+1,1,1) - - while sd < ed: - - nd = sd + relativedelta(months=+1) - - ndays_in_month = (nd-sd).days - - avg_files = [os.path.join(daydir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd] - - if len(avg_files) != ndays_in_month: # only once month complete - #print 'New month (%02d) is not yet complete, skipping monthly average'%(sd.month) - pass - else: - targetfile = os.path.join(monthdir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y-%m'))) - if not os.path.exists(targetfile): - print "New month (%02d) is complete, I have %d days for the next file"%(sd.month,ndays_in_month) - command = ['ncra','-O']+ avg_files + [targetfile] - status = subprocess.check_call(command) - else: - pass - - sd = nd - -def yearly_avg(dacycle,avg): - """ Function to average a set of files in a folder from monthly to yearly means """ - - if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: - raise IOError,'Choice of averaging invalid' - - analysisdir = dacycle['dir.analysis'] - monthdir = os.path.join(analysisdir , 'data_%s_monthly'%avg ) - yeardir = os.path.join(analysisdir,'data_%s_yearly'%avg) - - if not os.path.exists(yeardir): - print "Creating new output directory " + yeardir - os.makedirs(yeardir) - - files = os.listdir(monthdir) # get monthly files - files = [f for f in files if '-' in f and f.endswith('.nc')] - - if not files: - print "No full year finished yet, skipping yearly average..." - return - - fileinfo = {} - for filename in files: - date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m') - fileinfo[filename] = date - - years = set([d.year for d in fileinfo.values()]) - - sd = datetime.datetime(min(years),1,1) - ed = datetime.datetime(max(years)+1,1,1) - - while sd < ed: - - nd = sd + relativedelta(years=+1) - - avg_files = [os.path.join(monthdir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd] - - if not len(avg_files) == 12 : - print "Year %04d not finished yet, skipping yearly average..."%sd.year - else: - targetfile = os.path.join(yeardir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y'))) - - if not os.path.exists(targetfile): - print "Year %04d is complete, I have 12 months for the next file"%sd.year - command = ['ncra','-O']+ avg_files + [targetfile] - status = subprocess.check_call(command) - - sd = nd - -def longterm_avg(dacycle,avg): - """ Function to average a set of files in a folder from monthly to yearly means """ - - if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: - raise IOError,'Choice of averaging invalid' - - analysisdir = dacycle['dir.analysis'] - - yeardir = os.path.join(analysisdir , 'data_%s_yearly'%avg ) - longtermdir = os.path.join(analysisdir,'data_%s_longterm'%avg) - - if not os.path.exists(longtermdir): - print "Creating new output directory " + longtermdir - os.makedirs(longtermdir) - - files = os.listdir(yeardir) - files = [f for f in files if '-' in f and f.endswith('.nc')] - - if not files: - print "No full year finished yet, skipping longterm average..." 
- return - - dates = [] - for filename in files: - date=datetime.datetime.strptime(filename.split('.')[-2],'%Y') - dates.append( date ) - - avg_files = [os.path.join(yeardir,k) for k in files] - - if len(avg_files) > 0 : - command = ['ncra','-O']+ avg_files + [os.path.join(longtermdir,'%s_fluxes.%04d-%04d.nc'%(avg,min(dates).year, max(dates).year))] - status = subprocess.check_call(command) - -if __name__ == "__main__": - - from da.tools.initexit import CycleControl - - sys.path.append('../../') - - dacycle = CycleControl(args={'rc':'../../ctdas-ei-nobcb-zoom-ecoregions.rc'}) - dacycle.setup() - dacycle.parse_times() - - while dacycle['time.end'] < dacycle['time.finish']: - time_avg(dacycle,avg='flux1x1') - time_avg(dacycle,avg='transcom') - time_avg(dacycle,avg='transcom_extended') - time_avg(dacycle,avg='olson') - time_avg(dacycle,avg='olson_extended') - time_avg(dacycle,avg='country') - dacycle.advance_cycle_times() - diff --git a/da/analysis/tools_country.py.bak b/da/analysis/tools_country.py.bak deleted file mode 100755 index 00943548f44eb5b85aed957a394dea8031ad2a4f..0000000000000000000000000000000000000000 --- a/da/analysis/tools_country.py.bak +++ /dev/null @@ -1,233 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# tools_country.py - -""" -Author : peters - -Revision History: -File created on 25 Jul 2008. - -This module provides an interface to go from arrays of gridded data to aggregates for countries. -It uses the country information from the UNEP database, and the database created by them for this purpose. See: - -http://na.unep.net/metadata/unep/GRID/GRIDCTRY.html - -The module sets up a class 'countryinfo' that holds a number of attributes. These attributes can be used to -extract gridded information about the country. A build-in method agg_1x1 is provided for convencience. -The routine get_countrydict() creates a dictionary with this information for a large number of countries. - -CAUTION: - -The country data only covers the land areas of a nation and aggregation will exclude the fraction of a land covered -by oceans, sea, or open water. The aggregation will thus work best on arrays that are *not* defined by unit area! - -""" -import sys -import cPickle -import os -sys.path.append('../../') -rootdir = os.getcwd().split('da/')[0] -analysisdir = os.path.join(rootdir, 'da/analysis') - -from numpy import sum, array -try: - from dbfpy import dbf -except: - print "the python DBF lib might be needed, please install from:" - print "http://dbfpy.sourceforge.net/" - print " Trying to complete anyway..." 
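The averaging cascade above (daily symlinks, then monthly, yearly and long-term means) delegates the actual averaging to NCO's ncra tool. A minimal sketch of that step, with hypothetical file names; only the 'ncra -O' invocation itself is taken from the code above:

import os
import subprocess

def average_netcdf(infiles, targetfile):
    # average a list of NetCDF flux files into one mean file, overwriting an
    # existing target ('-O'); one ncra call per target period
    if infiles and not os.path.exists(targetfile):
        subprocess.check_call(['ncra', '-O'] + infiles + [targetfile])

# e.g. turn 31 daily files into one monthly mean (hypothetical names)
# daily = ['transcom_fluxes.2012-01-%02d.nc' % d for d in range(1, 32)]
# average_netcdf(daily, 'transcom_fluxes.2012-01.nc')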
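The module docstring above describes aggregating gridded 1x1 degree fields to country totals with land-fraction weights, which the countryinfo class below implements as agg_1x1. A stand-alone sketch of the same weighted take-and-sum, with hypothetical grid numbers and land fractions:

import numpy as np

def aggregate_country(field_1x1, grid_numbers, land_fractions):
    # take the country's cells from the flattened (180, 360) field, weight
    # each by the fraction of the cell that is land, then sum
    return (field_1x1.ravel().take(grid_numbers) * np.array(land_fractions)).sum()

flux = np.ones((180, 360))            # dummy global field
cells = [32410, 32411]                # two hypothetical 1x1 grid numbers
fractions = [1.0, 0.4]                # land fraction of each cell
print(aggregate_country(flux, cells, fractions))   # 1.4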
- -sys.path.append('../../') -from da.analysis.tools_regions import globarea - -EU25 = ["Austria", "Belgium", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", "Slovakia", "Slovenia", "Spain", "Sweden", "United Kingdom"] -EU27 = ["Austria", "Belgium", "Bulgaria", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden", "United Kingdom"] -G8 = [ "France", "Germany", "Italy", "Japan", "United Kingdom", "United States"] -annex1 = [ "Australia", "Austria", "Belarus", "Belgium", "Bulgaria", "Canada", "Croatia", "Czech Republic", "Denmark", "European Union", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy", "Japan", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Monaco", "Netherlands", "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Russia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey", "Ukraine", "United Kingdom", "United States"] -annex2 = [ "Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland", "France", "Germany", "Greece", "Iceland", "Ireland", "Italy", "Japan", "Luxembourg", "Netherlands", "New Zealand", "Norway", "Portugal", "Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom", "United States"] - -class countryinfo(object): - """ Defines the information about 1x1 gridboxes covering each country """ - - def __init__(self, code): - self.country = code - self.ngrids = 0 - self.gridij = [] - self.gridnr = [] - self.gridcoords = [] - - self.gridlandmask = [] - - self.gridsharedocean = [] - self.gridsharedborder = [] - - def add_gridinfo(self, grid_i, grid_j, gridlandmask, shared_border, shared_water): - """ add information from one gridbox to the object """ - - self.gridij.append((grid_j, grid_i,)) # add tuple with j,i coordinates - lon = -180 + grid_i + 0.5 - lat = -90 + grid_j + 0.5 - self.gridcoords.append((lat, lon,)) # add tuple with lon,lat coordinates - self.gridnr.append(grid_j * 360 + grid_i) # add grid number for take() function - - self.gridlandmask.append(gridlandmask) # this gives the total fraction of land - - self.gridsharedocean.append(shared_water) - self.gridsharedborder.append(shared_border) - self.ngrids = self.ngrids + 1 - - def agg_1x1(self, field): - """ aggregate a 1x1 input field to country total """ - - #print field.take(self.gridnr) - #print self.gridlandmask - - return (field.take(self.gridnr) * array(self.gridlandmask)).sum() - - def __str__(self): - return ' Country name : %s\n' % self.country + \ - ' Number of gridpoints : %d ' % self.ngrids - #' Gridpoint indices : %s ' % self.gridnr - -def fix_eu(rec): - """ fix Czech Republic and Slovakia manually """ - - alternative_slov = { - '140202': (2.0, 28.1), \ - '140201': (2.0, 34.0), \ - '140200': (2.0, 44.0), \ - '140199': (3.0, 25.5), \ - '139203': (3.0, 18.9), \ - '139202': (2.0, 57.5), \ - '139201': (2.0, 59.7), \ - '139200': (2.0, 87.2), \ - '139199': (1.0, 100.0), \ - '139198': (2.0, 72.8), \ - '138198': (3.0, 7.7), \ - '138199':(2.0, 10.0) } - - alternative_czech = { - '141193': (2.0, 23.0), \ - '141194': (2.0, 62.1), \ - '141195': (3.0, 89.5), \ - '141196': (2.0, 79.4), \ - '141197': (2.0, 42.3), \ - '141198': (2.0, 24.5), \ - '141199': (2.0, 0.1), \ - 
'140193': (2.0, 20.6), \ - '140194': (2.0, 88.9), \ - '140195': (1.0, 100.0), \ - '140196': (1.0, 100.0), \ - '140197': (1.0, 100.0), \ - '140198': (1.0, 100.0), \ - '140199': (3.0, 50.0), \ - '139195': (2.0, 70.6), \ - '139196': (2.0, 12.4), \ - '139197': (2.0, 30.9), \ - '139198': (2.0, 25.0) } - - id = str(int(rec['GRID'])) - for dict in [alternative_slov, alternative_czech]: - if id in dict: - rec['COVER_ID'] = dict[id][0] - rec['RATE_IN_GR'] = dict[id][1] - - return rec - -def get_countrydict(): - """ Create a dictionary with grid-to-country information from a dbf file""" - - countrydict = countryinfo('Test') - - file = os.path.join(analysisdir,'country_dictionary.dat') - - try: - countrydict = cPickle.load(open(file, 'rb')) - except: - db = dbf.Dbf(os.path.join(analysisdir,'GRIDCTRY.DBF')) - - countrydict = {} - for n, rec in enumerate(db): - code = rec['COUNTRY'] - gridid = str(int(rec['GRID'])) - - if code in ['Czech Republic', 'Slovakia']: - rec = fix_eu(rec) - - rate_in_gr = rec['RATE_IN_GR'] * 1.e-2 - - i = int(gridid[-3::]) - j = int(gridid[0:-3]) - lat = -91 + j + 0.5 - lon = -181 + i + 0.5 - if code in countrydict: - a = countrydict[code] - else: - a = countryinfo(code) - - - shared_border = False - shared_water = False - if rec['COVER_ID'] == 0.0: - shared_border = False - shared_water = True - if rec['COVER_ID'] >= 2.0: - shared_border = True - if rec['COVER_ID'] >= 10.0: - shared_water = True - - a.add_gridinfo(i - 1, j - 1, rate_in_gr, shared_border, shared_water) - - countrydict[code] = a - - db.close() - - cPickle.dump(countrydict, open(file, 'wb'), -1) - - return countrydict - -if __name__ == "__main__": - - countrydict = get_countrydict() - - area = globarea() - - areas = [] - for k, v in countrydict.items(): - ar = v.agg_1x1(area) / 1.e6 - areas.append((ar, k)) - - areas.sort() - areas.reverse() - for a in areas: print a - - v = countrydict['Ocean'] - print v.agg_1x1(area) - v = countrydict['Netherlands'] - print v.agg_1x1(area) - v = countrydict['Slovakia'] - print v.agg_1x1(area) - v = countrydict['Czech Republic'] - print v.agg_1x1(area) - v = countrydict['Czechoslovakia'] - print v.agg_1x1(area) - - - - - - - - diff --git a/da/analysis/tools_regions.py.bak b/da/analysis/tools_regions.py.bak deleted file mode 100755 index 21e4c8a5db4d7b339a8fef0c20bb7ae74a3a6b4f..0000000000000000000000000000000000000000 --- a/da/analysis/tools_regions.py.bak +++ /dev/null @@ -1,102 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. 
If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python - -import numpy as np -import cPickle -from da.analysis.tools_transcom import * - -# Aggregated olson ecosystem regions for CT Europe - -aggregates = { - "Forest" : (1, 2, 3, 5, 8, 10,) , \ - "Grass" : (4, 6, 13) , \ - "Crops": (14,) , \ - "Tundra" : (7, 9, 16) , \ - "Coastal" : (11, 15, 17) , \ - "IceWaterDeserts" : (12, 18, 19) \ -} - -ext_econams = [a for a, b in aggregates.iteritems()] -ext_ecocomps = [b for a, b in aggregates.iteritems()] - -eco19_to_ecosums = zeros((19, 6), float) -for i, k in enumerate(ext_ecocomps): - indices = [x - 1 for x in k] - eco19_to_ecosums[:, i].put(indices, 1.0) - -##### END OF REGION DEFINITIONS - -def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None): - """ - This method converts parameters from a CarbonTracker StateVector object to a gridded map of linear multiplication values. These - can subsequently be used in the transport model code to multiply/manipulate fluxes - - """ - nregions = regionmap.max() - try: - if not mapname: - raise Exception - - regionselect = cPickle.load(open('%s_regiondict.pickle' % mapname, 'rb')) - except: - - # dictionary for region <-> map conversions - regs = {} - for r in np.arange(1, nregions + 1): - sel = (regionmap.flat == r).nonzero() - if len(sel[0]) > 0: - regs[r] = sel - - regionselect = regs - - cPickle.dump(regionselect, open('%s_regiondict.pickle' % mapname, 'wb'), -1) - print 'Pickling region map' - - if reverse: - """ project 1x1 degree map onto ecoregions """ - - result = np.zeros(nregions, float) - for k, v in regionselect.iteritems(): - if avg: - result[k - 1] = values.ravel().take(v).mean() - else : - result[k - 1] = values.ravel().take(v).sum() - return result - - else: - """ project ecoregion properties onto 1x1 degree map """ - - result = np.zeros((180, 360,), float) - for k, v in regionselect.iteritems(): - result.put(v, values[k - 1]) - - return result - -def globarea(im=360, jm=180, silent=True): - """ Function calculates the surface area according to TM5 definitions""" - - radius = 6.371e6 # the earth radius in meters - deg2rad = np.pi / 180. - g = 9.80665 - - dxx = 360.0 / im * deg2rad - dyy = 180.0 / jm * deg2rad - lat = np.arange(-90 * deg2rad, 90 * deg2rad, dyy) - dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2 - area = np.resize(np.repeat(dxy, im, axis=0) , [jm, im]) - if not silent: - print 'total area of field = ', np.sum(area.flat) - print 'total earth area = ', 4 * np.pi * radius ** 2 - return area - diff --git a/da/analysis/tools_time.py.bak b/da/analysis/tools_time.py.bak deleted file mode 100755 index a8e8afbf7f25232d65daed318646d6bb37ab32ac..0000000000000000000000000000000000000000 --- a/da/analysis/tools_time.py.bak +++ /dev/null @@ -1,345 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. 
If not, see <http://www.gnu.org/licenses/>.""" -#! /usr/bin/env python -import sys -import calendar -import copy -from datetime import datetime, timedelta -from da.tools.general import date2num, num2date -from numpy import array, zeros, newaxis, logical_and, arange - - -def Fromdatetime(date): - dt = date.timetuple() - return datetime(*dt[0:6]) - -def increase_time(dd, **kwargs): - """ Function increases the time by specified amount""" - return dd + timedelta(**kwargs) - -def chardate(dd, cut=8): - return dd.strftime('%Y%m%d%H%M')[0:cut] - -def timegen(sd, ed, dt): - dd = [] - while sd <= ed: - dd.append(Fromdatetime(sd)) - sd = sd + dt - return dd - -def itau2datetime(itau, iyear0): - """ Function returns a datetime object from TM5s itau times""" - date0 = datetime(iyear0, 1, 1, 0, 0, 0) - if len(itau) == 1: - itau = [itau] - for time in itau: - sec = time % 60 - min = (time / 60) % 60 - hrs = (time / 3600) % 24 - day = (time / 86400) - dt = timedelta(days=day, hours=hrs, minutes=min, seconds=sec) - yield date0 + dt - -def date2dec(date): - """ Function converts datetime object to a Decimal number time (e.g., 1991.875 such as in IDL, CCG) """ - if not isinstance(date, list): date = [date] - - newdate = [] - for dd in date: - Days0 = date2num(datetime(dd.year, 1, 1)) - if calendar.isleap(dd.year): - DaysPerYear = 366. - else: - DaysPerYear = 365. - DayFrac = date2num(dd) - newdate.append(dd.year + (DayFrac - Days0) / DaysPerYear) - if len(newdate) == 1: return newdate[0] - return newdate - -def dec2date(dectime): - """ Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python datetime object """ - dt = num2date(dec2num(dectime)).timetuple() - return datetime(*dt[0:7]) - -def dec2num(dectime): - """ Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python decimal numtime """ - from pylab import floor, drange, num2date, date2num - if not isinstance(dectime, list): dectime = [dectime] - - newdectime = [] - for dd in dectime: - yr = floor(dd) - Days0 = date2num(datetime(int(yr), 1, 1)) - if calendar.isleap(yr): - DaysPerYear = 366. - else: - DaysPerYear = 365. 
- DayFrac = (dd - yr) * DaysPerYear - newdectime.append(Days0 + DayFrac) - if len(newdectime) == 1: return newdectime[0] - return newdectime - -def num2dec(numtime): - """ Function converts python decimal numtime to an IDL decimal time """ - from pylab import floor, drange, num2date, date2num - res = date2dec(num2mydate(numtime)) - return res - -def num2mydate(num): - """ Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python datetime object """ - dt = num2date(num).timetuple() - return datetime(*dt[0:7]) - -def monthgen(sd, ed): - """ Generate sequence of datetime objects spaced by one month""" - from pylab import arange - if ed < sd: - raise ValueError, 'start date exceeds end date' - sys.exit(2) - dates = [] - for year in arange(sd.year, ed.year + 2): - for month in arange(1, 13): - date = datetime(year, month, 1) - if date > ed: return dates - else: dates.append(date) - -def nextmonth(dd): - """ Find next 1st of the month following the date dd""" - - if dd.month == 12: - cc = dd.replace(year=dd.year + 1) - ee = cc.replace(month=1) - else: - ee = dd.replace(month=dd.month + 1) - ff = ee.replace(day=1) - return ff - -def in_interval(start, stop, times_in): - """ returns a list of fractions in time interval """ - times = copy.copy(times_in) - - interval = times[1] - times[0] - times.append(times[-1] + interval) # extend by one interval - times_filled = [times[0] + timedelta(days=d) for d in range((times[-1] - times[0]).days)] - - b = [] - in_int = 0.0 - for t in times_filled: # loop over days - if t in times[1:]: # if new interval starts - b.append(in_int) # add previous aggregate to output - in_int = 0.0 # reset counter - in_int += int(logical_and(t >= start, t < stop)) # count if in interval [start,stop > - b.append(in_int) - - if len(b) != len(times_in) : raise ValueError - - return b - -def yearly_avg(time, data, sdev=False): - """ make monthly average from array using rundat and data""" - - years = array([d.year for d in time]) - - aa = [] - ss = [] - tt = [] - dd = time[0] - ed = time[-1] - while dd <= ed: - ddnext = datetime(dd.year + 1, 1, 1) - weights = in_interval(dd, ddnext, time) - if len(weights) > 1: - weights = array(weights) - if weights.sum() > 0.0: - weights = weights / weights.sum() - else: - weights = weights - - if weights.shape[0] != data.shape[0]: - raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) - - sel = (weights != 0.0).nonzero()[0] - #print sel,array(time).take(sel),dd,ddnext - if data.ndim == 1: - avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0) - std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0) - elif data.ndim == 2: - avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - elif data.ndim == 3: - avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - else: - raise ValueError, 'yearly_avg takes 1, 2, or 3d arrays only' - elif len(weights) == 1: - avg_data = data[0] - std_data = 0.0 - else: - continue # next year - - aa.append(avg_data) - ss.append(std_data) - tt.append(datetime(dd.year, 6, 15)) - - dd = ddnext - - aa = array(aa).squeeze() - ss = array(ss).squeeze() - time = tt - if len(tt) == 1: - aa = aa.reshape(1, *aa.shape) - ss = 
ss.reshape(1, *ss.shape) - if sdev: return time, aa, ss - else : return time, aa - -def monthly_avg(time, data, sdev=False): - """ make monthly average from array using rundat and data""" - - years = array([d.year for d in time]) - months = array([d.month for d in time]) - - mm = [] - ss = [] - tt = [] - dd = time[0] - ed = time[-1] - - while dd <= ed: - ddnext = nextmonth(dd) - weights = in_interval(dd, ddnext, time) - if len(weights) > 1: - weights = array(weights) - if weights.sum() > 0.0: - weights = weights / weights.sum() - else: - weights = weights - - if weights.shape[0] != data.shape[0]: - raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) - - sel = (weights != 0.0).nonzero()[0] - #print sel,array(time).take(sel),dd,nextmonth(dd) - if data.ndim == 1: - avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0) - std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0) - elif data.ndim == 2: - avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - elif data.ndim == 3: - avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - else: - raise ValueError, 'monthly_avg takes 1, 2, or 3d arrays only' - elif len(weights) == 1: - avg_data = data[0] - std_data = 0.0 - else: - continue # next month - - mm.append(avg_data) - ss.append(std_data) - tt.append(datetime(dd.year, dd.month, 15)) - - dd = ddnext - - - mm = array(mm).squeeze() - ss = array(ss).squeeze() - time = tt - - if len(tt) == 1: - mm = mm.reshape(-1, *mm.shape) - ss = ss.reshape(-1, *ss.shape) - - if sdev: return time, mm, ss - else : return time, mm - -def season_avg(time, data, sdev=False): - """ make season average from array using rundat and data""" - - seas = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - - mm = [] - ss = [] - tt = [] - dd = time[0] - ed = time[-1] - - while dd <= ed: - ddmid = nextmonth(dd) - ddnext = nextmonth(nextmonth(nextmonth(dd))) - weights = in_interval(dd, ddnext, time) - if len(weights) > 1: - weights = array(weights) - if weights.sum() > 0.0: - weights = weights / weights.sum() - else: - weights = weights - - if weights.shape[0] != data.shape[0]: - raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) - - sel = (weights != 0.0).nonzero()[0] - #print sel,array(time).take(sel),dd,nextmonth(dd) - if data.ndim == 1: - avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0) - std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0) - elif data.ndim == 2: - avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - elif data.ndim == 3: - avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() - std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() - else: - raise ValueError, 'season_avg takes 1, 2, or 3d arrays only' - elif len(weights) == 1: - avg_data = data[0] - std_data = 0.0 - else: - continue # next month - - mm.append(avg_data) - ss.append(std_data) - tt.append(datetime(ddmid.year, ddmid.month, 15)) - - dd = ddnext - - - mm = array(mm).squeeze() - 
ss = array(ss).squeeze() - time = tt - - if len(tt) == 1: - mm = mm.reshape(-1, *mm.shape) - ss = ss.reshape(-1, *ss.shape) - - if sdev: return time, mm, ss - else : return time, mm - -def longterm_avg(time, data): - """ Create long term mean """ - - time_avg = num2date(date2num(time).mean()) - data_avg = data.mean(axis=0) - - return time_avg, data_avg - - - -if __name__ == '__main__': - #print monthgen(datetime(2000,1,1),datetime(2006,5,1)) - dd = datetime(2002, 3, 1) - print nextmonth(dd), dd - - - diff --git a/da/analysis/tools_transcom.py.bak b/da/analysis/tools_transcom.py.bak deleted file mode 100755 index 68287fb065644e5e8184b1c9bf13b5e8012c0da1..0000000000000000000000000000000000000000 --- a/da/analysis/tools_transcom.py.bak +++ /dev/null @@ -1,357 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python - -import os -import sys -sys.path.append('../../') -rootdir = os.getcwd().split('da/')[0] -analysisdir = os.path.join(rootdir, 'da/analysis') - -from string import join, split -from numpy import array, identity, zeros, arange, dot -import da.tools.io4 as io - -# Get masks of different region definitions - -matrix_file = os.path.join(analysisdir, 'copied_regions.nc') -cdf_temp = io.CT_CDF(matrix_file, 'read') -transcommask = cdf_temp.get_variable('transcom_regions') -if transcommask.max() < 23: - if 'transcom_regions_original' in cdf_temp.variables: - transcommask = cdf_temp.get_variable('transcom_regions_original') -olson240mask = cdf_temp.get_variable('regions') -olsonmask = cdf_temp.get_variable('land_ecosystems') -oifmask = cdf_temp.get_variable('ocean_regions') -dummy = cdf_temp.close() - -matrix_file = os.path.join(analysisdir, 'copied_regions_extended.nc') -cdf_temp = io.CT_CDF(matrix_file, 'read') -olson_ext_mask = cdf_temp.get_variable('regions') -dummy = cdf_temp.close() - -# Names and short names of TransCom regions - -transshort = [] -transnams = [] -transland = [] -temp = open(os.path.join(analysisdir, 't3_region_names'), 'r').readlines() -for line in temp: - items = line.split() - if items: - num, abbr, name = (items[0], items[1], join(items[2:]),) - transnams.append(name.strip('"')) - transshort.append(abbr) - if abbr.startswith('T'): - transland.append(name.strip('"')) - -transnams.append("Non-optimized") -transshort.append("I-NNOP") - -# Names and short names of Olson regions - -olsonnams = [] -olsonshort = [] -temp = open(os.path.join(analysisdir, 'olson19_region_names'), 'r').readlines() -for line in temp: - items = line.split() - if items: - num, abbr, name = (items[0], items[1], join(items[2:]),) - olsonnams.append(name.strip('"')) - olsonshort.append(abbr) - -olsonextnams = [] -matrix_file = os.path.join(analysisdir, 'copied_regions_extended.nc') -cdf_temp = io.CT_CDF(matrix_file, 'read') -keys = 
cdf_temp.ncattrs() -keys.sort() -for k in keys: - if 'Region' in k: - olsonextnams.append(getattr(cdf_temp, k)) -cdf_temp.close() - -ext_transnams = [] -ext_transshort = [] -ext_transcomps = [] - -# Get names of aggregated regions for post aggregation - -matrix_file = os.path.join(analysisdir, 'postagg_definitions.nc') -cdf_temp = io.CT_CDF(matrix_file, 'read') -xform = cdf_temp.get_variable('xform') -keys = cdf_temp.ncattrs() - -keys.sort() -for k in keys: - if 'longname' in k: - ext_transnams.append(getattr(cdf_temp, k)) - if 'shortname' in k: - ext_transshort.append(getattr(cdf_temp, k)) - if 'component' in k: - ext_transcomps.append(map(int, getattr(cdf_temp, k).split(','))) - -cdf_temp.close() - - -# Names of the ocean inversion flux regions, to go along with oifmask - -oifnams = ['(1) NOCN Arctic Ocean', \ - '(2) NAH North Atlantic (49 - 76N)', \ - '(3) NAM North Atlantic (36 - 49N)', \ - '(4) NAL North Atlantic (18 - 36N)', \ - '(5) NAT North Atlantic ( 0 - 18N)', \ - '(6) SAT South Atlantic ( 0 - 18S)', \ - '(7) SAL South Atlantic (18 - 31S)', \ - '(8) SAM South Atlantic (31 - 44S)', \ - '(9) SAH South Atlantic (44 - 58S)', \ - '(10) SOCN Southern Ocean (S of 58S)', \ - '(11) NPHW North Pacific (N of 49N, W of 195E)', \ - '(12) NPHE North Pacific (N of 36N, E of 195E)', \ - '(13) NPK North Pacific (Kuroshio Extension)', \ - '(14) NPLW North Pacific (18N - K.Ext, W of 195E)', \ - '(15) NPLE North Pacific (18 - 36N, E of 195E)', \ - '(16) NPTW North Pacific ( 0 - 18N, W of 199E)', \ - '(17) NPTE North Pacific ( 0 - 18N, E of 199E)', \ - '(18) SPTW South Pacific ( 0 - 18S, W of 199E)', \ - '(19) SPTE South Pacific ( 0 - 18S, E of 199E)', \ - '(20) SPLW South Pacific (18 - 31S, W of 233E)', \ - '(21) SPLE South Pacific (18 - 31S, E of 233E)', \ - '(22) SPMW South Pacific (31 - 44S, W of 248E)', \ - '(23) SPME South Pacific (31 - 44S, E of 248E, W of 278E)', \ - '(24) SPMC South Pacific (31 - 44S, coastal E of 278E)', \ - '(25) SPH South Pacific (44 - 58S) ', \ - '(26) NI North Indian', \ - '(27) SIT South Indian (0 - 18S)', \ - '(28) SIL South Indian (18 - 31S)', \ - '(29) SIM South Indian (31 - 44S)', \ - '(30) SIH South Indian (44 - 58S)'] - -oiflocs = [ (200, 80,), \ - (330, 55,), \ - (330, 40,), \ - (330, 22,), \ - (330, 8,), \ - (350, -12,), \ - (350, -27,), \ - (350, -40,), \ - (350, -53,), \ - (200, -70,), \ - (178, 54,), \ - (210, 40,), \ - (165, 38,), \ - (178, 25,), \ - (215, 25,), \ - (170, 8,), \ - (230, 8,), \ - (175, -10,), \ - (240, -10,), \ - (195, -27,), \ - (265, -27,), \ - (195, -40,), \ - (262, -40,), \ - (283, -40,), \ - (220, -53,), \ - (68, 8,), \ - (75, -10,), \ - (75, -27,), \ - (75, -40,), \ - (75, -53,)] - - -translocs = [ (-177, 0), \ - (-92, 53,), \ - (-108, 34,), \ - (-66, 4,), \ - (-50, -17,), \ - (15, 17,), \ - (26, -12,), \ - (84, 63,), \ - (103, 30,), \ - (115, 0,), \ - (132, -25,), \ - (9, 50,), \ - (-174, 46,), \ - (136, 6,), \ - (-108, 6,), \ - (-123, -15,), \ - (-32, 58,), \ - (-32, 38,), \ - (-32, 0,), \ - (-32, -38,), \ - (-14, -65,), \ - (68, 2,)] - -#olsonshort=[str(name.split()[1:2]).join(' ') for name in olsonnams] -old_olsonshort = [join(split(name, ' ')[1:2], ' ') for name in olsonnams] - -olsonlabs = ['Conifer Forest', 'Broadleaf Forest', 'Mixed Forest', 'Grass/Shrub', 'Tropical Forest', 'Scrub/Woods', 'Semitundra', 'Fields/Woods/\nSavanna', \ - 'Northern Taiga', 'Forest/Field', 'Wetland', 'Deserts', 'Shrub/Tree/\nSuc ', 'Crops', 'Conifer\n Snowy/Coastal', \ - 'Wooded tundra', 'Mangrove', 'Ice and \nPolar desert', 'Water'] - -ecmwfnams = [ 
' 1 CRPSMF Crops, mixed farming', \ - ' 2 SHGRSS Short Grass', \ - ' 3 EVNDLF Evergreen Needleleaf', \ - ' 4 DECNDLF Deciduous Needleleaf', \ - ' 5 EVBRDLF Evergreen Broadleaf', \ - ' 6 DECBRLF Deciduous Broadleaf', \ - ' 7 TLGRSS Tall Grass', \ - ' 8 DES Desert', \ - ' 9 TDR Tundra', \ - '10 IRRCR Irrigated Crops', \ - '11 SMDES Semidesert', \ - '12 ICE Ice Caps', \ - '13 BGM Bogs and Marches', \ - '14 INW Inland Water', \ - '15 OCE Ocean', \ - '16 EVSHRB Evergreen Shrubs', \ - '17 DECSHR Deciduous shrubs', \ - '18 MXFRST Mixed Forest', \ - '19 INTFRST Interrupted Forest'] - -ecmwfshort = [str(name.split()[1:2]).join(' ') for name in ecmwfnams] - -ecmwflabs = ['Crops, mixed farming', 'Short Grass', 'Evergreen Needleleaf', 'Deciduous Needleleaf', 'Evergreen Broadleaf', \ - 'Deciduous Broadleaf', 'Tall Grass', 'Desert', \ - 'Tundra', 'Irrigated Crops', 'Semidesert', 'Ice Caps', 'Bogs and Marches', 'Inland Water', 'Ocean', \ - 'Evergreen Shrubs', 'Deciduous shrubs', 'Mixed Forest', 'Interrupted Forest'] - -a = array([\ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 1 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , \ - 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0]) -O30to11 = a.reshape(11, 30).transpose() - -O11to11 = identity(11) - -ntcland = 11 # TC standard -ntcocean = 11 # TC standard - -Ols259_to_TC23 = zeros((259, 23), float) -Ols240_to_TC23 = zeros((240, 23), float) -Ols221_to_TC23 = zeros((221, 23), float) -for i in arange(ntcland): - Ols259_to_TC23[i * 19:(i + 1) * 19, i] = 1.0 - -Ols259_to_TC23[190:228, 10] = 1.0 # Europe -Ols259_to_TC23[228:258, 11:22] = O30to11 -for i in arange(ntcland): - Ols240_to_TC23[i * 19:(i + 1) * 19, i] = 1.0 -Ols240_to_TC23[209:239, 11:22] = O30to11 -for i in arange(ntcland): - Ols221_to_TC23[i * 19:(i + 1) * 19, i] = 1.0 -Ols221_to_TC23[209:220:, 11:22] = O11to11 - -Ols221_to_TC23[220, 22] = 1.0 -Ols240_to_TC23[239, 22] = 1.0 -Ols259_to_TC23[258, 22] = 1.0 - - -ntcland = 11 # TC standard -ntcocean = 11 # TC standard - -ExtendedTCRegionsFile = 'postagg_definitions.nc' - -def ExtendedTCRegions(data, cov=False): - """ convert to extended transcom shaped regions""" - - nparams = data.shape[-1] - if nparams != 23: - raise ValueError('Do not know how to convert %s regions to 37 extended transcom regions' % (nparams,)) - M = xform - - if not cov: - return 
dot(array(data).squeeze(), M) - else: - try: - return M.transpose().dot(data).dot(M) - except: - return dot(dot(M.transpose(), data), M) #Huygens fix - -def cov2corr(A): - b = 1. / sqrt(A.diagonal()) - return A * dot(b[:, newaxis], b[newaxis, :]) - - """ function projects 1x1 degree map onto TransCom regions by adding gridboxes over larger areas """ - from hdf2field import Sds2field - import cPickle - import os - from plottools import rebin - - transcommapfile = 'tc_land11_oif30.hdf' - transcomconversionfile = 'map_to_tc.pickle' - try: - regionselect = cPickle.load(open(transcomconversionfile, 'rb')) - except: - # read map from NetCDF - print '[in map_to_tc() in tctools.py:] ' + \ - 'Creating conversion map and pickle file for future quick use, patience please...' - map = Sds2field(transcommapfile, 'tc_region') - - # create dictionary for region <-> map conversions based on 1x1 map - - regs = {} - nregions = map.max() - for r in arange(1, nregions + 1): - sel = (map.flat == r).nonzero() - if len(sel[0]) > 0: - regs[r] = sel - regionselect = regs - dummy = cPickle.dump(regionselect, open(transcomconversionfile, 'wb'), -1) - - result = zeros(len(regionselect.keys()), float) - for k, v in regionselect.iteritems(): - result[k - 1] = data.ravel().take(v).sum() - return result - - """ return name of region number reg """ - - if longnames: - econames = olsonnams - else : - econames = olsonshort - - if tc: - return (transnams[reg - 1],) - elif eco: - if reg > rundat.npparameters: - raise IOError, 'Region number exceeds definitions' - elif reg > rundat.n_land and reg != rundat.nparameters: - ret = ('Ocean', oifnams[reg - rundat.n_land - 1]) - elif reg > 209 and reg <= rundat.n_land: - ret = ('Europe', econames[(reg - 1) % 19] + "_East") - elif reg == rundat.nparameters: - ret = (transnams[-1]) - else: - ret = (transnams[(reg - 1) / 19], econames[(reg - 1) % 19]) - return ret - elif olson: - return (econames[(reg - 1) % 19],) - -if __name__ == '__main__': - print transnams - print transshort - print ext_transnams - print ext_transshort - print olsonnams - print olsonshort - print ext_transcomps - print olsonextnams - diff --git a/da/analysis/transcom_regiondict.pickle b/da/analysis/transcom_regiondict.pickle deleted file mode 100644 index 5d977f37a5ed0cf07e6dfbf1be6ca01ed4569790..0000000000000000000000000000000000000000 Binary files a/da/analysis/transcom_regiondict.pickle and /dev/null differ diff --git a/da/baseclasses/__pycache__/__init__.cpython-37.pyc b/da/baseclasses/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index aafaa3add1631c23af8adf055b0307e5cc15269a..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/dasystem.cpython-37.pyc b/da/baseclasses/__pycache__/dasystem.cpython-37.pyc deleted file mode 100644 index 5119f9750c75a1fe218f406a894d45737c1b9148..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/dasystem.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/obs.cpython-37.pyc b/da/baseclasses/__pycache__/obs.cpython-37.pyc deleted file mode 100644 index e9e087365494f0d01af5fbd905ec85f391978fed..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/obs.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/observationoperator.cpython-37.pyc b/da/baseclasses/__pycache__/observationoperator.cpython-37.pyc deleted file mode 100644 index 
4346d0ab3101790bca72c6835f8c7b39a71e606b..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/observationoperator.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/optimizer.cpython-37.pyc b/da/baseclasses/__pycache__/optimizer.cpython-37.pyc deleted file mode 100644 index 214a7f74748d1fe1e42bd70b21ef137edd9ca4cd..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/optimizer.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/platform.cpython-37.pyc b/da/baseclasses/__pycache__/platform.cpython-37.pyc deleted file mode 100644 index 8827f229378488e0ed725f01c052c0fb92fa3eae..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/platform.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/__pycache__/statevector.cpython-37.pyc b/da/baseclasses/__pycache__/statevector.cpython-37.pyc deleted file mode 100644 index cc2af136499a8bd24e6f5f9ca768feb852094fc5..0000000000000000000000000000000000000000 Binary files a/da/baseclasses/__pycache__/statevector.cpython-37.pyc and /dev/null differ diff --git a/da/baseclasses/dasystem.py.bak b/da/baseclasses/dasystem.py.bak deleted file mode 100755 index 6439eaa320549528eda6907aa37756bbc11d0ccc..0000000000000000000000000000000000000000 --- a/da/baseclasses/dasystem.py.bak +++ /dev/null @@ -1,102 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# control.py - -""" -.. module:: dasystem -.. moduleauthor:: Wouter Peters - -Revision History: -File created on 26 Aug 2010. - -The DaSystem class is found in the module :mod:`dasystem`, or in a specific implementation under the da/ source tree. It is derived from the standard python :class:`dictionary` object. - -It describes the details of the data assimilation system used (i.e., CarbonTracker, or CT Methane, or ....) :: - - datadir : /Volumes/Storage/CO2/carbontracker/input/ct08/ ! The directory where input data is found - obs.input.dir : ${datadir}/obsnc/with_fillvalue ! the observation input dir - obs.input.fname : obs_forecast.nc ! the observation input file - ocn.covariance : ${datadir}/oif_p3_era40.dpco2.2000.01.hdf ! the ocean flux covariance file - bio.covariance : ${datadir}/covariance_bio_olson19.nc ! the biosphere flux covariance file - deltaco2.prefix : oif_p3_era40.dpco2 ! the type of ocean product used - regtype : olson19_oif30 ! the ecoregion definitions - nparameters : 240 ! the number of parameters to solve for - random.seed : 4385 ! the random seed for the first cycle - regionsfile : transcom_olson19_oif30.hdf ! the ecoregion defintion mask file - - ! Info on the sites file used - - obs.sites.rc : ${datadir}/sites_and_weights_co2.ct10.rc ! 
the weights in the covariance matric of each obs - -The full baseclass description: - -.. autoclass:: da.baseclasses.dasystem.DaSystem - :members: - -""" - -import logging -import da.tools.rc as rc -################### Begin Class DaSystem ################### - -class DaSystem(dict): - """ - Information on the data assimilation system used. This is normally an rc-file with settings. - """ - - def __init__(self, rcfilename): - """ - Initialization occurs from passed rc-file name, items in the rc-file will be added - to the dictionary - """ - - self.ID = 'CarbonTracker CO2' # the identifier gives the platform name - self.load_rc(rcfilename) - - logging.debug("Data Assimilation System initialized: %s" % self.ID) - - def load_rc(self, rcfilename): - """ - This method loads a DA System Info rc-file with settings for this simulation - """ - for k, v in rc.read(rcfilename).items(): - self[k] = v - - logging.debug("DA System Info rc-file (%s) loaded successfully" % rcfilename) - - - def validate(self): - """ - validate the contents of the rc-file given a dictionary of required keys - """ - needed_rc_items = {} - - for k, v in self.items(): - if v == 'True' : - self[k] = True - if v == 'False': - self[k] = False - - for key in needed_rc_items: - if key not in self: - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - raise IOError(msg) - logging.debug('DA System Info settings have been validated succesfully') - -################### End Class DaSystem ################### - - -if __name__ == "__main__": - pass diff --git a/da/baseclasses/platform.py.bak b/da/baseclasses/platform.py.bak deleted file mode 100755 index 6243f3a8951bb77beecdbda6c8c13c1e99b93a3a..0000000000000000000000000000000000000000 --- a/da/baseclasses/platform.py.bak +++ /dev/null @@ -1,159 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# jobcontrol.py - -""" -.. module:: platform -.. moduleauthor:: Wouter Peters - -Revision History: -File created on 06 Sep 2010. - -The Platform class is found in the module :mod:`platform`, or in a specific implementation under the da/source tree. - -The platform object holds attributes and methods that allow job control on each specific platform. This includes methods to create and submit jobs, but also to obtain process and/or job ID's. These are needed to control the flow of -the system on each platform. - -Typically, every platform needs specific implementations of this object (through inheritance), and you should refer to your specific Platform object documentation for details (see *da/platform/*). - -.. 
autoclass:: da.baseclasses.platform.Platform - :members: - :inherited-members: - -""" - -import os -import logging -import subprocess - -std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobnodes':'nserial 1', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'01:00:00'} - -class Platform(object): - """ - This specifies platform dependent options under generic object calls. A platform object is used to control and submit jobs - """ - - def __init__(self): - """ - The init function reports the hard-coded ``Identifier`` and ``Version`` of the Platform. Since each new - computer/user requires their own Platform object modifications, the init function is usually overwritten - in the specific implementation of this class - """ - self.ID = 'iPad' # the identifier gives the plaform name - self.version = '1.0' # the platform version used - - logging.debug('%s object initialized' % self.ID) - logging.debug('%s version: %s' % (self.ID, self.version)) - - def give_blocking_flag(self): - return "" - - def give_queue_type(self): - return "foreground" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. 
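        A minimal call sequence, sketched here with hypothetical option values and filenames, could look like::

            platform = Platform()
            template = platform.get_job_template({'jobname': 'ctdas_test', 'jobtime': '02:00:00'})
            platform.write_job('jb.test.jb', template, platform.get_my_id())
            jobid = platform.submit_job('jb.test.jb', block=False)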
- """ - - template = """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """ \n""" + \ - """#$ jobname \n""" + \ - """#$ jobaccount \n""" + \ - """#$ jobnodes \n""" + \ - """#$ jobtime \n""" + \ - """#$ jobshell \n """ - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - return template - - def get_my_id(self): - """ Return the process ID, or job ID of the current process or job""" - return os.getpid() - - def write_job(self, jobfile, template, jobid): - """ - This method writes a jobfile to the exec dir and makes it executable (mod 477) - """ - # - # Done, write jobfile - # - f = open(jobfile, 'w') - f.write(template) - f.close() - os.chmod(jobfile, 477) - logging.debug("A job file was created (%s)" % jobfile) - - def submit_job(self, jobfile, joblog=None, block=False): - """ - :param jobfile: a string with the filename of a jobfile to run - :param joblog: a string with the filename of a logfile to write run output to - :param block: Boolean specifying whether to submit and continue (F), or submit and wait (T) - :rtype: integer - - This method submits a jobfile to the queue, and returns the job ID - """ - cmd = ["sh", jobfile] - logging.info("A new task will be started (%s)" % cmd) - if block: - jobid = subprocess.call(cmd) - else: - jobid = subprocess.Popen(cmd).pid - - logging.info('Summary:') - logging.info('job script : %s' % jobfile) - logging.info('job log : %s' % joblog) - logging.info('To manage this process:') - logging.info(' # kill process:') - logging.info(' kill %i\n' % jobid) - - - def kill_job(self, jobid): - """ This method kills a running job """ - - def job_stat(self, jobid): - """ This method gets the status of a running job """ - output = subprocess.Popen(['qstat', jobid], stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - return output - - -if __name__ == "__main__": - pass diff --git a/da/baseclasses/statevector.py b/da/baseclasses/statevector.py index 7b5a9547c0999615897d55b7518862f50fc78af6..ba848c250f82e603cee716cb5071aa95b8d1e5dd 100755 --- a/da/baseclasses/statevector.py +++ b/da/baseclasses/statevector.py @@ -34,6 +34,7 @@ your own baseclass StateVector we refer to :ref:`tut_chapter5`. """ import os +import sys import logging import numpy as np from datetime import timedelta @@ -162,46 +163,14 @@ class StateVector(object): self.ensemble_members[n] = [] - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember - # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. 
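        # For illustration (hypothetical values): with a (180, 360) gridmap holding parameter
        # indices 1..nparams, the reverse look-up built below gives e.g.
        #   griddict[3] = np.nonzero(self.gridmap.flat == 3)
        # i.e. the flattened 1x1-degree cell indices covered by parameter 3; vector2grid() and
        # grid2vector() use these index arrays to map a length-nparams state onto the grid and back.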
- - mapfile = os.path.join(dacycle.dasystem['regionsfile']) - ncf = io.ct_read(mapfile, 'read') - self.gridmap = ncf.get_variable('regions') - self.tcmap = ncf.get_variable('transcom_regions') - ncf.close() - - logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - - # Create a dictionary for state <-> gridded map conversions - - nparams = self.gridmap.max() + self.gridmap = np.random.randint(low=1,high=self.nparams+1,size=(180,360,)) self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = np.nonzero(self.gridmap.flat == r) + for r in range(1, self.nparams+1): + sel = np.nonzero(self.gridmap.flat == r) if len(sel[0]) > 0: self.griddict[r] = sel - logging.debug("A dictionary to map grids to states and vice versa was created") - # Create a matrix for state <-> TransCom conversions - - self.tcmatrix = np.zeros((self.nparams, 23), 'float') - - for r in range(1, self.nparams + 1): - sel = np.nonzero(self.gridmap.flat == r) - if len(sel[0]) < 1: - continue - else: - n_tc = set(self.tcmap.flatten().take(sel[0])) - if len(n_tc) > 1: - logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc)) - raise ValueError - self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0 - - logging.debug("A matrix to map states to TransCom regions and vice versa was created") - # Create a mask for species/unknowns self.make_species_mask() @@ -230,7 +199,7 @@ class StateVector(object): logging.debug(" -> %s" % k) - def make_new_ensemble(self, lag, covariancematrix=None): + def make_new_ensemble(self, lag, covariancematrix=[None]): """ :param lag: an integer indicating the time step in the lag order :param covariancematrix: a matrix to draw random values from diff --git a/da/baseclasses/statevector.py.bak b/da/baseclasses/statevector.py.bak deleted file mode 100755 index 94cb6e97da9ced531eda5b50bcfdcb120cf8acff..0000000000000000000000000000000000000000 --- a/da/baseclasses/statevector.py.bak +++ /dev/null @@ -1,613 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# ct_statevector_tools.py - -""" -.. module:: statevector -.. moduleauthor:: Wouter Peters - -Revision History: -File created on 28 Jul 2010. - -The module statevector implements the data structure and methods needed to work with state vectors (a set of unknown parameters to be optimized by a DA system) of different lengths, types, and configurations. 
Two baseclasses together form a generic framework: - * :class:`~da.baseclasses.statevector.StateVector` - * :class:`~da.baseclasses.statevector.EnsembleMember` - -As usual, specific implementations of StateVector objects are done through inheritance form these baseclasses. An example of designing -your own baseclass StateVector we refer to :ref:`tut_chapter5`. - -.. autoclass:: da.baseclasses.statevector.StateVector - -.. autoclass:: da.baseclasses.statevector.EnsembleMember - -""" - -import os -import logging -import numpy as np -from datetime import timedelta -import da.tools.io4 as io - -identifier = 'Baseclass Statevector ' -version = '0.0' - -################### Begin Class EnsembleMember ################### - -class EnsembleMember(object): - """ - An ensemble member object consists of: - * a member number - * parameter values - * an observation object to hold sampled values for this member - - Ensemble members are initialized by passing only an ensemble member number, all data is added by methods - from the :class:`~da.baseclasses.statevector.StateVector`. Ensemble member objects have almost no functionality - except to write their data to file using method :meth:`~da.baseclasses.statevector.EnsembleMember.write_to_file` - - .. automethod:: da.baseclasses.statevector.EnsembleMember.__init__ - .. automethod:: da.baseclasses.statevector.EnsembleMember.write_to_file - .. automethod:: da.baseclasses.statevector.EnsembleMember.AddCustomFields - - """ - - def __init__(self, membernumber): - """ - :param memberno: integer ensemble number - :rtype: None - - An EnsembleMember object is initialized with only a number, and holds two attributes as containter for later - data: - * param_values, will hold the actual values of the parameters for this data - * ModelSample, will hold an :class:`~da.baseclasses.obs.Observation` object and the model samples resulting from this members' data - - """ - self.membernumber = membernumber # the member number - self.param_values = None # Parameter values of this member - -################### End Class EnsembleMember ################### - -################### Begin Class StateVector ################### - - -class StateVector(object): - """ - The StateVector object first of all contains the data structure of a statevector, defined by 3 attributes that define the - dimensions of the problem in parameter space: - * nlag - * nparameters - * nmembers - - The fourth important dimension `nobs` is not related to the StateVector directly but is initialized to 0, and later on - modified to be used in other parts of the pipeline: - * nobs - - These values are set as soon as the :meth:`~da.baseclasses.statevector.StateVector.setup` is called from the :ref:`pipeline`. - Additionally, the value of attribute `isOptimized` is set to `False` indicating that the StateVector holds a-priori values - and has not been modified by the :ref:`optimizer`. - - StateVector objects can be filled with data in two ways - 1. By reading the data from file - 2. By creating the data through a set of method calls - - Option (1) is invoked using method :meth:`~da.baseclasses.statevector.StateVector.read_from_file`. 
- Option (2) consists of a call to method :meth:`~da.baseclasses.statevector.StateVector.make_new_ensemble` - - Once the StateVector object has been filled with data, it is used in the pipeline and a few more methods are - invoked from there: - * :meth:`~da.baseclasses.statevector.StateVector.propagate`, to advance the StateVector from t=t to t=t+1 - * :meth:`~da.baseclasses.statevector.StateVector.write_to_file`, to write the StateVector to a NetCDF file for later use - - The methods are described below: - - .. automethod:: da.baseclasses.statevector.StateVector.setup - .. automethod:: da.baseclasses.statevector.StateVector.read_from_file - .. automethod:: da.baseclasses.statevector.StateVector.write_to_file - .. automethod:: da.baseclasses.statevector.StateVector.make_new_ensemble - .. automethod:: da.baseclasses.statevector.StateVector.propagate - .. automethod:: da.baseclasses.statevector.StateVector.write_members_to_file - - Finally, the StateVector can be mapped to a gridded array, or to a vector of TransCom regions, using: - - .. automethod:: da.baseclasses.statevector.StateVector.grid2vector - .. automethod:: da.baseclasses.statevector.StateVector.vector2grid - .. automethod:: da.baseclasses.statevector.StateVector.vector2tc - .. automethod:: da.baseclasses.statevector.StateVector.state2tc - - """ - - def __init__(self): - self.ID = identifier - self.version = version - - # The following code allows the object to be initialized with a dacycle object already present. Otherwise, it can - # be added at a later moment. - - logging.info('Statevector object initialized: %s' % self.ID) - - def setup(self, dacycle): - """ - setup the object by specifying the dimensions. - There are two major requirements for each statvector that you want to build: - - (1) is that the statevector can map itself onto a regular grid - (2) is that the statevector can map itself (mean+covariance) onto TransCom regions - - An example is given below. - """ - - self.nlag = int(dacycle['time.nlag']) - self.nmembers = int(dacycle['da.optimizer.nmembers']) - self.nparams = int(dacycle.dasystem['nparameters']) - self.nobs = 0 - - self.obs_to_assimilate = () # empty containter to hold observations to assimilate later on - - # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist - # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. - - self.ensemble_members = list(range(self.nlag)) - - for n in range(self.nlag): - self.ensemble_members[n] = [] - - - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember - # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. 
- - mapfile = os.path.join(dacycle.dasystem['regionsfile']) - ncf = io.ct_read(mapfile, 'read') - self.gridmap = ncf.get_variable('regions') - self.tcmap = ncf.get_variable('transcom_regions') - ncf.close() - - logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - - # Create a dictionary for state <-> gridded map conversions - - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = np.nonzero(self.gridmap.flat == r) - if len(sel[0]) > 0: - self.griddict[r] = sel - - logging.debug("A dictionary to map grids to states and vice versa was created") - - # Create a matrix for state <-> TransCom conversions - - self.tcmatrix = np.zeros((self.nparams, 23), 'float') - - for r in range(1, self.nparams + 1): - sel = np.nonzero(self.gridmap.flat == r) - if len(sel[0]) < 1: - continue - else: - n_tc = set(self.tcmap.flatten().take(sel[0])) - if len(n_tc) > 1: - logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc)) - raise ValueError - self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0 - - logging.debug("A matrix to map states to TransCom regions and vice versa was created") - - # Create a mask for species/unknowns - - self.make_species_mask() - - def make_species_mask(self): - - """ - - This method creates a dictionary with as key the name of a tracer, and as values an array of 0.0/1.0 values - specifying which StateVector elements are constrained by this tracer. This mask can be used in - the optimization to ensure that certain types of osbervations only update certain unknowns. - - An example would be that the tracer '14CO2' can be allowed to only map onto fossil fuel emissions in the state - - The form of the mask is: - - {'co2': np.ones(self.nparams), 'co2c14', np.zeros(self.nparams) } - - so that 'co2' maps onto all parameters, and 'co2c14' on none at all. These arrays are used in the Class - optimizer when state updates are actually performed - - """ - self.speciesdict = {'co2': np.ones(self.nparams)} - logging.debug("A species mask was created, only the following species are recognized in this system:") - for k in self.speciesdict.keys(): - logging.debug(" -> %s" % k) - - - def make_new_ensemble(self, lag, covariancematrix=None): - """ - :param lag: an integer indicating the time step in the lag order - :param covariancematrix: a matrix to draw random values from - :rtype: None - - Make a new ensemble, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - - The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is - used to draw ensemblemembers from. If this argument is not passed it will ne substituted with an - identity matrix of the same dimensions. - - """ - - if covariancematrix == None: - covariancematrix = np.identity(self.nparams) - - # Make a cholesky decomposition of the covariance matrix - - - try: - _, s, _ = np.linalg.svd(covariancematrix) - except: - s = np.linalg.svd(covariancematrix, full_matrices=1, compute_uv=0) #Cartesius fix - dof = np.sum(s) ** 2 / sum(s ** 2) - C = np.linalg.cholesky(covariancematrix) - - logging.debug('Cholesky decomposition has succeeded ') - logging.info('Appr. 
degrees of freedom in covariance matrix is %s' % (int(dof))) - - - # Create mean values - - newmean = np.ones(self.nparams, float) # standard value for a new time step is 1.0 - - # If this is not the start of the filter, average previous two optimized steps into the mix - - if lag == self.nlag - 1 and self.nlag >= 3: - newmean += self.ensemble_members[lag - 1][0].param_values + \ - self.ensemble_members[lag - 2][0].param_values - newmean = newmean / 3.0 - - # Create the first ensemble member with a deviation of 0.0 and add to list - - newmember = EnsembleMember(0) - newmember.param_values = newmean.flatten() # no deviations - self.ensemble_members[lag].append(newmember) - - # Create members 1:nmembers and add to ensemble_members list - - for member in range(1, self.nmembers): - rands = np.random.randn(self.nparams) - - newmember = EnsembleMember(member) - newmember.param_values = np.dot(C, rands) + newmean - self.ensemble_members[lag].append(newmember) - - logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, (lag + 1))) - - - def propagate(self, dacycle): - """ - :rtype: None - - Propagate the parameter values in the StateVector to the next cycle. This means a shift by one cycle - step for all states that will - be optimized once more, and the creation of a new ensemble for the time step that just - comes in for the first time (step=nlag). - In the future, this routine can incorporate a formal propagation of the statevector. - - """ - - # Remove State Vector n=1 by simply "popping" it from the list and appending a new empty list at the front. This empty list will - # hold the new ensemble for the new cycle - - self.ensemble_members.pop(0) - self.ensemble_members.append([]) - - # And now create a new time step of mean + members for n=nlag - date = dacycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(dacycle['time.cycle'])) - cov = self.get_covariance(date, dacycle) - self.make_new_ensemble(self.nlag - 1, cov) - - logging.info('The state vector has been propagated by one cycle') - - - def write_to_file(self, filename, qual): - """ - :param filename: the full filename for the output NetCDF file - :rtype: None - - Write the StateVector information to a NetCDF file for later use. 
- In principle the output file will have only one two datasets inside - called: - * `meanstate`, dimensions [nlag, nparamaters] - * `ensemblestate`, dimensions [nlag,nmembers, nparameters] - - This NetCDF information can be read back into a StateVector object using - :meth:`~da.baseclasses.statevector.StateVector.read_from_file` - - """ - #import da.tools.io4 as io - #import da.tools.io as io - - if qual == 'prior': - f = io.CT_CDF(filename, method='create') - logging.debug('Creating new StateVector output file (%s)' % filename) - #qual = 'prior' - else: - f = io.CT_CDF(filename, method='write') - logging.debug('Opening existing StateVector output file (%s)' % filename) - #qual = 'opt' - - dimparams = f.add_params_dim(self.nparams) - dimmembers = f.add_members_dim(self.nmembers) - dimlag = f.add_lag_dim(self.nlag, unlimited=True) - - for n in range(self.nlag): - members = self.ensemble_members[n] - mean_state = members[0].param_values - - savedict = f.standard_var(varname='meanstate_%s' % qual) - savedict['dims'] = dimlag + dimparams - savedict['values'] = mean_state - savedict['count'] = n - savedict['comment'] = 'this represents the mean of the ensemble' - f.add_data(savedict) - - members = self.ensemble_members[n] - devs = np.asarray([m.param_values.flatten() for m in members]) - data = devs - np.asarray(mean_state) - - savedict = f.standard_var(varname='ensemblestate_%s' % qual) - savedict['dims'] = dimlag + dimmembers + dimparams - savedict['values'] = data - savedict['count'] = n - savedict['comment'] = 'this represents deviations from the mean of the ensemble' - f.add_data(savedict) - f.close() - - logging.info('Successfully wrote the State Vector to file (%s) ' % filename) - - def read_from_file(self, filename, qual='opt'): - """ - :param filename: the full filename for the input NetCDF file - :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file - :rtype: None - - Read the StateVector information from a NetCDF file and put in a StateVector object - In principle the input file will have only one four datasets inside - called: - * `meanstate_prior`, dimensions [nlag, nparamaters] - * `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters] - * `meanstate_opt`, dimensions [nlag, nparamaters] - * `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters] - - This NetCDF information can be written to file using - :meth:`~da.baseclasses.statevector.StateVector.write_to_file` - - """ - - #import da.tools.io as io - f = io.ct_read(filename, 'read') - meanstate = f.get_variable('statevectormean_' + qual) - ensmembers = f.get_variable('statevectorensemble_' + qual) - f.close() - - for n in range(self.nlag): - if not self.ensemble_members[n] == []: - self.ensemble_members[n] = [] - logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1)) - - for m in range(self.nmembers): - newmember = EnsembleMember(m) - newmember.param_values = ensmembers[n, m, :].flatten() + meanstate[n] # add the mean to the deviations to hold the full parameter values - self.ensemble_members[n].append(newmember) - - logging.info('Successfully read the State Vector from file (%s) ' % filename) - - def write_members_to_file(self, lag, outdir,endswith='.nc'): - """ - :param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag] - :param: outdir: Directory where to write files - :param: endswith: Optional label to add to the filename, default is simply .nc - :rtype: None - - Write ensemble member 
information to a NetCDF file for later use. The standard output filename is - *parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location - is the `dir.input` of the dacycle object. In principle the output file will have only two datasets inside - called `parametervalues` which is of dimensions `nparameters` and `parametermap` which is of dimensions (180,360). - This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object. - - .. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you - can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function. - - """ - - # These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was - # to do the import already at the start of the module, not just in this method. - - #import da.tools.io as io - #import da.tools.io4 as io - - members = self.ensemble_members[lag] - - for mem in members: - filename = os.path.join(outdir, 'parameters.%03d%s' % (mem.membernumber, endswith)) - ncf = io.CT_CDF(filename, method='create') - dimparams = ncf.add_params_dim(self.nparams) - dimgrid = ncf.add_latlon_dim() - - data = mem.param_values - - savedict = io.std_savedict.copy() - savedict['name'] = "parametervalues" - savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimparams - savedict['values'] = data - savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - griddata = self.vector2grid(vectordata=data) - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap" - savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata.tolist() - savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - ncf.close() - - logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename)) - - def grid2vector(self, griddata=None, method='avg'): - """ - Map gridded data onto a vector of length (nparams,) - - :param griddata: a gridded dataset to use. This dataset is mapped onto a vector of length `nparams` - :param method: a string that specifies the method to combine grid boxes in case reverse=True. Must be either ['avg','sum','minval'] - :rtype: ndarray: size (nparameters,) - - This method makes use of a dictionary that links every parameter number [1,...,nparams] to a series of gridindices. These - indices specify a location on a 360x180 array, stretched into a vector using `array.flat`. There are multiple ways of calling - this method:: - - values = self.grid2vector(griddata=mygriddeddata,method='minval') # - using the minimum value of all datapoints covered by that parameter index - - values = self.grid2vector(griddata=mygriddeddata,method='avg') # - using the average value of all datapoints covered by that parameter index - - values = self.grid2vector(griddata=mygriddeddata,method='sum') # - using the sum of values of all datapoints covered by that parameter index - - .. note:: This method uses a DaSystem object that must be initialized with a proper parameter map. 
See :class:`~da.baseclasses.dasystem` for details - - """ - - methods = ['avg', 'sum', 'minval'] - if method not in methods: - logging.error("To put data from a map into the statevector, please specify the method to use (%s)" % methods) - raise ValueError - - result = np.zeros((self.nparams,), float) - for k, v in self.griddict.items(): - #print k,k-1,result.shape, v - if method == "avg": - result[k - 1] = griddata.take(v).mean() - elif method == "sum" : - result[k - 1] = griddata.take(v).sum() - elif method == "minval" : - result[k - 1] = griddata.take(v).min() - return result # Note that the result is returned, but not yet placed in the member.param_values attrtibute! - - - def vector2grid(self, vectordata=None): - """ - Map vector elements to a map or vice cersa - - :param vectordata: a vector dataset to use in case `reverse = False`. This dataset is mapped onto a 1x1 grid and must be of length `nparams` - :rtype: ndarray: an array of size (360,180,) - - This method makes use of a dictionary that links every parameter number [1,...,nparams] to a series of gridindices. These - indices specify a location on a 360x180 array, stretched into a vector using `array.flat`. There are multiple ways of calling - this method:: - - griddedarray = self.vector2grid(vectordata=param_values) # simply puts the param_values onto a (180,360,) array - - .. note:: This method uses a DaSystem object that must be initialzied with a proper parameter map. See :class:`~da.baseclasses.dasystem` for details - - """ - result = np.zeros(self.gridmap.shape, float) - for k, v in self.griddict.items(): - #print k,v - result.put(v, vectordata[k - 1]) - return result - - def vector2tc(self, vectordata, cov=False): - """ - project Vector onto TransCom regions - - :param vectordata: a vector dataset to use, must be of length `nparams` - :param cov: a Boolean to specify whether the input dataset is a vector (mean), or a matrix (covariance) - :rtype: ndarray: an array of size (23,) (cov:F) or of size (23,23,) (cov:T) - """ - - M = self.tcmatrix - if cov: - return np.dot(np.transpose(M), np.dot(vectordata, M)) - else: - return np.dot(vectordata.squeeze(), M) - - def state_to_grid(self, fluxvector=None, lag=1): - """ - Transforms the StateVector information (mean + covariance) to a 1x1 degree grid. - - :param: fluxvector: a vector of length (nparams,) that holds the fluxes associated with each parameter in the StateVector - :param: lag: the lag at which to evaluate the StateVector - :rtype: a tuple of two arrays (gridmean,gridvariance) with dimensions (180,360,) - - If the attribute `fluxvector` is not passed, the function will return the mean parameter value and its variance on a 1x1 map. - - ..note:: Although we can return the variance information for each gridbox, the covariance information contained in the original ensemble is lost when mapping to 1x1 degree! 
- - """ - - if fluxvector == None: - fluxvector = np.ones(self.nparams) - - ensemble = self.ensemble_members[lag - 1] - ensemblemean = ensemble[0].param_values - - # First transform the mean - gridmean = self.vector2grid(vectordata=ensemblemean * fluxvector) - - # And now the covariance, first create covariance matrix (!), and then multiply - deviations = np.array([mem.param_values * fluxvector - ensemblemean for mem in ensemble]) - ensemble = [] - for mem in deviations: - ensemble.append(self.vector2grid(mem)) - - return (gridmean, np.array(ensemble)) - - def state2tc(self, fluxvector=None, lag=1): - """ - Transforms the StateVector information (mean + covariance) to the TransCom regions. - - :param: fluxvector: a vector of length (nparams,) that holds the fluxes associated with each parameter in the StateVector - :param: lag: the lag at which to evaluate the StateVector - :rtype: a tuple of two arrays (mean,covariance) with dimensions ((23,), (23,23,) ) - - """ - ensemble = self.ensemble_members[lag - 1] - ensemblemean = ensemble[0].param_values - - # First transform the mean - - mean = self.vector2tc(vectordata=ensemble[0].param_values * fluxvector) - - # And now the covariance, first create covariance matrix (!), and then multiply - - deviations = np.array([mem.param_values * fluxvector - ensemblemean for mem in ensemble]) - covariance = np.dot(np.transpose(deviations), deviations) / (self.nmembers - 1) - cov = self.vector2tc(covariance, cov=True) - - return (mean, cov) - - def get_covariance(self, date, cycleparams): - pass - -################### End Class StateVector ################### - -if __name__ == "__main__": - pass - diff --git a/da/carbondioxide/__pycache__/__init__.cpython-37.pyc b/da/carbondioxide/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index d10224a78181baa6a03316c0bc4f25e60e95b89b..0000000000000000000000000000000000000000 Binary files a/da/carbondioxide/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/da/carbondioxide/__pycache__/obspack_globalviewplus2.cpython-37.pyc b/da/carbondioxide/__pycache__/obspack_globalviewplus2.cpython-37.pyc deleted file mode 100644 index 7abfc3d555959f902e07e328f3fa87536b3b50dc..0000000000000000000000000000000000000000 Binary files a/da/carbondioxide/__pycache__/obspack_globalviewplus2.cpython-37.pyc and /dev/null differ diff --git a/da/carbondioxide/dasystem.py.bak b/da/carbondioxide/dasystem.py.bak deleted file mode 100755 index 35204b61d12a42e59da7cfc3a2cc78bdaaa042ff..0000000000000000000000000000000000000000 --- a/da/carbondioxide/dasystem.py.bak +++ /dev/null @@ -1,64 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# control.py - -""" -Author : peters - -Revision History: -File created on 26 Aug 2010. 
- -""" - -import logging - -################### Begin Class CO2DaSystem ################### - -from da.baseclasses.dasystem import DaSystem - -class CO2DaSystem(DaSystem): - """ Information on the data assimilation system used. This is normally an rc-file with settings. - """ - def validate(self): - """ - validate the contents of the rc-file given a dictionary of required keys - """ - - needed_rc_items = ['obs.input.dir', - 'obs.input.fname', - 'obspack.input.id', - 'obspack.input.dir', - 'ocn.covariance', - 'nparameters', - 'bio.covariance', - 'deltaco2.prefix', - 'regtype'] - - - for k, v in self.items(): - if v == 'True' : - self[k] = True - if v == 'False': - self[k] = False - - for key in needed_rc_items: - if key not in self: - logging.warning('Missing a required value in rc-file : %s' % key) - logging.debug('DA System Info settings have been validated succesfully') - -################### End Class CO2DaSystem ################### - - -if __name__ == "__main__": - pass diff --git a/da/carbondioxide/obs.py.bak b/da/carbondioxide/obs.py.bak deleted file mode 100755 index 775feb955bc60970348bb33d6766343147fa916b..0000000000000000000000000000000000000000 --- a/da/carbondioxide/obs.py.bak +++ /dev/null @@ -1,471 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# obs.py - -""" -Author : peters - -Revision History: -File created on 28 Jul 2010. - -""" -import os -import sys -import logging -#from da.baseclasses.statevector import filename -import datetime as dtm -from string import strip -from numpy import array, logical_and - -sys.path.append(os.getcwd()) -sys.path.append('../../') - -identifier = 'CarbonTracker CO2 mole fractions' -version = '0.0' - -from da.baseclasses.obs import Observations -import da.tools.io4 as io -import da.tools.rc as rc - -################### Begin Class CO2Observations ################### - -class CO2Observations(Observations): - """ an object that holds data + methods and attributes needed to manipulate mole fraction values """ - - def setup(self, dacycle): - self.startdate = dacycle['time.sample.start'] - self.enddate = dacycle['time.sample.end'] - - sfname = dacycle.dasystem['obs.input.fname'] - if sfname.endswith('.nc'): - filename = os.path.join(dacycle.dasystem['obs.input.dir'], sfname) - else: - filename = os.path.join(dacycle.dasystem['obs.input.dir'], sfname + '.' 
+ self.startdate.strftime('%Y%m%d') + '.nc') - - if not os.path.exists(filename): - msg = 'Could not find the required observation input file (%s) ' % filename - logging.error(msg) - raise IOError(msg) - else: - self.obs_filename = filename - self.datalist = [] - - def add_observations(self): - """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file - - The CarbonTracker mole fraction files are provided as one long list of obs for all possible dates. So we can - either: - - (1) read all, and the subselect the data we will use in the rest of this cycle - (2) Use nco to make a subset of the data - - For now, we will stick with option (1) - - """ - ncf = io.ct_read(self.obs_filename, 'read') - idates = ncf.get_variable('date_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect, axis=0) - - ids = ncf.get_variable('id').take(subselect, axis=0) - evn = ncf.get_variable('eventnumber').take(subselect, axis=0) - evn = [s.tostring().lower() for s in evn] - evn = map(strip, evn) - sites = ncf.get_variable('site').take(subselect, axis=0) - sites = [s.tostring().lower() for s in sites] - sites = map(strip, sites) - lats = ncf.get_variable('lat').take(subselect, axis=0) - lons = ncf.get_variable('lon').take(subselect, axis=0) - alts = ncf.get_variable('alt').take(subselect, axis=0) - obs = ncf.get_variable('obs').take(subselect, axis=0) * 1.e-6 - logging.info("Converting observed values from ppm to mol/mol!!!!") - species = ncf.get_variable('species').take(subselect, axis=0) - species = [s.tostring().lower() for s in species] - species = map(strip, species) - strategy = ncf.get_variable('sampling_strategy').take(subselect, axis=0) - flags = ncf.get_variable('NOAA_QC_flags').take(subselect, axis=0) - flags = [s.tostring().lower() for s in flags] - flags = map(strip, flags) - flags = [int(f == '...') for f in flags] - ncf.close() - - logging.debug("Successfully read data from obs file (%s)" % self.obs_filename) - - for n in range(len(dates)): - self.datalist.append(MoleFractionSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.obs_filename)) - logging.debug("Added %d observations to the Data list" % len(dates)) - - def add_simulations(self, filename, silent=True): - """ Adds model simulated values to the mole fraction objects """ - - - if not os.path.exists(filename): - msg = "Sample output filename for observations could not be found : %s" % filename - logging.error(msg) - logging.error("Did the sampling step succeed?") - logging.error("...exiting") - raise IOError(msg) - - ncf = io.ct_read(filename, method='read') - ids = ncf.get_variable('obs_num') - simulated = ncf.get_variable('flask') - ncf.close() - logging.info("Successfully read data from model sample file (%s)" % filename) - - obs_ids = self.getvalues('id') - - obs_ids = obs_ids.tolist() - ids = list(map(int, ids)) - - missing_samples = [] - - for idx, val in zip(ids, simulated): - if idx in obs_ids: - index = obs_ids.index(idx) - #print id,val,val.shape - self.datalist[index].simulated = val - else: - missing_samples.append(idx) - - if not silent and missing_samples != []: - logging.warning('Model samples were found that did not match any ID in the observation list. 
Skipping them...') - #msg = '%s'%missing_samples ; logging.warning(msg) - - logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) - - def write_sample_coords(self, obsinputfile): - """ - Write the information needed by the observation operator to a file. Return the filename that was written for later use - - """ - f = io.CT_CDF(obsinputfile, method='create') - logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return obsinputfile - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." - savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "latitude" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('lon') - - savedict = io.std_savedict.copy() - savedict['name'] = "longitude" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('height') - - savedict = io.std_savedict.copy() - savedict['name'] = "altitude" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('samplingstrategy') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -9 - f.add_data(savedict) - - data = self.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "obs_id" - savedict['units'] = "NOAA database identifier" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to obs file") - logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) - - - - - def add_model_data_mismatch(self, filename): - """ - Get the model-data mismatch values for this cycle. 
- - (1) Open a sites_weights file - (2) Parse the data - (3) Compare site list against data - (4) Take care of double sites, etc - - """ - - - - - if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s)' % filename - logging.error(msg) - raise IOError(msg) - else: - self.sites_file = filename - - sites_weights = rc.read(self.sites_file) - - self.rejection_threshold = int(sites_weights['obs.rejection.threshold']) - self.global_R_scaling = float(sites_weights['global.R.scaling']) - self.n_site_categories = int(sites_weights['n.site.categories']) - self.n_sites_active = int(sites_weights['n.sites.active']) - self.n_sites_moved = int(sites_weights['n.sites.moved']) - - logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) - logging.debug('Model-data mismatch scaling factor : %f ' % self.global_R_scaling) - logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) - logging.debug('Model-data mismatch active sites : %d ' % self.n_sites_active) - logging.debug('Model-data mismatch moved sites : %d ' % self.n_sites_moved) - - cats = [k for k in sites_weights.keys() if 'site.category' in k] - - SiteCategories = {} - for key in cats: - name, error, may_localize, may_reject = sites_weights[key].split(';') - name = name.strip().lower() - error = float(error) - may_reject = ("TRUE" in may_reject.upper()) - may_localize = ("TRUE" in may_localize.upper()) - SiteCategories[name] = {'error':error, 'may_localize':may_localize, 'may_reject':may_reject} - #print name,SiteCategories[name] - - - active = [k for k in sites_weights.keys() if 'site.active' in k] - - site_info = {} - for key in active: - sitename, sitecategory = sites_weights[key].split(';') - sitename = sitename.strip().lower() - sitecategory = sitecategory.strip().lower() - site_info[sitename] = SiteCategories[sitecategory] - #print sitename,site_info[sitename] - - for obs in self.datalist: - obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - if obs.code in site_info: - logging.debug("Observation found (%s)" % obs.code) - obs.mdm = site_info[obs.code]['error'] * self.global_R_scaling - obs.may_localize = site_info[obs.code]['may_localize'] - obs.may_reject = site_info[obs.code]['may_reject'] - else: - logging.warning("Observation NOT found (%s, %s), please check sites.rc file (%s) !!!" % (obs.code, identifier, self.sites_file)) - obs.flag = 99 - - # Add site_info dictionary to the Observations object for future use - - self.site_info = site_info - - def write_sample_auxiliary(self, auxoutputfile): - """ - Write selected information contained in the Observations object to a file. - - """ - - f = io.CT_CDF(auxoutputfile, method='create') - logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return outfile - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." 
- f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." - savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - - data = self.getvalues('simulated') - - dimmembers = f.add_dim('members', data.shape[1]) - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamples" - savedict['long_name'] = "modelsamples for all ensemble members" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid + dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'simulated mole fractions based on optimized state vector' - f.add_data(savedict) - - data = self.getvalues('fromfile') - - savedict = io.std_savedict.copy() - savedict['name'] = "inputfilename" - savedict['long_name'] = "name of file where original obs data was taken from" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - data = self.getvalues('code') - - savedict = io.std_savedict.copy() - savedict['name'] = "sitecode" - savedict['long_name'] = "site code propagated from observation file" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile) - - #return outfile - - -################### End Class CO2Observations ################### - - - -################### Begin Class MoleFractionSample ################### - -class MoleFractionSample(object): - """ - Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all - attributes listed below in the __init__ method. One can additionally make more types of data, or make new - objects for specific projects. 
- - """ - - def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'): - self.code = code.strip() # Site code - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mole fraction residuals - self.hphr = hphr # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer - self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = idx # ID number - self.evn = evn # Event number - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() - self.samplingstrategy = samplingstrategy - self.fromfile = fromfile # netcdf filename inside observation distribution, to write back later - -################### End Class MoleFractionSample ################### - - -if __name__ == "__main__": - pass diff --git a/da/carbondioxide/obspack_geocarbon.py.bak b/da/carbondioxide/obspack_geocarbon.py.bak deleted file mode 100755 index 26b54962b9deff469b43222f4ff6002a723fffc2..0000000000000000000000000000000000000000 --- a/da/carbondioxide/obspack_geocarbon.py.bak +++ /dev/null @@ -1,548 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# obs.py - -""" -Author : peters - -Revision History: -File created on 28 Jul 2010. 
- -""" -import os -import sys -import logging - -import datetime as dtm -from string import strip -from numpy import array, logical_and -sys.path.append(os.getcwd()) -sys.path.append('../../') - -identifier = 'CarbonTracker CO2 mole fractions' -version = '0.0' - -from da.baseclasses.obs import Observations -import da.tools.io4 as io -import da.tools.rc as rc -################### Begin Class ObsPackObservations ################### - -class ObsPackObservations(Observations): - """ an object that holds data + methods and attributes needed to manipulate mole fraction values """ - - def setup(self, dacycle): - - self.startdate = dacycle['time.sample.start'] - self.enddate = dacycle['time.sample.end'] - - op_id = dacycle.dasystem['obspack.input.id'] - op_dir = dacycle.dasystem['obspack.input.dir'] - - if not os.path.exists(op_dir): - msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir - logging.error(msg) - raise IOError(msg) - else: - self.obspack_dir = op_dir - self.obspack_id = op_id - - self.datalist = [] - - def add_observations(self): - """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file - - The ObsPack mole fraction files are provided as time series per site with all dates in sequence. - We will loop over all site files in the ObsPackage, and subset each to our needs - - """ - - # Step 1: Read list of available site files in package - - infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,)) - f = open(infile, 'r') - lines = f.readlines() - f.close() - - ncfilelist = [] - for line in lines: - if line.startswith('#'): continue # header - - items = line.split() - #ncfile, lab , start_date, stop_date, data_comparison = items[0:5] - ncfile, lab , start_date, stop_date, data_comparison= line[:105].split() - - - ncfilelist += [ncfile] - - logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist)) - - for ncfile in ncfilelist: - - infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc') - ncf = io.ct_read(infile, 'read') - idates = ncf.get_variable('time_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect, axis=0) - - obspacknum = ncf.get_variable('obspack_num').take(subselect) # or should we propagate obs_num which is not unique across datasets?? 
- obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0) - obspackid = [s.tostring().lower() for s in obspackid] - obspackid = list(map(str.strip,str(obspackid))) - datasetname = ncfile # use full name of dataset to propagate for clarity - lats = ncf.get_variable('latitude').take(subselect, axis=0) - lons = ncf.get_variable('longitude').take(subselect, axis=0) - alts = ncf.get_variable('altitude').take(subselect, axis=0) - obs = ncf.get_variable('value').take(subselect, axis=0) - species = ncf.get_attribute('dataset_parameter') - flags = ncf.get_variable('obs_flag').take(subselect, axis=0) - ncf.close() - - for n in range(len(dates)): - self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile)) - - logging.debug("Added %d observations from file (%s) to the Data list" % (len(dates), ncfile)) - - logging.info("Observations list now holds %d values" % len(self.datalist)) - - def add_simulations(self, filename, silent=False): - """ Adds model simulated values to the mole fraction objects """ - - - if not os.path.exists(filename): - msg = "Sample output filename for observations could not be found : %s" % filename - logging.error(msg) - logging.error("Did the sampling step succeed?") - logging.error("...exiting") - raise IOError(msg) - - ncf = io.ct_read(filename, method='read') - ids = ncf.get_variable('obs_num') - simulated = ncf.get_variable('flask') - ncf.close() - logging.info("Successfully read data from model sample file (%s)" % filename) - - obs_ids = self.getvalues('id').tolist() - ids = list(map(int, ids)) - - missing_samples = [] - - for idx, val in zip(ids, simulated): - if idx in obs_ids: - index = obs_ids.index(idx) - - self.datalist[index].simulated = val # in mol/mol - else: - missing_samples.append(idx) - - if not silent and missing_samples != []: - logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...') - #msg = '%s'%missing_samples ; logging.warning(msg) - - logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) - - def write_sample_coords(self, obsinputfile): - """ - Write the information needed by the observation operator to a file. Return the filename that was written for later use - - """ - - if len(self.datalist) == 0: - #f.close() - #return obsinputfile - logging.debug("No observations found for this time period, nothing written to obs file") - else: - f = io.CT_CDF(obsinputfile, method='create') - logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." 
- f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." - savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "latitude" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('lon') - - savedict = io.std_savedict.copy() - savedict['name'] = "longitude" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('height') - - savedict = io.std_savedict.copy() - savedict['name'] = "altitude" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('samplingstrategy') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -9 - f.add_data(savedict) - - data = self.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "obs_id" - savedict['units'] = "ObsPack datapoint identifier" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - f.close() - - logging.debug("Successfully wrote data to obs file") - logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) - - - - def add_model_data_mismatch(self, filename): - """ - Get the model-data mismatch values for this cycle. 
- - (1) Open a sites_weights file - (2) Parse the data - (3) Compare site list against data - (4) Take care of double sites, etc - - """ - - if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s) ' % filename - logging.error(msg) - raise IOError(msg) - else: - self.sites_file = filename - - sites_weights = rc.read(self.sites_file) - - self.rejection_threshold = int(sites_weights['obs.rejection.threshold']) - self.global_R_scaling = float(sites_weights['global.R.scaling']) - self.n_site_categories = int(sites_weights['n.site.categories']) - - logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) - logging.warning('Model-data mismatch scaling factor : %f ' % self.global_R_scaling) - logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) - - cats = [k for k in sites_weights.keys() if 'site.category' in k] - - site_categories = {} - for key in cats: - name, error, may_localize, may_reject = sites_weights[key].split(';') - name = name.strip().lower() - error = float(error) - may_reject = ("TRUE" in may_reject.upper()) - may_localize = ("TRUE" in may_localize.upper()) - site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject} - - site_info = {} - site_move = {} - site_hourly = {} # option added to include only certain hours of the day (for e.g. PAL) IvdL - site_incalt = {} # option to increase sampling altitude for sites specified in sites and weights file - for key, value in sites_weights.items(): - if 'co2_' in key or 'sf6' in key: # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP - sitename, sitecategory = key, value - sitename = sitename.strip() - sitecategory = sitecategory.split()[0].strip().lower() - site_info[sitename] = site_categories[sitecategory] - if 'site.move' in key: - identifier, latmove, lonmove = value.split(';') - site_move[identifier.strip()] = (float(latmove), float(lonmove)) - if 'site.hourly' in key: - identifier, hourfrom, hourto = value.split(';') - site_hourly[identifier.strip()] = (int(hourfrom), int(hourto)) - if 'site.incalt' in key: - identifier, incalt = value.split(';') - site_incalt[identifier.strip()] = (int(incalt)) - - for obs in self.datalist: # loop over all available data points - - obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - obs.flag = 99 # default is do-not-use , until explicitly set by script - exclude_hourly = False # default is that hourly values are not included - - identifier = obs.code - species, site, method, lab, datasetnr = identifier.split('_') - - if identifier in site_info: - if identifier in site_hourly: - obs.samplingstrategy = 2 - hourf, hourt = site_hourly[identifier] - if int(obs.xdate.hour) >= hourf and int(obs.xdate.hour) <= hourt: - logging.warning("Observations in hourly dataset INCLUDED, while sampling time %s was between %s:00-%s:00"%(obs.xdate.time(),hourf,hourt)) - else: - logging.warning("Observation in hourly dataset EXCLUDED, while sampling time %s was outside %s:00-%s:00"%(obs.xdate.time(),hourf,hourt)) - exclude_hourly = True - if site_info[identifier]['category'] == 'do-not-use' or exclude_hourly: - logging.warning("Observation found (%s, %d), but not used in assimilation !!!" 
% (identifier, obs.id)) - obs.mdm = site_info[identifier]['error'] * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 99 - else: - logging.debug("Observation found (%s, %d)" % (identifier, obs.id)) - obs.mdm = site_info[identifier]['error'] * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 0 - - else: - logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s) !!!" % (identifier, obs.id, self.sites_file)) - - if identifier in site_move: - - movelat, movelon = site_move[identifier] - obs.lat = obs.lat + movelat - obs.lon = obs.lon + movelon - - logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon)) - - if identifier in site_incalt: - - incalt = site_incalt[identifier] - obs.height = obs.height + incalt - - logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt)) - - - # Add site_info dictionary to the Observations object for future use - - self.site_info = site_info - self.site_move = site_move - self.site_hourly = site_hourly - self.site_incalt = site_incalt - - logging.debug("Added Model Data Mismatch to all samples ") - - def write_sample_auxiliary(self, auxoutputfile): - """ - Write selected information contained in the Observations object to a file. - - """ - - f = io.CT_CDF(auxoutputfile, method='create') - logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return outfile - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - - data = self.getvalues('simulated') - - dimmembers = f.add_dim('members', data.shape[1]) - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamples" - savedict['long_name'] = "modelsamples for all ensemble members" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid + dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'simulated mole fractions based on optimized state vector' - f.add_data(savedict) - - data = self.getvalues('fromfile') - - savedict = io.std_savedict.copy() - savedict['name'] = "inputfilename" - savedict['long_name'] = "name of file where original obs data was taken from" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile) - - #return outfile - - - -################### End Class CtObservations ################### - - - -################### Begin Class MoleFractionSample ################### - -class MoleFractionSample(object): - """ - Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all - attributes listed below in the __init__ method. One can additionally make more types of data, or make new - objects for specific projects. 
- - """ - - def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'): - self.code = code.strip() # dataset identifier, i.e., co2_lef_tower_insitu_1_99 - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mole fraction residuals - self.hphr = hphr # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer - self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height in masl - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = idx # Obspack ID within distrution (integer), e.g., 82536 - self.evn = evn # Obspack Number within distrution (string), e.g., obspack_co2_1_PROTOTYPE_v0.9.2_2012-07-26_99_82536 - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() - self.samplingstrategy = samplingstrategy - self.fromfile = fromfile # netcdf filename inside ObsPack distribution, to write back later - -################### End Class MoleFractionSample ################### - - -if __name__ == "__main__": - pass - - - diff --git a/da/carbondioxide/obspack_globalviewplus.py.bak b/da/carbondioxide/obspack_globalviewplus.py.bak deleted file mode 100755 index da1a09a3ac58962d660391f8f35eac2e463228cb..0000000000000000000000000000000000000000 --- a/da/carbondioxide/obspack_globalviewplus.py.bak +++ /dev/null @@ -1,549 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# obs.py - -""" -Author : peters - -Revision History: -File created on 28 Jul 2010. 
- -""" -import os -import sys -import logging - -import datetime as dtm -#from string import strip -from numpy import array, logical_and, sqrt -sys.path.append(os.getcwd()) -sys.path.append('../../') - -identifier = 'CarbonTracker CO2 mole fractions' -version = '0.0' - -from da.baseclasses.obs import Observations -import da.tools.io4 as io -import da.tools.rc as rc -################### Begin Class ObsPackObservations ################### - -class ObsPackObservations(Observations): - """ an object that holds data + methods and attributes needed to manipulate mole fraction values """ - - def setup(self, dacycle): - - self.startdate = dacycle['time.sample.start'] - self.enddate = dacycle['time.sample.end'] - - op_id = dacycle.dasystem['obspack.input.id'] - op_dir = dacycle.dasystem['obspack.input.dir'] - - if not os.path.exists(op_dir): - msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir - logging.error(msg) - raise IOError(msg) - else: - self.obspack_dir = op_dir - self.obspack_id = op_id - - self.datalist = [] - - def add_observations(self): - """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file - - The ObsPack mole fraction files are provided as time series per site with all dates in sequence. - We will loop over all site files in the ObsPackage, and subset each to our needs - - """ - - # Step 1: Read list of available site files in package - - infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,)) - f = open(infile, 'r') - lines = f.readlines() - f.close() - - ncfilelist = [] - for line in lines: - if line.startswith('#'): continue # header - - items = line.split() - #ncfile, lab , start_date, stop_date, data_comparison = items[0:5] - ncfile, lab , start_date, stop_date, data_comparison= line[:105].split() - - - ncfilelist += [ncfile] - - logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist)) - - for ncfile in ncfilelist: - - infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc') - ncf = io.ct_read(infile, 'read') - idates = ncf.get_variable('time_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect, axis=0) - - if 'merge_num' in ncf.variables: - obspacknum = ncf.get_variable('merge_num').take(subselect) - else: - obspacknum = ncf.get_variable('obspack_num').take(subselect) - if 'ccggAllData' in ncfile: - obspackid = ncf.get_variable('id').take(subselect, axis=0) - else: - obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0) - obspackid = [s.tostring().lower() for s in obspackid] - obspackid = list(map(str.strip,str(obspackid))) - datasetname = ncfile # use full name of dataset to propagate for clarity - lats = ncf.get_variable('latitude').take(subselect, axis=0) - lons = ncf.get_variable('longitude').take(subselect, axis=0) - alts = ncf.get_variable('altitude').take(subselect, axis=0) - obs = ncf.get_variable('value').take(subselect, axis=0) - species = ncf.get_attribute('dataset_parameter') - flags = ncf.get_variable('obs_flag').take(subselect, axis=0) - ncf.close() - - for n in range(len(dates)): - self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile)) - - logging.debug("Added %d observations from file (%s) to the Data list" % (len(dates), ncfile)) - - 
logging.info("Observations list now holds %d values" % len(self.datalist)) - - def add_simulations(self, filename, silent=False): - """ Adds model simulated values to the mole fraction objects """ - - - if not os.path.exists(filename): - msg = "Sample output filename for observations could not be found : %s" % filename - logging.error(msg) - logging.error("Did the sampling step succeed?") - logging.error("...exiting") - raise IOError(msg) - - ncf = io.ct_read(filename, method='read') - ids = ncf.get_variable('obs_num') - simulated = ncf.get_variable('flask') - ncf.close() - logging.info("Successfully read data from model sample file (%s)" % filename) - - obs_ids = self.getvalues('id').tolist() - ids = list(map(int, ids)) - - missing_samples = [] - - for idx, val in zip(ids, simulated): - if idx in obs_ids: - index = obs_ids.index(idx) - - self.datalist[index].simulated = val # in mol/mol - else: - missing_samples.append(idx) - - if not silent and missing_samples != []: - logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...') - #msg = '%s'%missing_samples ; logging.warning(msg) - - logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) - - def write_sample_coords(self, obsinputfile): - """ - Write the information needed by the observation operator to a file. Return the filename that was written for later use - - """ - - if len(self.datalist) == 0: - #f.close() - #return obsinputfile - logging.debug("No observations found for this time period, nothing written to obs file") - else: - f = io.CT_CDF(obsinputfile, method='create') - logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "latitude" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('lon') - - savedict = io.std_savedict.copy() - savedict['name'] = "longitude" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('height') - - savedict = io.std_savedict.copy() - savedict['name'] = "altitude" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('samplingstrategy') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -9 - f.add_data(savedict) - - data = self.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "obs_id" - savedict['units'] = "ObsPack datapoint identifier" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - f.close() - - logging.debug("Successfully wrote data to obs file") - logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) - - - - def add_model_data_mismatch(self, filename): - """ - Get the model-data mismatch values for this cycle. 
- - (1) Open a sites_weights file - (2) Parse the data - (3) Compare site list against data - (4) Take care of double sites, etc - - """ - - if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s) ' % filename - logging.error(msg) - raise IOError(msg) - else: - self.sites_file = filename - - sites_weights = rc.read(self.sites_file) - - self.rejection_threshold = int(sites_weights['obs.rejection.threshold']) - self.global_R_scaling = float(sites_weights['global.R.scaling']) - self.n_site_categories = int(sites_weights['n.site.categories']) - - logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) - logging.warning('Model-data mismatch scaling factor : %f ' % self.global_R_scaling) - logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) - - cats = [k for k in sites_weights.keys() if 'site.category' in k] - - site_categories = {} - for key in cats: - name, error, may_localize, may_reject = sites_weights[key].split(';') - name = name.strip().lower() - error = float(error) - may_reject = ("TRUE" in may_reject.upper()) - may_localize = ("TRUE" in may_localize.upper()) - site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject} - - site_info = {} - site_move = {} - site_incalt = {} # option to increase sampling altitude for sites specified in sites and weights file - for key, value in sites_weights.items(): - if 'co2_' in key or 'sf6' in key: # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP - sitename, sitecategory = key, value - sitename = sitename.strip() - sitecategory = sitecategory.split()[0].strip().lower() - site_info[sitename] = site_categories[sitecategory] - if 'site.move' in key: - identifier, latmove, lonmove = value.split(';') - site_move[identifier.strip()] = (float(latmove), float(lonmove)) - if 'site.incalt' in key: - identifier, incalt = value.split(';') - site_incalt[identifier.strip()] = (int(incalt)) - - for obs in self.datalist: # first loop over all available data points to set flags correctly - - obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - if obs.flag == 1: # flag is taken from the gv+ datasets: 1=background/representative, 0=local. - obs.flag = 0 - elif obs.flag == 0: - obs.flag = 99 # 99 means: do-not-use - else: obs.flag = 99 - - for obs in self.datalist: # second loop over all available data points to set mdm - - identifier = obs.code - species, site, method, lab, datasetnr = identifier.split('_') - - if identifier in site_info: - if site_info[identifier]['category'] == 'do-not-use' or obs.flag == 99: - logging.warning("Observation found (%s, %d), but not used in assimilation." % (identifier, obs.id)) - obs.mdm = site_info[identifier]['error'] * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 99 - else: - if site_info[identifier]['category'] == 'aircraft': - nr_obs_per_day = 1 - else: - nr_obs_per_day = len([c.code for c in self.datalist if c.code == obs.code and c.xdate.day == obs.xdate.day and c.flag == 0]) - logging.debug("Observation found (%s, %d), mdm category is: %0.2f, scaled with number of observations per day (%i), final mdm applied is: %0.2f." 
% (identifier, obs.id, site_info[identifier]['error'],nr_obs_per_day,site_info[identifier]['error']*sqrt(nr_obs_per_day))) - obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 0 - else: - logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s) !!!" % (identifier, obs.id, self.sites_file)) - - if identifier in site_move: - - movelat, movelon = site_move[identifier] - obs.lat = obs.lat + movelat - obs.lon = obs.lon + movelon - - logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon)) - - if identifier in site_incalt: - - incalt = site_incalt[identifier] - obs.height = obs.height + incalt - - logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt)) - - - # Add site_info dictionary to the Observations object for future use - - self.site_info = site_info - self.site_move = site_move - self.site_incalt = site_incalt - - logging.debug("Added Model Data Mismatch to all samples ") - - def write_sample_auxiliary(self, auxoutputfile): - """ - Write selected information contained in the Observations object to a file. - - """ - - f = io.CT_CDF(auxoutputfile, method='create') - logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return outfile - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
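# Illustration (editor's hedged sketch, not part of the CTDAS sources): the
# model-data mismatch assigned in add_model_data_mismatch() above is the
# per-category error, inflated by the square root of the number of same-site
# observations on that day and by the global R scaling factor. The same
# formula as a tiny function, with a made-up example value:

from math import sqrt

def scaled_mdm(category_error, nr_obs_per_day, global_r_scaling=1.0):
    """Standard deviation assigned to one observation (same units as error)."""
    return category_error * sqrt(nr_obs_per_day) * global_r_scaling

# Example: a 1.5 ppm category error with 4 samples on the same day gives
# scaled_mdm(1.5, 4) == 3.0 ppm, before any global scaling is applied.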
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - - data = self.getvalues('simulated') - - dimmembers = f.add_dim('members', data.shape[1]) - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamples" - savedict['long_name'] = "modelsamples for all ensemble members" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid + dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'simulated mole fractions based on optimized state vector' - f.add_data(savedict) - - data = self.getvalues('fromfile') - - savedict = io.std_savedict.copy() - savedict['name'] = "inputfilename" - savedict['long_name'] = "name of file where original obs data was taken from" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile) - - #return outfile - - - -################### End Class CtObservations ################### - - - -################### Begin Class MoleFractionSample ################### - -class MoleFractionSample(object): - """ - Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all - attributes listed below in the __init__ method. One can additionally make more types of data, or make new - objects for specific projects. 
- - """ - - def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'): - self.code = code.strip() # dataset identifier, i.e., co2_lef_tower_insitu_1_99 - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mole fraction residuals - self.hphr = hphr # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer - self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height in masl - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = idx # Obspack ID within distrution (integer), e.g., 82536 - self.evn = evn # Obspack Number within distrution (string), e.g., obspack_co2_1_PROTOTYPE_v0.9.2_2012-07-26_99_82536 - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() - self.samplingstrategy = samplingstrategy - self.fromfile = fromfile # netcdf filename inside ObsPack distribution, to write back later - -################### End Class MoleFractionSample ################### - - -if __name__ == "__main__": - pass - - - diff --git a/da/carbondioxide/obspack_globalviewplus2.py.bak b/da/carbondioxide/obspack_globalviewplus2.py.bak deleted file mode 100755 index 3a7aec3ebcb4c49f00d9eac7fbd71363803ea3c8..0000000000000000000000000000000000000000 --- a/da/carbondioxide/obspack_globalviewplus2.py.bak +++ /dev/null @@ -1,549 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# obs.py - -""" -Author : peters - -Revision History: -File created on 28 Jul 2010. 
- -""" -import os -import sys -import logging - -import datetime as dtm -#from string import strip -from numpy import array, logical_and, sqrt -sys.path.append(os.getcwd()) -sys.path.append('../../') - -identifier = 'CarbonTracker CO2 mole fractions' -version = '0.0' - -from da.baseclasses.obs import Observations -import da.tools.io4 as io -import da.tools.rc as rc -################### Begin Class ObsPackObservations ################### - -class ObsPackObservations(Observations): - """ an object that holds data + methods and attributes needed to manipulate mole fraction values """ - - def setup(self, dacycle): - - self.startdate = dacycle['time.sample.start'] - self.enddate = dacycle['time.sample.end'] - - op_id = dacycle.dasystem['obspack.input.id'] - op_dir = dacycle.dasystem['obspack.input.dir'] - - if not os.path.exists(op_dir): - msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir - logging.error(msg) - raise IOError(msg) - else: - self.obspack_dir = op_dir - self.obspack_id = op_id - - self.datalist = [] - - def add_observations(self): - """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file - - The ObsPack mole fraction files are provided as time series per site with all dates in sequence. - We will loop over all site files in the ObsPackage, and subset each to our needs - - """ - - # Step 1: Read list of available site files in package - - infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,)) - f = open(infile, 'r') - lines = f.readlines() - f.close() - - ncfilelist = [] - for line in lines: - if not line.startswith('# dataset:'): continue - - items = line.split(':') - #ncfile, lab , start_date, stop_date, data_comparison = items[0:5] - #ncfile, lab , start_date, stop_date, data_comparison= line[:105].split() - ncfile = items[1].strip() - - ncfilelist += [ncfile] - - logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist)) - - for ncfile in ncfilelist: - - infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc') - ncf = io.ct_read(infile, 'read') - idates = ncf.get_variable('time_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect, axis=0) - - if 'merge_num' in ncf.variables: - obspacknum = ncf.get_variable('merge_num').take(subselect) - else: - obspacknum = ncf.get_variable('obspack_num').take(subselect) - if 'ccggAllData' in ncfile: - obspackid = ncf.get_variable('id').take(subselect, axis=0) - else: - obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0) - obspackid = [s.tostring().lower() for s in obspackid] - obspackid = list(map(str.strip,str(obspackid))) - datasetname = ncfile # use full name of dataset to propagate for clarity - lats = ncf.get_variable('latitude').take(subselect, axis=0) - lons = ncf.get_variable('longitude').take(subselect, axis=0) - alts = ncf.get_variable('altitude').take(subselect, axis=0) - obs = ncf.get_variable('value').take(subselect, axis=0) - species = ncf.get_attribute('dataset_parameter') - flags = ncf.get_variable('obs_flag').take(subselect, axis=0) - ncf.close() - - for n in range(len(dates)): - self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile)) - - logging.debug("Added %d observations from file (%s) to the Data 
list" % (len(dates), ncfile)) - - logging.info("Observations list now holds %d values" % len(self.datalist)) - - def add_simulations(self, filename, silent=False): - """ Adds model simulated values to the mole fraction objects """ - - - if not os.path.exists(filename): - msg = "Sample output filename for observations could not be found : %s" % filename - logging.error(msg) - logging.error("Did the sampling step succeed?") - logging.error("...exiting") - raise IOError(msg) - - ncf = io.ct_read(filename, method='read') - ids = ncf.get_variable('obs_num') - simulated = ncf.get_variable('flask') - ncf.close() - logging.info("Successfully read data from model sample file (%s)" % filename) - - obs_ids = self.getvalues('id').tolist() - ids = list(map(int, ids)) - - missing_samples = [] - - for idx, val in zip(ids, simulated): - if idx in obs_ids: - index = obs_ids.index(idx) - - self.datalist[index].simulated = val # in mol/mol - else: - missing_samples.append(idx) - - if not silent and missing_samples != []: - logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...') - #msg = '%s'%missing_samples ; logging.warning(msg) - - logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) - - def write_sample_coords(self, obsinputfile): - """ - Write the information needed by the observation operator to a file. Return the filename that was written for later use - - """ - - if len(self.datalist) == 0: - #f.close() - #return obsinputfile - logging.debug("No observations found for this time period, nothing written to obs file") - else: - f = io.CT_CDF(obsinputfile, method='create') - logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "latitude" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('lon') - - savedict = io.std_savedict.copy() - savedict['name'] = "longitude" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('height') - - savedict = io.std_savedict.copy() - savedict['name'] = "altitude" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('samplingstrategy') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -9 - f.add_data(savedict) - - data = self.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "obs_id" - savedict['units'] = "ObsPack datapoint identifier" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - f.close() - - logging.debug("Successfully wrote data to obs file") - logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) - - - - def add_model_data_mismatch(self, filename): - """ - Get the model-data mismatch values for this cycle. 
- - (1) Open a sites_weights file - (2) Parse the data - (3) Compare site list against data - (4) Take care of double sites, etc - - """ - - if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s) ' % filename - logging.error(msg) - raise IOError(msg) - else: - self.sites_file = filename - - sites_weights = rc.read(self.sites_file) - - self.rejection_threshold = int(sites_weights['obs.rejection.threshold']) - self.global_R_scaling = float(sites_weights['global.R.scaling']) - self.n_site_categories = int(sites_weights['n.site.categories']) - - logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) - logging.warning('Model-data mismatch scaling factor : %f ' % self.global_R_scaling) - logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) - - cats = [k for k in sites_weights.keys() if 'site.category' in k] - - site_categories = {} - for key in cats: - name, error, may_localize, may_reject = sites_weights[key].split(';') - name = name.strip().lower() - error = float(error) - may_reject = ("TRUE" in may_reject.upper()) - may_localize = ("TRUE" in may_localize.upper()) - site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject} - - site_info = {} - site_move = {} - site_incalt = {} # option to increase sampling altitude for sites specified in sites and weights file - for key, value in sites_weights.items(): - if 'co2_' in key or 'sf6' in key: # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP - sitename, sitecategory = key, value - sitename = sitename.strip() - sitecategory = sitecategory.split()[0].strip().lower() - site_info[sitename] = site_categories[sitecategory] - if 'site.move' in key: - identifier, latmove, lonmove = value.split(';') - site_move[identifier.strip()] = (float(latmove), float(lonmove)) - if 'site.incalt' in key: - identifier, incalt = value.split(';') - site_incalt[identifier.strip()] = (int(incalt)) - - for obs in self.datalist: # first loop over all available data points to set flags correctly - - obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - if obs.flag == 1: # flag is taken from the gv+ datasets: 1=background/representative, 0=local. - obs.flag = 0 - elif obs.flag == 0: - obs.flag = 99 # 99 means: do-not-use - else: obs.flag = 99 - - for obs in self.datalist: # second loop over all available data points to set mdm - - identifier = obs.code - species, site, method, lab, datasetnr = identifier.split('_') - - if identifier in site_info: - if site_info[identifier]['category'] == 'do-not-use' or obs.flag == 99: - logging.warning("Observation found (%s, %d), but not used in assimilation." % (identifier, obs.id)) - obs.mdm = site_info[identifier]['error'] * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 99 - else: - if site_info[identifier]['category'] == 'aircraft': - nr_obs_per_day = 1 - else: - nr_obs_per_day = len([c.code for c in self.datalist if c.code == obs.code and c.xdate.day == obs.xdate.day and c.flag == 0]) - logging.debug("Observation found (%s, %d), mdm category is: %0.2f, scaled with number of observations per day (%i), final mdm applied is: %0.2f." 
% (identifier, obs.id, site_info[identifier]['error'],nr_obs_per_day,site_info[identifier]['error']*sqrt(nr_obs_per_day))) - obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling - obs.may_localize = site_info[identifier]['may_localize'] - obs.may_reject = site_info[identifier]['may_reject'] - obs.flag = 0 - else: - logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s) !!!" % (identifier, obs.id, self.sites_file)) - - if identifier in site_move: - - movelat, movelon = site_move[identifier] - obs.lat = obs.lat + movelat - obs.lon = obs.lon + movelon - - logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon)) - - if identifier in site_incalt: - - incalt = site_incalt[identifier] - obs.height = obs.height + incalt - - logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt)) - - - # Add site_info dictionary to the Observations object for future use - - self.site_info = site_info - self.site_move = site_move - self.site_incalt = site_incalt - - logging.debug("Added Model Data Mismatch to all samples ") - - def write_sample_auxiliary(self, auxoutputfile): - """ - Write selected information contained in the Observations object to a file. - - """ - - f = io.CT_CDF(auxoutputfile, method='create') - logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dim10char = f.add_dim('string_of10chars', 10) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return outfile - - for key, value in self.site_move.items(): - msg = "Site is moved by %3.2f degrees latitude and %3.2f degrees longitude" % value - f.add_attribute(key, msg) - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
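# Illustration (editor's hedged sketch, not part of the CTDAS sources): the
# site categories read in add_model_data_mismatch() above come from rc entries
# of the form "name ; error ; may_localize ; may_reject". A minimal parser for
# one such value, mirroring the split/strip/float logic used above (the
# example string is made up):

def parse_site_category(value):
    name, error, may_localize, may_reject = value.split(';')
    return {'category': name.strip().lower(),
            'error': float(error),
            'may_localize': 'TRUE' in may_localize.upper(),
            'may_reject': 'TRUE' in may_reject.upper()}

# Example: parse_site_category('Surface ; 1.5 ; TRUE ; FALSE') returns
# {'category': 'surface', 'error': 1.5, 'may_localize': True, 'may_reject': False}.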
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - - data = self.getvalues('simulated') - - dimmembers = f.add_dim('members', data.shape[1]) - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamples" - savedict['long_name'] = "modelsamples for all ensemble members" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid + dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'simulated mole fractions based on optimized state vector' - f.add_data(savedict) - - data = self.getvalues('fromfile') - - savedict = io.std_savedict.copy() - savedict['name'] = "inputfilename" - savedict['long_name'] = "name of file where original obs data was taken from" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile) - - #return outfile - - - -################### End Class CtObservations ################### - - - -################### Begin Class MoleFractionSample ################### - -class MoleFractionSample(object): - """ - Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all - attributes listed below in the __init__ method. One can additionally make more types of data, or make new - objects for specific projects. 
- - """ - - def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='co2', samplingstrategy=1, sdev=0.0, fromfile='none.nc'): - self.code = code.strip() # dataset identifier, i.e., co2_lef_tower_insitu_1_99 - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mole fraction residuals - self.hphr = hphr # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer - self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height in masl - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = idx # Obspack ID within distrution (integer), e.g., 82536 - self.evn = evn # Obspack Number within distrution (string), e.g., obspack_co2_1_PROTOTYPE_v0.9.2_2012-07-26_99_82536 - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() - self.samplingstrategy = samplingstrategy - self.fromfile = fromfile # netcdf filename inside ObsPack distribution, to write back later - -################### End Class MoleFractionSample ################### - - -if __name__ == "__main__": - pass - - - diff --git a/da/ccffdas/__pycache__/__init__.cpython-36.pyc b/da/ccffdas/__pycache__/__init__.cpython-36.pyc deleted file mode 100755 index 3dd71dd72bc94d56a968f0fb5e406ebc325fac95..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/dasystem.cpython-36.pyc b/da/ccffdas/__pycache__/dasystem.cpython-36.pyc deleted file mode 100755 index febb5aca343cbad0c649db11035944c4eac5c349..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/dasystem.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/emissionmodel.cpython-36.pyc b/da/ccffdas/__pycache__/emissionmodel.cpython-36.pyc deleted file mode 100755 index b1676e575c2d2d278e7e84485b8aeb796697427e..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/emissionmodel.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/obs.cpython-36.pyc b/da/ccffdas/__pycache__/obs.cpython-36.pyc deleted file mode 100644 index df02d7cf71890ba110d8daab8ed854cdf0158144..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/obs.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/observationoperator.cpython-36.pyc b/da/ccffdas/__pycache__/observationoperator.cpython-36.pyc deleted file mode 100644 index ced3207169c04b4be6561ba8a0641b03d2d98694..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/observationoperator.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/pipeline.cpython-36.pyc b/da/ccffdas/__pycache__/pipeline.cpython-36.pyc deleted file mode 100755 index 4d05a5879147a0fb76c0f6fda2acb04914cf2f5a..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/pipeline.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/__pycache__/statevector.cpython-36.pyc b/da/ccffdas/__pycache__/statevector.cpython-36.pyc deleted 
file mode 100755 index 534139479b12734650a38af4d1952fe2f3a311e9..0000000000000000000000000000000000000000 Binary files a/da/ccffdas/__pycache__/statevector.cpython-36.pyc and /dev/null differ diff --git a/da/ccffdas/category_info.py b/da/ccffdas/category_info.py index 4d96d9f8f99d129eb30400cc4f848450ce34a917..4786367d1cdd371134b4e9526425078658a242ce 100755 --- a/da/ccffdas/category_info.py +++ b/da/ccffdas/category_info.py @@ -59,7 +59,7 @@ categories = { 'CO2.uncertainty': 'n', 'CO': -4.32, 'CO.uncertainty': 'l'}, - 'cars middle road': {'name': 'cars middle road', + 'heavy duty': {'name': 'heavy duty', 'model': 1, 'spatial': 'Road transport', 'temporal': 't_carmr', diff --git a/da/ccffdas/emissionmodel.py b/da/ccffdas/emissionmodel.py index 7412f22164866a1c2934492d09f433767f9d1b18..59ffe170771fc725607be294275cb466ad312a89 100755 --- a/da/ccffdas/emissionmodel.py +++ b/da/ccffdas/emissionmodel.py @@ -20,19 +20,24 @@ import numpy as np from numpy import array, logical_and import da.tools.io4 as io import math +import pytz import da.tools.rc as rc from da.tools.general import create_dirs, to_datetime import netCDF4 as nc + +from multiprocessing import Pool +from functools import partial + identifier = 'EmissionModel ensemble ' version = '1.0' - +#from da.ccffdas.observationoperator import get_time_indices # Improve: should not be a python file? Defenitely not be stored in this folder! -from da.ffdas import energy_use_country +from da.ccffdas import energy_use_country energy_use_per_country = energy_use_country.energy_use_per_country -from da.ffdas import category_info +from da.ccffdas import category_info categories = category_info.categories ################### Begin Class Emission model ################### @@ -51,27 +56,37 @@ class EmisModel(object): self.emisdir = dacycle.dasystem['datadir'] self.inputdir = dacycle.dasystem['inputdir'] self.proxyfile = dacycle.dasystem['emis.input.spatial'] - self.tempfileo = dacycle.dasystem['emis.input.tempobs'] - self.tempfilep = dacycle.dasystem['emis.input.tempprior'] - self.btime = int(dacycle.dasystem['run.backtime']) - self.obsfile = dacycle.dasystem['obs.input.id'] self.nrspc = int(dacycle.dasystem['obs.spec.nr']) self.species = dacycle.dasystem['obs.spec.name'].split(',') self.nrcat = int(dacycle.dasystem['obs.cat.nr']) - self.nparams = int(dacycle.dasystem['nparameters']) self.nmembers = int(dacycle['da.optimizer.nmembers']) - self.pparam = dacycle.dasystem['emis.pparam'] self.paramfile = dacycle.dasystem['emis.paramfiles'] self.countries = [country.strip() for country in dacycle.dasystem['countries'].split(';')] areafile = dacycle.dasystem['area.file'] self.area = nc.Dataset(areafile)['Area'][:] + self.time_prof_file = dacycle.dasystem['file.timeprofs'] - self.stations2country = rc.read('/home/awoude/ffdas/RINGO/Data/station_countries.rc') self.energy_use_per_country = energy_use_per_country self.categories = categories - self.paramdict = rc.read('/projects/0/ctdas/RINGO/inversions/Data/paramdict.rc') + self.paramdict = rc.read(dacycle.dasystem['paramdict']) + self.country_mask_file = dacycle.dasystem['country.mask'] + + + self.countries_dict = { # From ISO3166-1 alpha-3 to alpha-2 + 'AUS': 'at', + 'BEL': 'be', + 'CZE': 'cz', + 'FRA': 'fr', + 'DEU': 'de', + 'LUX': 'lu', + 'NED': 'nl', + 'POL': 'pl', + 'CHE': 'cz', + 'GBR': 'gb'} + + logging.debug('Emismodel has been set-up') - def find_in_state(self, station, cat, name=None, return_index=False): + def find_in_state(self, params, station, cat, name=None, return_index=False): """Function that 
finds the index in the state vector""" if not name: key = station + '.' + cat @@ -82,86 +97,75 @@ class EmisModel(object): i_in_state = int(self.paramdict[key]) if return_index: return i_in_state else: - value = self.prm[i_in_state] + value = params[i_in_state] return value elif return_index: return False - return 1 + else: return 1 - def get_emis(self, dacycle, samples, do_pseudo): + def get_emis(self, dacycle, indices): """set up emission information for pseudo-obs (do_pseudo=1) and ensemble runs (do_pseudo=0)""" - if do_pseudo==1: - priorparam=os.path.join(self.emisdir,self.pparam) - f = io.ct_read(priorparam, 'read') - self.prm = f.get_variable('prior_values')[:self.nparams] - f.close() - self.get_spatial(dacycle, 999, samples, infile=os.path.join(self.emisdir, self.paramfile)) - self.get_temporal(dacycle, 999, samples, do_pseudo=1) - elif do_pseudo==0: - self.timestartkey = self.dacycle['time.sample.start'] - self.timefinishkey = self.dacycle['time.sample.end'] - for member in range(self.nmembers): - #first remove old files, then create new emission files per ensemble member - if self.startdate == self.timestartkey: - file = os.path.join(dacycle.dasystem['datadir'],'temporal_data_%03d.nc'%member) - try: - os.remove(file) - except OSError: - pass - prmfile=os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%member) - f = io.ct_read(prmfile, 'read') - self.prm = f.get_variable('parametervalues') - f.close() - self.get_yearly_emissions(samples) - self.get_spatial(dacycle, member, samples, infile=os.path.join(self.emisdir, self.paramfile)) - self.get_temporal(dacycle, member, samples, do_pseudo=0) - logging.debug('Succesfully wrote prior spatial and temporal emission files') - - def get_yearly_emissions(self, samples): - codes = samples.getvalues('code') - stationnames = [] - for code in codes: - two_names = any(x in code for x in ['UW', 'DW']) - stationnames.append('_'.join(code.split('_')[1:2 + two_names])) - stationnames = list(set(stationnames)) + self.timestartkey = self.dacycle['time.sample.start'] + self.timefinishkey = self.dacycle['time.sample.end'] + time_profiles = self.make_time_profiles(indices=indices) + self.time_profiles= time_profiles + + pool = Pool(self.nmembers) + # Create the function that calculates the conentration + func = partial(self.get_emissions, dacycle, time_profiles) + # We need to run over all members + memberlist = list(range(0, self.nmembers)) + _ = pool.map(func, memberlist) + pool.close() + pool.join() + logging.debug('Succesfully wrote prior emission files') - yremis = np.zeros((len(self.categories), len(self.countries), len(self.species))) + def get_yearly_emissions(self, params): + yremis = np.zeros((len(self.categories), len(self.countries), len(self.species)), dtype=np.float32) for i_country, country in enumerate(self.countries): for i_cat, (cat, values) in enumerate(self.categories.items()): emission_factor = values['emission_factors'] - emission_factor *= self.find_in_state(country, cat, 'emission_factors') - fraction_of_total = values['fraction_of_total'] - fraction_of_total *= self.find_in_state(country, cat, 'fraction_of_total') + emission_factor *= self.find_in_state(params, country, cat, 'emission_factors') + if cat == 'Public power gas': + fraction_of_total = energy_use_per_country[country]['Public power ratio gas'] + + elif cat == 'Public power coal': + fraction_of_total = 1 - energy_use_per_country[country]['Public power ratio gas'] + else: fraction_of_total = values['fraction_of_total'] + fraction_of_total *= 
self.find_in_state(params, country, cat, 'fraction_of_total') e_use = self.energy_use_per_country[country][values['spatial']] for i_species, specie in enumerate(self.species): emission_ratio = values[specie] + emission_ratio *= self.find_in_state(params, country, cat, specie) uncertainty = values[specie+'.uncertainty'] if uncertainty == 'l': emission_ratio = np.exp(emission_ratio) - if emission_ratio > 1: - logging.debug('{} {} {} {}'.format(country, cat, specie, emission_ratio)) - emission_ratio *= self.find_in_state(country, cat, specie) emission = e_use * fraction_of_total * emission_factor * emission_ratio yremis[i_cat, i_country, i_species] = emission - self.yremis = yremis + return yremis - def get_spatial(self, dacycle, member, samples, infile=None): + def get_emissions(self, dacycle, time_profiles, member): """read in proxy data used for spatial distribution of the gridded emissions, disaggregate yearly totals for the area""" - codes = samples.getvalues('code') - stationnames = [] - for code in codes: - two_names = any(x in code for x in ['UW', 'DW']) - stationnames.append('_'.join(code.split('_')[1:2 + two_names])) - stationnames = list(set(stationnames)) - + if isinstance(member, int) or member == 'optimised': + if isinstance(member, int): + prmfile = os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%member) + prmname = 'parametervalues' + elif member == 'optimised': + prmfile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % dacycle['time.start'].strftime('%Y%m%d')) + prmname = 'statevectormean_optimized' + f = io.ct_read(prmfile, 'read') + params = f.get_variable(prmname) + f.close() + elif member == 'true': + params = np.ones(100) + yremis = self.get_yearly_emissions(params) # Create a recalculation factor from kg/km2/yr to umol/m2/sec - M_mass = [44e-9, 28e-9] - sec_year = 24*366*3600. 
#seconds in a year (leapyear) + M_mass = np.array([44e-9, 28e-9][:self.nrspc]) + sec_year = 24*3600.*self.ndays #seconds in a year (leapyear) kgperkmperyr2umolperm2pers = np.array(M_mass)[:, None, None] * sec_year * self.area[None, :, :] - self.kgperkmperyr2umolperm2pers = kgperkmperyr2umolperm2pers #read in proxy data for spatial disaggregation infile = os.path.join(self.emisdir, self.proxyfile) @@ -169,153 +173,283 @@ class EmisModel(object): proxy_category_names = proxy['emis_category_names'][:] proxy_category_names = [b''.join(category_name).strip().decode() for category_name in proxy_category_names] - proxy_country_names = proxy['country_names'][:] - proxy_country_names = [b''.join(country_name).strip().decode() for country_name in proxy_country_names] - - spatial_distributions = np.zeros((self.nrcat, len(self.countries), self.area.shape[0], self.area.shape[1])) - for country in self.countries: - country_index = self.countries.index(country) - # Create the spatial distributions - # Loop over all categories - for i, category in enumerate(self.categories): - spatial_name = self.categories[category]['spatial'] - cat_index = proxy_category_names.index(spatial_name) - + spatial_distributions = np.zeros((self.nrcat, len(self.countries), *self.area.shape), dtype=np.float32) + for i, category in enumerate(self.categories): + spatial_name = self.categories[category]['spatial'] + cat_index = proxy_category_names.index(spatial_name) + for country in self.countries: + country_index = self.countries.index(country) # Get the emission distribution for the category category_distribution_country = proxy['proxy_maps'][cat_index, country_index, :, :] spatial_distributions[i, country_index, :, :] = category_distribution_country # Multiply spatial distributions with the yearly emissions in the country to get spatially distributed emissions - spatial_emissions = spatial_distributions[:, :, None, :, :] * self.yremis[:, :, :, None, None] # cat, country, species, lat, lon - + spatial_emissions = spatial_distributions[:, :, None, :, :] * yremis[:, :, :, None, None] # cat, country, species, lat, lon # Sum over the countries to overlay them. 
spatial_emissions = spatial_emissions.sum(axis=1) # Indices: category, species, lat, lon - spatial_emissions = np.swapaxes(spatial_emissions, 0,1) # Indices: species, category, lat, lon - + + emissions = [] + for i, category in enumerate(self.categories): + spatial_name = self.categories[category]['spatial'] + temporal_name = self.categories[category]['temporal'] + temporal_profile = time_profiles[temporal_name] + emissions.append(spatial_emissions[i, :, :, :] * temporal_profile[None, :, :, :]) + + self.emissions = emissions + emissions = np.array(emissions) # [cat, species, time, lat, lon] + emissions = np.swapaxes(emissions, 0, 2) # [time, cat, species, lat, lon] + emissions = np.swapaxes(emissions, 1, 2) # [time, species, cat, lat, lon] + # Recalculate spatial emissions to umol/sec/m2 - spatial_emissions = spatial_emissions / kgperkmperyr2umolperm2pers[:, None, :, :] - self.spatial_emissions = spatial_emissions + emissions = emissions / kgperkmperyr2umolperm2pers[None, :, None, :, :] ## create output file - prior_file = os.path.join(self.inputdir, 'prior_spatial_{0:03d}.nc'.format(member)) - f = io.CT_CDF(prior_file, method='create') - dimid = f.add_dim('ncat', self.nrcat) - dimid2 = f.add_dim('ops',2 ) - dimlat = f.add_dim('lat', self.area.shape[0]) - dimlon = f.add_dim('lon', self.area.shape[1]) - - #loop over all tracers - for i, species in enumerate(self.species): - savedict = io.std_savedict.copy() - savedict['name'] = species - savedict['long_name'] = "Spatially distributed emissions" - savedict['units'] = "micromole/m2/s" - savedict['dims'] = dimid + dimlat + dimlon - savedict['values'] = spatial_emissions[i,:,:,:] - f.add_data(savedict) - f.close() + if not isinstance(member, str): + prior_file = os.path.join(self.inputdir, 'prior_spatial_{0:03d}.nc'.format(member)) + f = io.CT_CDF(prior_file, method='create') + if self.dacycle.dasystem['cat.sum_emissions']: + emissions = emissions.sum(axis=2) # [time, species, lat, lon] + logging.debug('Summed emissions') + else: + dimid = f.add_dim('ncat', self.nrcat) + dimid2 = f.add_dim('ops',2 ) + dimtime= f.add_dim('time', emissions.shape[0]) + dimlat = f.add_dim('lat', self.area.shape[0]) + dimlon = f.add_dim('lon', self.area.shape[1]) - def get_temporal(self, dacycle, member, samples, do_pseudo): - """read in time profiles used for temporal distribution of the emissions""" - # First, get the station names from the smaples. For these stations, the time profiles will be written. - codes = samples.getvalues('code') - self.codes = codes - ## SI: Get the country names - stationnames = [] - for code in codes: - two_names = any(x in code for x in ['UW', 'DW']) - stationnames.append('_'.join(code.split('_')[1:2 + two_names])) - stationnames = list(set(stationnames)) - # For pseudo-observation (do_pseudo==1) or when no time profiles need to be optimized the profiles are simply read from the - # input file and copied to another file. Otherwise create a new file per ensemble member at t=0 and update the profiles for each time step - # Check if the ensemble file exists. Otherwise create. - ## SI: tmeporal data should include only countries. 
- ensfile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%member) - if not os.path.exists(ensfile): - dumfile = os.path.join(self.emisdir, self.tempfilep) - shutil.copy2(dumfile,ensfile) - time_profiles_ds = nc.Dataset(ensfile) - times = time_profiles_ds['Times'][:] - times = np.array([dtm.datetime.strptime(time, '%Y-%m-%d %H:%M:%S') for time in np.array(times)]) + #loop over all tracers + for i, species in enumerate(self.species): + savedict = io.std_savedict.copy() + savedict['name'] = species + savedict['long_name'] = "Spatially distributed emissions" + savedict['units'] = "micromole/m2/s" + if self.dacycle.dasystem['cat.sum_emissions']: + dims = dimtime + dimlat + dimlon + else: dims = dimtime + dimid + dimlat + dimlon + savedict['dims'] = dims + if self.dacycle.dasystem['cat.sum_emissions']: + savedict['values'] = emissions[:, i] + else: savedict['values'] = emissions[:, :, i] + savedict['dtype'] = 'float' + f.add_data(savedict) + f.close() + + if member == 0 or isinstance(member, str): + if member == 0: qual = 'prior' + elif member == 'optimised': qual = 'optimised' + elif member == 'true': qual = 'true' + name = 'ff_emissions_{}_{}.nc'.format(qual, self.dacycle['time.sample.start'].strftime('%Y%m%d')) + emisfile = os.path.join(dacycle['dir.output'], name) + f = io.CT_CDF(emisfile, method='create') + if self.dacycle.dasystem['cat.sum_emissions']: + emissions = emissions.sum(axis=2) # [time, species, lat, lon] + else: + dimid = f.add_dim('ncat', self.nrcat) + dimid2 = f.add_dim('ops',2 ) + backtime = int(dacycle.dasystem['run.backtime']) + dimtime= f.add_dim('time', emissions.shape[0]-backtime) + dimlat = f.add_dim('lat', self.area.shape[0]) + dimlon = f.add_dim('lon', self.area.shape[1]) + + #loop over all tracers + for i, species in enumerate(self.species): + savedict = io.std_savedict.copy() + savedict['name'] = species + savedict['long_name'] = "Spatially distributed emissions" + savedict['units'] = "micromole/m2/s" + if self.dacycle.dasystem['cat.sum_emissions']: + dims = dimtime + dimlat + dimlon + else: dims = dimtime + dimid + dimlat + dimlon + savedict['dims'] = dims + if self.dacycle.dasystem['cat.sum_emissions']: + savedict['values'] = emissions[backtime:, i] + else: savedict['values'] = emissions[backtime:, :, i] + savedict['dtype'] = 'float' + f.add_data(savedict) + f.close() + + def make_time_profiles(self, indices): + """Function that calculates the time profiles based on pre-specified + monthly, daily and hourly profiles. Temperature and radiation affect + household heating and powerplant time profiles. 
+ Input: + year: int: The year for which the profiles should be calculated + Returns: + dict of np.arrays: + The temporal profiles (one for each hour) for each gridcel and timestep """ + # --- Settings + year = self.startdate.year + ndays_year = (dtm.datetime(year, 12, 31) - dtm.datetime(year - 1, 12, 31)).days + self.ndays = ndays_year + times = np.array([dtm.datetime(year, 1, 1, 0, 0, 0) + dtm.timedelta(hours = i) for i in range(ndays_year*24)]) + times_add1 = times[indices.start: indices.stop + 1] + times = times[indices] self.times = times - subselect = logical_and(times >= self.timestartkey , times < self.timefinishkey).nonzero()[0] - date_selection = times.take(subselect, axis=0) - # The time profiles should always cover at least one full year - start_date = dtm.datetime(self.timestartkey.year,1,1,0,0) #first time included - end_date = dtm.datetime(self.timestartkey.year,12,31,23,0) #last time included - dt = dtm.timedelta(0,3600) - starttime_index = np.where(times==self.timestartkey)[0][0] - startdate_index = np.where(times==self.startdate)[0][0] - end_index = np.where(times==self.timefinishkey)[0][0] - - """ Time profiles should, for a full year, always have an average value of 1.0. Therefore, a new method has been developed - to optimize time profiles such that we comply with this and the time profiles do not affect the total emissions. - For this purpose we apply the scaling factor (statevector) to the period covered in this cycle. The time profile for all dates - outside this period are scaled equally such that the time profile remains its average value of 1.0. Except previously updated - dates (from previous cycles) are maintained (they are already optimized!).""" - - unselected_day = np.where((times<self.startdate) | (times>self.timefinishkey))[0] - category_names = list(time_profiles_ds['category_name'][:]) - self.category_names = category_names - station_names_ds = list(time_profiles_ds['station_names'][:]) - profiles = np.zeros(time_profiles_ds['time_profile'][:].shape) - for category, values in self.categories.items(): - cat_index = category_names.index(values['temporal']) - for station in stationnames: - ## SI: for country in countries: - paramvalue = self.find_in_state(station, category, values['temporal']) - if paramvalue != 1: - station_index = station_names_ds.index(station) - original_profile = time_profiles_ds['time_profile'][station_index, cat_index, :] - selected_profile = time_profiles_ds['time_profile'][station_index, cat_index, :].take(subselect) - new_profile = selected_profile[:] * paramvalue - daily_sum = np.array(original_profile[unselected_day]).sum() - - original_profile[:startdate_index] = original_profile[:startdate_index] - (original_profile[:startdate_index] / daily_sum) * (new_profile.sum() - selected_profile.sum()) - original_profile[end_index:] = original_profile[end_index:] - (original_profile[end_index:] / daily_sum) * (new_profile.sum() - selected_profile.sum()) - original_profile[starttime_index:end_index] = new_profile - profiles[station_index, cat_index, :] = original_profile - time_profiles_ds.close() - # Now, write the output - tofile = nc.Dataset(ensfile, 'r+') - - for category, values in self.categories.items(): - cat_index = category_names.index(values['temporal']) - for station in stationnames: - ## SI: for country in countries: - if self.find_in_state(station, category, 'time_profile') != 1: - station_index = station_names_ds.index(station) - tofile['time_profile'][station_index, cat_index, :] = profiles[station_index, cat_index, :] - 
tofile.close() - # Now read in the correct profiles, select the correct time period and write the profiles into one file per ensemble member - time_profiles_ds = nc.Dataset(ensfile) - subselect = logical_and(times >= times[0] , times <= times[-1]).nonzero()[0] - date_selection = times.take(subselect, axis=0) + numdays = (times[-1] - times[0]).days + 1 + day_start = times[0].timetuple().tm_yday + datapath = '/projects/0/ctdas/RINGO/EmissionInventories/DynEmMod_TimeProfiles' + infilename = '{}/RINGO_ECMWF_DailyMeteo{}.nc'.format(datapath, year) + with nc.Dataset(infilename) as infile: + T2myr = infile.variables['T2m' ][:ndays_year] - 273.15 # T2m K --> oC + U10 = infile.variables['U10m' ][:ndays_year] # u m/s + Rinyr = infile.variables['Rsin' ][:ndays_year] / (24. * 1.0e4) # Radiation (?) J/m2/day --> J/cm2/hr + T2myr_av = infile.variables['T2m_avg' ][:ndays_year] - 273.15 # + U10_av = infile.variables['U10m_avg'][:ndays_year] + Rinyr_av = infile.variables['Rsin_avg'][:ndays_year] + + ndays, nlat, nlon = T2myr.shape + + # --- calculate degree day sum and corresponding time profiles + fr_cons = 0.2 # constant emission for consumers (cooking, warm water) + fr_gls = 0. # no constant emission for glasshouses + fr_coal = 0.8 # constant emission for coal-fired power plants + fr_gas = 0.1 # constant emission for gas-fired power plants + + T0_cons = 18. # temperature threshold for consumer heating + T0_gls = 15. # temperature threshold for glasshouse heating + T0_coal = 25. # temperature threshold for coal-fired power plants + U0_gas = 10. # wind speed threshold for gas-fired power plants + R0_gas = 50. # radiation threshold for gas-fired power plants + + HDC_cons = np.empty((ndays, nlat, nlon)) # Heating Demand Category 1 (Household heating) + HDC_gls = np.empty((ndays, nlat, nlon)) # Heating Demand Category 2 (Glasshouse heating) + HDC_coal = np.empty((ndays, nlat, nlon)) # Heating Demand Category 3 (Power plants) + HDC_gas = np.empty((ndays, nlat, nlon)) # Heating Demand Category 4 (Renewable activity) + + for day in range(ndays): # + HDC_cons[day] = np.fmax(T0_cons - T2myr[day, :, :], 0) # Daily demand for consumers / household heating + HDC_gls [day] = np.fmax(T0_gls - T2myr[day, :, :], 0) # Daily demand for glasshouse heating + HDC_coal[day] = np.fmax(T0_coal - T2myr[day, :, :], 0) # Daily demand for coal-fired powerplant productvity + wind_engy = np.fmax(U0_gas - U10[day, :, :], 0) # Wind energy + solar_engy = np.fmax(R0_gas - Rinyr[day, :, :], 0) # Solar energy + HDC_gas [day] = wind_engy * solar_engy + HC_cons = HDC_cons.mean(axis=0) + HC_gls = HDC_gls.mean(axis=0) + HC_coal = HDC_coal.mean(axis=0) + HC_gas = HDC_gas.mean(axis=0) + + t_consd = ((HDC_cons + fr_cons * HC_cons) / ((1 + fr_cons) * HC_cons))[day_start-1:]# daily time profile for consumer/household heating, starting from the day of the inversion + t_glsd = ((HDC_gls + fr_gls * HC_gls) / ((1 + fr_gls ) * HC_gls))[day_start-1:] # glasshouse + t_coald = ((HDC_coal + fr_coal * HC_coal) / ((1 + fr_coal) * HC_coal))[day_start-1:] # coal-fired powerplant + t_gasd = ((HDC_gas + fr_gas * HC_gas) / ((1 + fr_gas ) * HC_gas))[day_start-1:] # gas-fired powerplant + + #### Get the time profiles for all sectors: + with nc.Dataset(self.time_prof_file) as ds: + public_power_monthly= ds['FM_A'][:] + public_power_weekly = ds['FW_A'][:] + public_power_hourly = ds['FH_A'][:] + + industry_monthly = ds['FM_B'][:] + industry_weekly = np.array([1.02] * 6 + [0.88]) # TNO https://atmosphere.copernicus.eu/sites/default/files/2019-07/MACC_TNO_del_1_3_v2.pdf 
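# Illustration (editor's hedged sketch, not part of the CTDAS sources): the
# daily heating profiles constructed above follow a degree-day recipe: daily
# demand is max(T0 - T2m, 0), a fraction fr of the emission is constant, and
# the sum is normalised by the mean demand so the profile averages 1 over the
# period used for the mean. A one-dimensional version on toy temperatures:

import numpy as np

def heating_profile(t2m, t0=18.0, fr=0.2):
    """Daily profile from 2 m temperature; mean is 1.0 by construction."""
    hdc = np.fmax(t0 - t2m, 0.0)        # daily heating demand (degree days)
    hc = hdc.mean()                     # mean demand over the period
    return (hdc + fr * hc) / ((1.0 + fr) * hc)

# Example: heating_profile(np.array([0.0, 10.0, 20.0])).mean() == 1.0, with
# cold days weighted well above 1 and the warm day sitting at the constant floor.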
+ industry_hourly = 1 - for station in stationnames: - ## SI: for country in countries: - station_index = station_names_ds.index(station) - prior_file = os.path.join(self.inputdir, 'prior_temporal_{0}_{1:03}.nc'.format(station, member)) - f = io.CT_CDF(prior_file, method='create') - dimtime = f.add_dim('Times', len(date_selection)) + other_stationary_monthly = ds['FM_C'][:] + other_stationary_hourly = ds['FH_C'][:] + + road_transport_monthly = ds['FM_F'][:] + road_transport_weekly = ds['FW_F'][:] + road_transport_hourly_wkd = ds['FHwd_F'][:] + road_transport_hourly_sat = ds['FHst_F'][:] + road_transport_hourly_sun = ds['FHsn_F'][:] + + shipping_monthly = np.array([0.88, 0.92, 0.98, 1.03, 1.05, 1.06, 1.01, 1.02, 1.06, 1.05, 1.01, 0.93]) # TNO + shipping_weekly = 1 + shipping_daily = 1 + + t_public_power_coal = np.zeros((len(times_add1), *self.area.shape)) + t_public_power_gas = np.zeros_like(t_public_power_coal) + t_industry = np.zeros_like(t_public_power_coal) + t_other_stat_cons = np.zeros_like(t_public_power_coal) + t_other_stat_gls = np.zeros_like(t_public_power_coal) + t_road = np.zeros_like(t_public_power_coal) + t_ship = np.zeros_like(t_public_power_coal) + + + for i, t in enumerate(times_add1): + month = t.month -1 # Make index + day = (t.day -1) % 7 # Make weekly index + day_ind = t.timetuple().tm_yday - day_start + hour = t.hour # Hours start at 0 + weekday = t.weekday() < 6 + saturday = t.weekday() == 6 + sunday = t.weekday() == 7 + self.t = t + + # Monthly time profiles + public_power_month_mul = public_power_monthly[month, :, :] + industry_month_mul = industry_monthly[month, :, :] + other_stationary_month_mul = other_stationary_monthly[month, :, :] + road_transport_month_mul = road_transport_monthly[month, :, :] + shipping_month_mul = shipping_monthly[month] + + # Weekly ('daily') profiles: + public_power_day_mul_coal = t_coald[day_ind, :, :] + public_power_day_mul_gas = t_gasd[day_ind, :, :] + industry_day_mul = industry_weekly[day] + other_stat_day_mul_cons = t_consd[day_ind, :, :] # Index should start at startday, check! 
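# Illustration (editor's hedged sketch, not part of the CTDAS sources): each
# sector's weight for a given hour is the product of a monthly factor, a
# weekly/daily factor and an hourly factor looked up for that timestamp. A
# scalar sketch with flat placeholder profiles (note that datetime.weekday()
# runs 0..6, Monday..Sunday, so Sunday is index 6):

import numpy as np
from datetime import datetime

monthly = np.ones(12)    # one factor per month      (cf. FM_* above)
weekly = np.ones(7)      # one factor per weekday    (cf. FW_* above)
hourly = np.ones(24)     # one factor per hour (UTC) (cf. FH_* above)

def sector_weight(t):
    """Combined temporal weight for timestamp t."""
    return monthly[t.month - 1] * weekly[t.weekday()] * hourly[t.hour]

# Example: sector_weight(datetime(2017, 1, 2, 8)) == 1.0 for these flat
# profiles; real profiles redistribute emissions within the year while keeping
# a yearly average of 1 so the annual total is unchanged.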
+ other_stat_day_mul_gls = t_glsd[day_ind, :, :] + road_transport_day_mul = road_transport_weekly[day, :, :] + shipping_day_mul = shipping_weekly + + # Hourly profiles: + public_power_hour_mul = public_power_hourly[hour, :, :] + industry_hour_mul = industry_hourly + other_stationary_hour_mul = other_stationary_hourly[hour, :, :] + if weekday: road_hour_mul = road_transport_hourly_wkd[hour, :, :] + elif saturday: road_hour_mul = road_transport_hourly_sat[hour, :, :] + elif sunday: road_hour_mul = road_transport_hourly_sun[hour, :, :] + shipping_hour_mul = shipping_daily + + public_power_coal_tprof = public_power_month_mul * public_power_day_mul_coal * public_power_hour_mul + public_power_gas_tprof = public_power_month_mul * public_power_day_mul_gas * public_power_hour_mul + industry_tprof = industry_month_mul * industry_day_mul * industry_hour_mul + other_stat_cons_tprof = other_stationary_month_mul * other_stat_day_mul_cons * other_stationary_hour_mul + other_stat_gls_tprof = other_stationary_month_mul * other_stat_day_mul_gls * other_stationary_hour_mul + road_tprof = road_transport_month_mul * road_transport_day_mul * road_hour_mul + shipping_tprof = shipping_month_mul * shipping_day_mul * shipping_hour_mul + + t_public_power_coal[i, :, :] = public_power_coal_tprof + t_public_power_gas[i, :, :] = public_power_gas_tprof + t_industry[i, :, :] = industry_tprof + t_other_stat_cons[i, :, :] = other_stat_cons_tprof + t_other_stat_gls[i, :, :] = other_stat_gls_tprof + t_road[i, :, :] = road_tprof + t_ship[i, :, :] = shipping_tprof + + time_profiles = { + 't_gas': t_public_power_gas, + 't_coal': t_public_power_coal, + 't_ind': t_industry, + 't_cons': t_other_stat_cons, + 't_gls': t_other_stat_gls, + 't_road': t_road, + 't_ship': t_ship + } + + # Roll the time profiles to be consisten with time zones + with nc.Dataset(self.country_mask_file) as ds: + masks = ds['country_mask'][:] + country_names = [b''.join(c).decode() for c in ds['country_names'][:]] + + for sect, profile in time_profiles.items(): + new_profile = np.zeros((len(times), *profile.shape[1:])) + for country in self.countries: + mask = masks[country_names.index(country)] + country_times = mask * profile - cat_names_done = [] - for category, values in self.categories.items(): - cat_name = values['temporal'] - cat_index = category_names.index(cat_name) - if not cat_name in cat_names_done: - profile = np.array(time_profiles_ds['time_profile'][station_index, cat_index, :].take(subselect)) - savedict = io.std_savedict.copy() - savedict['name'] = cat_name - savedict['long_name'] = "Temporal distribution" - savedict['units'] = "" - savedict['dims'] = dimtime - savedict['values'] = profile - f.add_data(savedict) - cat_names_done.append(cat_name) - f.close() + timezone = pytz.timezone(pytz.country_timezones[self.countries_dict[country]][0]) + offset = timezone.utcoffset(times[0]).seconds//3600 + + rolled = np.roll(country_times, -offset, 0)[:len(times)] + new_profile += rolled + + new_profile[new_profile == 0] = 1 + time_profiles[sect] = new_profile.astype(np.float32) + + logging.debug('Time profiles created') + + return time_profiles - time_profiles_ds.close() ################### End Class Emission model ################### diff --git a/da/ccffdas/emissionmodel.py.ori b/da/ccffdas/emissionmodel.py.ori new file mode 100755 index 0000000000000000000000000000000000000000..fc7622e94f137815f42134d1064a1f628057ead2 --- /dev/null +++ b/da/ccffdas/emissionmodel.py.ori @@ -0,0 +1,410 @@ +#!/usr/bin/env python +# stilt_tools.py + +""" +Author : I. 
Super + +Revision History: +Newly developed code, September 2017 + +This module holds an emission model that prepares emission files used by the observation operator and +to create pseudo-data + +""" + +import shutil +import os +import logging +import datetime as dtm +import numpy as np +from numpy import array, logical_and +import da.tools.io4 as io +import math + +import da.tools.rc as rc +from da.tools.general import create_dirs, to_datetime +import netCDF4 as nc +identifier = 'EmissionModel ensemble ' +version = '1.0' + +#from da.ccffdas.observationoperator import get_time_indices +# Improve: should not be a python file? Defenitely not be stored in this folder! +from da.ccffdas import energy_use_country +energy_use_per_country = energy_use_country.energy_use_per_country + +from da.ccffdas import category_info +categories = category_info.categories +################### Begin Class Emission model ################### + +class EmisModel(object): + + def __init__(self, dacycle=None): + if dacycle != None: + self.dacycle = dacycle + else: + self.dacycle = {} + + def setup(self, dacycle): + self.dacycle = dacycle + self.startdate = self.dacycle['time.fxstart'] + self.enddate = self.dacycle['time.finish'] + self.emisdir = dacycle.dasystem['datadir'] + self.inputdir = dacycle.dasystem['inputdir'] + self.proxyfile = dacycle.dasystem['emis.input.spatial'] + self.tempfileo = dacycle.dasystem['emis.input.tempobs'] + self.tempfilep = dacycle.dasystem['emis.input.tempprior'] + self.btime = int(dacycle.dasystem['run.backtime']) + self.obsfile = dacycle.dasystem['obs.input.id'] + self.nrspc = int(dacycle.dasystem['obs.spec.nr']) + self.species = dacycle.dasystem['obs.spec.name'].split(',') + self.nrcat = int(dacycle.dasystem['obs.cat.nr']) + self.nparams = int(dacycle.dasystem['nparameters']) + self.nmembers = int(dacycle['da.optimizer.nmembers']) + self.pparam = dacycle.dasystem['emis.pparam'] + self.paramfile = dacycle.dasystem['emis.paramfiles'] + self.countries = [country.strip() for country in dacycle.dasystem['countries'].split(';')] + areafile = dacycle.dasystem['area.file'] + self.area = nc.Dataset(areafile)['Area'][:] + + self.stations2country = rc.read('/home/awoude/ffdas/RINGO/Data/station_countries.rc') + self.energy_use_per_country = energy_use_per_country + self.categories = categories + self.paramdict = rc.read(dacycle.dasystem['paramdict']) + + # --- Settings + year = self.startdate.year + ndays = (dtm.datetime(year, 12, 31) - dtm.datetime(year - 1, 12, 31)).days + self.ndays = ndays + times = np.array([dtm.datetime(year, 1, 1, 0, 0, 0) + dtm.timedelta(hours = i) for i in range(ndays*24)]) + self.times = times + datapath = '/projects/0/ctdas/RINGO/EmissionInventories/DynEmMod_TimeProfiles' + infilename = '{}/RINGO_ECMWF_DailyMeteo{}.nc'.format(datapath, year) + with nc.Dataset(infilename) as infile: + self.T2myr = infile.variables['T2m' ][:ndays] - 273.15 # T2m K --> oC + self.U10 = infile.variables['U10m' ][:ndays] # u m/s + self.Rinyr = infile.variables['Rsin' ][:ndays] / (24. * 1.0e4) # Radiation (?) J/m2/day --> J/cm2/hr + self.T2myr_av = infile.variables['T2m_avg' ][:ndays] - 273.15 # + self.U10_av = infile.variables['U10m_avg'][:ndays] + self.Rinyr_av = infile.variables['Rsin_avg'][:ndays] + + + def find_in_state(self, station, cat, name=None, return_index=False): + """Function that finds the index in the state vector""" + if not name: + key = station + '.' + cat + else: + key = station + '.' + cat + '.' 
+ name + + if key in self.paramdict: + i_in_state = int(self.paramdict[key]) + if return_index: return i_in_state + else: + value = self.prm[i_in_state] + return value + elif return_index: return False + return 1 + + def get_emis(self, dacycle, samples, indices, do_pseudo): + """set up emission information for pseudo-obs (do_pseudo=1) and ensemble runs (do_pseudo=0)""" + + self.timestartkey = self.dacycle['time.sample.start'] + self.timefinishkey = self.dacycle['time.sample.end'] + time_profiles = self.make_time_profiles(indices=indices) + for member in range(self.nmembers): + #first remove old files, then create new emission files per ensemble member + if self.startdate == self.timestartkey: + file = os.path.join(dacycle.dasystem['datadir'],'temporal_data_%03d.nc'%member) + try: + os.remove(file) + except OSError: + pass + prmfile=os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%member) + f = io.ct_read(prmfile, 'read') + self.prm = f.get_variable('parametervalues') + f.close() + self.get_yearly_emissions() + self.get_emissions(dacycle, member, time_profiles, infile=os.path.join(self.emisdir, self.paramfile)) + logging.debug('Succesfully wrote prior emission files') + + def get_yearly_emissions(self): + yremis = np.zeros((len(self.categories), len(self.countries), len(self.species))) + for i_country, country in enumerate(self.countries): + for i_cat, (cat, values) in enumerate(self.categories.items()): + emission_factor = values['emission_factors'] + + emission_factor *= self.find_in_state(country, cat, 'emission_factors') + fraction_of_total = values['fraction_of_total'] + fraction_of_total *= self.find_in_state(country, cat, 'fraction_of_total') + + e_use = self.energy_use_per_country[country][values['spatial']] + for i_species, specie in enumerate(self.species): + emission_ratio = values[specie] + uncertainty = values[specie+'.uncertainty'] + if uncertainty == 'l': + emission_ratio = np.exp(emission_ratio) + emission_ratio *= self.find_in_state(country, cat, specie) + emission = e_use * fraction_of_total * emission_factor * emission_ratio + yremis[i_cat, i_country, i_species] = emission + self.yremis = yremis + + def get_emissions(self, dacycle, member, time_profiles, infile=None): + """read in proxy data used for spatial distribution of the gridded emissions, disaggregate yearly totals for the area""" + # Create a recalculation factor from kg/km2/yr to umol/m2/sec + M_mass = [44e-9, 28e-9] + sec_year = 24*366*3600. 
#seconds in a year (leapyear) + kgperkmperyr2umolperm2pers = np.array(M_mass)[:, None, None] * sec_year * self.area[None, :, :] + self.kgperkmperyr2umolperm2pers = kgperkmperyr2umolperm2pers + + #read in proxy data for spatial disaggregation + infile = os.path.join(self.emisdir, self.proxyfile) + proxy = io.ct_read(infile, method='read') + proxy_category_names = proxy['emis_category_names'][:] + proxy_category_names = [b''.join(category_name).strip().decode() for category_name in proxy_category_names] + + proxy_country_names = proxy['country_names'][:] + proxy_country_names = [b''.join(country_name).strip().decode() for country_name in proxy_country_names] + + spatial_distributions = np.zeros((self.nrcat, len(self.countries), self.area.shape[0], self.area.shape[1])) + for i, category in enumerate(self.categories): + spatial_name = self.categories[category]['spatial'] + cat_index = proxy_category_names.index(spatial_name) + for country in self.countries: + country_index = self.countries.index(country) + # Get the emission distribution for the category + category_distribution_country = proxy['proxy_maps'][cat_index, country_index, :, :] + spatial_distributions[i, country_index, :, :] = category_distribution_country + + # Multiply spatial distributions with the yearly emissions in the country to get spatially distributed emissions + spatial_emissions = spatial_distributions[:, :, None, :, :] * self.yremis[:, :, :, None, None] # cat, country, species, lat, lon + # Sum over the countries to overlay them. + spatial_emissions = spatial_emissions.sum(axis=1) # Indices: category, species, lat, lon + self.spatial_emissions = spatial_emissions + + emissions = [] + for i, category in enumerate(self.categories): + spatial_name = self.categories[category]['spatial'] + cat_index = proxy_category_names.index(spatial_name) + temporal_name = self.categories[category]['temporal'] + temporal_profile = time_profiles[temporal_name] + emissions.append(spatial_emissions[cat_index, :, :, :] * temporal_profile[None, :, :, :]) + + emissions = np.asarray(emissions) + emissions = np.swapaxes(emissions, 0,1) # Indices: species, category, time, lat, lon + + # Recalculate spatial emissions to umol/sec/m2 + emissions = emissions / kgperkmperyr2umolperm2pers[:, None, None, :, :] + ## create output file + prior_file = os.path.join(self.inputdir, 'prior_spatial_{0:03d}.nc'.format(member)) + f = io.CT_CDF(prior_file, method='create') + dimid = f.add_dim('ncat', self.nrcat) + dimid2 = f.add_dim('ops',2 ) + dimtime= f.add_dim('time', emissions.shape[2]) + dimlat = f.add_dim('lat', self.area.shape[0]) + dimlon = f.add_dim('lon', self.area.shape[1]) + + #loop over all tracers + for i, species in enumerate(self.species): + savedict = io.std_savedict.copy() + savedict['name'] = species + savedict['long_name'] = "Spatially distributed emissions" + savedict['units'] = "micromole/m2/s" + savedict['dims'] = dimid + dimtime + dimlat + dimlon + savedict['values'] = emissions[i, :, :, :] + savedict['dtype'] = 'float' + f.add_data(savedict) + f.close() + + def make_time_profiles(self, indices): + """Function that calculates the time profiles based on pre-specified + monthly, daily and hourly profiles. Temperature and radiation affect + household heating and powerplant time profiles. 
+ Input: + year: int: The year for which the profiles should be calculated + Returns: + dict of np.arrays: + The temporal profiles (one for each hour) for each gridcel and timestep """ + + ndays, nlat, nlon = self.T2myr.shape + # --- calculate degree day sum and corresponding time profiles + fr_cons = 0.2 # constant emission for consumers (cooking, warm water) + fr_gls = 0. # no constant emission for glasshouses + fr_coal = 0.8 # constant emission for coal-fired power plants + fr_gas = 0.1 # constant emission for gas-fired power plants + + T0_cons = 18. # temperature threshold for consumer heating + T0_gls = 15. # temperature threshold for glasshouse heating + T0_coal = 25. # temperature threshold for coal-fired power plants + U0_gas = 10. # wind speed threshold for gas-fired power plants + R0_gas = 50. # radiation threshold for gas-fired power plants + + HDC_cons = np.empty((ndays, nlat, nlon)) # Heating Demand Category 1 (Household heating) + HDC_gls = np.empty((ndays, nlat, nlon)) # Heating Demand Category 2 (Glasshouse heating) + HDC_coal = np.empty((ndays, nlat, nlon)) # Heating Demand Category 3 (Power plants) + HDC_gas = np.empty((ndays, nlat, nlon)) # Heating Demand Category 4 (Renewable activity) + + for i in range(ndays): + HDC_cons[i] = np.fmax(T0_cons - self.T2myr[i, :, :], 0) # Daily demand for consumers / household heating + HDC_gls [i] = np.fmax(T0_gls - self.T2myr[i, :, :], 0) # Daily demand for glasshouse heating + HDC_coal[i] = np.fmax(T0_coal - self.T2myr[i, :, :], 0) # Daily demand for coal-fired powerplant productvity + dum1 = np.fmax(U0_gas - self.U10[i, :, :], 0) # Wind energy + dum2 = np.fmax(R0_gas - self.Rinyr[i, :, :], 0) # Solar energy + HDC_gas [i] = dum1 * dum2 + + HC_cons = HDC_cons.mean(axis=0) + HC_gls = HDC_gls.mean(axis=0) + HC_coal = HDC_coal.mean(axis=0) + HC_gas = HDC_gas.mean(axis=0) + + t_consd = (HDC_cons + fr_cons * HC_cons) / ((1 + fr_cons) * HC_cons) # daily time profile for consumer/household heating + t_glsd = (HDC_gls + fr_gls * HC_gls) / ((1 + fr_gls ) * HC_gls) # glasshouse + t_coald = (HDC_coal + fr_coal * HC_coal) / ((1 + fr_coal) * HC_coal) # coal-fired powerplant + t_gasd = (HDC_gas + fr_gas * HC_gas) / ((1 + fr_gas ) * HC_gas) # gas-fired powerplant + + # Hourly time profiles for energy use and consumers + engy_hr = np.array([0.79, 0.72, 0.72, 0.71, 0.74, 0.80, + 0.92, 1.08, 1.19, 1.22, 1.21, 1.21, + 1.17, 1.15, 1.14, 1.13, 1.10, 1.07, + 1.04, 1.02, 1.02, 1.01, 0.96, 0.88]) + cons_hr = np.array([0.38, 0.36, 0.36, 0.36, 0.37, 0.50, + 1.19, 1.53, 1.57, 1.56, 1.35, 1.16, + 1.07, 1.06, 1.00, 0.98, 0.99, 1.12, + 1.41, 1.52, 1.39, 1.35, 1.00, 0.42]) + + engy_hrm = np.tile(engy_hr, (nlat, nlon, 1)).transpose(2, 0, 1) #Repeat the daily time profile for all days + cons_hrm = np.tile(cons_hr, (nlat, nlon, 1)).transpose(2, 0, 1) + + nt = len(self.times) + t_gas = np.empty((nt, nlat, nlon)) + t_coal = np.empty((nt, nlat, nlon)) + t_cons = np.empty((nt, nlat, nlon)) + t_gls = np.empty((nt, nlat, nlon)) + + #Repeat the daily time profile for all days + for iday in range(ndays): + t_gas [iday * 24:(iday + 1) * 24, :, :] = np.tile(t_gasd [iday], (24, 1, 1)) * engy_hrm # hourly time profile = daily * hourly + t_coal[iday * 24:(iday + 1) * 24, :, :] = np.tile(t_coald[iday], (24, 1, 1)) * engy_hrm + t_cons[iday * 24:(iday + 1) * 24, :, :] = np.tile(t_consd[iday], (24, 1, 1)) * cons_hrm + t_gls [iday * 24:(iday + 1) * 24, :, :] = np.tile(t_glsd [iday], (24, 1, 1)) * cons_hrm + + ####### + # road traffic time profiles + ####### + carhw_mnt = [1.04, 1.05, 
0.98, 1.00, 1.00, 1.04, 1.01, 0.93, 0.86, 1.01, 1.07, 1.02] # monthly time profiles car highway + carmr_mnt = [1.05, 1.06, 0.93, 1.01, 0.99, 1.01, 1.00, 1.00, 0.86, 1.02, 1.08, 1.03] # car middle road + carur_mnt = [1.06, 1.10, 0.97, 1.00, 1.01, 1.00, 1.00, 0.98, 0.85, 1.01, 1.07, 1.01] # car urban road + hdvhw_mnt = [0.92, 0.89, 1.03, 1.02, 1.07, 1.05, 1.03, 1.02, 0.90, 0.97, 0.98, 0.94] + hdvmr_mnt = [1.06, 1.09, 0.93, 0.98, 0.99, 1.01, 1.02, 1.02, 0.78, 1.09, 1.08, 1.03] + hdvur_mnt = [1.10, 1.17, 0.91, 0.94, 0.76, 1.02, 1.04, 1.04, 0.82, 1.13, 1.14, 1.03] + carhw_wk = [1.00, 1.09, 1.10, 1.10, 1.11, 0.87, 0.73] # weekly time profile (mon-sun) + carmr_wk = [0.99, 1.07, 1.08, 1.07, 1.09, 0.93, 0.76] + carur_wk = [0.98, 1.06, 1.07, 1.07, 1.09, 0.92, 0.80] + hdvhw_wk = [1.23, 1.30, 1.27, 1.27, 1.33, 0.42, 0.19] + hdvmr_wk = [1.27, 1.25, 1.26, 1.29, 1.29, 0.45, 0.19] + hdvur_wk = [1.07, 1.24, 1.23, 1.21, 1.23, 0.63, 0.38] + carhw_hrw = [0.17, 0.09, 0.05, 0.05, 0.09, 0.42, 1.38, 1.87, # hourly time profile weekdays + 1.81, 1.31, 1.11, 1.13, 1.26, 1.30, 1.39, 1.63, + 1.95, 1.97, 1.46, 1.05, 0.78, 0.69, 0.62, 0.42] + carmr_hrw = [0.20, 0.09, 0.05, 0.05, 0.11, 0.36, 1.04, 1.42, + 1.49, 1.26, 1.20, 1.25, 1.37, 1.42, 1.49, 1.62, + 1.83, 1.92, 1.56, 1.23, 0.92, 0.81, 0.77, 0.56] + carur_hrw = [0.30, 0.20, 0.12, 0.09, 0.10, 0.25, 0.63, 1.22, + 1.53, 1.17, 1.10, 1.21, 1.37, 1.41, 1.52, 1.71, + 1.94, 2.03, 1.59, 1.18, 0.98, 0.91, 0.83, 0.61] + hdvhw_hrw = [0.13, 0.12, 0.12, 0.15, 0.28, 0.68, 1.59, 1.50, + 1.46, 1.67, 1.72, 1.77, 1.70, 1.77, 1.82, 1.85, + 1.60, 1.18, 0.97, 0.73, 0.49, 0.33, 0.22, 0.16] + hdvmr_hrw = [0.13, 0.09, 0.11, 0.10, 0.21, 0.52, 1.58, 1.75, + 1.61, 1.74, 1.70, 1.69, 1.58, 1.70, 1.79, 1.96, + 1.82, 1.23, 0.93, 0.61, 0.43, 0.30, 0.23, 0.19] + hdvur_hrw = [0.14, 0.08, 0.06, 0.08, 0.15, 0.36, 0.95, 1.61, + 1.86, 1.77, 1.76, 1.79, 1.74, 1.75, 1.83, 2.16, + 1.76, 1.27, 0.92, 0.65, 0.47, 0.37, 0.29, 0.21] + carhw_hrd = [0.57, 0.34, 0.20, 0.13, 0.11, 0.17, 0.29, 0.38, # hourly time profile weekends + 0.65, 1.03, 1.30, 1.54, 1.75, 1.95, 1.94, 1.83, + 1.81, 1.72, 1.32, 1.27, 1.18, 1.00, 0.84, 0.68] + carmr_hrd = [0.58, 0.35, 0.22, 0.14, 0.14, 0.25, 0.32, 0.38, + 0.61, 1.01, 1.29, 1.50, 1.73, 1.90, 1.96, 1.88, + 1.84, 1.76, 1.33, 1.18, 1.10, 0.93, 0.88, 0.72] + carur_hrd = [0.79, 0.58, 0.45, 0.33, 0.28, 0.27, 0.28, 0.32, + 0.48, 0.79, 1.04, 1.29, 1.60, 1.77, 1.85, 1.87, + 1.86, 1.76, 1.38, 1.18, 1.10, 0.99, 0.93, 0.81] + hdvhw_hrd = [0.44, 0.38, 0.36, 0.38, 0.42, 0.67, 0.99, 1.19, + 1.51, 1.69, 1.77, 1.68, 1.63, 1.53, 1.39, 1.34, + 1.33, 1.29, 1.12, 0.86, 0.69, 0.55, 0.44, 0.35] + hdvmr_hrd = [0.49, 0.33, 0.35, 0.23, 0.36, 0.56, 0.93, 1.21, + 1.31, 1.47, 1.58, 1.76, 1.63, 1.52, 1.52, 1.28, + 1.24, 1.09, 0.89, 0.67, 0.61, 0.58, 0.55, 0.52] + hdvur_hrd = [0.47, 0.41, 0.33, 0.33, 0.34, 0.42, 0.61, 0.85, + 1.04, 1.27, 1.43, 1.55, 1.72, 1.72, 1.69, 1.69, + 1.66, 1.56, 1.34, 1.03, 0.83, 0.68, 0.58, 0.46] + + t_carhw = np.empty((nt, nlat, nlon)) + t_carmr = np.empty((nt, nlat, nlon)) + t_carur = np.empty((nt, nlat, nlon)) + t_hdvhw = np.empty((nt, nlat, nlon)) + t_hdvmr = np.empty((nt, nlat, nlon)) + t_hdvur = np.empty((nt, nlat, nlon)) + t_ship = np.empty((nt, nlat, nlon)) + + # Multiply the hourly, daily and monthly time profiles to create new profile + # Only for the times in which we are interested + enum_times = list(enumerate(self.times))[indices] + for i, t in enum_times: + month_idx = t.month - 1 + day_idx = t.weekday() + hour_idx = t.hour + if day_idx < 5: # weekdays + t_carhw[i, None, 
None] = carhw_mnt[month_idx] * carhw_wk[day_idx] * carhw_hrw[hour_idx] + t_carmr[i, None, None] = carmr_mnt[month_idx] * carmr_wk[day_idx] * carmr_hrw[hour_idx] + t_carur[i, None, None] = carur_mnt[month_idx] * carur_wk[day_idx] * carur_hrw[hour_idx] + t_hdvhw[i, None, None] = hdvhw_mnt[month_idx] * hdvhw_wk[day_idx] * hdvhw_hrw[hour_idx] + t_hdvmr[i, None, None] = hdvmr_mnt[month_idx] * hdvmr_wk[day_idx] * hdvmr_hrw[hour_idx] + t_hdvur[i, None, None] = hdvur_mnt[month_idx] * hdvur_wk[day_idx] * hdvur_hrw[hour_idx] + t_ship [i, None, None] = hdvhw_mnt[month_idx] * hdvhw_wk[day_idx] * hdvhw_hrw[hour_idx] + else: # weekends + t_carhw[i, None, None] = carhw_mnt[month_idx] * carhw_wk[day_idx] * carhw_hrd[hour_idx] + t_carmr[i, None, None] = carmr_mnt[month_idx] * carmr_wk[day_idx] * carmr_hrd[hour_idx] + t_carur[i, None, None] = carur_mnt[month_idx] * carur_wk[day_idx] * carur_hrd[hour_idx] + t_hdvhw[i, None, None] = hdvhw_mnt[month_idx] * hdvhw_wk[day_idx] * hdvhw_hrd[hour_idx] + t_hdvmr[i, None, None] = hdvmr_mnt[month_idx] * hdvmr_wk[day_idx] * hdvmr_hrd[hour_idx] + t_hdvur[i, None, None] = hdvur_mnt[month_idx] * hdvur_wk[day_idx] * hdvur_hrd[hour_idx] + t_ship [i, None, None] = hdvhw_mnt[month_idx] * hdvhw_wk[day_idx] * hdvhw_hrd[hour_idx] + # --- normalize time profiles + t_gas /= nt + t_coal /= nt + t_cons /= nt + t_gls /= nt + t_carhw /= nt + t_carmr /= nt + t_carur /= nt + t_hdvhw /= nt + t_hdvmr /= nt + t_hdvur /= nt + t_ship /= nt + + datepoint = self.startdate + enddate = self.enddate + + t_ind = np.ones((nt, nlat, nlon)) + # Make container (dict) containing all time profiles + time_profiles = {'t_gas': t_gas[indices].astype(np.float32), + 't_coal': t_coal[indices].astype(np.float32), + 't_ind': t_ind[indices].astype(np.float32), + 't_cons': t_cons[indices].astype(np.float32), + 't_gls': t_gls[indices].astype(np.float32), + 't_carhw': t_carhw[indices].astype(np.float32), + 't_carmr': t_carmr[indices].astype(np.float32), + 't_carur': t_carur[indices].astype(np.float32), + 't_hdvhw': t_hdvhw[indices].astype(np.float32), + 't_hdvmr': t_hdvmr[indices].astype(np.float32), + 't_hdvur': t_hdvur[indices].astype(np.float32), + 't_ship': t_ship[indices].astype(np.float32)} + logging.debug('Time profiles created') + return time_profiles + +################### End Class Emission model ################### + + +if __name__ == "__main__": + pass + diff --git a/da/ccffdas/observationoperator.py b/da/ccffdas/observationoperator.py index 2cdfb0ba7adc583e782c7009b3deea332eae4cec..d866d04df57fa7f88ff138c5aff07554eba95e4d 100755 --- a/da/ccffdas/observationoperator.py +++ b/da/ccffdas/observationoperator.py @@ -36,19 +36,22 @@ from multiprocessing import Process, Pool import da.tools.io4 as io import da.tools.rc as rc +import da.tools.sibtools as st from da.tools.general import create_dirs, to_datetime from da.ccffdas.emissionmodel import EmisModel from da.baseclasses.observationoperator import ObservationOperator +#import matplotlib.pyplot as plt + try: # Import memoization. This speeds up the functions a lot. from memoization import cached -except: # The memoization package is not always included. Import the memoization from functools +except: # The memoization package is not always included. 
Import the memoization from #functools from functools import lru_cache as cached cached = cached() import xlrd -from da.ffdas import category_info +from da.ccffdas import category_info categories = category_info.categories # global constants, which will be used in the following classes identifier = 'WRF-STILT' @@ -86,112 +89,80 @@ epsilon_14CO2_gpp = -36. alpha_14CO2_gpp = 1 + epsilon_14CO2_gpp/1000. # = 0.964 ################### Begin Class STILT ################### -model_settings = rc.read('/projects/0/ctdas/RINGO/inversions/ffdas/exec/da/rc/ffdas/stilt.rc') -recalc_factors = [1, 1000] +recalc_factors = {'CO2': 1, 'CO': 1000} spname = ['CO2', 'CO'] -def run_STILT(footprint, datepoint, site, i_species, path, i_member): + + +def run_STILT(dacycle, footprint, datepoint, species, path, i_member): """This function reads in STILT footprints and returns hourly concentration data Input: footprint: np.array: the footprint of the station datepoint: datepoint: datetime: the datepoint for which the concentrations should be calculated site : str: name of the location - i_species : int: index of the species + species : str: the species i_member : int: index of the ensemblemember Returns: float: the concentration increase at the respective location due to the emissions from the respective species""" - # get the date: - temp_profiles = get_temporal_profiles(datepoint, site, path, i_member) - spatial_emissions = get_spatial_emissions(i_member, i_species, path) + # get the emission data: + spatial_emissions = get_spatial_emissions(dacycle, i_member, species, path, datepoint) # multiply footprint with emissions for each hour in the back trajectory. Indices: Time, Category, lat, lon - foot_flux = (footprint[:, None, :, :] * spatial_emissions[None, :, :, :] * temp_profiles[:, :, None, None]).sum() - concentration_increase = float(recalc_factors[i_species]) * foot_flux - + if dacycle.dasystem['cat.sum_emissions']: + foot_flux = (footprint * spatial_emissions).sum() + else: + foot_flux = (footprint[:, None, :, :] * spatial_emissions[:, :, :, :]).sum() + concentration_increase = float(recalc_factors[species]) * foot_flux return concentration_increase @cached -def get_temporal_profiles(datepoint, station_name, path, i_member): - """ Function that reads in the temporal profiles for the current timestep - Input: - datepoint: datepoint: datetime for which the temporal profile should be found - i_station: int: the index of the location for which the c14 concentration should be calculated - i_member: int: the index of the member for which the simulation is run - Returns: - np.array (2): the time profiles for all categories. 
Indices: time, category""" - #read temporal profiles for the times within the footprint - time_indices = get_time_indices(datepoint) - temporal_prior = path + 'prior_temporal_{0}_{1:03}.nc'.format(station_name, i_member) - temporal_prior = io.ct_read(temporal_prior, 'read') - temp_profiles = [] - for category, values in categories.items(): - temporal_var_name = values['temporal'] - temporal_variation = temporal_prior.get_variable(temporal_var_name)[time_indices] - temp_profiles.append(temporal_variation) - #temporal_prior.close() - temp_profiles = np.array(temp_profiles) - return temp_profiles.T # Transpose to be similar to spatial data in dimensions - -@cached -def get_spatial_emissions(i_member, i_species, path): +def get_spatial_emissions(dacycle, i_member, species, path, datepoint): """ Function that gets the spatial emissions Input: i_member: int: the index of the member for which the simulation is run - i_species: int: the index of the species for which the simulation is run + species: str: the species for which the simulation is run Returns: np.ndarray (3d): the spatial emissions per category, lat and lon""" #read spatially distributed emissions calculated with the emission model - emisfile = path +'prior_spatial_{0:03d}.nc'.format(i_member) #format: species[SNAP,lon,lat] + backtimes = 24 + int(dacycle['time.cycle']) * 24 # extra hours in backtime + backtimes = dtm.timedelta(hours=backtimes) + start_time = (dacycle['time.sample.end'] - backtimes) + indices = get_time_indices(datepoint, start_time) + + emisfile = path +'prior_spatial_{0:03d}.nc'.format(i_member) #format: species[SNAP, time, lon, lat] f = io.ct_read(emisfile,method='read') - emis=f.get_variable(spname[i_species]) + emis=f.get_variable(species) + emis = emis[indices].astype(np.float32) # index the interesting times f.close() + + #plt.figure() + #plt.imshow(np.log(emis[0, 0, :, :]), cmap='binary') + #plt.savefig('ff.png') return emis + def get_time_index_nc(time=None, startdate=None): """Function that gets the time index from the flux files based on the cycletime and the first time in all the files (hardcoded in stilt.rc) Input: time: datetime.datetime: The time for which the index needs to be found. 
Default: current time cycle datetime + startdate: datetime: The time at which the data's first index is """ # Get the start date of all cycles - if not startdate: startdate = model_settings['files_startdate'] if isinstance(startdate, str): startdate = dtm.datetime.strptime(startdate, '%Y-%m-%d %H:%M:%S') # Get the difference between the current and the start # Note that this is in hours, and thus assumes that the flux files are hourly as well timediff = time - startdate timediff_hours = int(timediff.total_seconds()/3600) # 1hour could also be softcoded from time.cycle - time_index = int(timediff_hours) + DO_RINGO + time_index = int(timediff_hours) return time_index -@cached(ttl=5) -def get_time_indices(datepoint, startdate=None): +def get_time_indices(datepoint, startdate=None, backtimes=24): """Function that gets the time indices in the flux files Because if the footprint is for 24 hours back, we need the fluxes 24 hours back""" time_index = get_time_index_nc(datepoint, startdate=startdate) - return slice(time_index - int(model_settings['num_backtimes']), time_index) - -def get_biosphere_concentration(foot, gpp_mem, ter_mem): - """Function that calculates the atmospheric increase due to the exchange of fluxes over the footprint - Input: - foot: np.array(times, lat, lon): - the footprint of the station - gpp_mem: np.array(nmmebers, time, lat, lon) - map of the gpp for each member - ter_mem: np.array(nmembers, time, lat, lon) - map of the ter for each member - Returns: - tuple of 2 floats: GPP and TER in umol/s """ - - # First, recalculate to good units - gpp = gpp_mem[:, :, :, :] * (1./MASS_C) * MOL2UMOL * PERHR2PERS - ter = ter_mem[:, :, :, :] * (1./MASS_C) * MOL2UMOL * PERHR2PERS - - # Multiply with the footprint - gpp_increase = - (gpp * foot[None, :, :, :]).sum() - ter_increase = (ter * foot[None, :, :, :]).sum() - - return gpp_increase, ter_increase + return slice(time_index - backtimes, time_index) class STILTObservationOperator(ObservationOperator): def __init__(self, filename, dacycle=None): #only the filename used to specify the location of the stavector file for wrf-stilt runs @@ -226,22 +197,23 @@ class STILTObservationOperator(ObservationOperator): self.bgswitch = int(dacycle.dasystem['obs.bgswitch']) self.bgfile = dacycle.dasystem['obs.background'] self.btime = int(dacycle.dasystem['run.backtime']) - self.categories = categories + + self.categories = categories # Imported from file at top self.inputdir = dacycle.dasystem['inputdir'] - self.paramdict = rc.read('/projects/0/ctdas/RINGO/inversions/Data/paramdict.rc') - biosphere_fluxes = nc.Dataset(dacycle.dasystem['biosphere_fluxdir']) - self.gpp = biosphere_fluxes['GPP'][:]#[time_indices] - self.ter = biosphere_fluxes['TER'][:]#[time_indices] - biosphere_fluxes.close() + self.pftfile = dacycle.dasystem['file.pft'] + + self.lon_start = float(dacycle.dasystem['domain.lon.start']) + self.lon_end = float(dacycle.dasystem['domain.lon.end']) + self.lat_start = float(dacycle.dasystem['domain.lat.start']) + self.lat_end = float(dacycle.dasystem['domain.lat.end']) + + self.domain_shape = (int(dacycle.dasystem['domain.lat.num']), int(dacycle.dasystem['domain.lon.num'])) - with nc.Dataset(self.dacycle.dasystem['country.mask']) as countries: - self.masks = countries['country_mask'][:] - self.country_names = countries['country_names'][:] self.nffparams = int(self.dacycle.dasystem['nffparameters']) self.nbioparams = int(self.dacycle.dasystem['nbioparameters']) - self.noise = {'CO2': 2.2, 'CO': 8, 'C14': 2, 'C14_PSEUDO': 0, 'C14_INTEGRATED': 
2, 'C14targeted': 2} - logging.info('Noise is hardcoded to be: {}'.format(self.noise)) + self.do_observations = dacycle.dasystem['make.obs'] + def get_initial_data(self, samples): """Function that loads the initial data to the observation operator""" @@ -263,6 +235,10 @@ class STILTObservationOperator(ObservationOperator): 1 if pseudo_observations are used Returns: None""" + backtimes = 24 + int(self.dacycle['time.cycle']) * 24 # extra hours in backtime + backtimes_dtm = dtm.timedelta(hours=backtimes) + self.start_time = (self.dacycle['time.sample.end'] - backtimes_dtm) + self.indices = get_time_indices(self.dacycle['time.sample.end'], backtimes=backtimes, startdate=self.dacycle.dasystem['files_startdate']) # Define the name of the file that will contain the modeled output of each observation if adv==0: @@ -276,69 +252,15 @@ class STILTObservationOperator(ObservationOperator): self.forecast_nmembers = int(self.dacycle['da.optimizer.nmembers']) # Set the inputdir self.param_inputdir = self.dacycle.dasystem['inputdir'] - #list all observation locations and species - self.lat = [] - self.lon = [] - self.hgt = [] - self.obsnc = [] - self.mmass = [] - self.sitenames=[] - self.recalc_factors = [] - infile = os.path.join(self.obsdir, self.obsfile) - f = open(infile, 'r') - lines = f.readlines() - f.close() - self.spname = [] - - # Parse input file - for line in lines: - if line[0] == '#': continue - else: - ct_, filename, lat, lon, height, species_name, species_mass, recalc_factor, *_ = line.split(',') - two_names = any(x in filename for x in ['UW', 'DW']) - sitename = ('_'.join(filename.split('_')[1:2 + two_names])) - ct = int(ct_) - 1 # Set the counter - self.obsnc.append(filename) - self.sitenames.append(sitename) - if species_name in self.spname: - if species_name == self.spname[0]: - self.lat.append(lat) - self.lon.append(lon) - self.hgt.append(height) - else: - self.spname.append(species_name) - self.mmass.append(species_mass) - self.recalc_factors.append(recalc_factor) - if ct == 0: - self.lat.append(lat) - self.lon.append(lon) - self.hgt.append(height) - - self.temporal_var = [] - for k, v in self.categories.items(): - self.temporal_var.append(v['temporal']) + self.temporal_var = [v['temporal'] for k, v in self.categories.items()] + #set time control variables for this cycle if do_pseudo==0: - self.timestartkey = dtm.datetime(self.dacycle['time.sample.start'].year, - self.dacycle['time.sample.start'].month, - self.dacycle['time.sample.start'].day, - self.dacycle['time.sample.start'].hour, 0) - self.timefinishkey = dtm.datetime(self.dacycle['time.sample.end'].year, - self.dacycle['time.sample.end'].month, - self.dacycle['time.sample.end'].day, - self.dacycle['time.sample.end'].hour, 0) + self.timestartkey = self.dacycle['time.sample.start'] + self.timefinishkey = self.dacycle['time.sample.end'] elif do_pseudo==1: - self.timestartkey = dtm.datetime(self.dacycle['time.fxstart'].year, - self.dacycle['time.fxstart'].month, - self.dacycle['time.fxstart'].day, - self.dacycle['time.fxstart'].hour, 0) - self.timefinishkey = dtm.datetime(self.dacycle['time.finish'].year, - self.dacycle['time.finish'].month, - self.dacycle['time.finish'].day, - self.dacycle['time.finish'].hour, 0) - self.prepare_c14() - - self.run_dynamic_emis_model() + self.timestartkey = self.dacycle['time.fxstart'] + self.timefinishkey = self.dacycle['time.finish'] param_values = [] for mem in range(self.forecast_nmembers): @@ -346,49 +268,116 @@ class STILTObservationOperator(ObservationOperator): 
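# --- Editor's illustrative sketch (not part of the patch, names hypothetical): how
# get_time_indices maps a sample time onto a slice of the hourly flux arrays, assuming the
# flux files start at files_startdate and hold one field per hour.
import datetime as dtm

def hourly_slice(datepoint, startdate, backtimes=24):
    """Indices of the `backtimes` hourly fields ending at `datepoint`."""
    hours_since_start = int((datepoint - startdate).total_seconds() // 3600)
    return slice(hours_since_start - backtimes, hours_since_start)

start = dtm.datetime(2016, 7, 1, 0)
print(hourly_slice(dtm.datetime(2016, 7, 2, 12), start))   # slice(12, 36, None)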
param_values.append(ds['parametervalues'][:]) self.param_values = np.asarray(param_values) + logging.info('Running emismodel') + self.run_dynamic_emis_model() + logging.info('Getting biosphere fluxes') + self.get_biosphere_emissions() + logging.info('Preparing biosphere emissions') + self.prepare_biosphere() + logging.info('Preparing C14') + self.prepare_c14() + + def run_dynamic_emis_model(self): """Function that runs the dynamic emission model and writes the spatial emissions and time profiles to nc files based on emismodel.py""" + logging.info('Calculating the emissions; entering Emismodel.py') emismodel = EmisModel() emismodel.setup(self.dacycle) - emismodel.get_emis(self.dacycle, self.allsamples, do_pseudo=0) self.emismodel = emismodel - - def prepare_biosphere(self, datepoint): + emismodel.get_emis(self.dacycle, indices=self.indices) + logging.info('Emissions calculated') + + def get_biosphere_emissions(self): + with nc.Dataset(self.dacycle.dasystem['biosphere_fluxdir']) as biosphere_fluxes: + nee = np.flip(biosphere_fluxes['NEE'][self.indices, :, :].astype(np.float32), axis=1) + self.nee = nee + #plt.figure() + #plt.imshow(nee[0, :, :], cmap='binary') + #plt.savefig('nee.png') + + def prepare_biosphere(self): """Function that prepares the biosphere fluxes on a map Input: datepoint: datetime.datetime: time for which the biosphere fluxes should be prepared Returns: None, writes the biosphere fluxes to self""" + logging.debug('Preparing biosphere') # First, get the time indices - indices = get_time_indices(datepoint) - - # Get the biosphere flux - gpp = self.gpp[indices] - ter = self.ter[indices] + indices = self.indices + + with nc.Dataset(self.pftfile) as ds: + pftdata = np.flipud(ds['pft'][:]).astype(int) + self.pftdata = pftdata + + #plt.figure() + #plt.imshow(pftdata[:, :], cmap='binary') + #plt.savefig('pft.png') + + logging.debug('Starting paint-by-number') + pool = Pool(self.forecast_nmembers) + # Create the function that calculates the conentration + + func = partial(self.paint_by_number, self.nee, self.param_values, + pftdata, self.emismodel.find_in_state, self.average2d, self.domain_shape) + scaled_nees = pool.map(func, list(range(self.forecast_nmembers))) + pool.close() + pool.join() + self.nee_mem = np.asarray(scaled_nees) # Indices: mem, time, *domain_shape - self.gpp_mem = np.zeros((self.forecast_nmembers, *gpp.shape)) - self.ter_mem = np.zeros_like(self.gpp_mem) - for mem in range(self.forecast_nmembers): - # multiply with the statevectorvalues - param_values = self.param_values[mem] - gpp_new = np.zeros_like(gpp) - ter_new = np.zeros_like(ter) - for i, country in enumerate(self.country_names): - self.country = country - country = b''.join(np.array(country)).decode() + logging.debug('Paint-by-number done, biosphere prepared') - country_mask = self.masks[i] - index_in_state = self.emismodel.find_in_state(country.upper(), 'bio', return_index=True) - if index_in_state: param_value = param_values[index_in_state] - else: param_value = 1 - gpp_country = country_mask[None, :, :] * gpp[:, :, :] * param_value - ter_country = country_mask[None, :, :] * ter[:, :, :] * param_value - gpp_new += gpp_country - ter_new += ter_country - self.gpp_mem[mem] = gpp_new - self.ter_mem[mem] = ter_new + self.write_biosphere_fluxes(self.nee_mem[0, self.btime:, :, :]) + + def write_biosphere_fluxes(self, values, qual='prior'): + # Write the biosphere fluxes to a file + bio_emis_file = os.path.join(self.outputdir, 'bio_emissions_{}_{}.nc'.format(qual, 
self.dacycle['time.sample.start'].strftime('%Y%m%d'))) + f = io.CT_CDF(bio_emis_file, method='create') + dimtime= f.add_dim('time', values.shape[0]) + dimlat = f.add_dim('lat', values.shape[1]) + dimlon = f.add_dim('lon', values.shape[2]) + + savedict = io.std_savedict.copy() + savedict['name'] = 'CO2' + savedict['long_name'] = 'Biosphere CO2 emissions' + savedict['units'] = "micromole/m2/s" + dims = dimtime + dimlat + dimlon + savedict['dims'] = dims + savedict['values'] = values + savedict['dtype'] = 'float' + f.add_data(savedict) + f.close() + + #plt.figure() + #plt.imshow(np.log(self.nee_mem[0, 0, :, :]), cmap='binary') + #plt.savefig('bio.png') + + @staticmethod + def average2d(arr, new_shape): + """ Function to average the paint-by-colour NEE to the domain size""" + shape = (new_shape[0], arr.shape[0] // new_shape[0], + new_shape[1], arr.shape[1] // new_shape[1]) + return arr.reshape(shape).mean(-1).mean(1) + + @staticmethod + def paint_by_number(nee, all_param_values, pftdata, + find_in_state, average2d, domain_shape, member): + + scaled_nees = np.zeros((len(nee), *domain_shape), dtype=np.float32) + all_lutypes = np.unique(pftdata.reshape(-1)) + param_values = all_param_values[member] + newnee = np.zeros_like(nee) + for lutype_ind, lutype in enumerate(all_lutypes): + index_in_state = find_in_state(param_values, 'BIO', str(lutype), return_index=True) + if index_in_state: + param_value = param_values[index_in_state] + else: param_value = 1 + mask = np.where(pftdata == lutype, 1, 0) + newnee += (nee * mask * param_value) + for i, n in enumerate(newnee): + scaled_nees[i] = average2d(n, domain_shape) + return scaled_nees def prepare_c14(self): """Function that loads the nuclear power temporal variation""" @@ -399,45 +388,12 @@ class STILTObservationOperator(ObservationOperator): time_profile_nuc = np.array(worksheet.col_values(19)) self.nuc_time_profile = time_profile_nuc - file = self.model_settings['biosphere_fluxdir'] - dis_eq_flux = self.get_nc_variable(file, 'TER_dC14') - self.dis_eq_flux = dis_eq_flux - + self.dis_eq_flux = self.get_nc_variable(file, 'TER_dC14', np.float32)[self.indices, :, :] file = self.model_settings['nuclear_fluxdir'] - nuc_flux = self.get_nc_variable(file, 'E_14CO2_nuc') - self.nuc_flux = nuc_flux - - ### !!!! Only cache if made dependent on datepoint/datetime !!!! - def get_time_index_nc(self, time=None, startdate=None): - """Function that gets the time index from the flux files - based on the cycletime and the first time in all the files (hardcoded in stilt.rc) - Input: - time: datetime.datetime: The time for which the index needs to be found. 
Default: current time cycle datetime - """ - if time == None: - # Get the time of the current cycle - time = self.dacycle['time.start'] - # Get the start date of all cycles - if not startdate: startdate = self.model_settings['files_startdate'] - if isinstance(startdate, str): - startdate = dtm.datetime.strptime(startdate, '%Y-%m-%d %H:%M:%S') - # Get the difference between the current and the start - # Note that this is in hours, and thus assumes that the flux files are hourly as well - timediff = time - startdate - timediff_hours = int(timediff.total_seconds()/3600) # 1hour could also be softcoded from time.cycle - time_index = int(timediff_hours) + DO_RINGO - return time_index - - def get_time_indices(self, datepoint, startdate=None): - """Function that gets the time indices in the flux files - Because if the footprint is for 24 hours back, we need the fluxes 24 hours back""" + self.nuc_flux = self.get_nc_variable(file, 'E_14CO2_nuc', np.float32) - time_index = self.get_time_index_nc(datepoint, startdate=startdate) - return slice(time_index - int(self.model_settings['num_backtimes']), time_index) - - # @cached def get_foot(self, site, datepoint): """Function that gets the footprint for the current time and site. Returns a 3D np.array with dims (time, lat, lon) @@ -457,15 +413,20 @@ class STILTObservationOperator(ObservationOperator): f = glob.glob(fname)[0] footprint = nc.Dataset(f)['foot'] + #plt.figure() + #plt.imshow(np.log(footprint[0, :, :]), cmap='binary') + #plt.title(site) + #plt.savefig('foot.png') + + # Flip, because the times are negative - return np.flipud(footprint) + return np.flipud(footprint).astype(np.float32) - #@cached - def get_background(self, i_species, site, datepoint): + def get_background(self, species, site, datepoint): """Function that finds the center of mass of the first footprint and the time corresponding to it. and finds the concentration in the center of mass. This is used as the background. Input: - i_species: int: the index of the species for which the background should be found + species: str: the species for which the background should be found i_loc : int: the index of the locatin for which the background should be found datepoint: datetime.datetime: the datetime of the background concentration Returns: @@ -485,7 +446,7 @@ class STILTObservationOperator(ObservationOperator): center_of_mass = ndimage.measurements.center_of_mass(fp[start_influence]) center_of_mass = np.array(np.rint(center_of_mass), dtype=int) - species_name = self.spname[i_species] + species_name = self.spname[species] index = self.get_time_index_nc() - (int(self.model_settings['num_backtimes']) - start_influence) @@ -499,9 +460,7 @@ class STILTObservationOperator(ObservationOperator): def get_background_orig(self, species): """Function that finds the background concentration, non-time dependent and hard-coded. Input: - i_species: int: the index of the species for which the background should be found - i_loc : int: the index of the locatin for which the background should be found - datepoint: datetime for which the background concentration should be found. 
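# --- Editor's illustrative sketch (not part of the patch): the reshape/mean trick used by
# the average2d helper introduced above to block-average a fine NEE grid onto the coarser
# model domain (shapes are assumed to divide evenly).
import numpy as np

def block_average(arr, new_shape):
    nr, nc = new_shape
    blocks = arr.reshape(nr, arr.shape[0] // nr, nc, arr.shape[1] // nc)
    return blocks.mean(axis=(1, 3))

fine = np.arange(16, dtype=float).reshape(4, 4)
print(block_average(fine, (2, 2)))
# [[ 2.5  4.5]
#  [10.5 12.5]]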
+ species: the species for which the background should be found Returns: float: background concentration """ @@ -509,33 +468,23 @@ class STILTObservationOperator(ObservationOperator): backgrounds = {'CO2': 406.15, 'CO': 127.2, 'C14': 0} return backgrounds[species] - def get_biosphere_concentration(self, foot, gpp_mem, ter_mem): + def get_biosphere_concentration(self, foot, nee_mem, datepoint): """Function that calculates the atmospheric increase due to the exchange of fluxes over the footprint Input: i_member: int: member number for which the biosphere concentration should be calculated site: str: location for which the concentration should be calculated - datepoint: datetime.datetime: the datepoint for which the concentration should be calculated total: bool, optional: Whether to returned summed values or gpp and ter seperately as tuple Returns: if total == True: float: The total biosphere flux in umol/s else: tuple of 2 floats: GPP and TER in umol/s """ - - # First, get the footprint - gpp = gpp_mem[:, :, :, :] * (1./MASS_C) * MOL2UMOL * PERHR2PERS - ter = ter_mem[:, :, :, :] * (1./MASS_C) * MOL2UMOL * PERHR2PERS - - gpp_increase = - (gpp * foot[None, :, :, :]).sum(axis=(1,2,3)) - ter_increase = (ter * foot[None, :, :, :]).sum(axis=(1,2,3)) - - # logging.debug('GGP flux = {0:.3}; TER flux = {1:.3}'.format(gpp_increase, ter_increase)) - - return gpp_increase, ter_increase + indices = get_time_indices(datepoint, self.start_time) + nee_increase = (nee_mem[:, indices, :, :] * foot[None, :, :, :]).sum(axis=(1,2,3)) # (1./MASS_C) * MOL2UMOL * PERHR2PERS + return nee_increase -# @cached - def get_c14_concentration(self, datepoint, gpp, ter, ff_flux, background): + def get_c14_concentration(self, datepoint, nee, ff_flux, background): """Function that gets the c14 concentration based on the emissions by the biosphere, fossil fuels and nuclear power. The constants are defined at the top of this script. 
Input: @@ -548,7 +497,7 @@ class STILTObservationOperator(ObservationOperator): float: The C14 in ppm float: Delta(14C) """ # Get the time indices - indices = get_time_indices(datepoint) + indices = get_time_indices(datepoint, startdate=self.dacycle.dasystem['files_startdate']) # Get the fluxes from the nuclear plants nuclear_time_profile = np.array(self.nuc_time_profile[indices], dtype=float) # Get the fluxes and multiply them by the footprint and time profile @@ -582,8 +531,7 @@ class STILTObservationOperator(ObservationOperator): delta_14CO2 = (Asn * np.exp(lambda_14CO2 * dt_obs) / Aabs -1) * 1000.# per mille return delta_14CO2 -# @cached - def get_nc_variable(self, file, fluxname): + def get_nc_variable(self, file, fluxname, dtype): """Helper function that gets the values from an nc file Input: file: str: filename of the nc file of which the value should be taken @@ -592,14 +540,42 @@ class STILTObservationOperator(ObservationOperator): np.ndarray: the values in the nc file""" data = nc.Dataset(file) fluxmap = data[fluxname][:] + fluxmap = fluxmap.astype(dtype) data.close() return fluxmap - + def run_forecast_model(self,dacycle,do_pseudo,adv): + logging.info('Preparing run...') + if self.do_observations: + self.make_observations(dacycle) self.prepare_run(do_pseudo,adv) + logging.info('Starting to simulate concentrations...') self.run(dacycle,do_pseudo) self.save_data() -# self.run_breakdown() + + + def make_observations(self, dacycle, do_pseudo=0, adv=0): + logging.info('Prepare run...') + logging.info('Making observations, nmembers = 1') + dacycle['da.optimizer.nmembers'] = 1 + self.forecast_nmembers = 1 + self.prepare_run(do_pseudo, adv) + logging.info('Starting to simulate concentrations...') + import pandas as pd + import sys + import copy + all_samples = copy.deepcopy(self.samples) + sites = set([sample.code.split('/')[1].split('_')[1] for sample in all_samples]) + print(sites) + for site in sites: + current_samples = [sample for sample in all_samples if site in sample.code] + self.samples = current_samples + self.run(dacycle, do_pseudo) + times = [sample.xdate for sample in current_samples] + df = pd.DataFrame(self.calculated_concentrations, times, columns=['concentration']) + df.to_csv(site + dtm.datetime.strftime(dacycle['time.sample.start'], "%Y%m%d") + '.csv') + dacycle.finalize(make_obs=True) + sys.exit(0) def run(self, dacycle, do_pseudo): """Function that calculates the simulated concentrations for all samples @@ -609,7 +585,6 @@ class STILTObservationOperator(ObservationOperator): # Initialise a list with the calculated concentrations. To be filled in this function. calculated_concentrations = np.zeros((len(self.samples), self.forecast_nmembers)) calculated_concentrations[:, :] = np.nan - self.calculated_concentrations = calculated_concentrations # Initialise a datepoint that has been done. For now, this is None previously_done_datepoint = None; previous_day = None previously_done_site = None @@ -623,13 +598,6 @@ class STILTObservationOperator(ObservationOperator): self.sample = sample datepoint = sample.xdate - - if datepoint != previously_done_datepoint: - # If it is a new date, we need a new biosphere prior - # Note that this could be optimised because we just - # Shift the time index by one. 
However, this takes - # Only about half a second - self.prepare_biosphere(datepoint) if datepoint.day != previous_day: logging.debug('Working on year {}, month {}, day {}'.format(datepoint.year, datepoint.month, datepoint.day)) # We are now ready to make a simulation for this datepoint @@ -646,26 +614,21 @@ class STILTObservationOperator(ObservationOperator): self.c14_dict = c14_dict concentrations = self.calc_concentrations(sample) - try: # If there is a concentration returned, add this to the calculated concentrations. - # Otherwise, keep the Nans. - if any(concentrations): calculated_concentrations[i, :] = concentrations - except: pass #calculated_concentrations = np.delete(calculated_concentrations, i, axis=0) + # If there is a concentration returned, add this to the calculated concentrations. + # Otherwise, keep the Nans. + if concentrations is not None: calculated_concentrations[i, :] = concentrations # Set the time and site - previously_done_datepoint = datepoint; previous_day = datepoint.day - previously_done_site = site + previously_done_datepoint = datepoint + previous_day = datepoint.day + previously_done_site = site + # This is how ingrid did it, so I will too... self.mod = np.array(calculated_concentrations) # add the calculated concentrations to the object self.calculated_concentrations = calculated_concentrations - - # Clear the cache from the functions, so that a new cycle does - # (with different parameters) does not find the same results. - get_spatial_emissions.cache_clear() - get_temporal_profiles.cache_clear() - def calc_concentrations(self, sample): """Function that calculates the concentration for a sample and all members. Gets the relevant information from the sample and then calculates the concentration @@ -680,7 +643,6 @@ class STILTObservationOperator(ObservationOperator): # The species, the sitename, the datepoint # Note that these could also be passed from the function that calls this function species = sample.species[:3] - i_species = self.spname.index(species.upper()) datepoint = sample.xdate # logging.debug('{} {}'.format(datepoint, sample.species)) two_names = any(x in sample.code for x in ['UW', 'DW']) @@ -690,7 +652,9 @@ class STILTObservationOperator(ObservationOperator): background = self.get_background_orig(species.upper()) # First, add noise (this represents errors in the transport model - noise = np.random.normal(0, self.noise[sample.species.upper()]) + if self.do_observations: + noise = 0 + noise = np.random.normal(0, sample.mdm) # Some different cases for different species. # First case: Species is not c14: @@ -698,7 +662,7 @@ class STILTObservationOperator(ObservationOperator): if not 'C14' in species.upper(): pool = Pool(self.forecast_nmembers) # Create the function that calculates the conentration - func = partial(run_STILT, self.foot, datepoint, site, i_species, self.inputdir) + func = partial(run_STILT, self.dacycle, self.foot, datepoint, species, self.inputdir) # We need to run over all members memberlist = list(range(0, self.forecast_nmembers)) @@ -713,10 +677,9 @@ class STILTObservationOperator(ObservationOperator): # Initialise a dictionary that holds the biosphere and FF fluxes from this time and site. 
if not datepoint in self.c14_dict[site].keys(): self.c14_dict[site][datepoint] = {} - bio_fluxes = self.get_biosphere_concentration(self.foot, self.gpp_mem, self.ter_mem) - nee = np.array(sum(bio_fluxes)) - - self.c14_dict[site][datepoint]['bio'] = bio_fluxes[:] + nee = self.get_biosphere_concentration(self.foot, self.nee_mem, datepoint) + + self.c14_dict[site][datepoint]['bio'] = nee[:] self.c14_dict[site][datepoint]['ff'] = ff_increase[:] # If not the species is CO2, the NEE is 0. @@ -736,9 +699,9 @@ class STILTObservationOperator(ObservationOperator): # If the CO2 concentrations are not yet calculated for this sample: # We calculate these fluxes. First the biosphere self.c14_dict[site][sample.xdate] = {} - bio_fluxes = self.get_biosphere_concentration(self.foot, self.gpp_mem, self.ter_mem) + nee = self.get_biosphere_concentration(self.foot, self.nee_mem, datepoint) - self.c14_dict[site][datepoint]['bio'] = bio_fluxes[:] + self.c14_dict[site][datepoint]['nee'] = nee[:] # Pretend the sample is a CO2 sample, so that the FF increase is calculated sample.species = 'CO2' @@ -777,10 +740,9 @@ class STILTObservationOperator(ObservationOperator): _ = self.calc_concentrations(sample) sample.species = 'C14' for mem in range(self.forecast_nmembers): - gpp = self.c14_dict[site][datepoint]['bio'][0][mem] - ter = self.c14_dict[site][datepoint]['bio'][1][mem] + nee = self.c14_dict[site][datepoint]['nee'][mem] ff_flux = self.c14_dict[site][datepoint]['ff'][mem] - c14concentrations[mem] = self.get_c14_concentration(datepoint, gpp, ter, ff_flux, self.get_background_orig('CO2')) + c14concentrations[mem] = self.get_c14_concentration(datepoint, nee, ff_flux, self.get_background_orig('CO2')) return c14concentrations + noise @@ -791,12 +753,11 @@ class STILTObservationOperator(ObservationOperator): def save_data(self): """ Write the data that is needed for a restart or recovery of the Observation Operator to the save directory """ - - import da.tools.io4 as io - + # Create a new file f = io.CT_CDF(self.simulated_file, method='create') logging.debug('Creating new simulated observation file in ObservationOperator (%s)' % self.simulated_file) + # Save the id of the observation dimid = f.createDimension('obs_num', size=None) dimid = ('obs_num',) savedict = io.std_savedict.copy() @@ -808,6 +769,7 @@ class STILTObservationOperator(ObservationOperator): savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." 
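# --- Editor's illustrative sketch (not part of the patch): the observation/member layout
# that save_data writes here, expressed with plain netCDF4 instead of the project's
# io.CT_CDF wrapper (file name and sizes hypothetical).
import numpy as np
import netCDF4 as nc

with nc.Dataset('simulated_example.nc', 'w') as f:
    f.createDimension('obs_num', None)                    # unlimited observation index
    f.createDimension('nmembers', 3)
    obs = f.createVariable('obs_num', 'i4', ('obs_num',))
    model = f.createVariable('model', 'f8', ('obs_num', 'nmembers'))
    simulated = np.random.rand(2, 3)                      # 2 observations, 3 members
    for i, (obs_id, conc) in enumerate(zip([0, 1], simulated)):
        obs[i] = obs_id
        model[i, :] = conc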
f.add_data(savedict,nsets=0) + # Save the simulated mole fraction dimmember = f.createDimension('nmembers', size=self.forecast_nmembers) dimmember = ('nmembers',) savedict = io.std_savedict.copy() @@ -825,91 +787,9 @@ class STILTObservationOperator(ObservationOperator): for i,data in enumerate(zip(ids,self.mod)): f.variables['obs_num'][i] = data[0] f.variables['model'][i,:] = data[1] - dum=f.variables['model'][:] f.close() f_in.close() - def save_obs(self): - """Write pseudo-observations to file""" - - ct1=0 - for k in range(self.nrloc*self.nrspc): - newfile = os.path.join(self.obsdir,self.obsnc[k]+'.nc') - f = io.CT_CDF(newfile, method='create') - logging.debug('Creating new pseudo observation file in ObservationOperator (%s)' % newfile) - - dimid = f.add_dim('Time',len(self.datelist)) - ln = len(self.datelist) - str19 = f.add_dim('strlen',19) - str3 = f.add_dim('strlen2',3) - - data=[] - for it,t in enumerate(self.datelist): - data.append(list(t.isoformat().replace('T','_'))) - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "Times" - savedict['units'] = "" - savedict['dims'] = dimid + str19 - savedict['values'] = data - f.add_data(savedict) - - data = ln*[self.lat[int(k/self.nrspc)]] - savedict = io.std_savedict.copy() - savedict['name'] = "lat" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data - f.add_data(savedict) - - data = ln*[self.lon[int(k/self.nrspc)]] - savedict = io.std_savedict.copy() - savedict['name'] = "lon" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data - f.add_data(savedict) - - data = ln*[self.hgt[int(k/self.nrspc)]] - savedict = io.std_savedict.copy() - savedict['name'] = "alt" - savedict['units'] = "m above ground" - savedict['dims'] = dimid - savedict['values'] = data - f.add_data(savedict) - - savedict = io.std_savedict.copy() - savedict['name'] = "obs" - savedict['units'] = "ppm or ppb" - savedict['dims'] = dimid - savedict['values'] = self.mod_prior[ct1:ct1+ln] - f.add_data(savedict) - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "species" - savedict['units'] = "observed species" - savedict['dims'] = dimid + str3 - savedict['values'] = self.sps[ct1:ct1+ln] - f.add_data(savedict) - - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "ids" - savedict['units'] = "unique observation identification number" - savedict['dims'] = dimid - savedict['values'] = self.ids[ct1:ct1+ln] - f.add_data(savedict) - - - f.close() - - ct1 += ln - - logging.debug("Successfully wrote data to obs file %s"%newfile) - ################### End Class STILT ################### diff --git a/da/ccffdas/pipeline.py b/da/ccffdas/pipeline.py index d8d8e04dbd3f02a1c41604ebaa32fe406e52c6b5..89f1cc1a6181f33df2a3caa95e489d2b93087236 100755 --- a/da/ccffdas/pipeline.py +++ b/da/ccffdas/pipeline.py @@ -16,6 +16,7 @@ import os import sys import datetime import copy +import numpy as np header = '\n\n *************************************** ' footer = ' *************************************** \n ' @@ -32,7 +33,7 @@ def ensemble_smoother_pipeline(dacycle, platform, dasystem, samples, statevector invert(dacycle, statevector, optimizer, obsoperator) - advance(dacycle, samples, statevector, obsoperator) + advance(dacycle, samples, statevector, obsoperator, optimizer) save_and_submit(dacycle, statevector) logging.info("Cycle finished...exiting pipeline") @@ -115,7 +116,7 @@ def forward_pipeline(dacycle, platform, 
dasystem, samples, statevector, obsopera # Finally, we run forward with these parameters - advance(dacycle, samples, statevector, obsoperator) + advance(dacycle, samples, statevector, obsoperator, optimizer) # In save_and_submit, the posterior statevector will be added to the savestate.nc file, and it is added to the copy list. # This way, we have both the prior and posterior data from another run copied into this assimilation, for later analysis. @@ -400,31 +401,41 @@ def invert(dacycle, statevector, optimizer, obsoperator): -def advance(dacycle, samples, statevector, obsoperator): +def advance(dacycle, samples, statevector, obsoperator, optimizer): """ Advance the filter state to the next step """ # This is the advance of the modeled CO2 state. Optionally, routines can be added to advance the state vector (mean+covariance) # Then, restore model state from the start of the filter logging.info(header + "starting advance" + footer) - logging.info("Sampling model will be run over 1 cycle") - - obsoperator.get_initial_data(samples) - - sample_step(dacycle, samples, statevector, obsoperator, 0, True) - - dacycle.restart_filelist.extend(obsoperator.restart_filelist) - dacycle.output_filelist.extend(obsoperator.output_filelist) - logging.debug("Appended ObsOperator restart and output file lists to dacycle for collection ") - - dacycle.output_filelist.append(dacycle['ObsOperator.inputfile']) - logging.debug("Appended Observation filename to dacycle for collection: %s"%(dacycle['ObsOperator.inputfile'])) - - sampling_coords_file = os.path.join(dacycle['dir.input'], 'sample_coordinates_%s.nc' % dacycle['time.sample.stamp']) - if os.path.exists(sampling_coords_file): - outfile = os.path.join(dacycle['dir.output'], 'sample_auxiliary_%s.nc' % dacycle['time.sample.stamp']) - samples.write_sample_auxiliary(outfile, obsoperator.simulated_file) - else: logging.warning("Sample auxiliary output not written, because input file does not exist (no samples found in obspack)") + optimised_nee = obsoperator.paint_by_number(obsoperator.nee, [optimizer.x], obsoperator.pftdata, + obsoperator.emismodel.find_in_state, obsoperator.average2d, obsoperator.domain_shape, 0) + true_nee = obsoperator.paint_by_number(obsoperator.nee, [np.ones_like(optimizer.x)], obsoperator.pftdata, + obsoperator.emismodel.find_in_state, obsoperator.average2d, obsoperator.domain_shape, 0) + obsoperator.write_biosphere_fluxes(optimised_nee[obsoperator.btime:], qual='optimised') + obsoperator.write_biosphere_fluxes(true_nee[obsoperator.btime:], qual='true') + + time_profiles = obsoperator.emismodel.make_time_profiles(obsoperator.indices) + obsoperator.emismodel.get_emissions(dacycle, time_profiles, member='optimised') + obsoperator.emismodel.get_emissions(dacycle, time_profiles, member='true') + # logging.info("Sampling model will be run over 1 cycle") + + # obsoperator.get_initial_data(samples) + + # sample_step(dacycle, samples, statevector, obsoperator, 0, True) + + # dacycle.restart_filelist.extend(obsoperator.restart_filelist) + # dacycle.output_filelist.extend(obsoperator.output_filelist) + # logging.debug("Appended ObsOperator restart and output file lists to dacycle for collection ") + # + # dacycle.output_filelist.append(dacycle['ObsOperator.inputfile']) + # logging.debug("Appended Observation filename to dacycle for collection: %s"%(dacycle['ObsOperator.inputfile'])) + + # sampling_coords_file = os.path.join(dacycle['dir.input'], 'sample_coordinates_%s.nc' % dacycle['time.sample.stamp']) + # if os.path.exists(sampling_coords_file): + 
# outfile = os.path.join(dacycle['dir.output'], 'sample_auxiliary_%s.nc' % dacycle['time.sample.stamp']) + # samples.write_sample_auxiliary(outfile, obsoperator.simulated_file) + # else: logging.warning("Sample auxiliary output not written, because input file does not exist (no samples found in obspack)") def save_and_submit(dacycle, statevector): """ Save the model state and submit the next job """ diff --git a/da/ccffdas/statevector.py b/da/ccffdas/statevector.py index 709d6e561e428fbdee887fe8250f4d38c2a529b4..470b07f8a97181f8dbf9c997c52cf706184f8c99 100755 --- a/da/ccffdas/statevector.py +++ b/da/ccffdas/statevector.py @@ -74,11 +74,6 @@ class CO2StateVector(StateVector): # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. self.ensemble_members = [[] for n in range(self.nlag)] -# self.ensemble_members = list(range(self.nlag)) -# -# for n in range(self.nlag): -# self.ensemble_members[n] = [] - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. @@ -88,13 +83,13 @@ class CO2StateVector(StateVector): # Create a dictionary for state <-> gridded map conversions - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) > 0: - self.griddict[r] = sel - +# nparams = self.gridmap.max() +# self.griddict = {} +# for r in range(1, int(nparams) + 1): +# sel = (self.gridmap.flat == r).nonzero() +# if len(sel[0]) > 0: +# self.griddict[r] = sel +# # biosphere_fluxes = nc.Dataset(dacycle.dasystem['biosphere_fluxdir']) # self.gpp = biosphere_fluxes['GPP']#[time_indices] @@ -233,7 +228,10 @@ class CO2StateVector(StateVector): if self.prop == 1 or dacycle['time.restart']==False: file=os.path.join(self.obsdir,self.pparam) f = io.ct_read(file, 'read') - prmval = f.get_variable('prior_values')[:self.nparams] + if dacycle.dasystem['make.obs']: + prmval = f.get_variable('true_values')[:self.nparams] + else: + prmval = f.get_variable('prior_values')[:self.nparams] f.close() # 2: Propagate mean else: @@ -290,7 +288,10 @@ class CO2StateVector(StateVector): if self.prop == 1 or dacycle['time.restart']==False: file=os.path.join(self.obsdir,self.pparam) f = io.ct_read(file, 'read') - prmval = f.get_variable('prior_values')[:self.nparams] + if dacycle.dasystem['make.obs']: + prmval = f.get_variable('true_values')[:self.nparams] + else: + prmval = f.get_variable('prior_values')[:self.nparams] f.close() else: prmval = self.prmval diff --git a/da/ccffdas/stilt-ops_urbanall.rc b/da/ccffdas/stilt-ops_urbanall.rc index f219a7bd86c023231aeb8357f6115b717017d1ee..bbbf3321cfb0f6f8e3ac8d2f57a07981797aa245 100755 --- a/da/ccffdas/stilt-ops_urbanall.rc +++ b/da/ccffdas/stilt-ops_urbanall.rc @@ -1,30 +1,48 @@ !!! 
Info for the CarbonTracker data assimilation system -basepath : /projects/0/ctdas/RINGO/inversions/ +basepath : /projects/0/ctdas/awoude/develop/ name : -strategy : CO2C14 -datadir : ${basepath}/Data -inputdir : ${basepath}/${name}${strategy}/input/ -outputdir : ${basepath}/${name}${strategy}/output/ -restartdir : ${basepath}/${name}${strategy}/restart/ +datadir : /projects/0/ctdas/RINGO/inversions/Data +strategy : Paint1c +datadir : /projects/0/ctdas/RINGO/inversions/Data/ +inputdir : ${basepath}/input/ +outputdir : ${basepath}/output/ +restartdir : ${basepath}/restart/ ! list of all observation sites obs.input.id : obsfiles.csv ! number of observation sites included; number of species included and to be used in inversion obs.input.nr : 100 -obs.spec.nr : 2 +obs.spec.nr : 1 obs.dir : obsfiles${strategy} do.co : 0 do.c14integrated: 0 do.c14targeted: 0 -obs.spec.name : CO2,CO +obs.spec.name : CO2 ! number of emission categories defined in the emission model -obs.cat.nr : 14 +obs.cat.nr : 10 +! Flag to calculate the summed (=total) emission, or specific per category +cat.sum_emissions: False +! Flag to create observations: +make.obs : False ! For Rdam obs -obs.sites.rc : ${datadir}/sites_weights.rc +obs.sites.rc : ${datadir}/sites_weights2.rc ! number of parameters ! In the covmatrix and statevector, the ff parameters are first, then the bio parameters! -nffparameters : 16 -nbioparameters : 4 -nparameters : 20 +nffparameters : 24 +nbioparameters : 4 +nparameters : ${nffparameters} + ${nbioparameters} + +file.pft : ${datadir}/PFT_highres.nc +file.timeprofs : ${datadir}/CAMS_TEMPO_Tprof_subset.nc + +! Settings for the domain: +domain.lon.start : -5.95 +domain.lon.end : 19.95 +domain.lat.start : 43.025 +domain.lat.end : 54.975 +domain.lon.num : 260 +domain.lat.num : 240 + +paramdict : ${datadir}/paramdict.rc ! set fixed seed for random number generator, or use 0 if you want to use any random seed random.seed : 4385 !file with prior estimate of scaling factors (statevector) and covariances @@ -43,8 +61,6 @@ obs.background : ${datadir}/background.nc ! input data for emission model emis.input.spatial : spatial_data.nc -emis.input.tempobs : time_profiles_stations.nc -emis.input.tempprior : time_profiles_stations.nc ! Area of the gridboxes area.file : ${datadir}/area.nc @@ -56,7 +72,8 @@ run.obsflag : 0 ! back trajectory time of STILT footprints, also applied to OPS (in hours) run.backtime : 24 -biosphere_fluxdir : /projects/0/ctdas/RINGO/EmissionInventories/True/RINGO_ORCHIDEE_GPP_TER_dC14_old.nc +biosphere_fluxdir : ${datadir}/SiBNEE.2016.01-2016.02_painted.nc +!biosphere_fluxdir : ${datadir}/SiBfile.nc files_startdate : 2016-01-01 00:00:00 ! 
choose propagation scheme: diff --git a/da/co2gridded/__pycache__/__init__.cpython-37.pyc b/da/co2gridded/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index e0c3863125f421a32123122adf70032592bbb474..0000000000000000000000000000000000000000 Binary files a/da/co2gridded/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/da/co2gridded/__pycache__/statevector.cpython-37.pyc b/da/co2gridded/__pycache__/statevector.cpython-37.pyc deleted file mode 100644 index 0ac5d4dce91a51e310af5dfbbc249be6c3276f05..0000000000000000000000000000000000000000 Binary files a/da/co2gridded/__pycache__/statevector.cpython-37.pyc and /dev/null differ diff --git a/da/co2gridded/dasystem.py.bak b/da/co2gridded/dasystem.py.bak deleted file mode 100755 index 1ca8f752760c8ab88e5f72ea2ac2e1a8b0517cd0..0000000000000000000000000000000000000000 --- a/da/co2gridded/dasystem.py.bak +++ /dev/null @@ -1,75 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# control.py - -""" -Author : peters - -Revision History: -File created on 26 Aug 2010. - -""" - -import logging - - -################### Begin Class CtDaSystem ################### - -from da.baseclasses.dasystem import DaSystem - -class CO2GriddedDaSystem(DaSystem): - """ Information on the data assimilation system used. This is normally an rc-file with settings. - """ - - def __init__(self, rcfilename): - """ - Initialization occurs from passed rc-file name, items in the rc-file will be added - to the dictionary - """ - - self.ID = 'CarbonTracker Gridded CO2' # the identifier gives the platform name - self.load_rc(rcfilename) - - logging.debug('Data Assimilation System initialized: %s' % self.ID) - - def validate(self): - """ - validate the contents of the rc-file given a dictionary of required keys - """ - - needed_rc_items = ['obs.input.dir', - 'obs.input.fname', - 'ocn.covariance', - 'nparameters', - 'deltaco2.prefix', - 'regtype'] - - - for k, v in self.items(): - if v == 'True' : self[k] = True - if v == 'False': self[k] = False - - for key in needed_rc_items: - if key not in self: - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - raise IOError(msg) - - logging.debug('DA System Info settings have been validated succesfully') - -################### End Class CtDaSystem ################### - - -if __name__ == "__main__": - pass diff --git a/da/doc/source/conf.py.bak b/da/doc/source/conf.py.bak deleted file mode 100644 index 9235dc527cde5492aa6c2ce630293fbccb7cc6f0..0000000000000000000000000000000000000000 --- a/da/doc/source/conf.py.bak +++ /dev/null @@ -1,229 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. 
See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -# -*- coding: utf-8 -*- -# -# CarbonTracker Data Assimilation Shell documentation build configuration file, created by -# sphinx-quickstart on Sun Sep 26 13:39:23 2010. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys, os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(),'../../../'))) -print sys.path - -# -- General configuration ----------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'CarbonTracker Data Assimilation Shell' -copyright = u'2010, Wouter Peters' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '0.1' -# The full version, including alpha/beta/rc tags. -release = '0.1' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. 
-#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinxdoc' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# "<project> v<release> documentation". -html_title = 'CarbonTracker DAS' - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = './images/carbontracker.png' - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'CarbonTrackerDataAssimilationShelldoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). 
-latex_documents = [ - ('index', 'CarbonTrackerDataAssimilationShell.tex', u'CarbonTracker Data Assimilation Shell Documentation', - u'Wouter Peters', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output -------------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'carbontrackerdataassimilationshell', u'CarbonTracker Data Assimilation Shell Documentation', - [u'Wouter Peters'], 1) -] diff --git a/da/methane/analysis/expand_fluxes.py.bak b/da/methane/analysis/expand_fluxes.py.bak deleted file mode 100755 index 15d12d120dd62a747a747013de450da161c55dda..0000000000000000000000000000000000000000 --- a/da/methane/analysis/expand_fluxes.py.bak +++ /dev/null @@ -1,1112 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# expand_fluxes.py -import sys -sys.path.append('../../') -import os -from datetime import datetime, timedelta - -import logging -import numpy as np -from da.tools.general import date2num, num2date -import da.methane.io4 as io -from da.methane.analysis.tools_regions import globarea -from da.methane.tools import state_to_grid -from da.tools.general import create_dirs -from da.analysis.tools_country import countryinfo # needed here -from da.methane.analysis.tools_transcom import transcommask, ExtendedTCRegions - - -import da.methane.analysis.tools_transcom as tc -import da.analysis.tools_country as ct -import da.analysis.tools_time as timetools - - - -""" -Author: Wouter Peters (Wouter.Peters@noaa.gov) - -Revision History: -File created on 21 Ocotber 2008. - -""" - -def proceed_dialog(txt, yes=['y', 'yes'], all=['a', 'all', 'yes-to-all']): - """ function to ask whether to proceed or not """ - response = raw_input(txt) - if response.lower() in yes: - return 1 - if response.lower() in all: - return 2 - return 0 - -def save_weekly_avg_1x1_data(dacycle, statevector): - """ - Function creates a NetCDF file with output on 1x1 degree grid. 
It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Create or open NetCDF output file -# - logging.debug("Create NetCDF output file: flux_1x1.%s.nc" % startdate.strftime('%Y-%m-%d')) - saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d')) - ncf = io.CT_CDF(saveas, 'write') - -# -# Create dimensions and lat/lon grid -# - dimgrid = ncf.add_latlon_dim() - dimensemble = ncf.add_dim('members', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - -# -# if not, process this cycle. Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux'])) - term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - file.close() - - next = ncf.inq_unlimlen()[0] - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. 
-# - - if prior: - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = enddate - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=n) - -# Replace the mean statevector by all ones (assumed priors) - - gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,)) - - logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n)) - break - else: - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename, qual=qual_short) - gridmean, gridensemble = statevector.state_to_grid(lag=1) - - logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - #print gridensemble.shape, bio.shape, gridmean.shape - biomapped = bio * gridmean - fossilmapped = fossil * gridmean - biovarmapped = bio * gridensemble - fossilvarmapped = fossil * gridensemble - -# -# -# For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write -# - savedict = ncf.standard_var(varname='bio_flux_' + qual_short) - savedict['values'] = biomapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='fossil_flux_' + qual_short) - savedict['values'] = fossilmapped.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - #print biovarmapped.shape - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - savedict['values'] = biovarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short) - savedict['values'] = fossilvarmapped.tolist() - savedict['dims'] = dimdate + dimensemble + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - # End prior/posterior block - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = fire.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='ocn_flux_imp') - savedict['values'] = ocean.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname='term_flux_imp') - savedict['values'] = term.tolist() - savedict['dims'] = dimdate + dimgrid - savedict['count'] = next - ncf.add_data(savedict) - - #area = globarea() - #savedict = ncf.standard_var(varname='cell_area') - #savedict['values'] = area.tolist() - #savedict['dims'] = dimgrid - #ncf.add_data(savedict) -# - savedict = ncf.standard_var(varname='date') - savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0 - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Gridded weekly average fluxes now written") - - 
return saveas - -def save_weekly_avg_state_data(dacycle, statevector): - """ - Function creates a NetCDF file with output for all parameters. It uses the flux data written by the - :class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and - variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - """ - - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - nlag = statevector.nlag - - area = globarea() - vectorarea = statevector.grid2vector(griddata=area, method='sum') - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - -# -# Create or open NetCDF output file -# - saveas = os.path.join(dirname, 'statefluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - -# -# Create dimensions and lat/lon grid -# - dimregs = ncf.add_dim('nparameters', statevector.nparams) - dimmembers = ncf.add_dim('nmembers', statevector.nmembers) - dimdate = ncf.add_date_dim() -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker fluxes') - setattr(ncf, 'node_offset', 1) -# -# skip dataset if already in file -# - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - next = ncf.inq_unlimlen()[0] - -# -# if not, process this cycle. Start by getting flux input data from CTDAS -# - filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H'))) - - file = io.ct_read(filename, 'read') - bio = np.array(file.get_variable(dacycle.dasystem['background.ch4.bio.flux'])) - ocean = np.array(file.get_variable(dacycle.dasystem['background.ch4.ocean.flux'])) - fire = np.array(file.get_variable(dacycle.dasystem['background.ch4.fires.flux'])) - fossil = np.array(file.get_variable(dacycle.dasystem['background.ch4.fossil.flux'])) - term = np.array(file.get_variable(dacycle.dasystem['background.ch4.term.flux'])) - #mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1'])) - file.close() - - next = ncf.inq_unlimlen()[0] - - vectorbio = statevector.grid2vector(griddata=bio * area, method='sum') - vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum') - vectorfire = statevector.grid2vector(griddata=fire * area, method='sum') - vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum') - vectorterm = statevector.grid2vector(griddata=term * area, method='sum') - - -# Start adding datasets from here on, both prior and posterior datasets for bio and ocn - - for prior in [True, False]: -# -# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step -# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycle away from the start date.. 
-# - if prior: - qual_short = 'prior' - for n in range(nlag, 0, -1): - priordate = enddate - timedelta(dt.days * n) - savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d')) - filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d')) - if os.path.exists(filename): - statevector.read_from_file(filename, qual=qual_short) -# Replace the mean statevector by all ones (assumed priors) - statemean = np.ones((statevector.nparams,)) - choicelag = n - logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag)) - break - else: - qual_short = 'opt' - savedir = dacycle['dir.output'] - filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d')) - statevector.read_from_file(filename) - choicelag = 1 - statemean = statevector.ensemble_members[choicelag - 1][0].param_values - logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag)) -# -# if prior, do not multiply fluxes with parameters, otherwise do -# - data = statemean * vectorbio # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. -# -# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values -# which are assumed 1.0 in the prior always. -# - - members = statevector.ensemble_members[choicelag - 1] - deviations = np.array([mem.param_values * vectorbio for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short) - - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'bio_flux_%s_std' % qual_short - savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = statemean * vectorfossil # units of mole region-1 s-1 - - savedict = ncf.standard_var(varname='fossil_flux_%s' % qual_short) - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - -# -# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to -# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the -# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want. 
-# -# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values -# which are assumed 1.0 in the prior always. -# - - deviations = np.array([mem.param_values * vectorfossil for mem in members]) - deviations = deviations - deviations[0, :] - - savedict = ncf.standard_var(varname='fossil_flux_%s_ensemble' % qual_short) - savedict['values'] = deviations.tolist() - savedict['dims'] = dimdate + dimmembers + dimregs - savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'fossil_flux_%s_std' % qual_short - savedict['long_name'] = 'Fossil flux standard deviation, %s' % qual_short - savedict['values'] = deviations.std(axis=0) - savedict['dims'] = dimdate + dimregs - savedict['comment'] = "This is the standard deviation on each parameter" - savedict['units'] = "mol region-1 s-1" - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorfire - - savedict = ncf.standard_var(varname='fire_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorterm - - savedict = ncf.standard_var(varname='term_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - data = vectorocn - - savedict = ncf.standard_var(varname='ocn_flux_imp') - savedict['values'] = data - savedict['dims'] = dimdate + dimregs - savedict['count'] = next - ncf.add_data(savedict) - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = next - ncf.add_data(savedict) - - sys.stdout.write('.') - sys.stdout.flush() -# -# Done, close the new NetCDF file -# - ncf.close() -# -# Return the full name of the NetCDF file so it can be processed by the next routine -# - logging.info("Vector weekly average fluxes now written") - - return saveas - - -def save_weekly_avg_tc_data(dacycle, statevector): - """ - Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the - function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected - onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param statevector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - - This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve - these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete - statevector in units of mol/box/s which we then turn into TC fluxes and covariances. 
- """ - -# - logging.debug("Analysis data tc weekly") - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - # Write/Create NetCDF output file - # - logging.debug("...write to file: tcfluxes.nc") - saveas = os.path.join(dirname, 'tcfluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimregs = ncf.add_region_dim(type='tc') -# -# set title and tell GMT that we are using "pixel registration" -# - logging.debug("...Set attributes") - setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes') - setattr(ncf, 'node_offset', 1) - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - - # Get input data - - logging.debug("...get input data") - area = globarea() - - infile = os.path.join(dacycle['dir.analysis'], 'data_state_weekly', 'statefluxes.nc') - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile) - return None - - logging.debug("...read file") - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making TC fluxes ") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making TC fluxes") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - logging.debug("...add date") - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the flux_1x1 file - - logging.debug("...convert other variables") - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - - logging.debug("......%s" %vname) - data = ncf_in.get_variable(vname)[index] - - if vname in ['latitude','longitude', 'date', 'idate'] or 'std' in vname: - continue - elif 'ensemble' in vname: - tcdata = [] - logging.debug(".........append ensembles") - for member in data: - tcdata.append(statevector.vector2tc(vectordata=member)) - - logging.debug(".........create tcdata") - tcdata = np.array(tcdata) - try: - cov = tcdata.transpose().dot(tcdata) / (statevector.nmembers - 1) - except: - cov = np.dot(tcdata.transpose(), tcdata) / (statevector.nmembers - 1) # Huygens fix - - #print vname,cov.sum() - - tcdata = cov - - logging.debug(".........save") - savedict = ncf.standard_var(varname=vname.replace('ensemble', 'cov')) - savedict['units'] = '[mol/region/s]**2' - savedict['dims'] = dimdate + dimregs + dimregs - - else: - - 
logging.debug(".........create tcdata") - tcdata = statevector.vector2tc(vectordata=data) # vector to TC - - logging.debug(".........create variables, dimension, units, count and values") - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - savedict['count'] = index - savedict['values'] = tcdata - logging.debug("......save to file") - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("TransCom weekly average fluxes now written") - - return saveas - -def save_weekly_avg_ext_tc_data(dacycle): - """ Function SaveTCDataExt saves surface flux data to NetCDF files for extended TransCom regions - - *** Inputs *** - rundat : a RunInfo object - - *** Outputs *** - NetCDF file containing n-hourly global surface fluxes per TransCom region - - *** Example *** - ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """ - - -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly')) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - # Write/Create NetCDF output file - # - saveas = os.path.join(dirname, 'tc_extfluxes.nc') - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimregs = ncf.add_region_dim(type='tc_ext') -# -# set title and tell GMT that we are using "pixel registration" -# - setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes') - setattr(ncf, 'node_offset', 1) - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - infile = os.path.join(dacycle['dir.analysis'], 'data_tc_weekly', 'tcfluxes.nc') - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." 
% infile) - return None - - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making extended TC fluxes") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making extended TC fluxes ") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the tcfluxes.nc file - - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - - data = ncf_in.get_variable(vname)[index] - - if vname == 'latitude': continue - elif vname == 'longitude': continue - elif vname == 'date': continue - elif vname == 'idate': continue - elif 'cov' in vname: - - tcdata = ExtendedTCRegions(data, cov=True) - - savedict = ncf.standard_var(varname=vname) - savedict['units'] = '[mol/region/s]**2' - savedict['dims'] = dimdate + dimregs + dimregs - - else: - - tcdata = ExtendedTCRegions(data, cov=False) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - savedict['count'] = index - savedict['values'] = tcdata - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("TransCom weekly average extended fluxes now written") - - return saveas - -def save_weekly_avg_agg_data(dacycle, region_aggregate='olson'): - """ - Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the - function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected - onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`. - - :param dacycle: a :class:`~da.tools.initexit.CycleControl` object - :param StateVector: a :class:`~da.baseclasses.statevector.StateVector` - :rtype: None - - This function only read the prior fluxes from the flux_1x1.nc files created before, because we want to convolve - these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete - statevector in units of mol/box/s which we then turn into TC fluxes and covariances. 
- """ - -# - dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_%s_weekly' % region_aggregate)) -# -# Some help variables -# - dectime0 = date2num(datetime(2000, 1, 1)) - dt = dacycle['cyclelength'] - startdate = dacycle['time.start'] - enddate = dacycle['time.end'] - ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0 - - logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - - logging.debug("Aggregating 1x1 fluxes to %s totals" % region_aggregate) - - - # Write/Create NetCDF output file - # - saveas = os.path.join(dirname, '%s_fluxes.%s.nc' % (region_aggregate, startdate.strftime('%Y-%m-%d'))) - ncf = io.CT_CDF(saveas, 'write') - dimdate = ncf.add_date_dim() - dimidateformat = ncf.add_date_dim_format() - dimgrid = ncf.add_latlon_dim() # for mask -# -# Select regions to aggregate to -# - - if region_aggregate == "olson": - regionmask = tc.olson240mask - dimname = 'olson' - dimregs = ncf.add_dim(dimname, regionmask.max()) - - regionnames = [] - for i in range(11): - for j in range(19): - regionnames.append("%s_%s" % (tc.transnams[i], tc.olsonnams[j],)) - regionnames.extend(tc.oifnams) - - for i, name in enumerate(regionnames): - lab = 'Aggregate_Region_%03d' % (i + 1,) - setattr(ncf, lab, name) - - elif region_aggregate == "transcom": - regionmask = tc.transcommask - dimname = 'tc' - dimregs = ncf.add_region_dim(type='tc') - - elif region_aggregate == "country": - - countrydict = ct.get_countrydict() - selected = ['Russia', 'Canada', 'China', 'United States', 'EU27', 'Brazil', 'Australia', 'India'] #,'G8','UNFCCC_annex1','UNFCCC_annex2'] - regionmask = np.zeros((180, 360,), 'float') - - for i, name in enumerate(selected): - lab = 'Country_%03d' % (i + 1,) - setattr(ncf, lab, name) - - if name == 'EU27': - namelist = ct.EU27 - elif name == 'EU25': - namelist = ct.EU25 - elif name == 'G8': - namelist = ct.G8 - elif name == 'UNFCCC_annex1': - namelist = ct.annex1 - elif name == 'UNFCCC_annex2': - namelist = ct.annex2 - else: - namelist = [name] - - for countryname in namelist: - try: - country = countrydict[countryname] - regionmask.put(country.gridnr, i + 1) - except: - continue - - dimname = 'country' - dimregs = ncf.add_dim(dimname, regionmask.max()) - - elif region_aggregate == "methane": - regionmask = tc.methane71mask - dimname = 'methaneland' - dimregs = ncf.add_dim(dimname, regionmask.max()) - - regionnames = [] - for i in range(6): - for j in range(16): - regionnames.append("%s_%s" % (tc.transnams[i], tc.methanenams[j],)) - regionnames.extend(tc.oceannams) - - for i, name in enumerate(regionnames): - lab = 'Aggregate_Region_%03d' % (i + 1,) - setattr(ncf, lab, name) - - - # - - skip = ncf.has_date(ncfdate) - if skip: - logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas)) - else: - # - # set title and tell GMT that we are using "pixel registration" - # - setattr(ncf, 'Title', 'CTDAS Aggregated fluxes') - setattr(ncf, 'node_offset', 1) - - savedict = ncf.standard_var('unknown') - savedict['name'] = 'regionmask' - savedict['comment'] = 'numerical mask used to aggregate 1x1 flux fields, each integer 0,...,N is one region aggregated' - savedict['values'] = regionmask.tolist() - savedict['units'] = '-' - savedict['dims'] = dimgrid - savedict['count'] = 0 - ncf.add_data(savedict) - - # Get input data from 1x1 degree flux files - - area = globarea() - - infile = 
os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d')) - if not os.path.exists(infile): - logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile) - return None - - ncf_in = io.ct_read(infile, 'read') - - # Transform data one by one - - # Get the date variable, and find index corresponding to the dacycle date - - try: - dates = ncf_in.variables['date'][:] - except KeyError: - logging.error("The variable date cannot be found in the requested input file (%s) " % infile) - logging.error("Please make sure you create gridded fluxes before making TC fluxes ") - raise KeyError - - try: - index = dates.tolist().index(ncfdate) - except ValueError: - logging.error("The requested cycle date is not yet available in file %s " % infile) - logging.error("Please make sure you create state based fluxes before making TC fluxes ") - raise ValueError - - # First add the date for this cycle to the file, this grows the unlimited dimension - - savedict = ncf.standard_var(varname='date') - savedict['values'] = ncfdate - savedict['dims'] = dimdate - savedict['count'] = index - ncf.add_data(savedict) - - # Now convert other variables that were inside the statevector file - - vardict = ncf_in.variables - for vname, vprop in vardict.iteritems(): - if vname == 'latitude': continue - elif vname == 'longitude': continue - elif vname == 'date': continue - elif vname == 'idate': continue - elif 'std' in vname: continue - elif 'ensemble' in vname: - - data = ncf_in.get_variable(vname)[index] - - dimensemble = ncf.add_dim('members', data.shape[0]) - - regiondata = [] - for member in data: - aggdata = state_to_grid(member * area, regionmask, reverse=True, mapname=region_aggregate) - regiondata.append(aggdata) - - regiondata = np.array(regiondata) - - savedict = ncf.standard_var(varname=vname) - savedict['units'] = 'mol/region/s' - savedict['dims'] = dimdate + dimensemble + dimregs - - elif 'flux' in vname: - - data = ncf_in.get_variable(vname)[index] - - regiondata = state_to_grid(data * area, regionmask, reverse=True, mapname=region_aggregate) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - savedict['units'] = 'mol/region/s' - - else: - - data = ncf_in.get_variable(vname)[:] - regiondata = state_to_grid(data, regionmask, reverse=True, mapname=region_aggregate) - - savedict = ncf.standard_var(varname=vname) - savedict['dims'] = dimdate + dimregs - - savedict['count'] = index - savedict['values'] = regiondata - ncf.add_data(savedict) - - ncf_in.close() - ncf.close() - - logging.info("%s aggregated weekly average fluxes now written" % dimname) - - return saveas - -def save_time_avg_data(dacycle, infile, avg='monthly'): - """ Function saves time mean surface flux data to NetCDF files - - *** Inputs *** - rundat : a RunInfo object - - *** Outputs *** - daily NetCDF file containing 1-hourly global surface fluxes at 1x1 degree - - *** Example *** - ./expand_savestate project=enkf_release sd=20000101 ed=20010101 """ - - if 'weekly' in infile: - intime = 'weekly' - if 'monthly' in infile: - intime = 'monthly' - if 'yearly' in infile: - intime = 'yearly' - - dirname, filename = os.path.split(infile) - outdir = create_dirs(os.path.join(dacycle['dir.analysis'], dirname.replace(intime, avg))) - - dectime0 = date2num(datetime(2000, 1, 1)) - -# Create NetCDF output file -# - saveas = os.path.join(outdir, filename) - ncf = io.CT_CDF(saveas, 'create') - dimdate = ncf.add_date_dim() -# -# Open 
input file specified from the command line -# - if not os.path.exists(infile): - logging.error("Needed input file (%s) not found. Please create this first:" % infile) - logging.error("returning...") - return None - else: - pass - - file = io.ct_read(infile, 'read') - datasets = file.variables.keys() - date = file.get_variable('date') - globatts = file.ncattrs() - - for att in globatts: - attval = file.getncattr(att) - if not att in ncf.ncattrs(): - ncf.setncattr(att, attval) - - - time = [datetime(2000, 1, 1) + timedelta(days=d) for d in date] - -# loop over datasets in infile, skip idate and date as we will make new time axis for the averaged data - - for sds in ['date'] + datasets: - -# get original data - - data = file.get_variable(sds) - varatts = file.variables[sds].ncattrs() - vardims = file.variables[sds].dimensions -# -# Depending on dims of input dataset, create dims for output dataset. Note that we add the new dimdate now. -# - - for d in vardims: - if 'date' in d: - continue - if d in ncf.dimensions.keys(): - pass - else: - dim = ncf.createDimension(d, size=len(file.dimensions[d])) - - savedict = ncf.standard_var(sds) - savedict['name'] = sds - savedict['dims'] = vardims - savedict['units'] = file.variables[sds].units - savedict['long_name'] = file.variables[sds].long_name - savedict['comment'] = file.variables[sds].comment - savedict['standard_name'] = file.variables[sds].standard_name - savedict['count'] = 0 - - if not 'date' in vardims: - savedict['values'] = data - ncf.add_data(savedict) - else: - - if avg == 'monthly': - time_avg, data_avg = timetools.monthly_avg(time, data) - elif avg == 'seasonal': - time_avg, data_avg = timetools.season_avg(time, data) - elif avg == 'yearly': - time_avg, data_avg = timetools.yearly_avg(time, data) - elif avg == 'longterm': - time_avg, data_avg = timetools.longterm_avg(time, data) - time_avg = [time_avg] - data_avg = [data_avg] - else: - raise ValueError, 'Averaging (%s) does not exist' % avg - - count = -1 - for dd, data in zip(time_avg, data_avg): - count = count + 1 - if sds == 'date': - savedict['values'] = date2num(dd) - dectime0 - else: - savedict['values'] = data - savedict['count'] = count - ncf.add_data(savedict, silent=True) - - sys.stdout.write('.') - - sys.stdout.write('\n') - sys.stdout.flush() - -# end NetCDF file access - file.close() - ncf.close() - - logging.info("------------------- Finished time averaging---------------------------------") - - return saveas - -if __name__ == "__main__": - from da.tools.initexit import CycleControl - from da.carbondioxide.dasystem import CO2DaSystem - from da.carbondioxide.statevector import CO2StateVector - - sys.path.append('../../') - - logging.root.setLevel(logging.DEBUG) - - dacycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'}) - dacycle.setup() - dacycle.parse_times() - - dasystem = CO2DaSystem('../rc/carbontracker_ct09_opfnew.rc') - - dacycle.dasystem = dasystem - - statevector = CO2StateVector() - statevector.setup(dacycle) - - while dacycle['time.end'] < dacycle['time.finish']: - save_weekly_avg_1x1_data(dacycle, statevector) - save_weekly_avg_state_data(dacycle, statevector) - save_weekly_avg_tc_data(dacycle, statevector) - save_weekly_avg_ext_tc_data(dacycle) - save_weekly_avg_agg_data(dacycle, region_aggregate='olson') - save_weekly_avg_agg_data(dacycle, region_aggregate='transcom') - save_weekly_avg_agg_data(dacycle, region_aggregate='country') - - dacycle.advance_cycle_times() - - statevector = None # free memory - - sys.exit(0) - diff --git 
a/da/methane/analysis/tools_regions.py.bak b/da/methane/analysis/tools_regions.py.bak deleted file mode 100755 index 7b48a93a24e764078da8bd8a2b40e8eeabc5758c..0000000000000000000000000000000000000000 --- a/da/methane/analysis/tools_regions.py.bak +++ /dev/null @@ -1,80 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python - -import numpy as np -import cPickle - -def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None): - """ - This method converts parameters from a CarbonTracker StateVector object to a gridded map of linear multiplication values. These - can subsequently be used in the transport model code to multiply/manipulate fluxes - - """ - nregions = regionmap.max() - try: - if not mapname: - raise Exception - - regionselect = cPickle.load(open('%s_regiondict.pickle' % mapname, 'rb')) - except: - - # dictionary for region <-> map conversions - regs = {} - for r in np.arange(1, nregions + 1): - sel = (regionmap.flat == r).nonzero() - if len(sel[0]) > 0: - regs[r] = sel - - regionselect = regs - - cPickle.dump(regionselect, open('%s_regiondict.pickle' % mapname, 'wb'), -1) - print 'Pickling region map' - - if reverse: - """ project 1x1 degree map onto ecoregions """ - - result = np.zeros(nregions, float) - for k, v in regionselect.iteritems(): - if avg: - result[k - 1] = values.ravel().take(v).mean() - else : - result[k - 1] = values.ravel().take(v).sum() - return result - - else: - """ project ecoregion properties onto 1x1 degree map """ - - result = np.zeros((180, 360,), float) - for k, v in regionselect.iteritems(): - result.put(v, values[k - 1]) - - return result - -def globarea(im=360, jm=180, silent=True): - """ Function calculates the surface area according to TM5 definitions""" - - radius = 6.371e6 # the earth radius in meters - deg2rad = np.pi / 180. - g = 9.80665 - - dxx = 360.0 / im * deg2rad - dyy = 180.0 / jm * deg2rad - lat = np.arange(-90 * deg2rad, 90 * deg2rad, dyy) - dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2 - area = np.resize(np.repeat(dxy, im, axis=0) , [jm, im]) - if not silent: - print 'total area of field = ', np.sum(area.flat) - print 'total earth area = ', 4 * np.pi * radius ** 2 - return area - diff --git a/da/methane/dasystem.py.bak b/da/methane/dasystem.py.bak deleted file mode 100755 index 1ec022ebb8a1ce61046460a84a15324c3438d993..0000000000000000000000000000000000000000 --- a/da/methane/dasystem.py.bak +++ /dev/null @@ -1,57 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. 
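For reference, a minimal standalone sketch of the grid-cell area helper defined in the removed tools_regions.py.bak above (globarea), written in Python 3 and assuming only numpy; the function name is kept purely for illustration.

import numpy as np

def globarea(im=360, jm=180):
    """Surface area in m^2 of each cell on a regular im x jm lon/lat grid (TM5 convention)."""
    radius = 6.371e6                      # mean Earth radius in meters
    deg2rad = np.pi / 180.0
    dxx = 360.0 / im * deg2rad            # cell width in radians
    dyy = 180.0 / jm * deg2rad            # cell height in radians
    lat = np.arange(-90.0 * deg2rad, 90.0 * deg2rad, dyy)   # southern edge of each latitude row
    dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2
    return np.resize(np.repeat(dxy, im, axis=0), [jm, im])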
- -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# control.py - -""" -Author : aki - -Revision History: -File created on 16 Nov 2013. - -""" - -import logging - -################### Begin Class MethaneDaSystem ################### - -from da.baseclasses.dasystem import DaSystem - -class MethaneDaSystem(DaSystem): - """ Information on the data assimilation system used. This is normally an rc-file with settings. - """ - def validate(self): - """ - validate the contents of the rc-file given a dictionary of required keys - """ - - needed_rc_items = ['obs.input.dir', - 'obs.input.fname'] - - - for k, v in self.iteritems(): - if v == 'True' : - self[k] = True - if v == 'False': - self[k] = False - - for key in needed_rc_items: - if key not in self: - logging.warning('Missing a required value in rc-file : %s' % key) - logging.debug('DA System Info settings have been validated succesfully') - -################### End Class MethaneDaSystem ################### - - -if __name__ == "__main__": - pass diff --git a/da/methane/io4.py.bak b/da/methane/io4.py.bak deleted file mode 100755 index 5624873b1cd61a88fb36984417022abd6e210904..0000000000000000000000000000000000000000 --- a/da/methane/io4.py.bak +++ /dev/null @@ -1,70 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# io.py - -""" -Author : aki - -Revision History: -File created on Apr 2016. 
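The validate() method in the removed dasystem.py.bak above still relies on Python 2's iteritems; a minimal Python 3 sketch of the same rc-file check, assuming the settings arrive as a plain dict (the helper name validate_rc is illustrative only):

import logging

def validate_rc(settings, required_keys=('obs.input.dir', 'obs.input.fname')):
    # turn the string booleans written in the rc-file into real booleans
    for key, value in settings.items():
        if value == 'True':
            settings[key] = True
        elif value == 'False':
            settings[key] = False
    # warn about, but do not fail on, missing required keys (as the original does)
    for key in required_keys:
        if key not in settings:
            logging.warning('Missing a required value in rc-file : %s' % key)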
- -""" -import standardvariables -import datetime as dt -from numpy import array, arange -import os -import logging -import sys -sys.path.append('/stornext/store/carbon/CarbonTracker/netcdf4/python_packages_install_dir/lib/python2.7/site-packages/') -import netCDF4 -sys.path.append('/stornext/store/carbon/CarbonTracker/pyhdf/pyhdf-0.8.3/lib/python2.7/site-packages/') -import pyhdf.SD as hdf -sys.path.append('../') -from da.tools.io4 import * - - -disclaimer = "This data belongs to the CarbonTracker project" -email = "aki.tsuruta@fmi.fi" -url = "http://en.ilmatieteenlaitos.fi/carbon-cycle-modelling" -institution = "Finnish Meteorological Institute, Climate research" -source = "CarbonTracker release 1.0" -conventions = "CF-1.1" -historytext = 'created on '+dt.datetime.now().strftime('%B %d, %Y')+' by %s'%os.environ['USER'] - - -def add_tc_header2(self): - - # - self.setncattr('Institution',institution) - self.setncattr('Contact',email) - self.setncattr('URL',url) - self.setncattr('Source',source) - self.setncattr('Convention',conventions) - self.setncattr('Disclaimer',disclaimer) - self.setncattr('History',historytext) - -def standard_var2(self,varname): - """ return properties of standard variables """ - import standardvariables - - if varname in standardvariables.standard_variables.keys(): - return standardvariables.standard_variables[varname] - else: - return standardvariables.standard_variables['unknown'] - - - -CT_CDF.add_tc_header = add_tc_header2 -CT_CDF.standard_var = standard_var2 -CT_HDF.standard_var = standard_var2 diff --git a/da/methane/obs.py.bak b/da/methane/obs.py.bak deleted file mode 100755 index e5bd46cb7bf9f9696814b86797820ea4dce37f06..0000000000000000000000000000000000000000 --- a/da/methane/obs.py.bak +++ /dev/null @@ -1,473 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# obs.py - -""" -Author : aki - -Revision History: -File revised on Feb 2017. 
- -""" -import os -import sys -import logging -#from da.baseclasses.statevector import filename -import datetime as dtm -from string import strip -from numpy import array, logical_and - -sys.path.append(os.getcwd()) -sys.path.append('../../') - -identifier = 'CarbonTracker CH4 mole fractions' -version = '0.0' - -from da.baseclasses.obs import Observations -import da.methane.io4 as io -import da.tools.rc as rc - -################### Begin Class CH4Observations ################### - -class MethaneObservations(Observations): - """ an object that holds data + methods and attributes needed to manipulate mole fraction values """ - - def setup(self, dacycle): - self.startdate = dacycle['time.sample.start'] - self.enddate = dacycle['time.sample.end'] - - sfname = dacycle.dasystem['obs.input.fname'] - if sfname.endswith('.nc'): - filename = os.path.join(dacycle.dasystem['obs.input.dir'], sfname) - else: - filename = os.path.join(dacycle.dasystem['obs.input.dir'], sfname + '.' + self.startdate.strftime('%Y%m%d') + '.nc') - - if not os.path.exists(filename): - msg = 'Could not find the required observation input file (%s) ' % filename - logging.error(msg) - raise IOError(msg) - else: - self.obs_filename = filename - self.datalist = [] - - def add_observations(self): - """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file - - The CarbonTracker mole fraction files are provided as one long list of obs for all possible dates. So we can - either: - - (1) read all, and the subselect the data we will use in the rest of this cycle - (2) Use nco to make a subset of the data - - For now, we will stick with option (1) - - """ - ncf = io.ct_read(self.obs_filename, 'read') - idates = ncf.get_variable('date_components') - dates = array([dtm.datetime(*d) for d in idates]) - - subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0] - - dates = dates.take(subselect, axis=0) - - ids = ncf.get_variable('obs_num').take(subselect, axis=0) - #evn = ncf.get_variable('eventnumber').take(subselect, axis=0) - #evn = [s.tostring().lower() for s in evn] - #evn = map(strip, evn) - sites = ncf.get_variable('obs_id').take(subselect, axis=0) - sites = [s.tostring().lower() for s in sites] - sites = map(strip, sites) - lats = ncf.get_variable('latitude').take(subselect, axis=0) - lons = ncf.get_variable('longitude').take(subselect, axis=0) - alts = ncf.get_variable('altitude').take(subselect, axis=0) - obs = ncf.get_variable('obs').take(subselect, axis=0) * 1.e-9 - logging.info("Converting observed values from ppb to mol/mol!!!!") - #species = ncf.get_variable('species').take(subselect, axis=0) - #species = [s.tostring().lower() for s in species] - #species = map(strip, species) - #strategy = ncf.get_variable('sampling_strategy').take(subselect, axis=0) - #flags = ncf.get_variable('NOAA_QC_flags').take(subselect, axis=0) - #flags = [s.tostring().lower() for s in flags] - #flags = map(strip, flags) - #flags = [int(f == '...') for f in flags] - ncf.close() - - logging.debug("Successfully read data from obs file (%s)" % self.obs_filename) - - for n in range(len(dates)): - obs[n] = obs[n] - #self.datalist.append(MoleFractionSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], evn[n], species[n], strategy[n], 0.0, self.obs_filename)) - self.datalist.append( MoleFractionSample(ids[n], dates[n], sites[n], obs[n], 0.0, 0.0, 0.0, 0.0, 0.0, alts[n], lats[n], lons[n], '0000', 'ch4', 1.0, 0.0, self.obs_filename) ) - 
logging.debug("Added %d observations to the Data list" % len(dates)) - - def add_simulations(self, filename, silent=True): - """ Adds model simulated values to the mole fraction objects """ - - - if not os.path.exists(filename): - msg = "Sample output filename for observations could not be found : %s" % filename - logging.error(msg) - logging.error("Did the sampling step succeed?") - logging.error("...exiting") - raise IOError(msg) - - ncf = io.ct_read(filename, method='read') - ids = ncf.get_variable('obs_num') - simulated = ncf.get_variable('flask') - #for i in xrange(simulated.shape[0]): - # print simulated[i,:] - ncf.close() - logging.info("Successfully read data from model sample file (%s)" % filename) - - obs_ids = self.getvalues('id') - - obs_ids = obs_ids.tolist() - ids = map(int, ids) - - missing_samples = [] - - for idx, val in zip(ids, simulated): - if idx in obs_ids: - index = obs_ids.index(idx) - #print id,val,val.shape - self.datalist[index].simulated = val - else: - missing_samples.append(idx) - - if not silent and missing_samples != []: - logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...') - #msg = '%s'%missing_samples ; logging.warning(msg) - - logging.info("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples))) - - def write_sample_coords(self, obsinputfile): - """ - Write the information needed by the observation operator to a file. Return the filename that was written for later use - - """ - f = io.CT_CDF(obsinputfile, method='create') - logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return obsinputfile - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate') ] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('lat') - - savedict = io.std_savedict.copy() - savedict['name'] = "latitude" - savedict['units'] = "degrees_north" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('lon') - - savedict = io.std_savedict.copy() - savedict['name'] = "longitude" - savedict['units'] = "degrees_east" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('height') - - savedict = io.std_savedict.copy() - savedict['name'] = "altitude" - savedict['units'] = "meters_above_sea_level" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -999.9 - f.add_data(savedict) - - data = self.getvalues('samplingstrategy') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "sampling_strategy" - savedict['units'] = "NA" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['missing_value'] = -9 - f.add_data(savedict) - - data = self.getvalues('evn') - - savedict = io.std_savedict.copy() - savedict['dtype'] = "char" - savedict['name'] = "obs_id" - savedict['units'] = "NOAA database identifier" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to obs file") - logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile) - - - - - def add_model_data_mismatch(self, filename): - """ - Get the model-data mismatch values for this cycle. - - (1) Open a sites_weights file - (2) Parse the data - (3) Compare site list against data - (4) Take care of double sites, etc - - """ - - if not os.path.exists(filename): - msg = 'Could not find the required sites.rc input file (%s)' % filename - logging.error(msg) - raise IOError(msg) - else: - self.sites_file = filename - - sites_weights = rc.read(self.sites_file) - - self.rejection_threshold = int(sites_weights['obs.rejection.threshold']) - self.global_R_scaling = float(sites_weights['global.R.scaling']) - self.n_site_categories = int(sites_weights['n.site.categories']) - self.n_sites_active = int(sites_weights['n.sites.active']) - self.n_sites_moved = int(sites_weights['n.sites.moved']) - - logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold) - logging.debug('Model-data mismatch scaling factor : %s ' % self.global_R_scaling) - logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories) - logging.debug('Model-data mismatch active sites : %d ' % self.n_sites_active) - logging.debug('Model-data mismatch moved sites : %d ' % self.n_sites_moved) - - cats = [k for k in sites_weights.keys() if 'site.category' in k] - - SiteCategories = {} - for key in cats: - name, error, may_localize, may_reject = sites_weights[key].split(';') - name = name.strip().lower() - error = float(error) - may_reject = ("TRUE" in may_reject.upper()) - may_localize = ("TRUE" in may_localize.upper()) - SiteCategories[name] = {'error':error, 'may_localize':may_localize, 'may_reject':may_reject} - #print name,SiteCategories[name] - - active = [k for k in sites_weights.keys() if 'site.active' in k] - - site_info = {} - for key in active: - sitename, sitecategory = sites_weights[key].split(';') - sitename = 
sitename.strip().lower() - sitecategory = sitecategory.strip().lower() - site_info[sitename] = SiteCategories[sitecategory] - #print sitename,site_info[sitename] - - for obs in self.datalist: - obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script - if obs.code in site_info: - logging.debug("Observation found (%s)" % obs.code) - obs.mdm = site_info[obs.code]['error'] * self.global_R_scaling - obs.may_localize = site_info[obs.code]['may_localize'] - obs.may_reject = site_info[obs.code]['may_reject'] - else: - logging.warning("Observation NOT found (%s, %s), please check sites.rc file (%s) !!!" % (obs.code, identifier, self.sites_file)) - # raise IOError - obs.flag = 99 - logging.debug("obs %s model-data-mismatch: %s" %(obs.code, obs.mdm)) - - # Add site_info dictionary to the Observations object for future use - - self.site_info = site_info - - def write_sample_auxiliary(self, auxoutputfile): - """ - Write selected information contained in the Observations object to a file. - - """ - - f = io.CT_CDF(auxoutputfile, method='create') - logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile) - - dimid = f.add_dim('obs', len(self.datalist)) - dim200char = f.add_dim('string_of200chars', 200) - dimcalcomp = f.add_dim('calendar_components', 6) - - if len(self.datalist) == 0: - f.close() - #return outfile - - data = self.getvalues('id') - - savedict = io.std_savedict.copy() - savedict['name'] = "obs_num" - savedict['dtype'] = "int" - savedict['long_name'] = "Unique_Dataset_observation_index_number" - savedict['units'] = "" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED." - f.add_data(savedict) - - data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')] - - savedict = io.std_savedict.copy() - savedict['dtype'] = "int" - savedict['name'] = "date_components" - savedict['units'] = "integer components of UTC date/time" - savedict['dims'] = dimid + dimcalcomp - savedict['values'] = data - savedict['missing_value'] = -9 - savedict['comment'] = "Calendar date components as integers. Times and dates are UTC." 
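As a small illustration of the sites.rc parsing done in add_model_data_mismatch above, one 'site.category' entry of the form "name ; error ; may_localize ; may_reject" could be unpacked like this in Python 3 (the helper name is hypothetical):

def parse_site_category(entry):
    # split the semicolon-separated fields and normalize them
    name, error, may_localize, may_reject = entry.split(';')
    return name.strip().lower(), {
        'error': float(error),
        'may_localize': 'TRUE' in may_localize.upper(),
        'may_reject': 'TRUE' in may_reject.upper(),
    }

# parse_site_category('mlo ; 1.5 ; TRUE ; FALSE')
# -> ('mlo', {'error': 1.5, 'may_localize': True, 'may_reject': False})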
- savedict['order'] = "year, month, day, hour, minute, second" - f.add_data(savedict) - - data = self.getvalues('obs') - - savedict = io.std_savedict.copy() - savedict['name'] = "observed" - savedict['long_name'] = "observedvalues" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Observations used in optimization' - f.add_data(savedict) - - data = self.getvalues('mdm') - - savedict = io.std_savedict.copy() - savedict['name'] = "modeldatamismatch" - savedict['long_name'] = "modeldatamismatch" - savedict['units'] = "[mol mol-1]" - savedict['dims'] = dimid - savedict['values'] = data.tolist() - savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch' - f.add_data(savedict) - - data = self.getvalues('simulated') - - dimmembers = f.add_dim('members', data.shape[1]) - - savedict = io.std_savedict.copy() - savedict['name'] = "modelsamples" - savedict['long_name'] = "modelsamples for all ensemble members" - savedict['units'] = "mol mol-1" - savedict['dims'] = dimid + dimmembers - savedict['values'] = data.tolist() - savedict['comment'] = 'simulated mole fractions based on optimized state vector' - f.add_data(savedict) - - data = self.getvalues('fromfile') - - savedict = io.std_savedict.copy() - savedict['name'] = "inputfilename" - savedict['long_name'] = "name of file where original obs data was taken from" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - data = self.getvalues('code') - - savedict = io.std_savedict.copy() - savedict['name'] = "sitecode" - savedict['long_name'] = "site code propagated from observation file" - savedict['dtype'] = "char" - savedict['dims'] = dimid + dim200char - savedict['values'] = data - savedict['missing_value'] = '!' - f.add_data(savedict) - - f.close() - - logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile) - - #return outfile - - -################### End Class CH4Observations ################### - - - -################### Begin Class MoleFractionSample ################### - -class MoleFractionSample(object): - """ - Holds the data that defines a mole fraction Sample in the data assimilation framework. Sor far, this includes all - attributes listed below in the __init__ method. One can additionally make more types of data, or make new - objects for specific projects. 
- - """ - - def __init__(self, idx, xdate, code='XXX', obs=0.0, simulated=0.0, resid=0.0, hphr=0.0, mdm=0.0, flag=0, height=0.0, lat= -999., lon= -999., evn='0000', species='ch4', samplingstrategy=1, sdev=0.0, fromfile='none.nc'): - self.code = code.strip() # Site code - self.xdate = xdate # Date of obs - self.obs = obs # Value observed - self.simulated = simulated # Value simulated by model - self.resid = resid # Mole fraction residuals - self.hphr = hphr # Mole fraction prior uncertainty from fluxes and (HPH) and model data mismatch (R) - self.mdm = mdm # Model data mismatch - self.may_localize = True # Whether sample may be localized in optimizer - self.may_reject = True # Whether sample may be rejected if outside threshold - self.flag = flag # Flag - self.height = height # Sample height - self.lat = lat # Sample lat - self.lon = lon # Sample lon - self.id = idx # ID number - self.evn = evn # Event number - self.sdev = sdev # standard deviation of ensemble - self.masl = True # Sample is in Meters Above Sea Level - self.mag = not self.masl # Sample is in Meters Above Ground - self.species = species.strip() - self.samplingstrategy = samplingstrategy - self.fromfile = fromfile # netcdf filename inside observation distribution, to write back later - -################### End Class MoleFractionSample ################### - - -if __name__ == "__main__": - pass diff --git a/da/methane/statevector.py.bak b/da/methane/statevector.py.bak deleted file mode 100755 index 6d27370678ae8f12bb2170fe96ca712bf1cacbd0..0000000000000000000000000000000000000000 --- a/da/methane/statevector.py.bak +++ /dev/null @@ -1,125 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# ct_statevector_tools.py - -""" -Author : aki - -Revision History: -File created on 18 Nov 2013. - -""" - -import os -import sys -sys.path.append(os.getcwd()) - -import logging -import numpy as np -from da.baseclasses.statevector import StateVector, EnsembleMember - -import da.tools.io4 as io - -identifier = 'CarbonTracker Statevector' -version = '0.0' - -################### Begin Class CO2StateVector ################### - -class MethaneStateVector(StateVector): - """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """ - - def make_species_mask(self): - - self.speciesdict = {'ch4': np.ones(self.nparams)} - logging.debug("A species mask was created, only the following species are recognized in this system:") - for k in self.speciesdict.keys(): - logging.debug(" -> %s" % k) - - def get_covariance(self, date, dacycle): - """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. 
- The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - """ - try: - import matplotlib.pyplot as plt - except: - pass - - fullcov = np.zeros((self.nparams, self.nparams), float) - - for i in xrange(self.nparams): - #fullcov[i,i] = 1. # Identity matrix - fullcov[i,i] = 0.08 - fullcov[self.nparams-1,self.nparams-1] = 1.e-10 - - try: - plt.imshow(fullcov) - plt.colorbar() - plt.savefig('fullcovariancematrix.png') - plt.close('all') - logging.debug("Covariance matrix visualized for inspection") - except: - pass - - return fullcov - - def read_from_legacy_file(self, filename, qual='opt'): - """ - :param filename: the full filename for the input NetCDF file - :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file - :rtype: None - - Read the StateVector information from a NetCDF file and put in a StateVector object - In principle the input file will have only one four datasets inside - called: - * `meanstate_prior`, dimensions [nlag, nparamaters] - * `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters] - * `meanstate_opt`, dimensions [nlag, nparamaters] - * `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters] - - This NetCDF information can be written to file using - :meth:`~da.baseclasses.statevector.StateVector.write_to_file` - - """ - - f = io.ct_read(filename, 'read') - - for n in range(self.nlag): - if qual == 'opt': - meanstate = f.get_variable('xac_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('adX_%02d' % (n + 1)) - - elif qual == 'prior': - meanstate = f.get_variable('xpc_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('pdX_%02d' % (n + 1)) - - if not self.ensemble_members[n] == []: - self.ensemble_members[n] = [] - logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1)) - - for m in range(self.nmembers): - newmember = EnsembleMember(m) - newmember.param_values = EnsembleMembers[m, :].flatten() + meanstate # add the mean to the deviations to hold the full parameter values - self.ensemble_members[n].append(newmember) - - - f.close() - - logging.info('Successfully read the State Vector from file (%s) ' % filename) - -################### End Class MethaneStateVector ################### - - -if __name__ == "__main__": - pass diff --git a/da/methane/statevector_2lambdas.py.bak b/da/methane/statevector_2lambdas.py.bak deleted file mode 100755 index 5c137ca153bfd55de47c6cef85c7ea2b616468cb..0000000000000000000000000000000000000000 --- a/da/methane/statevector_2lambdas.py.bak +++ /dev/null @@ -1,495 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# ct_statevector_tools.py - -""" -Author : aki - -Revision History: -File created on 18 Nov 2013. 
- -""" - -import os -import sys -sys.path.append(os.getcwd()) - -import logging -import numpy as np -from da.baseclasses.statevector import StateVector, EnsembleMember - -from datetime import timedelta -import da.methane.io4 as io - -identifier = 'CarbonTracker Statevector ' -version = '0.0' - -################### Begin Class CH4StateVector ################### - -class MethaneStateVector(StateVector): - """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """ - - def setup(self, dacycle): - """ - setup the object by specifying the dimensions. - There are two major requirements for each statvector that you want to build: - - (1) is that the statevector can map itself onto a regular grid - (2) is that the statevector can map itself (mean+covariance) onto TransCom regions - - An example is given below. - """ - - self.nlag = int(dacycle['time.nlag']) - self.nmembers = int(dacycle['da.optimizer.nmembers']) - #self.distribution = dacycle['da.distribution'] - self.nobs = 0 - - self.obs_to_assimilate = () # empty containter to hold observations to assimilate later on - - # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist - # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. - - self.ensemble_members = range(self.nlag) - - for n in range(self.nlag): - self.ensemble_members[n] = [] - - - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember - # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. - - mapfile = os.path.join(dacycle.dasystem['regionsfile']) - ncf = io.ct_read(mapfile, 'read') - self.gridmap = ncf.get_variable('regions') - self.tcmap = ncf.get_variable('transcom_regions') - self.nparams = int(self.gridmap.max()) - self.nparams_bio = int(ncf.get_variable('nparams_bio')) - ncf.close() - - logging.info ("Regional information read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A parameter map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - - # Create a dictionary for state <-> gridded map conversions - - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) > 0: - self.griddict[r] = sel - - logging.debug("A dictionary to map grids to states and vice versa was created") - - # Create a matrix for state <-> TransCom conversions - - self.tcmatrix = np.zeros((self.nparams, int(self.tcmap.max())), 'float') - - for r in range(1, self.nparams + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) < 1: - continue - else: - n_tc = set(self.tcmap.flatten().take(sel[0])) - if len(n_tc) > 1: - logging.error("Parameter %d seems to map to multiple TransCom regions (%s), I do not know how to handle this" % (r, n_tc)) - raise ValueError - self.tcmatrix[r - 1, n_tc.pop() - 1] = 1.0 - - logging.debug("A matrix to map states to TransCom regions and vice versa was created") - - # Create a mask for species/unknowns - - self.make_species_mask() - - - def make_species_mask(self): - - self.speciesdict = {'ch4': np.ones(self.nparams)} - logging.debug("A species mask was created, only the following species are 
recognized in this system:") - for k in self.speciesdict.keys(): - logging.debug(" -> %s" % k) - - def get_covariance(self, date, dacycle): - """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - """ - try: - import matplotlib.pyplot as plt - except: - pass - - #----Arbitray covariance matrix ----# - #fullcov = np.zeros((self.nparams, self.nparams), float) - - #for i in xrange(self.nparams): - # #fullcov[i,i] = 1. # Identity matrix - # fullcov[i,i] = 0.08 - #fullcov[self.nparams-1,self.nparams-1] = 1.e-10 - - #---- Covariance read from file -----# - cov_file = dacycle.dasystem['covfile'] - nfcr = io.CT_CDF(cov_file, 'read') - logging.info('covariance read from file: %s' %cov_file) - fullcov = nfcr.variables['covariance_matrix'][:] - -# try: -# plt.imshow(fullcov) -# plt.colorbar() -# plt.savefig('fullcovariancematrix.png') -# plt.close('all') -# logging.debug("Covariance matrix visualized for inspection") -# except: -# pass - - return fullcov - - def read_from_legacy_file(self, filename, qual='opt'): - """ - :param filename: the full filename for the input NetCDF file - :param qual: a string indicating whether to read the 'prior' or 'opt'(imized) StateVector from file - :rtype: None - - Read the StateVector information from a NetCDF file and put in a StateVector object - In principle the input file will have only one four datasets inside - called: - * `meanstate_prior`, dimensions [nlag, nparamaters] - * `ensemblestate_prior`, dimensions [nlag,nmembers, nparameters] - * `meanstate_opt`, dimensions [nlag, nparamaters] - * `ensemblestate_opt`, dimensions [nlag,nmembers, nparameters] - - This NetCDF information can be written to file using - :meth:`~da.baseclasses.statevector.StateVector.write_to_file` - - """ - - f = io.ct_read(filename, 'read') - - for n in range(self.nlag): - if qual == 'opt': - meanstate = f.get_variable('xac_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('adX_%02d' % (n + 1)) - - elif qual == 'prior': - meanstate = f.get_variable('xpc_%02d' % (n + 1)) - EnsembleMembers = f.get_variable('pdX_%02d' % (n + 1)) - - if not self.ensemble_members[n] == []: - self.ensemble_members[n] = [] - logging.warning('Existing ensemble for lag=%d was removed to make place for newly read data' % (n + 1)) - - for m in range(self.nmembers): - newmember = EnsembleMember(m) - newmember.param_values = EnsembleMembers[m, :].flatten() + meanstate # add the mean to the deviations to hold the full parameter values - self.ensemble_members[n].append(newmember) - - - f.close() - - logging.info('Successfully read the State Vector from file (%s) ' % filename) - - def write_members_to_file(self, lag, outdir, dacycle): - """ - :param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag] - :rtype: None - - Write ensemble member information to a NetCDF file for later use. The standard output filename is - *parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location - is the `dir.input` of the dacycle object. In principle the output file will have only two datasets inside - called `parametervalues` which is of dimensions `nparameters` and `parametermap` which is of dimensions (180,360). - This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object. - - .. 
note:: if more, or other information is needed to complete the sampling of the ObservationOperator you - can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function. - - """ - - # These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was - # to do the import already at the start of the module, not just in this method. - - #import da.tools.io as io - #import da.tools.io4 as io - - members = self.ensemble_members[lag] - - #--- regionfile ---# - region_land_file = dacycle.dasystem['regionsfile'] - nfcr = io.CT_CDF(region_land_file, 'read') - logging.debug('region land file read: %s' %region_land_file) - bio_land = nfcr.variables['bio_land'][:] - fossil_land = nfcr.variables['fossil_land'][:] - - - for mem in members: - filename = os.path.join(outdir, 'parameters.%03d.nc' % mem.membernumber) - ncf = io.CT_CDF(filename, method='create') - dimparams = ncf.add_params_dim(self.nparams) - dimgrid = ncf.add_latlon_dim() - - data = mem.param_values - - savedict = io.std_savedict.copy() - savedict['name'] = "parametervalues" - savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimparams - savedict['values'] = data - savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - -# #--- bio ---# -# dimparams_bio = ncf.add_params_dim(self.nparams_bio) -# data_bio = data[0:self.nparams_bio] -# -# savedict = io.std_savedict.copy() -# savedict['name'] = "parametervalues_bio" -# savedict['long_name'] = "parameter_bio_values_for_member_%d" % mem.membernumber -# savedict['units'] = "unitless" -# savedict['dims'] = dimparams_bio -# savedict['values'] = data_bio -# savedict['comment'] = 'These are parameter bio values to use for member %d' % mem.membernumber -# ncf.add_data(savedict) -# -# #--- fossil ---# -# nparams_fossil = self.nparams - self.nparams_bio -# dimparams_fossil = ncf.add_params_dim(nparams_fossil) -# data_fossil = data[self.nparams_bio:nparams_fossil] -# -# savedict = io.std_savedict.copy() -# savedict['name'] = "parametervalues_ff" -# savedict['long_name'] = "parameter_ff_values_for_member_%d" % mem.membernumber -# savedict['units'] = "unitless" -# savedict['dims'] = dimparams_fossil -# savedict['values'] = data_fossil -# savedict['comment'] = 'These are parameter fossil values to use for member %d' % mem.membernumber -# ncf.add_data(savedict) - - #--- All parameters, gridded ---# - griddata = self.vector2grid(vectordata=data) - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap" - savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata.tolist() - savedict['comment'] = 'These are gridded parameter values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - #--- bio parameters, gridded ---# - griddata_bio = griddata[:] - w = np.where(bio_land==0) - griddata_bio[w] = 1.0 - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap_bio" - savedict['long_name'] = "parametermap_bio_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata_bio.tolist() - savedict['comment'] = 'These are gridded parameter bio values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - #--- All parameters, gridded ---# - griddata = self.vector2grid(vectordata=data) 
- griddata_fossil = griddata[:] - w = np.where(fossil_land==0) - griddata_fossil[w] = 1.0 - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap_ff" - savedict['long_name'] = "parametermap_ff_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata_fossil.tolist() - savedict['comment'] = 'These are gridded parameter fossil values to use for member %d' % mem.membernumber - ncf.add_data(savedict) - - ncf.close() - - logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename)) - -# def write_to_file(self, filename, qual): -# """ -# :param filename: the full filename for the output NetCDF file -# :rtype: None -# -# Write the StateVector information to a NetCDF file for later use. -# In principle the output file will have only one two datasets inside -# called: -# * `meanstate`, dimensions [nlag, nparamaters] -# * `ensemblestate`, dimensions [nlag,nmembers, nparameters] -# -# This NetCDF information can be read back into a StateVector object using -# :meth:`~da.baseclasses.statevector.StateVector.read_from_file` -# -# """ -# #import da.tools.io4 as io -# #import da.tools.io as io -# -# if qual == 'prior': -# f = io.CT_CDF(filename, method='create') -# logging.debug('Creating new StateVector output file (%s)' % filename) -# #qual = 'prior' -# else: -# f = io.CT_CDF(filename, method='write') -# logging.debug('Opening existing StateVector output file (%s)' % filename) -# #qual = 'opt' -# -# dimparams = f.add_params_dim(self.nparams) -# dimmembers = f.add_members_dim(self.nmembers) -# dimlag = f.add_lag_dim(self.nlag, unlimited=True) -# -# for n in range(self.nlag): -# members = self.ensemble_members[n] -# mean_state = members[0].param_values -# -# savedict = f.standard_var(varname='meanstate_%s' % qual) -# savedict['dims'] = dimlag + dimparams -# savedict['values'] = mean_state -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# savedict = f.standard_var(varname='meanstate_bio_%s' % qual) -# savedict['dims'] = dimlag + dimparams_bio -# savedict['values'] = mean_state_bio -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# savedict = f.standard_var(varname='meanstate_fossil_%s' % qual) -# savedict['dims'] = dimlag + dimparams_fossil -# savedict['values'] = mean_state_fossil -# savedict['count'] = n -# savedict['comment'] = 'this represents the mean of the ensemble' -# f.add_data(savedict) -# -# members = self.ensemble_members[n] -# devs = np.asarray([m.param_values.flatten() for m in members]) -# data = devs - np.asarray(mean_state) -# -# savedict = f.standard_var(varname='ensemblestate_%s' % qual) -# savedict['dims'] = dimlag + dimmembers + dimparams -# savedict['values'] = data -# savedict['count'] = n -# savedict['comment'] = 'this represents deviations from the mean of the ensemble' -# f.add_data(savedict) -# f.close() -# -# logging.info('Successfully wrote the State Vector to file (%s) ' % filename) - - - def make_new_ensemble(self, lag, dacycle, covariancematrix=None): - """ - :param lag: an integer indicating the time step in the lag order - :param covariancematrix: a matrix to draw random values from - :rtype: None - - Make a new ensemble, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. 
- The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - - The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is - used to draw ensemblemembers from. If this argument is not passed it will be substituted with an - identity matrix of the same dimensions. - - """ - - if covariancematrix == None: - covariancematrix = np.identity(self.nparams) - - # Make a cholesky decomposition of the covariance matrix - - - _, s, _ = np.linalg.svd(covariancematrix) - dof = np.sum(s) ** 2 / sum(s ** 2) - C = np.linalg.cholesky(covariancematrix) - - logging.debug('Cholesky decomposition has succeeded ') - logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof))) - - - # Create mean values - - newmean = np.ones(self.nparams, float) # standard value for a new time step is 1.0 - - # If this is not the start of the filter, average previous two optimized steps into the mix - - if lag == self.nlag - 1 and self.nlag >= 3: - newmean += self.ensemble_members[lag - 1][0].param_values + \ - self.ensemble_members[lag - 2][0].param_values - newmean = newmean / 3.0 - - # Create the first ensemble member with a deviation of 0.0 and add to list - - newmember = EnsembleMember(0) - newmember.param_values = newmean.flatten() # no deviations - self.ensemble_members[lag].append(newmember) - - # Create members 1:nmembers and add to ensemble_members list - #dist = self.distribution - - for member in range(1, self.nmembers): - # if dist == 'normal': - # rands = np.random.randn(self.nparams) - # elif dist == 'gamma11': - # rands = np.random.exponential(1,size=self.nparams) - 1 #substruct mean=1. - # elif dist == 'lognormal': - # rands = np.random.lognormal(0,0.5,size=self.nparams) - 1 #substruct median=1. - # else: - # logging.error('Distribution (%s)to generate from not known' %dist) - # sys.exit(2) - - rands = np.random.randn(self.nparams) - newmember = EnsembleMember(member) - newmember.param_values = np.dot(C, rands) + newmean - self.ensemble_members[lag].append(newmember) - - logging.info('%d new ensemble members generated from %s were added to the state vector # %d'\ - #%(self.nmembers,dist, (lag + 1))) - %(self.nmembers,'normal', (lag + 1))) - - def propagate(self, dacycle): - """ - :rtype: None - - Propagate the parameter values in the StateVector to the next cycle. This means a shift by one cycle - step for all states that will - be optimized once more, and the creation of a new ensemble for the time step that just - comes in for the first time (step=nlag). - In the future, this routine can incorporate a formal propagation of the statevector. - - """ - - # Remove State Vector n=1 by simply "popping" it from the list and appending a new empty list at the front. 
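The make_new_ensemble method above draws members as the mean state plus correlated Gaussian deviations obtained from a Cholesky factor of the covariance matrix. A minimal standalone sketch of that step, assuming numpy and using illustrative names:

import numpy as np

def draw_ensemble(mean, covariance, nmembers, seed=None):
    # member 0 carries the mean with no deviation; the rest get correlated perturbations
    rng = np.random.default_rng(seed)
    chol = np.linalg.cholesky(covariance)   # lower-triangular factor, chol @ chol.T == covariance
    mean = np.asarray(mean, dtype=float)
    members = [mean.copy()]
    for _ in range(1, nmembers):
        members.append(chol @ rng.standard_normal(mean.size) + mean)
    return np.asarray(members)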
This empty list will - # hold the new ensemble for the new cycle - - self.ensemble_members.pop(0) - self.ensemble_members.append([]) - - # And now create a new time step of mean + members for n=nlag - date = dacycle['time.start'] + timedelta(days=(self.nlag - 0.5) * int(dacycle['time.cycle'])) - cov = self.get_covariance(date, dacycle) - self.make_new_ensemble(self.nlag - 1, dacycle, cov) - - logging.info('The state vector has been propagated by one cycle') - -################### End Class MethaneStateVector ################### - - -if __name__ == "__main__": - pass diff --git a/da/platform/__pycache__/__init__.cpython-37.pyc b/da/platform/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index e8579db66a6264a39f6f4d29b8e4fc23bf3c4813..0000000000000000000000000000000000000000 Binary files a/da/platform/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/da/platform/__pycache__/capegrim.cpython-37.pyc b/da/platform/__pycache__/capegrim.cpython-37.pyc deleted file mode 100644 index 797a5cc6ff8a09370d8ec54e44d6b5ed61b3eca0..0000000000000000000000000000000000000000 Binary files a/da/platform/__pycache__/capegrim.cpython-37.pyc and /dev/null differ diff --git a/da/platform/capegrim.py.bak b/da/platform/capegrim.py.bak deleted file mode 100755 index 269a3a3e8941339cb4ee7cd9a63e7258ae36d37c..0000000000000000000000000000000000000000 --- a/da/platform/capegrim.py.bak +++ /dev/null @@ -1,94 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# maunaloa.py - -""" -Author : peters - -Revision History: -File created on 06 Sep 2010. - -""" - -import logging - -from da.baseclasses.platform import Platform, std_joboptions - -class CapeGrimPlatform(Platform): - def __init__(self): - - self.ID = 'WU capegrim' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - def give_blocking_flag(self): - return "" - - def give_queue_type(self): - return "foreground" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. 
- """ - - template = """#!/bin/sh \n""" + \ - """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """ \n""" + \ - """#$ jobname \n""" + \ - """#$ jobaccount \n""" + \ - """#$ jobnodes \n""" + \ - """#$ jobtime \n""" + \ - """#$ jobshell \n""" + \ - """\n""" + \ - """source /usr/local/Modules/3.2.8/init/sh\n""" + \ - """source /opt/intel/bin/ifortvars.sh intel64\n""" + \ - """export HOST='capegrim'\n""" + \ - """module load python\n""" + \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - - return template - - - -if __name__ == "__main__": - pass diff --git a/da/platform/cartesius.py b/da/platform/cartesius.py index adbad11a5af948c87cedc2e7ccfe9566a9f46e4d..0136595978c8531442a931013a53cc1dca8c556c 100755 --- a/da/platform/cartesius.py +++ b/da/platform/cartesius.py @@ -26,7 +26,7 @@ import subprocess from da.baseclasses.platform import Platform -std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobtype':'serial', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'24:00:00', 'jobinput':'/dev/null', 'jobnodes':'1', 'jobtasks':'', 'modulenetcdf':'netcdf/4.1.2', 'networkMPI':'','jobqueue': 'normal'} +std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobtype':'serial', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'24:00:00', 'jobinput':'/dev/null', 'jobnodes':'1', 'jobtasks':'', 'modulenetcdf':'netcdf/4.1.2', 'networkMPI':'','jobqueue': 'short'} class CartesiusPlatform(Platform): @@ -100,7 +100,7 @@ class CartesiusPlatform(Platform): """#SBATCH -n jobnodes \n""" + \ """#SBATCH -t jobtime \n""" + \ """#SBATCH -o joblog \n""" + \ - """module load python\n""" + \ + """module load Python/3.6.6-intel-2018b\n""" + \ """module load nco\n""" + \ """\n""" diff --git a/da/platform/cartesius.py.bak b/da/platform/cartesius.py.bak deleted file mode 100755 index d4c8ed3ce47f02bdd72e0a2cee347cdb1fea0fef..0000000000000000000000000000000000000000 --- a/da/platform/cartesius.py.bak +++ /dev/null @@ -1,184 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# cartesius.py - -""" -Author : peters - -Revision History: -File created on 06 Sep 2010. 
- -""" - -import logging -import subprocess - -from da.baseclasses.platform import Platform - -std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobtype':'serial', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'24:00:00', 'jobinput':'/dev/null', 'jobnodes':'1', 'jobtasks':'', 'modulenetcdf':'netcdf/4.1.2', 'networkMPI':'','jobqueue': 'normal'} - - -class CartesiusPlatform(Platform): - def __init__(self): - self.ID = 'cartesius' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - - def give_blocking_flag(self): - """ - Returns a blocking flag, which is important if tm5 is submitted in a queue system. The python ctdas code is forced to wait before tm5 run is finished - - -on Huygens: return "-s" - -on Maunaloa: return "" (no queue available) - -on Jet/Zeus: return - """ - return "" - - def give_queue_type(self): - """ - Return a queue type depending whether your computer system has a queue system, or whether you prefer to run in the foreground. - On most large systems using the queue is mandatory if you run a large job. - -on Huygens: return "queue" - -on Maunaloa: return "foreground" (no queue available) - -on Jet/Zeus: return - - """ - return "foreground" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. 
- """ - - #template = """## \n"""+ \ - # """## This is a set of dummy names, to be replaced by values from the dictionary \n"""+ \ - # """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """+ \ - # """## \n"""+ \ - # """ \n"""+ \ - # """#$ jobname \n"""+ \ - # """#$ jobaccount \n"""+ \ - # """#$ jobnodes \n"""+ \ - # """#$ jobtime \n"""+ \ - # """#$ jobshell \n"""+ \ - # """\n"""+ \ - # """source /usr/bin/sh\n"""+ \ - # """module load python\n"""+ \ - # """\n""" - - - template = """#!/bin/bash \n""" + \ - """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """#SBATCH -J jobname \n""" + \ - """#SBATCH -p jobqueue \n""" + \ - """#SBATCH -n jobnodes \n""" + \ - """#SBATCH -t jobtime \n""" + \ - """#SBATCH -o joblog \n""" + \ - """module load python\n""" + \ - """module load nco\n""" + \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in list(joboptions.items()): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in list(std_joboptions.items()): - while k in template: - template = template.replace(k, v) - - return template - - - def submit_job(self, jobfile, joblog=None, block=False): - """ This method submits a jobfile to the queue, and returns the queue ID """ - - - #cmd = ["llsubmit","-s",jobfile] - #msg = "A new task will be started (%s)"%cmd ; logging.info(msg) - - if block: - cmd = ["salloc",'-n',std_joboptions['jobnodes'],'-t',std_joboptions['jobtime'], jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - print('output', output) - jobid = output.split()[-1] - print('jobid', jobid) - else: - cmd = ["sbatch", jobfile] - logging.info("A new job will be submitted (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) - jobid = output.split()[-1] - - return jobid - - - - - - - -# jobid = output.split()[2] -# retcode = output.split()[-1] -# -# #for huygens -# print 'output', output -# test = output.split()[3] -# dummy, jobid =test.split('nl.') -# jobid='%s%s' %('"',jobid) -# submitmsg ='%s%s%s'%(output.split()[4],output.split()[5],output.split()[6]) -# if submitmsg=='hasbeensubmitted.': -# retcode=2 -# print 'retcode',submitmsg,retcode -# return retcode -# -# def KillJob(self,jobid): -# """ This method kills a running job """ -# -# output = subprocess.Popen(['qdel',jobid], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) -# -# return output -# -# def StatJob(self,jobid): -# """ This method gets the status of a running job """ -# import subprocess -# -# #output = subprocess.Popen(['sgestat'], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) -# -# return '' - - - -if __name__ == "__main__": - pass diff --git a/da/platform/fmi.py.bak b/da/platform/fmi.py.bak deleted file mode 100755 index 02e7330768a75750097e87f59f13fbf7ac793908..0000000000000000000000000000000000000000 --- a/da/platform/fmi.py.bak +++ /dev/null @@ -1,117 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. 
-Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# fmi.py - -""" -Author : Aki - -Revision History: -File created on 25 Jun 2013. - -""" - -import logging -import subprocess - -from da.baseclasses.platform import Platform -std_joboptions = {'jobaccount':'ch4'} - -class FmiPlatform(Platform): - def __init__(self): - self.ID = 'FMI voima' # the identifier gives the platform name - self.version = '' # the platform version used - - #def give_blocking_flag(self): - # """ - # Returns a blocking flag, which is important if tm5 is submitted in a queue system. The python ctdas code is forced to wait before tm5 run is finished - # """ - # return "-s" - - #def give_queue_type(self): - # """ - # Return a queue type depending whether your computer system has a queue system, or whether you prefer to run in the foreground. - # On most large systems using the queue is mandatory if you run a large job. - # """ - # return "queue" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. 
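
Each platform module pairs such a template with a `submit_job` method that shells out to the scheduler and recovers the queue ID from its stdout. Below is a stripped-down sketch of that pattern in its SLURM flavour, where the reply is "Submitted batch job <id>" as on cartesius; the PBS-style platforms take the first token of the `qsub` reply instead, and blocking submission is left to `salloc` exactly as in the cartesius class.

```python
# Stripped-down sketch of the submit-and-parse pattern shared by the platform
# classes: run the scheduler's submit command, log the reply, return the ID.
# Assumes a SLURM-style reply ("Submitted batch job <id>") as on cartesius.
import logging
import subprocess

def submit_job(jobfile):
    cmd = ["sbatch", jobfile]
    logging.info("A new job will be submitted (%s)", cmd)
    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    logging.info(output)
    return output.split()[-1].decode()   # last token of "Submitted batch job <id>"
```
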
- """ - - template = """#!/bin/bash"""+ \ - """ \n"""+ \ - """ \n"""+ \ - """#PBS -N jobname \n"""+ \ - """#PBS -j oe \n"""+ \ - """#PBS -l walltime=jobtime \n"""+ \ - """#PBS -l mppwidth=jobpes \n"""+ \ - """#PBS -l mppnppn=jobnodes \n"""+ \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k,v in joboptions.items(): - while k in template: - template = template.replace(k,v) - - # Fill remaining values with std_options - for k,v in std_joboptions.items(): - while k in template: - template = template.replace(k,v) - - return template - - - #msg1 = 'Platform initialized: %s'%self.Identifier ; logging.info(msg1) - #msg2 = '%s version: %s'%(self.Identifier,self.Version) ; logging.info(msg2) - - - def submit_job(self, jobfile, joblog=None, block=False): - """ This method submits a jobfile to the queue, and returns the queue ID """ - - if block: - cmd = ["qsub", "-s", jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - print 'output', output - jobid = output.split()[0] - print 'jobid', jobid - else: - cmd = ["qsub", jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) - jobid = output.split()[0] - - return jobid - - -if __name__ == "__main__": - pass diff --git a/da/platform/huygens.py.bak b/da/platform/huygens.py.bak deleted file mode 100755 index fc9c51669dc1927afc2fed513f50a8f8fe45dcfd..0000000000000000000000000000000000000000 --- a/da/platform/huygens.py.bak +++ /dev/null @@ -1,222 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# maunaloa.py - -""" -Author : peters - -Revision History: -File created on 06 Sep 2010. - -""" - -import logging -import subprocess - -from da.baseclasses.platform import Platform - -std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobtype':'serial', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'24:00:00', 'jobinput':'/dev/null', 'jobnode':'', 'jobtasks':'', 'modulenetcdf':'netcdf/4.1.2', 'networkMPI':''} - - -class HuygensPlatform(Platform): - def __init__(self): - self.ID = 'huygens' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - - def give_blocking_flag(self): - """ - Returns a blocking flag, which is important if tm5 is submitted in a queue system. 
The python ctdas code is forced to wait before tm5 run is finished - - -on Huygens: return "-s" - -on Maunaloa: return "" (no queue available) - -on Jet/Zeus: return - """ - return "-s" - - def give_queue_type(self): - """ - Return a queue type depending whether your computer system has a queue system, or whether you prefer to run in the foreground. - On most large systems using the queue is mandatory if you run a large job. - -on Huygens: return "queue" - -on Maunaloa: return "foreground" (no queue available) - -on Jet/Zeus: return - - """ - return "queue" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. - """ - - #template = """## \n"""+ \ - # """## This is a set of dummy names, to be replaced by values from the dictionary \n"""+ \ - # """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """+ \ - # """## \n"""+ \ - # """ \n"""+ \ - # """#$ jobname \n"""+ \ - # """#$ jobaccount \n"""+ \ - # """#$ jobnodes \n"""+ \ - # """#$ jobtime \n"""+ \ - # """#$ jobshell \n"""+ \ - # """\n"""+ \ - # """source /usr/bin/sh\n"""+ \ - # """module load python\n"""+ \ - # """\n""" - - - template = """#!/bin/bash \n""" + \ - """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """## @ node_usage = normal\n""" + \ - """jobnode \n""" + \ - """jobtasks \n""" + \ - """networkMPI \n""" + \ - """# @ notification = never\n""" + \ - """# @ input = jobinput\n""" + \ - """# @ output = logfile.$(jobid)\n""" + \ - """# @ error = logfile.$(jobid)\n""" + \ - """# @ wall_clock_limit = jobtime\n""" + \ - """# @ job_type = jobtype \n""" + \ - """# @ shell = /bin/bash\n""" + \ - """# @ queue \n""" + \ - """\n""" + \ - """module load ctdas\n""" + \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - - return template - - - - - # msg1 = 'Platform initialized: %s' % self.Identifier ; logging.info(msg1) -# #msg2 = '%s version: %s'%(self.Identifier,self.Version) ; logging.info(msg2) - - -# def GetMyID(self): -# try: -# return os.environ['JOB_ID'] -# except: -# return os.getpid() -# - -# def submit_job(self,jobfile,joblog=None,block=False): -# """ This method submits a jobfile to the queue, and returns the queue ID """ -# -# -# cmd = ["llsubmit","-s",jobfile] -# msg = "A new task will be started (%s)"%cmd ; 
logging.info(msg) -# -# if block: -# jobid = subprocess.call(cmd) -# print 'jobid',jobid -# else: -# jobid = subprocess.Popen(cmd).pid -# #output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) -# code=0 - - - - - def submit_job(self, jobfile, joblog=None, block=False): - """ This method submits a jobfile to the queue, and returns the queue ID """ - - - #cmd = ["llsubmit","-s",jobfile] - #msg = "A new task will be started (%s)"%cmd ; logging.info(msg) - - if block: - cmd = ["llsubmit", "-s", jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - print 'output', output - jobid = output.split()[3] - print 'jobid', jobid - else: - cmd = ["llsubmit", jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) - jobid = output.split()[3] - - return jobid - - - - - - - -# jobid = output.split()[2] -# retcode = output.split()[-1] -# -# #for huygens -# print 'output', output -# test = output.split()[3] -# dummy, jobid =test.split('nl.') -# jobid='%s%s' %('"',jobid) -# submitmsg ='%s%s%s'%(output.split()[4],output.split()[5],output.split()[6]) -# if submitmsg=='hasbeensubmitted.': -# retcode=2 -# print 'retcode',submitmsg,retcode -# return retcode -# -# def KillJob(self,jobid): -# """ This method kills a running job """ -# -# output = subprocess.Popen(['qdel',jobid], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) -# -# return output -# -# def StatJob(self,jobid): -# """ This method gets the status of a running job """ -# import subprocess -# -# #output = subprocess.Popen(['sgestat'], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) -# -# return '' - - - -if __name__ == "__main__": - pass diff --git a/da/platform/jet.py.bak b/da/platform/jet.py.bak deleted file mode 100755 index d28fcf5bd826ac76ee6d81424d064ab983659805..0000000000000000000000000000000000000000 --- a/da/platform/jet.py.bak +++ /dev/null @@ -1,103 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# jet.py - -""" -Author : peters - -Revision History: -File created on 06 Sep 2010. 
- -""" -import os -import logging -import subprocess - -from da.baseclasses.platform import Platform - -std_joboptions = {'jobname':'test', 'jobaccount':'co2', 'jobnodes':'nserial 1', 'jobshell':'/bin/sh', 'depends':'', 'jobtime':'00:30:00', 'joblog':os.getcwd()} - -class JetPlatform(Platform): - def __init__(self): - self.ID = 'NOAA jet' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - logging.debug('%s platform object initialized' % self.ID) - logging.debug('%s version: %s' % (self.ID, self.version)) - - - def get_job_template(self, joboptions={}, block=False): - """ Return the job template for a given computing system, and fill it with options from the dictionary provided as argument""" - - template = """#$ -N jobname \n""" + \ - """#$ -A jobaccount \n""" + \ - """#$ -pe jobnodes \n""" + \ - """#$ -l h_rt=jobtime \n""" + \ - """#$ -S jobshell \n""" + \ - """#$ -o joblog \n""" + \ - """#$ -cwd\n""" + \ - """#$ -r n\n""" + \ - """#$ -V\n""" + \ - """#$ -j y\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - if block: - template += """#$ -sync y\n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - - return template - - def get_my_id(self): - try: - return os.environ['JOB_ID'] - except: - return os.getpid() - - def submit_job(self, jobfile, joblog=None, block=False): - """ This method submits a jobfile to the queue, and returns the queue ID """ - - cmd = ["qsub", jobfile] - logging.info("A new task will be started (%s)" % cmd) - output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - #jobid = output.split()[2] - retcode = output.split()[-1] - - return retcode - - def kill_job(self, jobid): - """ This method kills a running job """ - output = subprocess.Popen(['qdel', jobid], stdout=subprocess.PIPE).communicate()[0] - logging.info(output) - return output - - def job_stat(self, jobid): - """ This method gets the status of a running job """ - - #output = subprocess.Popen(['sgestat'], stdout=subprocess.PIPE).communicate()[0] ; logging.info(output) - return '' - -if __name__ == "__main__": - pass diff --git a/da/platform/kermadec.py.bak b/da/platform/kermadec.py.bak deleted file mode 100755 index cbad74bf0ba120c324dc318bcaa0a2f3b424031e..0000000000000000000000000000000000000000 --- a/da/platform/kermadec.py.bak +++ /dev/null @@ -1,91 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# kermadec.py - -""" -Author : peters - -Revision History: -File created on 05 Jan 2015. 
- -""" - -import logging - -from da.baseclasses.platform import Platform, std_joboptions - -class KermadecPlatform(Platform): - def __init__(self): - - self.ID = 'RUG kermadec' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - def give_blocking_flag(self): - return "" - - def give_queue_type(self): - return "foreground" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. - """ - - template = """#!/bin/sh \n""" + \ - """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """ \n""" + \ - """#$ jobname \n""" + \ - """#$ jobaccount \n""" + \ - """#$ jobnodes \n""" + \ - """#$ jobtime \n""" + \ - """#$ jobshell \n""" + \ - """\n""" + \ - """export HOST='kermadec'\n""" + \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - - return template - - - -if __name__ == "__main__": - pass diff --git a/da/platform/maunaloa.py.bak b/da/platform/maunaloa.py.bak deleted file mode 100755 index 0f5919f1398431ebc44fabbf4a5dc27180d6ecc2..0000000000000000000000000000000000000000 --- a/da/platform/maunaloa.py.bak +++ /dev/null @@ -1,87 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# maunaloa.py - -""" -Author : peters - -Revision History: -File created on 06 Sep 2010. 
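
The `give_blocking_flag` and `give_queue_type` hooks defined by these platform classes keep the rest of the shell scheduler-agnostic. The sketch below is one hypothetical way a caller could branch on them; it is illustrative glue only, not code from the CTDAS job scripts.

```python
# Hypothetical glue code, not part of CTDAS: branch on the two platform hooks.
import subprocess

def start_job(platform, jobfile, submit_cmd="sbatch"):
    if platform.give_queue_type() == "foreground":
        # Machines without a scheduler (kermadec, maunaloa) run the script directly.
        return subprocess.call(["/bin/sh", jobfile])
    # Queued machines get the platform's blocking flag (e.g. "-s" for llsubmit
    # on huygens, empty on cartesius) so the shell waits for the run to finish.
    flag = platform.give_blocking_flag()
    cmd = [submit_cmd] + ([flag] if flag else []) + [jobfile]
    return subprocess.call(cmd)
```
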
- -""" - -from da.baseclasses.platform import Platform, std_joboptions - -class MaunaloaPlatform(Platform): - def __init__(self): - self.ID = 'WU maunaloa' # the identifier gives the platform name - self.version = '1.0' # the platform version used - - def give_blocking_flag(self): - return "" - - def give_queue_type(self): - return "foreground" - - def get_job_template(self, joboptions={}, block=False): - """ - Returns the job template for a given computing system, and fill it with options from the dictionary provided as argument. - The job template should return the preamble of a job that can be submitted to a queue on your platform, - examples of popular queuing systems are: - - SGE - - MOAB - - XGrid - - - - A list of job options can be passed through a dictionary, which are then filled in on the proper line, - an example is for instance passing the dictionary {'account':'co2'} which will be placed - after the ``-A`` flag in a ``qsub`` environment. - - An extra option ``block`` has been added that allows the job template to be configured to block the current - job until the submitted job in this template has been completed fully. - """ - - template = """## \n""" + \ - """## This is a set of dummy names, to be replaced by values from the dictionary \n""" + \ - """## Please make your own platform specific template with your own keys and place it in a subfolder of the da package.\n """ + \ - """## \n""" + \ - """ \n""" + \ - """#$ jobname \n""" + \ - """#$ jobaccount \n""" + \ - """#$ jobnodes \n""" + \ - """#$ jobtime \n""" + \ - """#$ jobshell \n""" + \ - """\n""" + \ - """source /usr/local/Modules/3.2.8/init/sh\n""" + \ - """module load python\n""" + \ - """\n""" - - if 'depends' in joboptions: - template += """#$ -hold_jid depends \n""" - - # First replace from passed dictionary - for k, v in joboptions.items(): - while k in template: - template = template.replace(k, v) - - # Fill remaining values with std_options - for k, v in std_joboptions.items(): - while k in template: - template = template.replace(k, v) - - return template - - -if __name__ == "__main__": - pass diff --git a/da/rc/carbontracker_random.rc b/da/rc/carbontracker_random.rc index 2b8b5c42e95e2d9da64e00fc45fbc711b2b86110..09c341716ad0da961f7a8d385c8936739eab1fbc 100644 --- a/da/rc/carbontracker_random.rc +++ b/da/rc/carbontracker_random.rc @@ -11,15 +11,7 @@ ! You should have received a copy of the GNU General Public License along with this ! program. If not, see <http://www.gnu.org/licenses/>. -datadir : /Storage/CO2/carbontracker/input/ctdas_2016/ -ocn.covariance : ${datadir}/oceans/oif/cov_ocean.2000.01.nc -deltaco2.prefix : oif_p3_era40.dpco2 -bio.cov.dir : ${datadir}/covariances/gridded_NH/ -bio.cov.prefix : cov_ecoregion - -regtype : gridded_oif30 -nparameters : 9835 +nparameters : 100 random.seed : 4385 -regionsfile : ${datadir}/covariances/gridded_NH/griddedNHparameters.nc obs.sites.rc : Random diff --git a/da/rc/stilt/stilt.rc b/da/rc/stilt/stilt.rc index 264eab5f6d8818317a939e2fe728ae21d0f7ce62..52838914f36f0619ec46437371e40ada12128f29 100644 --- a/da/rc/stilt/stilt.rc +++ b/da/rc/stilt/stilt.rc @@ -11,17 +11,15 @@ ! You should have received a copy of the GNU General Public License along with this ! program. If not, see <http://www.gnu.org/licenses/>. 
homedir : /projects/0/ctdas/RINGO - sibdir : /projects/0/ctdas/RINGO/EmissionInventories/ +biosphere_fluxdir : /projects/0/ctdas/RINGO/EmissionInventories/True/RINGO_ORCHIDEE_GPP_TER_dC14_old.nc +nuclear_timeprofiledir : /projects/0/ctdas/RINGO/EmissionInventories/True/Power_TempDistributionFrance.xlsx +nuclear_fluxdir : /projects/0/ctdas/RINGO/EmissionInventories/True/RINGO_NuclearEmissions_14CO2.nc bgfdir : /projects/0/ctdas/RINGO/EmissionInventories/ footdir : /projects/0/ctdas/RINGO/STILT_Output/ - bounddir : /projects/0/ctdas/RINGO/EmissionInventories/ - inputdir : /home/awoude/ctdas_test/ctdas_stilt/input/ - outdir : /home/awoude/ctdas_test/ctdas_stilt/output/ +inputdir: /projects/0/ctdas/RINGO/inversions/ffdas/input nsam : 100 ndayscycle : 10 sysstadate : 2016-01-15 00:00:00 cycstadate : 2016-01-20 00:00:00 files_startdate : 2016-01-01 00:00:00 -tracerdir : /home/awoude/ctdas_test/ctdas_stilt/exec/da/rc/stilt/tracers -! Now list the tracers separated by a comma, without a space! -tracers : co2,222Rn +num_backtimes: 24 diff --git a/da/sf6/statevector.py.bak b/da/sf6/statevector.py.bak deleted file mode 100755 index 881031f67dd1d5961f2642e9a6080215ea74f1b6..0000000000000000000000000000000000000000 --- a/da/sf6/statevector.py.bak +++ /dev/null @@ -1,287 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# ct_statevector_tools.py - -""" -Author : peters - -Revision History: -File created on 28 Jul 2010. - -""" - -import os -import sys -sys.path.append(os.getcwd()) - -import logging -import numpy as np -from da.baseclasses.statevector import StateVector, EnsembleMember -import da.tools.io4 as io -from datetime import timedelta - -identifier = 'SF6 Statevector ' -version = '0.0' - -################### Begin Class SF6StateVector ################### - -class SF6StateVector(StateVector): - """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """ - - def get_covariance(self, date, dacycle): - """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - """ - try: - import matplotlib.pyplot as plt - except: - pass - - # Get the needed matrices from the specified covariance files - - fullcov = np.zeros((self.nparams, self.nparams), float) - - for n in range(self.nparams): - fullcov[n,n] = 0.5**2 - - return fullcov - - def setup(self, dacycle): - """ - Initialize the object by specifying the dimensions. 
- There are two major requirements for each statvector that you want to build: - - (1) is that the statevector can map itself onto a regular grid - (2) is that the statevector can map itself (mean+covariance) onto TransCom regions - - An example is given below. - """ - - self.nlag = int(dacycle['time.nlag']) - self.nmembers = int(dacycle['da.optimizer.nmembers']) - self.nparams = int(dacycle.dasystem['nparameters']) - self.nobs = 0 - - self.obs_to_assimilate = () # empty containter to hold observations to assimilate later on - - # These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist - # of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread. - - self.ensemble_members = range(self.nlag) - - for n in range(self.nlag): - self.ensemble_members[n] = [] - - - # This specifies the file to read with the gridded mask at 1x1 degrees. Each gridbox holds a number that specifies the parametermember - # that maps onto it. From this map, a dictionary is created that allows a reverse look-up so that we can map parameters to a grid. - - mapfile = os.path.join(dacycle.dasystem['regionsfile']) - ncf = io.ct_read(mapfile, 'read') - self.tcmap = ncf.get_variable('transcom_regions') - ncf.close() - - self.gridmap = np.ones((180,360),'float') - - logging.debug("A TransCom map on 1x1 degree was read from file %s" % dacycle.dasystem['regionsfile']) - logging.debug("A parameter map on 1x1 degree was created") - - # Create a dictionary for state <-> gridded map conversions - - nparams = self.gridmap.max() - self.griddict = {} - for r in range(1, int(nparams) + 1): - sel = (self.gridmap.flat == r).nonzero() - if len(sel[0]) > 0: - self.griddict[r] = sel - - logging.debug("A dictionary to map grids to states and vice versa was created") - - # Create a matrix for state <-> TransCom conversions - - self.tcmatrix = np.zeros((self.nparams, 23), 'float') - - logging.debug("A matrix to map states to TransCom regions and vice versa was created") - - # Create a mask for species/unknowns - - self.make_species_mask() - - def make_species_mask(self): - - """ - - This method creates a dictionary with as key the name of a tracer, and as values an array of 0.0/1.0 values - specifying which StateVector elements are constrained by this tracer. This mask can be used in - the optimization to ensure that certain types of osbervations only update certain unknowns. - - An example would be that the tracer '14CO2' can be allowed to only map onto fossil fuel emissions in the state - - The form of the mask is: - - {'co2': np.ones(self.nparams), 'co2c14', np.zeros(self.nparams) } - - so that 'co2' maps onto all parameters, and 'co2c14' on none at all. These arrays are used in the Class - optimizer when state updates are actually performed - - """ - self.speciesdict = {'sf6': np.ones(self.nparams)} - logging.debug("A species mask was created, only the following species are recognized in this system:") - for k in self.speciesdict.keys(): - logging.debug(" -> %s" % k) - - def propagate(self,dummy): - """ - :rtype: None - - Propagate the parameter values in the StateVector to the next cycle. This means a shift by one cycle - step for all states that will - be optimized once more, and the creation of a new ensemble for the time step that just - comes in for the first time (step=nlag). - In the future, this routine can incorporate a formal propagation of the statevector. 
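
Stripped of the covariance machinery, the shift described here is a first-in, first-out move on the list of lagged ensembles. A toy illustration with nlag=3 and strings standing in for the EnsembleMember lists:

```python
# Toy illustration of the lag shift performed by propagate(): the oldest lag
# drops off the front and a freshly drawn ensemble enters at the back.
ensemble_members = [['lag1'], ['lag2'], ['lag3']]   # stand-ins for EnsembleMember lists

def propagate(ensemble_members):
    ensemble_members.append(['new ensemble for step nlag'])  # newly drawn members
    ensemble_members.pop(0)                                   # oldest state leaves the window
    return ensemble_members

print(propagate(ensemble_members))
# [['lag2'], ['lag3'], ['new ensemble for step nlag']]
```
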
- - """ - - self.ensemble_members.append([]) - cov = self.get_covariance(None,None) - newmean = np.ones(self.nparams) - self.make_new_ensemble(self.nlag+1, newmean = newmean, covariancematrix=cov) - self.ensemble_members.pop(0) - - logging.info('The state vector remains the same in the SF6 run') - logging.info('The state vector has been propagated by one cycle') - - def make_new_ensemble(self, lag, newmean=None, covariancematrix=None): - """ - :param lag: an integer indicating the time step in the lag order - :param covariancematrix: a matrix to draw random values from - :rtype: None - - Make a new ensemble, the attribute lag refers to the position in the state vector. - Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below. - The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag] - - The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is - used to draw ensemblemembers from. If this argument is not passed it will ne substituted with an - identity matrix of the same dimensions. - - """ - - if newmean == None: - newmean = np.ones(self.nparams) - - if covariancematrix == None: - covariancematrix = np.identity(self.nparams) - - # Make a cholesky decomposition of the covariance matrix - - - _, s, _ = np.linalg.svd(covariancematrix) - dof = np.sum(s) ** 2 / sum(s ** 2) - C = np.linalg.cholesky(covariancematrix) - - logging.debug('Cholesky decomposition has succeeded ') - logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof))) - - - # Create the first ensemble member with a deviation of 0.0 and add to list - - newmember = EnsembleMember(0) - newmember.param_values = newmean.flatten() # no deviations - self.ensemble_members[lag-1].append(newmember) - - # Create members 1:nmembers and add to EnsembleMembers list - - for member in range(1, self.nmembers): - rands = np.random.randn(self.nparams) - - newmember = EnsembleMember(member) - # routine to avoids that members < 0.0 - dev = np.dot(C, rands) #VDV - dummy = np.zeros(len(dev)) #VDV - for i in range(len(dev)): #VDV - if dev[i] < 0.0: #VDV - dummy[i] = newmean[i]*np.exp(dev[i]) #VDV - else: #VDV - dummy[i] = newmean[i]*(1+dev[i]) #VDV - newmember.param_values = dummy #VDV - #newmember.ParameterValues = np.dot(C, rands) + newmean - self.ensemble_members[lag-1].append(newmember) - - logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, lag)) - - def write_members_to_file(self, lag, outdir): - """ - :param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag] - :rtype: None - - Write ensemble member information to a NetCDF file for later use. The standard output filename is - *parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location - is the `dir.input` of the dacycle object. In principle the output file will have only two datasets inside - called `parametervalues` which is of dimensions `nparameters` and `parametermap` which is of dimensions (180,360). - This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object. - - .. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you - can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function. - - """ - - # These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. 
Solution was - # to do the import already at the start of the module, not just in this method. - - #import da.tools.io as io - #import da.tools.io4 as io - - members = self.ensemble_members[lag] - - for mem in members: - filename = os.path.join(outdir, 'parameters.%03d.nc' % mem.membernumber) - ncf = io.CT_CDF(filename, method='create') - dimparams = ncf.add_params_dim(self.nparams) - dimgrid = ncf.add_latlon_dim() - logging.warning('Parameters for TM5 are NOT taken with exponential') - - # Explicitly set maximum allowable value to 0.0 - data = np.where(mem.param_values < 0, 0.0, mem.param_values) - - savedict = io.std_savedict.copy() - savedict['name'] = "parametervalues" - savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimparams - savedict['values'] = data - savedict['comment'] = 'These are parameter values to use for member %d, note: they result from an exponential function' % mem.membernumber - ncf.add_data(savedict) - - griddata = self.vector2grid(vectordata=data) - - savedict = io.std_savedict.copy() - savedict['name'] = "parametermap" - savedict['long_name'] = "parametermap_for_member_%d" % mem.membernumber - savedict['units'] = "unitless" - savedict['dims'] = dimgrid - savedict['values'] = griddata.tolist() - savedict['comment'] = 'These are gridded parameter values to use for member %d, note: they result from an exponential function' % mem.membernumber - ncf.add_data(savedict) - - ncf.close() - - logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename)) - - - - -################### End Class SF6StateVector ################### - diff --git a/da/test/test_optimizer.py.bak b/da/test/test_optimizer.py.bak deleted file mode 100755 index badd612542ee9d1b871836c105a7b48018bb6586..0000000000000000000000000000000000000000 --- a/da/test/test_optimizer.py.bak +++ /dev/null @@ -1,221 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# test_optimizer.py - -""" -Author : peters - -Revision History: -File created on 04 Aug 2010. - -""" - - -def serial_py_against_serial_fortran(): - """ Test the solution of the serial algorithm against the CT cy2 fortran generated one """ - - # get data from the savestate.hdf file from the first cycle of CarbonTracker 2009 release - - print "WARNING: The optimization algorithm has changed from the CT2009 release because of a bug" - print "WARNING: in the fortran code. Hence, the two solutions calculated are no longer the same." 
- print "WARNING: To change the python algorithm so that it corresponds to the fortran, change the" - print "WARNING: loop from m=n+1,nfcast to m=1,nfcast" - - savefile = '/data/CO2/peters/carbontracker/raw/ct09rc0i/20000108/savestate.hdf' - print savefile - - f = Nio.open_file(savefile, 'r') - obs = f.variables['co2_obs_fcast'].get_value() - sel_obs = obs.shape[0] - - dims = (int(dacycle.da_settings['time.nlag']), - int(dacycle.da_settings['forecast.nmembers']), - int(dacycle.dasystem.da_settings['nparameters']), - sel_obs,) - - nlag, nmembers, nparams, nobs = dims - - optserial = CtOptimizer(dims) - opt = optserial - - opt.set_localization('CT2007') - - obs = f.variables['co2_obs_fcast'].get_value()[0:nobs] - opt.obs = obs - sim = f.variables['co2_sim_fcast'].get_value()[0:nobs] - opt.Hx = sim - error = f.variables['error_sim_fcast'].get_value()[0:nobs] - flags = f.variables['flag_sim_fcast'].get_value()[0:nobs] - opt.flags = flags - simana = f.variables['co2_sim_ana'].get_value()[0:nobs] - - for n in range(nobs): opt.R[n, n] = np.double(error[n] ** 2) - - xac = [] - adX = [] - for lag in range(nlag): - xpc = f.variables['xpc_%02d' % (lag + 1)].get_value() - opt.x[lag * nparams:(lag + 1) * nparams] = xpc - X = f.variables['pdX_%02d' % (lag + 1)].get_value() - opt.X_prime[lag * nparams:(lag + 1) * nparams, :] = np.transpose(X) - HX = f.variables['dF'][:, 0:sel_obs] - opt.HX_prime[:, :] = np.transpose(HX) - - # Also create arrays of the analysis of the fortran code for later comparison - - xac.extend (f.variables['xac_%02d' % (lag + 1)].get_value()) - adX.append (f.variables['adX_%02d' % (lag + 1)].get_value()) - - xac = np.array(xac) - X_prime = np.array(adX).swapaxes(1, 2).reshape((opt.nparams * opt.nlag, opt.nmembers)) - - opt.serial_minimum_least_squares() - - print "Maximum differences and correlation of 2 state vectors:" - print np.abs(xac - opt.x).max(), np.corrcoef(xac, opt.x)[0, 1] - - plt.figure(1) - plt.plot(opt.x, label='SerialPy') - plt.plot(xac, label='SerialFortran') - plt.grid(True) - plt.legend(loc=0) - plt.title('Analysis of state vector') - - print "Maximum differences of 2 state vector deviations:" - print np.abs(X_prime - opt.X_prime).max() - - plt.figure(2) - plt.plot(opt.X_prime.flatten(), label='SerialPy') - plt.plot(X_prime.flatten(), label='SerialFortran') - plt.grid(True) - plt.legend(loc=0) - plt.title('Analysis of state vector deviations') - - print "Maximum differences and correlation of 2 simulated obs vectors:" - print np.abs(simana - opt.Hx).max(), np.corrcoef(simana, opt.Hx)[0, 1] - - plt.figure(3) - plt.plot(opt.Hx, label='SerialPy') - plt.plot(simana, label='SerialFortran') - plt.grid(True) - plt.legend(loc=0) - plt.title('Analysis of CO2 mole fractions') - plt.show() - - f.close() - -def serial_vs_bulk(): - """ A test of the two algorithms currently implemented: serial vs bulk solution """ - - # get data from the savestate.hdf file from the first cycle of CarbonTracker 2009 release - - savefile = '/data/CO2/peters/carbontracker/raw/ct09rc0i/20000108/savestate.hdf' - print savefile - - f = Nio.open_file(savefile, 'r') - obs = f.variables['co2_obs_fcast'].get_value() - - nobs = 77 - - dims = (int(dacycle.da_settings['time.nlag']), - int(dacycle.da_settings['forecast.nmembers']), - int(dacycle.dasystem.da_settings['nparameters']), - nobs,) - - nlag, nmembers, nparams, nobs = dims - - optbulk = CtOptimizer(dims) - optserial = CtOptimizer(dims) - - for o, opt in enumerate([optbulk, optserial]): - - opt.set_localization('CT2007') - - obs = 
f.variables['co2_obs_fcast'].get_value()[0:nobs] - opt.obs = obs - sim = f.variables['co2_sim_fcast'].get_value()[0:nobs] - opt.Hx = sim - error = f.variables['error_sim_fcast'].get_value()[0:nobs] - flags = f.variables['flag_sim_fcast'].get_value()[0:nobs] - opt.flags = flags - - for n in range(nobs): - opt.R[n, n] = np.double(error[n] ** 2) - - xac = [] - for lag in range(nlag): - xpc = f.variables['xpc_%02d' % (lag + 1)].get_value() - opt.x[lag * nparams:(lag + 1) * nparams] = xpc - X = f.variables['pdX_%02d' % (lag + 1)].get_value() - opt.X_prime[lag * nparams:(lag + 1) * nparams, :] = np.transpose(X) - HX = f.variables['dF'][:, 0:nobs] - opt.HX_prime[:, :] = np.transpose(HX) - - if o == 0: - opt.bulk_minimum_least_squares() - x1 = opt.x - xp1 = opt.X_prime - hx1 = opt.Hx - hxp1 = opt.HX_prime - hphr1 = opt.HPHR - k1 = opt.KG - if o == 1: - opt.serial_minimum_least_squares() - x2 = opt.x - xp2 = opt.X_prime - hx2 = opt.Hx - hxp2 = opt.HX_prime - hphr2 = opt.HPHR - k2 = opt.KG - - plt.figure() - - print "Maximum differences and correlation of 2 state vectors:" - print np.abs(x2 - x1).max(), np.corrcoef(x2, x1)[0, 1] - - plt.figure(1) - plt.plot(x1, label='Serial') - plt.plot(x2, label='Bulk') - plt.grid(True) - plt.legend(loc=0) - plt.title('Analysis of state vector') - - print "Maximum differences of 2 state vector deviations:" - print np.abs(xp2 - xp1).max() - - plt.figure(2) - plt.plot(xp1.flatten(), label='Serial') - plt.plot(xp2.flatten(), label='Bulk') - plt.grid(True) - plt.legend(loc=0) - plt.title('Analysis of state vector deviations') - - print "Maximum differences and correlation of 2 simulated obs vectors:" - print np.abs(hx2 - hx1).max(), np.corrcoef(hx2, hx1)[0, 1] - - plt.figure(3) - plt.plot(hx1, label='Serial') - plt.plot(hx2, label='Bulk') - plt.title('Analysis of CO2 mole fractions') - plt.grid(True) - plt.legend(loc=0) - - plt.show() - - f.close() - - - -if __name__ == "__main__": - pass diff --git a/da/tm5/methaneobservationoperator.py.bak b/da/tm5/methaneobservationoperator.py.bak deleted file mode 100755 index 943d20083cb1a1bed147d136e5b854974a68d179..0000000000000000000000000000000000000000 --- a/da/tm5/methaneobservationoperator.py.bak +++ /dev/null @@ -1,103 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# tm5_tools.py - -""" -Author : Aki - -File created on Feb 2010. 
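
The deleted test module above judges agreement between two optimizer solutions by the maximum absolute difference and the correlation of the state vectors. That diagnostic is easy to reproduce stand-alone; the random vectors below are placeholders for real serial and bulk analyses.

```python
# Stand-alone version of the agreement check used in test_optimizer.py.bak:
# maximum absolute difference and correlation between two state vectors.
import numpy as np

rng = np.random.default_rng(0)
x_serial = rng.standard_normal(100)
x_bulk = x_serial + 1e-6 * rng.standard_normal(100)   # nearly identical solution

print("max |dx|   :", np.abs(x_bulk - x_serial).max())
print("correlation:", np.corrcoef(x_serial, x_bulk)[0, 1])
```
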
-""" - -import os -import sys -import logging -import shutil -import datetime -import subprocess -from string import join -import glob -sys.path.append(os.getcwd()) -sys.path.append("../../") - -import da.tools.rc as rc -from da.tools.general import create_dirs, to_datetime -from da.baseclasses.observationoperator import ObservationOperator -from da.tm5.observationoperator import TM5ObservationOperator - -def validate_rc_methane(self): - """ - Validate the contents of the tm_settings dictionary and add extra values. The required items for the TM5 rc-file - are specified in the tm5_tools module, as dictionary variable "needed_rc_items". - """ - - if self.rcfiletype == 'pycasso': - self.projectkey = 'my.project.dir' - self.rundirkey = 'my.run.dir' - self.outputdirkey = 'output.dir' - self.savedirkey = 'restart.write.dir' - self.timestartkey = 'timerange.start' - self.timefinalkey = 'timerange.end' - self.timelengthkey = 'jobstep.length' - self.istartkey = 'istart' - self.restartvalue = 33 - self.coldstartvalue = 5 #Aki: read from mmix file - self.transitionvalue = 61 - self.mysourcedirs = 'my.source.dirs' - self.addedsourcedirs = ' proj/levels/ml91/trunk proj/levels/ml91/tropo34/trunk' - else: - self.projectkey = 'runid' - self.rundirkey = 'rundir' - self.outputdirkey = 'outputdir' - self.savedirkey = 'savedir' - self.timestartkey = 'time.start' - self.timefinalkey = 'time.final' - self.timelengthkey = 'time.break.nday' - self.istartkey = 'istart' - self.restartvalue = 3 - self.coldstartvalue = 9 - self.transitionvalue = 61 - - needed_rc_items = [ - self.projectkey, - self.rundirkey, - self.outputdirkey, - self.savedirkey, - self.timestartkey, - self.timefinalkey, - self.timelengthkey, - self.istartkey - ] - - for k, v in self.tm_settings.items(): - if v == 'True' : self.tm_settings[k] = True - if v == 'False': self.tm_settings[k] = False - if 'date' in k : self.tm_settings[k] = to_datetime(v) - if 'time.start' in k : - self.tm_settings[k] = to_datetime(v, fmt='TM5') - if 'time.final' in k : - self.tm_settings[k] = to_datetime(v, fmt='TM5') - if 'timerange.start' in k : - self.tm_settings[k] = to_datetime(v) - if 'timerange.end' in k : - self.tm_settings[k] = to_datetime(v) - - for key in needed_rc_items: - if key not in self.tm_settings: - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - raise IOError(msg) - logging.debug('rc-file has been validated succesfully') - -TM5ObservationOperator.validate_rc = validate_rc_methane - diff --git a/da/tm5/observationoperator.py b/da/tm5/observationoperator.py index bd11e3fa9ed9998f2c8f0218cf59bfe46576a405..6031b4559f3f2dcedecfbcee12fb61521bdb132b 100755 --- a/da/tm5/observationoperator.py +++ b/da/tm5/observationoperator.py @@ -38,7 +38,6 @@ import logging import shutil import datetime import subprocess -from string import join import glob sys.path.append(os.getcwd()) sys.path.append("../../") @@ -192,7 +191,7 @@ class TM5ObservationOperator(ObservationOperator): else: cmd = ['python', 'setup_tm5', '--%s' % self.dacycle.daplatform.give_queue_type(), rcfilename] logging.info('Starting the external TM5 setup script') - logging.info('using command ... %s' % join(cmd)) + logging.info('using command ... 
%s' %(''.join(cmd))) retcode = subprocess.call(cmd) os.chdir(self.dacycle['dir.da_submit']) @@ -254,8 +253,9 @@ class TM5ObservationOperator(ObservationOperator): self.write_rc(self.rc_filename) # Define the name of the file that will contain the modeled output of each observation - - self.simulated_file = os.path.join(self.outputdir, 'flask_output.%s.nc' % self.dacycle['time.sample.stamp']) + #temp = 'flask_output.%s.nc'%(self.dacycle['time.sample.stamp']) + #self.simulated_file = os.path.join(self.outputdir, temp) + self.simulated_file = os.path.join(self.outputdir, 'flask_output.%s.nc'%(self.dacycle['time.sample.stamp'])) def load_rc(self, name): """ @@ -346,7 +346,7 @@ class TM5ObservationOperator(ObservationOperator): """ for k, v in list(newvalues.items()): - if key in self.tm_settings: + if k in self.tm_settings: # keep previous value v_orig = self.tm_settings[k] #replace with new @@ -629,6 +629,3 @@ class TM5ObservationOperator(ObservationOperator): if __name__ == "__main__": pass - - - diff --git a/da/tm5/observationoperator.py.bak b/da/tm5/observationoperator.py.bak deleted file mode 100755 index fa60a095a05a88a149601f8e9dc0d9072fccb92e..0000000000000000000000000000000000000000 --- a/da/tm5/observationoperator.py.bak +++ /dev/null @@ -1,634 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# tm5_tools.py - -""" -Author : peters - -Revision History: -File created on 09 Feb 2009. -Major modifications to go to a class-based approach, July 2010. - -This module holds specific functions needed to use the TM5 model within the data assimilation shell. It uses the information -from the DA system in combination with the generic tm5.rc files. - -The TM5 model is now controlled by a python subprocess. This subprocess consists of an MPI wrapper (written in C) that spawns -a large number ( N= nmembers) of TM5 model instances under mpirun, and waits for them all to finish. - -The design of the system assumes that the tm5.x (executable) was pre-compiled with the normal TM5 tools, and is residing in a -directory specified by the ${RUNDIR} of a tm5 rc-file. This tm5 rc-file name is taken from the data assimilation rc-file. Thus, -this python shell does *not* compile the TM5 model for you! 
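
The run strategy sketched in this docstring — start N member runs and continue only when every one has exited cleanly — can be written down in pure Python as below. The real work is done by the C `tm5_mpi_wrapper` under mpirun; the executable path, member-number argument and default member count here are placeholders only.

```python
# Pure-Python sketch of the "spawn N members, wait for all" pattern; the real
# work is done by the C wrapper tm5_mpi_wrapper under mpirun. The executable
# path, member-number argument and default nmembers are placeholders only.
import subprocess

def run_members(executable="./tm5.x", nmembers=4):
    procs = [subprocess.Popen([executable, "%03d" % member])
             for member in range(nmembers)]
    retcodes = [p.wait() for p in procs]      # block until every member has finished
    if any(code != 0 for code in retcodes):
        raise RuntimeError("One or more ensemble members failed: %s" % retcodes)
    return retcodes
```
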
- -""" - -import os -import sys -import logging -import shutil -import datetime -import subprocess -from string import join -import glob -sys.path.append(os.getcwd()) -sys.path.append("../../") - -import da.tools.rc as rc -from da.tools.general import create_dirs, to_datetime -from da.baseclasses.observationoperator import ObservationOperator - -identifier = 'TM5' -version = 'release 3.0' -mpi_shell_filename = 'tm5_mpi_wrapper' -mpi_shell_location = 'da/bin/' - - -################### Begin Class TM5 ################### - - - -class TM5ObservationOperator(ObservationOperator): - """ This class holds methods and variables that are needed to run the TM5 model. It is initiated with as only argument a TM5 rc-file - location. This rc-file will be used to figure out the settings for the run. - - *** This method of running TM5 assumes that a pre-compiled tm5.exe is present, and it will be run from time.start to time.final *** - - These settings can be modified later. To run a model version, simply compile the model using an existing TM5 rc-file, then - open python, and type: - - []> tm=TM5('/Users/peters/Modeling/TM5/tutorial.rc') - []> tm.write_rc() - []> tm.WriteRunRc() - []> tm.run() - - To use this class inside a data assimilation cycle, a stand-alone method "setup()" is included which modifies the TM5 - settings according to an external dictionary of values to overwrite, and then runs the TM5 model. - - - """ - - def __init__(self, filename): - """ The instance of an TMObservationOperator is application dependent """ - self.ID = identifier # the identifier gives the model name - self.version = version # the model version used - self.restart_filelist = [] - self.output_filelist = [] - - self.outputdir = None # Needed for opening the samples.nc files created - - self.load_rc(filename) # load the specified rc-file - self.validate_rc() # validate the contents - - logging.info('Observation Operator initialized: %s (%s)' % (self.ID, self.version)) - - def setup(self, dacycle): - """ - Execute all steps needed to prepare the ObsOperator for use inside CTDAS, only done at the very first cycle normally - - """ - self.dacycle = dacycle - - if self.dacycle['time.restart'] == False or self.dacycle['transition'] == True: - newitemsmeteo = {} - if self.dacycle['transition']: - if not (self.tm_settings[self.mysourcedirs]).find('ml91') >= 0: - newdirs = self.tm_settings[self.mysourcedirs] + self.addedsourcedirs - else: newdirs = self.tm_settings[self.mysourcedirs] - # Modify the rc-file to reflect directory structure defined by CTDAS - # and change items for transition from 25 to 34 layers for od meteo in dec 2005 - newitems = {'my.basedir' : self.dacycle['dir.exec'], - 'istart' : self.transitionvalue, - 'my.source.dirs' : newdirs, - 'build.copy.dirs' : newdirs} - for k, v in self.tm_settings.items(): - if type(v) is str: - if v.find('ml60') > -1: - newitemsmeteo[k] = v.replace('ml60','ml91') - if v.find('tropo25') > -1: - newitemsmeteo[k] = v.replace('tropo25','tropo34') - if v.find('ml25') > -1: - newitemsmeteo[k] = v.replace('ml25','tropo34') - if v.find('od_L60') > -1: - newitemsmeteo[k] = 'od_L91' - if k == 'my.meteo.nlev': - newitemsmeteo[k] = '91' - logging.debug('Resetting TM5 to perform transition of od meteo from 25 to 34 levels') - else: - logging.info('First time step, setting up and compiling the TM5 model before proceeding!') - # Modify the rc-file to reflect directory structure defined by CTDAS - newitems = {'my.basedir': self.dacycle['dir.exec']} - - self.modify_rc(newitems) - 
self.modify_rc(newitemsmeteo) - - # Create the TM5 run directory to hold a copy of the modified rc-file - - tm5compiledir = self.tm_settings[self.rundirkey] - create_dirs(tm5compiledir) - - rcfilename = os.path.join(tm5compiledir, 'tm5_setup_init.rc') - self.write_rc(rcfilename) - - # Compile TM5 in the new directory, but only if this is a fresh start - logging.debug('Original rc file: %s '%(self.dacycle['da.obsoperator.rc'])) - self.compile_tm5(rcfilename) - - newrcfilename = os.path.join(self.tm_settings['rundir'], self.tm_settings['install.rc']) - - #Use a TM5 restart file in the first cycle (instead of init file). Used now for the CO project. - if 'da.obsoperator.restartfileinfirstcycle' in self.dacycle: - restartfilename = self.dacycle['da.obsoperator.restartfileinfirstcycle'] - targetdir = self.tm_settings[self.savedirkey] - create_dirs(targetdir) - for file in glob.glob(restartfilename): - fname = os.path.split(file)[1] - logging.debug('Copied TM5 restart file to TM5 restart directory for first cycle: %s'%fname) - shutil.copy(file,os.path.join(targetdir,fname)) - - # Replace the rc filename for TM5 with the newly created one in the new run directory - - - logging.debug('Working copy of the tm5.rc file is in place (%s) ' % newrcfilename) - - # And also replace the path to the ObservationOperator in the dacycle object so we can work from the TM5 copy from here on - - self.dacycle['da.obsoperator.rc'] = newrcfilename - - logging.debug('...and set as the da.obsoperator.rc value in this dacycle ') - - - logging.debug('Reloading the da.obsoperator.rc file for this dacycle') - self.load_rc(self.dacycle['da.obsoperator.rc']) - logging.debug('Note that the obsoperator is not recompiled if this is a recovery from a crash!!!') - - - def compile_tm5(self, rcfilename): - """ - Compile TM5 model using setup_tm5 and the modified rc-file - """ - if 'da.obsoperator.home' in self.dacycle: - os.chdir(self.dacycle['da.obsoperator.home']) - logging.debug('Proceeding from TM5 root dir (%s)'% self.dacycle['da.obsoperator.home']) - else: - tm5_dir = os.path.split(self.dacycle['da.obsoperator.rc'])[0] - logging.warning('Guessing your TM5 root dir from the rc filename') - logging.warning('Try adding a key da.obsoperator.home to your da.rc') - logging.warning('Proceeding from guessed TM5 root dir (%s) ' % tm5_dir) - - os.chdir(tm5_dir) - - if self.dacycle['transition']: - cmd = ['python', 'setup_tm5', '-n', '--%s' % self.dacycle.daplatform.give_queue_type(), rcfilename] - else: cmd = ['python', 'setup_tm5', '--%s' % self.dacycle.daplatform.give_queue_type(), rcfilename] - - logging.info('Starting the external TM5 setup script') - logging.info('using command ... 
%s' % join(cmd)) - - retcode = subprocess.call(cmd) - os.chdir(self.dacycle['dir.da_submit']) - - if retcode != 0: - logging.error('Compilation failed, quitting CTDAS') - raise IOError - sys.exit(2) - else: - logging.info('Compilation successful, continuing') - - def prepare_run(self): - """ - Prepare a forward model TM5 run, this consists of: - - - reading the working copy TM5 rc-file, - - validating it, - - modifying the values, - - Removing the existing tm5.ok file if present - - """ - -# Write a modified TM5 model rc-file in which run/break times are defined by our da system - - new_items = { - 'submit.options': self.dacycle.daplatform.give_blocking_flag(), - self.timestartkey: self.dacycle['time.sample.start'], - self.timefinalkey: self.dacycle['time.sample.end'], - 'jobstep.timerange.start': self.dacycle['time.sample.start'], - 'jobstep.timerange.end': self.dacycle['time.sample.end'], - 'jobstep.length': 'inf', - 'ct.params.input.dir': self.dacycle['dir.input'], - 'ct.params.input.file': os.path.join(self.dacycle['dir.input'], 'parameters'), - 'output.flask.infile': self.dacycle['ObsOperator.inputfile'] , - 'output.flask': 'True' - } - - if self.dacycle['transition']: - new_items[self.istartkey] = self.transitionvalue - logging.debug('Resetting TM5 to perform transition of od meteo from 25 to 34 levels') - elif self.dacycle['time.restart']: # If this is a restart from a previous cycle, the TM5 model should do a restart - new_items[self.istartkey] = self.restartvalue - logging.debug('Resetting TM5 to perform restart') - else: - if 'da.obsoperator.restartfileinfirstcycle' not in self.dacycle: - new_items[self.istartkey] = self.coldstartvalue # if not, start TM5 'cold' - logging.debug('Resetting TM5 to perform cold start') - else: - new_items[self.istartkey] = self.restartvalue # If restart file is specified, start TM5 with initial restartfile - logging.debug('Resetting TM5 to start with restart file: %s'%self.dacycle['da.obsoperator.restartfileinfirstcycle']) - - if self.dacycle['time.sample.window'] != 0: # If this is a restart from a previous time step within the filter lag, the TM5 model should do a restart - new_items[self.istartkey] = self.restartvalue - logging.debug('Resetting TM5 to perform restart') - - # If neither one is true, simply take the istart value from the tm5.rc file that was read - - self.modify_rc(new_items) - self.write_rc(self.rc_filename) - - # Define the name of the file that will contain the modeled output of each observation - - self.simulated_file = os.path.join(self.outputdir, 'flask_output.%s.nc' % self.dacycle['time.sample.stamp']) - - def load_rc(self, name): - """ - This method loads a TM5 rc-file with settings for this simulation - """ - self.rcfile = rc.RcFile(name) - self.tm_settings = self.rcfile.values - self.rc_filename = name - - if 'my.source.dirs' in self.tm_settings.keys(): - self.rcfiletype = 'pycasso' - else: - self.rcfiletype = 'pre-pycasso' - logging.debug('TM5 rc-file loaded successfully') - - def validate_rc(self): - """ - Validate the contents of the tm_settings dictionary and add extra values. The required items for the TM5 rc-file - are specified in the tm5_tools module, as dictionary variable "needed_rc_items". 
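
The essence of that validation is to fail early and loudly when a required rc key is absent. A small stand-alone sketch, with an abbreviated key list using the pycasso-style names that appear in the method body below:

```python
# Stand-alone sketch of the required-key check in validate_rc(): every needed
# rc item must be present, otherwise the run is aborted early with an IOError.
# The key list is abbreviated and uses the pycasso-style names from below.
import logging

needed_rc_items = ['my.run.dir', 'output.dir', 'restart.write.dir',
                   'timerange.start', 'timerange.end', 'istart']

def check_required_keys(tm_settings, needed=needed_rc_items):
    for key in needed:
        if key not in tm_settings:
            msg = 'Missing a required value in rc-file : %s' % key
            logging.error(msg)
            raise IOError(msg)
    logging.debug('rc-file contains all required keys')

check_required_keys({'my.run.dir': '/tmp', 'output.dir': '/tmp',
                     'restart.write.dir': '/tmp', 'timerange.start': '2016-01-01',
                     'timerange.end': '2016-02-01', 'istart': '33'})
```
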
- """ - - if self.rcfiletype == 'pycasso': - self.projectkey = 'my.project.dir' - self.rundirkey = 'my.run.dir' - self.outputdirkey = 'output.dir' - self.savedirkey = 'restart.write.dir' - self.timestartkey = 'timerange.start' - self.timefinalkey = 'timerange.end' - self.timelengthkey = 'jobstep.length' - self.istartkey = 'istart' - self.restartvalue = 33 - self.coldstartvalue = 9 - self.transitionvalue = 61 - self.mysourcedirs = 'my.source.dirs' - self.addedsourcedirs = ' proj/levels/ml91/trunk proj/levels/ml91/tropo34/trunk' - else: - self.projectkey = 'runid' - self.rundirkey = 'rundir' - self.outputdirkey = 'outputdir' - self.savedirkey = 'savedir' - self.timestartkey = 'time.start' - self.timefinalkey = 'time.final' - self.timelengthkey = 'time.break.nday' - self.istartkey = 'istart' - self.restartvalue = 3 - self.coldstartvalue = 9 - self.transitionvalue = 61 - - needed_rc_items = [ - self.projectkey, - self.rundirkey, - self.outputdirkey, - self.savedirkey, - self.timestartkey, - self.timefinalkey, - self.timelengthkey, - self.istartkey - ] - - for k, v in self.tm_settings.items(): - if v == 'True' : self.tm_settings[k] = True - if v == 'False': self.tm_settings[k] = False - if 'date' in k : self.tm_settings[k] = to_datetime(v) - if 'time.start' in k : - self.tm_settings[k] = to_datetime(v, fmt='TM5') - if 'time.final' in k : - self.tm_settings[k] = to_datetime(v, fmt='TM5') - if 'timerange.start' in k : - self.tm_settings[k] = to_datetime(v) - if 'timerange.end' in k : - self.tm_settings[k] = to_datetime(v) - - for key in needed_rc_items: - if key not in self.tm_settings: - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - raise IOError(msg) - logging.debug('rc-file has been validated succesfully') - - - def modify_rc(self, newvalues): - """ - Modify parts of the tm5 settings, for instance to give control of file locations to the DA shell - instead of to the tm5.rc script. - - Note that we replace these values in all {key,value} pairs of the tm5.rc file! - - """ - - for k, v in newvalues.items(): - if key in self.tm_settings: - # keep previous value - v_orig = self.tm_settings[k] - #replace with new - self.tm_settings[k] = v - #replace all instances of old with new, but only if it concerns a name of a path!!! - if os.path.exists(str(v)): - for k_old, v_old in self.tm_settings.items(): - if not isinstance(v_old, str): - continue - if str(v_orig) in str(v_old): - v_new = str(v_old).replace(str(v_orig), str(v)) - self.tm_settings[k_old] = v_new - - logging.debug('Replaced tm5 rc-item %s : %s' % (k,v)) - - else: - self.tm_settings[k] = v - logging.debug('Added new tm5 rc-item %s : %s' % (k,v)) - - - def write_rc(self, tm5rcfilename): - """ - Write the rc-file settings to a tm5.rc file in the rundir - """ - rc.write(tm5rcfilename, self.tm_settings) - logging.debug("Modified rc file for TM5 written (%s)" % tm5rcfilename) - - def validate_input(self): - """ - Make sure that parameter files are written to the TM5 inputdir, and that observation lists are present - """ - - datadir = self.tm_settings['ct.params.input.dir'] - if not os.path.exists(datadir): - msg = "The specified input directory for the TM5 model to read from does not exist (%s), exiting..." % datadir - logging.error(msg) - raise IOError(msg) - - datafiles = os.listdir(datadir) - - obsfile = self.dacycle['ObsOperator.inputfile'] - - if not os.path.exists(obsfile): - msg = "The specified obs input file for the TM5 model to read from does not exist (%s), exiting..." 
% obsfile - logging.error(msg) - if 'forward.savestate.dir' not in self.dacycle: - raise IOError(msg) - - for n in range(int(self.dacycle['da.optimizer.nmembers'])): - paramfile = 'parameters.%03d.nc' % n - if paramfile not in datafiles: - msg = "The specified parameter input file for the TM5 model to read from does not exist (%s), exiting..." % paramfile - logging.error(msg) - raise IOError(msg) - - # Next, make sure there is an actual model version compiled and ready to execute - - targetdir = os.path.join(self.tm_settings[self.rundirkey]) - - if self.rcfiletype == 'pycasso': - self.tm5_exec = os.path.join(targetdir, self.tm_settings['my.basename'] + '.x') - else: - self.tm5_exec = os.path.join(targetdir, 'tm5.x') - - if not os.path.exists(self.tm5_exec): - logging.error("Required TM5 executable was not found %s" % self.tm5_exec) - logging.error("Please compile the model with the specified rc-file and the regular TM5 scripts first") - raise IOError - - - def get_initial_data(self): - """ This method places all initial data needed by an ObservationOperator in the proper folder for the model. - For TM5, this means copying the save_*.hdf* files to the dir.save directory from which TM5 will read initial - concentrations for all tracers. - - We get the input data from the restart.current directory at 2 times: - (1) When the model starts the forecast over nlag cycles - (2) When the model starts the advance step over 1 cycle - - - """ - logging.debug("Moving TM5 model restart data from the restart directory to the TM5 save dir") - - # First get the restart data for TM5 from the current restart dir of the filter - - sourcedir = self.dacycle['dir.restart'] - targetdir = self.tm_settings[self.savedirkey] - self.outputdir = self.tm_settings[self.outputdirkey] # Needed further downstream to collect output data from TM5 - - filterlist = '%s' % self.dacycle['time.start'].strftime('%Y%m%d') - - for f in os.listdir(sourcedir): - fpath = os.path.join(sourcedir, f) - if os.path.isdir(fpath): # skip dirs - logging.debug(" [skip] .... %s " % fpath) - continue - #if not f.startswith('save_'): - if not f.startswith('TM5_restart'): - logging.debug(" [skip] .... %s " % fpath) - continue - if not filterlist in f: - logging.debug(" [skip] .... %s " % fpath) - continue - - # all okay, copy file - - logging.debug(" [copy] .... %s " % fpath) - shutil.copy(fpath, fpath.replace(sourcedir, targetdir)) - logging.debug("All restart data have been copied from the restart/current directory to the TM5 save dir") - - def run_forecast_model(self): - self.prepare_run() - self.validate_input() - self.run() - self.save_data() - - def run(self): - """ - Start the TM5 executable. A new log file is started for the TM5 model IO, and then a subprocess is - spawned with the tm5_mpi_wrapper and the tm5.x executable. The exit code of the model is caught and - only if successfull on all processors will execution of the shell continue. - - """ - cwd = os.getcwd() - - # From here on, several options should be implemented. - - # - # (1) Where an mpi process is forked to do a TM5 instance with N tracers, each an ensemble member - # - # (2) Where N processes are spawned, each being one TM5 instance representing one member - # - # (3) Where N/m processes are spawned, each being a TM5 instance that handles m ensemble members - # - # In principle, it is best to make these processes produce scripts that can be executed stand-alone, or - # be submitted to a queue. 
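# Only option (1) is implemented below, in tm5_with_n_tracers(). As a rough
# sketch of what option (2) could look like, one independent TM5 process per
# ensemble member, something along these lines would apply; the executable
# name and the per-member rc-file naming are hypothetical and not taken from
# this file:

import subprocess

def tm5_one_process_per_member(nmembers, rundir):
    # spawn one TM5 instance per ensemble member (illustrative sketch only)
    procs = []
    for n in range(nmembers):
        rcfile = 'tm5.member_%03d.rc' % n            # hypothetical per-member rc-file
        procs.append(subprocess.Popen(['./tm5.x', rcfile], cwd=rundir))
    # wait for every member; fail if any member returned a non-zero code
    codes = [p.wait() for p in procs]
    return 0 if all(code == 0 for code in codes) else -1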
- # - - # Open logfile and spawn model, wait for finish and return code - - # Code for Option (1) - - code = self.tm5_with_n_tracers() - - if code == 0: - logging.info('Finished model executable succesfully (%s)' % code) - self.Status = 'Success' - else: - logging.error('Error in model executable return code: %s ' % code) - self.Status = 'Failed' - raise OSError - - # Return to working directory - - os.chdir(cwd) - - return code - - def tm5_with_n_tracers(self): - """ Method handles the case where one TM5 model instance with N tracers does the sampling of all ensemble members""" - - - tm5submitdir = os.path.join(self.tm_settings[self.rundirkey]) - logging.info('tm5submitdir', tm5submitdir) - - # Go to executable directory and start the subprocess, using a new logfile - - os.chdir(tm5submitdir) - logging.debug('Changing directory to %s ' % tm5submitdir) - - # Remove the tm5.ok file from a previous run, placed back only if a successful TM5 run is executed - - okfile = 'tm5.ok' - if os.path.exists(okfile): - os.remove(okfile) - - # Prepare a job for the current platform, this job needs to account for all job parameters such as - # runtime, queue request, number of processors, and other platform specific parameters - - # It is easiest if all of these options are 'pre-configured' through the tm5.rc file that is used, then - # running the ObsOperator on all platforms simply becomes a matter of running the ./setup_tm5 script - # that was also used to compile TM5 when not actually running CTDAS. The first run of CTDAS - # then could be one where TM5 is actually compiled and run, the rest of the jobs would then *not* re-compile. - # An option would need to be added to force a re-compile of the TM5 code, for debugging purposes. - - # file ID and names - submitcommand = self.tm_settings['submit.command'] - logging.info('Submitting job at %s' % datetime.datetime.now()) - code = subprocess.call(submitcommand.split()) - logging.info('Resuming job at %s' % datetime.datetime.now()) - - if not os.path.exists(okfile): - code = -1 - else: - code = 0 - - return code - - def save_data(self): - """ Copy the TM5 recovery data from the outputdir to the TM5 savedir, also add the restart files to a list of names - that is used by the dacycle object to collect restart data for the filter. - - WP Note: with the new pycasso restart files we no longer need to copy save files from outdir to savedir - - Note 2: also adding the weekly mean flux output to the output_filelist for later collection - """ - - sourcedir = os.path.join(self.tm_settings[self.savedirkey]) - filterlist = ['%s' % self.tm_settings[self.timefinalkey].strftime('%Y%m%d')] - - logging.debug("Creating a new list of TM5 restart data") - logging.debug(" from directory: %s " % sourcedir) - logging.debug(" with filter: %s " % filterlist) - - - # Start from empty lists for each TM5 run. Note that these "private" lists from the obs operator are later on appended to the system - # lists - - self.restart_filelist = [] - - for fil in os.listdir(sourcedir): - fil = os.path.join(sourcedir, fil) - if os.path.isdir(fil): # skip dirs - skip = True - elif filterlist == []: # copy all - skip = False - else: # check filter - skip = True # default skip - for f in filterlist: - if f in fil: - skip = False # unless in filterlist - break - - if skip: - logging.debug(" [skip] .... %s " % fil) - continue - - self.restart_filelist.append(fil) - logging.debug(" [added to restart list] .... 
%s " % fil) - - sourcedir = os.path.join(self.tm_settings[self.outputdirkey]) - sd_ed = self.dacycle['time.sample.stamp'] - filterlist = ['flask_output.%s' % sd_ed, 'flux1x1_%s' % sd_ed] - - logging.debug("Creating a new list of TM5 output data to collect") - logging.debug(" from directory: %s " % sourcedir) - logging.debug(" with filter: %s " % filterlist) - - - # Start from empty lists for each TM5 run. Note that these "private" lists from the obs operator are later on appended to the system - # lists - - self.output_filelist = [] - - for fil in os.listdir(sourcedir): - fil = os.path.join(sourcedir, fil) - - if os.path.isdir(fil): # skip dirs - skip = True - elif filterlist == []: # copy all - skip = False - else: # check filterlist - skip = True # default skip - for f in filterlist: - if f in fil: - skip = False # unless in filterlist - break - - if skip: - logging.debug(" [skip] .... %s " % fil) - continue - - self.output_filelist.append(fil) - logging.debug(" [added to output list] .... %s " % fil) - - -################### End Class TM5 ################### - - -if __name__ == "__main__": - pass - - - - diff --git a/da/tools/__pycache__/__init__.cpython-37.pyc b/da/tools/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 2ef4b8c3afb04cfcbe346e3883c86daf4da7fa92..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/general.cpython-37.pyc b/da/tools/__pycache__/general.cpython-37.pyc deleted file mode 100644 index 3a231963fa3f485bd03c4d2c46dbdc7c568f3bd7..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/general.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/initexit.cpython-37.pyc b/da/tools/__pycache__/initexit.cpython-37.pyc deleted file mode 100644 index fa5ca4f1e3f9b2f85135b0900b39bd3e108bf0fa..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/initexit.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/io4.cpython-37.pyc b/da/tools/__pycache__/io4.cpython-37.pyc deleted file mode 100644 index 8d9eb6a9cd63018f26e534abde145d7d3331924c..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/io4.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/pipeline.cpython-37.pyc b/da/tools/__pycache__/pipeline.cpython-37.pyc deleted file mode 100644 index cac646ee8ea198e86e07510aa2f61fb9513f2b9c..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/pipeline.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/rc.cpython-37.pyc b/da/tools/__pycache__/rc.cpython-37.pyc deleted file mode 100644 index fea47b3f07a6fb1b84388dc4081f4437e9e50eb5..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/rc.cpython-37.pyc and /dev/null differ diff --git a/da/tools/__pycache__/standardvariables.cpython-37.pyc b/da/tools/__pycache__/standardvariables.cpython-37.pyc deleted file mode 100644 index 13af322b8b5d055a14e9412e14beaa97e6866145..0000000000000000000000000000000000000000 Binary files a/da/tools/__pycache__/standardvariables.cpython-37.pyc and /dev/null differ diff --git a/da/tools/general.py.bak b/da/tools/general.py.bak deleted file mode 100755 index 79c7bcadd0a2cd393a4810eddf9a4c177dca902c..0000000000000000000000000000000000000000 --- a/da/tools/general.py.bak +++ /dev/null @@ -1,278 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. 
-Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# tools_da.py - -""" -Author : peters - -Revision History: -File created on 03 Oct 2008. - -Temporary module to hold classes and methods that are in development - -""" - -import logging -import os -import shutil -import datetime -import re - -from dateutil.rrule import rrule, MO, TU, WE, TH, FR, SA, SU, YEARLY, \ - MONTHLY, WEEKLY, DAILY, HOURLY, MINUTELY, SECONDLY - -HOURS_PER_DAY = 24. -MINUTES_PER_DAY = 60.*HOURS_PER_DAY -SECONDS_PER_DAY = 60.*MINUTES_PER_DAY -MUSECONDS_PER_DAY = 1e6*SECONDS_PER_DAY -SEC_PER_MIN = 60 -SEC_PER_HOUR = 3600 -SEC_PER_DAY = SEC_PER_HOUR * 24 -SEC_PER_WEEK = SEC_PER_DAY * 7 -MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY = ( - MO, TU, WE, TH, FR, SA, SU) -WEEKDAYS = (MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) - - -def validate_rc(rcfile, needed_items): - """ validate the contents of an rc-file given a dictionary of required keys """ - - for k, v in rcfile.items(): - if v == 'True' : - rcfile[k] = True - if v == 'False': - rcfile[k] = False - if 'date' in k: - rcfile[k] = datetime.datetime.strptime(v, '%Y-%m-%d %H:%M:%S') - - for key in needed_items: - if not rcfile.has_key(key): - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - raise IOError(msg) - logging.debug('rc-file has been validated succesfully') - - -def create_dirs(dirname, forceclean=False): - """ Create a directory and report success, only if non-existent """ - - if forceclean: - try: - shutil.rmtree(dirname) - except: - pass - - if not os.path.exists(dirname): - os.makedirs(dirname) - logging.info('Creating new directory %s' % dirname) - else: - logging.debug('Using existing directory %s' % dirname) - return dirname - - -def advance_time(time_in, interval): - """ Advance time_in by a specified interval""" - - time_out = time_in - - if interval == 'month': # if monthly, this run will go to the first day of the next month - if time_in.month != 12: - time_out = datetime.datetime(time_in.year, time_in.month + 1, 1, time_in.hour, 0, 0) - else: - time_out = datetime.datetime(time_in.year + 1, 1, 1, time_in.hour, 0, 0) # end of year provision - elif interval == 'week': - time_out = time_in + datetime.timedelta(days=7) - elif isinstance(interval, datetime.timedelta): - time_out = time_in + interval - else: # assume that the interval specified is the number of days to run forward before resubmitting - time_out = time_in + datetime.timedelta(days=float(interval)) - - return time_out - - - -def to_datetime(datestring, fmt=None): - """ convert a date string to a datetime object """ - - if fmt == 'TM5': - datestring = '%04s-%02s-%02s %02s:%02s:00' % (datestring[0:4], datestring[4:6], datestring[6:8], datestring[8:10], datestring[10:12]) - elif fmt == 'pycasso-TM5': - pass # Format already compatible - else: - pass - - try: - 
return datetime.datetime.strptime(datestring, '%Y-%m-%d %H:%M:%S') - except: - date, time = datestring.split(' ') - year, month, day = map(int, date.split('-')) - hour, minute, second = map(int, time.split(':')) - return datetime.datetime(year, month, day, hour, minute, second) - - -def name_convert(name=None, to=None): - """ Convert between old GLOBALVIEW-style and new ObsPack-style - - print name_convert(name="lef_surface-pfp_1", to='GV' ) - lef_01P0 - - print name_convert(name="hun_35C3", to='OP' ) - hun_tower-insitu_35 - """ - - identifier = 'name_convert' - - if name == None or to == None: - return "" - - platform_dict = { 'surface':0, 'shipboard':1, 'aircraft':2, 'tower':3 } - strategy_dict = { 'flask':'D', 'pfp':'P', 'insitu':'C' } - - #----------------------------------->>> - if to.upper() == "GV": - # alt_surface-flask_1 -> alt_01D0 - try: - fields = name.split('_') - code = fields[0] - lab_num = int(fields[2]) - except: - return "" - - try: - fields = fields[1].split('-') - platform = fields[0] - strategy = fields[1] - except: - return "" - - platform_num = [ v for k, v in platform_dict.items() if platform.lower() == k ] - if len(platform_num) == 0: - print('%s: Platform %s not found in platform dict.' % (identifier, platform)) - return "" - - strategy_char = [ v for k, v in strategy_dict.items() if strategy.lower() == k ] - if len(strategy_char) == 0: - print('%s: Strategy %s not found in strategy dict.' % (identifier, strategy)) - return "" - return "%s_%2.2d%1s%1d" % (code, lab_num, strategy_char[0].upper(), int(platform_num[0])) - - #----------------------------------->>> - if to.upper() == "OP": - - # hun_35C3 -> hun_tower-insitu_35 - try: - fields = name.split('_') - - code = fields[0] - lab_num = int(fields[1][:2]) - strategy_char = fields[1][2] - platform_num = int(fields[1][3]) - except: - return "" - - platform = [ k for k, v in platform_dict.items() if v == platform_num ] - if len(platform) == 0: - print('%s: Platform number %s not found in platform dict.' % (identifier, platform_num)) - return "" - - pattern = re.compile(strategy_char, re.IGNORECASE) - strategy = [ k for k, v in strategy_dict.items() if pattern.search(v) ] - if len(strategy) == 0: - print('%s: Strategy character %s not found in strategy list.' % (identifier, strategy_char)) - return "" - return "%s_%s-%s_%d" % (code, platform[0], strategy[0], lab_num) - -def _to_ordinalf(dt): - """ - Convert :mod:`datetime` to the Gregorian date as UTC float days, - preserving hours, minutes, seconds and microseconds. Return value - is a :func:`float`. - """ - - if hasattr(dt, 'tzinfo') and dt.tzinfo is not None: - delta = dt.tzinfo.utcoffset(dt) - if delta is not None: - dt -= delta - - base = float(dt.toordinal()) - if hasattr(dt, 'hour'): - base += (dt.hour/HOURS_PER_DAY + dt.minute/MINUTES_PER_DAY + - dt.second/SECONDS_PER_DAY + dt.microsecond/MUSECONDS_PER_DAY - ) - return base - -def _from_ordinalf(x, tz=None): - """ - Convert Gregorian float of the date, preserving hours, minutes, - seconds and microseconds. Return value is a :class:`datetime`. 
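# As a concrete illustration of the ordinal-day convention described in
# these docstrings (days since 0001-01-01 00:00:00 UTC, plus one), the
# conversion of a sample date gives:
#
#   >>> date2num(datetime.datetime(2000, 1, 1))
#   730120.0
#
# and num2date() below is intended to invert that mapping back to a
# datetime object.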
- """ - if tz is None: tz = _get_rc_timezone() - ix = int(x) - dt = datetime.datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24*remainder, 1) - minute, remainder = divmod(60*remainder, 1) - second, remainder = divmod(60*remainder, 1) - microsecond = int(1e6*remainder) - if microsecond<10: microsecond=0 # compensate for rounding errors - dt = datetime.datetime( - dt.year, dt.month, dt.day, int(hour), int(minute), int(second), - microsecond, tzinfo=UTC).astimezone(tz) - - if microsecond>999990: # compensate for rounding errors - dt += datetime.timedelta(microseconds=1e6-microsecond) - - return dt - -def date2num(d): - """ - *d* is either a :class:`datetime` instance or a sequence of datetimes. - - Return value is a floating point number (or sequence of floats) - which gives the number of days (fraction part represents hours, - minutes, seconds) since 0001-01-01 00:00:00 UTC, *plus* *one*. - The addition of one here is a historical artifact. Also, note - that the Gregorian calendar is assumed; this is not universal - practice. For details, see the module docstring. - """ - try: - return np.asarray([_to_ordinalf(val) for val in d]) - except: - return _to_ordinalf(d) - - -def num2date(x, tz=None): - """ - *x* is a float value which gives the number of days - (fraction part represents hours, minutes, seconds) since - 0001-01-01 00:00:00 UTC *plus* *one*. - The addition of one here is a historical artifact. Also, note - that the Gregorian calendar is assumed; this is not universal - practice. For details, see the module docstring. - - Return value is a :class:`datetime` instance in timezone *tz* (default to - rcparams TZ value). - - If *x* is a sequence, a sequence of :class:`datetime` objects will - be returned. - """ - if tz is None: tz = _get_rc_timezone() - try: - return [_from_ordinalf(val, tz) for val in x] - except: - return _from_ordinalf(x, tz) - - -if __name__ == "__main__": - pass diff --git a/da/tools/initexit.py b/da/tools/initexit.py index 2a992fcbfeff726c674049c986d9b05bc07201a0..a420c993ebe8b7b7c47911c2bc4b46e1cd0839cd 100755 --- a/da/tools/initexit.py +++ b/da/tools/initexit.py @@ -157,7 +157,7 @@ class CycleControl(dict): self[k] = False if 'date' in k : self[k] = to_datetime(v) - if k in ['time.start', 'time.end', 'time.finish', 'da.restart.tstamp']: + if k in ['time.start', 'time.end', 'time.finish', 'da.restart.tstamp', 'time.fxstart']: self[k] = to_datetime(v) for key in needed_da_items: if key not in self: @@ -332,25 +332,20 @@ class CycleControl(dict): strippedname = os.path.split(self['jobrcfilename'])[-1] self['jobrcfilename'] = os.path.join(self['dir.exec'], strippedname) -# shutil.copy(os.path.join(self.dasystem['regionsfile']),os.path.join(self['dir.exec'],'da','analysis','copied_regions.nc')) - logging.info('Copied regions file to the analysis directory: %s'%os.path.join(self.dasystem['regionsfile'])) + if 'extendedregionsfile' in self.dasystem: # shutil.copy(os.path.join(self.dasystem['extendedregionsfile']),os.path.join(self['dir.exec'],'da','analysis','copied_regions_extended.nc')) logging.info('Copied extended regions file to the analysis directory: %s'%os.path.join(self.dasystem['extendedregionsfile'])) - else: -# shutil.copy(os.path.join(self['dir.exec'],'da','analysis','olson_extended.nc'),os.path.join(self['dir.exec'],'da','analysis','copied_regions_extended.nc')) - logging.info('Copied extended regions within the analysis directory: %s'%os.path.join(self['dir.exec'],'da','analysis','olson_extended.nc')) - for filename in 
glob.glob(os.path.join(self['dir.exec'],'da','analysis','*.pickle')): - logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) - os.remove(filename) - for filename in glob.glob(os.path.join(self['dir.exec'],'*.pickle')): - logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) - os.remove(filename) + #for filename in glob.glob(os.path.join(self['dir.exec'],'da','analysis','*.pickle')): + # logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) + # os.remove(filename) + #for filename in glob.glob(os.path.join(self['dir.exec'],'*.pickle')): + # logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) + # os.remove(filename) if 'random.seed.init' in self.dasystem: self.read_random_seed(True) self.parse_times() - #self.write_rc(self['jobrcfilename']) def setup_file_structure(self): """ @@ -397,7 +392,7 @@ class CycleControl(dict): logging.info('Succesfully created the file structure for the assimilation job') - def finalize(self): + def finalize(self, make_obs=False): """ finalize the da cycle, this means writing the save data and rc-files for the next run. The following sequence of actions occur: @@ -410,7 +405,7 @@ class CycleControl(dict): """ self.write_random_seed() - self.write_new_rc_file() + self.write_new_rc_file(make_obs) self.collect_restart_data() # Collect restart data for next cycle into a clean restart/current folder self.collect_output() # Collect restart data for next cycle into a clean restart/current folder @@ -488,7 +483,7 @@ class CycleControl(dict): # - def write_new_rc_file(self): + def write_new_rc_file(self, make_obs=False): """ Write the rc-file for the next DA cycle. .. note:: The start time for the next cycle is the end time of this one, while @@ -503,7 +498,8 @@ class CycleControl(dict): new_dacycle = copy.deepcopy(self) new_dacycle['da.restart.tstamp'] = self['time.start'] new_dacycle.advance_cycle_times() - new_dacycle['time.restart'] = True + if not make_obs: + new_dacycle['time.restart'] = True # Create the name of the rc-file that will hold this new input, and write it diff --git a/da/tools/initexit.py.bak b/da/tools/initexit.py.bak deleted file mode 100755 index 4a29e1dd50cabc1021945a85363f7e25a474fb65..0000000000000000000000000000000000000000 --- a/da/tools/initexit.py.bak +++ /dev/null @@ -1,684 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# da_initexit.py - -""" -.. module:: initexit -.. moduleauthor:: Wouter Peters - -Revision History: -File created on 13 May 2009. - -The CycleControl class is found in the module :mod:`initexit`. 
It is derived from the standard python :class:`dictionary` object. It is the only core object of CTDAS that is automatically created in the pipeline, the user (normally) does not need to modify or extend it. The class is created based on options and arguments passes on the command line when submitting your main CTDAS job. - -Valid options are defined in - -.. autofunction:: da.tools.initexit.parse_options - -With the name of a valid ``rc-file``, the CycleControl object is instantiated and validated. An example rc-file looks -like this::: - - ! Info on the data assimilation cycle - - time.restart : False ! Restart from an existing run T/F - time.start : 2000-01-01 00:00:00 ! Start time of first cycle - time.finish : 2000-01-08 00:00:00 ! End time of last cycle - time.cycle : 7 ! length of each cycle, 7 means one week - time.nlag : 5 ! number of cycles in one smoother window - dir.da_run : ${HOME}/tmp/test_da ! the run directory for you project - - ! Info on the DA system used - - da.system : CarbonTracker ! an identifier for your inversion system - da.system.rc : da/rc/carbontracker.rc ! the settings needed in your inversion system - - ! Info on the forward model to be used - - da.obsoperator : TM5 ! an identifier for your observation operator - da.obsoperator.rc : ${HOME}/Modeling/TM5/tm5-ctdas.rc ! the rc-file needed to run youobservation operator - da.optimizer.nmembers : 30 ! the number of ensemble members desired in the optimization - -The most important method of the CycleControl object are listed below: - -.. autoclass:: da.tools.initexit.CycleControl - :members: setup, finalize, collect_restart_data, move_restart_data, - submit_next_cycle, setup_file_structure, recover_run, random_seed - -Two important attributes of the CycleControl object are: - (1) DaSystem, an instance of a :ref:`dasystem` - (2) DaPlatForm, an instance of a :ref:`platform` - -Other functions in the module initexit that are related to the control of a DA cycle are: - -.. autofunction:: da.tools.initexit.start_logger -.. autofunction:: da.tools.initexit.validate_opts_args - - -""" -import logging -import os -import sys -import glob -import shutil -import copy -import getopt -import pickle -import numpy as np -#from string import join - -import da.tools.rc as rc -from da.tools.general import create_dirs, to_datetime, advance_time - -needed_da_items = [ - 'time.start', - 'time.finish', - 'time.nlag', - 'time.cycle', - 'dir.da_run', - 'da.resources.ncycles_per_job', - 'da.resources.ntasks', - 'da.resources.ntime', - 'da.system', - 'da.system.rc', - 'da.obsoperator', - 'da.obsoperator.rc', - 'da.optimizer.nmembers'] - -# only needed in an earlier implemented where each substep was a separate job -# validprocesses = ['start','done','samplestate','advance','invert'] - - -class CycleControl(dict): - """ - This object controls the CTDAS system flow and functionality. - """ - - def __init__(self, opts=[], args={}): - """ - The CycleControl object is instantiated with a set of options and arguments. - The list of arguments must contain the name of an existing ``rc-file``. 
- This rc-file is loaded by method :meth:`~da.tools.initexit.CycleControl.load_rc` and validated - by :meth:`~da.tools.initexit.CycleControl.validate_rc` - - Options for the CycleControl consist of accepted command line flags or arguments - in :func:`~da.tools.initexit.CycleControl.parse_options` - - """ - rcfile = args['rc'] - self.load_rc(rcfile) - self.validate_rc() - self.opts = opts - - # Add some useful variables to the rc-file dictionary - - self['jobrcfilename'] = rcfile - self['dir.da_submit'] = os.getcwd() - self['da.crash.recover'] = '-r' in opts - self['transition'] = '-t' in opts - self['verbose'] = '-v' in opts - self.dasystem = None # to be filled later - self.restart_filelist = [] # List of files needed for restart, to be extended later - self.output_filelist = [] # List of files needed for output, to be extended later - - - def load_rc(self, rcfilename): - """ - This method loads a DA Cycle rc-file with settings for this simulation - """ - - rcdata = rc.read(rcfilename) - for k, v in rcdata.items(): - self[k] = v - - logging.info('DA Cycle rc-file (%s) loaded successfully' % rcfilename) - - - def validate_rc(self): - """ - Validate the contents of the rc-file given a dictionary of required keys. - Currently required keys are :attr:`~da.tools.initexit.needed_da_items` - """ - - for k, v in self.items(): - if v in ['True', 'true', 't', 'T', 'y', 'yes']: - self[k] = True - if v in ['False', 'false', 'f', 'F', 'n', 'no']: - self[k] = False - if 'date' in k : - self[k] = to_datetime(v) - if k in ['time.start', 'time.end', 'time.finish', 'da.restart.tstamp']: - self[k] = to_datetime(v) - for key in needed_da_items: - if key not in self: - msg = 'Missing a required value in rc-file : %s' % key - logging.error(msg) - logging.error('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ') - logging.error('Please note the update on Dec 02 2011 where rc-file names for DaSystem and ') - logging.error('are from now on specified in the main rc-file (see da/rc/da.rc for example)') - logging.error('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
') - raise IOError(msg) - logging.debug('DA Cycle settings have been validated succesfully') - - def parse_times(self): - """ - Parse time related parameters into datetime objects for later use - """ - - startdate = self['time.start'] - finaldate = self['time.finish'] - - if finaldate <= startdate: - logging.error('The start date (%s) is not greater than the end date (%s), please revise' % (startdate.strftime('%Y%m%d'), finaldate.strftime('%Y%m%d'))) - raise ValueError - cyclelength = self['time.cycle'] # get time step - -# Determine end date - - if cyclelength == 'infinite': - enddate = finaldate - else: - enddate = advance_time(startdate, cyclelength) - - dt = enddate - startdate - - if enddate > finaldate: # do not run beyond finaldate - enddate = finaldate - - self['time.start'] = startdate - self['time.end'] = enddate - self['time.finish'] = finaldate - self['cyclelength'] = dt - - logging.info("===============================================================") - logging.info("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M')) - logging.info("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M')) - logging.info("DA Cycle final date is %s" % finaldate.strftime('%Y-%m-%d %H:%M')) - logging.info("DA Cycle cycle length is %s" % cyclelength) - logging.info("DA Cycle restart is %s" % str(self['time.restart'])) - logging.info("===============================================================") - - - def set_sample_times(self, lag): - """ - Set the times over which a sampling interval will loop, depending on - the lag. Note that lag falls in the interval [0,nlag-1] - """ - - # Start from cycle times - self['time.sample.start'] = copy.deepcopy(self['time.start']) - self['time.sample.end'] = copy.deepcopy(self['time.end']) - - # Now advance depending on lag - - for l in range(lag): - self.advance_sample_times() - - - def advance_sample_times(self): - """ - Advance sampling start and end time by one cycle interval - """ - - days = self['cyclelength'].days - - self['time.sample.start'] = advance_time(self['time.sample.start'], days) - self['time.sample.end'] = advance_time(self['time.sample.end'], days) - - - def advance_cycle_times(self): - """ - Advance cycle start and end time by one cycle interval - """ - - days = self['cyclelength'].days - - startdate = advance_time(self['time.start'], days) - enddate = advance_time(self['time.end'], days) - - filtertime = startdate.strftime('%Y%m%d') - self['dir.output'] = os.path.join(self['dir.da_run'], 'output', filtertime) - - self['time.start'] = startdate - self['time.end'] = enddate - - - def write_random_seed(self): - filename = os.path.join(self['dir.restart'], 'randomseed_%s.pickle' % self['time.start'].strftime('%Y%m%d')) - f = open(filename, 'wb') - seed = np.random.get_state() - pickle.dump(seed, f, -1) - f.close() - - logging.info("Saved the random seed generator values to file") - - - def read_random_seed(self, first=False): - if first: - filename = self.dasystem['random.seed.init'] - logging.info("Initialised random seed from: %s"%filename) - else: - filename = os.path.join(self['dir.restart'], 'randomseed_%s.pickle' % self['da.restart.tstamp'].strftime('%Y%m%d')) - logging.info("Retrieved the random seed generator values of last cycle from file") - f = open(filename, 'rb') - seed = pickle.load(f) - np.random.set_state(seed) - f.close() - - - def setup(self): - """ - This method determines how to proceed with the cycle. Three options are implemented: - - 1. 
*Fresh start* : set up the required file structure for this simulation and start - 2. *Restart* : use latest da_runtime variables from the exec dir and restart - 3. *Recover* : restart after crash by getting data from restart/one-ago folder - - The choice that gets executed depends on the presence of - - # the ``-r`` option on the command line, this triggers a recover - # the ``time.restart : True`` option in the da.rc file - - The latter is automatically set if the filter submits the next cycle at the end of the current one, - through method :meth:`~da.tools.initexit.CycleControl.submit_next_cycle`. - - The specific call tree under each scenario is: - - 1. *Fresh Start* - * :meth:`~da.tools.initexit.CycleControl.setup_file_structure()` <- Create directory tree - 2. *Restart* - * :meth:`~da.tools.initexit.CycleControl.setup_file_structure()` - * :meth:`~da.tools.initexit.CycleControl.random_seed` <- Read the random seed from file - 3. *Recover* - * :meth:`~da.tools.initexit.CycleControl.setup_file_structure()` - * :meth:`~da.tools.initexit.CycleControl.recover_run()` <- Recover files from restart/one-ago dir, reset ``time.start`` - * :meth:`~da.tools.initexit.CycleControl.random_seed` - - And is always followed by a call to - - * parse_times() - * WriteRc('jobfilename') - """ - if self['transition']: - logging.info("Transition of filter from previous step with od meteo from 25 to 34 levels") - self.setup_file_structure() - strippedname = os.path.split(self['jobrcfilename'])[-1] - self['jobrcfilename'] = os.path.join(self['dir.exec'], strippedname) - self.read_random_seed(False) - - elif self['time.restart']: - logging.info("Restarting filter from previous step") - self.setup_file_structure() - strippedname = os.path.split(self['jobrcfilename'])[-1] - self['jobrcfilename'] = os.path.join(self['dir.exec'], strippedname) - self.read_random_seed(False) - - else: #assume that it is a fresh start, change this condition to more specific if crash recover added - logging.info("First time step in filter sequence") - self.setup_file_structure() - - # expand jobrcfilename to include exec dir from now on. 
- # First strip current leading path from filename - - strippedname = os.path.split(self['jobrcfilename'])[-1] - self['jobrcfilename'] = os.path.join(self['dir.exec'], strippedname) -# shutil.copy(os.path.join(self.dasystem['regionsfile']),os.path.join(self['dir.exec'],'da','analysis','copied_regions.nc')) - logging.info('Copied regions file to the analysis directory: %s'%os.path.join(self.dasystem['regionsfile'])) - if 'extendedregionsfile' in self.dasystem: -# shutil.copy(os.path.join(self.dasystem['extendedregionsfile']),os.path.join(self['dir.exec'],'da','analysis','copied_regions_extended.nc')) - logging.info('Copied extended regions file to the analysis directory: %s'%os.path.join(self.dasystem['extendedregionsfile'])) - else: -# shutil.copy(os.path.join(self['dir.exec'],'da','analysis','olson_extended.nc'),os.path.join(self['dir.exec'],'da','analysis','copied_regions_extended.nc')) - logging.info('Copied extended regions within the analysis directory: %s'%os.path.join(self['dir.exec'],'da','analysis','olson_extended.nc')) - for filename in glob.glob(os.path.join(self['dir.exec'],'da','analysis','*.pickle')): - logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) - os.remove(filename) - for filename in glob.glob(os.path.join(self['dir.exec'],'*.pickle')): - logging.info('Deleting pickle file %s to make sure the correct regions are used'%os.path.split(filename)[1]) - os.remove(filename) - if 'random.seed.init' in self.dasystem: - self.read_random_seed(True) - - self.parse_times() - #self.write_rc(self['jobrcfilename']) - - def setup_file_structure(self): - """ - Create file structure needed for data assimilation system. - In principle this looks like: - - * ``${da_rundir}`` - * ``${da_rundir}/input`` - * ``${da_rundir}/output`` - * ``${da_rundir}/exec`` - * ``${da_rundir}/analysis`` - * ``${da_rundir}/jobs`` - * ``${da_rundir}/restart/current`` - * ``${da_rundir}/restart/one-ago`` - - .. note:: The exec dir will actually be a simlink to the directory where - the observation operator executable lives. This directory is passed through - the ``da.rc`` file. - - .. note:: The observation input files will be placed in the exec dir, - and the resulting simulated values will be retrieved from there as well. - - """ - -# Create the run directory for this DA job, including I/O structure - - filtertime = self['time.start'].strftime('%Y%m%d') - - self['dir.exec'] = os.path.join(self['dir.da_run'], 'exec') - self['dir.input'] = os.path.join(self['dir.da_run'], 'input') - self['dir.output'] = os.path.join(self['dir.da_run'], 'output', filtertime) - self['dir.analysis'] = os.path.join(self['dir.da_run'], 'analysis') - self['dir.jobs'] = os.path.join(self['dir.da_run'], 'jobs') - self['dir.restart'] = os.path.join(self['dir.da_run'], 'restart') - - create_dirs(self['dir.da_run']) - create_dirs(os.path.join(self['dir.exec'])) - create_dirs(os.path.join(self['dir.input'])) - create_dirs(os.path.join(self['dir.output'])) - create_dirs(os.path.join(self['dir.analysis'])) - create_dirs(os.path.join(self['dir.jobs'])) - create_dirs(os.path.join(self['dir.restart'])) - - logging.info('Succesfully created the file structure for the assimilation job') - - - def finalize(self): - """ - finalize the da cycle, this means writing the save data and rc-files for the next run. 
- The following sequence of actions occur: - - * Write the randomseed to file for reuse in next cycle - * Write a new ``rc-file`` with ``time.restart : True``, and new ``time.start`` and ``time.end`` - * Collect all needed data needed for check-pointing (restart from current system state) - * Move the previous check pointing data out of the way, and replace with current - * Submit the next cycle - - """ - self.write_random_seed() - self.write_new_rc_file() - - self.collect_restart_data() # Collect restart data for next cycle into a clean restart/current folder - self.collect_output() # Collect restart data for next cycle into a clean restart/current folder - self.submit_next_cycle() - - def collect_output(self): - """ Collect files that are part of the requested output for this cycle. This function allows users to add files - to a list, and then the system will copy these to the current cycle's output directory. - The list of files included is read from the - attribute "output_filelist" which is a simple list of files that can be appended by other objects/methods that - require output data to be saved. - - - """ - targetdir = os.path.join(self['dir.output']) - create_dirs(targetdir) - - logging.info("Collecting the required output data") - logging.debug(" to directory: %s " % targetdir) - - for file in set(self.output_filelist): - if os.path.isdir(file): # skip dirs - continue - if not os.path.exists(file): # skip dirs - logging.warning(" [not found] .... %s " % file) - continue - - logging.debug(" [copy] .... %s " % file) - shutil.copy(file, file.replace(os.path.split(file)[0], targetdir)) - - - - def collect_restart_data(self): - """ Collect files needed for the restart of this cycle in case of a crash, or for the continuation of the next cycle. - All files needed are written to the restart/current directory. The list of files included is read from the - attribute "restart_filelist" which is a simple list of files that can be appended by other objects/methods that - require restart data to be saved. - - .. note:: Before collecting the files in the ``restart_filelist``, the restart/current directory will be emptied and - recreated. This prevents files from accumulating in the restart/current and restart/one-ago folders. It - also means that if a file is missing from the ``restart_filelist``, it will not be available for check-pointing - if your run crashes or dies! - - Currently, the following files are included: - - * The ``da_runtime.rc`` file - * The ``randomseed.pickle`` file - * The savestate.nc file - * The files in the ``ObservationOperator.restart_filelist``, i.e., restart data for the transport model - - - .. note:: We assume that the restart files for the :ref:`ObservationOperator` - reside in a separate folder, i.e, the ObservationOperator does *not* write directly to the CTDAS restart dir! - - """ - - targetdir = os.path.join(self['dir.restart']) - - #logging.info("Purging the current restart directory before collecting new data") - - #create_dirs(targetdir, forceclean=True) - - logging.info("Collecting the required restart data") - logging.debug(" to directory: %s " % targetdir) - - for file in set(self.restart_filelist): - if os.path.isdir(file): # skip dirs - continue - if not os.path.exists(file): - logging.warning(" [not found] .... %s " % file) - else: - logging.debug(" [copy] .... %s " % file) - shutil.copy(file, file.replace(os.path.split(file)[0], targetdir)) - - - -# - def write_new_rc_file(self): - """ Write the rc-file for the next DA cycle. - - .. 
note:: The start time for the next cycle is the end time of this one, while - the end time for the next cycle is the current end time + one cycle length. - - The resulting rc-file is written to the ``dir.exec`` so that it can be used when resubmitting the next cycle - - """ - - # We make a copy of the current dacycle object, and modify the start + end dates and restart value - - new_dacycle = copy.deepcopy(self) - new_dacycle['da.restart.tstamp'] = self['time.start'] - new_dacycle.advance_cycle_times() - new_dacycle['time.restart'] = True - - # Create the name of the rc-file that will hold this new input, and write it - - #fname = os.path.join(self['dir.exec'], 'da_runtime.rc') # current exec dir holds next rc file - - fname = os.path.join(self['dir.restart'], 'da_runtime_%s.rc' % new_dacycle['time.start'].strftime('%Y%m%d'))#advanced time - - rc.write(fname, new_dacycle) - logging.debug('Wrote new da_runtime.rc (%s) to restart dir' % fname) - - # The rest is info needed for a system restart, so it modifies the current dacycle object (self) - - self['da.restart.fname'] = fname # needed for next job template - #self.restart_filelist.append(fname) # not that needed since it is already written to the restart dir... - #logging.debug('Added da_runtime.rc to the restart_filelist for later collection') - - - def write_rc(self, fname): - """ Write RC file after each process to reflect updated info """ - - rc.write(fname, self) - logging.debug('Wrote expanded rc-file (%s)' % fname) - - - def submit_next_cycle(self): - """ - Submit the next job of a DA cycle, this consists of - * Changing to the working directory from which the job was started initially - * create a line to start the master script again with a newly created rc-file - * Submitting the jobfile - - If the end of the cycle series is reached, no new job is submitted. 
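# With hypothetical dates and script names, one block appended to the job
# template by the code below would look roughly like:
#
#   export icycle_in_job=1
#   python main_ctdas_script.py rc=da_runtime_20000108.rc >& jb.20000108.log
#
# i.e. each cycle in the job re-invokes the main CTDAS script with the
# rc-file written for the next cycle, and, as the do_submit logic below
# suggests, only the last cycle of a job submits a fresh job file.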
- - """ - - - if self['time.end'] < self['time.finish']: - - # file ID and names - jobid = self['time.end'].strftime('%Y%m%d') - targetdir = os.path.join(self['dir.exec']) - jobfile = os.path.join(targetdir, 'jb.%s.jb' % jobid) - logfile = os.path.join(targetdir, 'jb.%s.log' % jobid) - # Template and commands for job - jobparams = {'jobname':"j.%s" % jobid, 'jobnodes':self['da.resources.ntasks'], 'jobtime': self['da.resources.ntime'], 'logfile': logfile, 'errfile': logfile} - template = self.daplatform.get_job_template(jobparams) - execcommand = os.path.join(self['dir.da_submit'], sys.argv[0]) - if '-t' in self.opts: - (self.opts).remove('-t') - - if 'icycle_in_job' not in os.environ: - logging.info('Environment variable icycle_in_job not found, resubmitting after this cycle') - os.environ['icycle_in_job'] = self['da.resources.ncycles_per_job'] # assume that if no cycle number is set, we should submit the next job by default - else: - logging.info('Environment variable icycle_in_job was found, processing cycle %s of %s in this job'%(os.environ['icycle_in_job'],self['da.resources.ncycles_per_job']) ) - - ncycles = int(self['da.resources.ncycles_per_job']) - for cycle in range(ncycles): - nextjobid = '%s'% ( (self['time.end']+cycle*self['cyclelength']).strftime('%Y%m%d'),) - nextrestartfilename = self['da.restart.fname'].replace(jobid,nextjobid) - nextlogfilename = logfile.replace(jobid,nextjobid) - if self.daplatform.ID == 'WU capegrim': - template += '\nexport icycle_in_job=%d\npython %s rc=%s %s >&%s &\n' % (cycle+1,execcommand, nextrestartfilename, str.join(str(self.opts), ''), nextlogfilename,) - else: - template += '\nexport icycle_in_job=%d\npython %s rc=%s %s >&%s\n' % (cycle+1,execcommand, nextrestartfilename, str.join(str(self.opts), ''), nextlogfilename,) - - # write and submit - self.daplatform.write_job(jobfile, template, jobid) - if 'da.resources.ncycles_per_job' in self: - do_submit = (int(os.environ['icycle_in_job']) >= int(self['da.resources.ncycles_per_job'])) - else: - dosubmit = False - - if do_submit: - jobid = self.daplatform.submit_job(jobfile, joblog=logfile) - - else: - logging.info('Final date reached, no new cycle started') - - -def start_logger(level=logging.INFO): - """ start the logging of messages to screen""" - -# start the logging basic configuration by setting up a log file - - logging.basicConfig(level=level, - format=' [%(levelname)-7s] (%(asctime)s) py-%(module)-20s : %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') - -def parse_options(): - """ - Function parses options from the command line and returns the arguments as a dictionary. 
- Accepted command line arguments are: - - ======== ======= - Argument Meaning - ======== ======= - -v verbose output in log files - -h display help - -r start a simulation by recovering from a previous crash - -t start a simulation by transitioning from 25 to 34 layers in December 2005 (od meteo) - ======== ======= - - """ - -# Parse keywords, the only option accepted so far is the "-h" flag for help - - opts = [] - args = [] - try: - opts, args = getopt.gnu_getopt(sys.argv[1:], "-rvt") - except getopt.GetoptError as msg: - logging.error('%s' % msg) - sys.exit(2) - - for options in opts: - options = options[0].lower() - if options == '-r': - logging.info('-r flag specified on command line: recovering from crash') - if options == '-t': - logging.info('-t flag specified on command line: transition with od from December 2005') - if options == '-v': - logging.info('-v flag specified on command line: extra verbose output') - logging.root.setLevel(logging.DEBUG) - - if opts: - optslist = [item[0] for item in opts] - else: - optslist = [] - -# Parse arguments and return as dictionary - - arguments = {} - for item in args: - #item=item.lower() - -# Catch arguments that are passed not in "key=value" format - - if '=' in item: - key, arg = item.split('=') - else: - logging.error('%s' % 'Argument passed without description (%s)' % item) - raise getopt.GetoptError(arg) - - arguments[key] = arg - - - return optslist, arguments - -def validate_opts_args(opts, args): - """ - Validate the options and arguments passed from the command line before starting the cycle. The validation consists of checking for the presence of an argument "rc", and the existence of - the specified rc-file. - - """ - if "rc" not in args: - msg = "There is no rc-file specified on the command line. Please use rc=yourfile.rc" - logging.error(msg) - raise IOError(msg) - elif not os.path.exists(args['rc']): - msg = "The specified rc-file (%s) does not exist " % args['rc'] - logging.error(msg) - raise IOError(msg) - - # WP not needed anymore - #if not args.has_key('process'): - # msg = "There is no process specified on the command line, assuming process=Start" ; logging.info(msg) - # args['process'] = 'start' - #if args['process'].lower() not in validprocesses: - # msg = "The specified process (%s) is not valid"%args['process'] ; logging.error(msg) - # raise IOError,msg - - return opts, args - - -if __name__ == "__main__": - pass - diff --git a/da/tools/io4.py.bak b/da/tools/io4.py.bak deleted file mode 100755 index 45dab0a98c2adc4e4fa9b6ea1154d83c88742a85..0000000000000000000000000000000000000000 --- a/da/tools/io4.py.bak +++ /dev/null @@ -1,494 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#!/usr/bin/env python -# io.py - -""" -Author : peters - -Revision History: -File created on 15 Oct 2008. 
-File modified for CT data assimilation system in July 2010, Wouter Peters - -""" -from . import standardvariables -import netCDF4 -#import pyhdf.SD as hdf -import datetime as dt -from numpy import array, arange -import os -import logging -import sys - -disclaimer = "This data belongs to the CarbonTracker project" -email = "wouter.peters@wur.nl" -url = "http://carbontracker.wur.nl" -institution = "Wageningen University and Research Center" -source = "CarbonTracker release 2.0" -conventions = "CF-1.1" -historytext = 'created on '+dt.datetime.now().strftime('%B %d, %Y')+' by %s'%os.environ['USER'] - -std_savedict={'name':'unknown','values':[],'dims':(0,0,),'units':'','long_name':'','comment':''} - -def ct_read(filename='',method=''): - """ read from an HDF or NetCDF file. Function choses itself which type is needed """ - - if 'hdf' in filename.split('.'): - return CT_HDF(filename,method) - elif 'nc' in filename.split('.'): - return CT_CDF(filename,method) - else: - msg = 'Could not determine whether input file was NetCDF or HDF trying both: ' ; logging.warning(msg) - try: - return CT_CDF(filename,method) - except: - return CT_HDF(filename,method) - -class CT_CDF(netCDF4.Dataset): - """ function opens a NetCDF file for writing of output""" - - def __init__(self,filename, method='read'): - - if method not in ['read','write','create']: - raise ValueError('Method %s is not defined for a CarbonTracker NetCDF file object' % method) - - if method == 'read': - try: - super(CT_CDF,self).__init__(filename, 'r') - except RuntimeError: - msg = 'Requested file not found for opening: %s'%filename ; logging.error(msg) - msg = "Exiting" ; logging.info(msg) - sys.exit(2) - elif method == 'write': - try: - super(CT_CDF,self).__init__(filename, 'a') - except: - super(CT_CDF,self).__init__(filename, 'w',format='NETCDF4') - - #self.AddCTHeader() - elif method == 'create': - if os.path.exists(filename): os.remove(filename) - super(CT_CDF,self).__init__(filename, 'w',format='NETCDF4') - self.add_tc_header() - - - def add_tc_header(self): - - # - self.setncattr('Institution',institution) - self.setncattr('Contact',email) - self.setncattr('URL',url) - self.setncattr('Source',source) - self.setncattr('Convention',conventions) - self.setncattr('Disclaimer',disclaimer) - self.setncattr('History',historytext) - - def add_params_dim(self,nparams): - - if 'nparameters' in self.dimensions.keys(): - pass - else: - dimparams=self.createDimension('nparameters',size=nparams) - - return ('nparameters',) - - def add_members_dim(self,nmembers): - - if 'nmembers' in self.dimensions.keys(): - pass - else: - dimmembers=self.createDimension('nmembers',size=nmembers) - - return ('nmembers',) - - def add_lag_dim(self,nlag,unlimited=True): - - if 'nlag' in self.dimensions.keys(): - pass - else: - if unlimited: - dimlag = self.createDimension('nlag',size=None) - else: - dimlag = self.createDimension('nlag',size=nlag) - - return ('nlag',) - - def add_obs_dim(self,nobs): - - if 'nobs' in self.dimensions.keys(): - pass - else: - dimobs = self.createDimension('nobs',size=nobs) - - return ('nobs',) - - def add_latlon_dim(self,istart=0,iend=360,jstart=0,jend=180): - - from numpy import arange, float64 - - if 'latitude' in self.dimensions.keys(): return ('latitude','longitude',) - - lons=-180+arange(360)*1.0+0.5 - lats=-90+arange(180)*1.0+0.5 - # - lats=lats[jstart:jend] - lons=lons[istart:iend] - # - dimlon = self.createDimension('longitude',size=lons.shape[0]) - dimlat = self.createDimension('latitude',size=lats.shape[0]) - - 
savedict=self.standard_var(varname='latitude') - savedict['values']=lats.tolist() - savedict['actual_range']=(float(lats[0]),float(lats[-1])) - savedict['dims']=('latitude',) - self.add_data(savedict) - - savedict=self.standard_var(varname='longitude') - savedict['values']=lons.tolist() - savedict['actual_range']=(float(lons[0]),float(lons[-1])) - savedict['dims']=('longitude',) - self.add_data(savedict) - - return ('latitude','longitude',) - - def add_region_dim(self,type='eco',dimsize=None): - - from da.analysis.tools_transcom import olsonlabs, transnams, ext_transnams, ext_transcomps, olsonnams - from da.analysis.tools_regions import ext_econams, ext_ecocomps - - if type not in ['eco','eco_ext','tc','tc_ext','olson']: - raise ValueError('Type of dimension for regions requested (%s) is not possible' %type) - - dimname='regions_%s' % type - - if dimname in self.dimensions.keys(): - return (dimname,) - - if type == 'olson': - - dim = self.createDimension(dimname,size=len(olsonlabs)) - - for i,name in enumerate(olsonnams): - att = setattr(self, 'OlsonEcosystem_%03d'%(i+1,), name ) - - elif type == 'tc': - - dim = self.createDimension(dimname,size=len(transnams)) - for i,name in enumerate(transnams): - att = setattr(self, 'TransComRegion_%03d'%(i+1,), name ) - - elif type == 'tc_ext': - - dim = self.createDimension(dimname,size=len(ext_transnams)) - - for i,name in enumerate(ext_transnams): - lab='Aggregate_Region_%03d'%(i+1,) - setattr(self,lab,name) - for i,name in enumerate(ext_transcomps): - lab='Aggregate_Components_%03d'%(i+1,) - setattr(self,lab,name) - - elif type == 'eco': - - dim = self.createDimension(dimname,size=dimsize) - - return (dimname,) - - - def add_date_dim(self,unlimited=False): - - if 'date' in self.dimensions.keys(): - pass - else: - dimdate = self.createDimension('date',size=None) - - return ('date',) - - def add_date_dim_format(self): - - if 'yyyymmddhhmmss' in self.dimensions.keys(): - pass - else: - dimdateformat = self.createDimension('yyyymmddhhmmss',size=6) - return ('yyyyymmddhhmmss',) - - def add_dim(self,dimname,dimsize): - - if dimname in self.dimensions.keys(): - pass - else: - newdim = self.createDimension(dimname,dimsize) - return (dimname,) - - def has_date(self,dd): - - if 'date' not in self.dimensions: - return False - if 'date' not in self.variables: - return False - if self.dimensions['date'].isunlimited: - if dd in self.get_variable('date').tolist(): - return True - else: - return False - else: - return False - - def get_variable(self,varname): - """ get variable from ncf file""" - return self.variables[varname][:] - - def get_attribute(self,attname): - """ get attribute from ncf file""" - return getattr(self,attname) - - def add_attribute(self,attname,attvalue): - """ set attribute in ncf file""" - self.setncattr(attname,attvalue) - - def standard_var(self,varname): - """ return properties of standard variables """ - from . 
import standardvariables - - if varname in standardvariables.standard_variables.keys(): - return standardvariables.standard_variables[varname] - else: - return standardvariables.standard_variables['unknown'] - - def inq_unlimlen(self): - """ return lenght of unlimited dimenion(s) """ - - unlims=() - for dimname, dimobj in self.dimensions.items(): - if dimobj.isunlimited() : unlims += (len(dimobj),) - - return unlims - - def has_unlimlen(self,dims): - """ return T/F whether dimensions include an unlimited dimenion(s) """ - - for dimname, dimobj in self.dimensions.items(): - if dimname in dims: - if dimobj.isunlimited() : return True - - return False - - def add_variable(self,datadict,silent=True): - """ add variables to file, but no data""" - import numpy as np - - existing_vars=self.variables - - if datadict['name'] in existing_vars: - return - else: - if not silent: print('Creating new dataset: '+datadict['name']) - - if 'dtype' in datadict: - if datadict['dtype'] == 'int': - var = self.createVariable(datadict['name'],'i4',datadict['dims']) - elif datadict['dtype'] == 'int64': - var = self.createVariable(datadict['name'],'i8',datadict['dims']) - elif datadict['dtype'] == 'char': - var = self.createVariable(datadict['name'],'S1',datadict['dims'],fill_value='!') - elif datadict['dtype'] == 'float': - var = self.createVariable(datadict['name'],'f4',datadict['dims']) - elif datadict['dtype'] == 'double': - var = self.createVariable(datadict['name'],'f8',datadict['dims']) - else: - var = self.createVariable(datadict['name'],'f8',datadict['dims']) - else: - var = self.createVariable(datadict['name'],'f4',datadict['dims']) - - for k,v in datadict.items(): - if k not in ['name','dims','values','_FillValue','count']: - var.setncattr(k,v) - - - def add_data(self,datadict,nsets=1,silent=True): - """ add fields to file, at end of unlimited dimension""" - import numpy as np - - existing_vars=self.variables - - try: - next = datadict['count'] - except: - next=0 - - - if datadict['name'] in existing_vars: - var = self.variables[datadict['name']] - ndims = var.ndim - - datadict = ConvertCharDims(var,datadict) - - if ndims == 1: - var[next:next+nsets]=datadict['values'] - elif ndims == 2: - var[next:next+nsets,:]=datadict['values'] - elif ndims == 3: - var[next:next+nsets,:,:]=datadict['values'] - elif ndims == 4: - var[next:next+nsets,:,:,:]=datadict['values'] - elif ndims == 5: - var[next:next+nsets,:,:,:,:]=datadict['values'] - else: - print('More than 5 dimensions in array not implemented yet') - raise ValueError - - else: - if not silent: print('Creating new dataset: '+datadict['name']) - - if 'dtype' in datadict: - if datadict['dtype'] == 'int': - var = self.createVariable(datadict['name'],'i4',datadict['dims'])#,fill_value=datadict['_FillValue']) - elif datadict['dtype'] == 'int64': - var = self.createVariable(datadict['name'],'i8',datadict['dims'])#,fill_value=datadict['_FillValue']) - elif datadict['dtype'] == 'char': - var = self.createVariable(datadict['name'],'S1',datadict['dims'],fill_value='!') - elif datadict['dtype'] == 'float': - var = self.createVariable(datadict['name'],'f4',datadict['dims'])#,fill_value=datadict['_FillValue']) - elif datadict['dtype'] == 'double': - var = self.createVariable(datadict['name'],'f8',datadict['dims'])#,fill_value=datadict['_FillValue']) - else: - var = self.createVariable(datadict['name'],'f8',datadict['dims'])#,fill_value=datadict['_FillValue']) - else: - var = self.createVariable(datadict['name'],'f4',datadict['dims'])#,fill_value=datadict['_FillValue']) 
- - for k,v in datadict.items(): - if k not in ['name','dims','values','_FillValue','count']: - var.setncattr(k,v) - - #if nsets > 1 or self.has_unlimlen(datadict['dims']) == True: - if nsets > 1 or (nsets > 0 and self.has_unlimlen(datadict['dims']) ) == True: - ndims = var.ndim - - datadict = ConvertCharDims(var,datadict) - if ndims == 1: - var[next:next+nsets]=datadict['values'] - elif ndims == 2: - var[next:next+nsets,:]=datadict['values'] - elif ndims == 3: - var[next:next+nsets,:,:]=datadict['values'] - elif ndims == 4: - var[next:next+nsets,:,:,:]=datadict['values'] - elif ndims == 5: - var[next:next+nsets,:,:,:,:]=datadict['values'] - else: - print('More than 5 dimensions in array not implemented yet') - raise ValueError - else: - ndims = var.ndim - - datadict = ConvertCharDims(var,datadict) - - var[:] = datadict['values'] - -try: - import pyhdf.SD as hdf - class CT_HDF(hdf.SD): - """ function opens a HDF file for reading """ - - def __init__(self,filename, method='read'): - - if method in ['write','create']: - raise ValueError('Method %s is not defined for a CarbonTracker HDF file object' % method) - - if method == 'read': - #print 'Reading from file' - try: - super(CT_HDF,self).__init__(filename) - except hdf.HDF4Error: - msg = 'Requested file not found for opening: %s'%filename ; logging.error(msg) - msg = "Exiting" ; logging.info(msg) - sys.exit(2) - - def get_variable(self,varname): - """ get variable from ncf file""" - return self.select(varname).get() - - def get_attribute(self,attname): - """ get attribute from ncf file""" - return getattr(self,attname) - - def standard_var(self,varname): - """ return properties of standard variables """ - from . import standardvariables - - if varname in standardvariables.standard_variables.keys(): - return standardvariables.standard_variables[varname] - else: - return standardvariables.standard_variables['unknown'] - - def close(self): - """ close file""" - - return self.end() -except: - print('IO Class CT_HDF not compiled, no HDF support!!!') - - - - -def ConvertCharDims(var,datadict): - - if not var.dtype == 'S1': - pass - else: - datalen = len(datadict['values']) - dimlen = list(var.shape) - - dimlen.remove(datalen) # string length remaining - - slen=dimlen[0] - - #print [d for d in datadict['values'] ] - values = [netCDF4.stringtoarr(d,slen) for d in datadict['values'] ] - datadict['values'] = values - - return datadict - -def get_variable(file,varname): - """ get variable from HDF file""" - return array(file.select(varname).get()) - - -if __name__ == '__main__': - import numpy as np - - ncf=CT_CDF('test.nc','create') - print(ncf.file_format) - dimmembers=ncf.add_members_dim(200) - dimparams=ncf.add_params_dim(200) - - dimdate=ncf.add_date_dim() - dimidate=ncf.add_date_dim_format() - dimlon,dimlat=ncf.add_latlon_dim() - - savedict=std_savedict.copy() - savedict['name']='testvar' - savedict['values']=np.zeros((2,200,))+2.0 - savedict['dims']=('date','nparameters',) - ncf.add_data(savedict,nsets=2) - - savedict=std_savedict.copy() - savedict['name']='testvar' - savedict['values']=np.zeros((3,200,))+3.0 - savedict['dims']=('date','nparameters',) - savedict['count']=2 - ncf.add_data(savedict,nsets=3) - - ncf.close() - - diff --git a/da/tools/rc.py b/da/tools/rc.py index 19beaece437e7dba8dbe00748e52e9f85e64ae22..6e6e980758481a558ca81aece27fe404e1829a63 100755 --- a/da/tools/rc.py +++ b/da/tools/rc.py @@ -262,7 +262,7 @@ class RcFile(object) : Class to store settings read from a rcfile. 
""" - def __init__(self, filename, silent=False, marks=('${', '}')) : + def __init__(self, filename, silent=False, marks=('${', '}'), do_eval=True): """ @@ -835,6 +835,11 @@ class RcFile(object) : raise Exception #endif else : + if do_eval: + if isinstance(val, str): + if any(operator in val for operator in '+-*/'): + try: val = eval(val, {'__builtins__': None}) + except: pass # store new value: self.values[key] = val self.sources[key] = linetrace diff --git a/da/tools/rc.py.bak b/da/tools/rc.py.bak deleted file mode 100755 index 5692ffa3e202ccb2000ee82d29e8c14940236423..0000000000000000000000000000000000000000 --- a/da/tools/rc.py.bak +++ /dev/null @@ -1,1300 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. If not, see <http://www.gnu.org/licenses/>.""" -#! /usr/bin/env python -# rc.py - - -# ------------------------------------------------ -# help -# ------------------------------------------------ - -""" -Deal with model settings in `rc` format. - -RCFILES - - A rcfile is a text file with key/value pairs seperated by a ':', e.g. - - my.flag : T - my.answer : 42 - - The following functionality is supported: - - * Empty lines are ignored. - - * Comment lines are introduced by a '!' as first character. - - * Long values could be continued at the next line after a '\' as last character. - - * Include the key/value pairs from another file: - - #include an/other.rc - - * Substitute environment variables when available: - - tm.dir : ${HOME}/TM5/cy3 - - * Substitute values of other keys in a line: - - build.dir : ${tm.dir}/build - grid : glb300x200 - input.${grid}.path : /data/input/${grid} - - Substitions are allowed in both key names as well as values. - The substitutions are performed in a loop until nothing - has to be substituted anymore, or some substitutions could - not be applied at al; for the later an error is raised. - Values to be substituted could therefore be set before and - after they are used. - - Note that if a key has the same name as an environment variable, - the new value will be assigned to the key instead of the value - retrieved from the environment: - - HOME : /some/other/dir/ - - * Substitude some specials: - - ${pid} # evaluates to the current process id; - # useful for names of log files etc - ${script} # evaluates to the base name of the calling script, - # thus without .py etc - - * Instead of variables of the form '${..}' other patterns could be - specified with the optional 'marks' tupple (see below). - - * Old-style '#eval' lines are still supported: - - #eval RUNDIR = /path/to/mydir - tmdir : ${RUNDIR}/TM5/cy3 - - In this example, the value of RUNDIR will be evaluated and substituted - in all {key,value} pairs. This feature is obsolete and a warning will - be issued. 
The proper way to use this is with {key,value} pairs too: - - run.dir : /path/to/mydir - tmdir : ${run.dir}/TM5/cy3 - - * Comment starting with '!' is stripped from the values. - To have a value including exclamation marks, use '\!' but do - not expect that the rest of the value is scanned for comment too: - - my.value : -999 ! just an integer value - my.message : This value has 64 characters \! Count if you don't believe it ... - - * If you trust yourself you might try to use conditional expressions: - - #if ${my.number} == 1 - message : Welcome - #else - message : Whatever ... - #endif - - The conditions should be valid python expressions that evaluate to a boolean; - value substitutions are performed before evaluation. Examples: - - ${my.runmode} == 4 - "${my.tracer}" == "CH4" - - Keep it simple! Very complicated and nested if-statements might not be - resolved correctly, and are in any case not easy to understand for other users! - - In the example above, an exception could be raised by the special error expression; - everything behind the '#error' mark is displayed as an error message: - - #error No settings provided for value : ${my.value} - - -USAGE AS SCRIPT - - Called in script form, the following syntaxis is supported: - - rc.py [options] <rcfile> <key> - rc.py -h|--help - - The <rcfile> is read and the value defined by <key> is printed - to the standard output. - - Use the --help option for more documentation. - - -USAGE AS PYTHON MODULE - - Import the module with: - - import rc - - Initialiase by reading all settings in a rcfile, - supporting the functionality described in the 'RCFILES' section. - - rcf = RcFile( 'settings.rc' ) - - The initialisation accepts some optional arguments. - Set the silent flag to True to ignore warnings. - - rcf = RcFile( 'settings.rc', silent=False ) - - Use the optional 'marks' tupple to define that variables to be expanded - are marked other than '${..}' but rather '<mark1>..<mark2>' : - - rcf = RcFile( 'settings.rc', marks=('${','}') ) - - Test to see if a key is defined: - - if rcf.has_key('my.flag') : - print 'value of my flag is : ', rcf['my.flag'] - - Extract a list with all keys: - - rcf.keys() - - A 'get' function is provided to extract values: - - * by default, the 'get' function returns the value as a str type: - - s = rcf.get('my.value') - - * a second argument is the name of the python type to which - the value is converted to: - - i = rcf.get('my.flag','int') - - * if the return value should be a 'bool', the result is - True for values : 'True' , 'T', 'yes', or '1' , - and False for value : 'False', 'F', 'no' , or '0' ; - for other values an error is raised; - - * return a default value if the key is not found: - - rcf.get( 'my.flag', default=False ) - - * print a debug message to the logging system for each extracted key: - - rcf.get( 'my.flag', verbose=True ) - - Add a new value, comment is optional: - - rcf.add( 'my.iter', 2, comment='iteration number for restart' ) - - Assign a new value to an existing key: - - rcf.replace( 'my.flag', True ) - - Scan a character line for all occurances of ${<key>} and subsitute for - the rc value assigned to <key> : - - line = rcf.substitute( line ) - - Write the dictionary (with all variables expanded and included files included) - to new file: - - rcf.write('newfile.rc') - - -USAGE AS PYTHON MODULE - BACKWARDS COMPATIBILITY - - For backwards compatibility with older implementations of the rc.py module, - two extra routines are available. 
- - To read rc-file by making an instance of the RcFile class, - and to returns a dictionary of values only, use: - - rcdict = read( 'test.rc' [,silent=False] ) - - Create a new rcfile and fill with key/values from a dictionary: - - write( 'test.rc', rcdict ) - - -HISTORY - - 2008? Andy Jacobson, NOAA - Translation to python of original shell script 'go_readrc' . - 2009-06 Wouter Peters, WUR - Support substitution of previously defined variables. - 2009-06 Arjo Segers, TNO - Support include files. - 2009-09 Arjo Segers, TNO - Re-coded into class. - Implemented substitution loop. - 2009-11 Arjo Segers, JRC - Added main program to run this file as a shell script. - Added replace and substitute routines. - 2010-03 Arjo Segers, JRC - Support simple if-statements. - Support comment in values. - 2010-07 Wouter Peters, WUR - Downgraded to work for python 2.4.3 too. - Added read/write routines for backwards compatibility. - 2010-07-27 Arjo Segers, JRC - Maintain list with rcfile names and line numbers to be displayed - with error messages to identify where problematic lines are found. - 2010-07-28 Andy Jacobson, NOAA - Add second dictionary of key,linetrace values to help track the - provenance of #included keys (to debug multiple key instances). - Identify duplicate keys by checking on different source lines - instead of checking if the values are different. -""" -import re -import os -import sys -import logging - - -# ------------------------------------------------ -# classes -# ------------------------------------------------ - - -class RcFile(object) : - - """ - Class to store settings read from a rcfile. - """ - - def __init__(self, filename, silent=False, marks=('${', '}')) : - - """ - - Usage: - - rcf = RcFile( 'settings.rc' [,silent=False] [marks=('${','}')] ) - - Read an rc-file and expand all the keys and values into a dictionary. - Do not shout messages if silent is set to True. - The 2-item tupple (mark1,mark2) could be used to re-define the default - key pattern '${..}' into something else: - <mark1>...<mark2> - - """ - - # info ... - logging.debug('reading rcfile %s ...' % filename) - - # check ... 
- if not os.path.exists(filename) : - msg = 'rcfile not found : %s' % filename ; logging.error(msg) - raise IOError(msg) - #endif - - # store file name: - self.filename = filename - # store rc-file root directory: - self.rootdir = os.path.split(filename)[0] - - # storage for processed rcfile: - self.outfile = [] - - # storage for key/value pairs: - self.values = {} - - # storage for key/source file pairs: - self.sources = {} - - # open the specified rc-file: - f = open(filename, 'r') - # store all lines in a list: - inpfile = f.readlines() - # close: - f.close() - - # create traceback info: - inptrace = [] - for jline in range(len(inpfile)) : - inptrace.append('"%s", line %-10s' % (filename, str(jline + 1))) - #endfor - - # flags: - warned_for_eval = False - - # pass counter: - ipass = 1 - - # loop until all substitutions and inclusions are done: - while True : - - # start again with empty output file: - self.outfile = [] - # also empty traceback info: - self.trace = [] - # init current line: - line = '' - # assume nothing has to be done after this loop: - something_done = False - something_to_be_done = False - # maintain list with unresolved lines (with keys that could not be evaluated yet): - unresolved_lines = [] - # maintain list with keys from which the value could not be resolved yet: - keys_with_unresolved_value = [] - # maintain list with undefined keys; - # some might be part of the keys_with_unresolved_value list: - undefined_keys = [] - - # stack for conditional evaluation; - # each element is a tuple with elements: - # resolved (boolean) true if the if-statement is evaluated - # flag (boolean) true if the lines below the statement - # are to be included - # anyflag (boolean) used to check if any of the 'if' or 'elif' conditions - # in this sequence evaluated to True - # line (char) description of the line for messages - ifstack = [] - - #print '' - #print '---[pass %i]-------------------------------------' % ipass - #for line in inpfile : print line.strip() - - # loop over lines in input file: - iline = -1 - for inpline in inpfile : - - # line counter: - iline = iline + 1 - - # cut current traceback info from list: - linetrace_curr = inptrace.pop(0) - - # set full traceback info: - if line.endswith('\\') : - # current input line is a continuation; combine: - qfile, qlinenrs = linetrace.split(',') - qnrs = qlinenrs.replace('lines', '').replace('line', '') - if '-' in qnrs : - qnr1, qnr2 = qnrs.split('-') - else : - qnr1, qnr2 = qnrs, qnrs - #endif - linetrace = '%s, lines %-9s' % (qfile, '%i-%i' % (int(qnr1), int(qnr2) + 1)) - else : - # just copy: - linetrace = linetrace_curr - #endif - - # remove end-of-line character: - inpline = inpline.strip() - - ## DEBUG: display current line ... - #print '%4i | %s' % (iline,inpline) - #print '%4i | %s %s' % (iline,inpline,linetrace) - - # - # empty lines - # - - # skip empty lines: - if len(inpline) == 0 : - # add empty line to output: - self.outfile.append('\n') - # add traceback info: - self.trace.append(linetrace) - # next will be a new line: - line = '' - # next input line: - continue - #endif - - # - # comment lines - # - - # skip comment: - if inpline.startswith('!') : - # add copy to output file: - self.outfile.append('%s\n' % inpline) - # add traceback info: - self.trace.append(linetrace) - # next will be a new line: - line = '' - # next input line: - continue - #endif - - # - # continuation lines - # - - # current line has continuation mark '\' at the end ? 
- # then add this input line: - if line.endswith('\\') : - # remove continuation character, add input line: - line = line[:-1] + inpline - else : - # bright new line: - line = inpline - #endif - - # continuation mark ? then next line of input file: - if line.endswith('\\') : continue - - # - # conditional settings (1) - # - - ## debug ... - #print 'xxx0 ', ifstack - - # is this the begin of a new condition ? - mark = '#if' - if line.startswith(mark) : - # push temporary flag to stack, will be replaced after evaluation of condition: - ifstack.append((False, True, False, linetrace)) - # debug ... - #print 'xxx1 ', ifstack - #endif - - mark = '#elif' - if line.startswith(mark) : - # check ... - if len(ifstack) == 0 : - logging.error('found orphin "%s" in %s' % (mark, linetrace)) - raise Exception - #endif - # remove current top from stack: - resolved, flag, anyflag, msg = ifstack.pop() - # did one of the previous #if/#elif evaluate to True already ? - if resolved and anyflag : - # push to stack that this line resolved to False : - ifstack.append((True, False, anyflag, linetrace)) - # copy to output: - self.outfile.append('%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - # next input line: - continue - else : - # push temporary flag to stack, will be replaced after evaluation of condition: - ifstack.append((False, True, anyflag, linetrace)) - #endif - ## debug ... - #print 'xxx2 ', ifstack - #endif - - mark = '#else' - if line.startswith(mark) : - # check ... - if len(ifstack) == 0 : - logging.error('found orphin "%s" in %s' % (mark, linetrace)) - raise Exception - #endif - # remove current top from stack: - resolved, flag, anyflag, msg = ifstack.pop() - # get higher level settings: - if len(ifstack) > 0 : - resolved_prev, flag_prev, anyflag_prev, msg_prev = ifstack[-1] - else : - flag_prev = True - #endif - # should next lines be included ? - # reverse flag, take into acount higher level: - new_flag = (not flag) and (not anyflag) and flag_prev - # push to stack: - ifstack.append((resolved, new_flag, False, linetrace)) - # debug ... - #print 'xxx3 ', ifstack - # copy to output: - self.outfile.append('%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - # next input line: - continue - #endif - - # is this the end of a condition ? - mark = '#endif' - if line.startswith(mark) : - # check ... - if len(ifstack) == 0 : - logging.error('found orphin "%s" in %s' % (mark, linetrace)) - raise Exception - #endif - # remove top from stack: - top = ifstack.pop() - # copy to output: - self.outfile.append('%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - # next input line: - continue - #endif - - # within if-statements ? - if len(ifstack) > 0 : - # extract current top: - resolved, flag, anyflag, msg = ifstack[-1] - # already resolved ? then check if this line should be skipped: - if resolved and (not flag) : - # skip this line, but include commented version in output: - self.outfile.append('!%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - # read the next input line: - continue - #endif - #endif - - # - # handle '#eval' lines - # - - mark = '#eval' - if line.startswith(mark): - # info .. 
- if not warned_for_eval : - if not silent: logging.warning('the #eval statements in rc-files are deprecated, use {key:value} pairs instead') - warned_for_eval = True - #endif - # add commented copy to output: - self.outfile.append('!evaluated>>> ' + line) - # add traceback info: - self.trace.append(linetrace) - # remove leading mark: - line = line.lstrip(mark) - # multiple settings are seperated by ';' : - evals = line.split(';') - # insert in output file: - for k in range(len(evals)) : - # split in key and value: - key, value = evals[k].split('=') - # insert: - self.outfile.append('%s : %s' % (key, value)) - # add traceback info: - self.trace.append(linetrace) - #endfor - # next input line: - continue - #endif - - # - # replace ${..} values - # - - # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace('{', '\{').replace('<', '\<').replace('$', '\$') - close_mark = marks[1].replace('}', '\}').replace('>', '\>') - - # set syntax of keywords to be matched, e.g. '${...}' : - pattern = start_mark + '[A-Za-z0-9_.]+' + close_mark - - # make a regular expression that matches all variables: - rc_varpat = re.compile(pattern) - - # search all matching paterns: - pats = re.findall(rc_varpat, line) - # counter for unexpanded substitutions: - ntobedone = 0 - # loop over matches: - for pat in pats : - # remove enclosing characters: - key = pat.lstrip(start_mark).rstrip(close_mark) - # test some dictionaries for matching key: - if key in self.values: - # get previously defined value: - val = self.values[key] - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif key in os.environ: - # get value from environment: - val = os.environ[key] - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif key == 'pid' : - # special value: process id; convert to character: - val = '%i' % os.getpid() - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif key == 'script' : - # special value: base name of the calling script, without extension: - script, ext = os.path.splitext(os.path.basename(sys.argv[0])) - # substitute value: - line = line.replace(pat, script) - # set flag: - something_done = True - else : - # could not substitute yet; set flag: - ntobedone = ntobedone + 1 - # add to list with unresolved keys: - if key not in undefined_keys : undefined_keys.append(key) - # continue with next substitution: - continue - #endif - #endfor # matched patterns - # not all substituted ? - if ntobedone > 0 : - # add line to output: - self.outfile.append(line) - # add traceback info: - self.trace.append(linetrace) - # a new pass is needed: - something_to_be_done = True - # store for info messages: - unresolved_lines.append('%s | %s' % (linetrace, line)) - # could this be a 'key : value' line ? - if ':' in line : - # split, remove leading and end space: - qkey, qvalue = line.split(':', 1) - qkey = qkey.strip() - # assume it is indeed a key if it does not contain whitespace, - # no start mark, and does not start wiht '#' : - if (' ' not in qkey) and (start_mark not in qkey) and (not qkey.startswith('#')) : - # add to list: - if qkey not in keys_with_unresolved_value : keys_with_unresolved_value.append(qkey) - #endif - # next input line: - continue - #endif - - # - # handle '#include' lines - # - - mark = '#include' - if line.startswith(mark) : - # remove leading mark, what remains is file to be included: - inc_file = line.lstrip(mark).strip() - # check ... 
- if not os.path.exists(inc_file) : - inc_file = os.path.join(self.rootdir, inc_file) - logging.debug('Added rootdir to requested include: %s' % inc_file) - #endif - if not os.path.exists(inc_file) : - logging.error('include file not found : %s' % inc_file) - logging.error(linetrace) - raise IOError('include file not found : %s' % inc_file) - #endif - # read content: - inc_f = open(inc_file, 'r') - inc_rc = inc_f.readlines() - inc_f.close() - # add extra comment for output file: - self.outfile.append('! >>> %s >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n' % inc_file) - self.outfile.extend(inc_rc) - self.outfile.append('! <<< %s <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n' % inc_file) - # add traceback info: - self.trace.append(linetrace) - for jline in range(len(inc_rc)) : - self.trace.append('"%s", line %-10s' % (inc_file, str(jline + 1))) - #endfor - self.trace.append(linetrace) - # set flag: - something_done = True - # a new pass is needed: - something_to_be_done = True - # next input line: - continue - #endif - - - # - # conditional settings (2) - # - - # evaluate conditional expressions: - mark1 = '#if' - mark2 = '#elif' - if line.startswith(mark1) or line.startswith(mark2) : - # remove leading mark, what remains is logical expression: - expression = line.lstrip(mark1).strip() - expression = line.lstrip(mark2).strip() - # common mistake is to add a ':' as in python; remove this: - if expression.endswith(':') : expression = expression.rstrip(':').strip() - # evaluate: - try : - flag = eval(expression) - except : - logging.error('could not evaluate expression:') - logging.error(' %s' % expression) - logging.error('in %s' % linetrace) - raise Exception - #endtry - # remove temporary top added before during this pass: - tmp_statement, tmp_flag, tmp_anyflag, tmp_msg = ifstack.pop() - # extract current top if necessary: - if len(ifstack) > 0 : - dummy_statement, prev_flag, dummy_anyflag, dummy_msg = ifstack[-1] - else : - prev_flag = True - #endif - # should next lines be included ? - new_flag = prev_flag and tmp_flag and flag - # any if/elif evaluated to true in this sequence ? - new_anyflag = tmp_anyflag or new_flag - # add to stack, now resolved, take into accout current flag: - ifstack.append((True, new_flag, new_anyflag, linetrace)) - # debug ... - #print 'xxx2 ', ifstack - # copy to output: - self.outfile.append('%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - # next input line: - continue - #endif - - # - # error message - # - - # special command to rais an exception: - mark = '#error' - if line.startswith(mark) : - # remove leading mark, what remains is error message: - msg = line.lstrip(mark).strip() - # display: - logging.error(msg) - # add info: - logging.error('error message in %s' % linetrace) - # stop: - raise Exception - #endif - - # - # checks - # - - # common mistake ... - if line.startswith('#') : - logging.error('line in rcfile starts with "#" but is not an "#include" or other special line;') - logging.error('if it is supposed to be comment, please start with "!" ...') - logging.error(' %s' % line) - logging.error('%s' % linetrace) - raise IOError - #endif - - # check ... 
- if ':' not in line : - logging.error('key/value line should contain a ":"') - logging.error('%s' % linetrace) - raise IOError - #endif - - # - # add to output - # - - # add line to output: - self.outfile.append('%s\n' % line) - # add traceback info: - self.trace.append(linetrace) - - # - # add key/value pair - # - - # not if inside an unresolved if-statement ... - if len(ifstack) > 0 : - # get top values: - resolved, flag, anyflag, msg = ifstack[-1] - # not resolved yet ? then continue: - if not resolved : continue - #endif - - # split in key and value; - # value might contain ':' too, so at maximum 1 split: - key, val = line.split(':', 1) - - # remove comment from value: - if '!' in val : - # not if '\!' is in the value ... - if not '\!' in val : val, comment = val.split('!') - # replace all slash-comments: - val = val.replace('\!', '!') - #endif - - # remove spaces: - key = key.strip() - val = val.strip() - - # already defined ? - if key in self.values: - # this will occure often after the first pass since - # the keys are resolved again and again ; - # therefore, only complain if this definition is read - # from a different line : - if linetrace != self.sources[key] : - logging.error('duplicated key \"%s\" found:' % key) - logging.error('first definition in %s is:' % self.sources[key]) - logging.error(' %s : %s' % (key, str(self.values[key]))) - logging.error('second definition in %s is:' % linetrace.strip()) - logging.error(' %s : %s' % (key, str(val))) - raise Exception - #endif - else : - # store new value: - self.values[key] = val - self.sources[key] = linetrace - # set flag: - something_done = True - #endif - - # display key and value ... - #print ' --> %s : %s, from %s' % (key,val, linetrace) - - #endfor # loop over lines in text - - ## info ... - #print '~~~ outfile ~~~~~~~~~~~~~~~~~~~~~~~' - #for line in self.outfile : print line.strip() - #print '~~~ key/values ~~~~~~~~~~~~~~~~~~~~' - #for k,v in self.items() : - # print '%s : %s' % (k,v) - ##endfor - #print '-------------------------------------------------' - #print '' - - # check ... - if len(ifstack) > 0 : - logging.error('unterminated if-statement ; current stack:') - for resolved, flag, anyflag, msg in ifstack : logging.error(msg) - logging.error('please fix the rcfile or debug this script ...') - raise Exception - #endif - - # check ... - if something_to_be_done : - # check for unterminated loop ... - if not something_done : - # list all unresolved lines: - logging.error('Could not resolve the following lines in rcfile(s):') - logging.error('') - for uline in unresolved_lines : - logging.error(' %s' % uline) - #endfor - logging.error('') - # list all undefined keys: - logging.error(' Undefined key(s):') - logging.error('') - for ukey in undefined_keys : - # do not list them if they are undefined because the value - # depends on other undefined keys: - if ukey not in keys_with_unresolved_value : - # display: - logging.error(' %s' % ukey) - # loop over unresolved lines to see in which the key is used: - for uline in unresolved_lines : - # search for '${key}' pattern: - if marks[0] + ukey + marks[1] in uline : - logging.error(' %s' % uline) - #endif - #endfor - logging.error('') - #endif - #endfor - logging.error('please fix the rcfile(s) or debug this script ...') - raise Exception - #endif - else : - # finished ... - break - #endif - - # for safety ... - if ipass == 100 : - logging.error('resolving rc file has reached pass %i ; something wrong ?' 
% ipass) - raise Exception - #endif - - # new pass: - ipass = ipass + 1 - # renew input: - inpfile = self.outfile - # renew traceback: - inptrace = self.trace - - #endwhile # something to be done - - #enddef # __init__ - - - # *** - - - def has_key(self, key) : - - # from dictionairy: - return key in self.values - - #enddef - - - # *** - - - def keys(self) : - - # from dictionairy: - return self.values.keys() - - #enddef - - - # *** - - - def get(self, key, totype='', default=None, verbose=False) : - - """ - rcf.get( 'my.value' [,default=None] ) - Return element 'key' from the dictionairy. - If the element is not present but a default is specified, than return - the default value. - If 'verbose' is set to True, then print debug messages to the logging - about which values is returned for the given key. - The option argument 'totype' defines the conversion to a Python type. - If 'totype' is set to 'bool', the return value is the - boolean True for values 'T', 'True', 'yes', and '1', - and False for 'F', 'False', 'no', or '0' ; - for other values, an exception will be raised. - """ - - # element found ? - if key in self.values: - # copy value: - value = self.values[key] - # convert ? - if totype == 'bool' : - # convert to boolean: - if value in ['T', 'True', 'yes', '1'] : - value = True - elif value in ['F', 'False', 'no', '0'] : - value = False - else : - logging.error("value of key '%s' is not a boolean : %s" % (key, str(value))) - raise Exception - #endif - elif len(totype) > 0 : - # convert to other type ... - value = eval('%s(%s)' % (totype, value)) - #endif - # for debugging ... - if verbose : logging.debug('rc setting "%s" : "%s"' % (key, str(value))) - else : - # default value specified ? - if default != None : - # copy default: - value = default - # for debugging ... - if verbose : logging.debug('rc setting "%s" : "%s" (deault)' % (key, str(value))) - else : - # something wrong ... - logging.error("key '%s' not found in '%s' and no default specified" % (key, self.filename)) - raise Exception - #endif - #endif - - # ok - return value - - #enddef - - - # *** - - - def replace(self, key, val) : - - """ - Replace a key by a new value. - """ - - # search for a line '<key> : <val>' - # loop over lines in output file: - found = False - for iline in range(len(self.outfile)) : - # extract: - line = self.outfile[iline] - # skip lines that are no key:value pair for sure ... - if ':' not in line : continue - # split once at first ':' - k, v = line.split(':', 1) - # match ? - if k.strip() == key : - # replace line in original file: - self.outfile[iline] = '%s : %s\n' % (k, str(val)) - # replace value: - self.values[key] = val - # set flag: - found = True - # found, thus no need to continue: - break - #endif - #endfor # lines - # not found ? - if not found : - logging.error('could not replace key : %s' % key) - raise Exception - #endif - - # ok - return - - #enddef - - - # *** - - - def add(self, key, val, comment='') : - - """Add a new key/value pair.""" - - # add lines: - self.outfile.append('\n') - if len(comment) > 0 : self.outfile.append('! %s\n' % comment) - self.outfile.append('%s : %s\n' % (key, str(val))) - - # add to dictionairy: - self.values[key] = val - - # ok - return - - #enddef - - - # *** - - - def substitute(self, line, marks=('${', '}')) : - - """ - Return a line with all '${..}' parts replaced by the corresponding rcfile values. 
- The 2-item tupple (mark1,mark2) could be used to re-define the default - key pattern '${..}' into something else: - <mark1>...<mark2> - """ - - # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace('{', '\{').replace('<', '\<').replace('$', '\$') - close_mark = marks[1].replace('}', '\}').replace('>', '\>') - - # set syntax of keywords to be matched, e.g. '${...}' : - pattern = start_mark + '[A-Za-z0-9_.]+' + close_mark - - # make a regular expression that matches all variables: - rc_varpat = re.compile(pattern) - - # search all matching paterns: - pats = re.findall(rc_varpat, line) - # loop over matches: - for pat in pats : - # remove enclosing characters: - key = pat.lstrip(start_mark).rstrip(close_mark) - # test dictionary for matching key: - if key in self.values: - # get previously defined value: - val = self.values[key] - # substitute value: - line = line.replace(pat, val) - #endif - #endfor # matched patterns - - # ok - return line - - #enddef - - - # *** - - - def WriteFile(self, filename) : - - """ write the dictionary to file""" - - # open file for writing: - f = open(filename, 'w') - - ## loop over key/value pairs: - #for k,v in self.items(): - # # add line; at least the specified number of characters - # # is used for the key: - # f.write( '%-20s:%s\n' % (k,v) ) - ##endfor - - # write processed input: - f.writelines(self.outfile) - - # close file: - f.close() - - #endif - - -#endclass # RcFile - - -# *** - - -def read(rcfilename, silent=False) : - - """ - This method reads an rc-file by making an instance of the RcFile class, - and then returns the dictionary of values only. - This makes it backwards compatible with older implementations of the rc.py module - """ - - rcdict = RcFile(rcfilename, silent=silent) - - return rcdict.values - -#enddef - - -# *** - - -def write(filename, rcdict) : - - """ - This method writes an rc-file dictionary. - This makes it backwards compatible with older implementations of the rc.py module - """ - - # open file for writing: - f = open(filename, 'w') - - # loop over key/value pairs: - for k, v in rcdict.items(): - # add line; at least the specified number of characters - # is used for the key: - f.write('%-20s:%s\n' % (k, v)) - #endfor - - # close file: - f.close() - -#enddef - - - -# ------------------------------------------------ -# script -# ------------------------------------------------ - - -if __name__ == '__main__': - - # external ... 
- import optparse - import traceback - - # extract arguments from sys.argv array: - # 0 = name of calling script, 1: = actual arguments - args = sys.argv[1:] - - # set text for 'usage' help line: - usage = "\n %prog <rcfile> <key> [-b|--bool] [--default<=value>]\n %prog <rcfile> -w|--write\n %prog -h|--help\n %prog -d|--doc" - - # initialise the option parser: - parser = optparse.OptionParser(usage=usage) - - # define options: - parser.add_option("-d", "--doc", - help="print documentation", - dest="doc", action="store_true", default=False) - parser.add_option("-v", "--verbose", - help="print information messages", - dest="verbose", action="store_true", default=False) - parser.add_option("-b", "--bool", - help="return 'True' for values 'T', 'True', 'yes', or '1', and 'False' for 'F', 'False', 'no', or '0'", - dest="boolean", action="store_true", default=False) - parser.add_option("--default", - help="default value returned if key is not found", - dest="default", action="store") - parser.add_option("-w", "--write", - help="write pre-processed rcfile", - dest="write", action="store_true", default=False) - - # now parse the actual arguments: - opts, args = parser.parse_args(args=args) - - # print documentation ? - if opts.doc : - print(__doc__) - sys.exit(0) - #endif - - # recfile argument should be provided: - if len(args) < 1 : - parser.error("no name of rcfile provided\n") - #endif - # extract: - rcfile = args[0] - - # read rcfile in dictionary: - try : - rcf = RcFile(rcfile, silent=(not opts.verbose)) - except : - if opts.verbose : logging.error(traceback.format_exc()) - sys.exit(1) - #endtry - - # print pre-processed file ? - if opts.write : - for line in rcf.outfile : print(line.strip()) - sys.exit(0) - #endif - - # key argument should be provided: - if len(args) < 2 : - parser.error("no name of rckey provided\n") - #endif - # extract: - rckey = args[1] - - # key present ? - if rckey in rcf: - - # print requested value: - if opts.boolean : - # extract value: - flag = rcf.get(rckey, 'bool') - # print result: - if flag : - print('True') - else : - print('False') - #endif - else : - # extract value: - value = rcf.get(rckey) - # display: - print(value) - #endif - - else : - - # default value provided ? - if opts.default != None : - # display: - print(opts.default) - else : - print('ERROR - key "%s" not found in rcfile "%s" and no default specified' % (rckey, rcfile)) - sys.exit(1) - #endif - - #endif - -#endif - - -# ------------------------------------------------ -# end -# ------------------------------------------------ diff --git a/da/tools/rc_old.py.bak b/da/tools/rc_old.py.bak deleted file mode 100755 index 3d529590b81f70ffdfb7592744612cf4d675e922..0000000000000000000000000000000000000000 --- a/da/tools/rc_old.py.bak +++ /dev/null @@ -1,1149 +0,0 @@ -"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -updates of the code. See also: http://www.carbontracker.eu. - -This program is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software Foundation, -version 3. This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with this -program. 
If not, see <http://www.gnu.org/licenses/>.""" -#! /usr/bin/env python -# rc.py - - -# ------------------------------------------------ -# help -# ------------------------------------------------ - -""" -Deal with model settings in `rc` format. - -RCFILES - - A rcfile is a text file with key/value pairs seperated by a ':', e.g. - - my.flag : T - my.answer : 42 - - The following functionality is supported: - - * Empty lines are ignored. - - * Comment lines are introduced by a '!' as first character. - - * Long values could be continued at the next line after a '\' as last character. - - * Include the key/value pairs from another file: - - #include an/other.rc - - * Substitute environment variables when available: - - tm.dir : ${HOME}/TM5/cy3 - - * Substitute values of other keys in a line: - - build.dir : ${tm.dir}/build - grid : glb300x200 - input.${grid}.path : /data/input/${grid} - - Substitions are allowed in both key names as well as values. - The substitutions are performed in a loop until nothing - has to be substituted anymore, or some substitutions could - not be applied at al; for the later an error is raised. - Values to be substituted could therefore be set before and - after they are used. - - Note that if a key has the same name as an environment variable, - the new value will be assigned to the key instead of the value - retrieved from the environment: - - HOME : /some/other/dir/ - - * Substitude some specials: - - ${pid} # evaluates to the current process id; - # useful for names of log files etc - ${script} # evaluates to the base name of the calling script, - # thus without .py etc - - * Instead of variables of the form '${..}' other patterns could be - specified with the optional 'marks' tupple (see below). - - * Old-style '#eval' lines are still supported: - - #eval RUNDIR = /path/to/mydir - tmdir : ${RUNDIR}/TM5/cy3 - - In this example, the value of RUNDIR will be evaluated and substituted - in all {key,value} pairs. This feature is obsolete and a warning will - be issued. The proper way to use this is with {key,value} pairs too: - - run.dir : /path/to/mydir - tmdir : ${run.dir}/TM5/cy3 - - * Comment starting with '!' is stripped from the values. - To have a value including exclamation marks, use '\!' but do - not expect that the rest of the value is scanned for comment too: - - my.value : -999 ! just an integer value - my.message : This value has 64 characters \! Count if you don't believe it ... - - * If you trust yourself you might try to use conditional expressions: - - #if ${my.number} == 1 - message : Welcome - #else - message : Whatever ... - #endif - - The conditions should be valid python expressions that evaluate to a boolean; - value substitutions are performed before evaluation. Examples: - - ${my.runmode} == 4 - "${my.tracer}" == "CH4" - - Keep it simple! Very complicated and nested if-statements might not be - resolved correctly, and are in any case not easy to understand for other users! - - In the example above, an exception could be raised by the special error expression; - everything behind the '#error' mark is displayed as an error message: - - #error No settings provided for value : ${my.value} - - -USAGE AS SCRIPT - - Called in script form, the following syntaxis is supported: - - rc.py [options] <rcfile> <key> - rc.py -h|--help - - The <rcfile> is read and the value defined by <key> is printed - to the standard output. - - Use the --help option for more documentation. 
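
Editor's note (illustrative, not part of the patch): the script usage described above, together with the optparse flags defined in the removed __main__ block (-b/--bool, --default, -w/--write, -d/--doc, -v/--verbose), supports invocations along these lines; the rc-file and key names here are made up.

# python rc.py settings.rc my.flag                 # print the raw value of 'my.flag'
# python rc.py settings.rc my.flag --bool          # print 'True' or 'False'
# python rc.py settings.rc my.other --default=42   # fall back to a default if the key is missing
# python rc.py settings.rc --write                 # print the pre-processed rcfile
# python rc.py --doc                               # print the module documentation
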
- - -USAGE AS PYTHON MODULE - - Import the module with: - - import rc - - Initialiase by reading all settings in a rcfile, - supporting the functionality described in the 'RCFILES' section. - - rcf = RcFile( 'settings.rc' ) - - The initialisation accepts some optional arguments. - Set the silent flag to True to ignore warnings. - - rcf = RcFile( 'settings.rc', silent=False ) - - Use the optional 'marks' tupple to define that variables to be expanded - are marked other than '${..}' but rather '<mark1>..<mark2>' : - - rcf = RcFile( 'settings.rc', marks=('${','}') ) - - Test to see if a key is defined: - - if rcf.has_key('my.flag') : - print 'value of my flag is : ', rcf['my.flag'] - - Extract a list with all keys: - - rcf.keys() - - A 'get' function is provided to extract values: - - * by default, the 'get' function returns the value as a str type: - - s = rcf.get('my.value') - - * a second argument is the name of the python type to which - the value is converted to: - - i = rcf.get('my.flag','int') - - * if the return value should be a 'bool', the result is - True for values : 'True' , 'T', 'yes', or '1' , - and False for value : 'False', 'F', 'no' , or '0' ; - for other values an error is raised; - - * return a default value if the key is not found: - - rcf.get( 'my.flag', default=False ) - - * print a debug message to the logging system for each extracted key: - - rcf.get( 'my.flag', verbose=True ) - - Add a new value, comment is optional: - - rcf.add( 'my.iter', 2, comment='iteration number for restart' ) - - Assign a new value to an existing key: - - rcf.replace( 'my.flag', True ) - - Scan a character line for all occurances of ${<key>} and subsitute for - the rc value assigned to <key> : - - line = rcf.substitute( line ) - - Write the dictionary (with all variables expanded and included files included) - to new file: - - rcf.write('newfile.rc') - - -HISTORY - - 2008? Andy Jacobson, NOAA - Translation to python of original shell script 'go_readrc' . - 2009-06 Wouter Peters, WUR - Support substitution of previously defined variables. - 2009-06 Arjo Segers, TNO - Support include files. - 2009-09 Arjo Segers, TNO - Re-coded into class. - Implemented substitution loop. - 2009-11 Arjo Segers, JRC - Added main program to run this file as a shell script. - Added replace and substitute routines. - 2010-03 Arjo Segers, JRC - Support simple if-statements. - Support comment in values. - -""" - - -import re -import os -import sys -import logging - -# ------------------------------------------------ -# classes -# ------------------------------------------------ - - -class RcFile(object) : - - """ - Class to store settings read from a rcfile. - """ - - def __init__(self, filename, silent=False, marks=('${', '}')) : - - """ - - Usage: - - rcf = RcFile( 'settings.rc' [,silent=False] [marks=('${','}')] ) - - Read an rc-file and expand all the keys and values into a dictionary. - Do not shout messages if silent is set to True. - The 2-item tupple (mark1,mark2) could be used to re-define the default - key pattern '${..}' into something else: - <mark1>...<mark2> - - """ - - # info ... - logging.debug('reading rcfile %s ...' % filename) - - # check ... 
- if not os.path.exists(filename) : - msg = 'rcfile not found : %s' % filename ; logging.error(msg) - raise IOError, msg - #endif - - # store file name: - self.filename = filename - # store rc-file root directory: - self.rootdir = os.path.split(filename)[0] - - # storage for processed rcfile: - self.outfile = [] - - # storage for key/value pairs: - self.values = {} - - # open the specified rc-file: - f = open(filename, 'r') - # store all lines in a list: - inpfile = f.readlines() - # close: - f.close() - - # flags: - warned_for_eval = False - - # pass counter: - ipass = 1 - - # loop until all substitutions and inclusions are done: - while True : - - # start again with empty output file: - self.outfile = [] - # init current line: - line = '' - # assume nothing has to be done after this loop: - something_done = False - something_to_be_done = False - unresolved = [] - - # stack for conditional evaluation; - # each element is a tuple with elements: - # resolved (boolean) true if the if-statement is evaluated - # flag (boolean) true if the lines below the statement - # are to be included - # anyflag (boolean) used to check if any of the 'if' or 'elif' conditions - # in this sequence evaluated to True - # line (char) description of the line for messages - ifstack = [] - - #print '' - #print '---[pass %i]-------------------------------------' % ipass - #for line in inpfile : print line.strip() - - # loop over lines in input file: - iline = -1 - for inpline in inpfile : - - # line counter: - iline = iline + 1 - - # remove end-of-line character: - inpline = inpline.strip() - - ## DEBUG: display current line ... - #print '%4i | %s' % (iline,inpline) - - # - # empty lines - # - - # skip empty lines: - if len(inpline) == 0 : - # add empty line to output: - self.outfile.append('\n') - # next will be a new line: - line = '' - # next input line: - continue - #endif - - # - # comment lines - # - - # skip comment: - if inpline.startswith('!') : - # add copy to output file: - self.outfile.append('%s\n' % inpline) - # next will be a new line: - line = '' - # next input line: - continue - #endif - - # - # continuation lines - # - - # current line has continuation mark '\' at the end ? - # then add this input line: - if line.endswith('\\') : - # remove continuation character, add input line: - line = line[:-1] + inpline - else : - # bright new line: - line = inpline - #endif - - # continuation mark ? then next line of input file: - if line.endswith('\\') : continue - - # - # line info - # - - # line number and text for messages: - line_info = '%6i | %s' % (iline + 1, line) - - # - # conditional settings (1) - # - - # is this the begin of a new condition ? - mark = '#if' - if line.startswith(mark) : - # push temporary flag to stack, will be replaced after evaluation of condition: - ifstack.append((False, True, False, line_info)) - # debug ... - #print 'xxx1 ', ifstack - #endif - - mark = '#elif' - if line.startswith(mark) : - # check ... - if len(ifstack) == 0 : - logging.error('found orphin elif in rcfile on line :') - logging.error(' %s' % line_info) - raise Exception - #endif - # remove current top from stack: - resolved, flag, anyflag, msg = ifstack.pop() - # push temporary flag to stack, will be replaced after evaluation of condition: - ifstack.append((resolved, True, anyflag, line_info)) - # debug ... - #print 'xxx1 ', ifstack - #endif - - mark = '#else' - if line.startswith(mark) : - # check ... 
- if len(ifstack) == 0 : - logging.error('found lonely else in rcfile on line :') - logging.error(' %s' % line_info) - raise Exception - #endif - # remove current top from stack: - resolved, flag, anyflag, msg = ifstack.pop() - # get higher level settings: - if len(ifstack) > 0 : - resolved_prev, flag_prev, anyflag_prev, msg_prev = ifstack[-1] - else : - flag_prev = True - #endif - # should next lines be included ? - new_flag = (not flag) and (not anyflag) and flag_prev - # add else block with reversed flag, take into acount higher level - ifstack.append((resolved, new_flag, False, line_info)) - # debug ... - #print 'xxx1 ', ifstack - # copy to output: - self.outfile.append('%s\n' % line) - # next input line: - continue - #endif - - # is this the end of a condition ? - mark = '#endif' - if line.startswith(mark) : - # check ... - if len(ifstack) == 0 : - logging.error('found lonely endif in rcfile on line :') - logging.error(' %s' % line_info) - raise Exception - #endif - # remove top from stack: - top = ifstack.pop() - # copy to output: - self.outfile.append('%s\n' % line) - # next input line: - continue - #endif - - # within if-statements ? - if len(ifstack) > 0 : - # extract current top: - resolved, flag, anyflag, msg = ifstack[-1] - # already resolved ? then check if this line should be skipped: - if resolved and (not flag) : - # skip this line, but include commented version in output: - self.outfile.append('!%s\n' % line) - # read the next input line: - continue - #endif - #endif - - # - # handle '#eval' lines - # - - mark = '#eval' - if line.startswith(mark): - # info .. - if not warned_for_eval : - if not silent: logging.warning('the #eval statements in rc-files are deprecated, use {key:value} pairs instead') - warned_for_eval = True - #endif - # add commented copy to output: - self.outfile.append('!evaluated>>> ' + line) - # remove leading mark: - line = line.lstrip(mark) - # multiple settings are seperated by ';' : - evals = line.split(';') - # insert in output file: - for k in range(len(evals)) : - # split in key and value: - key, value = evals[k].split('=') - # insert: - self.outfile.append('%s : %s' % (key, value)) - #endfor - # next input line: - continue - #endif - - # - # replace ${..} values - # - - # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace('{', '\{').replace('<', '\<').replace('$', '\$') - close_mark = marks[1].replace('}', '\}').replace('>', '\>') - - # set syntax of keywords to be matched, e.g. 
'${...}' : - pattern = start_mark + '[A-Za-z0-9_.]+' + close_mark - - # make a regular expression that matches all variables: - rc_varpat = re.compile(pattern) - - # search all matching paterns: - pats = re.findall(rc_varpat, line) - # counter for unexpanded substitutions: - ntobedone = 0 - # loop over matches: - for pat in pats : - # remove enclosing characters: - key = pat.lstrip(start_mark).rstrip(close_mark) - # test some dictionaries for matching key: - if self.values.has_key(key) : - # get previously defined value: - val = self.values[key] - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif os.environ.has_key(key) : - # get value from environment: - val = os.environ[key] - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif key == 'pid' : - # special value: process id; convert to character: - val = '%i' % os.getpid() - # substitute value: - line = line.replace(pat, val) - # set flag: - something_done = True - elif key == 'script' : - # special value: base name of the calling script, without extension: - script, ext = os.path.splitext(os.path.basename(sys.argv[0])) - # substitute value: - line = line.replace(pat, script) - # set flag: - something_done = True - else : - # could not substitute yet; set flag: - ntobedone = ntobedone + 1 - # continue with next substitution: - continue - #endif - #endfor # matched patterns - # not all substituted ? - if ntobedone > 0 : - # add line to output: - self.outfile.append(line) - # a new pass is needed: - something_to_be_done = True - # store for info messages: - unresolved.append(line) - # next input line: - continue - #endif - - # - # handle '#include' lines - # - - mark = '#include' - if line.startswith(mark) : - # remove leading mark, what remains is file to be included: - inc_file = line.lstrip(mark).strip() - # check ... - if not os.path.exists(inc_file) : - inc_file = os.path.join(self.rootdir, inc_file) - logging.debug('Added rootdir to requested include: %s' % inc_file) - - if not os.path.exists(inc_file) : - logging.error('include file not found : %s' % inc_file) - msg = 'ERROR - include file not found : %s' % inc_file - raise IOError, msg - #endif - # read content: - inc_f = open(inc_file, 'r') - inc_rc = inc_f.readlines() - inc_f.close() - # add extra comment for output file: - self.outfile.append('! >>> %s >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n' % inc_file) - self.outfile.extend(inc_rc) - self.outfile.append('! 
<<< %s <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n' % inc_file) - # set flag: - something_done = True - # a new pass is needed: - something_to_be_done = True - # next input line: - continue - #endif - - - # - # conditional settings (2) - # - - # evaluate conditional expressions: - mark1 = '#if' - mark2 = '#elif' - if line.startswith(mark1) or line.startswith(mark2) : - # remove leading mark, what remains is logical expression: - expression = line.lstrip(mark1).strip() - expression = line.lstrip(mark2).strip() - # common mistake is to add a ':' as in python; remove this: - if expression.endswith(':') : expression = expression.rstrip(':').strip() - # evaluate: - try : - flag = eval(expression) - except : - logging.error('could not evaluate expression:') - logging.error(' %s' % expression) - logging.error('on line:') - logging.error(line_info) - sys.exit(1) - #endtry - # remove temporary top added before during this pass: - tmp_statement, tmp_flag, tmp_anyflag, tmp_msg = ifstack.pop() - # extract current top if necessary: - if len(ifstack) > 0 : - dummy_statement, prev_flag, dummy_anyflag, dummy_msg = ifstack[-1] - else : - prev_flag = True - #endif - # should next lines be included ? - new_flag = prev_flag and tmp_flag and flag - # any if/elif evaluated to true in this sequence ? - new_anyflag = tmp_anyflag or new_flag - # add to stack, now resolved, take into accout current flag: - ifstack.append((True, new_flag, new_anyflag, line_info)) - # debug ... - #print 'xxx2 ', ifstack - # copy to output: - self.outfile.append('%s\n' % line) - # next input line: - continue - #endif - - # - # error message - # - - # special command to rais an exception: - mark = '#error' - if line.startswith(mark) : - # remove leading mark, what remains is error message: - msg = line.lstrip(mark).strip() - # display: - logging.error(msg) - # add info: - logging.error('error message found on line:') - logging.error(line_info) - # stop: - raise Exception - #endif - - # - # checks - # - - # common mistake ... - if line.startswith('#') : - logging.error('line in rcfile starts with "#" but is not an "#include" or other special line;') - logging.error('if it is supposed to be comment, please start with "!" ...') - logging.error(' rcfile : %s' % filename) - logging.error(' line : %s' % line) - raise IOError - #endif - - # check ... - if ':' not in line : - logging.error('key/value line should contain a ":"') - logging.error(' rcfile : %s' % filename) - logging.error(' line : %s' % line) - raise IOError - #endif - - # - # add to output - # - - # add line to output: - self.outfile.append('%s\n' % line) - - # - # add key/value pair - # - - # not if inside an unresolved if-statement ... - if len(ifstack) > 0 : - # get top values: - resolved, flag, anyflag, msg = ifstack[-1] - # not resolved yet ? then continue: - if not resolved : continue - #endif - - # split in key and value; - # value might contain ':' too, so at maximum 1 split: - key, val = line.split(':', 1) - - # remove comment from value: - if '!' in val : - # not if '\!' is in the value ... - if not '\!' in val : val, comment = val.split('!') - # replace all slash-comments: - val = val.replace('\!', '!') - #endif - - # remove spaces: - key = key.strip() - val = val.strip() - - # already defined ? - if self.values.has_key(key) : - # no problem if values are the same, but otherwise ... 
- if self.values[key] != val : - logging.error('key found twice in "%s" :' % filename) - logging.error(' %s : %s' % (key, str(self.values[key]))) - logging.error(' %s : %s' % (key, str(val))) - raise Exception - #endif - else : - # store new value: - self.values[key] = val - # set flag: - something_done = True - #endif - - # display key and value ... - #print ' --> %s : %s' % (key,val) - - #endfor # loop over lines in text - - ## info ... - #print '~~~ outfile ~~~~~~~~~~~~~~~~~~~~~~~' - #for line in self.outfile : print line.strip() - #print '~~~ key/values ~~~~~~~~~~~~~~~~~~~~' - #for k,v in self.items() : - # print '%s : %s' % (k,v) - ##endfor - #print '-------------------------------------------------' - #print '' - - # check ... - if len(ifstack) > 0 : - logging.error('unterminated if-statement ; current stack:') - for resolved, flag, anyflag, msg in ifstack : logging.error(msg) - logging.error('please fix the rcfile or debug this script ...') - raise Exception - #endif - - # check ... - if something_to_be_done : - # check for unterminated loop ... - if not something_done : - logging.error('could not resolve the following lines in rcfile:') - for uline in unresolved : logging.error(' %s' % uline) - logging.error('please fix the rcfile or debug this script ...') - raise Exception - #endif - else : - # finished ... - break - #endif - - # for safety ... - if ipass == 100 : - logging.error('resolving rc file has reached pass %i ; something wrong ?' % ipass) - raise Exception - #endif - - # new pass: - ipass = ipass + 1 - # renew input: - inpfile = self.outfile - - #endwhile # something to be done - - #enddef # __init__ - - - # *** - - - def has_key(self, key) : - - # from dictionairy: - return self.values.has_key(key) - - #enddef - - - # *** - - - def keys(self) : - - # from dictionairy: - return self.values.keys() - - #enddef - - - # *** - - - def get(self, key, totype='', default=None, verbose=False) : - - """ - rcf.get( 'my.value' [,default=None] ) - Return element 'key' from the dictionairy. - If the element is not present but a default is specified, than return - the default value. - If 'verbose' is set to True, then print debug messages to the logging - about which values is returned for the given key. - The option argument 'totype' defines the conversion to a Python type. - If 'totype' is set to 'bool', the return value is the - boolean True for values 'T', 'True', 'yes', and '1', - and False for 'F', 'False', 'no', or '0' ; - for other values, an exception will be raised. - """ - - # element found ? - if self.values.has_key(key) : - # copy value: - value = self.values[key] - # convert ? - if totype == 'bool' : - # convert to boolean: - if value in ['T', 'True', 'yes', '1'] : - value = True - elif value in ['F', 'False', 'no', '0'] : - value = False - else : - logging.error("value of key '%s' is not a boolean : %s" % (key, str(value))) - raise Exception - #endif - elif len(totype) > 0 : - # convert to other type ... - value = eval('%s(%s)' % (totype, value)) - #endif - # for debugging ... - if verbose : logging.debug('rc setting "%s" : "%s"' % (key, str(value))) - else : - # default value specified ? - if default != None : - # copy default: - value = default - # for debugging ... - if verbose : logging.debug('rc setting "%s" : "%s" (deault)' % (key, str(value))) - else : - # something wrong ... 
- logging.error("key '%s' not found in '%s' and no default specified" % (key, self.filename)) - raise Exception - #endif - #endif - - # ok - return value - - #enddef - - - # *** - - - def replace(self, key, val) : - - """ - Replace a key by a new value. - """ - - # search for a line '<key> : <val>' - # loop over lines in output file: - found = False - for iline in range(len(self.outfile)) : - # extract: - line = self.outfile[iline] - # skip lines that are no key:value pair for sure ... - if ':' not in line : continue - # split once at first ':' - k, v = line.split(':', 1) - # match ? - if k.strip() == key : - # replace line in original file: - self.outfile[iline] = '%s : %s\n' % (k, str(val)) - # replace value: - self.values[key] = val - # set flag: - found = True - # found, thus no need to continue: - break - #endif - #endfor # lines - # not found ? - if not found : - logging.error('could not replace key : %s' % key) - raise Exception - #endif - - # ok - return - - #enddef - - - # *** - - - def add(self, key, val, comment='') : - - """Add a new key/value pair.""" - - # add lines: - self.outfile.append('\n') - if len(comment) > 0 : self.outfile.append('! %s\n' % comment) - self.outfile.append('%s : %s\n' % (key, str(val))) - - # add to dictionairy: - self.values[key] = val - - # ok - return - - #enddef - - - # *** - - - def substitute(self, line, marks=('${', '}')) : - - """ - Return a line with all '${..}' parts replaced by the corresponding rcfile values. - The 2-item tupple (mark1,mark2) could be used to re-define the default - key pattern '${..}' into something else: - <mark1>...<mark2> - """ - - # ensure that common marks are evaluated correctly: - start_mark = marks[0].replace('{', '\{').replace('<', '\<').replace('$', '\$') - close_mark = marks[1].replace('}', '\}').replace('>', '\>') - - # set syntax of keywords to be matched, e.g. '${...}' : - pattern = start_mark + '[A-Za-z0-9_.]+' + close_mark - - # make a regular expression that matches all variables: - rc_varpat = re.compile(pattern) - - # search all matching paterns: - pats = re.findall(rc_varpat, line) - # loop over matches: - for pat in pats : - # remove enclosing characters: - key = pat.lstrip(start_mark).rstrip(close_mark) - # test dictionary for matching key: - if self.values.has_key(key) : - # get previously defined value: - val = self.values[key] - # substitute value: - line = line.replace(pat, val) - #endif - #endfor # matched patterns - - # ok - return line - - #enddef - - - # *** - - - def WriteFile(self, filename) : - - """ write the dictionary to file""" - - # open file for writing: - f = open(filename, 'w') - - ## loop over key/value pairs: - #for k,v in self.items(): - # # add line; at least the specified number of characters - # # is used for the key: - # f.write( '%-20s:%s\n' % (k,v) ) - ##endfor - - # write processed input: - f.writelines(self.outfile) - - # close file: - f.close() - - #endif - - -#endclass # RcFile - -def read(rcfilename, silent=False): - """ - This method reads an rc-file by making an instance of the RcFile class, and then returns the dictionary of values only. This - makes it backwards compatible with older implementations of the rc.py module - """ - - rcdict = RcFile(rcfilename, silent=silent) - - return rcdict.values - -def write(filename, rcdict): - """ - This method writes an rc-file dictionary. 
This is included to make this module backwards compatible with - older implementations of the rc.py module - """ - - # open file for writing: - f = open(filename, 'w') - - # loop over key/value pairs: - for k, v in rcdict.items(): - # add line; at least the specified number of characters - # is used for the key: - f.write('%-20s:%s\n' % (k, v)) - #endfor - - # close file: - f.close() - - - -# ------------------------------------------------ -# test -# ------------------------------------------------ - - -if __name__ == '__main__': - - # external ... - - import optparse - - - # extract arguments from sys.argv array: - # 0 = name of calling script, 1: = actual arguments - args = sys.argv[1:] - - # set text for 'usage' help line: - usage = "\n %prog <rcfile> <key> [-b|--bool] [--default<=value>]\n %prog <rcfile> -w|--write\n %prog -h|--help\n %prog -d|--doc" - - # initialise the option parser: - parser = optparse.OptionParser(usage=usage) - - # define options: - parser.add_option("-d", "--doc", - help="print documentation", - dest="doc", action="store_true", default=False) - parser.add_option("-b", "--bool", - help="return 'True' for values 'T', 'True', 'yes', or '1', and 'False' for 'F', 'False', 'no', or '0'", - dest="boolean", action="store_true", default=False) - parser.add_option("--default", - help="default value returned if key is not found", - dest="default", action="store") - parser.add_option("-w", "--write", - help="write pre-processed rcfile", - dest="write", action="store_true", default=False) - - # now parse the actual arguments: - opts, args = parser.parse_args(args=args) - - # print documentation ? - if opts.doc : - print __doc__ - sys.exit(0) - #endif - - # recfile argument should be provided: - if len(args) < 1 : - parser.error("no name of rcfile provided\n") - #endif - # extract: - rcfile = args[0] - - # read rcfile in dictionary: - try : - rcf = RcFile(rcfile) - except : - logging.error(sys.exc_info()[1]) - sys.exit(1) - #endtry - - # print pre-processed file ? - if opts.write : - for line in rcf.outfile : print line.strip() - sys.exit(0) - #endif - - # key argument should be provided: - if len(args) < 2 : - parser.error("no name of rckey provided\n") - #endif - # extract: - rckey = args[1] - - # key present ? - if rcf.has_key(rckey) : - - # print requested value: - if opts.boolean : - # extract value: - flag = rcf.get(rckey, 'bool') - # print result: - if flag : - print 'True' - else : - print 'False' - #endif - else : - # extract value: - value = rcf.get(rckey) - # display: - print value - #endif - - else : - - # default value provided ? - if opts.default != None : - # display: - print opts.default - else : - print 'ERROR - key "%s" not found in rcfile "%s" and no default specified' % (rckey, rcfile) - sys.exit(1) - #endif - - #endif - -#endif - - -# ------------------------------------------------ -# end -# ------------------------------------------------ - diff --git a/go_readrc b/go_readrc deleted file mode 100644 index 6725a935b50da803c3b739833b3392517124a5be..0000000000000000000000000000000000000000 --- a/go_readrc +++ /dev/null @@ -1,193 +0,0 @@ -# CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. -# Users are recommended to contact the developers (wouter.peters@wur.nl) to receive -# updates of the code. See also: http://www.carbontracker.eu. -# -# This program is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software Foundation, -# version 3. 
This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with this -# program. If not, see <http://www.gnu.org/licenses/>. - -#! /bin/sh - -# --- init --- - -# leave on error -set -e - - -# --- external --- - -basename='/bin/basename' -test ! -x ${basename} && basename='/usr/bin/basename' - -egrep='/bin/egrep' -test ! -x ${egrep} && egrep='/usr/bin/egrep' - -less='/bin/less' -test ! -x ${less} && less='/usr/bin/less' -test ! -x ${less} && less='/usr/local/bin/less' - -sed='/bin/sed' -test ! -x ${sed} && sed='/usr/bin/sed' - - -# --- definitions --- - -prog=`${basename} $0` - - -# --- help --- - -DisplayHelp () -{ -${xPAGER:-${less}} << EOF -$prog General Objects - -NAME - $prog - read data value from a resource file - -SYNOPSIS - go_readrc <rcfile> <key> [<default>] - go_readrc -h|--help - -DESCRIPTION - A recourcefile is a text file with key/data pairs, usefull - to initialize programs (scripts, Fortran, etc). - The format of the <rcfile> is chosen close to the standard X resources: - - * Comment lines start with '!' - - * A key/data pair has the format: - <key> : <value> - where the white space (space or tabs) is optional. - The <key> consists of letters, numbers, '_', and '.' . - - Example of a valid rcfile: - - ! Specify an output directory: - output.path : d/ - - Given a text <key>, the <rcfile> is scanned for a line starting - with this key; all text behind the ':' is written to the standard output. - Example of usage in sh script: - - output_root=\`go_readrc test.rc output.path\` - - If the <key> is not found, an error message is issued, - unless a <default> is supplied which is then written to standard output. - The <default> might be an empty string, e.g. '' . - -PREPROCESSING - The rcfile might be preprocessed by go_pprc, - to expand environment variables. - -EXIT STATUS - Non zero in case of any error. - -SEE ALSO - X, go_pprc - -AUTHOR - Arjo Segers -EOF -exit 0 -} - -ErrorMessage () -{ - echo "ERROR in $prog: $1" 1>&2 - echo " Use '$prog -h' for information." 1>&2 - exit 1 -} - -# --- arguments --- - -rcfile='' -rckey='' -with_default='' - -while [ $# -gt 0 ]; do - case "$1" in - -h | --help ) - DisplayHelp - ;; - -* ) - ErrorMessage "unknown option '$1' ..." - ;; - * ) - if [ -z "${rcfile}" ]; then - rcfile="$1" - elif [ -z "${rckey}" ]; then - rckey="$1" - elif [ -z "${with_default}" ]; then - default="$1" - with_default='true' - else - ErrorMessage "unknown argument '$1'" - fi - ;; - esac - shift -done - -if [ -z "${rcfile}" -o -z "${rckey}" ]; then - ErrorMessage "missing arguments" -fi - -# --- begin --- - -# does the rcfile exist? -if [ ! -f ${rcfile} ]; then - ErrorMessage "rcfile '${rcfile}' does not exist ..." -fi - -# replace '.' in the rckey by '\.' -rckeydots=`echo ${rckey} | ${sed} -e 's/\./\\\\./g'` - -# 10 Apr 06: Andy Jacobson -# [[:space:]] indicates a space or tab character -#wspace='[[:space:]]*' -# -# 26 Apr 06: Arjo Segers -# The egrep on SGI system does not support the '[:space:]' ; -# use a real tab character instead ... -tab=' ' -wspace="[ ${tab}]*" - -# A key-data line has the following synopsis: -# -# <begin-of-line><key>[<wspace>]:[<wspace>]<data> -# -# where <wspace> denote tabs or spaces. 
-# Set regular expression for such a line except the <data> part; -# this expression is used to search for a key and to extract -# the data part: -# -re="^${rckeydots}${wspace}:${wspace}" - -# set grep command to select matching lines: -selectlinecmd="${egrep} '${re}' ${rcfile}" - -# count number of hits; should be exactely 1 ... -nfound=`eval "${selectlinecmd}" | /usr/bin/wc -l` -if [ ${nfound} -eq 0 ]; then - if [ -z "${with_default}" ]; then - ErrorMessage "key '${rckey}' not found in ${rcfile} and no default specified ..." - else - echo "${default}" - exit 0 - fi -elif [ ${nfound} -gt 1 ]; then - ErrorMessage "key '${rckey}' found ${nfound} times in $rcfile ..." -fi - -# extract the data part for this key; -# substitute an empty string for the 'key : ' part; -# remove trailing blanks; -# output is written to standard output: -eval "${selectlinecmd}" | ${sed} -e "s/${re}//" -e "s/${wspace}$//" - diff --git a/start_ctdas.sh b/start_ctdas.sh index a56aae9aa3541e161a7d3d5f1891e343e2fdf632..c588506e72855d511cad6046d2e0857d1351ae96 100755 --- a/start_ctdas.sh +++ b/start_ctdas.sh @@ -40,7 +40,8 @@ where: arg2: project name (i.e, test_ctdas) arg3: branch name -h shows this help text - + -M: create new branch from which clones can be made (new branch is a Master) + -n: clone new branch from current branch ! A new folder will then be created and populated: /scratch/"$USER"/test_ctdas/ diff --git a/templates/template.py b/templates/template.py index 6f88081c7e3e6fdd7231a1a3526e0af4baf6e79e..22c4eb39d626aa8e9a0bf7e219d90389a97da706 100755 --- a/templates/template.py +++ b/templates/template.py @@ -29,7 +29,7 @@ from da.tools.initexit import start_logger, validate_opts_args, parse_options, C from da.stilt.pipeline import forward_pipeline, header, footer from da.platform.cartesius import CartesiusPlatform from da.baseclasses.dasystem import DaSystem -from da.stilt.statevector import CO2GriddedStateVector +from da.baseclasses.statevector import StateVector from da.baseclasses.obs import Observations from da.baseclasses.optimizer import Optimizer from da.stilt.observationoperator import STILTObservationOperator @@ -54,7 +54,7 @@ platform = CartesiusPlatform() dasystem = DaSystem(dacycle['da.system.rc']) obsoperator = STILTObservationOperator(dacycle['da.obsoperator.rc']) samples = Observations() -statevector = CO2GriddedStateVector() +statevector = StateVector() optimizer = Optimizer() ##########################################################################################