Commit 3593d6ce authored by Peters, Wouter's avatar Peters, Wouter
Browse files

latest tested version with all of Karolina's updates is now the new trunk, to be released soon

parents 7c216236 3927ae2c
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ctdas_light_refactor_new</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/ctdas_light_refactor_new</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
......@@ -12,12 +12,12 @@ clean:
# -------------------
/bin/rm -f jb.[0-9]*.{jb,rc,log}
/bin/rm -f */*/jb.[0-9]*.{rc,jb,log}
/bin/rm -f *.pyc
/bin/rm -f */*/*.pyc
/bin/rm -f das.[0-9]*.log
/bin/rm -f *.log
/bin/rm -f *.[eo]*
/bin/rm -f *.p[eo]*
/bin/rm -f out.*
/bin/rm -f err.*
#!/usr/bin/env python
# daily_fluxes.py
"""
Author : peters
Revision History:
File created on 20 Dec 2012.
"""
import sys
sys.path.append('../../')
import os
import sys
import shutil
import datetime
def daily_avg(rundir,avg):
""" Function to create a set of daily files in a folder, needed to make longer term means """
if avg not in ['transcom','olson']:
raise IOError,'Choice of averaging invalid'
if no os.path.exists(rundir):
raise IOError,'rundir requested (%s) does not exist, exiting...'%rundir
weekdir = os.path.join(rundir + 'data_%s_weekly'%avg)
files = os.listdir(weekdir)
daydir = os.path.join(rundir + 'data_%s_daily'%avg)
if not os.path.exists(daydir):
print "Creating new output directory " + daydir
os.makedirs(daydir)
files = [f for f in files if '-' in f]
fileinfo = {}
for filename in files:
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d')
fileinfo[filename] = date
dt = fileinfo[files[1]] - fileinfo[files[0]]
for k,v in fileinfo.iteritems():
cycle_file = os.path.join(weekdir,k)
for i in range(dt.days):
daily_file = os.path.join(daydir,'%s_fluxes.%s.nc'%(avg,(v+datetime.timedelta(days=i)).strftime('%Y-%m-%d')))
if not os.path.lexists(daily_file):
os.symlink(cycle_file,daily_file)
print daily_file,cycle_file
if __name__ == "__main__":
    # Hard-coded analysis directory of the CTDAS run to post-process
    rundir = "/Storage/CO2/ivar/zeus_output/ICDC9/ctdas_co2c13_terdisbcb_manscale/analysis/"
    # Averaging scheme from the first command-line argument; defaults to 'transcom'
    try:
        avg=sys.argv[1]
    except:
        avg='transcom'
    daily_avg(rundir,avg)
This diff is collapsed.
#!/usr/bin/env python
# expand_fluxes.py
import sys
import os
import getopt
import shutil
import logging
import netCDF4
import numpy as np
from string import join
from datetime import datetime, timedelta
from pylab import date2num, num2date
sys.path.append('../../')
from da.tools.general import create_dirs
import da.tools.io4 as io
"""
Author: Wouter Peters (Wouter.Peters@wur.nl)
Revision History:
File created on 11 May 2012.
"""
def write_mixing_ratios(dacycle):
    """
    Write Sample information to NetCDF files. These files are organized by site and
    have an unlimited time axis to which data is appended each cycle.

    The needed information is obtained from the sample_auxiliary.nc files and the original input data files from ObsPack.

    The steps are:

    (1) Create a directory to hold timeseries output files
    (2) Read the sample_auxiliary.nc file for this cycle and get a list of original files they were obtained from
    (3) For each file, copy the original data file from ObsPack (if not yet present)
    (4) Open the copied file, find the index of each observation, fill in the simulated data

    Parameters:
        dacycle : CycleControl-like mapping; keys read here are 'dir.analysis',
                  'cyclelength', 'time.start', 'time.end', 'dir.output',
                  'dir.da_run', 'da.system', 'da.obsoperator', 'time.finish',
                  plus the attribute dacycle.dasystem (for 'opt.algorithm').
                  Key 'time.sample.stamp' is written as a side effect.
    Returns:
        None
    """
    dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_molefractions'))
    #
    # Some help variables
    #
    dectime0 = date2num(datetime(2000, 1, 1))  # NOTE(review): unused below
    dt = dacycle['cyclelength']  # NOTE(review): unused below
    startdate = dacycle['time.start']
    enddate = dacycle['time.end']
    logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
    logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
    # Time stamp (yyyymmddhh_yyyymmddhh) used in the sample_auxiliary file name
    dacycle['time.sample.stamp'] = "%s_%s" % (startdate.strftime("%Y%m%d%H"), enddate.strftime("%Y%m%d%H"),)
    # Step (1): Get the posterior sample output data file for this cycle
    infile = os.path.join(dacycle['dir.output'], 'sample_auxiliary_%s.nc' % dacycle['time.sample.stamp'])
    ncf_in = io.ct_read(infile, 'read')
    obs_num = ncf_in.get_variable('obs_num')
    obs_val = ncf_in.get_variable('observed')
    simulated = ncf_in.get_variable('modelsamples')
    infilename = ncf_in.get_variable('inputfilename')
    # character array of original ObsPack file names -> list of python strings
    infiles = netCDF4.chartostring(infilename).tolist()
    #infiles = [join(s.compressed(),'') for s in infilename]
    ncf_in.close()
    # Step (2): Get the prior sample output data file for this cycle
    infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % startdate.strftime('%Y%m%d'))
    # The optimizer file may be absent; remember whether prior/forecast data exist
    if os.path.exists(infile):
        optimized_present = True
    else:
        optimized_present = False
    if optimized_present:
        ncf_fc_in = io.ct_read(infile, 'read')
        fc_obs_num = ncf_fc_in.get_variable('obspack_num')
        fc_obs_val = ncf_fc_in.get_variable('observed')
        fc_simulated = ncf_fc_in.get_variable('modelsamplesmean_prior')
        fc_simulated_ens = ncf_fc_in.get_variable('modelsamplesdeviations_prior')
        fc_flag = ncf_fc_in.get_variable('flag')
        # The 'bulk' optimizer stores full covariance matrices, so take the
        # diagonal; 'serial' (and the default) store variance vectors directly
        if not dacycle.dasystem.has_key('opt.algorithm'):
            fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance')
            fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance')
        elif dacycle.dasystem['opt.algorithm'] == 'serial':
            fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance')
            fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance')
        elif dacycle.dasystem['opt.algorithm'] == 'bulk':
            fc_r = ncf_fc_in.get_variable('modeldatamismatchvariance').diagonal()
            fc_hphtr = ncf_fc_in.get_variable('totalmolefractionvariance').diagonal()
        filesitecode = ncf_fc_in.get_variable('sitecode')
        fc_sitecodes = netCDF4.chartostring(filesitecode).tolist()
        #fc_sitecodes = [join(s.compressed(),'') for s in filesitecode]
        ncf_fc_in.close()
        # Expand the list of input files with those available from the forecast list
        infiles_rootdir = os.path.split(infiles[0])[0]
        infiles.extend(os.path.join(infiles_rootdir, f + '.nc') for f in fc_sitecodes)
    #Step (2): For each observation timeseries we now have data for, open it and fill with data
    for orig_file in set(infiles):
        if not os.path.exists(orig_file):
            logging.error("The original input file (%s) could not be found, continuing to next file..." % orig_file)
            continue
        copy_file = os.path.join(dirname, os.path.split(orig_file)[-1])
        if not os.path.exists(copy_file):
            # First time this site is seen: copy the ObsPack original into the
            # analysis directory and add the CTDAS variables and attributes
            shutil.copy(orig_file, copy_file)
            logging.debug("Copied a new original file (%s) to the analysis directory" % orig_file)
            ncf_out = io.CT_CDF(copy_file, 'write')
            # Modify the attributes of the file to reflect added data from CTDAS properly
            ncf_out.Caution = '==================================================================================='
            try:
                ncf_out.History += '\nOriginal observation file modified by user %s on %s\n' % (os.environ['USER'], datetime.today().strftime('%F'),)
            except:
                # original file carried no History attribute yet: create one
                ncf_out.History = '\nOriginal observation file modified by user %s on %s\n' % (os.environ['USER'], datetime.today().strftime('%F'),)
            ncf_out.CTDAS_info = 'Simulated values added from a CTDAS run by %s on %s\n' % (os.environ['USER'], datetime.today().strftime('%F'),)\
                               + '\nCTDAS was run on platform %s' % (os.environ['HOST'],)\
                               + '\nCTDAS job directory was %s' % (dacycle['dir.da_run'],)\
                               + '\nCTDAS Da System was %s' % (dacycle['da.system'],)\
                               + '\nCTDAS Da ObsOperator was %s' % (dacycle['da.obsoperator'],)
            ncf_out.CTDAS_startdate = dacycle['time.start'].strftime('%F')
            ncf_out.CTDAS_enddate = dacycle['time.finish'].strftime("%F")
            ncf_out.original_file = orig_file
            # get nobs dimension
            # NOTE(review): if neither 'id' nor 'obs' exists, dimidob/dimid stay
            # unbound and the lines below raise NameError -- confirm intended
            if ncf_out.dimensions.has_key('id'):
                dimidob = ncf_out.dimensions['id']
                dimid = ('id',)
            elif ncf_out.dimensions.has_key('obs'):
                dimidob = ncf_out.dimensions['obs']
                dimid = ('obs',)
            # NOTE(review): isunlimited is referenced, not called -- if it is a
            # method (as on netCDF4 Dimension objects) this test is always
            # truthy; confirm against the da.tools.io4 wrapper
            if dimidob.isunlimited:
                nobs = ncf_out.inq_unlimlen()
            else:
                nobs = len(dimid)  # NOTE(review): length of the 1-tuple of names (=1), looks suspicious; nobs is unused below
            # add nmembers dimension
            dimmembersob = ncf_out.createDimension('nmembers', size=simulated.shape[1])
            dimmembers = ('nmembers',)
            nmembers = len(dimmembers)  # NOTE(review): =1, unused below
            # Create empty arrays for posterior samples, as well as for forecast sample statistics
            savedict = io.std_savedict.copy()
            savedict['name'] = "flag_forecast"
            savedict['long_name'] = "flag_for_obs_model in forecast"
            savedict['units'] = "None"
            savedict['dims'] = dimid
            savedict['comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modeldatamismatch"
            savedict['long_name'] = "modeldatamismatch"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimid
            savedict['comment'] = 'Variance of mole fractions resulting from model-data mismatch'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "totalmolefractionvariance_forecast"
            savedict['long_name'] = "totalmolefractionvariance of forecast"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimid
            savedict['comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesmean"
            savedict['long_name'] = "mean modelsamples"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['comment'] = 'simulated mixing ratios based on optimized state vector'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesmean_forecast"
            savedict['long_name'] = "mean modelsamples from forecast"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['comment'] = 'simulated mixing ratios based on prior state vector'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesstandarddeviation"
            savedict['long_name'] = "standard deviaton of modelsamples over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['comment'] = 'std dev of simulated mixing ratios based on optimized state vector'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesstandarddeviation_forecast"
            savedict['long_name'] = "standard deviaton of modelsamples from forecast over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['comment'] = 'std dev of simulated mixing ratios based on prior state vector'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesensemble"
            savedict['long_name'] = "modelsamples over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid + dimmembers
            savedict['comment'] = 'ensemble of simulated mixing ratios based on optimized state vector'
            ncf_out.add_variable(savedict)
            savedict = io.std_savedict.copy()
            savedict['name'] = "modelsamplesensemble_forecast"
            savedict['long_name'] = "modelsamples from forecast over all ensemble members"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid + dimmembers
            savedict['comment'] = 'ensemble of simulated mixing ratios based on prior state vector'
            ncf_out.add_variable(savedict)
        else:
            logging.debug("Modifying existing file (%s) in the analysis directory" % copy_file)
            ncf_out = io.CT_CDF(copy_file, 'write')
        # Get existing file obs_nums to determine match to local obs_nums
        if ncf_out.variables.has_key('id'):
            file_obs_nums = ncf_out.get_variable('id')
        elif ncf_out.variables.has_key('obspack_num'):
            file_obs_nums = ncf_out.get_variable('obspack_num')
        # Get all obs_nums related to this file, determine their indices in the local arrays
        selected_obs_nums = [num for infile, num in zip(infiles, obs_num) if infile == orig_file]
        # Optimized data 1st: For each index, get the data and add to the file in the proper file index location
        for num in selected_obs_nums:
            model_index = obs_num.tolist().index(num)
            file_index = file_obs_nums.tolist().index(num)
            #var = ncf_out.variables['modeldatamismatch'] # Take from optimizer.yyyymmdd.nc file instead
            #var[file_index] = mdm[model_index]
            # ensemble member 0 is the mean, members 1..N are deviations
            var = ncf_out.variables['modelsamplesmean']
            var[file_index] = simulated[model_index, 0]
            var = ncf_out.variables['modelsamplesstandarddeviation']
            var[file_index] = simulated[model_index, 1:].std()
            var = ncf_out.variables['modelsamplesensemble']
            var[file_index] = simulated[model_index, :]
        # Now forecast data too: For each index, get the data and add to the file in the proper file index location
        if optimized_present:
            # match forecast samples to this file by site code substring
            selected_fc_obs_nums = [num for sitecode, num in zip(fc_sitecodes, fc_obs_num) if sitecode in orig_file]
            for num in selected_fc_obs_nums:
                model_index = fc_obs_num.tolist().index(num)
                file_index = file_obs_nums.tolist().index(num)
                var = ncf_out.variables['modeldatamismatch']
                var[file_index] = np.sqrt(fc_r[model_index])
                var = ncf_out.variables['modelsamplesmean_forecast']
                var[file_index] = fc_simulated[model_index]
                var = ncf_out.variables['modelsamplesstandarddeviation_forecast']
                var[file_index] = fc_simulated_ens[model_index, 1:].std()
                var = ncf_out.variables['modelsamplesensemble_forecast']
                var[file_index] = fc_simulated_ens[model_index, :]
                var = ncf_out.variables['totalmolefractionvariance_forecast']
                var[file_index] = fc_hphtr[model_index]
                var = ncf_out.variables['flag_forecast']
                var[file_index] = fc_flag[model_index]
        # close the file
        status = ncf_out.close()
    return None
if __name__ == "__main__":
    # Stand-alone driver: walk all cycles of a finished CTDAS run and write
    # the simulated mole fractions back into (copies of) the ObsPack files.
    import logging
    from da.tools.initexit import CycleControl
    from da.carbondioxide.dasystem import CO2DaSystem
    sys.path.append('../../')
    logging.root.setLevel(logging.DEBUG)
    # Hard-coded rc files describing the run to be post-processed
    dacycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'})
    dacycle.setup()
    dacycle.parse_times()
    dasystem = CO2DaSystem('../rc/carbontracker_ct09_opfnew.rc')
    dacycle.dasystem = dasystem
    # Process every cycle between time.start and time.finish
    while dacycle['time.start'] < dacycle['time.finish']:
        outfile = write_mixing_ratios(dacycle)
        dacycle.advance_cycle_times()
    sys.exit(0)
#!/usr/bin/env python
# daily_fluxes.py
"""
Author : peters
Revision History:
File created on 20 Dec 2012.
"""
import os
import sys
import datetime
from dateutil.relativedelta import relativedelta
import subprocess
def monthly_avg(rundir,avg):
""" Function to average a set of files in a folder from daily to monthly means """
if avg not in ['transcom','olson']:
raise IOError,'Choice of averaging invalid'
if no os.path.exists(rundir):
raise IOError,'rundir requested (%s) does not exist, exiting...'%rundir
daydir = rundir + 'data_%s_daily'%avg
files = os.listdir(daydir)
monthdir = os.path.join(rundir,'data_%s_monthly'%avg)
if not os.path.exists(monthdir):
print "Creating new output directory " + monthdir
os.makedirs(monthdir)
sd = datetime.datetime(1999,1,1)
else:
files_monthly = os.listdir(monthdir)
files_monthly.sort()
fm = files_monthly[-1]
#sd = datetime.datetime(int(fm[9:13]),int(fm[14:16]),1)
sd = datetime.datetime.strptime(fm.split('.')[-2],'%Y-%m')
print sd
files = [f for f in files if '-' in f]
fileinfo = {}
for filename in files:
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d')
fileinfo[filename] = date
years = [d.year for d in fileinfo.values()]
months = set([d.month for d in fileinfo.values()])
if sd > datetime.datetime(1999,2,1):
sd = sd + relativedelta(months=+1)
else: sd = datetime.datetime(min(years),1,1)
ed = datetime.datetime(max(years)+1,1,1)
while sd < ed:
nd = sd + relativedelta(months=+1)
avg_files = [os.path.join(daydir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd]
print sd, nd , len(avg_files)
if len(avg_files) > 0:
command = ['ncra']+ avg_files + [os.path.join(monthdir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y-%m')))]
status = subprocess.check_call(command)
sd = nd
if __name__ == "__main__":
    # Hard-coded analysis directory of the CTDAS run to post-process
    rundir = "/Storage/CO2/ivar/zeus_output/ICDC9/ctdas_co2c13_terdisbcbmanscale_neweps449_test/analysis/"
    # Averaging scheme from the first command-line argument; defaults to 'transcom'
    try:
        avg = sys.argv[1]
    except:
        avg = 'transcom'
    # BUGFIX: the parsed arguments were never used -- invoke the averaging,
    # mirroring the daily_fluxes.py driver
    monthly_avg(rundir, avg)
#!/usr/bin/env python
# runinfo.py
"""
Author : peters
Revision History:
File created on 01 Mar 2011.
"""
import mysettings
import commands
import os
import sys
from datetime import datetime, timedelta
class RunInfo(object):
"""
This is a CarbonTracker run info object
Initialize it with the following keywords:
MANDATORY:
project = Name of project, should correspond to a directory [string]
sd = start date of project output [datetime object or integer yyyymmdd]
ed = end date of project output [datetime object or integer yyyymmdd]
OPTIONAL:
basedir = base directory of model output on your machine [string, default='~/Modeling/download']
outputdir= output directory for figures and summary files [string, default='~/Modeling/SEATA']
Tip: change the std_rundat dictionary in module filtertools to set new defaults for all values!
"""
def __init__(self,infodict={},rcfile=None):
""" Initialize from the name of an rcfile, or by passing the dictionary directly """
import da.tools.rc as rc
import da.tools.io4 as io
import copy
if rcfile:
infodict = rc.read(rcfile)
self.basedir = infodict['dir.da_run']
self.proj = os.path.split(self.basedir)[-1]
self.projdir = self.basedir
self.dir_scheme = mysettings.ct_dir_scheme
self.inputdir = os.path.join(self.basedir,'output')
self.outputdir = os.path.join(self.basedir,'analysis')
if not os.path.exists(self.outputdir):
os.makedirs(self.outputdir)
print "Creating new output directory "+self.outputdir
sd=infodict['time.start']
ed=infodict['time.finish']
dt=infodict['time.cycle']
self.dt = timedelta(days=int(dt))
if not isinstance(sd,datetime):
self.sd = datetime.strptime(sd,'%Y-%m-%d %H:%M:%S')
if not isinstance(ed,datetime):
self.ed = datetime.strptime(ed,'%Y-%m-%d %H:%M:%S')
dd = copy.deepcopy(self.sd)
self.