Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (2)
Showing with 4154 additions and 102 deletions
File added
@@ -45,7 +45,7 @@ File created on 21 Ocotber 2008.
def proceed_dialog(txt, yes=['y', 'yes'], all=['a', 'all', 'yes-to-all']):
""" function to ask whether to proceed or not """
- response = raw_input(txt)
+ response = input(txt)
if response.lower() in yes:
return 1
if response.lower() in all:
@@ -113,7 +113,7 @@ def save_weekly_avg_1x1_data(dacycle, statevector):
fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
- if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys():
+ if dacycle.dasystem['background.co2.biosam.flux'] in list(file.variables.keys()):
sam = True
biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
@@ -162,7 +162,7 @@ def save_weekly_avg_1x1_data(dacycle, statevector):
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
- print gridensemble.shape, bio.shape, gridmean.shape
+ print(gridensemble.shape, bio.shape, gridmean.shape)
biomapped = bio * gridmean
oceanmapped = ocean * gridmean
biovarmapped = bio * gridensemble
@@ -184,7 +184,7 @@ def save_weekly_avg_1x1_data(dacycle, statevector):
savedict['count'] = next
ncf.add_data(savedict)
- print biovarmapped.shape
+ print(biovarmapped.shape)
savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
savedict['values'] = biovarmapped.tolist()
savedict['dims'] = dimdate + dimensemble + dimgrid
@@ -301,7 +301,7 @@ def save_weekly_avg_state_data(dacycle, statevector):
fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
- if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys():
+ if dacycle.dasystem['background.co2.biosam.flux'] in list(file.variables.keys()):
sam = True
biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
@@ -555,7 +555,7 @@ def save_weekly_avg_tc_data(dacycle, statevector):
# Now convert other variables that were inside the flux_1x1 file
vardict = ncf_in.variables
- for vname, vprop in vardict.iteritems():
+ for vname, vprop in vardict.items():
data = ncf_in.get_variable(vname)[index]
@@ -680,7 +680,7 @@ def save_weekly_avg_ext_tc_data(dacycle):
# Now convert other variables that were inside the tcfluxes.nc file
vardict = ncf_in.variables
- for vname, vprop in vardict.iteritems():
+ for vname, vprop in vardict.items():
data = ncf_in.get_variable(vname)[index]
@@ -899,7 +899,7 @@ def save_weekly_avg_agg_data(dacycle, region_aggregate='olson'):
# Now convert other variables that were inside the statevector file
vardict = ncf_in.variables
- for vname, vprop in vardict.iteritems():
+ for vname, vprop in vardict.items():
if vname == 'latitude': continue
elif vname == 'longitude': continue
elif vname == 'date': continue
@@ -1014,7 +1014,7 @@ def save_time_avg_data(dacycle, infile, avg='monthly'):
pass
file = io.ct_read(infile, 'read')
- datasets = file.variables.keys()
+ datasets = list(file.variables.keys())
date = file.get_variable('date')
globatts = file.ncattrs()
@@ -1042,7 +1042,7 @@ def save_time_avg_data(dacycle, infile, avg='monthly'):
for d in vardims:
if 'date' in d:
continue
- if d in ncf.dimensions.keys():
+ if d in list(ncf.dimensions.keys()):
pass
else:
dim = ncf.createDimension(d, size=len(file.dimensions[d]))
@@ -1072,7 +1072,7 @@ def save_time_avg_data(dacycle, infile, avg='monthly'):
time_avg = [time_avg]
data_avg = [data_avg]
else:
- raise ValueError, 'Averaging (%s) does not exist' % avg
+ raise ValueError('Averaging (%s) does not exist' % avg)
count = -1
for dd, data in zip(time_avg, data_avg):
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# expand_fluxes.py
import sys
import os
sys.path.append('../../')
rootdir = os.getcwd().split('da/')[0]
analysisdir = os.path.join(rootdir, 'da/analysis')
from datetime import datetime, timedelta
import logging
import numpy as np
from da.tools.general import date2num, num2date
import da.tools.io4 as io
from da.analysis.tools_regions import globarea, state_to_grid
from da.tools.general import create_dirs
from da.analysis.tools_country import countryinfo # needed here
from da.analysis.tools_transcom import transcommask, ExtendedTCRegions
import netCDF4 as cdf
import da.analysis.tools_transcom as tc
import da.analysis.tools_country as ct
import da.analysis.tools_time as timetools
"""
Author: Wouter Peters (Wouter.Peters@noaa.gov)
Revision History:
File created on 21 October 2008.
"""
def proceed_dialog(txt, yes=['y', 'yes'], all=['a', 'all', 'yes-to-all']):
""" function to ask whether to proceed or not """
response = input(txt)
if response.lower() in yes:
return 1
if response.lower() in all:
return 2
return 0
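# Illustrative use only (a hypothetical caller, not part of the original file); the
# return codes are 1 for "yes", 2 for "yes-to-all" and 0 otherwise, so one might do:
#   if proceed_dialog('Output file exists, overwrite? [y/a/n] ') == 0:
#       sys.exit(0)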
def save_weekly_avg_1x1_data(dacycle, statevector):
"""
Function creates a NetCDF file with output on 1x1 degree grid. It uses the flux data written by the
:class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param statevector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
"""
#
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
nlag = statevector.nlag
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
#
# Create or open NetCDF output file
#
saveas = os.path.join(dirname, 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
ncf = io.CT_CDF(saveas, 'write')
#
# Create dimensions and lat/lon grid
#
dimgrid = ncf.add_latlon_dim()
dimensemble = ncf.add_dim('members', statevector.nmembers)
dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker fluxes')
setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
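# (ncfdate is the mid-point of this cycle expressed as decimal days since 2000-01-01,
#  i.e. relative to dectime0; the 'date' variable written below uses the same value.)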
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))
file = io.ct_read(filename, 'read')
bio = np.array(file.get_variable(dacycle.dasystem['background.co2.bio.flux']))
ocean = np.array(file.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys():
sam = True
biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
else: sam = False
file.close()
if sam:
bio = bio + biosam
fire = fire + firesam
next = ncf.inq_unlimlen()[0]
# Start adding datasets from here on, both prior and posterior datasets for bio and ocn
for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycles away from the start date.
#
if prior:
qual_short = 'prior'
for n in range(nlag, 0, -1):
priordate = startdate + n*dt - timedelta(dt.days * n)
savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
filename = os.path.join(savedir, 'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
if os.path.exists(filename):
statevector.read_from_file(filename, qual=qual_short)
gridmean, gridensemble = statevector.state_to_grid(lag=n)
# Replace the mean statevector by all ones (assumed priors)
gridmean = statevector.vector2grid(vectordata=np.ones(statevector.nparams,))
logging.debug('Read prior dataset from file %s, sds %d: ' % (filename, n))
break
else:
qual_short = 'opt'
savedir = dacycle['dir.output']
filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
statevector.read_from_file(filename, qual=qual_short)
gridmean, gridensemble = statevector.state_to_grid(lag=1)
logging.debug('Read posterior dataset from file %s, sds %d: ' % (filename, 1))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
print(gridensemble.shape, bio.shape, gridmean.shape)
biomapped = bio * gridmean
oceanmapped = ocean * gridmean
biovarmapped = bio * gridensemble
oceanvarmapped = ocean * gridensemble
#
#
# For each dataset, get the standard definitions from the module mysettings, add values, dimensions, and unlimited count, then write
#
savedict = ncf.standard_var(varname='bio_flux_' + qual_short)
savedict['values'] = biomapped.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='ocn_flux_' + qual_short)
savedict['values'] = oceanmapped.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
print(biovarmapped.shape)
savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
savedict['values'] = biovarmapped.tolist()
savedict['dims'] = dimdate + dimensemble + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short)
savedict['values'] = oceanvarmapped.tolist()
savedict['dims'] = dimdate + dimensemble + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
# End prior/posterior block
savedict = ncf.standard_var(varname='fire_flux_imp')
savedict['values'] = fire.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='fossil_flux_imp')
savedict['values'] = fossil.tolist()
savedict['dims'] = dimdate + dimgrid
savedict['count'] = next
ncf.add_data(savedict)
area = globarea()
savedict = ncf.standard_var(varname='cell_area')
savedict['values'] = area.tolist()
savedict['dims'] = dimgrid
ncf.add_data(savedict)
#
savedict = ncf.standard_var(varname='date')
savedict['values'] = date2num(startdate) - dectime0 + dt.days / 2.0
savedict['dims'] = dimdate
savedict['count'] = next
ncf.add_data(savedict)
sys.stdout.write('.')
sys.stdout.flush()
#
# Done, close the new NetCDF file
#
ncf.close()
#
# Return the full name of the NetCDF file so it can be processed by the next routine
#
logging.info("Gridded weekly average fluxes now written")
return saveas
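# Read-back sketch (illustrative only, not part of the original file): the weekly file
# written above can be inspected with plain netCDF4; the path below is hypothetical and
# the variable names are those created by this routine (bio_flux_prior/opt, ocn_flux_*,
# fire_flux_imp, fossil_flux_imp, cell_area, date).
#   import netCDF4 as cdf
#   nc = cdf.Dataset('data_flux1x1_weekly/flux_1x1.2000-01-01.nc')
#   bio_opt = nc.variables['bio_flux_opt'][:]   # (date, lat, lon) posterior biosphere flux
#   area = nc.variables['cell_area'][:]         # grid-cell area as produced by globarea()
#   nc.close()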
def save_weekly_avg_state_data(dacycle, statevector):
"""
Function creates a NetCDF file with output for all parameters. It uses the flux data written by the
:class:`~da.baseclasses.obsoperator.ObsOperator.py`, and multiplies these with the mapped parameters and
variance (not covariance!) from the :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param statevector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
"""
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_state_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
nlag = statevector.nlag
area = globarea()
vectorarea = statevector.grid2vector(griddata=area, method='sum')
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
#
# Create or open NetCDF output file
#
saveas = os.path.join(dirname, 'statefluxes.nc')
ncf = io.CT_CDF(saveas, 'write')
#
# Create dimensions and lat/lon grid
#
dimregs = ncf.add_dim('nparameters', statevector.nparams)
dimmembers = ncf.add_dim('nmembers', statevector.nmembers)
dimdate = ncf.add_date_dim()
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker fluxes')
setattr(ncf, 'node_offset', 1)
#
# skip dataset if already in file
#
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
next = ncf.inq_unlimlen()[0]
#
# if not, process this cycle. Start by getting flux input data from CTDAS
#
filename = os.path.join(dacycle['dir.output'], 'flux1x1_%s_%s.nc' % (startdate.strftime('%Y%m%d%H'), enddate.strftime('%Y%m%d%H')))
file = io.ct_read(filename, 'read')
bio = np.array(file.get_variable(dacycle.dasystem['background.co2.bio.flux']))
ocean = np.array(file.get_variable(dacycle.dasystem['background.co2.ocean.flux']))
fire = np.array(file.get_variable(dacycle.dasystem['background.co2.fires.flux']))
fossil = np.array(file.get_variable(dacycle.dasystem['background.co2.fossil.flux']))
#mapped_parameters = np.array(file.get_variable(dacycle.dasystem['final.param.mean.1x1']))
if dacycle.dasystem['background.co2.biosam.flux'] in file.variables.keys():
sam = True
biosam = np.array(file.get_variable(dacycle.dasystem['background.co2.biosam.flux']))
firesam = np.array(file.get_variable(dacycle.dasystem['background.co2.firesam.flux']))
else: sam = False
file.close()
if sam:
bio = bio + biosam
fire = fire + firesam
next = ncf.inq_unlimlen()[0]
vectorbio = statevector.grid2vector(griddata=bio * area, method='sum')
vectorocn = statevector.grid2vector(griddata=ocean * area, method='sum')
vectorfire = statevector.grid2vector(griddata=fire * area, method='sum')
vectorfossil = statevector.grid2vector(griddata=fossil * area, method='sum')
# Start adding datasets from here on, both prior and posterior datasets for bio and ocn
for prior in [True, False]:
#
# Now fill the statevector with the prior values for this time step. Note that the prior value for this time step
# occurred nlag time steps ago, so we make a shift in the output directory, but only if we are more than nlag cycles away from the start date.
#
if prior:
qual_short = 'prior'
for n in range(nlag, 0, -1):
priordate = enddate - timedelta(dt.days * n)
priordate = startdate + n*dt - timedelta(dt.days * n)
savedir = dacycle['dir.output'].replace(startdate.strftime('%Y%m%d'), priordate.strftime('%Y%m%d'))
filename = os.path.join(savedir,'savestate_%s.nc' % priordate.strftime('%Y%m%d'))
if os.path.exists(filename):
statevector.read_from_file(filename, qual=qual_short)
# Replace the mean statevector by all ones (assumed priors)
statemean = np.ones((statevector.nparams,))
choicelag = n
logging.debug('Read prior dataset from file %s, lag %d: ' % (filename, choicelag))
break
else:
qual_short = 'opt'
savedir = dacycle['dir.output']
filename = os.path.join(savedir, 'savestate_%s.nc' % startdate.strftime('%Y%m%d'))
statevector.read_from_file(filename)
choicelag = 1
statemean = statevector.ensemble_members[choicelag - 1][0].param_values
logging.debug('Read posterior dataset from file %s, lag %d: ' % (filename, choicelag))
#
# if prior, do not multiply fluxes with parameters, otherwise do
#
data = statemean * vectorbio # units of mole region-1 s-1
savedict = ncf.standard_var(varname='bio_flux_%s' % qual_short)
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorbio only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#
members = statevector.ensemble_members[choicelag - 1]
deviations = np.array([mem.param_values * vectorbio for mem in members])
deviations = deviations - deviations[0, :]
savedict = ncf.standard_var(varname='bio_flux_%s_ensemble' % qual_short)
savedict['values'] = deviations.tolist()
savedict['dims'] = dimdate + dimmembers + dimregs
savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
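# Illustration (not in the original code): with the stored deviations of shape
# (nmembers, nparams), the covariance referred to in the comment above is recovered as
#   P = deviations.transpose().dot(deviations) / (statevector.nmembers - 1)   # (nparams x nparams), units (mol region-1 s-1)**2
# which mirrors how save_weekly_avg_tc_data builds its TransCom covariances further down.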
savedict = ncf.standard_var('unknown')
savedict['name'] = 'bio_flux_%s_std' % qual_short
savedict['long_name'] = 'Biosphere flux standard deviation, %s' % qual_short
savedict['values'] = deviations.std(axis=0)
savedict['dims'] = dimdate + dimregs
savedict['comment'] = "This is the standard deviation on each parameter"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
data = statemean * vectorocn # units of mole region-1 s-1
savedict = ncf.standard_var(varname='ocn_flux_%s' % qual_short)
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
#
# Here comes a special provision for the posterior flux covariances: these are calculated relative to the prior flux covariance to
# ensure they are indeed smaller due to the data assimilation. If they would be calculated relative to the mean posterior flux, the
# uncertainties would shift just because the mean flux had increased or decreased, which is not what we want.
#
# The implementation is done by multiplying the ensemble with the vectorocn only, and not with the statemean values
# which are assumed 1.0 in the prior always.
#
deviations = np.array([mem.param_values * vectorocn for mem in members])
deviations = deviations - deviations[0, :]
savedict = ncf.standard_var(varname='ocn_flux_%s_ensemble' % qual_short)
savedict['values'] = deviations.tolist()
savedict['dims'] = dimdate + dimmembers + dimregs
savedict['comment'] = "This is the matrix square root, use (M x M^T)/(nmembers-1) to make covariance"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var('unknown')
savedict['name'] = 'ocn_flux_%s_std' % qual_short
savedict['long_name'] = 'Ocean flux standard deviation, %s' % qual_short
savedict['values'] = deviations.std(axis=0)
savedict['dims'] = dimdate + dimregs
savedict['comment'] = "This is the standard deviation on each parameter"
savedict['units'] = "mol region-1 s-1"
savedict['count'] = next
ncf.add_data(savedict)
data = vectorfire
savedict = ncf.standard_var(varname='fire_flux_imp')
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
data = vectorfossil
savedict = ncf.standard_var(varname='fossil_flux_imp')
savedict['values'] = data
savedict['dims'] = dimdate + dimregs
savedict['count'] = next
ncf.add_data(savedict)
savedict = ncf.standard_var(varname='date')
savedict['values'] = ncfdate
savedict['dims'] = dimdate
savedict['count'] = next
ncf.add_data(savedict)
sys.stdout.write('.')
sys.stdout.flush()
#
# Done, close the new NetCDF file
#
ncf.close()
#
# Return the full name of the NetCDF file so it can be processed by the next routine
#
logging.info("Vector weekly average fluxes now written")
return saveas
def save_weekly_avg_tc_data(dacycle, statevector):
"""
Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the
function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected
onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param statevector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
This function only reads the prior fluxes from the flux_1x1.nc files created before, because we want to convolve
these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete
statevector in units of mol/box/s which we then turn into TC fluxes and covariances.
"""
#
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
# Write/Create NetCDF output file
#
saveas = os.path.join(dirname, 'tcfluxes.nc')
ncf = io.CT_CDF(saveas, 'write')
dimdate = ncf.add_date_dim()
dimidateformat = ncf.add_date_dim_format()
dimregs = ncf.add_region_dim(type='tc')
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
setattr(ncf, 'node_offset', 1)
#
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
# Get input data
area = globarea()
infile = os.path.join(dacycle['dir.analysis'], 'data_state_weekly', 'statefluxes.nc')
if not os.path.exists(infile):
logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
return None
ncf_in = io.ct_read(infile, 'read')
# Transform data one by one
# Get the date variable, and find index corresponding to the dacycle date
try:
dates = ncf_in.variables['date'][:]
except KeyError:
logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
raise KeyError
try:
index = dates.tolist().index(ncfdate)
except ValueError:
logging.error("The requested cycle date is not yet available in file %s " % infile)
logging.error("Please make sure you create state based fluxes before making TC fluxes")
raise ValueError
# First add the date for this cycle to the file, this grows the unlimited dimension
savedict = ncf.standard_var(varname='date')
savedict['values'] = ncfdate
savedict['dims'] = dimdate
savedict['count'] = index
ncf.add_data(savedict)
# Now convert other variables that were inside the flux_1x1 file
vardict = ncf_in.variables
for vname, vprop in vardict.items():
data = ncf_in.get_variable(vname)[index]
if vname in ['latitude','longitude', 'date', 'idate'] or 'std' in vname:
continue
elif 'ensemble' in vname:
tcdata = []
for member in data:
tcdata.append(statevector.vector2tc(vectordata=member))
tcdata = np.array(tcdata)
try:
cov = tcdata.transpose().dot(tcdata) / (statevector.nmembers - 1)
except:
cov = np.dot(tcdata.transpose(), tcdata) / (statevector.nmembers - 1) # Huygens fix
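# (Both branches above compute the same (nregions x nregions) covariance over the
#  ensemble members; the np.dot fallback was presumably added for a platform where
#  ndarray.dot failed, hence the "Huygens fix" label.)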
#print vname,cov.sum()
tcdata = cov
savedict = ncf.standard_var(varname=vname.replace('ensemble', 'cov'))
savedict['units'] = '[mol/region/s]**2'
savedict['dims'] = dimdate + dimregs + dimregs
else:
tcdata = statevector.vector2tc(vectordata=data) # vector to TC
savedict = ncf.standard_var(varname=vname)
savedict['dims'] = dimdate + dimregs
savedict['units'] = 'mol/region/s'
savedict['count'] = index
savedict['values'] = tcdata
ncf.add_data(savedict)
ncf_in.close()
ncf.close()
logging.info("TransCom weekly average fluxes now written")
return saveas
def save_weekly_avg_ext_tc_data(dacycle):
""" Function SaveTCDataExt saves surface flux data to NetCDF files for extended TransCom regions
*** Inputs ***
rundat : a RunInfo object
*** Outputs ***
NetCDF file containing n-hourly global surface fluxes per TransCom region
*** Example ***
./expand_savestate project=enkf_release sd=20000101 ed=20010101 """
#
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_tc_weekly'))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
# Write/Create NetCDF output file
#
saveas = os.path.join(dirname, 'tc_extfluxes.nc')
ncf = io.CT_CDF(saveas, 'write')
dimdate = ncf.add_date_dim()
dimidateformat = ncf.add_date_dim_format()
dimregs = ncf.add_region_dim(type='tc_ext')
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CarbonTracker TransCom fluxes')
setattr(ncf, 'node_offset', 1)
#
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
infile = os.path.join(dacycle['dir.analysis'], 'data_tc_weekly', 'tcfluxes.nc')
if not os.path.exists(infile):
logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
return None
ncf_in = io.ct_read(infile, 'read')
# Transform data one by one
# Get the date variable, and find index corresponding to the dacycle date
try:
dates = ncf_in.variables['date'][:]
except KeyError:
logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
logging.error("Please make sure you create gridded fluxes before making extended TC fluxes")
raise KeyError
try:
index = dates.tolist().index(ncfdate)
except ValueError:
logging.error("The requested cycle date is not yet available in file %s " % infile)
logging.error("Please make sure you create state based fluxes before making extended TC fluxes ")
raise ValueError
# First add the date for this cycle to the file, this grows the unlimited dimension
savedict = ncf.standard_var(varname='date')
savedict['values'] = ncfdate
savedict['dims'] = dimdate
savedict['count'] = index
ncf.add_data(savedict)
# Now convert other variables that were inside the tcfluxes.nc file
vardict = ncf_in.variables
for vname, vprop in vardict.items():
data = ncf_in.get_variable(vname)[index]
if vname == 'latitude': continue
elif vname == 'longitude': continue
elif vname == 'date': continue
elif vname == 'idate': continue
elif 'cov' in vname:
tcdata = ExtendedTCRegions(data, cov=True)
savedict = ncf.standard_var(varname=vname)
savedict['units'] = '[mol/region/s]**2'
savedict['dims'] = dimdate + dimregs + dimregs
else:
tcdata = ExtendedTCRegions(data, cov=False)
savedict = ncf.standard_var(varname=vname)
savedict['dims'] = dimdate + dimregs
savedict['units'] = 'mol/region/s'
savedict['count'] = index
savedict['values'] = tcdata
ncf.add_data(savedict)
ncf_in.close()
ncf.close()
logging.info("TransCom weekly average extended fluxes now written")
return saveas
def save_weekly_avg_agg_data(dacycle, region_aggregate='olson'):
"""
Function creates a NetCDF file with output on TransCom regions. It uses the flux input from the
function `save_weekly_avg_1x1_data` to create fluxes of length `nparameters`, which are then projected
onto TC regions using the internal methods from :class:`~da.baseclasses.statevector.StateVector`.
:param dacycle: a :class:`~da.tools.initexit.CycleControl` object
:param StateVector: a :class:`~da.baseclasses.statevector.StateVector`
:rtype: None
This function only reads the prior fluxes from the flux_1x1.nc files created before, because we want to convolve
these with the parameters in the statevector. This creates posterior fluxes, and the posterior covariance for the complete
statevector in units of mol/box/s which we then turn into TC fluxes and covariances.
"""
#
dirname = create_dirs(os.path.join(dacycle['dir.analysis'], 'data_%s_weekly' % region_aggregate))
#
# Some help variables
#
dectime0 = date2num(datetime(2000, 1, 1))
dt = dacycle['cyclelength']
startdate = dacycle['time.start']
enddate = dacycle['time.end']
ncfdate = date2num(startdate) - dectime0 + dt.days / 2.0
logging.debug("DA Cycle start date is %s" % startdate.strftime('%Y-%m-%d %H:%M'))
logging.debug("DA Cycle end date is %s" % enddate.strftime('%Y-%m-%d %H:%M'))
logging.debug("Aggregating 1x1 fluxes to %s totals" % region_aggregate)
# Write/Create NetCDF output file
#
saveas = os.path.join(dirname, '%s_fluxes.%s.nc' % (region_aggregate, startdate.strftime('%Y-%m-%d')))
ncf = io.CT_CDF(saveas, 'write')
dimdate = ncf.add_date_dim()
dimidateformat = ncf.add_date_dim_format()
dimgrid = ncf.add_latlon_dim() # for mask
#
# Select regions to aggregate to
#
if region_aggregate == "olson":
regionmask = tc.olson240mask
dimname = 'olson'
dimregs = ncf.add_dim(dimname, regionmask.max())
regionnames = []
for i in range(11):
for j in range(19):
regionnames.append("%s_%s" % (tc.transnams[i], tc.olsonnams[j],))
regionnames.extend(tc.oifnams)
xform = False
for i, name in enumerate(regionnames):
lab = 'Aggregate_Region_%03d' % (i + 1,)
setattr(ncf, lab, name)
elif region_aggregate == "olson_extended":
regionmask = tc.olson_ext_mask
dimname = 'olson_ext'
dimregs = ncf.add_dim(dimname, regionmask.max())
xform = False
for i, name in enumerate(tc.olsonextnams):
lab = 'Aggregate_Region_%03d'%(i+1)
setattr(ncf, lab, name)
elif region_aggregate == "transcom":
regionmask = tc.transcommask
dimname = 'tc'
dimregs = ncf.add_region_dim(type='tc')
xform = False
elif region_aggregate == "transcom_extended":
regionmask = tc.transcommask
dimname = 'tc_ext'
dimregs = ncf.add_region_dim(type='tc_ext')
xform = True
elif region_aggregate == "amazon":
regfile = cdf.Dataset(os.path.join(analysisdir,'amazon_mask.nc'))
regionmask = regfile.variables['regionmask'][:]
regfile.close()
dimname = 'amazon'
dimregs = ncf.add_dim(dimname, regionmask.max())
xform = False
elif region_aggregate == "country":
xform = False
countrydict = ct.get_countrydict()
selected = ['Russia', 'Canada', 'China', 'United States', 'EU27', 'Brazil', 'Australia', 'India'] #,'G8','UNFCCC_annex1','UNFCCC_annex2']
regionmask = np.zeros((180, 360,), 'float')
for i, name in enumerate(selected):
lab = 'Country_%03d' % (i + 1,)
setattr(ncf, lab, name)
if name == 'EU27':
namelist = ct.EU27
elif name == 'EU25':
namelist = ct.EU25
elif name == 'G8':
namelist = ct.G8
elif name == 'UNFCCC_annex1':
namelist = ct.annex1
elif name == 'UNFCCC_annex2':
namelist = ct.annex2
else:
namelist = [name]
for countryname in namelist:
try:
country = countrydict[countryname]
regionmask.put(country.gridnr, i + 1)
except:
continue
dimname = 'country'
dimregs = ncf.add_dim(dimname, regionmask.max())
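# Whichever branch was taken above, regionmask now holds for every 1x1 grid cell the
# integer label of the aggregation region it belongs to; it is written out as the
# 'regionmask' variable below and drives the state_to_grid aggregation.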
#
skip = ncf.has_date(ncfdate)
if skip:
logging.warning('Skipping writing of data for date %s : already present in file %s' % (startdate.strftime('%Y-%m-%d'), saveas))
else:
#
# set title and tell GMT that we are using "pixel registration"
#
setattr(ncf, 'Title', 'CTDAS Aggregated fluxes')
setattr(ncf, 'node_offset', 1)
savedict = ncf.standard_var('unknown')
savedict['name'] = 'regionmask'
savedict['comment'] = 'numerical mask used to aggregate 1x1 flux fields, each integer 0,...,N is one region aggregated'
savedict['values'] = regionmask.tolist()
savedict['units'] = '-'
savedict['dims'] = dimgrid
savedict['count'] = 0
ncf.add_data(savedict)
# Get input data from 1x1 degree flux files
area = globarea()
infile = os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.%s.nc' % startdate.strftime('%Y-%m-%d'))
if not os.path.exists(infile):
logging.error("Needed input file (%s) does not exist yet, please create file first, returning..." % infile)
return None
ncf_in = io.ct_read(infile, 'read')
# Transform data one by one
# Get the date variable, and find index corresponding to the dacycle date
try:
dates = ncf_in.variables['date'][:]
except KeyError:
logging.error("The variable date cannot be found in the requested input file (%s) " % infile)
logging.error("Please make sure you create gridded fluxes before making TC fluxes ")
raise KeyError
try:
index = dates.tolist().index(ncfdate)
except ValueError:
logging.error("The requested cycle date is not yet available in file %s " % infile)
logging.error("Please make sure you create state based fluxes before making TC fluxes ")
raise ValueError
# First add the date for this cycle to the file, this grows the unlimited dimension
savedict = ncf.standard_var(varname='date')
savedict['values'] = ncfdate
savedict['dims'] = dimdate
savedict['count'] = index
ncf.add_data(savedict)
# Now convert other variables that were inside the statevector file
vardict = ncf_in.variables
for vname, vprop in vardict.items():
if vname == 'latitude': continue
elif vname == 'longitude': continue
elif vname == 'date': continue
elif vname == 'idate': continue
elif 'std' in vname: continue
elif 'ensemble' in vname:
data = ncf_in.get_variable(vname)[index]
dimensemble = ncf.add_dim('members', data.shape[0])
regiondata = []
for member in data:
aggdata = state_to_grid(member * area, regionmask, reverse=True, mapname=region_aggregate)
regiondata.append(aggdata)
regiondata = np.array(regiondata)
try:
regioncov = regiondata.transpose().dot(regiondata) / (data.shape[0] - 1)
except:
regioncov = np.dot(regiondata.transpose(), regiondata) / (data.shape[0] - 1) # Huygens fix
if xform:
regiondata = ExtendedTCRegions(regiondata,cov=False)
regioncov = ExtendedTCRegions(regioncov,cov=True)
savedict = ncf.standard_var(varname=vname)
savedict['name'] = vname.replace('ensemble','covariance')
savedict['units'] = '[mol/region/s]^2'
savedict['dims'] = dimdate + dimregs + dimregs
savedict['count'] = index
savedict['values'] = regioncov
ncf.add_data(savedict)
savedict = ncf.standard_var(varname=vname)
savedict['name'] = vname
savedict['units'] = 'mol/region/s'
savedict['dims'] = dimdate + dimensemble + dimregs
elif 'flux' in vname:
data = ncf_in.get_variable(vname)[index]
regiondata = state_to_grid(data * area, regionmask, reverse=True, mapname=region_aggregate)
if xform:
regiondata = ExtendedTCRegions(regiondata)
savedict = ncf.standard_var(varname=vname)
savedict['dims'] = dimdate + dimregs
savedict['units'] = 'mol/region/s'
else:
data = ncf_in.get_variable(vname)[:]
regiondata = state_to_grid(data, regionmask, reverse=True, mapname=region_aggregate)
if xform:
regiondata = ExtendedTCRegions(regiondata)
savedict = ncf.standard_var(varname=vname)
savedict['dims'] = dimdate + dimregs
savedict['count'] = index
savedict['values'] = regiondata
ncf.add_data(savedict)
ncf_in.close()
ncf.close()
logging.info("%s aggregated weekly average fluxes now written" % dimname)
return saveas
def save_time_avg_data(dacycle, infile, avg='monthly'):
""" Function saves time mean surface flux data to NetCDF files
*** Inputs ***
rundat : a RunInfo object
*** Outputs ***
daily NetCDF file containing 1-hourly global surface fluxes at 1x1 degree
*** Example ***
./expand_savestate project=enkf_release sd=20000101 ed=20010101 """
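# Hypothetical call for illustration only; infile should be one of the weekly files
# produced above and avg one of 'monthly', 'seasonal', 'yearly' or 'longterm':
#   save_time_avg_data(dacycle,
#                      os.path.join(dacycle['dir.analysis'], 'data_flux1x1_weekly', 'flux_1x1.2000-01-01.nc'),
#                      avg='monthly')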
if 'weekly' in infile:
intime = 'weekly'
if 'monthly' in infile:
intime = 'monthly'
if 'yearly' in infile:
intime = 'yearly'
dirname, filename = os.path.split(infile)
outdir = create_dirs(os.path.join(dacycle['dir.analysis'], dirname.replace(intime, avg)))
dectime0 = date2num(datetime(2000, 1, 1))
# Create NetCDF output file
#
saveas = os.path.join(outdir, filename)
ncf = io.CT_CDF(saveas, 'create')
dimdate = ncf.add_date_dim()
#
# Open input file specified from the command line
#
if not os.path.exists(infile):
logging.error("Needed input file (%s) not found. Please create this first:" % infile)
logging.error("returning...")
return None
else:
pass
file = io.ct_read(infile, 'read')
datasets = list(file.variables.keys())
date = file.get_variable('date')
globatts = file.ncattrs()
for att in globatts:
attval = file.getncattr(att)
if not att in ncf.ncattrs():
ncf.setncattr(att, attval)
time = [datetime(2000, 1, 1) + timedelta(days=d) for d in date]
# loop over datasets in infile, skip idate and date as we will make new time axis for the averaged data
for sds in ['date'] + datasets:
# get original data
data = file.get_variable(sds)
varatts = file.variables[sds].ncattrs()
vardims = file.variables[sds].dimensions
#
# Depending on dims of input dataset, create dims for output dataset. Note that we add the new dimdate now.
#
for d in vardims:
if 'date' in d:
continue
if d in ncf.dimensions.keys():
pass
else:
dim = ncf.createDimension(d, size=len(file.dimensions[d]))
savedict = ncf.standard_var(sds)
savedict['name'] = sds
savedict['dims'] = vardims
savedict['units'] = file.variables[sds].units
savedict['long_name'] = file.variables[sds].long_name
savedict['comment'] = file.variables[sds].comment
savedict['standard_name'] = file.variables[sds].standard_name
savedict['count'] = 0
if not 'date' in vardims:
savedict['values'] = data
ncf.add_data(savedict)
else:
if avg == 'monthly':
time_avg, data_avg = timetools.monthly_avg(time, data)
elif avg == 'seasonal':
time_avg, data_avg = timetools.season_avg(time, data)
elif avg == 'yearly':
time_avg, data_avg = timetools.yearly_avg(time, data)
elif avg == 'longterm':
time_avg, data_avg = timetools.longterm_avg(time, data)
time_avg = [time_avg]
data_avg = [data_avg]
else:
raise ValueError('Averaging (%s) does not exist' % avg)
count = -1
for dd, data in zip(time_avg, data_avg):
count = count + 1
if sds == 'date':
savedict['values'] = date2num(dd) - dectime0
else:
savedict['values'] = data
savedict['count'] = count
ncf.add_data(savedict, silent=True)
sys.stdout.write('.')
sys.stdout.write('\n')
sys.stdout.flush()
# end NetCDF file access
file.close()
ncf.close()
logging.info("------------------- Finished time averaging---------------------------------")
return saveas
if __name__ == "__main__":
from da.tools.initexit import CycleControl
from da.carbondioxide.dasystem import CO2DaSystem
from da.carbondioxide.statevector import CO2StateVector
sys.path.append('../../')
logging.root.setLevel(logging.DEBUG)
dacycle = CycleControl(args={'rc':'../../ctdas-od-gfed2-glb6x4-obspack-full.rc'})
dasystem = CO2DaSystem('../rc/carbontracker_ct09_opfnew.rc')
dacycle.dasystem = dasystem
dacycle.setup()
dacycle.parse_times()
statevector = CO2StateVector()
statevector.setup(dacycle)
while dacycle['time.start'] < dacycle['time.finish']:
save_weekly_avg_1x1_data(dacycle, statevector)
save_weekly_avg_state_data(dacycle, statevector)
save_weekly_avg_tc_data(dacycle, statevector)
save_weekly_avg_ext_tc_data(dacycle)
save_weekly_avg_agg_data(dacycle, region_aggregate='olson')
save_weekly_avg_agg_data(dacycle, region_aggregate='olson_extended')
save_weekly_avg_agg_data(dacycle, region_aggregate='transcom')
save_weekly_avg_agg_data(dacycle, region_aggregate='transcom_extended')
save_weekly_avg_agg_data(dacycle, region_aggregate='country')
save_weekly_avg_agg_data(dacycle, region_aggregate='amazon')
dacycle.advance_cycle_times()
statevector = None # free memory
sys.exit(0)
@@ -28,7 +28,7 @@ import datetime as dt
import os
import sys
import shutil
- import time_avg_fluxes as tma
+ from . import time_avg_fluxes as tma
basedir = '/Storage/CO2/ingrid/'
basedir2 = '/Storage/CO2/peters/'
@@ -60,12 +60,12 @@ if __name__ == "__main__":
os.makedirs(os.path.join(targetdir,'analysis','data_%s_weekly'%nam) )
timedirs=[]
- for ss,vv in sources.iteritems():
+ for ss,vv in sources.items():
sds,eds = ss.split(' through ')
sd = dt.datetime.strptime(sds,'%Y-%m-%d')
ed = dt.datetime.strptime(eds,'%Y-%m-%d')
timedirs.append([sd,ed,vv])
- print sd,ed, vv
+ print(sd,ed, vv)
while dacycle['time.start'] < dacycle['time.end']:
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# merge_ctdas_runs.py
"""
Author : peters
Revision History:
File created on 14 Jul 2014.
This script merges the analysis directory from multiple projects into one new folder.
It steps through the existing weekly-mean analysis output files, and then averages these to daily/monthly/yearly values.
"""
import datetime as dt
import os
import sys
import shutil
import time_avg_fluxes as tma
basedir = '/Storage/CO2/ingrid/'
basedir2 = '/Storage/CO2/peters/'
targetproject = 'geocarbon-ei-sibcasa-gfed4-zoom-gridded-combined-convec-20011230-20130101'
targetdir = os.path.join(basedir2,targetproject)
sources = {
'2000-01-01 through 2011-12-31': os.path.join(basedir,'carbontracker','geocarbon-ei-sibcasa-gfed4-zoom-gridded-convec-combined'),
'2012-01-01 through 2012-12-31': os.path.join(basedir2,'geocarbon-ei-sibcasa-gfed4-zoom-gridded-convec-20111231-20140101'),
}
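# Each key in 'sources' is a date range written as "<start> through <end>"; it is split
# on ' through ' in the loop below, and the matching analysis directory is used as the
# copy source for every cycle that falls inside that range.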
dirs = ['flux1x1','transcom','country','olson']
dacycle = {}
dacycle['time.start'] = dt.datetime(2000,12,30)
dacycle['time.end'] = dt.datetime(2013,1,1)
dacycle['cyclelength'] = dt.timedelta(days=7)
dacycle['dir.analysis'] = os.path.join(targetdir,'analysis')
if __name__ == "__main__":
if not os.path.exists(targetdir):
os.makedirs(targetdir)
if not os.path.exists(os.path.join(targetdir,'analysis')):
os.makedirs(os.path.join(targetdir,'analysis') )
for nam in dirs:
if not os.path.exists(os.path.join(targetdir,'analysis','data_%s_weekly'%nam)):
os.makedirs(os.path.join(targetdir,'analysis','data_%s_weekly'%nam) )
timedirs=[]
for ss,vv in sources.items():
sds,eds = ss.split(' through ')
sd = dt.datetime.strptime(sds,'%Y-%m-%d')
ed = dt.datetime.strptime(eds,'%Y-%m-%d')
timedirs.append([sd,ed,vv])
print(sd, ed, vv)
while dacycle['time.start'] < dacycle['time.end']:
# copy the weekly flux1x1 file from the original dir to the new project dir
for td in timedirs:
if dacycle['time.start'] >= td[0] and dacycle['time.start'] <= td[1]:
indir=td[2]
# Now time avg new fluxes
infile = os.path.join(indir,'analysis','data_flux1x1_weekly','flux_1x1.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) )
#print os.path.exists(infile),infile
shutil.copy(infile,infile.replace(indir,targetdir) )
tma.time_avg(dacycle,avg='flux1x1')
infile = os.path.join(indir,'analysis','data_transcom_weekly','transcom_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) )
#print os.path.exists(infile),infile
shutil.copy(infile,infile.replace(indir,targetdir) )
tma.time_avg(dacycle,avg='transcom')
infile = os.path.join(indir,'analysis','data_olson_weekly','olson_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) )
#print os.path.exists(infile),infile
shutil.copy(infile,infile.replace(indir,targetdir) )
tma.time_avg(dacycle,avg='olson')
infile = os.path.join(indir,'analysis','data_country_weekly','country_fluxes.%s.nc'%(dacycle['time.start'].strftime('%Y-%m-%d') ) )
#print os.path.exists(infile),infile
shutil.copy(infile,infile.replace(indir,targetdir) )
tma.time_avg(dacycle,avg='country')
dacycle['time.start'] += dacycle['cyclelength']
@@ -40,8 +40,8 @@ import logging
import copy
from da.analysis.summarize_obs import nice_lon, nice_lat, nice_alt
from PIL import Image
- import urllib2
- import StringIO
+ import urllib.request, urllib.error, urllib.parse
+ import io
"""
General data needed to set up proper aces inside a figure instance
@@ -336,7 +336,7 @@ def timehistograms_new(fig, infile, option='final'):
# Get a scaling factor for the x-axis range. Now we will include 5 standard deviations
sc = res.std()
- print 'sc',sc
+ print('sc',sc)
# If there is too little data for a reasonable PDF, skip to the next value in the loop
if res.shape[0] < 10: continue
@@ -435,12 +435,12 @@ def timehistograms_new(fig, infile, option='final'):
#fig.text(0.12,0.16,str1,fontsize=0.8*fontsize,color='0.75')
try:
- img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
+ img = urllib.request.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
- im = Image.open(StringIO.StringIO(img))
+ im = Image.open(io.StringIO(img))
height = im.size[1]
width = im.size[0]
@@ -674,12 +674,12 @@ def timevssite_new(fig, infile):
#fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75')
try:
- img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
+ img = urllib.request.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
- im = Image.open(StringIO.StringIO(img))
+ im = Image.open(io.StringIO(img))
height = im.size[1]
width = im.size[0]
@@ -933,12 +933,12 @@ def residuals_new(fig, infile, option):
#fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75')
try:
- img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
+ img = urllib.request.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
- im = Image.open(StringIO.StringIO(img))
+ im = Image.open(io.StringIO(img))
height = im.size[1]
width = im.size[0]
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# siteseries_c.py
"""
Author : peters
Revision History:
File created on 23 Dec 2008.
"""
import sys
sys.path.append('../../')
import matplotlib
matplotlib.use('pdf')
import sys
import os
from da.tools.general import create_dirs
import matplotlib.dates as pltdt
import matplotlib.pyplot as plt
from matplotlib.mlab import normpdf
from matplotlib.font_manager import FontProperties
import numpy as np
import datetime as dt
import da.tools.io4 as io
import logging
import copy
from da.analysis.summarize_obs import nice_lon, nice_lat, nice_alt
from PIL import Image
import urllib2
import StringIO
"""
General data needed to set up proper axes inside a figure instance
"""
ts_x1 = 0.125
ts_x2 = 0.825
ts_xspan = ts_x2 - ts_x1
ts_y1 = 0.18
ts_y2 = 0.72
ts_yspan = ts_y2 - ts_y1
markersize = 2
fontsize = 16
small_logos = ['EC','RUG','LBNL-ARM','NIES-MRI','NIWA','ECN']
"""
Three routines that provide loops around the number of sites. These also create the rundirs and such and make sure the
figures that were created are stamped and saved properly
"""
def site_timeseries(analysisdir,option='final'):
"""***************************************************************************************
Call example:
"***************************************************************************************"""
#
# Repeat all options to user
#
#
# Create directories if needed
#
mrdir = os.path.join(analysisdir, 'timeseries_molefractions')
if not os.path.exists(mrdir):
create_dirs(mrdir)
#
# Make a dictionary of available sites and the NetCDF file names associated with them
#
sitelist = os.listdir(os.path.join(analysisdir, 'data_molefractions'))
sitelist = [f for f in sitelist if f.endswith('.nc')]
#
# Loop over site and sitefiles
#
for sitefile in sitelist:
#
# Create filename and extract site codes for the sites that had day/night data separated
#
if not 'co2_poc' in sitefile: continue
filename = os.path.join(analysisdir, 'data_molefractions', sitefile)
saveas = os.path.join(mrdir, sitefile[:-3] + '_timeseries')
if not os.path.exists(saveas+'.pdf'):
logging.debug('Making timeseries figures for %s: ' % sitefile)
#
# Create a figure instance to hold plot
#
fig = plt.figure(1, figsize=(15, 7,))#,frameon=False)
#
# Make plot
#
fig = timevssite_new(fig, filename)
#
# Save image
#
fig.savefig(saveas+'.pdf', dpi=100)
fig.savefig(saveas+'.png', dpi=50)
fig.savefig(saveas+'.large.png', dpi=150)
#
plt.close(fig)
#
# Residuals next
#
saveas = os.path.join(mrdir, sitefile[:-3] + '_residuals')
if not os.path.exists(saveas+'.pdf'):
logging.debug('Making residuals figures for %s: ' % sitefile)
#
# Create a figure instance to hold plot
#
fig = plt.figure(1, figsize=(15, 7,))#,frameon=False)
#
# Make plot
#
fig = residuals_new(fig, filename, option)
#
# Save image
#
fig.savefig(saveas+'.pdf', dpi=100)
fig.savefig(saveas+'.png', dpi=50)
fig.savefig(saveas+'.large.png', dpi=150)
#
# next in loop over sites
#
plt.close(fig)
#
# histograms next
#
saveas = os.path.join(mrdir, sitefile[:-3] + '_histograms')
if not os.path.exists(saveas+'.pdf'):
logging.debug('Making histograms figures for %s: ' % sitefile)
#
# Create a figure instance to hold plot
#
fig = plt.figure(1, figsize=(15, 7,))#,frameon=False)
#
# Make plot
#
fig = timehistograms_new(fig, filename, option)
#
# Save image
#
fig.savefig(saveas+'.pdf', dpi=100)
fig.savefig(saveas+'.png', dpi=50)
fig.savefig(saveas+'.large.png', dpi=150)
#
# next in loop over sites
#
plt.close(fig)
#
# html table next
#
saveas = os.path.join(mrdir, sitefile[:-3] + '.html')
if not os.path.exists(saveas):
logging.debug('Making html info table for %s' % sitefile)
f = io.CT_CDF(filename, 'read')
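            # Fill the sitetable.html template by replacing each placeholder with the corresponding attribute from the site file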
with open(saveas, "wt") as fout:
with open("sitetable.html", "rt") as fin:
for lineout in fin:
lineout = lineout.replace('site_name',f.site_name)
if 'site_country' in lineout:
if 'site_country' in f.ncattrs():
lineout = lineout.replace('site_country',f.site_country)
else: lineout = lineout.replace('site_country','Multiple')
if abs(f.site_latitude) > 9999:
lineout = lineout.replace('site_latitude','Variable')
lineout = lineout.replace('site_longitude','Variable')
else:
lineout = lineout.replace('site_latitude',nice_lat(f.site_latitude,'html'))
lineout = lineout.replace('site_longitude',nice_lon(f.site_longitude,'html'))
if abs(f.site_latitude) > 9999 and not 'shipboard' in sitefile:
lineout = lineout.replace('site_elevation','Variable')
lineout = lineout.replace('intake_height','Variable')
else:
lineout = lineout.replace('site_elevation',str(f.site_elevation))
lineout = lineout.replace('intake_height',str(f.variables['intake_height'][:].max()))
lineout = lineout.replace('site_map',str(f.dataset_map))
lineout = lineout.replace('lab_1_abbr',f.lab_1_abbr)
lineout = lineout.replace('lab_1_name',f.lab_1_name)
lineout = lineout.replace('lab_1_country',f.lab_1_country)
lineout = lineout.replace('lab_1_provider',f.provider_1_name)
if 'lab_1_url' in lineout:
if 'lab_1_url' in f.ncattrs():
lineout = lineout.replace('lab_1_url',f.lab_1_url)
else: lineout = ''
lineout = lineout.replace('lab_1_logo',f.lab_1_logo)
lineout = lineout.replace('dataset_selection',f.dataset_selection_tag)
lineout = lineout.replace('dataset_project',f.dataset_project)
lineout = lineout.replace('dataset_calibration_scale',f.dataset_calibration_scale)
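                        # A model-data mismatch above 0.0009 mol/mol (900 ppm) marks a site that was not assimilated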
if f.variables['modeldatamismatch'][:].max() > 0.0009:
lineout = lineout.replace('assimilated','No')
else: lineout = lineout.replace('assimilated','Yes')
fout.write(lineout)
f.close()
def timehistograms_new(fig, infile, option='final'):
"""
    This routine makes two side-by-side histograms representing summer and winter PDFs of the residuals. It uses the special
    x-axis and y-axis definitions from above. The PDFs are based on simulated-minus-observed CO2, taken from either the
    optimized samples (option='final') or the forecast samples (option='forecast').
"""
fontsize = 17
#
# Get data
#
f = io.CT_CDF(infile, 'read')
species = f.dataset_parameter
if species == 'co2':
molefac=1e6
units = '$\mu$mol mol$^{-1}$'
species = "CO$_2$"
if species == 'co2c13':
molefac=1.0
units = 'permil'
species = "$\delta^{13}$C"
date = f.get_variable('time')
obs = f.get_variable('value') * molefac
mdm = f.get_variable('modeldatamismatch') * molefac
hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac
if option == 'final':
simulated = f.get_variable('modelsamplesmean') * molefac
if option == 'forecast':
simulated = f.get_variable('modelsamplesmean_forecast') * molefac
flags = f.get_variable('flag_forecast')
mdm_fig = mdm.compress(flags == 0).mean()
if 'site_country' in f.ncattrs():
longsitestring = f.site_name + ', ' + f.site_country
else: longsitestring = f.site_name
if abs(f.site_latitude) > 9999:
location = 'Variable'
else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation)
SDSInfo = {}
for k in f.ncattrs():
SDSInfo[k] = f.getncattr(k)
f.close()
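    # Convert seconds-since-1970 to datetime objects and keep only records that were actually sampled by the model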
pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])
sampled = (np.ma.getmaskarray(simulated) == False)
if len(sampled.nonzero()[0]) < 2:
logging.warning("Too few simulated values found, continuing...")
return fig
simulated = simulated.compress(sampled)
obs = obs.compress(sampled)
pydates = pydates.compress(sampled)
mdm = mdm.compress(sampled)
hphtr = hphtr.compress(sampled)
flags = flags.compress(sampled)
residual = simulated - obs
if option == 'final':
chisquared = (residual ** 2) / mdm
elif option == 'forecast':
chisquared = (residual ** 2) / hphtr
rejected = (flags == 2.0)
notused = (flags == 99.0)
#if notused.all():
# return fig
#else:
obslabel = 'Residual'
sd = pydates[0]
ed = pydates[-1]
summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9] ] # JJAS
winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4] ] # NDJFMA
# Create two side-by-side axes, turn off their frame
ax1 = fig.add_axes([0.05, 0.18, 0.4, 0.7])
ax2 = fig.add_axes([0.55, 0.18, 0.4, 0.7])
# Loop simultaneously over ax1/ax2 and summer/winter values
for ax, sel in zip([ax1, ax2], [summer, winter]):
if not np.array(sel).any(): continue
# Subselect data for winter/summer
sel_obs = obs.take(sel)
sel_fc = simulated.take(sel)
sel_hqhr = hphtr.take(sel)
sel_mdm = mdm.take(sel)
sel_flags = flags.take(sel)
sel_rej = rejected.take(sel)
# Calculate residual and chi squared values
if SDSInfo['site_code'] == 'POC':
sel_obs = sel_obs.compress(sel_fc > -9000)
sel_mdm = sel_mdm.compress(sel_fc > -9000)
sel_flags = sel_flags.compress(sel_fc > -9000)
sel_fc = sel_fc.compress(sel_fc > -9000)
#res = sel_fc - sel_obs
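        # If the mean model-data mismatch is below 900 the site was assimilated: use only flag == 0 samples;
        # otherwise keep everything except samples rejected by the optimizer (flag == 2)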
if option == 'final':
if mdm.mean() < 900:
res = sel_fc.compress(sel_flags == 0) - sel_obs.compress(sel_flags == 0)
chi = res / np.sqrt(sel_mdm.compress(sel_flags == 0))
nr_obs = sel_obs.compress(sel_flags == 0).shape[0]
else:
res = sel_fc.compress(sel_flags != 2) - sel_obs.compress(sel_flags != 2)
chi = res / np.sqrt(sel_mdm.compress(sel_flags != 2))
nr_obs = sel_obs.compress(sel_flags != 2).shape[0]
elif option == 'forecast':
res = sel_fc - sel_obs
chi = res / np.sqrt(sel_hqhr)
# Get a scaling factor for the x-axis range. Now we will include 5 standard deviations
sc = res.std()
        print('sc %s' % sc)
# If there is too little data for a reasonable PDF, skip to the next value in the loop
if res.shape[0] < 10: continue
# make a histogram plot of the residuals with a minimum of 10 bins, and maximum of N/10 bins, normalize the PDF to an area of 1.0
n, bins, patches = ax.hist(res, max(res.shape[0] / 10, 10), normed=1)
# Change the colors on the bars
p = plt.setp(patches, 'facecolor', 'tan', 'edgecolor', 'tan', label='None')
# Create two normal distributions for the line plots over the interval of the x-axis
bins = np.arange(-5 * sc, 5 * sc, 0.1)
n = normpdf(bins, res.mean(), res.std())
l = ax.plot(bins, n, 'b-', linewidth=2) # plot the PDF of the histogram in blue
n = normpdf(bins, 0.0 , sel_mdm[0])
l = ax.plot(bins, n, 'g-', linewidth=2) # plot the PDF of the model-data-mismatch in green
#
# Add a legend, not as a legend object but simply as text labels
#
if option == 'final':
strX = ''
elif option == 'forecast':
strX = 'Inn. '
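        # chi.mean() != chi.mean() is True only when chi contains NaN values (a NaN never equals itself)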
if chi.mean() != chi.mean() or mdm.mean() < 900:
labs = [
'%.2f $\pm$ %.2f' % (res.mean(), res.std()) , \
'N = %d' % nr_obs, \
'%s$\chi^2$= %.2f'%(strX, (chi**2).mean())
]
else:
labs = [
'%.2f $\pm$ %.2f' % (res.mean(), res.std()) , \
'N = %d' % sel_obs.shape[0]
]
# print the above labels onto the figure. Note that I use relative coordinates for their position by specifying the transform=ax.transAxes
for i, l in enumerate(labs):
ax.text(0.75, 0.9 - 0.07 * i, l, transform=ax.transAxes, fontsize=fontsize, horizontalalignment='center', color='blue')
#
# Set Tick Font Size on x and y labels
#
#dummy = [lab.set_fontsize(20) for lab in ax.get_xticklabels()]
#dummy = [lab.set_fontsize(20) for lab in ax.get_yticklabels()]
# set limits on x-axis and get limits on y-axis to determine the position of the x-axis labels (offset keyword to make_yaxis)
ax.set_xlim(-5 * sc, 5 * sc)
ax.spines['left'].set_position(('data', 0))
ax.spines['right'].set_color('none')
ax.spines['right'].axis.set_ticks([])
ax.spines['bottom'].set_position(('data', 0))
ax.spines['top'].set_color('none')
#ax.spines['left'].set_smart_bounds(True)
#ax.spines['bottom'].set_smart_bounds(True)
ax.spines['left'].set_linewidth(1.5)
ax.spines['bottom'].set_linewidth(1.5)
ax.spines['bottom'].set_position(('outward', 10))
matplotlib.rcParams.update({'font.size': 18})
ax.xaxis.set_ticks_position('bottom')
ax.xaxis.labelpad = -5
ax.set_xlabel('[%s]'%units,size=16)
#
# All custom titles and auxiliary info are placed onto the figure directly (fig.text) in relative coordinates
#
fig.text(0.5, 0.02, 'Simulated - Observed %s [%s]\nData from %s to %s' %(species,units,pydates[0].strftime('%d-%b-%Y'), pydates[-1].strftime('%d-%b-%Y')), horizontalalignment='center', fontsize=fontsize)
fig.text(0.5, 0.35, 'model-data\nmismatch:\n%.2f %s' % (mdm_fig, units), horizontalalignment='center', fontsize=fontsize, color='green')
#fig.text(0.5, 0.35, 'model-data\nmismatch:\n%.2f %s' % (sel_mdm.mean(), units), horizontalalignment='center', fontsize=fontsize, color='green')
fig.text(0.12, 0.75, 'NH Summer\n(Jun-Sep)', horizontalalignment='center', fontsize=fontsize)
fig.text(0.62, 0.75, 'NH Winter\n(Nov-Apr)', horizontalalignment='center', fontsize=fontsize)
#
# Title
#
plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 4)
#
# Add info to plot
#
font0= FontProperties(size=14,style='italic',weight='bold')
txt='CarbonTracker Europe\n $\copyright$ Wageningen University'
clr='green'
fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr )
#now = dt.datetime.today()
#str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y')
#fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5')
#str1 = 'data provided by %s'%SDSInfo['provider_1_name']
#fig.text(0.12,0.16,str1,fontsize=0.8*fontsize,color='0.75')
try:
img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
im = Image.open(StringIO.StringIO(img))
height = im.size[1]
width = im.size[0]
# We need a float array between 0-1, rather than
# a uint8 array between 0-255
im = np.array(im).astype(np.float)[::-1, :] / 255
# With newer (1.0) versions of matplotlib, you can
# use the "zorder" kwarg to make the image overlay
# the plot, rather than hide behind it... (e.g. zorder=10)
if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2
else: scalingf = 1
ax3 = fig.add_axes([0.47-0.05*scalingf, 0.65, 0.15*scalingf, 0.15*scalingf * height / width])
ax3.axis('off')
ax3.imshow(im, interpolation='None')
return fig
def timevssite_new(fig, infile):
fontsize = 17
#
# Get data
#
f = io.CT_CDF(infile, 'read')
species = f.dataset_parameter
if species == 'co2':
molefac=1e6
units = '$\mu$mol mol$^{-1}$'
species = "CO$_2$"
if species == 'co2c13':
molefac=1.0
units = 'permil'
species = "$\delta^{13}$C"
date = f.get_variable('time')
obs = f.get_variable('value') * molefac
mdm = f.get_variable('modeldatamismatch') * molefac
simulated = f.get_variable('modelsamplesmean') * molefac
flags = f.get_variable('flag_forecast')
if 'site_country' in f.ncattrs():
longsitestring = f.site_name + ', ' + f.site_country
else: longsitestring = f.site_name
if abs(f.site_latitude) > 9999:
location = 'Variable'
else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation)
SDSInfo = {}
for k in f.ncattrs():
SDSInfo[k] = f.getncattr(k)
f.close()
pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])
sampled = (np.ma.getmaskarray(simulated) == False)
if len(sampled.nonzero()[0]) < 2:
logging.warning("Too few simulated values found, continuing...")
return fig
simulated = simulated.compress(sampled)
obs = obs.compress(sampled)
pydates = pydates.compress(sampled)
mdm = mdm.compress(sampled)
flags = flags.compress(sampled)
residual = simulated - obs
assimilated = (flags == 0.0)
rejected = (flags == 2.0)
notused = (flags == 99.0)
sd = pydates[0]
ed = pydates[-1]
ax1 = fig.add_axes([0.1, 0.12, 0.7, 0.75])
ax2 = fig.add_axes([0.85, 0.12, 0.12, 0.75])
ax1.spines['right'].set_color('none')
ax1.spines['top'].set_color('none')
ax1.spines['left'].set_linewidth(1.5)
ax1.spines['bottom'].set_linewidth(1.5)
ax1.spines['left'].set_position(('outward', 10))
ax1.spines['bottom'].set_position(('outward', 10))
ax2.spines['right'].set_color('none')
ax2.spines['top'].set_color('none')
ax2.spines['left'].set_linewidth(1.5)
ax2.spines['bottom'].set_linewidth(1.5)
ax2.spines['left'].set_position(('outward', 10))
ax2.spines['bottom'].set_position(('outward', 10))
markersize = 8
fontsize = 16
#
# Plot observations
#
if assimilated.any():
p1 = ax1.plot(pydates.compress(assimilated), obs.compress(assimilated), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='k', label='Observed (assimilated)', markersize=markersize)
if notused.any():
p2 = ax1.plot(pydates.compress(notused), obs.compress(notused), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='tan', label='Observed (not assimilated)', markersize=markersize)
#
# Add the simulated values
#
q = ax1.plot(pydates, simulated, marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', \
markeredgecolor='lightblue', label='Simulated', markersize=markersize)
#
# Add the rejected values if available
#
if rejected.any():
r = ax1.plot(pydates.compress(rejected), simulated.compress(rejected), marker='s', markeredgewidth=1, markerfacecolor='r', markeredgecolor='r', linestyle='None', label='Model Rejected (N=%d)' % len(pydates.compress(rejected)), markersize=markersize)
#
# Set up x axis labels
#
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()]
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()]
#
# Location and format of xticks
#
ax1.xaxis.set_major_locator(pltdt.MonthLocator([7],bymonthday=7))
ax1.xaxis.set_major_formatter(pltdt.DateFormatter('%Y'))
#
# Legend
#
leg = ax1.legend(prop=FontProperties(size=(0.75 * fontsize)), borderpad=0.1, loc='upper left')
#leg.get_frame().set_visible(False)
leg.set_zorder(20)
leg.get_frame().set_color('1.0')
dummy = [lab.set_fontsize(16) for lab in leg.get_texts()]
#
# include grid
#
ax1.grid(True, ls='-', color='0.75', axis='y')
ax1.autoscale(enable=True, axis='y', tight=False)
#ax1.set_ylim(obs.min()-3*residual.std(),obs.max()+5*residual.std())
#ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1)))
ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1)))
#ax1.set_ylim(360,430) #LUT
ym = ax1.get_ylim()
ymin=ym[0] ; ymax =ym[1]
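    # Shade alternate years light grey to make the time axis easier to read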
for yr in range(sd.year,ed.year+1,2):
x1=dt.datetime(yr,1,1)
x2=dt.datetime(yr+1,1,1)
ax1.fill([x1,x2,x2,x1],[ymin,ymin,ymax,ymax],color='0.9',zorder=1)
ax1.set_ylim(ymin,ymax)
#
#
# Set Tick Font Size
#
#matplotlib.rcParams.update({'font.size': 30})
ax1.xaxis.set_ticks_position('bottom')
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()]
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()]
#xtitle='Time'
#ax1.set_xlabel(xtitle, fontsize=fontsize) # label x axis
ax1.set_ylabel(r"%s [%s]"% (species,units), fontsize=fontsize + 5) # label y-axis
#
# Axes 2
#
if mdm.mean() < 900:
residual = residual.compress(flags == 0)
else: residual = residual.compress(flags != 2)
if SDSInfo['site_code'] == 'POC': residual = residual.compress(simulated > -9000)
offset = 0.0
n, bins, patches = ax2.hist(residual, max(residual.shape[0] / 15, 15), normed=1, orientation='horizontal')
p = plt.setp(patches, 'facecolor', 'tan' , 'edgecolor', 'tan', label='None', alpha=0.25)
# Create normal distributions for the line plots over the interval of the x-axis
sc = residual.std()
bins = np.arange(-4 * sc, 4 * sc, 0.1)
n = normpdf(bins, residual.mean(), residual.std())
l = ax2.plot(n, bins, linestyle='-', color='lightblue', linewidth=1) # plot the PDF of the histogram in blue
dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_xticklabels()]
dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_yticklabels()]
labs = [
'%+.2f $\pm$ %.2f\nN=%d' % (residual.mean(), residual.std(), residual.shape[0],)
]
# print the above labels onto the figure. Note that I use relative coordinates for their position by specifying the transform=ax.transAxes
ax2.text(0.6, 0.01 + offset, labs[0], transform=ax2.transAxes, fontsize=1.1 * fontsize, horizontalalignment='center', color='k')
offset += -0.05
ax2.set_ylim(-6 * sc, 6 * sc)
ax2.spines['left'].set_position(('axes', 0.0))
ax2.spines['right'].set_color('none')
ax2.spines['bottom'].axis.set_ticks([])
ax2.spines['bottom'].set_position(('axes', 0.5))
ax2.spines['top'].set_color('none')
ax2.spines['left'].set_smart_bounds(True)
ax2.spines['bottom'].set_smart_bounds(True)
ax2.spines['left'].set_linewidth(1.5)
ax2.spines['bottom'].set_linewidth(1.5)
ax2.spines['bottom'].set_position(('outward', 10))
matplotlib.rcParams.update({'font.size': 18})
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticklabels([])
#ax2.set_ylabel(r"CO$_2$ [ppm]", fontsize=fontsize) # label y-axis
#ax2.set_xlabel("frequency", fontsize=fontsize) # label x-axis
#ax2.grid(True, axis='y')
ax2.grid(True, ls='-', color='0.75', axis='y')
#
# Title
#
plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 5)
#
# Add info to plot
#
font0= FontProperties(size=14,style='italic',weight='bold')
txt='CarbonTracker Europe\n $\copyright$ Wageningen University'
clr='green'
fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr )
#now = dt.datetime.today()
#str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y')
#fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5')
#str1 = 'data provided by %s' % SDSInfo['provider_1_name']
#fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75')
try:
img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
im = Image.open(StringIO.StringIO(img))
height = im.size[1]
width = im.size[0]
# We need a float array between 0-1, rather than
# a uint8 array between 0-255
im = np.array(im).astype(np.float)[::-1, :] / 255
# With newer (1.0) versions of matplotlib, you can
# use the "zorder" kwarg to make the image overlay
# the plot, rather than hide behind it... (e.g. zorder=10)
if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2
else: scalingf = 1
ax3 = fig.add_axes([0.85-0.15*scalingf, 0.125, 0.15*scalingf, 0.15*scalingf * height / width])
ax3.axis('off')
ax3.imshow(im, interpolation='None')
return fig
def residuals_new(fig, infile, option):
fontsize = 17
#
# Get data
#
f = io.CT_CDF(infile, 'read')
species = f.dataset_parameter
if species == 'co2':
molefac=1e6
units = '$\mu$mol mol$^{-1}$'
species = "CO$_2$"
if species == 'co2c13':
molefac=1.0
units = 'permil'
species = "$\delta^{13}$C"
date = f.get_variable('time')
obs = f.get_variable('value') * molefac
mdm = f.get_variable('modeldatamismatch') * molefac
if option == 'final':
simulated = f.get_variable('modelsamplesmean') * molefac
if option == 'forecast':
simulated = f.get_variable('modelsamplesmean_forecast') * molefac
hphtr = f.get_variable('totalmolefractionvariance_forecast') * molefac * molefac
flags = f.get_variable('flag_forecast')
if 'site_country' in f.ncattrs():
longsitestring = f.site_name + ', ' + f.site_country
else: longsitestring = f.site_name
if abs(f.site_latitude) > 9999:
location = 'Variable'
else: location = nice_lat(f.site_latitude,'python') + ', ' + nice_lon(f.site_longitude,'python') + ', ' + nice_alt(f.site_elevation)
SDSInfo = {}
for k in f.ncattrs():
SDSInfo[k] = f.getncattr(k)
f.close()
pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])
sampled = (np.ma.getmaskarray(simulated) == False)
if len(sampled.nonzero()[0]) < 2:
logging.warning("Too few simulated values found, continuing...")
return fig
simulated = simulated.compress(sampled)
obs = obs.compress(sampled)
pydates = pydates.compress(sampled)
mdm = mdm.compress(sampled)
hphtr = hphtr.compress(sampled)
flags = flags.compress(sampled)
assimilated = (flags == 0.0)
rejected = (flags == 2.0)
notused = (flags == 99.0)
residual = simulated - obs
sd = pydates[0]
ed = pydates[-1]
ax1 = fig.add_axes([0.1, 0.12, 0.7, 0.75])
ax2 = fig.add_axes([0.85, 0.12, 0.12, 0.75])
ax1.spines['right'].set_color('none')
ax1.spines['top'].set_color('none')
ax1.spines['left'].set_linewidth(1.5)
ax1.spines['bottom'].set_linewidth(1.5)
ax1.spines['left'].set_position(('outward', 10))
ax1.spines['bottom'].set_position(('outward', 10))
ax2.spines['right'].set_color('none')
ax2.spines['top'].set_color('none')
ax2.spines['left'].set_linewidth(1.5)
ax2.spines['bottom'].set_linewidth(1.5)
ax2.spines['left'].set_position(('outward', 10))
ax2.spines['bottom'].set_position(('outward', 10))
markersize = 8
fontsize = 16
#
# Plot observations
#
if assimilated.any():
p1 = ax1.plot(pydates.compress(assimilated), residual.compress(assimilated), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='k', label='Residual (assimilated)' , markersize=markersize,zorder=9)
if notused.any():
p2 = ax1.plot(pydates.compress(notused), residual.compress(notused), marker='o', markeredgewidth=1, linestyle='None', markerfacecolor='None', markeredgecolor='tan', label='Residual (not assimilated)', markersize=markersize,zorder=8)
#
# Add the model-data mismatch
#
mdm_fill = mdm.compress(assimilated).mean()
q = ax1.fill_between(pydates, mdm_fill, -1.0 * mdm_fill, label='model-data mismatch', color='tan', alpha=0.25, zorder=5)
#
# Add the rejected values if available
#
if rejected.any():
r = ax1.plot(pydates.compress(rejected), residual.compress(rejected), marker='s', markeredgewidth=1, markeredgecolor='red', markerfacecolor='red', linestyle='None', label='Model Rejected (N=%d)' % len(pydates.compress(rejected)), markersize=markersize,zorder=10)
#
# Axes 2
#
if option == 'final':
if mdm.mean() < 900:
residual = simulated.compress(flags == 0) - obs.compress(flags == 0)
pydates = pydates.compress(flags == 0)
mdm = mdm.compress(flags == 0)
else:
residual = simulated.compress(flags != 2) - obs.compress(flags != 2)
pydates = pydates.compress(flags != 2)
mdm = mdm.compress(flags != 2)
chisquared = (residual ** 2) / mdm
elif option == 'forecast':
chisquared = (residual ** 2) / hphtr
offset = 0.0
if SDSInfo['site_code'] == 'POC': residual = residual.compress(simulated > -9000)
n, bins, patches = ax2.hist(residual, max(residual.shape[0] / 15, 15), normed=1, orientation='horizontal')
p = plt.setp(patches, 'facecolor', 'tan' , 'edgecolor', 'tan', label='None', alpha=0.25)
# Create normal distributions for the line plots over the interval of the x-axis
sc = residual.std()
bins = np.arange(-4 * sc, 4 * sc, 0.1)
n = normpdf(bins, residual.mean(), residual.std())
l = ax2.plot(n, bins, linestyle='-', color='lightblue', linewidth=1) # plot the PDF of the histogram in blue
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_xticklabels()]
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax2.get_yticklabels()]
if option == 'final':
strX = ''
elif option == 'forecast':
strX = 'Inn. '
if chisquared.mean() != chisquared.mean() or mdm.mean() < 900:
labs = [
'%+.2f $\pm$ %.2f\nN=%d\n%s $\chi^2$ = %5.2f'%(residual.mean(), residual.std(), residual.shape[0], strX, chisquared.mean(),)
]
else:
labs = [
'%+.2f $\pm$ %.2f\nN=%d'%(residual.mean(), residual.std(), residual.shape[0],)
]
# print the above labels onto the figure. Note that I use relative coordinates for their position by specifying the transform=ax.transAxes
ax2.text(0.6, 0.01 + offset, labs[0], transform=ax2.transAxes, fontsize=1.1 * fontsize, horizontalalignment='center', color='k')
offset += -0.05
ax2.set_ylim(-6 * sc, 6 * sc)
ax2.spines['left'].set_position(('axes', 0.0))
ax2.spines['right'].set_color('none')
ax2.spines['bottom'].axis.set_ticks([])
ax2.spines['bottom'].set_position(('axes', 0.5))
ax2.spines['top'].set_color('none')
ax2.spines['left'].set_smart_bounds(True)
ax2.spines['bottom'].set_smart_bounds(True)
ax2.spines['left'].set_linewidth(1.5)
ax2.spines['bottom'].set_linewidth(1.5)
ax2.spines['bottom'].set_position(('outward', 10))
ax2.yaxis.set_ticks_position('left')
ax2.xaxis.set_ticklabels([])
#ax2.set_ylabel(r"CO$_2$ [ppm]", fontsize=fontsize) # label y-axis
#ax2.set_xlabel("frequency", fontsize=fontsize) # label x-axis
ax2.grid(True, ls='-', color='0.75', axis='y')
#
# Set up x axis labels
#
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()]
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()]
#
# Location and format of xticks
#
ax1.xaxis.set_major_locator(pltdt.MonthLocator([7],bymonthday=7))
ax1.xaxis.set_major_formatter(pltdt.DateFormatter('%Y'))
#
# Legend
#
leg = ax1.legend(prop=FontProperties(size=(0.75 * fontsize)), borderpad=0.1, loc='upper left')
#leg.get_frame().set_visible(False)
leg.set_zorder(20)
leg.get_frame().set_color('1.0')
dummy = [lab.set_fontsize(16) for lab in leg.get_texts()]
#
# include grid
#
ax1.grid(True, ls='-', color='0.75', axis='y')
ax1.set_xlim(pltdt.date2num(dt.datetime(sd.year, 1, 1)), pltdt.date2num(dt.datetime(ed.year + 1, 1, 1)))
ax1.set_ylim(-6 * sc, 6 * sc)
ym = ax1.get_ylim()
ymin=ym[0] ; ymax =ym[1]
for yr in range(sd.year,ed.year+1,2):
x1=dt.datetime(yr,1,1)
x2=dt.datetime(yr+1,1,1)
ax1.fill([x1,x2,x2,x1],[ymin,ymin,ymax,ymax],color='0.9',zorder=1)
#ax1.set_ylim(ymin,ymax)
#
#
# Set Tick Font Size
#
matplotlib.rcParams.update({'font.size': 18})
ax1.xaxis.set_ticks_position('bottom')
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_xticklabels()]
#dummy = [lab.set_fontsize(0.9 * fontsize) for lab in ax1.get_yticklabels()]
#xtitle='Time'
#ax1.set_xlabel(xtitle, fontsize=fontsize) # label x axis
ax1.set_ylabel(r"%s [%s]"%(species,units), fontsize=fontsize+5) # label y-axis
#
# Title
#
plt.suptitle('%s [%s]\n%s, %s, %s ' % (longsitestring, location , SDSInfo['dataset_project'], SDSInfo['lab_1_name'], SDSInfo['lab_1_country'],), fontsize=fontsize + 5)
#
# Add info to plot
#
font0= FontProperties(size=14,style='italic',weight='bold')
txt='CarbonTracker Europe\n $\copyright$ Wageningen University'
clr='green'
fig.text(0.8,0.01,txt,ha='left',font_properties = font0, color=clr )
#now = dt.datetime.today()
#str1 = 'CTDAS2012\n' + now.strftime('%d/%m/%y')
#fig.text(0.93, 0.95, str1, fontsize=0.75 * fontsize, color='0.5')
#str1 = 'data provided by %s' % SDSInfo['provider_1_name']
#fig.text(0.12, 0.16, str1, fontsize=0.8 * fontsize, color='0.75')
try:
img = urllib2.urlopen('http://www.esrl.noaa.gov/gmd/webdata/ccgg/ObsPack/images/logos/'+SDSInfo['lab_1_logo']).read()
except:
logging.warning("No logo found for this program, continuing...")
return fig
im = Image.open(StringIO.StringIO(img))
height = im.size[1]
width = im.size[0]
# We need a float array between 0-1, rather than
# a uint8 array between 0-255
im = np.array(im).astype(np.float)[::-1, :] / 255
# With newer (1.0) versions of matplotlib, you can
# use the "zorder" kwarg to make the image overlay
# the plot, rather than hide behind it... (e.g. zorder=10)
if SDSInfo['lab_1_abbr'] in small_logos: scalingf = 2
else: scalingf = 1
ax3 = fig.add_axes([0.85-0.15*scalingf, 0.125, 0.15*scalingf, 0.15*scalingf * height / width])
ax3.axis('off')
ax3.imshow(im, interpolation='None')
return fig
# main body if called as script
if __name__ == '__main__': # started as script
sys.path.append('../../')
logging.root.setLevel(logging.DEBUG)
analysisdir = "/Users/ingrid/mnt/promise/CO2/ingrid/carbontracker/cartesius/gcp2-combined/analysis/"
site_timeseries(analysisdir,option='final')
sys.exit(0)
...@@ -97,9 +97,9 @@ def summarize_obs(analysisdir, printfmt='html'): ...@@ -97,9 +97,9 @@ def summarize_obs(analysisdir, printfmt='html'):
infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')] infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')]
if printfmt == 'tex': if printfmt == 'tex':
print '\\begin{tabular*}{\\textheight}{l l l l r r r r}' print('\\begin{tabular*}{\\textheight}{l l l l r r r r}')
print 'Code & Name & Lat, Lon, Elev & Lab & N (flagged) & $\\sqrt{R}$ &Inn \\XS &Bias\\\\' print('Code & Name & Lat, Lon, Elev & Lab & N (flagged) & $\\sqrt{R}$ &Inn \\XS &Bias\\\\')
print '\hline\\\\ \n\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] ' print('\hline\\\\ \n\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] ')
fmt = '%8s & ' + ' %55s & ' + '%20s &' + '%6s &' + ' %4d (%d) & ' + ' %5.2f & ' + ' %5.2f & ' + '%+5.2f \\\\' fmt = '%8s & ' + ' %55s & ' + '%20s &' + '%6s &' + ' %4d (%d) & ' + ' %5.2f & ' + ' %5.2f & ' + '%+5.2f \\\\'
elif printfmt == 'html': elif printfmt == 'html':
tablehead = \ tablehead = \
...@@ -136,7 +136,7 @@ def summarize_obs(analysisdir, printfmt='html'): ...@@ -136,7 +136,7 @@ def summarize_obs(analysisdir, printfmt='html'):
<TD>%s</TD>\n \ <TD>%s</TD>\n \
</TR>\n""" </TR>\n"""
elif printfmt == 'scr': elif printfmt == 'scr':
print 'Code Site NObs flagged R Inn X2' print('Code Site NObs flagged R Inn X2')
fmt = '%8s ' + ' %55s %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f' fmt = '%8s ' + ' %55s %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f'
table = [] table = []
...@@ -363,15 +363,15 @@ def make_map(analysisdir): #makes a map of amount of assimilated observations pe ...@@ -363,15 +363,15 @@ def make_map(analysisdir): #makes a map of amount of assimilated observations pe
ax.annotate(labs[i],xy=m(172,86-count),xycoords='data',fontweight='bold') ax.annotate(labs[i],xy=m(172,86-count),xycoords='data',fontweight='bold')
count = count + 4 count = count + 4
fig.text(0.15,0.945,u'\u2022',fontsize=35,color='blue') fig.text(0.15,0.945,'\u2022',fontsize=35,color='blue')
fig.text(0.16,0.95,': N<250',fontsize=24,color='blue') fig.text(0.16,0.95,': N<250',fontsize=24,color='blue')
fig.text(0.30,0.94,u'\u2022',fontsize=40,color='green') fig.text(0.30,0.94,'\u2022',fontsize=40,color='green')
fig.text(0.31,0.95,': N<500',fontsize=24,color='green') fig.text(0.31,0.95,': N<500',fontsize=24,color='green')
fig.text(0.45,0.94,u'\u2022',fontsize=45,color='orange') fig.text(0.45,0.94,'\u2022',fontsize=45,color='orange')
fig.text(0.46,0.95,': N<750',fontsize=24,color='orange') fig.text(0.46,0.95,': N<750',fontsize=24,color='orange')
fig.text(0.60,0.939,u'\u2022',fontsize=50,color='brown') fig.text(0.60,0.939,'\u2022',fontsize=50,color='brown')
fig.text(0.61,0.95,': N<1000',fontsize=24,color='brown') fig.text(0.61,0.95,': N<1000',fontsize=24,color='brown')
fig.text(0.75,0.938,u'\u2022',fontsize=55,color='red') fig.text(0.75,0.938,'\u2022',fontsize=55,color='red')
fig.text(0.765,0.95,': N>1000',fontsize=24,color='red') fig.text(0.765,0.95,': N>1000',fontsize=24,color='red')
ax.set_title('Assimilated observations',fontsize=24) ax.set_title('Assimilated observations',fontsize=24)
......
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
import sys
sys.path.append('../../')
import os
import numpy as np
import string
import datetime as dt
import logging
import re
import da.tools.io4 as io
fontsize = 10
def nice_lat(cls,format='html'):
#
# Convert latitude from decimal to cardinal
#
if cls > 0:
h = 'N'
else:
h = 'S'
dec, deg = np.math.modf(cls)
#return string.strip('%2d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
if format == 'python':
return string.strip('%3d$^\circ$%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
if format == 'html':
return string.strip('%3d&deg%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
def nice_lon(cls,format='html'):
#
# Convert longitude from decimal to cardinal
#
if cls > 0:
h = 'E'
else:
h = 'W'
dec, deg = np.math.modf(cls)
#return string.strip('%3d %2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
if format == 'python':
return string.strip('%3d$^\circ$%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
if format == 'html':
return string.strip('%3d&deg%2d\'%s' % (abs(deg), round(abs(60 * dec), 0), h))
def nice_alt(cls):
#
# Reformat elevation or altitude
#
#return string.strip('%10.1f masl' % round(cls, -1))
return string.strip('%i masl' %cls)
def summarize_obs(analysisdir, printfmt='html'):
"""***************************************************************************************
Call example:
python summarize_obs.py
Option printfmt : [tex,scr,html] print summary table in latex, terminal, or html format
    Option analysisdir : path to the analysis directory that holds the data_molefractions files
OR:
call directly from a python script as:
    q=summarize_obs(analysisdir, printfmt='html')
***************************************************************************************"""
sumdir = os.path.join(analysisdir, 'summary')
if not os.path.exists(sumdir):
logging.info("Creating new directory " + sumdir)
os.makedirs(sumdir)
mrdir = os.path.join(analysisdir, 'data_molefractions')
if not os.path.exists(mrdir):
logging.error("Input directory does not exist (%s), exiting... " % mrdir)
return None
mrfiles = os.listdir(mrdir)
infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')]
if printfmt == 'tex':
print '\\begin{tabular*}{\\textheight}{l l l l r r r r}'
print 'Code & Name & Lat, Lon, Elev & Lab & N (flagged) & $\\sqrt{R}$ &Inn \\XS &Bias\\\\'
print '\hline\\\\ \n\multicolumn{8}{ c }{Semi-Continuous Surface Samples}\\\\[3pt] '
fmt = '%8s & ' + ' %55s & ' + '%20s &' + '%6s &' + ' %4d (%d) & ' + ' %5.2f & ' + ' %5.2f & ' + '%+5.2f \\\\'
elif printfmt == 'html':
tablehead = \
"<TR>\n <TH> Site code </TH> \
<TH> Sampling Type </TH> \
<TH> Lab. </TH> \
<TH> Country </TH> \
<TH> Lat, Lon, Elev. (m ASL) </TH> \
<TH> No. Obs. Available </TH> \
<TH> No. Obs. Assimilated </TH> \
<TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> &#8730;HPH (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> H(x)-y (JJAS) (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> H(x)-y (NDJFMA) (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> Inn. &#935;<sup>2</sup></TH> \
<TH> Site code </TH>\n \
</TR>\n"
fmt = """<TR> \n \
<TD><a href='javascript:LoadCO2Tseries("%s")'>%s </a></TD>\
<TD>%s</TD>\
<TD>%s</TD>\
<TD>%40s</TD>\
<TD>%s</TD>\
<TD>%d</TD>\
<TD>%d</TD>\
<TD>%+5.2f</TD>\
<TD>%+5.2f</TD>\
<TD>%+5.2f&plusmn;%5.2f</TD>\
<TD>%+5.2f&plusmn;%5.2f</TD>\
<TD>%+5.2f&plusmn;%5.2f</TD>\
<TD bgcolor=%s>%+5.2f</TD>\
<TD>%s</TD>\n \
</TR>\n"""
elif printfmt == 'scr':
print 'Code Site NObs flagged R Inn X2'
fmt = '%8s ' + ' %55s %s %s' + ' %4d ' + ' %4d ' + ' %5.2f ' + ' %5.2f'
table = []
for infile in infiles:
#if not 'mlo_surface-insitu' in infile: continue
#if not 'poc' in infile: continue
logging.debug( infile )
f = io.CT_CDF(infile, 'read')
date = f.get_variable('time')
obs = f.get_variable('value') * 1e6
mdm = f.get_variable('modeldatamismatch') * 1e6
simulated_fc = f.get_variable('modelsamplesmean_forecast') * 1e6
simulated = f.get_variable('modelsamplesmean') * 1e6
simulated_std = f.get_variable('modelsamplesstandarddeviation_forecast') * 1e6
hphtr = f.get_variable('totalmolefractionvariance_forecast') * 1e6 * 1e6
flag = f.get_variable('flag_forecast')
obs_avail = len(np.ma.compressed(mdm))
pydates = np.array([dt.datetime(1970, 1, 1) + dt.timedelta(seconds=int(d)) for d in date])
sampled = (np.ma.getmaskarray(simulated) == False)
pydates = pydates.compress(sampled)
simulated = simulated.compress(sampled)
simulated_fc = simulated_fc.compress(sampled)
obs = obs.compress(sampled)
mdm = mdm.compress(sampled)
hphtr = hphtr.compress(sampled)
flag = flag.compress(sampled)
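        # The POC ship track contains simulated fill values; keep only records with simulated > -9000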
if f.site_code.upper() == 'POC':
pydates = pydates.compress(simulated > -9000)
simulated_fc = simulated_fc.compress(simulated > -9000)
obs = obs.compress(simulated > -9000)
mdm = mdm.compress(simulated > -9000)
hphtr = hphtr.compress(simulated > -9000)
flag = flag.compress(simulated > -9000)
simulated = simulated.compress(simulated > -9000)
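        # Sites with a mean model-data mismatch above 900 ppm were not assimilated:
        # keep all non-rejected samples but report zero assimilated observations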
if mdm.mean() > 900:
pydates = pydates.compress(flag != 2)
simulated_fc = simulated_fc.compress(flag != 2)
simulated = simulated.compress(flag != 2)
obs = obs.compress(flag != 2)
mdm = mdm.compress(flag != 2)
hphtr = hphtr.compress(flag != 2)
obs_assim = 0
else:
pydates = pydates.compress(flag == 0)
simulated_fc = simulated_fc.compress(flag == 0)
simulated = simulated.compress(flag == 0)
obs = obs.compress(flag == 0)
mdm = mdm.compress(flag == 0)
hphtr = hphtr.compress(flag == 0)
obs_assim = len(np.ma.compressed(mdm))
summer = [i for i, d in enumerate(pydates) if d.month in [6, 7, 8, 9] ]
winter = [i for i, d in enumerate(pydates) if d.month in [11, 12, 1, 2, 3, 4] ]
diff = ((simulated - obs).mean())
diffsummer = ((simulated - obs).take(summer).mean())
diffwinter = ((simulated - obs).take(winter).mean())
diffstd = ((simulated - obs).std())
diffsummerstd = ((simulated - obs).take(summer).std())
diffwinterstd = ((simulated - obs).take(winter).std())
#chi_summer = ((simulated - obs)**2/mdm).take(summer).mean()
#chi_winter = ((simulated - obs)**2/mdm).take(winter).mean()
#n_summer = simulated.take(summer).shape[0]
#n_winter = simulated.take(winter).shape[0]
#print 'summer: %0.2f, %0.2f, %0.2f, %i'%(diffsummer,diffsummerstd,chi_summer,n_summer)
#print 'winter: %0.2f, %0.2f, %0.2f, %i'%(diffwinter,diffwinterstd,chi_winter,n_winter)
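        # Innovation chi-squared: mean of squared forecast residuals normalised by the total forecast mole fraction variance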
chi_sq = -99
if mdm.mean() < 900:
chi_sq = ((simulated_fc - obs)**2/hphtr).mean()
#chi_sq = ((simulated - obs)**2/mdm).mean()
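        # Colour of the chi-squared cell in the HTML table: grey = not assimilated, red = chi2 > 1.2, orange = chi2 < 0.5, green otherwise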
if mdm.mean() > 900:
chi_clr = '#EEEEEE'
elif chi_sq > 1.2:
chi_clr = '#ff0000'
elif chi_sq < 0.5:
chi_clr = '#ff7f00'
else: chi_clr = '#00cc00'
if abs(f.site_latitude) > 9999:
location = 'Variable'
else:location = nice_lat(f.site_latitude,'html') + ', ' + nice_lon(f.site_longitude,'html') + ', ' + nice_alt(f.site_elevation)
if 'site_country' in f.ncattrs():
country = f.site_country
else: country = 'Multiple'
if printfmt == 'html':
ss = (f.dataset_name[4:],
f.site_code.upper(),
f.dataset_project,
f.lab_1_abbr,
country,
location,
obs_avail,
obs_assim,
mdm.mean(),
np.sqrt((simulated_std ** 2).mean()),
diff, diffstd,
diffsummer, diffsummerstd,
diffwinter, diffwinterstd,
chi_clr, chi_sq,
f.site_code.upper())
table.append(ss)
f.close()
if printfmt == 'tex':
saveas = os.path.join(sumdir, 'site_table.tex')
f = open(saveas, 'w')
elif printfmt == 'html':
saveas = os.path.join(sumdir, 'site_table.html')
f = open(saveas, 'w')
txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
f.write(txt)
txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
f.write(txt)
f.write(tablehead)
for i, ss in enumerate(table):
f.write(fmt % ss)
if (i + 1) % 15 == 0:
f.write(tablehead)
if printfmt == 'tex':
f.write('\cline{2-8}\\\\')
f.write('\hline \\\\')
f.write('\end{tabular*}')
else:
txt = "\n</table>"
f.write(txt)
f.close()
logging.info("File written with summary: %s" % saveas)
def make_map(analysisdir): # makes a map of the number of assimilated observations per site
import netCDF4 as cdf
import matplotlib.pyplot as plt
import matplotlib
from maptools import *
from matplotlib.font_manager import FontProperties
sumdir = os.path.join(analysisdir, 'summary')
if not os.path.exists(sumdir):
logging.info("Creating new directory " + sumdir)
os.makedirs(sumdir)
mrdir = os.path.join(analysisdir, 'data_molefractions')
if not os.path.exists(mrdir):
logging.error("Input directory does not exist (%s), exiting... " % mrdir)
return None
mrfiles = os.listdir(mrdir)
infiles = [os.path.join(mrdir, f) for f in mrfiles if f.endswith('.nc')]
lats=[]
lons=[]
labs=[]
nobs=[]
for files in infiles:
f=cdf.Dataset(files)
if f.variables['modeldatamismatch'][:].max() < 0.001:
sim = f.variables['modelsamplesmean'][:]
flag = f.variables['flag_forecast'][:]
sim = sim.compress(flag != 2)
sampled = (np.ma.getmaskarray(sim) == False)
sim = sim.compress(sampled)
lats.append(f.site_latitude)
lons.append(f.site_longitude)
labs.append(f.site_code)
nobs.append(len(sim))
f.close()
lats = np.array(lats)
lons = np.array(lons)
labs = np.array(labs)
nobs = np.array(nobs)
saveas = os.path.join(sumdir, 'networkmap')
logging.info("Making map: %s" % saveas)
fig = plt.figure(1,figsize=(20,12))
ax = fig.add_axes([0.05,0.1,0.9,0.8])
#m,nx,ny = select_map('Global Cylinder')
m,nx,ny = select_map('Europe Conformal')
m.drawcountries()
m.drawcoastlines()
parallels = arange(-90.,91,30.)
m.drawparallels(parallels,color='grey',linewidth=0.5,dashes=[1,0.001],labels=[1,0,0,1],fontsize=16)
meridians = arange(-180.,181.,60.)
m.drawmeridians(meridians,color='grey',linewidth=0.5,dashes=[1,0.001],labels=[1,0,0,1],fontsize=16)
#for lon,lat,name,n in zip(lons,lats,names,nobs):
count = 0
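    # Marker colour and size scale with the number of assimilated observations per site;
    # sites without a valid longitude are stacked in a labelled column near the top of the map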
for i in range(len(lats)):
if nobs[i] < 250:
n = 0
c = 'blue'
elif nobs[i] < 500:
n = 1
c = 'green'
elif nobs[i] < 750:
n = 2
c = 'orange'
elif nobs[i] < 1000:
n = 3
c = 'brown'
else:
n = 4
c = 'red'
if lons[i] > -900:
x,y = m(lons[i],lats[i])
ax.plot(x,y,'o',color=c,markersize=12+1.5*n)#,markeredgecolor='k',markeredgewidth=2)
#ax.annotate(labs[i],xy=m(lons[i],lats[i]),xycoords='data',fontweight='bold')
else:
x,y = m(169,87-count)
ax.plot(x,y,'o',color=c,markersize=12+1.5*n)
ax.annotate(labs[i],xy=m(172,86-count),xycoords='data',fontweight='bold')
count = count + 4
fig.text(0.15,0.945,u'\u2022',fontsize=35,color='blue')
fig.text(0.16,0.95,': N<250',fontsize=24,color='blue')
fig.text(0.30,0.94,u'\u2022',fontsize=40,color='green')
fig.text(0.31,0.95,': N<500',fontsize=24,color='green')
fig.text(0.45,0.94,u'\u2022',fontsize=45,color='orange')
fig.text(0.46,0.95,': N<750',fontsize=24,color='orange')
fig.text(0.60,0.939,u'\u2022',fontsize=50,color='brown')
fig.text(0.61,0.95,': N<1000',fontsize=24,color='brown')
fig.text(0.75,0.938,u'\u2022',fontsize=55,color='red')
fig.text(0.765,0.95,': N>1000',fontsize=24,color='red')
ax.set_title('Assimilated observations',fontsize=24)
font0= FontProperties(size=15,style='italic',weight='bold')
txt='CarbonTracker Europe\n $\copyright$ Wageningen University'
clr='green'
fig.text(0.82,0.01,txt,ha='left',font_properties = font0, color=clr )
saveas=os.path.join(sumdir,'networkmap.png')
fig.savefig(saveas,dpi=200)
saveas=os.path.join(sumdir,'networkmap.large.png')
fig.savefig(saveas,dpi=300)
    plt.close(fig)
def summarize_stats(dacycle):
"""
Summarize the statistics of the observations for this cycle
This includes X2 statistics, RMSD, and others for both forecast and
final fluxes
"""
sumdir = os.path.join(dacycle['dir.analysis'], 'summary')
if not os.path.exists(sumdir):
logging.info("Creating new directory " + sumdir)
os.makedirs(sumdir)
# get forecast data from optimizer.ddddd.nc
startdate = dacycle['time.start']
dacycle['time.sample.stamp'] = "%s" % (startdate.strftime("%Y%m%d"),)
infile = os.path.join(dacycle['dir.output'], 'optimizer.%s.nc' % dacycle['time.sample.stamp'])
if not os.path.exists(infile):
logging.error("File not found: %s" % infile)
raise IOError
f = io.CT_CDF(infile, 'read')
sites = f.get_variable('sitecode')
y0 = f.get_variable('observed') * 1e6
hx = f.get_variable('modelsamplesmean_prior') * 1e6
dF = f.get_variable('modelsamplesdeviations_prior') * 1e6
HPHTR = f.get_variable('totalmolefractionvariance').diagonal() * 1e6 * 1e6
R = f.get_variable('modeldatamismatchvariance').diagonal() * 1e6 * 1e6
flags = f.get_variable('flag')
f.close()
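    # HPH^T per observation from the sample covariance of the prior ensemble deviations (N-1 normalisation)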
HPHT = dF.dot(np.transpose(dF)).diagonal() / (dF.shape[1] - 1.0)
rejected = (flags == 2.0)
sitecodes = [string.join(s.compressed(), '').strip() for s in sites]
# calculate X2 per observation for this time step
x2 = []
for i, site in enumerate(sitecodes):
x2.append((y0[i] - hx[i]) ** 2 / HPHTR[i])
x2 = np.ma.masked_where(HPHTR == 0.0, x2)
# calculate X2 per site
saveas = os.path.join(sumdir, 'x2_table_%s.html' % dacycle['time.sample.stamp'])
logging.info("Writing HTML tables for this cycle (%s)" % saveas)
f = open(saveas, 'w')
txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
f.write(txt)
txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
f.write(txt)
tablehead = \
"<TR>\n <TH> Site code </TH> \
<TH> N<sub>obs</sub> </TH> \
<TH> N<sub>rejected</sub> </TH> \
<TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
<TH> X2 </TH> \n \
</TR>\n"
fmt = """<TR> \n \
<TD>%s</TD>\
<TD>%d</TD>\
<TD>%d</TD>\
<TD>%+5.2f</TD>\
<TD>%+5.2f</TD>\
<TD>%+5.2f&plusmn;%5.2f</TD>\
<TD>%5.2f</TD>\n \
</TR>\n"""
f.write(tablehead)
set_sites = set(sitecodes)
set_sites = np.sort(list(set_sites))
for i, site in enumerate(set_sites):
sel = [i for i, s in enumerate(sitecodes) if s == site]
ss = (site, len(sel), rejected.take(sel).sum(), np.sqrt(R.take(sel)[0]), np.sqrt(HPHT.take(sel).mean()), (hx - y0).take(sel).mean(), (hx - y0).take(sel).std(), x2.take(sel).mean(),)
#print site,sel,x2.take(sel)
f.write(fmt % ss)
if (i + 1) % 15 == 0:
f.write(tablehead)
txt = "\n</table>"
f.write(txt)
f.close()
# Now summarize for each site across time steps
if not dacycle['time.start'] >= dt.datetime(2008, 12, 29):
return
logging.info("Writing HTML tables for each site")
for site in set_sites:
saveas = os.path.join(sumdir, '%s_x2.html' % site)
f = open(saveas, 'w')
logging.debug(saveas)
txt = "<meta http-equiv='content-type' content='text/html;charset=utf-8' />\n"
f.write(txt)
txt = "<table border=1 cellpadding=2 cellspacing=2 width='100%' bgcolor='#EEEEEE'>\n"
f.write(txt)
tablehead = \
"<TR>\n <TH> From File </TH> \
<TH> Site </TH> \
<TH> N<sub>obs</sub> </TH> \
<TH> N<sub>rejected</sub> </TH> \
<TH> &#8730;R (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> &#8730;HPH<sup>T</sup> (&mu;mol mol<sup>-1</sup>) </TH> \
<TH> H(x)-y (&mu;mol mol<sup>-1</sup>) </TH> \n \
<TH> X2 </TH> \n \
</TR>\n"
f.write(tablehead)
files = os.listdir(sumdir)
x2_files = [fil for fil in files if fil.startswith('x2')]
for htmlfile in x2_files:
lines = grep(site, os.path.join(sumdir, htmlfile))
for line in lines:
f.write('<TR>\n')
f.write('<TD>' + htmlfile + '</TD>')
f.write(line + '\n')
f.write('</TR>\n')
txt = "\n</table>"
f.write(txt)
f.close()
def grep(pattern, fil):
fileObj = open(fil, 'r')
r = []
for line in fileObj:
if re.search(pattern, line):
r.append(line)
return r
# main body if called as script
if __name__ == '__main__': # started as script
sys.path.append('../../')
logging.root.setLevel(logging.DEBUG)
analysisdir = "/Users/ingrid/mnt/promise/CO2/ingrid/carbontracker/cartesius/gcp2-combined/analysis/"
summarize_obs(analysisdir)
#make_map(analysisdir)
sys.exit(0)
...@@ -33,12 +33,12 @@ def time_avg(dacycle,avg='transcom'): ...@@ -33,12 +33,12 @@ def time_avg(dacycle,avg='transcom'):
""" Function to create a set of averaged files in a folder, needed to make longer term means """ """ Function to create a set of averaged files in a folder, needed to make longer term means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid' raise IOError('Choice of averaging invalid')
analysisdir = dacycle['dir.analysis'] analysisdir = dacycle['dir.analysis']
if not os.path.exists(analysisdir): if not os.path.exists(analysisdir):
raise IOError,'analysis dir requested (%s) does not exist, exiting...'%analysisdir raise IOError('analysis dir requested (%s) does not exist, exiting...'%analysisdir)
daily_avg(dacycle,avg) daily_avg(dacycle,avg)
...@@ -68,14 +68,14 @@ def daily_avg(dacycle,avg): ...@@ -68,14 +68,14 @@ def daily_avg(dacycle,avg):
""" Function to create a set of daily files in a folder, needed to make longer term means """ """ Function to create a set of daily files in a folder, needed to make longer term means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid' raise IOError('Choice of averaging invalid')
analysisdir = dacycle['dir.analysis'] analysisdir = dacycle['dir.analysis']
weekdir = os.path.join(analysisdir , 'data_%s_weekly'%avg) weekdir = os.path.join(analysisdir , 'data_%s_weekly'%avg)
daydir = os.path.join(analysisdir , 'data_%s_daily'%avg) daydir = os.path.join(analysisdir , 'data_%s_daily'%avg)
if not os.path.exists(daydir): if not os.path.exists(daydir):
print "Creating new output directory " + daydir print("Creating new output directory " + daydir)
os.makedirs(daydir) os.makedirs(daydir)
files = os.listdir(weekdir) files = os.listdir(weekdir)
...@@ -88,7 +88,7 @@ def daily_avg(dacycle,avg): ...@@ -88,7 +88,7 @@ def daily_avg(dacycle,avg):
dt = dacycle['cyclelength'] dt = dacycle['cyclelength']
for k,v in fileinfo.iteritems(): for k,v in fileinfo.items():
cycle_file = os.path.join(weekdir,k) cycle_file = os.path.join(weekdir,k)
for i in range(abs(dt.days)): for i in range(abs(dt.days)):
daily_file = os.path.join(daydir,'%s_fluxes.%s.nc'%(avg,(v+datetime.timedelta(days=i)).strftime('%Y-%m-%d'))) daily_file = os.path.join(daydir,'%s_fluxes.%s.nc'%(avg,(v+datetime.timedelta(days=i)).strftime('%Y-%m-%d')))
...@@ -100,7 +100,7 @@ def monthly_avg(dacycle,avg): ...@@ -100,7 +100,7 @@ def monthly_avg(dacycle,avg):
""" Function to average a set of files in a folder from daily to monthly means """ """ Function to average a set of files in a folder from daily to monthly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid' raise IOError('Choice of averaging invalid')
analysisdir = dacycle['dir.analysis'] analysisdir = dacycle['dir.analysis']
...@@ -108,7 +108,7 @@ def monthly_avg(dacycle,avg): ...@@ -108,7 +108,7 @@ def monthly_avg(dacycle,avg):
monthdir = os.path.join(analysisdir,'data_%s_monthly'%avg) monthdir = os.path.join(analysisdir,'data_%s_monthly'%avg)
if not os.path.exists(monthdir): if not os.path.exists(monthdir):
print "Creating new output directory " + monthdir print("Creating new output directory " + monthdir)
os.makedirs(monthdir) os.makedirs(monthdir)
...@@ -116,7 +116,7 @@ def monthly_avg(dacycle,avg): ...@@ -116,7 +116,7 @@ def monthly_avg(dacycle,avg):
files = [f for f in files if '-' in f and f.endswith('.nc')] files = [f for f in files if '-' in f and f.endswith('.nc')]
if len(files) < 28: if len(files) < 28:
print 'No month is yet complete, skipping monthly average' print('No month is yet complete, skipping monthly average')
return return
fileinfo = {} fileinfo = {}
...@@ -124,8 +124,8 @@ def monthly_avg(dacycle,avg): ...@@ -124,8 +124,8 @@ def monthly_avg(dacycle,avg):
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d') date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d')
fileinfo[filename] = date fileinfo[filename] = date
years = [d.year for d in fileinfo.values()] # get actual years years = [d.year for d in list(fileinfo.values())] # get actual years
months = set([d.month for d in fileinfo.values()]) # get actual months months = set([d.month for d in list(fileinfo.values())]) # get actual months
sd = datetime.datetime(min(years),1,1) sd = datetime.datetime(min(years),1,1)
ed = datetime.datetime(max(years)+1,1,1) ed = datetime.datetime(max(years)+1,1,1)
...@@ -136,7 +136,7 @@ def monthly_avg(dacycle,avg): ...@@ -136,7 +136,7 @@ def monthly_avg(dacycle,avg):
ndays_in_month = (nd-sd).days ndays_in_month = (nd-sd).days
avg_files = [os.path.join(daydir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd] avg_files = [os.path.join(daydir,k) for k,v in fileinfo.items() if v < nd and v >= sd]
if len(avg_files) != ndays_in_month: # only once month complete if len(avg_files) != ndays_in_month: # only once month complete
#print 'New month (%02d) is not yet complete, skipping monthly average'%(sd.month) #print 'New month (%02d) is not yet complete, skipping monthly average'%(sd.month)
...@@ -144,7 +144,7 @@ def monthly_avg(dacycle,avg): ...@@ -144,7 +144,7 @@ def monthly_avg(dacycle,avg):
else: else:
targetfile = os.path.join(monthdir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y-%m'))) targetfile = os.path.join(monthdir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y-%m')))
if not os.path.exists(targetfile): if not os.path.exists(targetfile):
print "New month (%02d) is complete, I have %d days for the next file"%(sd.month,ndays_in_month) print("New month (%02d) is complete, I have %d days for the next file"%(sd.month,ndays_in_month))
command = ['ncra','-O']+ avg_files + [targetfile] command = ['ncra','-O']+ avg_files + [targetfile]
status = subprocess.check_call(command) status = subprocess.check_call(command)
else: else:
...@@ -156,21 +156,21 @@ def yearly_avg(dacycle,avg): ...@@ -156,21 +156,21 @@ def yearly_avg(dacycle,avg):
""" Function to average a set of files in a folder from monthly to yearly means """ """ Function to average a set of files in a folder from monthly to yearly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid' raise IOError('Choice of averaging invalid')
analysisdir = dacycle['dir.analysis'] analysisdir = dacycle['dir.analysis']
monthdir = os.path.join(analysisdir , 'data_%s_monthly'%avg ) monthdir = os.path.join(analysisdir , 'data_%s_monthly'%avg )
yeardir = os.path.join(analysisdir,'data_%s_yearly'%avg) yeardir = os.path.join(analysisdir,'data_%s_yearly'%avg)
if not os.path.exists(yeardir): if not os.path.exists(yeardir):
print "Creating new output directory " + yeardir print("Creating new output directory " + yeardir)
os.makedirs(yeardir) os.makedirs(yeardir)
files = os.listdir(monthdir) # get monthly files files = os.listdir(monthdir) # get monthly files
files = [f for f in files if '-' in f and f.endswith('.nc')] files = [f for f in files if '-' in f and f.endswith('.nc')]
if not files: if not files:
print "No full year finished yet, skipping yearly average..." print("No full year finished yet, skipping yearly average...")
return return
fileinfo = {} fileinfo = {}
...@@ -178,7 +178,7 @@ def yearly_avg(dacycle,avg): ...@@ -178,7 +178,7 @@ def yearly_avg(dacycle,avg):
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m') date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m')
fileinfo[filename] = date fileinfo[filename] = date
years = set([d.year for d in fileinfo.values()]) years = set([d.year for d in list(fileinfo.values())])
sd = datetime.datetime(min(years),1,1) sd = datetime.datetime(min(years),1,1)
ed = datetime.datetime(max(years)+1,1,1) ed = datetime.datetime(max(years)+1,1,1)
...@@ -187,15 +187,15 @@ def yearly_avg(dacycle,avg): ...@@ -187,15 +187,15 @@ def yearly_avg(dacycle,avg):
nd = sd + relativedelta(years=+1) nd = sd + relativedelta(years=+1)
avg_files = [os.path.join(monthdir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd] avg_files = [os.path.join(monthdir,k) for k,v in fileinfo.items() if v < nd and v >= sd]
if not len(avg_files) == 12 : if not len(avg_files) == 12 :
print "Year %04d not finished yet, skipping yearly average..."%sd.year print("Year %04d not finished yet, skipping yearly average..."%sd.year)
else: else:
targetfile = os.path.join(yeardir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y'))) targetfile = os.path.join(yeardir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y')))
if not os.path.exists(targetfile): if not os.path.exists(targetfile):
print "Year %04d is complete, I have 12 months for the next file"%sd.year print("Year %04d is complete, I have 12 months for the next file"%sd.year)
command = ['ncra','-O']+ avg_files + [targetfile] command = ['ncra','-O']+ avg_files + [targetfile]
status = subprocess.check_call(command) status = subprocess.check_call(command)
...@@ -205,7 +205,7 @@ def longterm_avg(dacycle,avg): ...@@ -205,7 +205,7 @@ def longterm_avg(dacycle,avg):
""" Function to average a set of files in a folder from monthly to yearly means """ """ Function to average a set of files in a folder from monthly to yearly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']: if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid' raise IOError('Choice of averaging invalid')
analysisdir = dacycle['dir.analysis'] analysisdir = dacycle['dir.analysis']
...@@ -213,14 +213,14 @@ def longterm_avg(dacycle,avg): ...@@ -213,14 +213,14 @@ def longterm_avg(dacycle,avg):
longtermdir = os.path.join(analysisdir,'data_%s_longterm'%avg) longtermdir = os.path.join(analysisdir,'data_%s_longterm'%avg)
if not os.path.exists(longtermdir): if not os.path.exists(longtermdir):
print "Creating new output directory " + longtermdir print("Creating new output directory " + longtermdir)
os.makedirs(longtermdir) os.makedirs(longtermdir)
files = os.listdir(yeardir) files = os.listdir(yeardir)
files = [f for f in files if '-' in f and f.endswith('.nc')] files = [f for f in files if '-' in f and f.endswith('.nc')]
if not files: if not files:
print "No full year finished yet, skipping longterm average..." print("No full year finished yet, skipping longterm average...")
return return
dates = [] dates = []
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# time_avg_fluxes.py
"""
Author : peters
Revision History:
File created on 20 Dec 2012.
"""
import sys
sys.path.append('../../')
import os
import sys
import shutil
from dateutil.relativedelta import relativedelta
import datetime
import subprocess
def time_avg(dacycle,avg='transcom'):
""" Function to create a set of averaged files in a folder, needed to make longer term means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid'
analysisdir = dacycle['dir.analysis']
if not os.path.exists(analysisdir):
raise IOError,'analysis dir requested (%s) does not exist, exiting...'%analysisdir
daily_avg(dacycle,avg)
monthly_avg(dacycle,avg)
yearly_avg(dacycle,avg)
longterm_avg(dacycle,avg)
def new_month(dacycle):
""" check whether we just entered a new month"""
this_month = dacycle['time.start'].month
prev_month = (dacycle['time.start']-dacycle['cyclelength']).month
return (this_month != prev_month)
def new_year(dacycle):
""" check whether we just entered a new year"""
this_year = dacycle['time.start'].year
prev_year = (dacycle['time.start']-dacycle['cyclelength']).year
return (this_year != prev_year)
def daily_avg(dacycle,avg):
""" Function to create a set of daily files in a folder, needed to make longer term means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid'
analysisdir = dacycle['dir.analysis']
weekdir = os.path.join(analysisdir , 'data_%s_weekly'%avg)
daydir = os.path.join(analysisdir , 'data_%s_daily'%avg)
if not os.path.exists(daydir):
print "Creating new output directory " + daydir
os.makedirs(daydir)
files = os.listdir(weekdir)
files = [f for f in files if '-' in f and f.endswith('.nc')]
fileinfo = {}
for filename in files:
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d')
fileinfo[filename] = date
dt = dacycle['cyclelength']
for k,v in fileinfo.iteritems():
cycle_file = os.path.join(weekdir,k)
for i in range(abs(dt.days)):
daily_file = os.path.join(daydir,'%s_fluxes.%s.nc'%(avg,(v+datetime.timedelta(days=i)).strftime('%Y-%m-%d')))
if not os.path.lexists(daily_file):
os.symlink(cycle_file,daily_file)
#print daily_file,cycle_file
def monthly_avg(dacycle,avg):
""" Function to average a set of files in a folder from daily to monthly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid'
analysisdir = dacycle['dir.analysis']
daydir = os.path.join(analysisdir , 'data_%s_daily'%avg)
monthdir = os.path.join(analysisdir,'data_%s_monthly'%avg)
if not os.path.exists(monthdir):
print "Creating new output directory " + monthdir
os.makedirs(monthdir)
files = os.listdir(daydir) # get daily files
files = [f for f in files if '-' in f and f.endswith('.nc')]
if len(files) < 28:
print 'No month is yet complete, skipping monthly average'
return
fileinfo = {}
for filename in files: # parse date from each of them
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m-%d')
fileinfo[filename] = date
years = [d.year for d in fileinfo.values()] # get actual years
months = set([d.month for d in fileinfo.values()]) # get actual months
sd = datetime.datetime(min(years),1,1)
ed = datetime.datetime(max(years)+1,1,1)
while sd < ed:
nd = sd + relativedelta(months=+1)
ndays_in_month = (nd-sd).days
avg_files = [os.path.join(daydir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd]
if len(avg_files) != ndays_in_month: # only once month complete
#print 'New month (%02d) is not yet complete, skipping monthly average'%(sd.month)
pass
else:
targetfile = os.path.join(monthdir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y-%m')))
if not os.path.exists(targetfile):
print "New month (%02d) is complete, I have %d days for the next file"%(sd.month,ndays_in_month)
command = ['ncra','-O']+ avg_files + [targetfile]
status = subprocess.check_call(command)
else:
pass
sd = nd
def yearly_avg(dacycle,avg):
""" Function to average a set of files in a folder from monthly to yearly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid'
analysisdir = dacycle['dir.analysis']
monthdir = os.path.join(analysisdir , 'data_%s_monthly'%avg )
yeardir = os.path.join(analysisdir,'data_%s_yearly'%avg)
if not os.path.exists(yeardir):
print "Creating new output directory " + yeardir
os.makedirs(yeardir)
files = os.listdir(monthdir) # get monthly files
files = [f for f in files if '-' in f and f.endswith('.nc')]
if not files:
print "No full year finished yet, skipping yearly average..."
return
fileinfo = {}
for filename in files:
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y-%m')
fileinfo[filename] = date
years = set([d.year for d in fileinfo.values()])
sd = datetime.datetime(min(years),1,1)
ed = datetime.datetime(max(years)+1,1,1)
while sd < ed:
nd = sd + relativedelta(years=+1)
avg_files = [os.path.join(monthdir,k) for k,v in fileinfo.iteritems() if v < nd and v >= sd]
if not len(avg_files) == 12 :
print "Year %04d not finished yet, skipping yearly average..."%sd.year
else:
targetfile = os.path.join(yeardir,'%s_fluxes.%s.nc'%(avg,sd.strftime('%Y')))
if not os.path.exists(targetfile):
print "Year %04d is complete, I have 12 months for the next file"%sd.year
command = ['ncra','-O']+ avg_files + [targetfile]
status = subprocess.check_call(command)
sd = nd
def longterm_avg(dacycle,avg):
""" Function to average a set of files in a folder from monthly to yearly means """
if avg not in ['transcom','transcom_extended','olson','olson_extended','country','flux1x1']:
raise IOError,'Choice of averaging invalid'
analysisdir = dacycle['dir.analysis']
yeardir = os.path.join(analysisdir , 'data_%s_yearly'%avg )
longtermdir = os.path.join(analysisdir,'data_%s_longterm'%avg)
if not os.path.exists(longtermdir):
print "Creating new output directory " + longtermdir
os.makedirs(longtermdir)
files = os.listdir(yeardir)
files = [f for f in files if '-' in f and f.endswith('.nc')]
if not files:
print "No full year finished yet, skipping longterm average..."
return
dates = []
for filename in files:
date=datetime.datetime.strptime(filename.split('.')[-2],'%Y')
dates.append( date )
avg_files = [os.path.join(yeardir,k) for k in files]
if len(avg_files) > 0 :
command = ['ncra','-O']+ avg_files + [os.path.join(longtermdir,'%s_fluxes.%04d-%04d.nc'%(avg,min(dates).year, max(dates).year))]
status = subprocess.check_call(command)
if __name__ == "__main__":
from da.tools.initexit import CycleControl
sys.path.append('../../')
dacycle = CycleControl(args={'rc':'../../ctdas-ei-nobcb-zoom-ecoregions.rc'})
dacycle.setup()
dacycle.parse_times()
while dacycle['time.end'] < dacycle['time.finish']:
time_avg(dacycle,avg='flux1x1')
time_avg(dacycle,avg='transcom')
time_avg(dacycle,avg='transcom_extended')
time_avg(dacycle,avg='olson')
time_avg(dacycle,avg='olson_extended')
time_avg(dacycle,avg='country')
dacycle.advance_cycle_times()
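For reference, the monthly step above is simply an NCO record average (ncra) over the daily files of one complete month; monthly_avg() assembles and runs the call sketched below. The directory names, tracer name and dates here are illustrative only, assuming avg='transcom' and a finished January 2016.

import subprocess

# hypothetical locations of the daily and monthly analysis directories
daydir = 'analysis/data_transcom_daily'
monthdir = 'analysis/data_transcom_monthly'
# daily files follow the '<avg>_fluxes.YYYY-MM-DD.nc' pattern used above
avg_files = ['%s/transcom_fluxes.2016-01-%02d.nc' % (daydir, d) for d in range(1, 32)]
targetfile = '%s/transcom_fluxes.2016-01.nc' % monthdir
# same command as monthly_avg(): average all daily records into one monthly mean file
subprocess.check_call(['ncra', '-O'] + avg_files + [targetfile])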
...@@ -35,7 +35,7 @@ by oceans, sea, or open water. The aggregation will thus work best on arrays tha ...@@ -35,7 +35,7 @@ by oceans, sea, or open water. The aggregation will thus work best on arrays tha
""" """
import sys import sys
import cPickle import pickle
import os import os
sys.path.append('../../') sys.path.append('../../')
rootdir = os.getcwd().split('da/')[0] rootdir = os.getcwd().split('da/')[0]
...@@ -45,9 +45,9 @@ from numpy import sum, array ...@@ -45,9 +45,9 @@ from numpy import sum, array
try: try:
from dbfpy import dbf from dbfpy import dbf
except: except:
print "the python DBF lib might be needed, please install from:" print("the python DBF lib might be needed, please install from:")
print "http://dbfpy.sourceforge.net/" print("http://dbfpy.sourceforge.net/")
print " Trying to complete anyway..." print(" Trying to complete anyway...")
sys.path.append('../../') sys.path.append('../../')
from da.analysis.tools_regions import globarea from da.analysis.tools_regions import globarea
...@@ -154,7 +154,7 @@ def get_countrydict(): ...@@ -154,7 +154,7 @@ def get_countrydict():
file = os.path.join(analysisdir,'country_dictionary.dat') file = os.path.join(analysisdir,'country_dictionary.dat')
try: try:
countrydict = cPickle.load(open(file, 'rb')) countrydict = pickle.load(open(file, 'rb'))
except: except:
db = dbf.Dbf(os.path.join(analysisdir,'GRIDCTRY.DBF')) db = dbf.Dbf(os.path.join(analysisdir,'GRIDCTRY.DBF'))
...@@ -194,7 +194,7 @@ def get_countrydict(): ...@@ -194,7 +194,7 @@ def get_countrydict():
db.close() db.close()
cPickle.dump(countrydict, open(file, 'wb'), -1) pickle.dump(countrydict, open(file, 'wb'), -1)
return countrydict return countrydict
...@@ -205,24 +205,24 @@ if __name__ == "__main__": ...@@ -205,24 +205,24 @@ if __name__ == "__main__":
area = globarea() area = globarea()
areas = [] areas = []
for k, v in countrydict.items(): for k, v in list(countrydict.items()):
ar = v.agg_1x1(area) / 1.e6 ar = v.agg_1x1(area) / 1.e6
areas.append((ar, k)) areas.append((ar, k))
areas.sort() areas.sort()
areas.reverse() areas.reverse()
for a in areas: print a for a in areas: print(a)
v = countrydict['Ocean'] v = countrydict['Ocean']
print v.agg_1x1(area) print(v.agg_1x1(area))
v = countrydict['Netherlands'] v = countrydict['Netherlands']
print v.agg_1x1(area) print(v.agg_1x1(area))
v = countrydict['Slovakia'] v = countrydict['Slovakia']
print v.agg_1x1(area) print(v.agg_1x1(area))
v = countrydict['Czech Republic'] v = countrydict['Czech Republic']
print v.agg_1x1(area) print(v.agg_1x1(area))
v = countrydict['Czechoslovakia'] v = countrydict['Czechoslovakia']
print v.agg_1x1(area) print(v.agg_1x1(area))
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# tools_country.py
"""
Author : peters
Revision History:
File created on 25 Jul 2008.
This module provides an interface to go from arrays of gridded data to aggregates for countries.
It uses the country information from the UNEP database, and the database created by them for this purpose. See:
http://na.unep.net/metadata/unep/GRID/GRIDCTRY.html
The module sets up a class 'countryinfo' that holds a number of attributes. These attributes can be used to
extract gridded information about the country. A built-in method agg_1x1 is provided for convenience.
The routine get_countrydict() creates a dictionary with this information for a large number of countries.
CAUTION:
The country data only covers the land areas of a nation, and aggregation will exclude the fraction of the land covered
by oceans, sea, or open water. The aggregation will thus work best on arrays that are *not* defined by unit area!
"""
import sys
import cPickle
import os
sys.path.append('../../')
rootdir = os.getcwd().split('da/')[0]
analysisdir = os.path.join(rootdir, 'da/analysis')
from numpy import sum, array
try:
from dbfpy import dbf
except:
print "the python DBF lib might be needed, please install from:"
print "http://dbfpy.sourceforge.net/"
print " Trying to complete anyway..."
sys.path.append('../../')
from da.analysis.tools_regions import globarea
EU25 = ["Austria", "Belgium", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", "Slovakia", "Slovenia", "Spain", "Sweden", "United Kingdom"]
EU27 = ["Austria", "Belgium", "Bulgaria", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden", "United Kingdom"]
G8 = [ "France", "Germany", "Italy", "Japan", "United Kingdom", "United States"]
annex1 = [ "Australia", "Austria", "Belarus", "Belgium", "Bulgaria", "Canada", "Croatia", "Czech Republic", "Denmark", "European Union", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Italy", "Japan", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Monaco", "Netherlands", "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Russia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey", "Ukraine", "United Kingdom", "United States"]
annex2 = [ "Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland", "France", "Germany", "Greece", "Iceland", "Ireland", "Italy", "Japan", "Luxembourg", "Netherlands", "New Zealand", "Norway", "Portugal", "Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom", "United States"]
class countryinfo(object):
""" Defines the information about 1x1 gridboxes covering each country """
def __init__(self, code):
self.country = code
self.ngrids = 0
self.gridij = []
self.gridnr = []
self.gridcoords = []
self.gridlandmask = []
self.gridsharedocean = []
self.gridsharedborder = []
def add_gridinfo(self, grid_i, grid_j, gridlandmask, shared_border, shared_water):
""" add information from one gridbox to the object """
self.gridij.append((grid_j, grid_i,)) # add tuple with j,i coordinates
lon = -180 + grid_i + 0.5
lat = -90 + grid_j + 0.5
self.gridcoords.append((lat, lon,)) # add tuple with lat,lon coordinates
self.gridnr.append(grid_j * 360 + grid_i) # add grid number for take() function
self.gridlandmask.append(gridlandmask) # this gives the total fraction of land
self.gridsharedocean.append(shared_water)
self.gridsharedborder.append(shared_border)
self.ngrids = self.ngrids + 1
def agg_1x1(self, field):
""" aggregate a 1x1 input field to country total """
#print field.take(self.gridnr)
#print self.gridlandmask
return (field.take(self.gridnr) * array(self.gridlandmask)).sum()
def __str__(self):
return ' Country name : %s\n' % self.country + \
' Number of gridpoints : %d ' % self.ngrids
#' Gridpoint indices : %s ' % self.gridnr
def fix_eu(rec):
""" fix Czech Republic and Slovakia manually """
alternative_slov = {
'140202': (2.0, 28.1), \
'140201': (2.0, 34.0), \
'140200': (2.0, 44.0), \
'140199': (3.0, 25.5), \
'139203': (3.0, 18.9), \
'139202': (2.0, 57.5), \
'139201': (2.0, 59.7), \
'139200': (2.0, 87.2), \
'139199': (1.0, 100.0), \
'139198': (2.0, 72.8), \
'138198': (3.0, 7.7), \
'138199':(2.0, 10.0) }
alternative_czech = {
'141193': (2.0, 23.0), \
'141194': (2.0, 62.1), \
'141195': (3.0, 89.5), \
'141196': (2.0, 79.4), \
'141197': (2.0, 42.3), \
'141198': (2.0, 24.5), \
'141199': (2.0, 0.1), \
'140193': (2.0, 20.6), \
'140194': (2.0, 88.9), \
'140195': (1.0, 100.0), \
'140196': (1.0, 100.0), \
'140197': (1.0, 100.0), \
'140198': (1.0, 100.0), \
'140199': (3.0, 50.0), \
'139195': (2.0, 70.6), \
'139196': (2.0, 12.4), \
'139197': (2.0, 30.9), \
'139198': (2.0, 25.0) }
id = str(int(rec['GRID']))
for dict in [alternative_slov, alternative_czech]:
if id in dict:
rec['COVER_ID'] = dict[id][0]
rec['RATE_IN_GR'] = dict[id][1]
return rec
def get_countrydict():
""" Create a dictionary with grid-to-country information from a dbf file"""
countrydict = countryinfo('Test')
file = os.path.join(analysisdir,'country_dictionary.dat')
try:
countrydict = cPickle.load(open(file, 'rb'))
except:
db = dbf.Dbf(os.path.join(analysisdir,'GRIDCTRY.DBF'))
countrydict = {}
for n, rec in enumerate(db):
code = rec['COUNTRY']
gridid = str(int(rec['GRID']))
if code in ['Czech Republic', 'Slovakia']:
rec = fix_eu(rec)
rate_in_gr = rec['RATE_IN_GR'] * 1.e-2
i = int(gridid[-3::])
j = int(gridid[0:-3])
lat = -91 + j + 0.5
lon = -181 + i + 0.5
if code in countrydict:
a = countrydict[code]
else:
a = countryinfo(code)
shared_border = False
shared_water = False
if rec['COVER_ID'] == 0.0:
shared_border = False
shared_water = True
if rec['COVER_ID'] >= 2.0:
shared_border = True
if rec['COVER_ID'] >= 10.0:
shared_water = True
a.add_gridinfo(i - 1, j - 1, rate_in_gr, shared_border, shared_water)
countrydict[code] = a
db.close()
cPickle.dump(countrydict, open(file, 'wb'), -1)
return countrydict
if __name__ == "__main__":
countrydict = get_countrydict()
area = globarea()
areas = []
for k, v in countrydict.items():
ar = v.agg_1x1(area) / 1.e6
areas.append((ar, k))
areas.sort()
areas.reverse()
for a in areas: print a
v = countrydict['Ocean']
print v.agg_1x1(area)
v = countrydict['Netherlands']
print v.agg_1x1(area)
v = countrydict['Slovakia']
print v.agg_1x1(area)
v = countrydict['Czech Republic']
print v.agg_1x1(area)
v = countrydict['Czechoslovakia']
print v.agg_1x1(area)
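The caution in the module docstring is worth a concrete illustration: agg_1x1() sums land-fraction-weighted gridbox values, so a field given per unit area should be multiplied by gridbox area before aggregation. A minimal sketch, assuming GRIDCTRY.DBF (or the cached country_dictionary.dat) is present in da/analysis, with get_countrydict() from the module above in scope and a dummy flux field:

import numpy as np
from da.analysis.tools_regions import globarea

area = globarea()                        # gridbox area in m2, shape (180, 360)
flux_per_m2 = np.ones((180, 360))        # hypothetical flux in mol m-2 s-1
countrydict = get_countrydict()
nl = countrydict['Netherlands']
total = nl.agg_1x1(flux_per_m2 * area)   # convert to per-gridbox totals first, then aggregate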
...@@ -13,7 +13,7 @@ program. If not, see <http://www.gnu.org/licenses/>.""" ...@@ -13,7 +13,7 @@ program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python #!/usr/bin/env python
import numpy as np import numpy as np
import cPickle import pickle
from da.analysis.tools_transcom import * from da.analysis.tools_transcom import *
# Aggregated olson ecosystem regions for CT Europe # Aggregated olson ecosystem regions for CT Europe
...@@ -27,8 +27,8 @@ aggregates = { ...@@ -27,8 +27,8 @@ aggregates = {
"IceWaterDeserts" : (12, 18, 19) \ "IceWaterDeserts" : (12, 18, 19) \
} }
ext_econams = [a for a, b in aggregates.iteritems()] ext_econams = [a for a, b in aggregates.items()]
ext_ecocomps = [b for a, b in aggregates.iteritems()] ext_ecocomps = [b for a, b in aggregates.items()]
eco19_to_ecosums = zeros((19, 6), float) eco19_to_ecosums = zeros((19, 6), float)
for i, k in enumerate(ext_ecocomps): for i, k in enumerate(ext_ecocomps):
...@@ -48,7 +48,7 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None): ...@@ -48,7 +48,7 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None):
if not mapname: if not mapname:
raise Exception raise Exception
regionselect = cPickle.load(open('%s_regiondict.pickle' % mapname, 'rb')) regionselect = pickle.load(open('%s_regiondict.pickle' % mapname, 'rb'))
except: except:
# dictionary for region <-> map conversions # dictionary for region <-> map conversions
...@@ -60,14 +60,14 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None): ...@@ -60,14 +60,14 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None):
regionselect = regs regionselect = regs
cPickle.dump(regionselect, open('%s_regiondict.pickle' % mapname, 'wb'), -1) pickle.dump(regionselect, open('%s_regiondict.pickle' % mapname, 'wb'), -1)
print 'Pickling region map' print('Pickling region map')
if reverse: if reverse:
""" project 1x1 degree map onto ecoregions """ """ project 1x1 degree map onto ecoregions """
result = np.zeros(nregions, float) result = np.zeros(nregions, float)
for k, v in regionselect.iteritems(): for k, v in regionselect.items():
if avg: if avg:
result[k - 1] = values.ravel().take(v).mean() result[k - 1] = values.ravel().take(v).mean()
else : else :
...@@ -78,7 +78,7 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None): ...@@ -78,7 +78,7 @@ def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None):
""" project ecoregion properties onto 1x1 degree map """ """ project ecoregion properties onto 1x1 degree map """
result = np.zeros((180, 360,), float) result = np.zeros((180, 360,), float)
for k, v in regionselect.iteritems(): for k, v in regionselect.items():
result.put(v, values[k - 1]) result.put(v, values[k - 1])
return result return result
...@@ -96,7 +96,7 @@ def globarea(im=360, jm=180, silent=True): ...@@ -96,7 +96,7 @@ def globarea(im=360, jm=180, silent=True):
dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2 dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2
area = np.resize(np.repeat(dxy, im, axis=0) , [jm, im]) area = np.resize(np.repeat(dxy, im, axis=0) , [jm, im])
if not silent: if not silent:
print 'total area of field = ', np.sum(area.flat) print('total area of field = ', np.sum(area.flat))
print 'total earth area = ', 4 * np.pi * radius ** 2 print('total earth area = ', 4 * np.pi * radius ** 2)
return area return area
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
import numpy as np
import cPickle
from da.analysis.tools_transcom import *
# Aggregated olson ecosystem regions for CT Europe
aggregates = {
"Forest" : (1, 2, 3, 5, 8, 10,) , \
"Grass" : (4, 6, 13) , \
"Crops": (14,) , \
"Tundra" : (7, 9, 16) , \
"Coastal" : (11, 15, 17) , \
"IceWaterDeserts" : (12, 18, 19) \
}
ext_econams = [a for a, b in aggregates.iteritems()]
ext_ecocomps = [b for a, b in aggregates.iteritems()]
eco19_to_ecosums = zeros((19, 6), float)
for i, k in enumerate(ext_ecocomps):
indices = [x - 1 for x in k]
eco19_to_ecosums[:, i].put(indices, 1.0)
##### END OF REGION DEFINITIONS
def state_to_grid(values, regionmap, reverse=False, avg=False, mapname=None):
"""
This method converts parameters from a CarbonTracker StateVector object to a gridded map of linear multiplication values. These
can subsequently be used in the transport model code to multiply/manipulate fluxes
"""
nregions = regionmap.max()
try:
if not mapname:
raise Exception
regionselect = cPickle.load(open('%s_regiondict.pickle' % mapname, 'rb'))
except:
# dictionary for region <-> map conversions
regs = {}
for r in np.arange(1, nregions + 1):
sel = (regionmap.flat == r).nonzero()
if len(sel[0]) > 0:
regs[r] = sel
regionselect = regs
cPickle.dump(regionselect, open('%s_regiondict.pickle' % mapname, 'wb'), -1)
print 'Pickling region map'
if reverse:
""" project 1x1 degree map onto ecoregions """
result = np.zeros(nregions, float)
for k, v in regionselect.iteritems():
if avg:
result[k - 1] = values.ravel().take(v).mean()
else :
result[k - 1] = values.ravel().take(v).sum()
return result
else:
""" project ecoregion properties onto 1x1 degree map """
result = np.zeros((180, 360,), float)
for k, v in regionselect.iteritems():
result.put(v, values[k - 1])
return result
def globarea(im=360, jm=180, silent=True):
""" Function calculates the surface area according to TM5 definitions"""
radius = 6.371e6 # the earth radius in meters
deg2rad = np.pi / 180.
g = 9.80665
dxx = 360.0 / im * deg2rad
dyy = 180.0 / jm * deg2rad
lat = np.arange(-90 * deg2rad, 90 * deg2rad, dyy)
dxy = dxx * (np.sin(lat + dyy) - np.sin(lat)) * radius ** 2
area = np.resize(np.repeat(dxy, im, axis=0) , [jm, im])
if not silent:
print 'total area of field = ', np.sum(area.flat)
print 'total earth area = ', 4 * np.pi * radius ** 2
return area
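A short sketch of the two directions of state_to_grid() above. The two-region map and multiplier values are made up; mapname only sets the name of the region-dictionary pickle that gets cached in the working directory:

import numpy as np
from da.analysis.tools_regions import state_to_grid

regionmap = np.ones((180, 360), int)
regionmap[90:, :] = 2                     # hypothetical two-region map (southern/northern half)
values = np.array([0.9, 1.1])             # one multiplier per region
grid = state_to_grid(values, regionmap, mapname='demo')                        # regions -> 1x1 map
means = state_to_grid(grid, regionmap, reverse=True, avg=True, mapname='demo') # 1x1 map -> region means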
...@@ -104,7 +104,7 @@ def monthgen(sd, ed): ...@@ -104,7 +104,7 @@ def monthgen(sd, ed):
""" Generate sequence of datetime objects spaced by one month""" """ Generate sequence of datetime objects spaced by one month"""
from pylab import arange from pylab import arange
if ed < sd: if ed < sd:
raise ValueError, 'start date exceeds end date' raise ValueError('start date exceeds end date')
sys.exit(2) sys.exit(2)
dates = [] dates = []
for year in arange(sd.year, ed.year + 2): for year in arange(sd.year, ed.year + 2):
...@@ -166,7 +166,7 @@ def yearly_avg(time, data, sdev=False): ...@@ -166,7 +166,7 @@ def yearly_avg(time, data, sdev=False):
weights = weights weights = weights
if weights.shape[0] != data.shape[0]: if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) raise ValueError('yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]))
sel = (weights != 0.0).nonzero()[0] sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,ddnext #print sel,array(time).take(sel),dd,ddnext
...@@ -180,7 +180,7 @@ def yearly_avg(time, data, sdev=False): ...@@ -180,7 +180,7 @@ def yearly_avg(time, data, sdev=False):
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else: else:
raise ValueError, 'yearly_avg takes 1, 2, or 3d arrays only' raise ValueError('yearly_avg takes 1, 2, or 3d arrays only')
elif len(weights) == 1: elif len(weights) == 1:
avg_data = data[0] avg_data = data[0]
std_data = 0.0 std_data = 0.0
...@@ -225,7 +225,7 @@ def monthly_avg(time, data, sdev=False): ...@@ -225,7 +225,7 @@ def monthly_avg(time, data, sdev=False):
weights = weights weights = weights
if weights.shape[0] != data.shape[0]: if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) raise ValueError('yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]))
sel = (weights != 0.0).nonzero()[0] sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,nextmonth(dd) #print sel,array(time).take(sel),dd,nextmonth(dd)
...@@ -239,7 +239,7 @@ def monthly_avg(time, data, sdev=False): ...@@ -239,7 +239,7 @@ def monthly_avg(time, data, sdev=False):
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else: else:
raise ValueError, 'monthly_avg takes 1, 2, or 3d arrays only' raise ValueError('monthly_avg takes 1, 2, or 3d arrays only')
elif len(weights) == 1: elif len(weights) == 1:
avg_data = data[0] avg_data = data[0]
std_data = 0.0 std_data = 0.0
...@@ -287,7 +287,7 @@ def season_avg(time, data, sdev=False): ...@@ -287,7 +287,7 @@ def season_avg(time, data, sdev=False):
weights = weights weights = weights
if weights.shape[0] != data.shape[0]: if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]) raise ValueError('yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0]))
sel = (weights != 0.0).nonzero()[0] sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,nextmonth(dd) #print sel,array(time).take(sel),dd,nextmonth(dd)
...@@ -301,7 +301,7 @@ def season_avg(time, data, sdev=False): ...@@ -301,7 +301,7 @@ def season_avg(time, data, sdev=False):
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze() avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze() std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else: else:
raise ValueError, 'season_avg takes 1, 2, or 3d arrays only' raise ValueError('season_avg takes 1, 2, or 3d arrays only')
elif len(weights) == 1: elif len(weights) == 1:
avg_data = data[0] avg_data = data[0]
std_data = 0.0 std_data = 0.0
...@@ -339,7 +339,7 @@ def longterm_avg(time, data): ...@@ -339,7 +339,7 @@ def longterm_avg(time, data):
if __name__ == '__main__': if __name__ == '__main__':
#print monthgen(datetime(2000,1,1),datetime(2006,5,1)) #print monthgen(datetime(2000,1,1),datetime(2006,5,1))
dd = datetime(2002, 3, 1) dd = datetime(2002, 3, 1)
print nextmonth(dd), dd print(nextmonth(dd), dd)
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#! /usr/bin/env python
import sys
import calendar
import copy
from datetime import datetime, timedelta
from da.tools.general import date2num, num2date
from numpy import array, zeros, newaxis, logical_and, arange
def Fromdatetime(date):
dt = date.timetuple()
return datetime(*dt[0:6])
def increase_time(dd, **kwargs):
""" Function increases the time by specified amount"""
return dd + timedelta(**kwargs)
def chardate(dd, cut=8):
return dd.strftime('%Y%m%d%H%M')[0:cut]
def timegen(sd, ed, dt):
dd = []
while sd <= ed:
dd.append(Fromdatetime(sd))
sd = sd + dt
return dd
def itau2datetime(itau, iyear0):
""" Function returns a datetime object from TM5s itau times"""
date0 = datetime(iyear0, 1, 1, 0, 0, 0)
if not isinstance(itau, list): # accept a single itau value as well as a list (len() fails on an int)
itau = [itau]
for time in itau:
sec = time % 60
min = (time / 60) % 60
hrs = (time / 3600) % 24
day = (time / 86400)
dt = timedelta(days=day, hours=hrs, minutes=min, seconds=sec)
yield date0 + dt
def date2dec(date):
""" Function converts datetime object to a Decimal number time (e.g., 1991.875 such as in IDL, CCG) """
if not isinstance(date, list): date = [date]
newdate = []
for dd in date:
Days0 = date2num(datetime(dd.year, 1, 1))
if calendar.isleap(dd.year):
DaysPerYear = 366.
else:
DaysPerYear = 365.
DayFrac = date2num(dd)
newdate.append(dd.year + (DayFrac - Days0) / DaysPerYear)
if len(newdate) == 1: return newdate[0]
return newdate
def dec2date(dectime):
""" Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python datetime object """
dt = num2date(dec2num(dectime)).timetuple()
return datetime(*dt[0:7])
def dec2num(dectime):
""" Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python decimal numtime """
from pylab import floor, drange, num2date, date2num
if not isinstance(dectime, list): dectime = [dectime]
newdectime = []
for dd in dectime:
yr = floor(dd)
Days0 = date2num(datetime(int(yr), 1, 1))
if calendar.isleap(yr):
DaysPerYear = 366.
else:
DaysPerYear = 365.
DayFrac = (dd - yr) * DaysPerYear
newdectime.append(Days0 + DayFrac)
if len(newdectime) == 1: return newdectime[0]
return newdectime
def num2dec(numtime):
""" Function converts python decimal numtime to an IDL decimal time """
from pylab import floor, drange, num2date, date2num
res = date2dec(num2mydate(numtime))
return res
def num2mydate(num):
""" Function converts decimal time from year fraction (e.g., 1991.875 such as in IDL, CCG) to a python datetime object """
dt = num2date(num).timetuple()
return datetime(*dt[0:7])
def monthgen(sd, ed):
""" Generate sequence of datetime objects spaced by one month"""
from pylab import arange
if ed < sd:
raise ValueError, 'start date exceeds end date'
sys.exit(2)
dates = []
for year in arange(sd.year, ed.year + 2):
for month in arange(1, 13):
date = datetime(year, month, 1)
if date > ed: return dates
else: dates.append(date)
def nextmonth(dd):
""" Find next 1st of the month following the date dd"""
if dd.month == 12:
cc = dd.replace(year=dd.year + 1)
ee = cc.replace(month=1)
else:
ee = dd.replace(month=dd.month + 1)
ff = ee.replace(day=1)
return ff
def in_interval(start, stop, times_in):
""" returns a list of fractions in time interval """
times = copy.copy(times_in)
interval = times[1] - times[0]
times.append(times[-1] + interval) # extend by one interval
times_filled = [times[0] + timedelta(days=d) for d in range((times[-1] - times[0]).days)]
b = []
in_int = 0.0
for t in times_filled: # loop over days
if t in times[1:]: # if new interval starts
b.append(in_int) # add previous aggregate to output
in_int = 0.0 # reset counter
in_int += int(logical_and(t >= start, t < stop)) # count if in interval [start,stop >
b.append(in_int)
if len(b) != len(times_in) : raise ValueError
return b
def yearly_avg(time, data, sdev=False):
""" make monthly average from array using rundat and data"""
years = array([d.year for d in time])
aa = []
ss = []
tt = []
dd = time[0]
ed = time[-1]
while dd <= ed:
ddnext = datetime(dd.year + 1, 1, 1)
weights = in_interval(dd, ddnext, time)
if len(weights) > 1:
weights = array(weights)
if weights.sum() > 0.0:
weights = weights / weights.sum()
else:
weights = weights
if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0])
sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,ddnext
if data.ndim == 1:
avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0)
std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0)
elif data.ndim == 2:
avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
elif data.ndim == 3:
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else:
raise ValueError, 'yearly_avg takes 1, 2, or 3d arrays only'
elif len(weights) == 1:
avg_data = data[0]
std_data = 0.0
else:
continue # next year
aa.append(avg_data)
ss.append(std_data)
tt.append(datetime(dd.year, 6, 15))
dd = ddnext
aa = array(aa).squeeze()
ss = array(ss).squeeze()
time = tt
if len(tt) == 1:
aa = aa.reshape(1, *aa.shape)
ss = ss.reshape(1, *ss.shape)
if sdev: return time, aa, ss
else : return time, aa
def monthly_avg(time, data, sdev=False):
""" make monthly average from array using rundat and data"""
years = array([d.year for d in time])
months = array([d.month for d in time])
mm = []
ss = []
tt = []
dd = time[0]
ed = time[-1]
while dd <= ed:
ddnext = nextmonth(dd)
weights = in_interval(dd, ddnext, time)
if len(weights) > 1:
weights = array(weights)
if weights.sum() > 0.0:
weights = weights / weights.sum()
else:
weights = weights
if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0])
sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,nextmonth(dd)
if data.ndim == 1:
avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0)
std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0)
elif data.ndim == 2:
avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
elif data.ndim == 3:
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else:
raise ValueError, 'monthly_avg takes 1, 2, or 3d arrays only'
elif len(weights) == 1:
avg_data = data[0]
std_data = 0.0
else:
continue # next month
mm.append(avg_data)
ss.append(std_data)
tt.append(datetime(dd.year, dd.month, 15))
dd = ddnext
mm = array(mm).squeeze()
ss = array(ss).squeeze()
time = tt
if len(tt) == 1:
mm = mm.reshape(-1, *mm.shape)
ss = ss.reshape(-1, *ss.shape)
if sdev: return time, mm, ss
else : return time, mm
def season_avg(time, data, sdev=False):
""" make season average from array using rundat and data"""
seas = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
mm = []
ss = []
tt = []
dd = time[0]
ed = time[-1]
while dd <= ed:
ddmid = nextmonth(dd)
ddnext = nextmonth(nextmonth(nextmonth(dd)))
weights = in_interval(dd, ddnext, time)
if len(weights) > 1:
weights = array(weights)
if weights.sum() > 0.0:
weights = weights / weights.sum()
else:
weights = weights
if weights.shape[0] != data.shape[0]:
raise ValueError, 'yearly_avg has wrongly shaped weights (%d) for data of (%d)' % (weights.shape[0], data.shape[0])
sel = (weights != 0.0).nonzero()[0]
#print sel,array(time).take(sel),dd,nextmonth(dd)
if data.ndim == 1:
avg_data = (weights.take(sel) * data.take(sel, axis=0)).sum(axis=0)
std_data = (weights.take(sel) * data.take(sel, axis=0)).std(axis=0)
elif data.ndim == 2:
avg_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
elif data.ndim == 3:
avg_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).sum(axis=0).squeeze()
std_data = (weights.take(sel)[:, newaxis, newaxis] * data.take(sel, axis=0)).std(axis=0).squeeze()
else:
raise ValueError, 'season_avg takes 1, 2, or 3d arrays only'
elif len(weights) == 1:
avg_data = data[0]
std_data = 0.0
else:
continue # next month
mm.append(avg_data)
ss.append(std_data)
tt.append(datetime(ddmid.year, ddmid.month, 15))
dd = ddnext
mm = array(mm).squeeze()
ss = array(ss).squeeze()
time = tt
if len(tt) == 1:
mm = mm.reshape(-1, *mm.shape)
ss = ss.reshape(-1, *ss.shape)
if sdev: return time, mm, ss
else : return time, mm
def longterm_avg(time, data):
""" Create long term mean """
time_avg = num2date(date2num(time).mean())
data_avg = data.mean(axis=0)
return time_avg, data_avg
if __name__ == '__main__':
#print monthgen(datetime(2000,1,1),datetime(2006,5,1))
dd = datetime(2002, 3, 1)
print nextmonth(dd), dd
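Two of the routines above in action, as a small worked example; the import path and the 90-day test series are assumptions for illustration only:

from datetime import datetime, timedelta
from numpy import array
from da.analysis.tools_time import date2dec, monthly_avg   # import path assumed

# decimal date: 1 July of a non-leap year is 181/365 of the way through it
print(date2dec(datetime(2003, 7, 1)))    # ~2003.496

# weighted monthly means of a daily series covering Jan-Mar 2003
times = [datetime(2003, 1, 1) + timedelta(days=d) for d in range(90)]
data = array([float(t.month) for t in times])
tt, mm = monthly_avg(times, data)        # tt = mid-month dates, mm = [1.0, 2.0, 3.0]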
...@@ -98,7 +98,7 @@ for k in keys: ...@@ -98,7 +98,7 @@ for k in keys:
if 'shortname' in k: if 'shortname' in k:
ext_transshort.append(getattr(cdf_temp, k)) ext_transshort.append(getattr(cdf_temp, k))
if 'component' in k: if 'component' in k:
ext_transcomps.append(map(int, getattr(cdf_temp, k).split(','))) ext_transcomps.append(list(map(int, getattr(cdf_temp, k).split(','))))
cdf_temp.close() cdf_temp.close()
...@@ -291,18 +291,18 @@ def cov2corr(A): ...@@ -291,18 +291,18 @@ def cov2corr(A):
""" function projects 1x1 degree map onto TransCom regions by adding gridboxes over larger areas """ """ function projects 1x1 degree map onto TransCom regions by adding gridboxes over larger areas """
from hdf2field import Sds2field from hdf2field import Sds2field
import cPickle import pickle
import os import os
from plottools import rebin from plottools import rebin
transcommapfile = 'tc_land11_oif30.hdf' transcommapfile = 'tc_land11_oif30.hdf'
transcomconversionfile = 'map_to_tc.pickle' transcomconversionfile = 'map_to_tc.pickle'
try: try:
regionselect = cPickle.load(open(transcomconversionfile, 'rb')) regionselect = pickle.load(open(transcomconversionfile, 'rb'))
except: except:
# read map from NetCDF # read map from NetCDF
print '[in map_to_tc() in tctools.py:] ' + \ print('[in map_to_tc() in tctools.py:] ' + \
'Creating conversion map and pickle file for future quick use, patience please...' 'Creating conversion map and pickle file for future quick use, patience please...')
map = Sds2field(transcommapfile, 'tc_region') map = Sds2field(transcommapfile, 'tc_region')
# create dictionary for region <-> map conversions based on 1x1 map # create dictionary for region <-> map conversions based on 1x1 map
...@@ -314,10 +314,10 @@ def cov2corr(A): ...@@ -314,10 +314,10 @@ def cov2corr(A):
if len(sel[0]) > 0: if len(sel[0]) > 0:
regs[r] = sel regs[r] = sel
regionselect = regs regionselect = regs
dummy = cPickle.dump(regionselect, open(transcomconversionfile, 'wb'), -1) dummy = pickle.dump(regionselect, open(transcomconversionfile, 'wb'), -1)
result = zeros(len(regionselect.keys()), float) result = zeros(len(list(regionselect.keys())), float)
for k, v in regionselect.iteritems(): for k, v in regionselect.items():
result[k - 1] = data.ravel().take(v).sum() result[k - 1] = data.ravel().take(v).sum()
return result return result
...@@ -332,7 +332,7 @@ def cov2corr(A): ...@@ -332,7 +332,7 @@ def cov2corr(A):
return (transnams[reg - 1],) return (transnams[reg - 1],)
elif eco: elif eco:
if reg > rundat.npparameters: if reg > rundat.npparameters:
raise IOError, 'Region number exceeds definitions' raise IOError('Region number exceeds definitions')
elif reg > rundat.n_land and reg != rundat.nparameters: elif reg > rundat.n_land and reg != rundat.nparameters:
ret = ('Ocean', oifnams[reg - rundat.n_land - 1]) ret = ('Ocean', oifnams[reg - rundat.n_land - 1])
elif reg > 209 and reg <= rundat.n_land: elif reg > 209 and reg <= rundat.n_land:
...@@ -346,12 +346,12 @@ def cov2corr(A): ...@@ -346,12 +346,12 @@ def cov2corr(A):
return (econames[(reg - 1) % 19],) return (econames[(reg - 1) % 19],)
if __name__ == '__main__': if __name__ == '__main__':
print transnams print(transnams)
print transshort print(transshort)
print ext_transnams print(ext_transnams)
print ext_transshort print(ext_transshort)
print olsonnams print(olsonnams)
print olsonshort print(olsonshort)
print ext_transcomps print(ext_transcomps)
print olsonextnams print(olsonextnams)
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
import os
import sys
sys.path.append('../../')
rootdir = os.getcwd().split('da/')[0]
analysisdir = os.path.join(rootdir, 'da/analysis')
from string import join, split
from numpy import array, identity, zeros, arange, dot, sqrt, newaxis # sqrt and newaxis are used by cov2corr below
import da.tools.io4 as io
# Get masks of different region definitions
matrix_file = os.path.join(analysisdir, 'copied_regions.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
transcommask = cdf_temp.get_variable('transcom_regions')
if transcommask.max() < 23:
if 'transcom_regions_original' in cdf_temp.variables:
transcommask = cdf_temp.get_variable('transcom_regions_original')
olson240mask = cdf_temp.get_variable('regions')
olsonmask = cdf_temp.get_variable('land_ecosystems')
oifmask = cdf_temp.get_variable('ocean_regions')
dummy = cdf_temp.close()
matrix_file = os.path.join(analysisdir, 'copied_regions_extended.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
olson_ext_mask = cdf_temp.get_variable('regions')
dummy = cdf_temp.close()
# Names and short names of TransCom regions
transshort = []
transnams = []
transland = []
temp = open(os.path.join(analysisdir, 't3_region_names'), 'r').readlines()
for line in temp:
items = line.split()
if items:
num, abbr, name = (items[0], items[1], join(items[2:]),)
transnams.append(name.strip('"'))
transshort.append(abbr)
if abbr.startswith('T'):
transland.append(name.strip('"'))
transnams.append("Non-optimized")
transshort.append("I-NNOP")
# Names and short names of Olson regions
olsonnams = []
olsonshort = []
temp = open(os.path.join(analysisdir, 'olson19_region_names'), 'r').readlines()
for line in temp:
items = line.split()
if items:
num, abbr, name = (items[0], items[1], join(items[2:]),)
olsonnams.append(name.strip('"'))
olsonshort.append(abbr)
olsonextnams = []
matrix_file = os.path.join(analysisdir, 'copied_regions_extended.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
keys = cdf_temp.ncattrs()
keys.sort()
for k in keys:
if 'Region' in k:
olsonextnams.append(getattr(cdf_temp, k))
cdf_temp.close()
ext_transnams = []
ext_transshort = []
ext_transcomps = []
# Get names of aggregated regions for post aggregation
matrix_file = os.path.join(analysisdir, 'postagg_definitions.nc')
cdf_temp = io.CT_CDF(matrix_file, 'read')
xform = cdf_temp.get_variable('xform')
keys = cdf_temp.ncattrs()
keys.sort()
for k in keys:
if 'longname' in k:
ext_transnams.append(getattr(cdf_temp, k))
if 'shortname' in k:
ext_transshort.append(getattr(cdf_temp, k))
if 'component' in k:
ext_transcomps.append(map(int, getattr(cdf_temp, k).split(',')))
cdf_temp.close()
# Names of the ocean inversion flux regions, to go along with oifmask
oifnams = ['(1) NOCN Arctic Ocean', \
'(2) NAH North Atlantic (49 - 76N)', \
'(3) NAM North Atlantic (36 - 49N)', \
'(4) NAL North Atlantic (18 - 36N)', \
'(5) NAT North Atlantic ( 0 - 18N)', \
'(6) SAT South Atlantic ( 0 - 18S)', \
'(7) SAL South Atlantic (18 - 31S)', \
'(8) SAM South Atlantic (31 - 44S)', \
'(9) SAH South Atlantic (44 - 58S)', \
'(10) SOCN Southern Ocean (S of 58S)', \
'(11) NPHW North Pacific (N of 49N, W of 195E)', \
'(12) NPHE North Pacific (N of 36N, E of 195E)', \
'(13) NPK North Pacific (Kuroshio Extension)', \
'(14) NPLW North Pacific (18N - K.Ext, W of 195E)', \
'(15) NPLE North Pacific (18 - 36N, E of 195E)', \
'(16) NPTW North Pacific ( 0 - 18N, W of 199E)', \
'(17) NPTE North Pacific ( 0 - 18N, E of 199E)', \
'(18) SPTW South Pacific ( 0 - 18S, W of 199E)', \
'(19) SPTE South Pacific ( 0 - 18S, E of 199E)', \
'(20) SPLW South Pacific (18 - 31S, W of 233E)', \
'(21) SPLE South Pacific (18 - 31S, E of 233E)', \
'(22) SPMW South Pacific (31 - 44S, W of 248E)', \
'(23) SPME South Pacific (31 - 44S, E of 248E, W of 278E)', \
'(24) SPMC South Pacific (31 - 44S, coastal E of 278E)', \
'(25) SPH South Pacific (44 - 58S) ', \
'(26) NI North Indian', \
'(27) SIT South Indian (0 - 18S)', \
'(28) SIL South Indian (18 - 31S)', \
'(29) SIM South Indian (31 - 44S)', \
'(30) SIH South Indian (44 - 58S)']
oiflocs = [ (200, 80,), \
(330, 55,), \
(330, 40,), \
(330, 22,), \
(330, 8,), \
(350, -12,), \
(350, -27,), \
(350, -40,), \
(350, -53,), \
(200, -70,), \
(178, 54,), \
(210, 40,), \
(165, 38,), \
(178, 25,), \
(215, 25,), \
(170, 8,), \
(230, 8,), \
(175, -10,), \
(240, -10,), \
(195, -27,), \
(265, -27,), \
(195, -40,), \
(262, -40,), \
(283, -40,), \
(220, -53,), \
(68, 8,), \
(75, -10,), \
(75, -27,), \
(75, -40,), \
(75, -53,)]
translocs = [ (-177, 0), \
(-92, 53,), \
(-108, 34,), \
(-66, 4,), \
(-50, -17,), \
(15, 17,), \
(26, -12,), \
(84, 63,), \
(103, 30,), \
(115, 0,), \
(132, -25,), \
(9, 50,), \
(-174, 46,), \
(136, 6,), \
(-108, 6,), \
(-123, -15,), \
(-32, 58,), \
(-32, 38,), \
(-32, 0,), \
(-32, -38,), \
(-14, -65,), \
(68, 2,)]
#olsonshort=[str(name.split()[1:2]).join(' ') for name in olsonnams]
old_olsonshort = [join(split(name, ' ')[1:2], ' ') for name in olsonnams]
olsonlabs = ['Conifer Forest', 'Broadleaf Forest', 'Mixed Forest', 'Grass/Shrub', 'Tropical Forest', 'Scrub/Woods', 'Semitundra', 'Fields/Woods/\nSavanna', \
'Northern Taiga', 'Forest/Field', 'Wetland', 'Deserts', 'Shrub/Tree/\nSuc ', 'Crops', 'Conifer\n Snowy/Coastal', \
'Wooded tundra', 'Mangrove', 'Ice and \nPolar desert', 'Water']
ecmwfnams = [ ' 1 CRPSMF Crops, mixed farming', \
' 2 SHGRSS Short Grass', \
' 3 EVNDLF Evergreen Needleleaf', \
' 4 DECNDLF Deciduous Needleleaf', \
' 5 EVBRDLF Evergreen Broadleaf', \
' 6 DECBRLF Deciduous Broadleaf', \
' 7 TLGRSS Tall Grass', \
' 8 DES Desert', \
' 9 TDR Tundra', \
'10 IRRCR Irrigated Crops', \
'11 SMDES Semidesert', \
'12 ICE Ice Caps', \
'13 BGM Bogs and Marches', \
'14 INW Inland Water', \
'15 OCE Ocean', \
'16 EVSHRB Evergreen Shrubs', \
'17 DECSHR Deciduous shrubs', \
'18 MXFRST Mixed Forest', \
'19 INTFRST Interrupted Forest']
ecmwfshort = [str(name.split()[1:2]).join(' ') for name in ecmwfnams]
ecmwflabs = ['Crops, mixed farming', 'Short Grass', 'Evergreen Needleleaf', 'Deciduous Needleleaf', 'Evergreen Broadleaf', \
'Deciduous Broadleaf', 'Tall Grass', 'Desert', \
'Tundra', 'Irrigated Crops', 'Semidesert', 'Ice Caps', 'Bogs and Marches', 'Inland Water', 'Ocean', \
'Evergreen Shrubs', 'Deciduous shrubs', 'Mixed Forest', 'Interrupted Forest']
a = array([\
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , \
1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 1 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 0 , \
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0])
O30to11 = a.reshape(11, 30).transpose()
O11to11 = identity(11)
ntcland = 11 # TC standard
ntcocean = 11 # TC standard
Ols259_to_TC23 = zeros((259, 23), float)
Ols240_to_TC23 = zeros((240, 23), float)
Ols221_to_TC23 = zeros((221, 23), float)
for i in arange(ntcland):
Ols259_to_TC23[i * 19:(i + 1) * 19, i] = 1.0
Ols259_to_TC23[190:228, 10] = 1.0 # Europe
Ols259_to_TC23[228:258, 11:22] = O30to11
for i in arange(ntcland):
Ols240_to_TC23[i * 19:(i + 1) * 19, i] = 1.0
Ols240_to_TC23[209:239, 11:22] = O30to11
for i in arange(ntcland):
Ols221_to_TC23[i * 19:(i + 1) * 19, i] = 1.0
Ols221_to_TC23[209:220:, 11:22] = O11to11
Ols221_to_TC23[220, 22] = 1.0
Ols240_to_TC23[239, 22] = 1.0
Ols259_to_TC23[258, 22] = 1.0
ntcland = 11 # TC standard
ntcocean = 11 # TC standard
ExtendedTCRegionsFile = 'postagg_definitions.nc'
def ExtendedTCRegions(data, cov=False):
""" convert to extended transcom shaped regions"""
nparams = data.shape[-1]
if nparams != 23:
raise ValueError('Do not know how to convert %s regions to 37 extended transcom regions' % (nparams,))
M = xform
if not cov:
return dot(array(data).squeeze(), M)
else:
try:
return M.transpose().dot(data).dot(M)
except:
return dot(dot(M.transpose(), data), M) #Huygens fix
def cov2corr(A):
b = 1. / sqrt(A.diagonal())
return A * dot(b[:, newaxis], b[newaxis, :])
""" function projects 1x1 degree map onto TransCom regions by adding gridboxes over larger areas """
from hdf2field import Sds2field
import cPickle
import os
from plottools import rebin
transcommapfile = 'tc_land11_oif30.hdf'
transcomconversionfile = 'map_to_tc.pickle'
try:
regionselect = cPickle.load(open(transcomconversionfile, 'rb'))
except:
# read map from NetCDF
print '[in map_to_tc() in tctools.py:] ' + \
'Creating conversion map and pickle file for future quick use, patience please...'
map = Sds2field(transcommapfile, 'tc_region')
# create dictionary for region <-> map conversions based on 1x1 map
regs = {}
nregions = map.max()
for r in arange(1, nregions + 1):
sel = (map.flat == r).nonzero()
if len(sel[0]) > 0:
regs[r] = sel
regionselect = regs
dummy = cPickle.dump(regionselect, open(transcomconversionfile, 'wb'), -1)
result = zeros(len(regionselect.keys()), float)
for k, v in regionselect.iteritems():
result[k - 1] = data.ravel().take(v).sum()
return result
""" return name of region number reg """
if longnames:
econames = olsonnams
else :
econames = olsonshort
if tc:
return (transnams[reg - 1],)
elif eco:
if reg > rundat.npparameters:
raise IOError, 'Region number exceeds definitions'
elif reg > rundat.n_land and reg != rundat.nparameters:
ret = ('Ocean', oifnams[reg - rundat.n_land - 1])
elif reg > 209 and reg <= rundat.n_land:
ret = ('Europe', econames[(reg - 1) % 19] + "_East")
elif reg == rundat.nparameters:
ret = (transnams[-1])
else:
ret = (transnams[(reg - 1) / 19], econames[(reg - 1) % 19])
return ret
elif olson:
return (econames[(reg - 1) % 19],)
if __name__ == '__main__':
print transnams
print transshort
print ext_transnams
print ext_transshort
print olsonnams
print olsonshort
print ext_transcomps
print olsonextnams
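cov2corr() above rescales a covariance matrix to a correlation matrix using the inverse square roots of its diagonal; a quick self-contained numerical check of that formula:

import numpy as np

A = np.array([[4.0, 2.0], [2.0, 9.0]])   # covariance matrix
b = 1. / np.sqrt(A.diagonal())
C = A * np.dot(b[:, np.newaxis], b[np.newaxis, :])
print(C)                                  # unit diagonal, off-diagonal 2/(2*3) = 0.333...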
File added