Commit b4e4a62a authored by Auke van der Woude

initialising commit

parent fb8f88ad
#!/usr/bin/env python
# control.py
"""
Author : peters
Revision History:
File created on 26 Aug 2010.
Adapted by super004 on 26 Jan 2017.
"""
import logging
################### Begin Class CO2DaSystem ###################
from da.baseclasses.dasystem import DaSystem
class CO2DaSystem(DaSystem):
""" Information on the data assimilation system used. This is normally an rc-file with settings.
"""
def validate(self):
"""
validate the contents of the rc-file given a dictionary of required keys
"""
needed_rc_items = ['obs.input.id',
'obs.input.nr',
'obs.spec.nr',
'obs.cat.nr',
'nparameters',
'random.seed',
'emis.pparam',
'ff.covariance',
'obs.bgswitch',
'obs.background',
'emis.input.spatial',
'emis.input.tempobs',
'emis.input.tempprior',
'emis.paramfile',
'emis.paramfile2',
'run.emisflag',
'run.emisflagens',
'run.obsflag']
        for k, v in self.items():
            if v == 'True':
                self[k] = True
            if v == 'False':
                self[k] = False
        for key in needed_rc_items:
            if key not in self:
                logging.warning('Missing a required value in rc-file : %s' % key)
        logging.debug('DA System Info settings have been validated successfully')
################### End Class CO2DaSystem ###################
if __name__ == "__main__":
pass
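    # Minimal self-contained illustration of the coercion validate() applies to the
    # rc settings (a plain dict stands in for the rc-file contents here):
    settings = {'run.emisflag': 'False', 'run.emisflagens': 'True'}
    for k, v in settings.items():
        if v == 'True':
            settings[k] = True
        if v == 'False':
            settings[k] = False
    print(settings)   # {'run.emisflag': False, 'run.emisflagens': True}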
#!/usr/bin/env python
# stilt_tools.py
"""
Author : I. Super
Revision History:
Newly developed code, September 2017
This module holds an emission model that prepares the emission files used by the observation operator
and creates pseudo-data.
"""
import shutil
import os
import logging
import datetime as dtm
import numpy as np
from numpy import array, logical_and
import da.tools.io4 as io
import math
import da.tools.rc as rc
from da.tools.general import create_dirs, to_datetime
identifier = 'EmissionModel ensemble '
version = '1.0'
################### Begin Class Emission model ###################
class EmisModel(object):
def __init__(self, dacycle=None):
        if dacycle is not None:
            self.dacycle = dacycle
        else:
            self.dacycle = {}
def setup(self, dacycle):
self.dacycle = dacycle
self.startdate = self.dacycle['time.fxstart']
self.enddate = self.dacycle['time.finish']
self.emisdir = dacycle.dasystem['datadir']
self.proxyfile = dacycle.dasystem['emis.input.spatial']
self.tempfileo = dacycle.dasystem['emis.input.tempobs']
self.tempfilep = dacycle.dasystem['emis.input.tempprior']
self.btime = int(dacycle.dasystem['run.backtime'])
self.obsfile = dacycle.dasystem['obs.input.id']
self.nrspc = int(dacycle.dasystem['obs.spec.nr'])
self.nrcat = int(dacycle.dasystem['obs.cat.nr'])
self.nparams = int(dacycle.dasystem['nparameters'])
self.nmembers = int(dacycle['da.optimizer.nmembers'])
self.pparam = dacycle.dasystem['emis.pparam']
self.paramfile = dacycle.dasystem['emis.paramfile']
#self.paramfile2 = dacycle.dasystem['emis.paramfile2']
def get_emis(self, dacycle, psdo):
"""set up emission information for pseudo-obs (psdo=1) and ensemble runs (psdo=0)"""
if psdo==1:
priorparam=os.path.join(self.emisdir,self.pparam)
f = io.ct_read(priorparam, 'read')
self.prm = f.get_variable('true_values')[:self.nparams]
f.close()
self.get_spatial(dacycle, n=999, infile=os.path.join(self.emisdir, self.paramfile))
self.get_temporal(dacycle, n=999, psdo=1)
elif psdo==0:
self.timestartkey = self.dacycle['time.sample.start']
self.timefinishkey = self.dacycle['time.sample.end']
for j in range(self.nmembers):
#first remove old files, then create new emission files per ensemble member
if self.startdate == self.timestartkey:
file = os.path.join(dacycle.dasystem['datadir'],'temporal_data_%03d.nc'%j)
try:
os.remove(file)
except OSError:
pass
prmfile=os.path.join(dacycle['dir.input'],'parameters.%03d.nc'%j)
f = io.ct_read(prmfile, 'read')
self.prm = f.get_variable('parametervalues')
f.close()
self.get_spatial(dacycle, n=j, infile=os.path.join(self.emisdir, self.paramfile))
self.get_temporal(dacycle, n=j, psdo=0)
def get_totals(self, infile=None):
"""gather all required data and calculate total emissions per sector in kg/yr"""
        yremis = np.zeros((self.nrcat, self.nrspc))
self.spatial_var = []
self.spatial_inc = []
self.temporal_var = []
self.temporal_inc = []
self.ops_sector = []
### Read in parameter values for the emission functions and ratios to calculate total emissions
f = open(infile, 'r')
lines = f.readlines()
f.close()
ct = 0
for line in lines:
dum=line.split(",")
if dum[0]=='#':
continue
else:
id = int(dum[0])
### If id == 0 this (sub)sector is treated only by WRF-STILT; if id takes another value the local sources are treated by OPS
if id != 0:
self.ops_sector.append(ct)
inc = int(dum[2])
### If inc == 999 this parameter is not in the state vector and will not be optimized; otherwise inc refers to the
### position of this parameter in the state vector
if inc!=999:
EC = float(dum[1])*self.prm[inc]
else:
EC = float(dum[1])
inc = int(dum[4])
if inc!=999:
EF = float(dum[3])*self.prm[inc]
else:
EF = float(dum[3])
inc = int(dum[6])
if inc!=999:
AD = float(dum[5])*self.prm[inc]
else:
AD = float(dum[5])
inc = int(dum[8])
if inc!=999:
fr = float(dum[7])*self.prm[inc]
else:
fr = float(dum[7])
### emission = energy consumption per activity x emission factor x activity
### fr can be used to divide sectoral emissions over several subsectors (e.g. to split road traffic into cars and HDV)
ems = EC*EF*AD*fr
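                ### Worked example with hypothetical numbers: EC = 0.8 (energy use per unit of
                ### activity), EF = 56.1 (kg CO2 per unit of energy), AD = 1e6 (activity data)
                ### and fr = 0.4 give ems = 0.8 * 56.1 * 1e6 * 0.4 ~ 1.8e7 kg CO2/yr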
### Now we calculated emissions of CO2. To get emissions of other trace gases we multiply with an emission ratio
for s in range(self.nrspc):
inc = int(dum[15+s*3])
if inc!=999:
rat = float(dum[13+s*3])*self.prm[inc]
else:
rat = float(dum[13+s*3])
### Some emission ratios have lognormal uncertainty distributions (label 'l')
if dum[14+s*3]=='n':
yremis[ct,s] = ems*rat
elif dum[14+s*3]=='l':
yremis[ct,s] = ems*np.exp(rat)
ct = ct + 1
### Here we list the spatial and temporal variables that are part of the state vector for use in the get_spatial and get_temporal functions
self.spatial_var.append(dum[9])
self.spatial_inc.append(int(dum[10]))
self.temporal_var.append(dum[11])
self.temporal_inc.append(int(dum[12]))
logging.debug("Successfully calculated total emissions")
return yremis
def get_spatial(self, dacycle, n, infile=None):
"""read in proxy data used for spatial distribution of the gridded emissions, disaggregate yearly totals for the area"""
yremis=self.get_totals(infile)
# read in species information
infile = os.path.join(self.emisdir, self.obsfile)
f = open(infile, 'r')
lines = f.readlines()
f.close()
M_mass = []
spname = []
for line in lines:
dum=line.split(",")
if dum[0]=='#':
continue
else:
M_mass.append(float(dum[6])*1e-9) #kg/micromole
spname.append(dum[5]) #name of the species
sec_year = 8760.*3600. #seconds in a year
arcor = 1e6 #km2 -> m2
conv = np.array(M_mass)*sec_year*arcor # to convert emissions in kg/km2/yr to micromole/m2/s
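        ### e.g. for CO2 (44 g/mol, so M_mass = 4.4e-8 kg/micromole):
        ### conv = 4.4e-8 * 3.1536e7 * 1e6 ~ 1.39e6, i.e. 1 kg/km2/yr ~ 7.2e-7 micromole/m2/s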
#read in proxy data for spatial disaggregation
infile = os.path.join(self.emisdir, self.proxyfile)
prx = io.ct_read(infile, method='read')
sp_distr = []
for c in range(self.nrcat):
sp_distr.append(prx.get_variable(self.spatial_var[c]))
sp_distr=np.array(sp_distr)
prx.close()
### create output file
prior_file = os.path.join(self.emisdir, 'prior_spatial_%03d.nc'%n)
f = io.CT_CDF(prior_file, method='create')
dimid = f.add_dim('ncat', self.nrcat)
dimid2 = f.add_dim('ops', 3)
dimlon = f.add_dim('lon', sp_distr.shape[1])
dimlat = f.add_dim('lat', sp_distr.shape[2])
#loop over all tracers
for s in range(self.nrspc):
# determine which fraction of the emissions are local and are treated by OPS
datalistOPS = []
opsfr = [0.317, 0.317, 0.267]
for j in range(len(self.ops_sector)):
OPSemis = yremis[self.ops_sector[j],s] * opsfr[j] * 1E3 / sec_year
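                ### kg/yr -> g/s: multiply by 1e3 (g per kg) and divide by the seconds in a year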
datalistOPS.append(OPSemis)
savedict = io.std_savedict.copy()
savedict['name'] = "OPStotals_%s"%spname[s]
savedict['long_name'] = "total OPS emissions"
savedict['units'] = "g/s"
savedict['dims'] = dimid2
savedict['values'] = datalistOPS
f.add_data(savedict)
datalist = []
ct = 0
for c in range(self.nrcat):
inc = self.spatial_inc[c]
if inc!=999:
                    ### note that the scaling factor is applied to the gridded spatial distribution,
                    ### such that it also affects the total emissions in this area
distr = sp_distr[c]*self.prm[inc]
else:
distr = sp_distr[c]
if c in self.ops_sector:
emis_spatial = (yremis[c,s]*(1-opsfr[ct])) / conv[s] * distr
ct = ct + 1
else:
emis_spatial = yremis[c,s] / conv[s] * distr
datalist.append(emis_spatial)
savedict = io.std_savedict.copy()
savedict['name'] = spname[s]
savedict['long_name'] = "Spatially distributed emissions"
savedict['units'] = "micromole/m2/s"
savedict['dims'] = dimid + dimlon + dimlat
savedict['values'] = datalist
f.add_data(savedict)
f.close()
logging.debug("Successfully wrote data to prior spatial distribution file (%s)" % prior_file)
def get_temporal(self, dacycle, n, psdo):
"""read in time profiles used for temporal distribution of the emissions"""
### For pseudo-observation (psdo==1) or when no time profiles need to be optimized the profiles are simply read from the
### input file and copied to another file.
### Otherwise create a new file per ensemble member at t=0 and update the profiles for each time step
if psdo==0 and min(self.temporal_inc)<999:
ensfile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%n)
if os.path.exists(ensfile) == False:
dumfile = os.path.join(self.emisdir, self.tempfilep)
shutil.copy2(dumfile,ensfile)
tpr = io.ct_read(ensfile, method='read')
itimes = tpr.get_variable('Times')
times = array([dtm.datetime(int(''.join(d[:4])),int(''.join(d[5:7])),int(''.join(d[8:10])),int(''.join(d[11:13])),int(''.join(d[14:16]))) for d in itimes])
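            ### 'Times' is assumed to hold character arrays like '2017-01-15_06:00'; the slicing
            ### above extracts the year, month, day, hour and minute fields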
subselect = logical_and(times >= self.timestartkey , times <= self.timefinishkey).nonzero()[0]
datlist = times.take(subselect, axis=0)
### The time profiles should always cover at least one full year
start_date = dtm.datetime(self.timestartkey.year,1,1,0,0) #first time included
end_date = dtm.datetime(self.timestartkey.year,12,31,23,0) #last time included
dt = dtm.timedelta(0,3600)
dum = start_date
times=[]
while dum<=end_date:
times.append(dum)
dum=dum+dt
times=np.array(times)
stidx = np.where(times==self.timestartkey)[0][0]
stidx2 = np.where(times==self.startdate)[0][0]
edidx = np.where(times==self.timefinishkey)[0][0]
dumsel = np.where((times<self.startdate)|(times>self.timefinishkey))[0]
""" Time profiles should, for a full year, always have an average value of 1.0. Therefore, a new method has been developed
to optimize time profiles such that we comply with this and the time profiles do not affect the total emissions.
For this purpose we apply the scaling factor (statevector) to the period covered in this cycle. The time profile for all dates
outside this period are scaled equally such that the time profile remains its average value of 1.0. Except previously updated
dates (from previous cycles) are maintained (they are already optimized!)."""
profiles = []
for c in range(self.nrcat):
if self.temporal_inc[c]!=999:
f_orig = tpr.get_variable(self.temporal_var[c])
f_sel = tpr.get_variable(self.temporal_var[c]).take(subselect, axis=0)
f_new = f_sel*self.prm[self.temporal_inc[c]]
                    dumsum = np.array(f_orig[dumsel]).sum()
                    f_orig[:stidx2] = f_orig[:stidx2] - (f_orig[:stidx2] / dumsum) * (f_new.sum() - f_sel.sum())
                    f_orig[edidx+1:] = f_orig[edidx+1:] - (f_orig[edidx+1:] / dumsum) * (f_new.sum() - f_sel.sum())
                    f_orig[stidx:edidx+1] = f_new
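                    ### Bookkeeping check: the extra mass added to the optimized window,
                    ### f_new.sum() - f_sel.sum(), is removed proportionally from the hours in
                    ### dumsel, so the profile keeps its annual sum and thus its mean of 1.0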
profiles.append(f_orig)
tpr.close()
f = io.CT_CDF(ensfile, method='write')
ct=0
for c in range(self.nrcat):
if self.temporal_inc[c]!=999:
f.variables[self.temporal_var[c]][:] = profiles[ct]
ct=ct+1
f.close()
### Now read in the correct profiles, select the correct time period and write the profiles into one file per ensemble member
if psdo==1:
infile = os.path.join(self.emisdir, self.tempfileo)
elif psdo==0 and min(self.temporal_inc)==999:
infile = os.path.join(self.emisdir, self.tempfilep)
elif psdo==0 and min(self.temporal_inc)<999:
infile = os.path.join(self.emisdir, 'temporal_data_%03d.nc'%n)
tpr = io.ct_read(infile, method='read')
itimes = tpr.get_variable('Times')
times = array([dtm.datetime(int(''.join(d[:4])),int(''.join(d[5:7])),int(''.join(d[8:10])),int(''.join(d[11:13])),int(''.join(d[14:16]))) for d in itimes])
if psdo == 1:
            startdum = dtm.datetime(self.startdate.year, self.startdate.month, self.startdate.day, 1, 0) - dtm.timedelta(days=1)
subselect = logical_and(times >= startdum, times <= self.enddate).nonzero()[0]
else:
dum = self.timestartkey - dtm.timedelta(0,self.btime*3600)
if dum.hour != 0:
startdum = dtm.datetime(dum.year,dum.month,dum.day,1,0)
else:
                startdum = dtm.datetime(dum.year, dum.month, dum.day, 1, 0) - dtm.timedelta(days=1)
subselect = logical_and(times >= startdum , times <= self.timefinishkey).nonzero()[0]
datlist = times.take(subselect, axis=0)
profiles=[]
for c in range(self.nrcat):
f_orig = tpr.get_variable(self.temporal_var[c]).take(subselect, axis=0)
profiles.append(f_orig)
tpr.close()
profiles=np.array(profiles)
prior_file = os.path.join(self.emisdir, 'prior_temporal_%03d.nc'%n)
f = io.CT_CDF(prior_file, method='create')
dimtime = f.add_dim('Times', len(datlist))
dum=[]
for c in range(self.nrcat):
if self.temporal_var[c] in dum:
continue
else:
savedict = io.std_savedict.copy()
savedict['name'] = self.temporal_var[c]
savedict['long_name'] = "Temporal distribution"
savedict['units'] = ""
savedict['dims'] = dimtime
savedict['values'] = profiles[c,:]
f.add_data(savedict)
dum.append(self.temporal_var[c])
f.close()
logging.debug("Successfully wrote data to prior temporal distribution file (%s)" % prior_file)
################### End Class Emission model ###################
if __name__ == "__main__":
pass
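    # Hedged usage sketch (assumes a configured dacycle object carrying the rc-file
    # settings listed in CO2DaSystem.validate(); not run as part of the pipeline):
    #
    #   em = EmisModel()
    #   em.setup(dacycle)
    #   em.get_emis(dacycle, psdo=0)   # per member: prior_spatial_%03d.nc and prior_temporal_%03d.nc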
#!/usr/bin/env python
# optimizer.py
"""
.. module:: optimizer
.. moduleauthor:: Wouter Peters
Revision History:
File created on 28 Jul 2010.
"""
import os
import logging
import numpy as np
import numpy.linalg as la
import da.tools.io4 as io
identifier = 'Optimizer CO2'
version = '0.0'
from da.baseclasses.optimizer import Optimizer
################### Begin Class CO2Optimizer ###################
class CO2Optimizer(Optimizer):
"""
    This creates an instance of an optimization object. It handles the least-squares optimization
    of the state vector given a set of sample objects. Two routines are implemented: a sequential
    solution and the equivalent matrix solution; the choice between them can be made based on
    considerations of speed and efficiency.
"""
def setup(self, dims):
self.nlag = dims[0]
self.nmembers = dims[1]
self.nparams = dims[2]
self.nobs = dims[3]
self.nrloc = dims[4]
self.nrspc = dims[5]
self.inputdir = dims[6]
        #self.specfile = dims[7]  # note: the 'multitracer' localization in localize() below reads self.specfile
self.create_matrices()
def set_localization(self, loctype='None'):
""" determine which localization to use """
if loctype == 'CT2007':
self.localization = True
self.localizetype = 'CT2007'
#T-test values for two-tailed student's T-test using 95% confidence interval for some options of nmembers
if self.nmembers == 50:
self.tvalue = 2.0086
elif self.nmembers == 100:
self.tvalue = 1.9840
elif self.nmembers == 150:
self.tvalue = 1.97591
elif self.nmembers == 200:
self.tvalue = 1.9719
else: self.tvalue = 0
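            # These match the two-tailed 95% t-values for df = nmembers (e.g.
            # scipy.stats.t.ppf(0.975, 50) ~ 2.0086), while the significance test in
            # localize() below uses nmembers - 2 degrees of freedom in the denominator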
elif loctype == 'multitracer':
self.localization = True
self.localizetype = 'multitracer'
elif loctype == 'multitracer2':
self.localization = True
self.localizetype = 'multitracer2'
else:
self.localization = False
self.localizetype = 'None'
logging.info("Current localization option is set to %s" % self.localizetype)
def localize(self, n):
""" localize the Kalman Gain matrix """
if not self.localization:
            logging.debug('Observation %i not localized' % self.obs_ids[n])
return
if self.localizetype == 'CT2007':
count_localized = 0
for r in range(self.nlag * self.nparams):
corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
prob = corr / np.sqrt((1.000000001 - corr ** 2) / (self.nmembers - 2))
if abs(prob) < self.tvalue:
self.KG[r] = 0.0
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
if self.localizetype == 'multitracer':
count_localized = 0
###read emission ratios for source type related to each parameter
infile = os.path.join(self.inputdir, self.specfile)
f = open(infile, 'r')
lines = f.readlines()
f.close()
speclist = []
speclist.append('CO2')
emr = []
for line in lines:
dum=line.split(",")
if dum[0]=='#' or dum[0]=='CO2' or dum[0]=='SNAP':
continue
else:
sp = dum[0]
emrs = []
for k in range(self.nparams):
emrs.append(float(dum[k+1]))
speclist.append(sp)
emr.append(emrs)
emr=np.array(emr)
            ###find obs and model value for this time step and species; calculate differences and ratios
            lnd = self.nobs // (self.nrspc * self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [k - i for k in range(self.nrspc)]
Xobs = []
Xmod = []
Robs = []
Rmod = []
for i in range(self.nrspc):
Xobs.append(self.obs[n+lnd*idx[i]])
Xmod.append(self.Hx[n+lnd*idx[i]])
if i>0:
Robs.append(Xobs[i]/Xobs[0])
Rmod.append(Xmod[i]/Xmod[0])
Xobs = np.array(Xobs)
Xmod = np.array(Xmod)
Robs = np.array(Robs)
Rmod = np.array(Rmod)
dX = Xmod - Xobs
dR = abs(Rmod - Robs)
            flg = 1
            if Xobs[0] > 1.:   #and self.species[n] == 'CO2'
                flg = 0
                for i in range(self.nrspc):
                    if Xobs[i] == 0. or Xmod[i] == 0.:
                        flg = 1
###old version
# for i in range(self.nrspc):
# if dX[i]>0:
# flg.append(1)
# elif dX[i]<0:
# flg.append(0)
# else:
# flg.append(np.nan)
            ### This routine determines which source types are likely to cause the model-data mismatch, based on the following
            ### principle: if the modeled CO:CO2 ratio is higher than the observed ratio, we either overestimate emissions with a
            ### high CO:CO2 ratio or underestimate emissions with a low CO:CO2 ratio; if the CO concentration itself is
            ### overestimated, the first option (too high emissions) is the more likely one. We only use information from a
            ### species if the model-data mismatch in the ratio exceeds 5% of the observed ratio
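            ### Example: if both CO2 and CO are overestimated (dX > 0) and the modeled CO:CO2 ratio
            ### exceeds the observed one, sectors whose CO:CO2 emission ratio lies above the observed
            ### ratio are the likely culprits, and only their parameters keep a nonzero Kalman gain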
dums = []
dum = []
tst1 = []
if flg == 0:
for i in range(self.nrspc-1):
if dX[0]>0:
if dX[i+1]>0:
if Rmod[i]>Robs[i]:
tst1.append(1)
elif Rmod[i]<Robs[i]:
tst1.append(-1)
elif dX[i+1]<0:
if Rmod[i]>Robs[i]:
tst1.append(0)
elif Rmod[i]<Robs[i]:
tst1.append(2)
elif dX[0]<0:
if dX[i+1]>0:
if Rmod[i]>Robs[i]:
tst1.append(2)
elif Rmod[i]<Robs[i]:
tst1.append(0)
elif dX[i+1]<0:
if Rmod[i]>Robs[i]:
tst1.append(-1)
elif Rmod[i]<Robs[i]:
tst1.append(1)
if 2 in tst1:
dums1=[]
dums2=[]
for i in range(self.nrspc-1):
for k in range(len(emr[i])):
if emr[i,k]<Robs[i]:
dums1.append(k)
if emr[i,k]>Robs[i]:
dums2.append(k)
for j in range(self.nparams):
ct = dums1.count(j)
ctc = dums2.count(j)
if ct == (self.nrspc - 1) or ctc == (self.nrspc - 1):
### all requirements are met, so do not localize
dum.append(j)
else:
for i in range(self.nrspc-1):
if tst1[i] == 1:
for k in range(len(emr[i])):
if emr[i,k]>Robs[i]:
dums.append(k)
elif tst1[i] == -1:
for k in range(len(emr[i])):
if emr[i,k]<Robs[i]:
dums.append(k)
for j in range(self.nparams):
ct = dums.count(j)
if ct == (self.nrspc - 1):
### all requirements are met, so do not localize
dum.append(j)
###old version
# if sum(flg) == 0 or sum(flg) == 4:
### if this is not the case, it is likely a mixture of over- and underestimation, which we can't specify; so no localization applied
# for i in range(self.nrspc-1):
# if dR[i]>0.05*Robs[i]:
# if (Rmod[i]>Robs[i] and dX[i+1]>0) or (Rmod[i]<Robs[i] and dX[i+1]<0):
# tst1.append(1.)
# elif (Rmod[i]<Robs[i] and dX[i+1]>0) or (Rmod[i]>Robs[i] and dX[i+1]<0):
# tst1.append(-1.)
# else:
# tst1.append(0)
# for i in range(self.nrspc-1):
# if tst1[i] == 1:
# for k in range(len(emr[i])):
# if emr[i,k]>Robs[i]:
# dums.append(k)
# elif tst1[i] == -1:
# for k in range(len(emr[i])):
# if emr[i,k]<Robs[i]:
# dums.append(k)
# for j in range(self.nparams):
# ct = dums.count(j)
# if ct == (self.nrspc - 1):
### all requirements are met, so do not localize
# dum.append(j)
if len(dum) > 0:
### what to do when we can't attribute model-data mismatch? update all parameters or set them all to zero?? (adapt dum)
for r in range(self.nlag * self.nparams):
if r in dum:
continue
else:
self.KG[r] = 0.0
self.test_localize[r] = self.test_localize[r] + 1
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
if self.localizetype == 'multitracer2':
            ### This routine uses stricter rules for source attribution, comparing the observed
            ### concentration ratios to the emission ratios per source type
count_localized = 0
            ###find obs and model value for this time step and species; calculate differences and ratios
            ### (note: speclist is built in the 'multitracer' branch above and is assumed to be available here)
            lnd = self.nobs // (self.nrspc * self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [k - i for k in range(self.nrspc)]
Xobs = []
Robs = []
for i in range(self.nrspc):
Xobs.append(self.obs[n+lnd*idx[i]])
if i>0:
Robs.append(Xobs[i]/Xobs[0])
            dum = []
            if Robs[2] > 0.1:
                if Robs[1] < 1. and Robs[0] < 1.:
                    dum.append(4)
                elif Robs[2] > 2.5 and Robs[1] > 8. and Robs[0] > 3.:
                    dum.append(8)
            elif Robs[0] > 1. and Robs[2] < 0.1:
                if Robs[0] < 4. and Robs[1] < 1.:
                    dum.append(2)
                    dum.append(3)
                elif Robs[0] > 7. and Robs[1] > 1.5:
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
                elif Robs[0] > 3.5 and Robs[1] < 2.5:
                    dum.append(2)
                    dum.append(3)
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
            elif Robs[0] < 0.6 and Robs[1] < 0.6:
                dum.append(0)
                dum.append(1)
if len(dum) > 0 and len(dum) < self.nparams:
for r in range(self.nlag * self.nparams):
if r in dum:
continue
else:
self.KG[r] = 0.0
count_localized = count_localized + 1
logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
################### End Class CO2Optimizer ###################
if __name__ == "__main__":
pass
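    # Self-contained sketch of the CT2007 significance-based localization used in
    # localize() above (synthetic ensemble deviations; numbers are illustrative only):
    import numpy as np
    rng = np.random.RandomState(0)
    nmembers, nstate = 50, 10
    HX_prime = rng.randn(nmembers)          # sampled mixing-ratio deviations for one observation
    X_prime = rng.randn(nstate, nmembers)   # parameter deviations for each state element
    tvalue = 2.0086                         # two-tailed 95% t-value used for 50 members
    KG = np.ones(nstate)                    # stand-in Kalman gain column
    for r in range(nstate):
        corr = np.corrcoef(HX_prime, X_prime[r, :])[0, 1]
        prob = corr / np.sqrt((1.000000001 - corr ** 2) / (nmembers - 2))
        if abs(prob) < tvalue:
            KG[r] = 0.0                     # localize: zero the gain for insignificant correlations
    print('%d of %d gain elements localized' % ((KG == 0).sum(), nstate))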
#!/usr/bin/env python
# ct_statevector_tools.py
"""
.. module:: statevector
.. moduleauthor:: Wouter Peters
Revision History:
File created on 28 Jul 2010.
Adapted by super004 on 26 Jan 2017.
The module statevector implements the data structure and methods needed to work with state vectors (a set of unknown parameters to be optimized by a DA system) of different lengths, types, and configurations. Two baseclasses together form a generic framework:
* :class:`~da.baseclasses.statevector.StateVector`
* :class:`~da.baseclasses.statevector.EnsembleMember`
As usual, specific implementations of StateVector objects are done through inheritance from these baseclasses. For an example of
designing your own StateVector subclass, see :ref:`tut_chapter5`.
.. autoclass:: da.baseclasses.statevector.StateVector
.. autoclass:: da.baseclasses.statevector.EnsembleMember
"""
import os
import sys
sys.path.append(os.getcwd())
import logging
import numpy as np
from da.baseclasses.statevector import StateVector, EnsembleMember
from da.tools.general import create_dirs, to_datetime
import datetime as dtm
import da.tools.io4 as io
identifier = 'CarbonTracker Statevector '
version = '0.0'
################### Begin Class CO2StateVector ###################
class CO2StateVector(StateVector):
def __init__(self, dacycle=None):
        if dacycle is not None:
            self.dacycle = dacycle
        else:
            self.dacycle = {}
def setup(self, dacycle):
"""
setup the object by specifying the dimensions.
        There are two major requirements for each statevector that you want to build:
(1) is that the statevector can map itself onto a regular grid
(2) is that the statevector can map itself (mean+covariance) onto TransCom regions
An example is given below.
"""
self.dacycle = dacycle
self.nlag = int(self.dacycle['time.nlag'])
self.nmembers = int(self.dacycle['da.optimizer.nmembers'])
self.nparams = int(self.dacycle.dasystem['nparameters'])
self.obsdir = self.dacycle.dasystem['datadir']
self.pparam = self.dacycle.dasystem['emis.pparam']
self.covm = self.dacycle.dasystem['ff.covariance']
self.prop = int(self.dacycle.dasystem['run.propscheme'])
self.nobs = 0
        self.obs_to_assimilate = ()  # empty container to hold observations to assimilate later on
# These list objects hold the data for each time step of lag in the system. Note that the ensembles for each time step consist
# of lists of EnsembleMember objects, we define member 0 as the mean of the distribution and n=1,...,nmembers as the spread.
        self.ensemble_members = [[] for _ in range(self.nlag)]
        # In the baseclass a gridded mask file at 1x1 degrees specifies which parameter member maps onto
        # each gridbox. Here each parameter simply maps onto one element of a 1-D array; from this map, a
        # dictionary is created that allows a reverse look-up so that we can map parameters to a grid.
## Initialise an array with an element for each parameter to optimise
self.gridmap = np.arange(1,self.nparams+1,1)
# Create a dictionary for state <-> gridded map conversions
nparams = self.gridmap.max()
self.griddict = {}
for r in range(1, int(nparams) + 1):
sel = (self.gridmap.flat == r).nonzero()
if len(sel[0]) > 0:
self.griddict[r] = sel
logging.debug("A dictionary to map grids to states and vice versa was created")
# Create a mask for species/unknowns
self.make_species_mask()
def get_covariance(self, date, dacycle):
file=os.path.join(self.obsdir,self.covm)
f = io.ct_read(file, 'read')
covmatrix = f.get_variable('covariances')[:self.nparams,:self.nparams]
f.close()
return covmatrix
def write_members_to_file(self, lag, outdir,endswith='.nc'):
"""
:param: lag: Which lag step of the filter to write, must lie in range [1,...,nlag]
:param: outdir: Directory where to write files
:param: endswith: Optional label to add to the filename, default is simply .nc
:rtype: None
Write ensemble member information to a NetCDF file for later use. The standard output filename is
*parameters.DDD.nc* where *DDD* is the number of the ensemble member. Standard output file location
        is the `dir.input` of the dacycle object. In this implementation the output file holds a single
        dataset called `parametervalues`, of dimension `nparameters`.
This dataset can be read and used by a :class:`~da.baseclasses.observationoperator.ObservationOperator` object.
.. note:: if more, or other information is needed to complete the sampling of the ObservationOperator you
can simply inherit from the StateVector baseclass and overwrite this write_members_to_file function.
"""
# These import statements caused a crash in netCDF4 on MacOSX. No problems on Jet though. Solution was
# to do the import already at the start of the module, not just in this method.
#import da.tools.io as io
#import da.tools.io4 as io
members = self.ensemble_members[lag]
for mem in members:
filename = os.path.join(outdir, 'parameters.%03d%s' % (mem.membernumber, endswith))
ncf = io.CT_CDF(filename, method='create')
dimparams = ncf.add_params_dim(self.nparams)
data = mem.param_values
savedict = io.std_savedict.copy()
savedict['name'] = "parametervalues"
savedict['long_name'] = "parameter_values_for_member_%d" % mem.membernumber
savedict['units'] = "unitless"
savedict['dims'] = dimparams
savedict['values'] = data
savedict['comment'] = 'These are parameter values to use for member %d' % mem.membernumber
ncf.add_data(savedict)
ncf.close()
logging.debug('Successfully wrote data from ensemble member %d to file (%s) ' % (mem.membernumber, filename))
def make_new_ensemble(self, dacycle, lag, covariancematrix=None):
"""
:param lag: an integer indicating the time step in the lag order
:param covariancematrix: a matrix to draw random values from
:rtype: None
Make a new ensemble, the attribute lag refers to the position in the state vector.
Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
        The optional covariance object to be passed holds a matrix of dimensions [nparams, nparams] which is
        used to draw ensemble members from. If this argument is not passed it will be substituted with an
        identity matrix of the same dimensions.
"""
self.seed = int(dacycle.dasystem['random.seed'])
if self.seed != 0:
np.random.seed(self.seed)
sds = np.random.randint(1,10000,20)
else:
sds = np.random.randint(1,10000,20)
sid = (dacycle['time.start'] - dacycle['time.fxstart']).days
np.random.seed(sds[sid])
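            # note: sds holds 20 seeds, so this daily index assumes at most 20 days between
            # time.fxstart and the current cycle's time.start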
enssds = np.random.randint(1,10000,self.nmembers)
#option 1: start each cycle with the same prior values (makes several independent estimates)
if self.prop == 1 or dacycle['time.restart']==False:
file=os.path.join(self.obsdir,self.pparam)
f = io.ct_read(file, 'read')
prmval = f.get_variable('prior_values')[:self.nparams]
f.close()
#option 2: propagate optimized parameter values, but not the covariance matrix
elif self.prop == 2:
            selectdate = dacycle['time.start'] - dtm.timedelta(1)
            file = os.path.join(dacycle['dir.da_run'], 'output', selectdate.strftime('%Y%m%d'), 'optimizer.%s.nc' % selectdate.strftime('%Y%m%d'))
f = io.ct_read(file, 'read')
prmval = f.get_variable('statevectormean_optimized')[:]
f.close()
elif self.prop == 3:
#option 3: start each cycle with the parameter values and uncertainties of the previous cycle (optimized)
            selectdate = dacycle['time.start'] - dtm.timedelta(1)
            file = os.path.join(dacycle['dir.da_run'], 'output', selectdate.strftime('%Y%m%d'), 'optimizer.%s.nc' % selectdate.strftime('%Y%m%d'))
f = io.ct_read(file, 'read')
prmval = f.get_variable('statevectormean_optimized')[:]
devs = f.get_variable('statevectordeviations_optimized')[:]
f.close()
covariancematrix = (np.dot(devs,devs.T)/(devs.shape[1]-1))
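            # devs has shape [nparams, nmembers]; dot(devs, devs.T)/(nmembers-1) is the sample
            # covariance of the optimized ensemble, propagated here as the new prior covariance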
# Check dimensions of covariance matrix list, must add up to nparams
dims = covariancematrix.shape[0]
if dims != self.nparams:
logging.error("The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..." % (dims, self.nparams))
raise ValueError
# Make a cholesky decomposition of the covariance matrix
try:
_, s, _ = np.linalg.svd(covariancematrix)
except:
s = np.linalg.svd(covariancematrix, full_matrices=1, compute_uv=0) #Cartesius fix
dof = np.sum(s) ** 2 / sum(s ** 2)
C = np.linalg.cholesky(covariancematrix)
logging.debug('Cholesky decomposition has succeeded ')
logging.info('Appr. degrees of freedom in covariance matrix is %s' % (int(dof)))
# Create mean values
        newmean = np.ones(self.nparams, float) * prmval  # prior mean parameter values for the new time step
# If this is not the start of the filter, average previous two optimized steps into the mix
if lag == self.nlag - 1 and self.nlag >= 3:
newmean += self.ensemble_members[lag - 1][0].param_values + \
self.ensemble_members[lag - 2][0].param_values
newmean = newmean / 3.0
# Create the first ensemble member with a deviation of 0.0 and add to list
newmember = EnsembleMember(0)
newmember.param_values = newmean.flatten() # no deviations
self.ensemble_members[lag].append(newmember)
# Create members 1:nmembers and add to ensemble_members list
for member in range(1, self.nmembers):
np.random.seed(enssds[member])
rands = np.random.randn(self.nparams)
# logging.debug('rands are %f, %f, %f, %f, %f'%(rands[0],rands[1],rands[2],rands[3],rands[4]))
newmember = EnsembleMember(member)
newmember.param_values = np.dot(C, rands) + newmean
self.ensemble_members[lag].append(newmember)
logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, (lag + 1)))
################### End Class CO2StateVector ###################
if __name__ == "__main__":
pass
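    # Self-contained sketch of the ensemble draw in make_new_ensemble() above: members are
    # mean + C.dot(z) with C the Cholesky factor of the covariance and z ~ N(0, I), so their
    # sample covariance approaches the prescribed matrix (toy 2x2 example):
    import numpy as np
    rng = np.random.RandomState(4385)        # same fixed-seed idea as random.seed in the rc-file
    cov = np.array([[1.0, 0.5], [0.5, 2.0]])
    C = np.linalg.cholesky(cov)
    draws = np.array([C.dot(rng.randn(2)) for _ in range(5000)])
    print(np.cov(draws.T))                   # ~ cov for a large ensemble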
!!! Info for the CarbonTracker data assimilation system
datadir : /Storage/CO2/super004/STILT_model/Data
! list of all observation sites
obs.input.id : obsfiles.csv
! number of observation sites included; number of species included and to be used in inversion
obs.input.nr : 7
obs.spec.nr : 4
! number of emission categories defined in the emission model
obs.cat.nr : 14
! For Rdam obs
obs.sites.rc : ${datadir}/sites_weights.rc
! number of parameters
nparameters : 44
! set fixed seed for random number generator, or use 0 if you want to use any random seed
random.seed : 4385
!file with prior estimate of scaling factors (statevector) and covariances
emis.pparam : param_values.nc
ff.covariance : covariances.nc
!file with emission model parameter values
emis.paramfile : emis_parameters.csv
! switch (1=on/0=off) and input data for background CO2 and CO concentrations
obs.bgswitch : 1
obs.background : ${datadir}/background.nc
! input data for emission model
emis.input.spatial : spatial_data.nc
emis.input.tempobs : temporal_data.nc
emis.input.tempprior : temporal_data.nc
! overwrite existing prior/ensemble emission files + pseudo-data (0: keep existing files; 1: create new files)
run.emisflag : 0
run.emisflagens : 1
run.obsflag : 0
! back trajectory time of STILT footprints, also applied to OPS (in hours)
run.backtime : 6
! choose propagation scheme:
! 1: no propagation, start each cycle with the same prior parameter values and covariance matrix
! 2: propagation of optimized parameter values, but not of the covariance matrix
! 3: propagation of both optimized parameter values and covariance matrix
run.propscheme : 3