optimizer.py 12.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/usr/bin/env python
# optimizer.py

"""
.. module:: optimizer
.. moduleauthor:: Wouter Peters 

Revision History:
File created on 28 Jul 2010.

"""

import os
import logging
import numpy as np
import numpy.linalg as la
import da.tools.io4 as io

identifier = 'Optimizer CO2'
version = '0.0'

from da.baseclasses.optimizer import Optimizer

################### Begin Class CO2Optimizer ###################

class CO2Optimizer(Optimizer):
    """
        This creates an instance of an optimization object. It handles the minimum least squares optimization
        of the state vector given a set of sample objects. Two routines will be implemented: one where the optimization
        is sequential and one where it is the equivalent matrix solution. The choice can be made based on considerations of speed
        and efficiency.
    """
    
    def setup(self, dims):
        self.nlag = dims[0]
        self.nmembers = dims[1]
        self.nparams = dims[2]
        self.nobs = dims[3]
        self.nrloc = dims[4]
        self.nrspc = dims[5]
        self.inputdir = dims[6]
        #self.specfile = dims[7]
        self.create_matrices()

    def set_localization(self, loctype='None'):
        """ determine which localization to use """

        if loctype == 'CT2007':
            self.localization = True
            self.localizetype = 'CT2007'
            #T-test values for two-tailed student's T-test using 95% confidence interval for some options of nmembers
            if self.nmembers == 50:
                self.tvalue = 2.0086
            elif self.nmembers == 100:
                self.tvalue = 1.9840
            elif self.nmembers == 150:
                self.tvalue = 1.97591
            elif self.nmembers == 200:
                self.tvalue = 1.9719    
            else: self.tvalue = 0 
        elif loctype == 'multitracer':
            self.localization = True
            self.localizetype = 'multitracer'
        elif loctype == 'multitracer2':
            self.localization = True
            self.localizetype = 'multitracer2'
        else:
            self.localization = False
            self.localizetype = 'None'
    
        logging.info("Current localization option is set to %s" % self.localizetype)

    def localize(self, n):
        """ localize the Kalman Gain matrix """
        import numpy as np

        if not self.localization: 
            logging.debug('Not localized observation %i' % self.obs_ids[n])
            return 
        if self.localizetype == 'CT2007':
            count_localized = 0
            for r in range(self.nlag * self.nparams):
                corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
                prob = corr / np.sqrt((1.000000001 - corr ** 2) / (self.nmembers - 2))
                if abs(prob) < self.tvalue:
                    self.KG[r] = 0.0
                    count_localized = count_localized + 1
            logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
        if self.localizetype == 'multitracer':
            count_localized = 0
            ###read emission ratios for source type related to each parameter
            infile = os.path.join(self.inputdir, self.specfile)
            f = open(infile, 'r')
            lines = f.readlines()
            f.close()
            speclist = []
            speclist.append('CO2')
            emr = []
            for line in lines:
                dum=line.split(",")
                if dum[0]=='#' or dum[0]=='CO2' or dum[0]=='SNAP':
                    continue
                else:
                    sp = dum[0]
                    emrs = []
                    for k in range(self.nparams):
                        emrs.append(float(dum[k+1]))
                    speclist.append(sp)
                    emr.append(emrs)
            emr=np.array(emr)
            
            ###find obs and model value for this time step and species; calculate differences and ratios
            lnd = self.nobs/(self.nrspc*self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [0-i,1-i,2-i,3-i]
                    
            Xobs = []
            Xmod = []
            Robs = []
            Rmod = []
            for i in range(self.nrspc):
                Xobs.append(self.obs[n+lnd*idx[i]])
                Xmod.append(self.Hx[n+lnd*idx[i]])
                if i>0:
                    Robs.append(Xobs[i]/Xobs[0])
                    Rmod.append(Xmod[i]/Xmod[0])
            Xobs = np.array(Xobs)
            Xmod = np.array(Xmod)
            Robs = np.array(Robs)
            Rmod = np.array(Rmod)
            dX = Xmod - Xobs
            dR = abs(Rmod - Robs)
            flg = [] 
            if Xobs[0]>1.: #and self.species[n] == 'CO2'
                flg=0
                for i in range(self.nrspc):
                    if Xobs[i] == 0 or Xmod[i] == 0.:
                        flg=1
            else:
                flg=1
            
            ###old version
            # for i in range(self.nrspc):
                # if dX[i]>0:
                    # flg.append(1)
                # elif dX[i]<0:
                    # flg.append(0)
                # else:
                    # flg.append(np.nan)
            
            ### This routine determines which source types are likely to cause the model-data mismatch, according to the following principle:
            ### if the modeled CO:CO2 ratio is higher than the observed ratio this means we either overestimate emissions with a high CO:CO2 ratio
            ### or we underestimate emissions with a low CO:CO2 ratio; if the CO concentration is overestimated, it is likely to be the first option
            ### (i.e. too much emissions). We only include information from species if the model-data mismatch in the ratio is >5% of the observed ratio
            dums = []
            dum = []
            tst1 = []
            if flg == 0:
                for i in range(self.nrspc-1):
                    if dX[0]>0:
                        if dX[i+1]>0:
                            if Rmod[i]>Robs[i]:
                                tst1.append(1)
                            elif Rmod[i]<Robs[i]:
                                tst1.append(-1)
                        elif dX[i+1]<0:
                            if Rmod[i]>Robs[i]:
                                tst1.append(0)
                            elif Rmod[i]<Robs[i]:
                                tst1.append(2)
                    elif dX[0]<0:
                        if dX[i+1]>0:
                            if Rmod[i]>Robs[i]:
                                tst1.append(2)
                            elif Rmod[i]<Robs[i]:
                                tst1.append(0)
                        elif dX[i+1]<0:
                            if Rmod[i]>Robs[i]:
                                tst1.append(-1)
                            elif Rmod[i]<Robs[i]:
                                tst1.append(1)         
                if 2 in tst1:
                    dums1=[]
                    dums2=[]
                    for i in range(self.nrspc-1):
                        for k in range(len(emr[i])):
                            if emr[i,k]<Robs[i]:
                                dums1.append(k)
                            if emr[i,k]>Robs[i]:
                                dums2.append(k)
                    for j in range(self.nparams):
                        ct = dums1.count(j)
                        ctc = dums2.count(j)
                        if ct == (self.nrspc - 1) or ctc == (self.nrspc - 1):
                            ### all requirements are met, so do not localize
                            dum.append(j)
                else:
                    for i in range(self.nrspc-1):
                        if tst1[i] == 1:
                            for k in range(len(emr[i])):
                                if emr[i,k]>Robs[i]:
                                    dums.append(k)                    
                        elif tst1[i] == -1:
                            for k in range(len(emr[i])):
                                if emr[i,k]<Robs[i]:
                                    dums.append(k)  
                    for j in range(self.nparams):
                        ct = dums.count(j)
                        if ct == (self.nrspc - 1):
                            ### all requirements are met, so do not localize
                            dum.append(j)
            
            ###old version
            # if sum(flg) == 0 or sum(flg) == 4:
            ### if this is not the case, it is likely a mixture of over- and underestimation, which we can't specify; so no localization applied
                # for i in range(self.nrspc-1):
                    # if dR[i]>0.05*Robs[i]:
                        # if (Rmod[i]>Robs[i] and dX[i+1]>0) or (Rmod[i]<Robs[i] and dX[i+1]<0):
                            # tst1.append(1.)
                        # elif (Rmod[i]<Robs[i] and dX[i+1]>0) or (Rmod[i]>Robs[i] and dX[i+1]<0):
                            # tst1.append(-1.)
                    # else:
                        # tst1.append(0)
                # for i in range(self.nrspc-1):
                    # if tst1[i] == 1:
                        # for k in range(len(emr[i])):
                            # if emr[i,k]>Robs[i]:
                                # dums.append(k)                             
                    # elif tst1[i] == -1:
                        # for k in range(len(emr[i])):
                            # if emr[i,k]<Robs[i]:
                                # dums.append(k)
                # for j in range(self.nparams):
                    # ct = dums.count(j)
                    # if ct == (self.nrspc - 1):
                        ### all requirements are met, so do not localize
                        # dum.append(j)
                                
            if len(dum) > 0:
            ### what to do when we can't attribute model-data mismatch? update all parameters or set them all to zero?? (adapt dum)
                for r in range(self.nlag * self.nparams):
                    if r in dum:
                        continue
                    else:
                        self.KG[r] = 0.0
                        self.test_localize[r] = self.test_localize[r] + 1
                        count_localized = count_localized + 1
                logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
        if self.localizetype == 'multitracer2':
        ### This routine used more strict rules for source attribution by comparing the observed concentration ratios to emission ratios
        ### per source type
        
            count_localized = 0
            
            ###find obs and model value for this time step and species; calculate differences and ratios
            lnd = self.nobs/(self.nrspc*self.nrloc)
            for i in range(self.nrspc):
                if self.species[n] == speclist[i]:
                    idx = [0-i,1-i,2-i,3-i]
                    
            Xobs = []
            Robs = []
            for i in range(self.nrspc):
                Xobs.append(self.obs[n+lnd*idx[i]])
                if i>0:
                    Robs.append(Xobs[i]/Xobs[0])
            
            dum=[]
            if Robs[2]>0.1:
                if Robs[1]<1. and Robs[0]<1.:
                    dum.append(4)
                elif Robs[2]>2.5 and Robs[1]<1.>8. and Robs[0]>3.:
                    dum.append(8)
            elif Robs[0]>1. and Robs[2]<0.1:
                if Robs[0]<4. and Robs[1]<1.<1.:
                    dum.append(2)
                    dum.append(3)
                elif Robs[0]>7. and Robs[1]<1.>1.5:
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
                elif Robs[0]>3.5 and Robs[1]<1.<2.5:
                    dum.append(2)
                    dum.append(3)
                    dum.append(5)
                    dum.append(6)
                    dum.append(7)
            elif Robs[0]<0.6 and Robs[1]<1.<0.6:
                dum.append(0)
                dum.append(1)
            
            if len(dum) > 0 and len(dum) < self.nparams:
                for r in range(self.nlag * self.nparams):
                    if r in dum:
                        continue
                    else:
                        self.KG[r] = 0.0
                        count_localized = count_localized + 1
                logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))


################### End Class OPSOptimizer ###################



if __name__ == "__main__":
    pass