optimizer.py 19.1 KB
Newer Older
1
2
3
4
#!/usr/bin/env python
# optimizer.py

"""
Peters, Wouter's avatar
Peters, Wouter committed
5
6
.. module:: optimizer
.. moduleauthor:: Wouter Peters 
7
8
9
10
11
12
13
14
15
16
17
18

Revision History:
File created on 28 Jul 2010.

"""

import os
import sys
import logging
import datetime

identifier = 'Optimizer baseclass'
karolina's avatar
karolina committed
19
version = '0.0'
20
21
22
23
24
25
26
27
28
29
30
31
32

################### Begin Class Optimizer ###################

class Optimizer(object):
    """
        This creates an instance of an optimization object. It handles the minimum least squares optimization
        of the state vector given a set of sample objects. Two routines will be implemented: one where the optimization
        is sequential and one where it is the equivalent matrix solution. The choice can be made based on considerations of speed
        and efficiency.
    """

    def __init__(self):
        self.Identifier = self.getid()
karolina's avatar
karolina committed
33
        self.Version = self.getversion()
34

karolina's avatar
karolina committed
35
        logging.info('Optimizer object initialized: %s' % self.Identifier)
36
37
38
39
40
41
42
43

    def getid(self):
        return identifier

    def getversion(self):
        return version

    def Initialize(self, dims):
karolina's avatar
karolina committed
44
45
46
47
        self.nlag = dims[0]
        self.nmembers = dims[1]
        self.nparams = dims[2]
        self.nobs = dims[3]
48
49
50
51
52
53
54
55
56
        self.CreateMatrices()

        return None

    def CreateMatrices(self):
        """ Create Matrix space needed in optimization routine """
        import numpy as np

        # mean state  [X]
karolina's avatar
karolina committed
57
        self.x = np.zeros((self.nlag * self.nparams,), float)
58
        # deviations from mean state  [X']
karolina's avatar
karolina committed
59
        self.X_prime = np.zeros((self.nlag * self.nparams, self.nmembers,), float)
60
        # mean state, transported to observation space [ H(X) ]
karolina's avatar
karolina committed
61
        self.Hx = np.zeros((self.nobs,), float)
62
        # deviations from mean state, transported to observation space [ H(X') ]
karolina's avatar
karolina committed
63
        self.HX_prime = np.zeros((self.nobs, self.nmembers), float)
64
        # observations
karolina's avatar
karolina committed
65
        self.obs = np.zeros((self.nobs,), float)
66
        # observation ids
karolina's avatar
karolina committed
67
        self.obs_ids = np.zeros((self.nobs,), float)
68
        # covariance of observations
karolina's avatar
karolina committed
69
        self.R = np.zeros((self.nobs, self.nobs,), float)
70
        # localization of obs
karolina's avatar
karolina committed
71
        self.may_localize = np.zeros(self.nobs, bool)
72
        # rejection of obs
karolina's avatar
karolina committed
73
        self.may_reject = np.zeros(self.nobs, bool)
74
        # flags of obs
karolina's avatar
karolina committed
75
        self.flags = np.zeros(self.nobs, int)
76
        # species type
karolina's avatar
karolina committed
77
        self.species = np.zeros(self.nobs, str)
78
        # species type
karolina's avatar
karolina committed
79
        self.sitecode = np.zeros(self.nobs, str)
80
81

        # species mask
karolina's avatar
karolina committed
82
        self.speciesmask = {}
83

84
        # Total covariance of fluxes and obs in units of obs [H P H^t + R]
karolina's avatar
karolina committed
85
        self.HPHR = np.zeros((self.nobs, self.nobs,), float)
86
        # Kalman Gain matrix
karolina's avatar
karolina committed
87
88
89
90
91
92
93
94
95
96
97
98
99
        self.KG = np.zeros((self.nlag * self.nparams, self.nobs,), float)

    def StateToMatrix(self, StateVector):
        allsites = []      # collect all obs for n=1,..,nlag
        allobs = []      # collect all obs for n=1,..,nlag
        allmdm = []      # collect all mdm for n=1,..,nlag
        allsamples = []  # collect all model samples for n=1,..,nlag
        allids = []  # collect all model samples for n=1,..,nlag
        allreject = []  # collect all model samples for n=1,..,nlag
        alllocalize = []  # collect all model samples for n=1,..,nlag
        allflags = []  # collect all model samples for n=1,..,nlag
        allspecies = []  # collect all model samples for n=1,..,nlag
        allsimulated = None  # collect all members model samples for n=1,..,nlag
100

101
        for n in range(self.nlag):
karolina's avatar
karolina committed
102
103
104
105
            Samples = StateVector.ObsToAssimmilate[n]
            members = StateVector.EnsembleMembers[n]
            self.x[n * self.nparams:(n + 1) * self.nparams] = members[0].ParameterValues
            self.X_prime[n * self.nparams:(n + 1) * self.nparams, :] = np.transpose(np.array([m.ParameterValues for m in members]))
106

107
            if Samples != None:
karolina's avatar
karolina committed
108
                self.rejection_threshold = Samples.rejection_threshold
Peters, Wouter's avatar
Peters, Wouter committed
109

karolina's avatar
karolina committed
110
111
112
113
114
115
116
117
                allreject.extend(Samples.Data.getvalues('may_reject'))
                alllocalize.extend(Samples.Data.getvalues('may_localize'))
                allflags.extend(Samples.Data.getvalues('flag'))
                allspecies.extend(Samples.Data.getvalues('species'))
                allobs.extend(Samples.Data.getvalues('obs'))
                allsites.extend(Samples.Data.getvalues('code'))
                allmdm.extend(Samples.Data.getvalues('mdm'))
                allids.extend(Samples.Data.getvalues('id'))
118

karolina's avatar
karolina committed
119
                simulatedensemble = Samples.Data.getvalues('simulated')
Peters, Wouter's avatar
Peters, Wouter committed
120

121
122
                if allsimulated == None :
                    allsimulated = np.array(simulatedensemble)
Peters, Wouter's avatar
Peters, Wouter committed
123
                else:
karolina's avatar
karolina committed
124
                    allsimulated = np.concatenate((allsimulated, np.array(simulatedensemble)), axis=0)
125

karolina's avatar
karolina committed
126
127
128
129
        self.obs[:] = np.array(allobs)
        self.obs_ids[:] = np.array(allids)
        self.HX_prime[:, :] = np.array(allsimulated)
        self.Hx[:] = self.HX_prime[:, 0]
130

karolina's avatar
karolina committed
131
132
133
134
135
        self.may_reject[:] = np.array(allreject)
        self.may_localize[:] = np.array(alllocalize)
        self.flags[:] = np.array(allflags)
        self.species[:] = np.array(allspecies)
        self.sitecode = allsites
136

karolina's avatar
karolina committed
137
138
        self.X_prime = self.X_prime - self.x[:, np.newaxis] # make into a deviation matrix
        self.HX_prime = self.HX_prime - self.Hx[:, np.newaxis] # make a deviation matrix
139

karolina's avatar
karolina committed
140
141
        for i, mdm in enumerate(allmdm):
            self.R[i, i] = mdm ** 2
142

karolina's avatar
karolina committed
143
    def MatrixToState(self, StateVector):
144
        for n in range(self.nlag):
karolina's avatar
karolina committed
145
146
147
            members = StateVector.EnsembleMembers[n]
            for m, mem in enumerate(members):
                members[m].ParameterValues[:] = self.X_prime[n * self.nparams:(n + 1) * self.nparams, m] + self.x[n * self.nparams:(n + 1) * self.nparams]     
148

149
        StateVector.isOptimized = True
karolina's avatar
karolina committed
150
        logging.debug('Returning optimized data to the StateVector, setting "StateVector.isOptimized = True" ')
151

karolina's avatar
karolina committed
152
    def WriteDiagnostics(self, DaCycle, StateVector, type='prior'):
153
154
155
156
157
158
159
160
161
162
        """
            Open a NetCDF file and write diagnostic output from optimization process:

                - calculated residuals
                - model-data mismatches
                - HPH^T
                - prior ensemble of samples
                - posterior ensemble of samples
                - prior ensemble of fluxes
                - posterior ensemble of fluxes
163
164

            The type designation refers to the writing of prior or posterior data and is used in naming the variables"
165
        """
166
167
        import da.tools.io4 as io
        #import da.tools.io as io
168

karolina's avatar
karolina committed
169
170
171
        outdir = DaCycle['dir.diagnostics']
        filename = os.path.join(outdir, 'optimizer.%s.nc' % DaCycle['time.start'].strftime('%Y%m%d'))
        DaCycle.OutputFileList += (filename,)
172

173
174
175
        # Open or create file

        if type == 'prior':
karolina's avatar
karolina committed
176
177
            f = io.CT_CDF(filename, method='create')
            logging.debug('Creating new diagnostics file for optimizer (%s)' % filename)
178
        elif type == 'optimized':
karolina's avatar
karolina committed
179
180
            f = io.CT_CDF(filename, method='write')
            logging.debug('Opening existing diagnostics file for optimizer (%s)' % filename)
181
182

        # Add dimensions 
183

karolina's avatar
karolina committed
184
185
186
187
188
189
        dimparams = f.AddParamsDim(self.nparams)
        dimmembers = f.AddMembersDim(self.nmembers)
        dimlag = f.AddLagDim(self.nlag, unlimited=False)
        dimobs = f.AddObsDim(self.nobs)
        dimstate = f.AddDim('nstate', self.nparams * self.nlag)
        dim200char = f.AddDim('string_of200chars', 200)
190

191
192
        # Add data, first the ones that are written both before and after the optimization

karolina's avatar
karolina committed
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
        data = self.x

        savedict = io.std_savedict.copy() 
        savedict['name'] = "statevectormean_%s" % type
        savedict['long_name'] = "full_statevector_mean_%s" % type
        savedict['units'] = "unitless"
        savedict['dims'] = dimstate
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Full %s state vector mean ' % type
        f.AddData(savedict)

        data = self.X_prime

        savedict = io.std_savedict.copy()
        savedict['name'] = "statevectordeviations_%s" % type
        savedict['long_name'] = "full_statevector_deviations_%s" % type
        savedict['units'] = "unitless"
        savedict['dims'] = dimstate + dimmembers
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Full state vector %s deviations as resulting from the optimizer' % type
        f.AddData(savedict)

        data = self.Hx

        savedict = io.std_savedict.copy()
        savedict['name'] = "modelsamplesmean_%s" % type
        savedict['long_name'] = "modelsamplesforecastmean_%s" % type
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimobs
        savedict['values'] = data.tolist()
        savedict['comment'] = '%s mean mixing ratios based on %s state vector' % (type, type,)
        f.AddData(savedict)

        data = self.HX_prime

        savedict = io.std_savedict.copy()
        savedict['name'] = "modelsamplesdeviations_%s" % type
        savedict['long_name'] = "modelsamplesforecastdeviations_%s" % type
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimobs + dimmembers
        savedict['values'] = data.tolist()
        savedict['comment'] = '%s mixing ratio deviations based on %s state vector' % (type, type,)
        f.AddData(savedict)
236

237
238
239
        # Continue with prior only data

        if type == 'prior':
240
241
            data = self.sitecode

karolina's avatar
karolina committed
242
243
244
245
246
247
            savedict = io.std_savedict.copy()
            savedict['name'] = "sitecode"
            savedict['long_name'] = "site code propagated from observation file"
            savedict['dtype'] = "char"
            savedict['dims'] = dimobs + dim200char
            savedict['values'] = data
248
            savedict['missing_value'] = '!'
karolina's avatar
karolina committed
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
            f.AddData(savedict)

            data = self.obs

            savedict = io.std_savedict.copy()
            savedict['name'] = "observed"
            savedict['long_name'] = "observedvalues"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Observations used in optimization'
            f.AddData(savedict)

            data = self.obs_ids

            savedict = io.std_savedict.copy()
            savedict['name'] = "obspack_num"
            savedict['dtype'] = "int64"
            savedict['long_name'] = "Unique_ObsPack_observation_number"
            savedict['units'] = ""
            savedict['dims'] = dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Unique observation number across the entire ObsPack distribution'
            f.AddData(savedict)

            data = self.R

            savedict = io.std_savedict.copy()
            savedict['name'] = "modeldatamismatchvariance"
            savedict['long_name'] = "modeldatamismatch variance"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimobs + dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Variance of mole fractions resulting from model-data mismatch'
            f.AddData(savedict)
284
285
286
287
288

        # Continue with posterior only data

        elif type == 'optimized':

karolina's avatar
karolina committed
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
            data = self.HPHR

            savedict = io.std_savedict.copy()
            savedict['name'] = "totalmolefractionvariance"
            savedict['long_name'] = "totalmolefractionvariance"
            savedict['units'] = "[mol mol-1]^2"
            savedict['dims'] = dimobs + dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Variance of mole fractions resulting from prior state and model-data mismatch'
            f.AddData(savedict)

            data = self.flags

            savedict = io.std_savedict.copy()
            savedict['name'] = "flag"
            savedict['long_name'] = "flag_for_obs_model"
            savedict['units'] = "None"
            savedict['dims'] = dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Flag (0/1/2/99) for observation value, 0 means okay, 1 means QC error, 2 means rejected, 99 means not sampled'
            f.AddData(savedict)

            data = self.KG

            savedict = io.std_savedict.copy()
            savedict['name'] = "kalmangainmatrix"
            savedict['long_name'] = "kalmangainmatrix"
            savedict['units'] = "unitless molefraction-1"
            savedict['dims'] = dimstate + dimobs
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Kalman gain matrix of all obs and state vector elements'
320
            #dummy                   = f.AddData(savedict)
321

karolina's avatar
karolina committed
322
323
        f.close()
        logging.debug('Diagnostics file closed')
324
325
326

        return None

327
328
329
    def SerialMinimumLeastSquares(self):
        """ Make minimum least squares solution by looping over obs"""
        import numpy as np
330
        import numpy.linalg as la
331

karolina's avatar
karolina committed
332
        tvalue = 1.97591
333

334
335
        for n in range(self.nobs):

336
337
338
            # Screen for flagged observations (for instance site not found, or no sample written from model)

            if self.flags[n] != 0:
karolina's avatar
karolina committed
339
                logging.debug('Skipping observation (%s,%s) because of flag value %d' % (self.sitecode[n], self.obs_ids[n], self.flags[n]))
340
341
                continue

342
343
344

            # Screen for outliers greather than 3x model-data mismatch, only apply if obs may be rejected

karolina's avatar
karolina committed
345
            res = self.obs[n] - self.Hx[n]
346
347

            if self.may_reject[n]:
karolina's avatar
karolina committed
348
                threshold = self.rejection_threshold * np.sqrt(self.R[n, n])
349
                if np.abs(res) > threshold:
karolina's avatar
karolina committed
350
                    logging.debug('Rejecting observation (%s,%s) because residual (%f) exceeds threshold (%f)' % (self.sitecode[n], self.obs_ids[n], res, threshold))
351
                    self.flags[n] = 2
352
                    continue
353

karolina's avatar
karolina committed
354
355
            PHt = 1. / (self.nmembers - 1) * np.dot(self.X_prime, self.HX_prime[n, :])
            self.HPHR[n, n] = 1. / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[n, :]).sum() + self.R[n, n]
356

karolina's avatar
karolina committed
357
            self.KG[:, n] = PHt / self.HPHR[n, n]
358

359
            if self.may_localize[n]:
karolina's avatar
karolina committed
360
361
                self.Localize(n)
                logging.debug('Localized observation %s' % self.obs_ids[n])
362
            else:
karolina's avatar
karolina committed
363
                logging.debug('Not allowed to Localize observation %s' % self.obs_ids[n])
364

karolina's avatar
karolina committed
365
            alpha = np.double(1.0) / (np.double(1.0) + np.sqrt((self.R[n, n]) / self.HPHR[n, n]))
366

karolina's avatar
karolina committed
367
            self.x[:] = self.x + self.KG[:, n] * res
368
369

            for r in range(self.nmembers):
karolina's avatar
karolina committed
370
                self.X_prime[:, r] = self.X_prime[:, r] - alpha * self.KG[:, n] * (self.HX_prime[n, r])
371
372
373
374

#WP !!!! Very important to first do all obervations from n=1 through the end, and only then update 1,...,n. The current observation
#WP      should always be updated last because it features in the loop of the adjustments !!!!

karolina's avatar
karolina committed
375
376
377
378
379
            for m in range(n + 1, self.nobs):
                res = self.obs[n] - self.Hx[n]
                fac = 1.0 / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[m, :]).sum() / self.HPHR[n, n]
                self.Hx[m] = self.Hx[m] + fac * res
                self.HX_prime[m, :] = self.HX_prime[m, :] - alpha * fac * self.HX_prime[n, :]
380

karolina's avatar
karolina committed
381
382
383
384
385
            for m in range(1, n + 1):
                res = self.obs[n] - self.Hx[n]
                fac = 1.0 / (self.nmembers - 1) * (self.HX_prime[n, :] * self.HX_prime[m, :]).sum() / self.HPHR[n, n]
                self.Hx[m] = self.Hx[m] + fac * res
                self.HX_prime[m, :] = self.HX_prime[m, :] - alpha * fac * self.HX_prime[n, :]
386
387
388
389
390
391
392
393
394

            
    def BulkMinimumLeastSquares(self):
        """ Make minimum least squares solution by solving matrix equations"""
        import numpy as np
        import numpy.linalg as la

        # Create full solution, first calculate the mean of the posterior analysis

karolina's avatar
karolina committed
395
396
397
398
        HPH = np.dot(self.HX_prime, np.transpose(self.HX_prime)) / (self.nmembers - 1)   # HPH = 1/N * HX' * (HX')^T
        self.HPHR[:, :] = HPH + self.R                                                            # HPHR = HPH + R
        HPb = np.dot(self.X_prime, np.transpose(self.HX_prime)) / (self.nmembers - 1)    # HP = 1/N X' * (HX')^T
        self.KG[:, :] = np.dot(HPb, la.inv(self.HPHR))                                         # K = HP/(HPH+R)
399
400

        for n in range(self.nobs):
karolina's avatar
karolina committed
401
            self.Localize(n)
402

karolina's avatar
karolina committed
403
        self.x[:] = self.x + np.dot(self.KG, self.obs - self.Hx)                             # xa = xp + K (y-Hx)
404
405
406
407
408

        # And next make the updated ensemble deviations. Note that we calculate P by using the full equation (10) at once, and 
        # not in a serial update fashion as described in Whitaker and Hamill. 
        # For the current problem with limited N_obs this is easier, or at least more straightforward to do.

karolina's avatar
karolina committed
409
410
411
412
413
414
        I = np.identity(self.nlag * self.nparams)
        sHPHR = la.cholesky(self.HPHR)                                  # square root of HPH+R
        part1 = np.dot(HPb, np.transpose(la.inv(sHPHR)))                 # HP(sqrt(HPH+R))^-1
        part2 = la.inv(sHPHR + np.sqrt(self.R))                           # (sqrt(HPH+R)+sqrt(R))^-1
        Kw = np.dot(part1, part2)                                     # K~
        self.X_prime[:, :] = np.dot(I, self.X_prime) - np.dot(Kw, self.HX_prime)         # HX' = I - K~ * HX'
415

karolina's avatar
karolina committed
416
        P_opt = np.dot(self.X_prime, np.transpose(self.X_prime)) / (self.nmembers - 1)
417
418
419
420

        # Now do the adjustments of the modeled mixing ratios using the linearized ensemble. These are not strictly needed but can be used
        # for diagnosis.

karolina's avatar
karolina committed
421
422
423
424
        part3 = np.dot(HPH, np.transpose(la.inv(sHPHR)))                           # HPH(sqrt(HPH+R))^-1
        Kw = np.dot(part3, part2)                                               # K~
        self.Hx[:] = self.Hx + np.dot(np.dot(HPH, la.inv(self.HPHR)), self.obs - self.Hx)  # Hx  = Hx+ HPH/HPH+R (y-Hx)
        self.HX_prime[:, :] = self.HX_prime - np.dot(Kw, self.HX_prime)                            # HX' = HX'- K~ * HX'
425

karolina's avatar
karolina committed
426
        logging.info('Minimum Least Squares solution was calculated, returning')
427
428
429
430
431

    def SetLocalization(self):
        """ determine which localization to use """
        self.localization = True
        self.localizetype = "None"
karolina's avatar
karolina committed
432
        logging.info("Current localization option is set to %s" % self.localizetype)
433

karolina's avatar
karolina committed
434
    def Localize(self, n):
435
        """ localize the Kalman Gain matrix """
karolina's avatar
karolina committed
436
        pass
437
438
439
440
441
442
443
444
445

################### End Class Optimizer ###################



if __name__ == "__main__":

    sys.path.append('../../')

karolina's avatar
karolina committed
446
    from da.tools.initexit import StartLogger 
447
448
449
450
451
452
    from da.tools.initexit import CycleControl 
    from da.ct.statevector import CtStateVector, PrepareState
    from da.ct.obs import CtObservations
    import numpy as np

    opts = ['-v']
karolina's avatar
karolina committed
453
    args = {'rc':'../../da.rc', 'logfile':'da_initexit.log', 'jobrcfilename':'test.rc'}
454
455

    StartLogger()
karolina's avatar
karolina committed
456
    DaCycle = CycleControl(opts, args)
457
458
459
460
461
462

    DaCycle.Initialize()
    print DaCycle

    StateVector = PrepareState(DaCycle)

karolina's avatar
karolina committed
463
464
465
    samples = CtObservations(DaCycle.DaSystem, datetime.datetime(2005, 3, 5))
    samples.AddObs()
    samples.AddSimulations('/Users/peters/tmp/test_da/output/20050305/samples.000.nc')
466
467


karolina's avatar
karolina committed
468
469
    nobs = len(samples.Data)
    dims = (int(DaCycle['time.nlag']),
470
                  int(DaCycle['da.optimizer.nmembers']),
471
                  int(DaCycle.DaSystem['nparameters']),
karolina's avatar
karolina committed
472
                  nobs,)
473
474
475
476
477

    opt = CtOptimizer(dims)
    opt.StateToMatrix(StateVector)
    opt.MinimumLeastSquares()
    opt.MatrixToState(StateVector)