statevectorNHgridded.py 8.85 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters. 
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu. 

This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation, 
version 3. This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

You should have received a copy of the GNU General Public License along with this 
program. If not, see <http://www.gnu.org/licenses/>."""
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
# ct_statevector_tools.py

"""
Author : peters 

Revision History:
File created on 28 Jul 2010.

"""

import os
import sys
import logging
import numpy as np

sys.path.append(os.getcwd())
sys.path.append('../../')

import da.tools.io4 as io
from da.baseclasses.statevector import StateVector, EnsembleMember
identifier = 'CarbonTracker Gridded Statevector '
version = '0.0'

################### Begin Class CtStateVector ###################

class CO2GriddedStateVector(StateVector):
    """ This is a StateVector object for CarbonTracker. It has a private method to make new ensemble members """

42
43
44
45
46
47
48
49
50
51
    def __init__(self):
        self.ID = identifier
        self.version = version

        # The following code allows the object to be initialized with a dacycle object already present. Otherwise, it can
        # be added at a later moment.

        logging.info('Statevector object initialized: %s' % self.ID)


52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    def get_covariance(self, date, dacycle):
        """ Make a new ensemble from specified matrices, the attribute lag refers to the position in the state vector. 
            Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
            The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]
        """    

        
        try:
            import matplotlib.pyplot as plt
        except:
            pass

        # Get the needed matrices from the specified covariance files

        file_ocn_cov = dacycle.dasystem['ocn.covariance'] 

        cov_files = os.listdir(dacycle.dasystem['bio.cov.dir'])
        cov_files = [os.path.join(dacycle.dasystem['bio.cov.dir'], f) for f in cov_files if dacycle.dasystem['bio.cov.prefix'] in f]

        logging.debug("Found %d covariances to use for biosphere" % len(cov_files))

        # replace YYYY.MM in the ocean covariance file string

        file_ocn_cov = file_ocn_cov.replace('2000.01', date.strftime('%Y.%m'))

        cov_files.append(file_ocn_cov)

        covariancematrixlist = []
        for file in cov_files:
            if not os.path.exists(file):
                msg = "Cannot find the specified file %s" % file 
                logging.error(msg)
brunner's avatar
brunner committed
84
                raise IOError(msg)
85
86
87
88
89
            else:
                logging.debug("Using covariance file: %s" % file)

            f = io.ct_read(file, 'read')

90
            if 'pco2' in file or 'cov_ocean' in file: 
91
                cov_ocn = f.get_variable('CORMAT')
92
                parnr = range(9805,9835)
93
94
95
                cov = cov_ocn
            elif 'tropic' in file:
                cov = f.get_variable('covariance')
96
                parnr = f.get_variable('parameternumber')
97
98
            else: 
                cov = f.get_variable('covariance')
99
                parnr = f.get_variable('parameternumber')
100
101
102
103
                cov_sf = 90.0 / np.sqrt(cov.diagonal().sum())  # this scaling factor makes the total variance close to the value of a single ecoregion
                cov = cov * cov_sf

            f.close()
104
            covariancematrixlist.append([cov,parnr,file])
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

        logging.debug("Succesfully closed files after retrieving prior covariance matrices")

        # Once we have the matrices, we can start to make the full covariance matrix, and then decompose it

        return covariancematrixlist

    def make_new_ensemble(self, lag, covariancematrixlist=[None]):
        """ 
        :param lag: an integer indicating the time step in the lag order
        :param covariancematrix: a list of matrices specifying the covariance distribution to draw from
        :rtype: None
    
        Make a new ensemble, the attribute lag refers to the position in the state vector. 
        Note that lag=1 means an index of 0 in python, hence the notation lag-1 in the indexing below.
        The argument is thus referring to the lagged state vector as [1,2,3,4,5,..., nlag]

        The covariance list object to be passed holds a list of matrices with a total number of dimensions [nparams, nparams], which is
        used to draw ensemblemembers from. Each draw is done on a matrix from the list, to make the computational burden smaller when
        the StateVector nparams becomes very large.

        """    
        try:
            import matplotlib.pyplot as plt
        except:
            pass

        if not isinstance(covariancematrixlist, list):
            logging.error("The covariance matrix or matrices must be passed as a list of array objects, exiting..." )
            raise ValueError

        # Check dimensions of covariance matrix list, must add up to nparams

        dims = 1  # start from 1.0 to account for the last parameter that scales Ice+Non-optimized, we have no covariance matrix for this though

140
        for matrix,parnr,file in covariancematrixlist: 
141
142
143
144
145
146
147
148
149
150
151
152
            dims += matrix.shape[0]

        if dims != self.nparams:
            logging.error("The total dimension of the covariance matrices passed (%d) does not add up to the prescribed nparams (%d), exiting..." % (dims, self.nparams))
            raise ValueError

        # Loop over list if identity matrices and create a matrix of (nparams,nmembers) with the deviations

        dof = 0.0
        dev_matrix = np.zeros((self.nparams, self.nmembers,), 'float')
        randstate = np.random.get_state()

153
        for matrix,parnr,file in covariancematrixlist:
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
            # Make a cholesky decomposition of the covariance matrix

            _, s, _ = np.linalg.svd(matrix)
            dof += np.sum(s) ** 2 / sum(s ** 2)
            try:
                C = np.linalg.cholesky(matrix)
            except np.linalg.linalg.LinAlgError, err:
                logging.error('Cholesky decomposition has failed ')
                logging.error('For a matrix of dimensions: %d' % matrix.shape[0])
                logging.debug(err)
                raise np.linalg.linalg.LinAlgError


            # Draw nmembers instances of this distribution

            npoints = matrix.shape[0]

171
172
            istart = parnr[0]-1
            istop = parnr[-1]
173
174
175
176
177

            for member in range(1, self.nmembers):
                rands = np.random.randn(npoints)
                deviations = np.dot(C, rands)
                dev_matrix[istart:istop, member - 1] = deviations
178
179
180
181

                dev_matrix[-1, member - 1] = 1.e-10 * np.random.randn()

            logging.debug("Inserted prior covariance matrix from file %s of %d elements into statevector location %d through %d"%(file,len(parnr),istart,istop) )
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227

            #cov2 = np.dot(dev_matrix[istart:istop,:],np.transpose(dev_matrix[istart:istop,:])) / (self.nmembers-1)
            #print matrix.sum(),cov2.sum(),abs(matrix.diagonal()-cov2.diagonal()).max(), matrix.shape,cov2.shape


        logging.debug('Successfully constructed a deviation matrix from covariance structure')
        logging.info('Appr. degrees of freedom in full covariance matrix is %s' % (int(dof)))

        # Now fill the ensemble members with the deviations we have just created


        # Create mean values 

        new_mean = np.ones(self.nparams, float) # standard value for a new time step is 1.0

        # If this is not the start of the filter, average previous two optimized steps into the mix

        if lag == self.nlag - 1 and self.nlag >= 3:
            new_mean += self.ensemble_members[lag - 1][0].param_values + \
                                           self.ensemble_members[lag - 2][0].param_values 
            new_mean = new_mean / 3.0

        # Create the first ensemble member with a deviation of 0.0 and add to list

        new_member = EnsembleMember(0)
        new_member.param_values = new_mean.flatten()  # no deviations
        self.ensemble_members[lag].append(new_member)

        # Create members 1:nmembers and add to ensemble_members list

        for member in range(1, self.nmembers):
            new_member = EnsembleMember(member)
            new_member.param_values = dev_matrix[:, member - 1] + new_mean
            self.ensemble_members[lag].append(new_member)

        logging.debug('%d new ensemble members were added to the state vector # %d' % (self.nmembers, (lag + 1)))





################### End Class CtStateVector ###################


if __name__ == "__main__":
    pass