From b13b5851fa03b760f3abd0c0f1a69cc097cfc382 Mon Sep 17 00:00:00 2001
From: Ingrid Luijx <ingrid.vanderlaan@wur.nl>
Date: Mon, 25 Mar 2013 14:40:52 +0000
Subject: [PATCH] Changed the localization routine and settings, so that the
 localization is actually used. Also added some extra logging messages related
 to localization.

---
 da/baseclasses/optimizer.py |  9 +++++----
 da/ct/optimizer.py          | 32 ++++++++++++++++++++++++--------
 da/tools/pipeline.py        |  2 +-
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/da/baseclasses/optimizer.py b/da/baseclasses/optimizer.py
index d07d0d9d..9c55734e 100755
--- a/da/baseclasses/optimizer.py
+++ b/da/baseclasses/optimizer.py
@@ -305,7 +305,7 @@ class Optimizer(object):
             # Screen for flagged observations (for instance site not found, or no sample written from model)
 
             if self.flags[n] != 0:
-                logging.debug('Skipping observation (%s,%s) because of flag value %d' % (self.sitecode[n], self.obs_ids[n], self.flags[n]))
+                logging.debug('Skipping observation (%s,%i) because of flag value %d' % (self.sitecode[n], self.obs_ids[n], self.flags[n]))
                 continue
 
             # Screen for outliers greather than 3x model-data mismatch, only apply if obs may be rejected
@@ -315,7 +315,7 @@ class Optimizer(object):
             if self.may_reject[n]:
                 threshold = self.rejection_threshold * np.sqrt(self.R[n, n])
                 if np.abs(res) > threshold:
-                    logging.debug('Rejecting observation (%s,%s) because residual (%f) exceeds threshold (%f)' % (self.sitecode[n], self.obs_ids[n], res, threshold))
+                    logging.debug('Rejecting observation (%s,%i) because residual (%f) exceeds threshold (%f)' % (self.sitecode[n], self.obs_ids[n], res, threshold))
                     self.flags[n] = 2
                     continue
 
@@ -325,10 +325,10 @@ class Optimizer(object):
             self.KG[:, n] = PHt / self.HPHR[n, n]
 
             if self.may_localize[n]:
+                logging.debug('Trying to localize observation %i' % self.obs_ids[n])
                 self.localize(n)
-                logging.debug('Localized observation %s' % self.obs_ids[n])
             else:
-                logging.debug('Not allowed to localize observation %s' % self.obs_ids[n])
+                logging.debug('Not allowed to localize observation %i' % self.obs_ids[n])
 
             alpha = np.double(1.0) / (np.double(1.0) + np.sqrt((self.R[n, n]) / self.HPHR[n, n]))
 
@@ -400,6 +400,7 @@ class Optimizer(object):
 
     def localize(self, n):
         """ localize the Kalman Gain matrix """
+        logging.debug('Not localized observation %i' % self.obs_ids[n])
         pass
 
 ################### End Class Optimizer ###################
diff --git a/da/ct/optimizer.py b/da/ct/optimizer.py
index 64c6c3c8..41704c15 100755
--- a/da/ct/optimizer.py
+++ b/da/ct/optimizer.py
@@ -35,26 +35,42 @@ class CtOptimizer(Optimizer):
         if loctype == 'CT2007':
             self.localization = True
             self.localizetype = 'CT2007'
+            #T-test values for two-tailed student's T-test using 95% confidence interval for some options of nmembers
+            if self.nmembers == 50:
+                self.tvalue = 2.0086
+            elif self.nmembers == 100:
+                self.tvalue = 1.9840
+            elif self.nmembers == 150:
+                self.tvalue = 1.97591
+            elif self.nmembers == 200:
+                self.tvalue = 1.9719    
+            else: self.tvalue = 0    
         else:
             self.localization = False
             self.localizetype = 'None'
     
         logging.info("Current localization option is set to %s" % self.localizetype)
+        logging.info("Used critical tvalue %0.05f is based on 95%% probability and %i ensemble members in a two-tailed student's T-test"%(self.tvalue,self.nmembers))
+        if self.tvalue == 0:
+            logging.error("Critical tvalue for localization not set for %i ensemble members"%(self.nmembers))
+            sys.exit(2)
 
     def localize(self, n):
         """ localize the Kalman Gain matrix """
         import numpy as np
-    
+
         if not self.localization: 
+            logging.debug('Not localized observation %i' % self.obs_ids[n])
             return 
         if self.localizetype == 'CT2007':
-            tvalue = 1.97591
-            if np.sqrt(self.R[n, n]) >= 1.5:
-                for r in range(self.nlag * self.nparams):
-                    corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
-                    prob = corr / np.sqrt((1.0 - corr ** 2) / (self.nmembers - 2))
-                    if abs(prob) < tvalue:
-                        self.KG[r, n] = 0.0
+            count_localized = 0
+            for r in range(self.nlag * self.nparams):
+                corr = np.corrcoef(self.HX_prime[n, :], self.X_prime[r, :].squeeze())[0, 1]
+                prob = corr / np.sqrt((1.0 - corr ** 2) / (self.nmembers - 2))
+                if abs(prob) < self.tvalue:
+                    self.KG[r, n] = 0.0
+                    count_localized = count_localized + 1
+            logging.debug('Localized observation %i, %i%% of values set to 0' % (self.obs_ids[n],count_localized*100/(self.nlag * self.nparams)))
 
 
 ################### End Class CtOptimizer ###################
diff --git a/da/tools/pipeline.py b/da/tools/pipeline.py
index dc3013d4..253d008e 100755
--- a/da/tools/pipeline.py
+++ b/da/tools/pipeline.py
@@ -237,7 +237,7 @@ def invert(DaCycle, StateVector, Optimizer):
     Optimizer.Initialize(dims)
     Optimizer.state_to_matrix(StateVector)
     Optimizer.write_diagnostics(DaCycle, StateVector, type='prior')
-    Optimizer.set_localization('None')
+    Optimizer.set_localization('CT2007')
 
     if not DaCycle.DaSystem.has_key('opt.algorithm'):
         logging.info("There was no minimum least squares algorithm specified in the DA System rc file (key : opt.algorithm)") 
-- 
GitLab