From bb5bb7731cc5c559bd7abc9147b61875f6ef2168 Mon Sep 17 00:00:00 2001
From: Ingrid Luijx <ingrid.vanderlaan@wur.nl>
Date: Fri, 29 Apr 2016 12:22:46 +0000
Subject: [PATCH] Added mdm scaling with number of observations per day (except
 for aircraft)

---
 da/carbondioxide/obspack_globalviewplus.py | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/da/carbondioxide/obspack_globalviewplus.py b/da/carbondioxide/obspack_globalviewplus.py
index 0af4dbf..7712b34 100755
--- a/da/carbondioxide/obspack_globalviewplus.py
+++ b/da/carbondioxide/obspack_globalviewplus.py
@@ -14,7 +14,7 @@ import logging
         
 import datetime as dtm
 from string import strip
-from numpy import array, logical_and
+from numpy import array, logical_and, sqrt
 sys.path.append(os.getcwd())
 sys.path.append('../../')
 
@@ -85,9 +85,15 @@ class ObsPackObservations(Observations):
             subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0]
 
             dates = dates.take(subselect, axis=0)
-            
-            obspacknum = ncf.get_variable('obspack_num').take(subselect)  # or should we propagate obs_num which is not unique across datasets??
-            obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
+ 
+            if 'merge_num' in ncf.variables:
+                obspacknum = ncf.get_variable('merge_num').take(subselect)
+            else:
+                obspacknum = ncf.get_variable('obspack_num').take(subselect)
+            if 'ccggAllData' in ncfile:
+                obspackid = ncf.get_variable('id').take(subselect, axis=0)
+            else:
+                obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
             obspackid = [s.tostring().lower() for s in obspackid]
             obspackid = map(strip, obspackid)
             datasetname = ncfile  # use full name of dataset to propagate for clarity
@@ -325,7 +331,7 @@ class ObsPackObservations(Observations):
                 identifier, incalt = value.split(';')
                 site_incalt[identifier.strip()] = (int(incalt))
 
-        for obs in self.datalist:  # loop over all available data points
+        for obs in self.datalist:  # first loop over all available data points to set flags correctly
 
             obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set by script
             if obs.flag == 1: # flag is taken from the gv+ datasets: 1=background/representative, 0=local. 
@@ -334,6 +340,8 @@ class ObsPackObservations(Observations):
                 obs.flag = 99 # 99 means: do-not-use
             else: obs.flag = 99    
                     
+        for obs in self.datalist:  # second loop over all available data points to set mdm
+
             identifier = obs.code
             species, site, method, lab, datasetnr = identifier.split('_')
 
@@ -345,8 +353,12 @@ class ObsPackObservations(Observations):
                     obs.may_reject = site_info[identifier]['may_reject']
                     obs.flag = 99
                 else:
-                    logging.debug("Observation found (%s, %d), mdm used is: %0.2f." % (identifier, obs.id, site_info[identifier]['error']))
-                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
+                    if site_info[identifier]['category'] == 'aircraft':
+                        nr_obs_per_day = 1
+                    else:
+                        nr_obs_per_day = len([c.code for c in self.datalist if c.code == obs.code and c.xdate.day == obs.xdate.day and c.flag == 0])
+                    logging.debug("Observation found (%s, %d), mdm category is: %0.2f, scaled with number of observations per day (%i), final mdm applied is: %0.2f." % (identifier, obs.id, site_info[identifier]['error'],nr_obs_per_day,site_info[identifier]['error']*sqrt(nr_obs_per_day)))
+                    obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling
                     obs.may_localize = site_info[identifier]['may_localize']
                     obs.may_reject = site_info[identifier]['may_reject']
                     obs.flag = 0
-- 
GitLab