From bb5bb7731cc5c559bd7abc9147b61875f6ef2168 Mon Sep 17 00:00:00 2001 From: Ingrid Luijx <ingrid.vanderlaan@wur.nl> Date: Fri, 29 Apr 2016 12:22:46 +0000 Subject: [PATCH] Added mdm scaling with number of observations per day (except for aircraft) --- da/carbondioxide/obspack_globalviewplus.py | 26 ++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/da/carbondioxide/obspack_globalviewplus.py b/da/carbondioxide/obspack_globalviewplus.py index 0af4dbf..7712b34 100755 --- a/da/carbondioxide/obspack_globalviewplus.py +++ b/da/carbondioxide/obspack_globalviewplus.py @@ -14,7 +14,7 @@ import logging import datetime as dtm from string import strip -from numpy import array, logical_and +from numpy import array, logical_and, sqrt sys.path.append(os.getcwd()) sys.path.append('../../') @@ -85,9 +85,15 @@ class ObsPackObservations(Observations): subselect = logical_and(dates >= self.startdate , dates <= self.enddate).nonzero()[0] dates = dates.take(subselect, axis=0) - - obspacknum = ncf.get_variable('obspack_num').take(subselect) # or should we propagate obs_num which is not unique across datasets?? - obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0) + + if 'merge_num' in ncf.variables: + obspacknum = ncf.get_variable('merge_num').take(subselect) + else: + obspacknum = ncf.get_variable('obspack_num').take(subselect) + if 'ccggAllData' in ncfile: + obspackid = ncf.get_variable('id').take(subselect, axis=0) + else: + obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0) obspackid = [s.tostring().lower() for s in obspackid] obspackid = map(strip, obspackid) datasetname = ncfile # use full name of dataset to propagate for clarity @@ -325,7 +331,7 @@ class ObsPackObservations(Observations): identifier, incalt = value.split(';') site_incalt[identifier.strip()] = (int(incalt)) - for obs in self.datalist: # loop over all available data points + for obs in self.datalist: # first loop over all available data points to set flags correctly obs.mdm = 1000.0 # default is very high model-data-mismatch, until explicitly set by script if obs.flag == 1: # flag is taken from the gv+ datasets: 1=background/representative, 0=local. @@ -334,6 +340,8 @@ class ObsPackObservations(Observations): obs.flag = 99 # 99 means: do-not-use else: obs.flag = 99 + for obs in self.datalist: # second loop over all available data points to set mdm + identifier = obs.code species, site, method, lab, datasetnr = identifier.split('_') @@ -345,8 +353,12 @@ class ObsPackObservations(Observations): obs.may_reject = site_info[identifier]['may_reject'] obs.flag = 99 else: - logging.debug("Observation found (%s, %d), mdm used is: %0.2f." % (identifier, obs.id, site_info[identifier]['error'])) - obs.mdm = site_info[identifier]['error'] * self.global_R_scaling + if site_info[identifier]['category'] == 'aircraft': + nr_obs_per_day = 1 + else: + nr_obs_per_day = len([c.code for c in self.datalist if c.code == obs.code and c.xdate.day == obs.xdate.day and c.flag == 0]) + logging.debug("Observation found (%s, %d), mdm category is: %0.2f, scaled with number of observations per day (%i), final mdm applied is: %0.2f." % (identifier, obs.id, site_info[identifier]['error'],nr_obs_per_day,site_info[identifier]['error']*sqrt(nr_obs_per_day))) + obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling obs.may_localize = site_info[identifier]['may_localize'] obs.may_reject = site_info[identifier]['may_reject'] obs.flag = 0 -- GitLab