Commit 4d0fda0a authored by Florentie, Liesbeth's avatar Florentie, Liesbeth
Browse files

add obs_obspack_gvplus2.py

parent 29eb2f47
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# obs.py
"""
Author : peters
Revision History:
File created on 28 Jul 2010.
"""
import os
import sys
import logging
import datetime as dtm
from string import strip
from numpy import array, logical_and, sqrt
sys.path.append(os.getcwd())
sys.path.append('../../')
identifier = 'CarbonTracker CO2 mole fractions'
version = '0.0'
from da.carbondioxide.obspack_globalviewplus2 import ObsPackObservations, MoleFractionSample
import da.tools.io4 as io
import da.tools.rc as rc
################### Begin Class ctsfObsPackObservations ###################
class ctsfObsPackObservations(ObsPackObservations):
    """ObsPack (GlobalView+ v2) observations for CTDAS.

    Reads per-site ObsPack netcdf files into a MoleFractionList and attaches
    model-data-mismatch (mdm) settings parsed from a sites/weights rc-file.
    """

    def add_observations(self):
        """Fill self.datalist with MoleFractionSample objects for all obs in the package.

        The ObsPack mole fraction files are provided as time series per site with
        all dates in sequence. We loop over all site files listed in the package
        summary, subset each to [self.startdate, self.enddate], and keep only
        samples flagged 1 (background/representative in the gv+ datasets).
        """
        # Step 1: Read list of available site files in package
        infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,))
        with open(infile, 'r') as f:  # 'with' guarantees the file handle is closed
            lines = f.readlines()

        # Dataset names are announced on lines starting with '# dataset:'.
        ncfilelist = [line.split(':')[1].strip() for line in lines if line.startswith('# dataset:')]
        logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist))

        for ncfile in ncfilelist:
            infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc')
            ncf = io.ct_read(infile, 'read')

            # Subselect the samples inside this assimilation window.
            idates = ncf.get_variable('time_components')
            dates = array([dtm.datetime(*d) for d in idates])
            subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0]
            dates = dates.take(subselect, axis=0)

            # Variable naming differs between dataset flavors — presumably older
            # files carry 'merge_num' instead of 'obspack_num'; verify against data.
            if 'merge_num' in ncf.variables:
                obspacknum = ncf.get_variable('merge_num').take(subselect)
            else:
                obspacknum = ncf.get_variable('obspack_num').take(subselect)

            if 'ccggAllData' in ncfile:
                obspackid = ncf.get_variable('id').take(subselect, axis=0)
            else:
                obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
            # Char arrays -> lowercase, whitespace-stripped strings. The original
            # used map(string.strip, ...), which is Python-2-only and yields a
            # non-indexable iterator on Python 3; .strip() is equivalent on both.
            obspackid = [s.tostring().lower().strip() for s in obspackid]

            datasetname = ncfile  # use full name of dataset to propagate for clarity
            lats = ncf.get_variable('latitude').take(subselect, axis=0)
            lons = ncf.get_variable('longitude').take(subselect, axis=0)
            alts = ncf.get_variable('altitude').take(subselect, axis=0)
            obs = ncf.get_variable('value').take(subselect, axis=0)
            species = ncf.get_attribute('dataset_parameter')
            flags = ncf.get_variable('obs_flag').take(subselect, axis=0)
            ncf.close()

            # Only add observations with flag = 1 (background/representative).
            counter = 0
            for n in range(len(dates)):
                if flags[n] == 1:
                    self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0,
                                                            flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile))
                    counter += 1
            logging.debug("Added %d observations from file (%s) to the Data list" % (counter, ncfile))

        logging.info("Observations list now holds %d values" % len(self.datalist))

    def add_model_data_mismatch(self, filename):
        """Assign model-data mismatch (mdm) values to all samples for this cycle.

        (1) Open a sites_weights file
        (2) Parse the data
        (3) Compare site list against data
        (4) Take care of double sites, site moves/altitude changes, unused obs

        :param filename: path to the sites.rc settings file
        :raises IOError: when the sites.rc file does not exist
        """
        if not os.path.exists(filename):
            msg = 'Could not find the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            raise IOError(msg)  # call syntax works on Python 2 and 3 (was py2-only 'raise IOError, msg')
        else:
            self.sites_file = filename

        sites_weights = rc.read(self.sites_file)
        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])

        logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
        logging.warning('Model-data mismatch scaling factor     : %f ' % self.global_R_scaling)
        logging.debug('Model-data mismatch site categories    : %d ' % self.n_site_categories)

        # Parse per-category settings: "name; error; may_localize; may_reject".
        cats = [k for k in sites_weights.keys() if 'site.category' in k]
        site_categories = {}
        for key in cats:
            name, error, may_localize, may_reject = sites_weights[key].split(';')
            name = name.strip().lower()
            error = float(error)
            may_reject = ("TRUE" in may_reject.upper())
            may_localize = ("TRUE" in may_localize.upper())
            site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject}

        site_info = {}
        site_move = {}
        site_incalt = {}  # option to increase sampling altitude for sites specified in sites and weights file
        for key, value in sites_weights.items():  # .items() runs on py2 and py3 (was py2-only iteritems)
            if 'co2_' in key or 'sf6' in key:  # to be fixed later, do not yet know how to parse valid keys from rc-files yet.... WP
                sitename = key.strip()
                sitecategory = value.split()[0].strip().lower()
                site_info[sitename] = site_categories[sitecategory]
            if 'site.move' in key:
                identifier, latmove, lonmove = value.split(';')
                site_move[identifier.strip()] = (float(latmove), float(lonmove))
            if 'site.incalt' in key:
                identifier, incalt = value.split(';')
                site_incalt[identifier.strip()] = int(incalt)

        # First loop over all available data points to set flags correctly.
        # gv+ flags: 1=background/representative, 0=local -> internal 0=use, 99=do-not-use.
        for obs in self.datalist:
            obs.mdm = 1000.0  # default is very high model-data-mismatch, until explicitly set below
            if obs.flag == 1:
                obs.flag = 0
            else:
                obs.flag = 99  # 99 means: do-not-use

        # Pre-count usable observations per (site code, day) in one pass instead of
        # rescanning the full datalist for every sample (was O(n^2)). Safe because
        # during the mdm loop flags of non-'do-not-use' sites never change.
        # NOTE(review): the original matched on xdate.day only (day of month, not the
        # full date), which conflates e.g. Jan 3 and Feb 3 — behavior preserved here.
        obs_per_day = {}
        for c in self.datalist:
            if c.flag == 0:
                k = (c.code, c.xdate.day)
                obs_per_day[k] = obs_per_day.get(k, 0) + 1

        # Second loop over all available data points to set mdm.
        for obs in self.datalist:
            identifier = obs.code
            # Split also validates the expected 5-part identifier format.
            species, site, method, lab, datasetnr = identifier.split('_')
            if identifier in site_info:  # 'in' replaces py2-only has_key()
                if site_info[identifier]['category'] == 'do-not-use' or obs.flag == 99:
                    logging.warning("Observation found (%s, %d), but not used in assimilation." % (identifier, obs.id))
                    obs.mdm = site_info[identifier]['error'] * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 99
                else:
                    if site_info[identifier]['category'] == 'aircraft':
                        nr_obs_per_day = 1  # aircraft mdm is not scaled with sampling density
                    else:
                        nr_obs_per_day = obs_per_day[(obs.code, obs.xdate.day)]
                    logging.debug("Observation found (%s, %d), mdm category is: %0.2f, scaled with number of observations per day (%i), final mdm applied is: %0.2f." % (identifier, obs.id, site_info[identifier]['error'], nr_obs_per_day, site_info[identifier]['error'] * sqrt(nr_obs_per_day)))
                    obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling
                    obs.may_localize = site_info[identifier]['may_localize']
                    obs.may_reject = site_info[identifier]['may_reject']
                    obs.flag = 0
            else:
                logging.warning("Observation NOT found (%s, %d), please check sites.rc file (%s)  !!!" % (identifier, obs.id, self.sites_file))

            if identifier in site_move:
                movelat, movelon = site_move[identifier]
                obs.lat = obs.lat + movelat
                obs.lon = obs.lon + movelon
                logging.warning("Observation location for (%s, %d), is moved by %3.2f degrees latitude and %3.2f degrees longitude" % (identifier, obs.id, movelat, movelon))

            if identifier in site_incalt:
                incalt = site_incalt[identifier]
                obs.height = obs.height + incalt
                logging.warning("Observation location for (%s, %d), is moved by %3.2f meters in altitude" % (identifier, obs.id, incalt))

        # Only keep obs in datalist that will be used in assimilation.
        self.datalist = [obs for obs in self.datalist if obs.flag != 99]
        logging.info('Removed unused observations, observation list now holds %s values' % len(self.datalist))

        # Add site_info dictionary to the Observations object for future use.
        self.site_info = site_info
        self.site_move = site_move
        self.site_incalt = site_incalt

        logging.debug("Added Model Data Mismatch to all samples ")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment