NearRealTimeCTDAS / CTDAS / Commits / 1910336a

Commit 1910336a authored Nov 20, 2018 by brunner
No commit message
parent acd8c79f

Changes 3
da/cosmo/observationoperator.py
...
@@ -11,6 +11,7 @@ from netCDF4 import Dataset
from datetime import datetime, timedelta
from dateutil import rrule
from cdo import *
from . import site_height

identifier = 'ObservationOperator'
version = '10'
...
@@ -124,7 +125,7 @@ class ObservationOperator(object):
# cdo.merge(input = os.path.join(dacycle['da.bio.input'], 'ensemble', "gpp_???_%s.nc" % dt.strftime('%Y%m%d%H')), output = os.path.join(dacycle['da.bio.input'], 'ensemble', "gpp_%s.nc" % dt.strftime('%Y%m%d%H')))
# cdo.merge(input = os.path.join(dacycle['da.bio.input'], 'ensemble', "ra_???_%s.nc" % dt.strftime('%Y%m%d%H')), output = os.path.join(dacycle['da.bio.input'], 'ensemble', "ra_%s.nc" % dt.strftime('%Y%m%d%H')))
# os.chdir(dacycle['da.obsoperator.home'])
# os.system('python run_chain.py ctdas '+dacycle['time.start'].strftime('%Y-%m-%d')+' 0 168 -j meteo icbc emissions biofluxes int2lm post_int2lm cosmo')
# os.system('python run_chain.py ctdas '+dacycle['time.start'].strftime('%Y-%m-%d')+' 0 '168' -j meteo icbc emissions biofluxes int2lm post_int2lm cosmo')

        self.extract_model_data(dacycle, self.forecast_nmembers)

        for i in range(0, self.forecast_nmembers):
...
@@ -139,21 +140,7 @@ class ObservationOperator(object):
                f.variables['flask'][j,:] = model_data[:,j]
# print model_data[:,j]
            f.close()
# f.variables['flask'][i,:] = data[1]+np.random.randn(self.forecast_nmembers)
#### WARNING ACHTUNG PAZNJA POZOR VNEMANIE data[2] is model data mismatch (=1000) by default in tools/io4.py!!! pavle
# print i # i is num of hour
# f.variables['flask'][i,:] = data[1]+np.random.randn(self.forecast_nmembers)*data[2]
# print 'pavle',data[1]
# print 'pavle',np.random.randn(self.forecast_nmembers)
# print 'pavle',data[2]
# print 'i',i
# print 'data',data
# print 'model data',model_data
# print 'ids',ids # number of obs [11841779 11841780 and such] dim =7
# print 'obs',obs # observation data [0.00040033 0.0004001 and such] dim =7
# print 'mdm',mdm # model data mismatch (=1000.) dim = 7
# print self.forecast_nmembers
# sys.exit()

        logging.info('ObservationOperator finished successfully, output file written (%s)' % self.simulated_file)
...
@@ -162,48 +149,52 @@ class ObservationOperator(object):
        self.prepare_run()
        self.run(dacycle)

    def extract_model_data(self, dacycle, ens):
    def extract_model_data(self, dacycle, ensnum):

        time_stamp = dacycle['time.sample.stamp']
        sites = ("lhw", "brm", "jfj", "ssl")
        files2cat = []
        self.dacycle = dacycle
#        time_stamp = dacycle['time.sample.stamp']
        cosmo_time_stamp = dacycle['time.start'] + timedelta(hours=168)
#        print(cosmo_time_stamp)
        print(dacycle['time.nlag'])
        sys.exit()
        cosmo_out = "/scratch/snx3000/parsenov/ctdas/2013040100_0_168/cosmo"
        os.chdir(cosmo_out)

        for time in tools.iter_hours(starttime, hstart, hstop):
            co2_in_fn = time.strftime(cosmo_out + '/lffd%Y%m%d%H.nc')
            co2_out_fn = time.strftime(cosmo_out + '/CO2_' + ens + '_%Y%m%d%H.nc')
            hhl_fn = time.strftime(cosmo_out + '/lffd%Y%m%d%Hc.nc')
            cdo.expr("CO2=CO2_BG-CO2_GPP+CO2_RESP+CO2_A_CH+CO2_A", input = "-selname,CO2_BG,CO2_GPP,CO2_RESP,CO2_A_CH,CO2_A " + co2_in_fn, output = co2_out_fn)
            files2cat.append(co2_out_fn)

        cdo.selname("HHL", input = hhl_fn, output = "hhl.nc")
        cdo.cat(input = files2cat, output = "CO2_" + time_stamp + ".nc")

        sites = ("lhw", "brm", "jfj", "ssl")
        cdo.remapnn("lon=7.99_lat=46.54,", input = "CO2_" + time_stamp + ".nc", output = "CO2_jfj_" + time_stamp + ".nc")
        cdo.remapnn("lon=7.99_lat=46.54,", input = "hhl.nc", output = "hhl_jfj.nc")
        cdo.remapnn("lon=8.40_lat=47.48,", input = "CO2_" + time_stamp + ".nc", output = "CO2_lhw_" + time_stamp + ".nc")
        cdo.remapnn("lon=8.40_lat=47.48,", input = "hhl.nc", output = "hhl_lhw.nc")
        cdo.remapnn("lon=8.18_lat=47.19,", input = "CO2_" + time_stamp + ".nc", output = "CO2_brm_" + time_stamp + ".nc")
        cdo.remapnn("lon=8.18_lat=47.19,", input = "hhl.nc", output = "hhl_brm.nc")
        cdo.remapnn("lon=7.92_lat=47.92,", input = "CO2_" + time_stamp + ".nc", output = "CO2_ssl_" + time_stamp + ".nc")
        cdo.remapnn("lon=7.92_lat=47.92,", input = "hhl.nc", output = "hhl_ssl.nc")

        cosmo_time_stamp = dacycle['time.start'].strftime('%Y%m%d%H')    #+timedelta(hours=168)
        cosmo_out = "/scratch/snx3000/parsenov/ctdas/" + cosmo_time_stamp + "_0_168/cosmo/output/"
        cosmo_save = "/store/empa/em05/parsenov/cosmo_data/"
        hhl_fn = cosmo_out + 'lffd' + dacycle['time.start'].strftime('%Y%m%d%H') + 'c.nc'
        cdo.selname("HHL", input = hhl_fn, output = cosmo_out + "hhl.nc")
#        os.chdir(cosmo_out)

        for ens in range(0, ensnum):
            ens = str(ens).zfill(3)
            for dt in rrule.rrule(rrule.HOURLY, dtstart=dacycle['time.start'], until=dacycle['time.start']+timedelta(hours=24)):
            #for dt in rrule.rrule(rrule.HOURLY, dtstart=dacycle['time.start'], until=dacycle['time.start']+timedelta(hours=168)):
                dt = dt.strftime('%Y%m%d%H')
                co2_in_fn = cosmo_out + 'lffd' + dt + '.nc'
                co2_out_fn = cosmo_out + 'CO2_' + ens + '_' + dt + '.nc'
                cdo.expr("CO2=CO2_BG-GPP_" + ens + "+RESP_" + ens + "+CO2_A_CH+CO2_A", input = "-selname,CO2_BG,GPP_" + ens + ",RESP_" + ens + ",CO2_A_CH,CO2_A " + co2_in_fn, output = co2_out_fn)
                files2cat.append(co2_out_fn)

            cdo.cat(input = files2cat, output = cosmo_out + "CO2_" + ens + "_" + time_stamp + ".nc")

            cdo.remapnn("lon=7.99_lat=46.54,", input = cosmo_out + "CO2_" + ens + "_" + time_stamp + ".nc", output = cosmo_out + "CO2_jfj_" + ens + "_" + time_stamp + ".nc")
            cdo.remapnn("lon=7.99_lat=46.54,", input = cosmo_out + "hhl.nc", output = cosmo_out + "hhl_jfj.nc")
            cdo.remapnn("lon=8.40_lat=47.48,", input = cosmo_out + "CO2_" + ens + "_" + time_stamp + ".nc", output = cosmo_out + "CO2_lhw_" + ens + "_" + time_stamp + ".nc")
            cdo.remapnn("lon=8.40_lat=47.48,", input = cosmo_out + "hhl.nc", output = cosmo_out + "hhl_lhw.nc")
            cdo.remapnn("lon=8.18_lat=47.19,", input = cosmo_out + "CO2_" + ens + "_" + time_stamp + ".nc", output = cosmo_out + "CO2_brm_" + ens + "_" + time_stamp + ".nc")
            cdo.remapnn("lon=8.18_lat=47.19,", input = cosmo_out + "hhl.nc", output = cosmo_out + "hhl_brm.nc")
            cdo.remapnn("lon=7.92_lat=47.92,", input = cosmo_out + "CO2_" + ens + "_" + time_stamp + ".nc", output = cosmo_out + "CO2_ssl_" + ens + "_" + time_stamp + ".nc")
            cdo.remapnn("lon=7.92_lat=47.92,", input = cosmo_out + "hhl.nc", output = cosmo_out + "hhl_ssl.nc")

            for s, ss in enumerate(sites):
                site_height.main(ss, time_stamp)
                site_height.main(cosmo_out, str(ens), ss, time_stamp)

            cdo.intlevel("860", input = "CO2_60lev_" + ens + "_lhw_" + time_stamp + ".nc", output = "modelled_" + ens + "_lhw_" + time_stamp + ".nc")
            cdo.intlevel("797", input = "CO2_60lev_" + ens + "_brm_" + time_stamp + ".nc", output = "modelled_" + ens + "_brm_" + time_stamp + ".nc")
            cdo.intlevel("3580", input = "CO2_60lev_" + ens + "_jfj_" + time_stamp + ".nc", output = "modelled_" + ens + "_jfj_" + time_stamp + ".nc")
            cdo.intlevel("1205", input = "CO2_60lev_" + ens + "_ssl_" + time_stamp + ".nc", output = "modelled_" + ens + "_ssl_" + time_stamp + ".nc")

            cdo.intlevel("860", input = cosmo_out + "CO2_60lev_" + ens + "_lhw_" + time_stamp + ".nc", output = cosmo_out + "modelled_" + ens + "_lhw_" + time_stamp + ".nc")
            cdo.intlevel("797", input = cosmo_out + "CO2_60lev_" + ens + "_brm_" + time_stamp + ".nc", output = cosmo_out + "modelled_" + ens + "_brm_" + time_stamp + ".nc")
            cdo.intlevel("3580", input = cosmo_out + "CO2_60lev_" + ens + "_jfj_" + time_stamp + ".nc", output = cosmo_out + "modelled_" + ens + "_jfj_" + time_stamp + ".nc")
            cdo.intlevel("1205", input = cosmo_out + "CO2_60lev_" + ens + "_ssl_" + time_stamp + ".nc", output = cosmo_out + "modelled_" + ens + "_ssl_" + time_stamp + ".nc")

            cdo.cat(input = "modelled_" + ens + "_brm_" + time_stamp + ".nc modelled_" + ens + "_jfj_" + time_stamp + ".nc modelled_" + ens + "_lhw_" + time_stamp + ".nc modelled_" + ens + "_ssl_" + time_stamp + ".nc ", output = "model_" + ens + "_" + time_stamp + ".nc")
            cdo.cat(input = cosmo_out + "modelled_" + ens + "_brm_" + time_stamp + ".nc " + cosmo_out + "modelled_" + ens + "_jfj_" + time_stamp + ".nc " + cosmo_out + "modelled_" + ens + "_lhw_" + time_stamp + ".nc " + cosmo_out + "modelled_" + ens + "_ssl_" + time_stamp + ".nc ", output = cosmo_save + "model_" + ens + "_" + time_stamp + ".nc")

        sys.exit()
################### End Class ObservationOperator ###################
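# Illustrative sketch (not part of this commit): the per-station CDO chain used in
# extract_model_data above, condensed for one hypothetical ensemble member and the
# Jungfraujoch (jfj) coordinates/altitude that appear in the code. The member id,
# time stamp and file names below are assumptions for illustration only.
from cdo import Cdo

cdo = Cdo()
ens = '000'                            # hypothetical ensemble member id
time_stamp = '2013040100_2013040800'   # hypothetical dacycle['time.sample.stamp'] value

# 1) nearest-neighbour extraction of the model column at the station location
cdo.remapnn("lon=7.99_lat=46.54,", input="CO2_" + ens + "_" + time_stamp + ".nc",
            output="CO2_jfj_" + ens + "_" + time_stamp + ".nc")
# 2) site_height.main(...) combines the column with the HHL level heights into a 60-level profile
# 3) vertical interpolation of that profile to the station altitude (3580 m for jfj)
cdo.intlevel("3580", input="CO2_60lev_" + ens + "_jfj_" + time_stamp + ".nc",
             output="modelled_" + ens + "_jfj_" + time_stamp + ".nc")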
...
da/cosmo/obspack_globalviewplus2.py
0 → 100755
"""CarbonTracker Data Assimilation Shell (CTDAS) Copyright (C) 2017 Wouter Peters.
Users are recommended to contact the developers (wouter.peters@wur.nl) to receive
updates of the code. See also: http://www.carbontracker.eu.
This program is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software Foundation,
version 3. This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this
program. If not, see <http://www.gnu.org/licenses/>."""
#!/usr/bin/env python
# obs.py
"""
Author : peters
Revision History:
File created on 28 Jul 2010.
"""
import os
import sys
import logging
import datetime as dtm
#from string import strip
from numpy import array, logical_and, sqrt

sys.path.append(os.getcwd())
sys.path.append('../../')

identifier = 'CarbonTracker CO2 mole fractions'
version = '0.0'

from da.baseclasses.obs import Observations
import da.tools.io4 as io
import da.tools.rc as rc
################### Begin Class ObsPackObservations ###################
class ObsPackObservations(Observations):
    """ an object that holds data + methods and attributes needed to manipulate mole fraction values """

    def setup(self, dacycle):

        self.startdate = dacycle['time.sample.start']
        self.enddate = dacycle['time.sample.end']

        op_id = dacycle.dasystem['obspack.input.id']
        op_dir = dacycle.dasystem['obspack.input.dir']

        if not os.path.exists(op_dir):
            msg = 'Could not find the required ObsPack distribution (%s) ' % op_dir
            logging.error(msg)
            raise IOError(msg)
        else:
            self.obspack_dir = op_dir
            self.obspack_id = op_id

        self.datalist = []
    def add_observations(self):
        """ Returns a MoleFractionList holding individual MoleFractionSample objects for all obs in a file

            The ObsPack mole fraction files are provided as time series per site with all dates in sequence.
            We will loop over all site files in the ObsPackage, and subset each to our needs
        """

        # Step 1: Read list of available site files in package
        infile = os.path.join(self.obspack_dir, 'summary', '%s_dataset_summary.txt' % (self.obspack_id,))
        f = open(infile, 'r')
        lines = f.readlines()
        f.close()

        ncfilelist = []
        for line in lines:
            if not line.startswith('# dataset:'): continue

            items = line.split(':')
            #ncfile, lab , start_date, stop_date, data_comparison = items[0:5]
            #ncfile, lab , start_date, stop_date, data_comparison= line[:105].split()
            ncfile = items[1].strip()

            ncfilelist += [ncfile]

        logging.debug("ObsPack dataset info read, proceeding with %d netcdf files" % len(ncfilelist))

        for ncfile in ncfilelist:

            infile = os.path.join(self.obspack_dir, 'data', 'nc', ncfile + '.nc')
            ncf = io.ct_read(infile, 'read')
            idates = ncf.get_variable('time_components')
            dates = array([dtm.datetime(*d) for d in idates])

            subselect = logical_and(dates >= self.startdate, dates <= self.enddate).nonzero()[0]

            dates = dates.take(subselect, axis=0)

            if 'merge_num' in ncf.variables:
                obspacknum = ncf.get_variable('merge_num').take(subselect)
            else:
                obspacknum = ncf.get_variable('obspack_num').take(subselect)
            if 'ccggAllData' in ncfile:
                obspackid = ncf.get_variable('id').take(subselect, axis=0)
            else:
                obspackid = ncf.get_variable('obspack_id').take(subselect, axis=0)
            obspackid = [s.tostring().lower() for s in obspackid]
            obspackid = list(map(str.strip, str(obspackid)))
            datasetname = ncfile  # use full name of dataset to propagate for clarity
            lats = ncf.get_variable('latitude').take(subselect, axis=0)
            lons = ncf.get_variable('longitude').take(subselect, axis=0)
            alts = ncf.get_variable('altitude').take(subselect, axis=0)
            obs = ncf.get_variable('value').take(subselect, axis=0)
            species = ncf.get_attribute('dataset_parameter')
            flags = ncf.get_variable('obs_flag').take(subselect, axis=0)
            ncf.close()

            for n in range(len(dates)):
                self.datalist.append(MoleFractionSample(obspacknum[n], dates[n], datasetname, obs[n], 0.0, 0.0, 0.0, 0.0, flags[n], alts[n], lats[n], lons[n], obspackid[n], species, 1, 0.0, infile))

            logging.debug("Added %d observations from file (%s) to the Data list" % (len(dates), ncfile))

        logging.info("Observations list now holds %d values" % len(self.datalist))
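# Illustrative sketch (assumption, not part of this commit): how a '# dataset:' line in the
# ObsPack summary file is reduced to a netCDF file name by the parsing in add_observations
# above. The example line is made up for illustration.
line = "# dataset: co2_jfj_surface-insitu_42_allvalid"
if line.startswith('# dataset:'):
    ncfile = line.split(':')[1].strip()    # -> 'co2_jfj_surface-insitu_42_allvalid'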
    def add_simulations(self, filename, silent=False):
        """ Adds model simulated values to the mole fraction objects """

        if not os.path.exists(filename):
            msg = "Sample output filename for observations could not be found : %s" % filename
            logging.error(msg)
            logging.error("Did the sampling step succeed?")
            logging.error("...exiting")
            raise IOError(msg)

        ncf = io.ct_read(filename, method='read')
        ids = ncf.get_variable('obs_num')
        simulated = ncf.get_variable('flask')
        ncf.close()
        logging.info("Successfully read data from model sample file (%s)" % filename)

        obs_ids = self.getvalues('id').tolist()
        ids = list(map(int, ids))

        missing_samples = []

        for idx, val in zip(ids, simulated):
            if idx in obs_ids:
                index = obs_ids.index(idx)
                self.datalist[index].simulated = val  # in mol/mol
            else:
                missing_samples.append(idx)

        if not silent and missing_samples != []:
            logging.warning('Model samples were found that did not match any ID in the observation list. Skipping them...')
            #msg = '%s'%missing_samples ; logging.warning(msg)

        logging.debug("Added %d simulated values to the Data list" % (len(ids) - len(missing_samples)))
    def write_sample_coords(self, obsinputfile):
        """
            Write the information needed by the observation operator to a file. Return the filename that was written for later use
        """

        if len(self.datalist) == 0:
            #f.close()
            #return obsinputfile
            logging.debug("No observations found for this time period, nothing written to obs file")
        else:
            f = io.CT_CDF(obsinputfile, method='create')
            logging.debug('Creating new observations file for ObservationOperator (%s)' % obsinputfile)

            dimid = f.add_dim('obs', len(self.datalist))
            dim200char = f.add_dim('string_of200chars', 200)
            dim10char = f.add_dim('string_of10chars', 10)
            dimcalcomp = f.add_dim('calendar_components', 6)

            data = self.getvalues('id')

            savedict = io.std_savedict.copy()
            savedict['name'] = "obs_num"
            savedict['dtype'] = "int"
            savedict['long_name'] = "Unique_Dataset_observation_index_number"
            savedict['units'] = ""
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
            f.add_data(savedict)

            data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')]

            savedict = io.std_savedict.copy()
            savedict['dtype'] = "int"
            savedict['name'] = "date_components"
            savedict['units'] = "integer components of UTC date/time"
            savedict['dims'] = dimid + dimcalcomp
            savedict['values'] = data
            savedict['missing_value'] = -9
            savedict['comment'] = "Calendar date components as integers. Times and dates are UTC."
            savedict['order'] = "year, month, day, hour, minute, second"
            f.add_data(savedict)

            data = self.getvalues('lat')

            savedict = io.std_savedict.copy()
            savedict['name'] = "latitude"
            savedict['units'] = "degrees_north"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('lon')

            savedict = io.std_savedict.copy()
            savedict['name'] = "longitude"
            savedict['units'] = "degrees_east"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('height')

            savedict = io.std_savedict.copy()
            savedict['name'] = "altitude"
            savedict['units'] = "meters_above_sea_level"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -999.9
            f.add_data(savedict)

            data = self.getvalues('samplingstrategy')

            savedict = io.std_savedict.copy()
            savedict['dtype'] = "int"
            savedict['name'] = "sampling_strategy"
            savedict['units'] = "NA"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['missing_value'] = -9
            f.add_data(savedict)

            data = self.getvalues('evn')

            savedict = io.std_savedict.copy()
            savedict['dtype'] = "char"
            savedict['name'] = "obs_id"
            savedict['units'] = "ObsPack datapoint identifier"
            savedict['dims'] = dimid + dim200char
            savedict['values'] = data
            savedict['missing_value'] = '!'
            f.add_data(savedict)

            data = self.getvalues('obs')

            savedict = io.std_savedict.copy()
            savedict['name'] = "observed"
            savedict['long_name'] = "observedvalues"
            savedict['units'] = "mol mol-1"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Observations used in optimization'
            f.add_data(savedict)

            data = self.getvalues('mdm')

            savedict = io.std_savedict.copy()
            savedict['name'] = "modeldatamismatch"
            savedict['long_name'] = "modeldatamismatch"
            savedict['units'] = "[mol mol-1]"
            savedict['dims'] = dimid
            savedict['values'] = data.tolist()
            savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
            f.add_data(savedict)

            f.close()

            logging.debug("Successfully wrote data to obs file")
            logging.info("Sample input file for obs operator now in place [%s]" % obsinputfile)
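# Illustrative sketch (assumption, not part of this commit): reading back a file written by
# write_sample_coords above with netCDF4, to show the layout the observation operator consumes.
# The file name 'observations.nc' is hypothetical; the dimension and variable names are those
# created above.
from netCDF4 import Dataset

with Dataset('observations.nc') as nc:
    n_obs = len(nc.dimensions['obs'])
    obs_num = nc.variables['obs_num'][:]          # unique observation index numbers
    observed = nc.variables['observed'][:]        # observed mole fractions (mol mol-1)
    mdm = nc.variables['modeldatamismatch'][:]    # model-data mismatch std. dev. (mol mol-1)
    print("%d observations, first id %d" % (n_obs, obs_num[0]))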
    def add_model_data_mismatch(self, filename):
        """
            Get the model-data mismatch values for this cycle.

                (1) Open a sites_weights file
                (2) Parse the data
                (3) Compare site list against data
                (4) Take care of double sites, etc

        """

        if not os.path.exists(filename):
            msg = 'Could not find the required sites.rc input file (%s) ' % filename
            logging.error(msg)
            raise IOError(msg)
        else:
            self.sites_file = filename

        sites_weights = rc.read(self.sites_file)

        self.rejection_threshold = int(sites_weights['obs.rejection.threshold'])
        self.global_R_scaling = float(sites_weights['global.R.scaling'])
        self.n_site_categories = int(sites_weights['n.site.categories'])

        logging.debug('Model-data mismatch rejection threshold: %d ' % self.rejection_threshold)
        logging.warning('Model-data mismatch scaling factor : %f ' % self.global_R_scaling)
        logging.debug('Model-data mismatch site categories : %d ' % self.n_site_categories)

        cats = [k for k in sites_weights.keys() if 'site.category' in k]

        site_categories = {}
        for key in cats:
            name, error, may_localize, may_reject = sites_weights[key].split(';')
            name = name.strip().lower()
            error = float(error)
            may_reject = ("TRUE" in may_reject.upper())
            may_localize = ("TRUE" in may_localize.upper())
            site_categories[name] = {'category': name, 'error': error, 'may_localize': may_localize, 'may_reject': may_reject}

        for obs in self.datalist:  # first loop over all available data points to set flags correctly

            obs.mdm = 1E-6  # default is very high model-data-mismatch, until explicitly set by script
            obs.flag = 1

            identifier = obs.code
            species, site, method, lab, datasetnr = identifier.split('_')

#            nr_obs_per_day = len([c.code for c in self.datalist if c.code == obs.code and c.xdate.day == obs.xdate.day and c.flag == 0])
#            obs.mdm = site_info[identifier]['error'] * sqrt(nr_obs_per_day) * self.global_R_scaling
#            obs.may_localize = site_info[identifier]['may_localize']
#            obs.may_reject = site_info[identifier]['may_reject']

        # Add site_info dictionary to the Observations object for future use

        logging.debug("Added Model Data Mismatch to all samples ")
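# Illustrative sketch (assumption, not part of this commit): the ';'-separated layout of one
# 'site.category' entry in the sites_weights rc file, and the parsing applied above. The entry
# value itself is made up for illustration.
entry = "surface ; 1.5 ; TRUE ; FALSE"             # name ; error ; may_localize ; may_reject
name, error, may_localize, may_reject = entry.split(';')
name = name.strip().lower()                        # -> 'surface'
error = float(error)                               # -> 1.5
may_localize = ("TRUE" in may_localize.upper())    # -> True
may_reject = ("TRUE" in may_reject.upper())        # -> False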
    def write_sample_auxiliary(self, auxoutputfile):
        """
            Write selected information contained in the Observations object to a file.
        """

        f = io.CT_CDF(auxoutputfile, method='create')
        logging.debug('Creating new auxiliary sample output file for postprocessing (%s)' % auxoutputfile)

        dimid = f.add_dim('obs', len(self.datalist))
        dim200char = f.add_dim('string_of200chars', 200)
        dim10char = f.add_dim('string_of10chars', 10)
        dimcalcomp = f.add_dim('calendar_components', 6)

        if len(self.datalist) == 0:
            f.close()
            #return outfile

        data = self.getvalues('id')

        savedict = io.std_savedict.copy()
        savedict['name'] = "obs_num"
        savedict['dtype'] = "int"
        savedict['long_name'] = "Unique_Dataset_observation_index_number"
        savedict['units'] = ""
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = "Unique index number within this dataset ranging from 0 to UNLIMITED."
        f.add_data(savedict)

        data = [[d.year, d.month, d.day, d.hour, d.minute, d.second] for d in self.getvalues('xdate')]

        savedict = io.std_savedict.copy()
        savedict['dtype'] = "int"
        savedict['name'] = "date_components"
        savedict['units'] = "integer components of UTC date/time"
        savedict['dims'] = dimid + dimcalcomp
        savedict['values'] = data
        savedict['missing_value'] = -9
        savedict['comment'] = "Calendar date components as integers. Times and dates are UTC."
        savedict['order'] = "year, month, day, hour, minute, second"
        f.add_data(savedict)

        data = self.getvalues('obs')

        savedict = io.std_savedict.copy()
        savedict['name'] = "observed"
        savedict['long_name'] = "observedvalues"
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Observations used in optimization'
        f.add_data(savedict)

        data = self.getvalues('mdm')

        savedict = io.std_savedict.copy()
        savedict['name'] = "modeldatamismatch"
        savedict['long_name'] = "modeldatamismatch"
        savedict['units'] = "[mol mol-1]"
        savedict['dims'] = dimid
        savedict['values'] = data.tolist()
        savedict['comment'] = 'Standard deviation of mole fractions resulting from model-data mismatch'
        f.add_data(savedict)

        data = self.getvalues('simulated')

        dimmembers = f.add_dim('members', data.shape[1])

        savedict = io.std_savedict.copy()
        savedict['name'] = "modelsamples"
        savedict['long_name'] = "modelsamples for all ensemble members"
        savedict['units'] = "mol mol-1"
        savedict['dims'] = dimid + dimmembers
        savedict['values'] = data.tolist()
        savedict['comment'] = 'simulated mole fractions based on optimized state vector'
        f.add_data(savedict)

        data = self.getvalues('fromfile')

        savedict = io.std_savedict.copy()
        savedict['name'] = "inputfilename"
        savedict['long_name'] = "name of file where original obs data was taken from"
        savedict['dtype'] = "char"
        savedict['dims'] = dimid + dim200char
        savedict['values'] = data
        savedict['missing_value'] = '!'
        f.add_data(savedict)

        f.close()

        logging.debug("Successfully wrote data to auxiliary sample output file (%s)" % auxoutputfile)

        #return outfile
################### End Class CtObservations ###################
################### Begin Class MoleFractionSample ###################
class MoleFractionSample(object):
    """
        Holds the data that defines a mole fraction Sample in the data assimilation framework. So far, this includes all
        attributes listed below in the __init__ method. One can additionally make more types of data, or make new
        objects for specific projects.
    """

    def __init__(self, idx, xdate, code='XXX',