Commit 37baecb4 authored by Peters, Wouter's avatar Peters, Wouter
Browse files

The da_initexit.py and das.py are now starting to use the new class-based modules.

parent 4697e5cf
! Info on the data assimilation cycle
time.restart : False
time.start : 2005030500
time.finish : 2005030700
time.start : 2005-03-05 00:00:00
time.finish : 2005-03-07 00:00:00
time.cycle : 1
time.nlag : 2
dir.da_run : ${HOME}/tmp/test_da
......
......@@ -9,6 +9,17 @@ File created on 13 May 2009.
"""
needed_da_items=[
'time.start',
'time.finish',
'time.nlag',
'time.cycle',
'dir.da_run',
'forecast.model',
'forecast.model.rc',
'da.system']
import logging
import os
import sys
......@@ -18,324 +29,432 @@ import datetime
from tools_da import CreateDirs
from tools_da import AdvanceTime
from tools_da import StartLogger
from tools_da import ParseOptions
def Init():
""" Initialization of the data assimilation job. Parse command line options, read specified rc-file values """
class CycleControl():
"""
This object controls a data assimilation cycle. It is created using information from a da rc-file with settings, and
also using the command line options passed to the job control script that runs in the foreground, or through the queue.
This object is passed around throughout the code as it also contains information on directory structures, requested time
windows, and other resources.
"""
def __init__(self,opts,args):
"""
Initialization occurs from passed options and arguments to the job control script.
"""
# Initialize the logging system
self.LoadRc(args['rc'])
self.ValidateRC()
logfilename = 'da.test.%s.log' % os.getpid()
dummy = StartLogger(logfile=logfilename)
# Add some useful variables to the rc-file dictionary
logging.info('Starting execution of Main program')
self.da_settings['jobrcfilename'] = args["jobrcfilename"]
self.da_settings['log'] = args["logfile"]
self.da_settings['dir.da_submit'] = os.getcwd()
self.da_settings['da.crash.recover'] = '-r' in opts
self.da_settings['verbose'] = '-v' in opts
# Start execution, first pass command line arguments
opts, args = ParseOptions()
def __str__(self):
"""
String representation of a CycleControl object
"""
# Check for the existence of the "rc" key, this is needed input for the script to pass a set of external options
msg = "===============================================================" ; print msg
msg = "DAS rc-file is %s" % self.RcFileName ; print msg
msg = "DAS log file is %s" % self.da_settings['log'] ; print msg
msg = "DAS run directory is %s" % self.da_settings['dir.da_run'] ; print msg
msg = "DAS inverse system is %s" % self.da_settings['da.system'] ; print msg
msg = "DAS forecast model is %s" % self.da_settings['forecast.model'] ; print msg
msg = "===============================================================" ; print msg
if not args.has_key("rc"):
msg = "There is no rc-file specified on the command line. Please use rc=yourfile.rc" ; logging.error(msg)
raise IOError,msg
elif not os.path.exists(args['rc']):
msg = "The specified rc-file (%s) does not exist " % args['rc'] ; logging.error(msg)
raise IOError,msg
else:
rc_da_shell = rc.read(args['rc'])
return ""
# Add some useful variables to the rc-file dictionary
rc_da_shell['log'] = logfilename
rc_da_shell['pid'] = '%s' % os.getpid()
rc_da_shell['dir.da_submit'] = os.getcwd()
rc_da_shell['da.crash.recover'] = '-r' in opts
rc_da_shell['verbose'] = '-v' in opts
def LoadRc(self,RcFileName):
"""
This method loads a DAS rc-file with settings for this simulation
"""
return rc_da_shell
self.da_settings = rc.read(RcFileName)
self.RcFileName = RcFileName
self.DaRcLoaded = True
def SetupFileStructure(rc_da_shell):
""" Create file structure needed for data assimilation system.
In principle this looks like:
msg = 'DAS rc-file (%s) loaded successfully'%self.RcFileName ; logging.info(msg)
${da_rundir}
${da_rundir}/input
${da_rundir}/output
${da_rundir}/exec
${da_rundir}/diagnostics
${da_rundir}/analysis
${da_rundir}/jobs
${da_rundir}/save
${da_rundir}/save/one-ago
${da_rundir}/save/two-ago
return True
Note that the exec dir will actually be a simlink to the directory where
the SIBCASA or TM5 model executable lives. This directory is passed through
the da.rc file. The observation input files will be placed in the exec dir,
and the resulting simulated values will be retrieved from there as well.
"""
# Create the run directory for this DA job, including I/O structure
def ValidateRC(self):
"""
Validate the contents of the rc-file given a dictionary of required keys
"""
CreateDirs(rc_da_shell['dir.da_run'])
rc_da_shell['dir.exec'] = os.path.join(rc_da_shell['dir.da_run'],'exec')
rc_da_shell['dir.input'] = os.path.join(rc_da_shell['dir.da_run'],'input')
rc_da_shell['dir.output'] = os.path.join(rc_da_shell['dir.da_run'],'output')
rc_da_shell['dir.diagnostics'] = os.path.join(rc_da_shell['dir.da_run'],'diagnostics')
rc_da_shell['dir.analysis'] = os.path.join(rc_da_shell['dir.da_run'],'analysis')
rc_da_shell['dir.jobs'] = os.path.join(rc_da_shell['dir.da_run'],'jobs')
rc_da_shell['dir.save'] = os.path.join(rc_da_shell['dir.da_run'],'save')
CreateDirs(os.path.join(rc_da_shell['dir.exec']))
CreateDirs(os.path.join(rc_da_shell['dir.input']))
CreateDirs(os.path.join(rc_da_shell['dir.output']))
CreateDirs(os.path.join(rc_da_shell['dir.diagnostics']))
CreateDirs(os.path.join(rc_da_shell['dir.analysis']))
CreateDirs(os.path.join(rc_da_shell['dir.jobs']))
CreateDirs(os.path.join(rc_da_shell['dir.save']))
CreateDirs(os.path.join(rc_da_shell['dir.save'],'one-ago'))
CreateDirs(os.path.join(rc_da_shell['dir.save'],'two-ago'))
msg = 'Succesfully created the file structure for the assimilation job' ; logging.info(msg)
def RecoverRun(rc_da_shell):
"""Prepare a recovery from a crashed run. This consists of:
- copying all data from the save/one-ago folder,
- replacing all rc-file items with those from da_runtime.rc
"""
for k,v in self.da_settings.iteritems():
if v == 'True' : self.da_settings[k] = True
if v == 'False': self.da_settings[k] = False
if 'date' in k : self.da_settings[k] = datetime.datetime.strptime(v,'%Y-%m-%d %H:%M:%S')
if 'time.start' in k :
self.da_settings[k] = datetime.datetime.strptime(v,'%Y-%m-%d %H:%M:%S')
if 'time.finish' in k :
self.da_settings[k] = datetime.datetime.strptime(v,'%Y-%m-%d %H:%M:%S')
savedir = os.path.join(rc_da_shell['dir.save'])
recoverydir = os.path.join(rc_da_shell['dir.save'],'one-ago')
for key in needed_da_items:
# Replace rc-items with those from the crashed run last rc-file
if not self.da_settings.has_key(key):
status,msg = ( False,'Missing a required value in rc-file : %s' % key)
logging.error(msg)
raise IOError,msg
file_rc_rec = os.path.join(rc_da_shell['dir.save'],'da_runtime.rc')
rc_rec = rc.read(file_rc_rec)
status,msg = ( True,'DAS settings have been validated succesfully' ) ; logging.debug(msg)
for k,v in rc_rec.iteritems():
rc_da_shell[k] = v
return None
msg = "Replaced rc-items.... " ; logging.debug(msg)
msg = "Next cycle start date is %s" % rc_da_shell['time.start'] ; logging.debug(msg)
return None
def ParseTimes(self):
"""
Parse time related parameters into datetime objects for later use
"""
from tools_da import AdvanceTime
startdate = self.da_settings['time.start']
finaldate = self.da_settings['time.finish']
def IOSaveData(rc_da_shell, io_option='restore', save_option='partial',filter=[]):
""" Store or restore model state to/from a temporary directory. Two save_option options are available:
if finaldate <= startdate:
msg = 'The start date (%s) is not greater than the end date (%s), please revise'%(startdate.strftime('%Y%M%d'),finaldate.strftime('%Y%M%d'))
logging.error(msg)
raise ValueError
#
cyclelength = self.da_settings['time.cycle'] # get time step
(1) save_option = 'partial' : Save the background concentration fields only, to use for a later advance of the background
(2) save_option = 'full' : Save all restart data, to use for a next cycle
# Determine end date
While also two IO options are available:
if cyclelength == 'infinite':
enddate = finaldate
else:
enddate = AdvanceTime(startdate,cyclelength)
(1) io_option = restore : Get data from a directory
(2) io_option = store : Save data to a directory
#
if enddate > finaldate: # do not run beyon finaldate
enddate = finaldate
In case of a 'store' command the target folder is re-created so that the contents are empty to begin with.
self.da_settings['startdate'] = startdate
self.da_settings['enddate'] = enddate
self.da_settings['finaldate'] = finaldate
self.da_settings['cyclelength'] = cyclelength
There is currently room for the specific sub-models to also add save data to the mix. This is done through a
forecast.model dependend import statement, followed by a call to function:
msg = "===============================================================" ; logging.info(msg)
msg = "DAS start date is %s" % startdate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
msg = "DAS end date is %s" % enddate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
msg = "DAS final date is %s" % finaldate.strftime('%Y-%m-%d %H:%M') ; logging.info(msg)
msg = "DAS cycle length is %s" % cyclelength ; logging.info(msg)
msg = "===============================================================" ; logging.info(msg)
model.IOSaveData(**args)
return None
"""
def Initialize(self):
""" Determine how to proceed with this cycle:
(a) recover from crash : get the save data from the one-ago folder and replace the da_runtime variables with those from the save dir
(b) restart cycle : get the save data from the one-ago folder and use latest da_runtime variables from the exec dir
(c) fresh start : set up the required file structure for this simulation
"""
# import modules, note that depending on the type of assimilation system, different submodules are imported!
#
# case 1: A recover from a previous crash, this is signaled by flag "-r"
#
if self.da_settings['da.crash.recover']:
msg = "Recovering simulation from data in: %s" % self.da_settings['dir.da_run'] ; logging.info(msg)
if rc_da_shell['forecast.model'] == 'TM5': import tm5_tools as model
elif rc_da_shell['forecast.model'] == 'SIBCASA': import sibcasa_tools as model
elif rc_da_shell['forecast.model'] == 'WRF': import wrf_tools as model
dummy = self.SetupFileStructure()
if io_option not in ['store','restore']:
raise ValueError,'Invalid option specified for io_option (%s)' % io_option
if save_option not in ['partial','full']:
raise ValueError,'Invalid option specified for save_option (%s)' % save_option
dummy = self.IOSaveData(io_option='restore',save_option='full',filter=[])
dummy = self.RecoverRun()
#
# case 2: A continuation, this is signaled by rc-item time.restart = True
#
elif self.da_settings['time.restart']:
savedir = os.path.join(rc_da_shell['dir.save'])
oneagodir = os.path.join(rc_da_shell['dir.save'],'one-ago')
tempdir = os.path.join(rc_da_shell['dir.save'],'tmp')
msg = "Restarting filter from previous step" ; logging.info(msg)
dummy = self.SetupFileStructure()
if save_option == 'partial': # save background data to tmp dir
dummy = self.IOSaveData(io_option='restore',save_option='full',filter=[])
#
# case 3: A fresh start, this is signaled by rc-item time.restart = False
#
elif not self.da_settings['time.restart']:
msg = "First time step in filter sequence" ; logging.info(msg)
if io_option == 'store':
dummy = self.SetupFileStructure()
dummy = model.WriteSaveData(rc_da_shell)
targetdir = tempdir
sourcedir = savedir
self.ParseTimes()
self.WriteRC(self.da_settings['jobrcfilename'])
elif io_option == 'restore':
return None
sourcedir = tempdir
targetdir = savedir
elif save_option == 'full': # move all save data to one-ago dir
def SetupFileStructure(self):
"""
Create file structure needed for data assimilation system.
In principle this looks like:
if io_option == 'store':
${da_rundir}
${da_rundir}/input
${da_rundir}/output
${da_rundir}/exec
${da_rundir}/diagnostics
${da_rundir}/analysis
${da_rundir}/jobs
${da_rundir}/save
${da_rundir}/save/one-ago
${da_rundir}/save/two-ago
dummy = model.WriteSaveData(rc_da_shell)
Note that the exec dir will actually be a simlink to the directory where
the SIBCASA or TM5 model executable lives. This directory is passed through
the da.rc file. The observation input files will be placed in the exec dir,
and the resulting simulated values will be retrieved from there as well.
"""
from tools_da import CreateDirs
targetdir = oneagodir
sourcedir = savedir
# Create the run directory for this DA job, including I/O structure
elif io_option == 'restore':
CreateDirs(self.da_settings['dir.da_run'])
sourcedir = oneagodir
targetdir = savedir
self.da_settings['dir.exec'] = os.path.join(self.da_settings['dir.da_run'],'exec')
self.da_settings['dir.input'] = os.path.join(self.da_settings['dir.da_run'],'input')
self.da_settings['dir.output'] = os.path.join(self.da_settings['dir.da_run'],'output')
self.da_settings['dir.diagnostics'] = os.path.join(self.da_settings['dir.da_run'],'diagnostics')
self.da_settings['dir.analysis'] = os.path.join(self.da_settings['dir.da_run'],'analysis')
self.da_settings['dir.jobs'] = os.path.join(self.da_settings['dir.da_run'],'jobs')
self.da_settings['dir.save'] = os.path.join(self.da_settings['dir.da_run'],'save')
# If "store" is requested, recreate target dir, cleaning the contents
CreateDirs(os.path.join(self.da_settings['dir.exec']))
CreateDirs(os.path.join(self.da_settings['dir.input']))
CreateDirs(os.path.join(self.da_settings['dir.output']))
CreateDirs(os.path.join(self.da_settings['dir.diagnostics']))
CreateDirs(os.path.join(self.da_settings['dir.analysis']))
CreateDirs(os.path.join(self.da_settings['dir.jobs']))
CreateDirs(os.path.join(self.da_settings['dir.save']))
CreateDirs(os.path.join(self.da_settings['dir.save'],'one-ago'))
CreateDirs(os.path.join(self.da_settings['dir.save'],'two-ago'))
if io_option == 'store':
CreateDirs(os.path.join(targetdir),forceclean=True)
msg = 'Succesfully created the file structure for the assimilation job' ; logging.info(msg)
msg = "Performing a %s %s of data" % (save_option,io_option) ; logging.info(msg)
msg = " from directory: %s " % sourcedir ; logging.info(msg)
msg = " to directory: %s " % targetdir ; logging.info(msg)
msg = " with filter : %s " % filter ; logging.info(msg)
def RecoverRun(self):
"""Prepare a recovery from a crashed run. This consists of:
- copying all data from the save/one-ago folder,
- replacing all rc-file items with those from da_runtime.rc
"""
for file in os.listdir(sourcedir):
savedir = os.path.join(self.da_settings['dir.save'])
recoverydir = os.path.join(self.da_settings['dir.save'],'one-ago')
file = os.path.join(sourcedir,file)
# Replace rc-items with those from the crashed run last rc-file
if os.path.isdir(file): # skip dirs
skip = True
elif filter == []: # copy all
skip= False
else: # check filter
skip = True # default skip
for f in filter:
if f in file:
skip = False # unless in filter
break
if skip:
msg = " [skip] .... %s " % file ; logging.debug(msg)
continue
file_rc_rec = os.path.join(self.da_settings['dir.save'],'da_runtime.rc')
rc_rec = rc.read(file_rc_rec)
if io_option == 'store' and save_option == 'full':
msg = " [move] .... %s " % file ; logging.info(msg)
dummy = shutil.move(file,file.replace(sourcedir,targetdir) )
for k,v in rc_rec.iteritems():
self.da_settings[k] = v
else:
msg = " [copy] .... %s " % file ; logging.info(msg)
dummy = shutil.copy(file,file.replace(sourcedir,targetdir) )
self.ValidateRC()
msg = "Replaced rc-items.... " ; logging.debug(msg)
msg = "Next cycle start date is %s" % self.da_settings['time.start'] ; logging.debug(msg)
return None
def Finalize(self):
"""
Finalize the da cycle, this means writing the save data and rc-files for the next run
"""
def StartRestartRecover(rc_da_shell):
""" Determine how to proceed with this cycle:
(a) recover from crash : get the save data from the one-ago folder and replace the da_runtime variables with those from the save dir
(b) restart cycle : get the save data from the one-ago folder and use latest da_runtime variables from the exec dir
(c) fresh start : set up the required file structure for this simulation
"""
self.IOSaveData(io_option='store',save_option='full')
self.WriteNewRCfile()
self.SubmitNextCycle()
#
# case 1: A recover from a previous crash, this is signaled by flag "-r"
#
if rc_da_shell['da.crash.recover']:
msg = "Recovering simulation from data in: %s" % rc_da_shell['dir.da_run'] ; logging.info(msg)
dummy = SetupFileStructure(rc_da_shell)
def IOSaveData(self, io_option='restore', save_option='partial',filter=[]):
""" Store or restore model state to/from a temporary directory. Two save_option options are available:
dummy = IOSaveData(rc_da_shell,io_option='restore',save_option='full',filter=[])
(1) save_option = 'partial' : Save the background concentration fields only, to use for a later advance of the background
(2) save_option = 'full' : Save all restart data, to use for a next cycle
dummy = RecoverRun(rc_da_shell)
#
# case 2: A continuation, this is signaled by rc-item time.restart = True
#
elif rc_da_shell['time.restart']:
msg = "Restarting filter from previous step" ; logging.info(msg)
While also two IO options are available:
dummy = SetupFileStructure(rc_da_shell)
(1) io_option = restore : Get data from a directory
(2) io_option = store : Save data to a directory
dummy = IOSaveData(rc_da_shell,io_option='restore',save_option='full',filter=[])
#
# case 3: A fresh start, this is signaled by rc-item time.restart = False
#
elif not rc_da_shell['time.restart']:
msg = "First time step in filter sequence" ; logging.info(msg)
In case of a 'store' command the target folder is re-created so that the contents are empty to begin with.
dummy = SetupFileStructure(rc_da_shell)
There is currently room for the specific sub-models to also add save data to the mix. This is done through a
forecast.model dependend import statement, followed by a call to function:
return None
#
def WriteNewRCfile(rc_da_shell):
""" Write the rc-file for the next DA cycle. Note that the start time for the next cycle is the end time of this one, while
the end time for the next cycle is the current end time + one cycle length. The resulting rc-file is written twice:
(1) to the dir.exec so that it can be used when resubmitting the next cycle
(2) to the dir.save so that it can be backed up with the rest of the save data, for later recovery if needed
"""
model.IOSaveData(**args)
# These first two lines advance the filter time for the next cycle
"""
rc_da_shell['startdate'] = rc_da_shell['enddate']
rc_da_shell['enddate'] = AdvanceTime(rc_da_shell['enddate'],rc_da_shell['cyclelength'])
if io_option not in ['store','restore']:
raise ValueError,'Invalid option specified for io_option (%s)' % io_option
if save_option not in ['partial','full']:
raise ValueError,'Invalid option specified for save_option (%s)' % save_option
# The rest is info needed for a system restart
rc_da_shell['time.restart'] = True
rc_da_shell['time.start'] = rc_da_shell['startdate'].strftime('%Y%m%d%H')
rc_da_shell['time.finish'] = rc_da_shell['finaldate'].strftime('%Y%m%d%H')
savedir = os.path.join(self.da_settings['dir.save'])
oneagodir = os.path.join(self.da_settings['dir.save'],'one-ago')
tempdir = os.path.join(self.da_settings['dir.save'],'tmp')
fname = os.path.join(rc_da_shell['dir.exec'],'da_runtime.rc')
rc_da_shell['da.restart.fname'] = fname
dummy = rc.write(fname,rc_da_shell)
msg = 'Wrote new da_runtime.rc file to working directory' ; logging.debug(msg)
if save_option == 'partial': # save background data to tmp dir
# Also write this info to the savedir
if io_option == 'store':
fname = os.path.join(rc_da_shell['dir.save'],'da_runtime.rc')
dummy = rc.write(fname,rc_da_shell)
targetdir = tempdir
sourcedir = savedir
msg = 'Wrote new da_runtime.rc file to save directory' ; logging.debug(msg)
elif io_option == 'restore':
def WriteRC(rc_da_shell,filename):
""" Write RC file after each process to reflect updated info """
sourcedir = tempdir
targetdir = savedir
rcname = filename
fname = './'+rcname
dummy = rc.write(fname,rc_da_shell)
msg = 'Wrote expanded rc-file (%s) to current directory (%s)'%(rcname,rc_da_shell['dir.exec']) ; logging.debug(msg)
return None
elif save_option == 'full': # move all save data to one-ago dir
def SubmitNextCycle(rc_da_shell):
""" submit next job in cycle, if needed. The end date is held against the final date. A subprocess is called that
submits the next job, without waiting for its result so this cycle can finish and exit.
"""
import subprocess
import os
if io_option == 'store':
targetdir = oneagodir
sourcedir = savedir
elif io_option == 'restore':
sourcedir = oneagodir
targetdir = savedir
# If "store" is requested, recreate target dir, cleaning the contents
if io_option == 'store':
CreateDirs(os.path.join(targetdir),forceclean=True)
msg = "Performing a %s %s of data" % (save_option,io_option) ; logging.info(msg)
msg = " from directory: %s " % sourcedir ; logging.info(msg)
msg = " to directory: %s " % targetdir ; logging.info(msg)
msg = " with filter : %s " % filter ; logging.info(msg)
for file in os.listdir(sourcedir):
file = os.path.join(sourcedir,file)
if os.path.isdir(file): # skip dirs
skip = True
elif filter == []: # copy all
skip= False
else: # check filter
skip = True # default skip
for f in filter:
if f in file:
skip = False # unless in filter
break
if skip:
msg = " [skip] .... %s " % file ; logging.debug(msg)
continue
if io_option == 'store' and save_option == 'full':
msg = " [move] .... %s " % file ; logging.info(msg)
dummy = shutil.move(file,file.replace(sourcedir,targetdir) )
else:
msg = " [copy] .... %s " % file ; logging.info(msg)
dummy = shutil.copy(file,file.replace(sourcedir,targetdir) )
#
def WriteNewRCfile(self):
""" Write the rc-file for the next DA cycle. Note that the start time for the next cycle is the end time of this one, while
the end time for the next cycle is the current end time + one cycle length. The resulting rc-file is written twice:
(1) to the dir.exec so that it can be used when resubmitting the next cycle
(2) to the dir.save so that it can be backed up with the rest of the save data, for later recovery if needed
"""
# These first two lines advance the filter time for the next cycle