Skip to content
Snippets Groups Projects
Commit 4f6af55b authored by Lannoy, Carlos de
Browse files

run production pipeline on the fly during compilation

parent d30580b4
No related branches found
No related tags found
No related merge requests found
......@@ -49,8 +49,7 @@ kmer_list = ('--kmer-list', {
# Option spec for --nn-directory, stored as (flag, argparse keyword arguments)
# so parser builders can register it with parser.add_argument(spec[0], **spec[1]).
# No 'default' entry is given, so argparse yields None when the flag is omitted.
nn_directory = ('--nn-directory', {
    'type': str,
    'required': False,
})
model = ('--model', {
......@@ -285,11 +284,64 @@ def get_optimize_hyperparams_parser():
def get_compile_model_parser():
    """Build the command line parser for compiling a multi-network model.

    Registers, in order:
      * --kmer-list (a local variant with an empty-string default), followed by
        the module-level target_16S, nn_directory and out_model option specs;
      * the --train-required switch;
      * options whose help text marks them as only used together with
        --train-required (training/test reads, output dir, cores, parameter
        file, hdf path, uncenter-kmer).

    :return: the configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(description='Compile a multi-network model from single k-mer models, for use '
                                                 'in run_inference.')

    # Local variant of the k-mer list option; named differently from the
    # module-level `kmer_list` spec so that one is not shadowed.
    kmer_list_arg = ('--kmer-list', {
        'type': str,
        'help': 'txt list of k-mers',
        'default': ''  # need it to be existent; passed to production pipeline
    })
    for arg in (kmer_list_arg, target_16S, nn_directory, out_model):
        parser.add_argument(arg[0], **arg[1])
    parser.add_argument('--train-required', action='store_true',
                        help='Train new models as required [default: use only available models]')

    # Shared help prefix; it names the flag exactly as registered above so the
    # help output points users at an option that actually exists.
    train_only = '*ONLY USED IF --train-required* '

    training_reads = ('--training-reads', {
        'type': str,
        'required': False,
        'help': train_only + 'Directory containing fast5 format reads, for nn training.'
    })
    test_reads = ('--test-reads', {
        'type': str,
        'required': False,
        'help': train_only + 'Directory containing fast5 format reads, for nn testing.'
    })
    out_dir = ('--out-dir', {
        # Pass the converter directly; a lambda wrapper adds nothing and hides
        # the converter's name in argparse error messages.
        'type': parse_output_path,
        'required': False,
        'help': train_only + 'nn output dir'
    })
    cores = ('--cores', {
        'type': int,
        'default': 4,
        'help': train_only + 'Maximum number of CPU cores to engage at once.'
    })
    parameter_file = ('--parameter-file', {
        'type': str,
        'required': False,
        'default': os.path.join(__location__, 'nns/hyperparams/RnnParameterFile_defaults.yaml'),
        'help': train_only + 'A yaml-file containing NN parameters. If none supplied, default values are used.'
    })
    hdf_path = ('--hdf-path', {
        'type': str,
        'required': False,
        'default': 'Analyses/RawGenomeCorrected_000',
        'help': train_only + 'Internal path in fast5-files, at which analysis files can be found '
                             '[default: Analyses/RawGenomeCorrected_000]'
    })
    uncenter_kmer = ('--uncenter-kmer', {
        'action': "store_true",
        'default': False,
        'help': train_only + 'If this flag is provided, kmers are not always centered in '
                             'the read'
    })
    for arg in (training_reads, test_reads, out_dir, cores, parameter_file, hdf_path, uncenter_kmer):
        parser.add_argument(arg[0], **arg[1])
    return parser
......
......@@ -15,7 +15,12 @@ def main(args):
db_dir = parse_output_path(f'{args.out_dir}dbs/')
nn_dir = parse_output_path(f'{args.out_dir}nns/')
logs_dir = parse_output_path(f'{args.out_dir}logs/')
# args.kmer_list is either a path to a txt file with one k-mer per line (str)
# or an already-parsed list of k-mers; anything else is rejected.
if isinstance(args.kmer_list, str):
    with open(args.kmer_list, 'r') as fh:
        # Keep non-blank lines only, stripped of surrounding whitespace.
        kmer_list = [k.strip() for k in fh if k.strip()]
elif isinstance(args.kmer_list, list):
    kmer_list = args.kmer_list
else:
    # Must be raised: merely constructing the exception would fall through
    # and leave kmer_list unbound for the code that follows.
    raise ValueError(f'dtype of kmer_list not valid: {type(args.kmer_list)}')
with open(args.parameter_file, 'r') as pf: params = yaml.load(pf, Loader=yaml.FullLoader)
# Construct and run snakemake pipeline
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment