Skip to content
Snippets Groups Projects
Commit dec89520 authored by Lannoy, Carlos de
Browse files

Make the minimum number of k-mer models a command-line argument (--min-nb-models); small fixes

parent 83e37100
No related branches found
No related tags found
No related merge requests found
......@@ -291,6 +291,12 @@ def get_compile_model_parser():
'default': '' # need it to be existent; passed to production pipeline
})
min_nb_models = ('--min-nb-models', {
'type': int,
'default': 25,
'help': 'Minimum number of kmer models to include in the multi-network'
})
for arg in (kmer_list, target_16S, nn_directory, out_model):
parser.add_argument(arg[0], **arg[1])
parser.add_argument('--train-required', action='store_true',
......
......@@ -25,7 +25,7 @@ class InferenceModel(object):
def predict(self, read, kmer):
model_output = self._model_dict[kmer](read)
return np.any(model_output > 5.0) # todo make threshold feature?
return np.any(model_output > 6.2) # todo make threshold feature?
# # For fixed batch sizes
# read, last_idx = read
......
......@@ -114,7 +114,7 @@ def main(args):
kmer_size = 8
kmer_candidates_dict = fa2kmers(args.target_16S, kmer_size) # Collect k-mers per sequence in target fasta
if args.train_required:
kmers = get_kmer_candidates_16S(kmer_candidates_dict, 10, 0.0001)
kmers = get_kmer_candidates_16S(kmer_candidates_dict, args.min_nb_models, 0.0001)
kmers_no_models = [km for km in kmers if km not in available_mod_kmers]
if len(kmers_no_models): # train additional models, if required
args.kmer_list = kmers_no_models
......@@ -127,7 +127,7 @@ def main(args):
warnings.warn(f'model generation failed for {km}, see {args.out_dir}logs. Continuing compilation without it.')
kmers.remove(km)
else: # filter out k-mers for which no stored model exists
kmers = get_kmer_candidates_16S(kmer_candidates_dict, 10, 0.0001, filter_list=available_mod_kmers)
kmers = get_kmer_candidates_16S(kmer_candidates_dict, args.min_nb_models, 0.0001, filter_list=available_mod_kmers)
if not len(kmers):
raise ValueError('Sequences do not contain any of available models!')
......
......@@ -48,7 +48,7 @@ rule compile_model:
species_seq_fasta='{{target_fastas_dir}}{species}.fasta'
output:
model='{{model_dir}}{species}/compiled.mod'
threads: workflow.cores
threads: max(workflow.cores // len(species_list), 4)
shell:
"""
python {__baseless__} compile_model \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment