diff --git a/src/rules/repeats/reas.rules b/src/rules/repeats/reas.rules index d22eec321dba31f6e5ea129c2212b06051eb21e0..4fe90a4eb08b066bb6649e24ff8c774850500bcf 100644 --- a/src/rules/repeats/reas.rules +++ b/src/rules/repeats/reas.rules @@ -18,12 +18,16 @@ __author__ = "Jan van Haarst" __email__ = "jan.vanhaarst@wur.nl" -# Download and install samtools +# Download and install reas rule reas_installation: - output: CONFIG["executables"]["reas"] + 'reas_all.pl' - params : dir = strip_path_level(CONFIG['executables']['reas'],2) + output: + reas = CONFIG["executables"]["reas"] + 'reas_all.pl', + dust = CONFIG["executables"]["reas"] + 'dust', + cross_match = CONFIG["executables"]["reas"] + 'cross_match' + params : dir = strip_path_level(CONFIG['executables']['reas'],1) threads : 12 shell: + "mkdir -p {params.dir};" "TEMPDIR=`mktemp -d`;" "cd $TEMPDIR;" "git clone " +CONFIG["executable_sources"]['reas_URL'] + " && " @@ -32,33 +36,194 @@ rule reas_installation: "cd code && " "make -j {threads} && " "make install && " - "mv -v ../bin {params.dir} &&" - "touch {output} && " + "touch {output.reas} && " "cd - && " "cd dust && " "make && make install && " + "touch {output.dust} && " + "cd - && " + "cd cross_match && " + "make manyreads && " + "mv -v cross_match{{,.manyreads}} ../bin && " + "touch {output.cross_match} && " + "mv -v ../bin/* {params.dir} &&" "cd / && rm -rf $TEMPDIR;" -rule reas: +#rule reas: +# input: +# reads = CONFIG["reads"], +# REAS = rules.reas_installation.output, +# PATH = strip_path_level(CONFIG['executables']['reas'],1) +# output: "consensus.fa" +# message: """Run ReAS on unpaired data""" +# threads: 8 +# params: +# options = CONFIG["reas"]["options"] +# shell: +# """ +# if conda info --envs | grep '^reas' +# then +# echo "Conda environment exists" +# else +# conda create --use-index-cache --yes --name reas muscle blast; +# fi +# source activate reas +# export PATH=$PATH:{input.PATH} +# {input.REAS} {params.options} -pa {threads} -read {input.reads} -output {output} +# source deactivate; +# """ + +rule reas_clean_reads: input: + rules.reas_installation.output, reads = CONFIG["reads"], - REAS = rules.reas_installation.output, PATH = strip_path_level(CONFIG['executables']['reas'],1) - output: "consensus.fa" - message: """Run ReAS on unpaired data""" + output: CONFIG["reas"]["options"]["clean_read_file"] + params: + seqsize = CONFIG["reas"]["options"]["seqsize"] + shell: + """ + export PATH=$PATH:{input.PATH} + cat {input.reads} | CleanData.pl -size={params.seqsize} | rename.pl > {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_kmers: + input: + rules.reas_installation.output, + clean_read_file = rules.reas_clean_reads.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["nmers_file"] + params: + k = CONFIG["reas"]["options"]["k"], + d = CONFIG["reas"]["options"]["d"], + m = CONFIG["reas"]["options"]["m"] + shell: + """ + export PATH=$PATH:{input.PATH} + kmer_num -k {params.k} {input.clean_read_file} | kmer2reads -d {params.d} {input.clean_read_file} | N_mers -m {params.m} > {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_subsample: + input: + rules.reas_installation.output, + nmers_file = rules.reas_kmers.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["hd_read_id_file"] + params: + fraction = CONFIG["reas"]["options"]["fraction"] + shell: + """ + export PATH=$PATH:{input.PATH} + cat {input.nmers_file} | cut -f1 | RandomList.pl {params.fraction} > {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_picklist: + input: + rules.reas_installation.output, + clean_read_file = rules.reas_clean_reads.output, + hd_read_id_file = rules.reas_subsample.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["hd_read_file"] + shell: + """ + export PATH=$PATH:{input.PATH} + cat {input.clean_read_file} | pickListSeq.pl {input.hd_read_id_file} > {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_pairwise_alignment: + input: + rules.reas_installation.output, + hd_read_file = rules.reas_picklist.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["bound_file"] + params: + n = CONFIG["reas"]["options"]["n"], + pa = CONFIG["reas"]["options"]["pa"], + bound_file = CONFIG["reas"]["options"]["bound_file"], + size = CONFIG["reas"]["options"]["size"], + ident = CONFIG["reas"]["options"]["ident"], + d = CONFIG["reas"]["options"]["d"], + t = CONFIG["reas"]["options"]["t"] threads: 8 + shell: + """ + export PATH=$PATH:{input.PATH} + run_HighDseg.pl -r {input.hd_read_file} -n {params.n} -a {threads} -b {output} -s {params.size} -i {params.ident} -d {params.d} -t {params.t} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_cut_segments: + input: + rules.reas_installation.output, + hd_read_file = rules.reas_subsample.output, + bound_file = rules.reas_pairwise_alignment.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["seg_file"] + params: + n = CONFIG["reas"]["options"]["n"], + pa = CONFIG["reas"]["options"]["pa"], + bound_file = CONFIG["reas"]["options"]["bound_file"], + size = CONFIG["reas"]["options"]["size"], + ident = CONFIG["reas"]["options"]["ident"], + d = CONFIG["reas"]["options"]["d"] + shell: + """ + export PATH=$PATH:{input.PATH} + cat {input.bound_file} | cutSeg.pl {input.hd_read_file} > {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_segment_linkage: + input: + rules.reas_installation.output, + hd_read_file = rules.reas_subsample.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["link_file"] + params: + n = CONFIG["reas"]["options"]["n"], + pa = CONFIG["reas"]["options"]["pa"], + size = CONFIG["reas"]["options"]["size"], + ident = CONFIG["reas"]["options"]["ident"], + nonlcs = CONFIG["reas"]["options"]["nonlcs"] + shell: + """ + export PATH=$PATH:{input.PATH} + run_SegLink.pl -r {input.hd_read_file} -n {params.n} -a {params.pa} {params.size} {params.ident} {params.nonlcs} -o {output} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} + """ + +rule reas_repeat_assembly: + input: + rules.reas_installation.output, + seg_file = rules.reas_cut_segments.output, + link_file = rules.reas_segment_linkage.output, + bound_file = rules.reas_pairwise_alignment.output, + PATH = strip_path_level(CONFIG['executables']['reas'],1) + output: CONFIG["reas"]["options"]["output"] params: - options = CONFIG["reas"]["options"] + sub_multi = CONFIG["reas"]["options"]["sub_multi"], + d = CONFIG["reas"]["options"]["d"], + prefix = CONFIG["reas"]["options"]["prefix"], + log = CONFIG["reas"]["options"]["log"], + end = CONFIG["reas"]["options"]["end"], + min_depth = CONFIG["reas"]["options"]["min_depth"], + min_extend = CONFIG["reas"]["options"]["min_extend"], + max_extend = CONFIG["reas"]["options"]["max_extend"] shell: """ - if conda info --envs | grep '^reas' - then - echo "Conda environment exists" - else - conda create --use-index-cache --yes --name reas muscle blast; - fi - source activate reas export PATH=$PATH:{input.PATH} - {input.REAS} {params.options} -pa {threads} -read {input.reads} -output {output} - source deactivate; + DoAssembly -r {input.seg_file} -l {input.link_file} {params.sub_multi} -b {input.bound_file} -d {params.d} -o {output} {params.prefix} {params.log} -e {params.end} {params.min_depth} {params.min_extend} {params.max_extend} + # remove output if empty, or touch if not + [ -s {output} ] && touch {output} || rm -f {output} """ diff --git a/src/workflows/repeats/reas/config.json b/src/workflows/repeats/reas/config.json index a0af1a44583c7ab4a54d2ef1d526f5af49e1c148..16de7c2f47b320fba062b2b3dc650683deebec94 100644 --- a/src/workflows/repeats/reas/config.json +++ b/src/workflows/repeats/reas/config.json @@ -19,7 +19,34 @@ "sra_id" : "ERR171441", "reas": { - "options": "--seqsize=80" + "options": { + "k": "17", + "d": "6", + "m": "1", + "fraction": "1", + "n": "1", + "t": "0", + "pa": "1", + "bound_file": "seg.bk", + "link_file": "seg.link", + "sub_multi": "-t -1", + "output": "consensus.fa", + "prefix": "-p ''", + "log": "", + "size": "100", + "seqsize": "80", + "ident": "0.6", + "nonlcs": "50", + "end": "50", + "min_depth": "-j 2", + "min_extend": "-u 50", + "max_extend": "-v 200", + "clean_read_file" : "reads.clean.fa", + "nmers_file" : "read.N_mers", + "hd_read_id_file" : "HD_reads.id", + "hd_read_file" : "HD_reads.fa", + "seg_file" : "seg.fa" + } }, "sra": {