From 78f6ad966bc5718e517b57ab5ba2aee63c0c82f7 Mon Sep 17 00:00:00 2001 From: "Haarst, Jan van" <jan.vanhaarst@wur.nl> Date: Thu, 3 Nov 2016 11:41:17 +0100 Subject: [PATCH] Add downloading of test data. --- src/rules/repeats/ReAs.rules | 0 src/rules/repeats/reas.rules | 158 ++++++++++++++----------- src/workflows/repeats/reas/Snakefile | 4 +- src/workflows/repeats/reas/config.json | 21 +--- 4 files changed, 93 insertions(+), 90 deletions(-) delete mode 100644 src/rules/repeats/ReAs.rules diff --git a/src/rules/repeats/ReAs.rules b/src/rules/repeats/ReAs.rules deleted file mode 100644 index e69de29..0000000 diff --git a/src/rules/repeats/reas.rules b/src/rules/repeats/reas.rules index e5ed539..105eb2e 100644 --- a/src/rules/repeats/reas.rules +++ b/src/rules/repeats/reas.rules @@ -1,72 +1,86 @@ -""" -@version: 0.1 - -Run ReAS repeat discovery tool - -Required programs: -* ReAS -* MUSCLE -* BLASTN (from BLAST-suite) -* dustmasker (from BLAST-suite) - -""" - -import os, subprocess -import configparser -from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider - -__author__ = "Jan van Haarst" -__email__ = "jan.vanhaarst@wur.nl" - - -# Download and install samtools -rule reas_installation: - output: CONFIG["executables"]["reas"] + 'reas_all.pl' - params : dir = strip_path_level(CONFIG['executables']['reas'],1) - threads : 12 - shell: - "TEMPDIR=`mktemp -d`;" - "cd $TEMPDIR;" - "git clone " +CONFIG["executable_sources"]['reas_URL'] + " && " - "cd reas && " - "mkdir bin && " - "cd code && " - "make -j {threads} && " - "make install && " - "mv -v ../bin {params.dir} &&" - "touch {output} && " - "cd / && rm -rf $TEMPDIR;" - - -FTP = FTPRemoteProvider() -READS=os.path.basename(CONFIG["reads_URL"]) - -rule download_data: - input: FTP.remote(CONFIG["reads_URL"], keep_local=True) - output : READS - shell : "mv {input} {output}" - - -rule reas: - input: - reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz", - output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam" - message: """Run Tophat on unpaired data""" - threads: 8 - params: - options = CONFIG["reas"]["options"] - shell: - """ - TEMPDIR=$(mktemp -d); - UUID=$(uuidgen); - if conda info --envs | grep '^reas' - then - echo "Conda environment exists" - else - conda create --use-index-cache --yes --name reas muscle blast; - fi - source activate reas - tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} && - rm -rf $TEMPDIR; - source deactivate; - """ +""" +@version: 0.1 + +Run ReAS repeat discovery tool + +Required programs: +* ReAS +* MUSCLE +* BLASTN (from BLAST-suite) +* dustmasker (from BLAST-suite) + +""" + +import os, subprocess +import configparser +from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider + +__author__ = "Jan van Haarst" +__email__ = "jan.vanhaarst@wur.nl" + + +# Download and install samtools +rule reas_installation: + output: CONFIG["executables"]["reas"] + 'reas_all.pl' + params : dir = strip_path_level(CONFIG['executables']['reas'],1) + threads : 12 + shell: + "TEMPDIR=`mktemp -d`;" + "cd $TEMPDIR;" + "git clone " +CONFIG["executable_sources"]['reas_URL'] + " && " + "cd reas && " + "mkdir bin && " + "cd code && " + "make -j {threads} && " + "make install && " + "mv -v ../bin {params.dir} &&" + "touch {output} && " + "cd / && rm -rf $TEMPDIR;" + + +FTP = FTPRemoteProvider() +READS=os.path.basename(CONFIG["reads_URL"]) +FORWARD_READS=os.path.basename(CONFIG["reads_pe_URLS"]["1"]) +REVERSE_READS=os.path.basename(CONFIG["reads_pe_URLS"]["2"]) + +ruleorder: download_pe_data > download_data + +rule download_data: + input: FTP.remote(CONFIG["reads_URL"], keep_local=True) + output : READS + shell : "mv {input} {output}" + +rule download_pe_data: + input: + forward = FTP.remote(CONFIG["reads_pe_URLS"]["1"], keep_local=True), + reverse = FTP.remote(CONFIG["reads_pe_URLS"]["2"], keep_local=True) + output : + FORWARD_READS=FORWARD_READS, + REVERSE_READS=REVERSE_READS + shell : + "mv {input.forward} {output.FORWARD_READS};" + "mv {input.reverse} {output.REVERSE_READS};" + +rule reas: + input: + reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz", + output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam" + message: """Run Tophat on unpaired data""" + threads: 8 + params: + options = CONFIG["reas"]["options"] + shell: + """ + TEMPDIR=$(mktemp -d); + UUID=$(uuidgen); + if conda info --envs | grep '^reas' + then + echo "Conda environment exists" + else + conda create --use-index-cache --yes --name reas muscle blast; + fi + source activate reas + tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} && + rm -rf $TEMPDIR; + source deactivate; + """ diff --git a/src/workflows/repeats/reas/Snakefile b/src/workflows/repeats/reas/Snakefile index 57d972f..7813ff0 100644 --- a/src/workflows/repeats/reas/Snakefile +++ b/src/workflows/repeats/reas/Snakefile @@ -54,5 +54,7 @@ rule all: input: # CONFIG['base']['bashrc'], # Needs to be separate defined always ! CONFIG["executables"]["reas"] + 'reas_all.pl', - os.path.basename(CONFIG["reads_URL"]) + os.path.basename(CONFIG["reads_URL"]), + os.path.basename(CONFIG["reads_pe_URLS"]["1"]), + os.path.basename(CONFIG["reads_pe_URLS"]["2"]) diff --git a/src/workflows/repeats/reas/config.json b/src/workflows/repeats/reas/config.json index 9a8536e..5621924 100644 --- a/src/workflows/repeats/reas/config.json +++ b/src/workflows/repeats/reas/config.json @@ -17,23 +17,10 @@ "wget_options": " -c --proxy=off " }, "reads_info" : "http://www.ebi.ac.uk/ena/data/view/ERX147330", - "reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz", - "samples": - { - "Pleurotus_ostreatus": { - "EP57": { - "readsets": { - "1": [ - "/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R1.fastq", - "/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R2.fastq" - ] - }, - "type": "pe", - "insertSize": "300", - "insertSizeStDev": "", - "platform": "illumina" - } - } + "reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/DRR001/DRR001191/DRR001191_1.fastq.gz", + "reads_pe_URLS" : { + "1": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz", + "2": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_2.fastq.gz" }, "reas": { -- GitLab