From 78f6ad966bc5718e517b57ab5ba2aee63c0c82f7 Mon Sep 17 00:00:00 2001
From: "Haarst, Jan van" <jan.vanhaarst@wur.nl>
Date: Thu, 3 Nov 2016 11:41:17 +0100
Subject: [PATCH] Add downloading of test data.

---
 src/rules/repeats/ReAs.rules           |   0
 src/rules/repeats/reas.rules           | 158 ++++++++++++++-----------
 src/workflows/repeats/reas/Snakefile   |   4 +-
 src/workflows/repeats/reas/config.json |  21 +---
 4 files changed, 93 insertions(+), 90 deletions(-)
 delete mode 100644 src/rules/repeats/ReAs.rules

diff --git a/src/rules/repeats/ReAs.rules b/src/rules/repeats/ReAs.rules
deleted file mode 100644
index e69de29..0000000
diff --git a/src/rules/repeats/reas.rules b/src/rules/repeats/reas.rules
index e5ed539..105eb2e 100644
--- a/src/rules/repeats/reas.rules
+++ b/src/rules/repeats/reas.rules
@@ -1,72 +1,86 @@
-"""
-@version: 0.1
-
-Run ReAS repeat discovery tool
-
-Required programs:
-* ReAS
-* MUSCLE
-* BLASTN (from BLAST-suite)
-* dustmasker (from BLAST-suite)
-
-"""
-
-import os, subprocess
-import configparser
-from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
-
-__author__ = "Jan van Haarst"
-__email__ = "jan.vanhaarst@wur.nl"
-
-
-# Download and install samtools
-rule reas_installation:
-    output: CONFIG["executables"]["reas"] + 'reas_all.pl'
-    params : dir = strip_path_level(CONFIG['executables']['reas'],1)
-    threads : 12
-    shell:
-        "TEMPDIR=`mktemp -d`;"
-        "cd $TEMPDIR;"
-        "git clone " +CONFIG["executable_sources"]['reas_URL'] + " && "
-        "cd reas && "
-        "mkdir bin && "
-        "cd code && "
-        "make -j {threads} && "
-        "make install && "
-        "mv -v ../bin {params.dir} &&"
-        "touch {output} && "
-        "cd / && rm -rf $TEMPDIR;"
-
-
-FTP = FTPRemoteProvider()
-READS=os.path.basename(CONFIG["reads_URL"])
-
-rule download_data:
-    input: FTP.remote(CONFIG["reads_URL"], keep_local=True)
-    output : READS
-    shell : "mv {input} {output}"
-
-
-rule reas:
-    input:
-        reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz",
-    output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam"
-    message: """Run Tophat on unpaired data"""
-    threads: 8
-    params:
-        options = CONFIG["reas"]["options"]
-    shell:
-        """
-        TEMPDIR=$(mktemp -d);
-        UUID=$(uuidgen);
-        if conda info --envs | grep '^reas'
-        then
-            echo "Conda environment exists"
-        else
-            conda create --use-index-cache --yes --name reas muscle blast;
-        fi
-        source activate reas
-        tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} &&
-        rm -rf $TEMPDIR;
-        source deactivate;
-        """
+"""
+@version: 0.1
+
+Run ReAS repeat discovery tool
+
+Required programs:
+* ReAS
+* MUSCLE
+* BLASTN (from BLAST-suite)
+* dustmasker (from BLAST-suite)
+
+"""
+
+import os, subprocess
+import configparser
+from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
+
+__author__ = "Jan van Haarst"
+__email__ = "jan.vanhaarst@wur.nl"
+
+
+# Download and install ReAS
+rule reas_installation:
+    output: CONFIG["executables"]["reas"] + 'reas_all.pl'
+    params : dir = strip_path_level(CONFIG['executables']['reas'],1)
+    threads : 12
+    shell:
+        "TEMPDIR=`mktemp -d`;"
+        "cd $TEMPDIR;"
+        "git clone " +CONFIG["executable_sources"]['reas_URL'] + " && "
+        "cd reas && "
+        "mkdir bin && "
+        "cd code && "
+        "make -j {threads} && "
+        "make install && "
+        "mv -v ../bin {params.dir} &&"
+        "touch {output} && "
+        "cd / && rm -rf $TEMPDIR;"
+
+
+FTP = FTPRemoteProvider()
+READS=os.path.basename(CONFIG["reads_URL"])
+FORWARD_READS=os.path.basename(CONFIG["reads_pe_URLS"]["1"])
+REVERSE_READS=os.path.basename(CONFIG["reads_pe_URLS"]["2"])
+
+ruleorder: download_pe_data > download_data
+
+rule download_data:
+    input: FTP.remote(CONFIG["reads_URL"], keep_local=True)
+    output : READS
+    shell : "mv {input} {output}"
+
+rule download_pe_data:
+    input: 
+        forward = FTP.remote(CONFIG["reads_pe_URLS"]["1"], keep_local=True),
+        reverse = FTP.remote(CONFIG["reads_pe_URLS"]["2"], keep_local=True)
+    output : 
+        FORWARD_READS=FORWARD_READS,
+        REVERSE_READS=REVERSE_READS
+    shell : 
+        "mv {input.forward} {output.FORWARD_READS};"
+        "mv {input.reverse} {output.REVERSE_READS};"
+
+rule reas:
+    input:
+        reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz",
+    output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam"
+    message: """Run Tophat on unpaired data"""
+    threads: 8
+    params:
+        options = CONFIG["reas"]["options"]
+    shell:
+        """
+        TEMPDIR=$(mktemp -d);
+        UUID=$(uuidgen);
+        if conda info --envs | grep '^reas'
+        then
+            echo "Conda environment exists"
+        else
+            conda create --use-index-cache --yes --name reas muscle blast;
+        fi
+        source activate reas
+        tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} &&
+        rm -rf $TEMPDIR;
+        source deactivate;
+        """
diff --git a/src/workflows/repeats/reas/Snakefile b/src/workflows/repeats/reas/Snakefile
index 57d972f..7813ff0 100644
--- a/src/workflows/repeats/reas/Snakefile
+++ b/src/workflows/repeats/reas/Snakefile
@@ -54,5 +54,7 @@ rule all:
     input:
         # CONFIG['base']['bashrc'],                       # Needs to be separate defined always !
         CONFIG["executables"]["reas"] + 'reas_all.pl',
-        os.path.basename(CONFIG["reads_URL"])
+        os.path.basename(CONFIG["reads_URL"]),
+        os.path.basename(CONFIG["reads_pe_URLS"]["1"]),
+        os.path.basename(CONFIG["reads_pe_URLS"]["2"])
 
diff --git a/src/workflows/repeats/reas/config.json b/src/workflows/repeats/reas/config.json
index 9a8536e..5621924 100644
--- a/src/workflows/repeats/reas/config.json
+++ b/src/workflows/repeats/reas/config.json
@@ -17,23 +17,10 @@
 		"wget_options": " -c --proxy=off "
 	},
 	"reads_info" : "http://www.ebi.ac.uk/ena/data/view/ERX147330",
-	"reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz",
-	"samples": 
-	{
-		"Pleurotus_ostreatus": {
-		"EP57": {
-			"readsets": {
-				"1": [
-					"/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R1.fastq",
-					"/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R2.fastq"
-					]	
-				},
-			"type": "pe",
-			"insertSize": "300",
-			"insertSizeStDev": "",
-			"platform": "illumina"
-			}
-		}
+	"reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/DRR001/DRR001191/DRR001191_1.fastq.gz",
+	"reads_pe_URLS" : {
+		"1": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz",
+		"2": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_2.fastq.gz"
 	},
 	"reas": 
 	{
-- 
GitLab