Skip to content
Snippets Groups Projects
Commit 78f6ad96 authored by Haarst, Jan van
Browse files

Add downloading of test data.

parent fe0c7bc6
No related branches found
No related tags found
No related merge requests found
"""
@version: 0.1
Run ReAS repeat discovery tool
Required programs:
* ReAS
* MUSCLE
* BLASTN (from BLAST-suite)
* dustmasker (from BLAST-suite)
"""
import os, subprocess
import configparser
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
__author__ = "Jan van Haarst"
__email__ = "jan.vanhaarst@wur.nl"
# Download, build and install ReAS from source.
#
# output: the reas_all.pl script under the configured ReAS executable path.
#         It is touched at the end, which creates it if missing or refreshes
#         its timestamp.
# params: dir -- destination for the freshly built bin/ directory, computed
#         by strip_path_level() from the configured ReAS path (presumably the
#         path with its last level removed -- confirm against the helper).
# threads: number of parallel make jobs.
#
# The build happens in a throw-away directory. An EXIT trap removes that
# directory on every exit path; previously it was only removed when all build
# steps succeeded, so a failed clone or make left the checkout behind.
# NOTE(review): mv and touch run from inside the temporary checkout, so
# params.dir and the output path presumably have to be absolute -- confirm.
rule reas_installation:
    output: CONFIG["executables"]["reas"] + 'reas_all.pl'
    params: dir = strip_path_level(CONFIG['executables']['reas'], 1)
    threads: 12
    shell:
        "TEMPDIR=$(mktemp -d) && "
        "trap 'cd / && rm -rf \"$TEMPDIR\"' EXIT && "
        "cd \"$TEMPDIR\" && "
        "git clone " + CONFIG["executable_sources"]['reas_URL'] + " && "
        "cd reas && "
        "mkdir bin && "
        "cd code && "
        "make -j {threads} && "
        "make install && "
        "mv -v ../bin {params.dir} && "
        "touch {output}"
# Remote provider used below to declare FTP-hosted files as rule inputs.
FTP = FTPRemoteProvider()
# File name (last path component) of the read set named by CONFIG["reads_URL"].
READS=os.path.basename(CONFIG["reads_URL"])
# Fetch the read file at CONFIG["reads_URL"] through the FTP provider and
# move the fetched copy to READS, i.e. the bare file name of that URL.
rule download_data:
input: FTP.remote(CONFIG["reads_URL"], keep_local=True)
output : READS
shell : "mv {input} {output}"
# Placeholder rule: despite its name, the command below runs tophat2 and the
# declared output is a TopHat accepted_hits.bam, not a ReAS result.
#
# The shell script:
#   * creates a temporary directory and passes it to tophat2 as --tmp-dir;
#   * creates a conda environment named "reas" (packages: muscle, blast)
#     unless `conda info --envs` already lists a name starting with "reas";
#   * activates that environment, runs tophat2, then deactivates it.
#
# NOTE(review): the command uses {input.reference}, but only `reads` is
#   declared under input, so the command cannot be formatted as written.
# NOTE(review): UUID is assigned but never used.
# NOTE(review): the temporary directory is removed only when tophat2 succeeds
#   (the rm is chained with &&).
rule reas:
input:
reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz",
output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam"
message: """Run Tophat on unpaired data"""
threads: 8
# Extra command-line options, taken verbatim from the "reas" config section.
params:
options = CONFIG["reas"]["options"]
shell:
"""
TEMPDIR=$(mktemp -d);
UUID=$(uuidgen);
if conda info --envs | grep '^reas'
then
echo "Conda environment exists"
else
conda create --use-index-cache --yes --name reas muscle blast;
fi
source activate reas
tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} &&
rm -rf $TEMPDIR;
source deactivate;
"""
"""
@version: 0.1
Run ReAS repeat discovery tool
Required programs:
* ReAS
* MUSCLE
* BLASTN (from BLAST-suite)
* dustmasker (from BLAST-suite)
"""
import os, subprocess
import configparser
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
__author__ = "Jan van Haarst"
__email__ = "jan.vanhaarst@wur.nl"
# Download, build and install ReAS from source.
#
# output: the reas_all.pl script under the configured ReAS executable path.
#         It is touched at the end, which creates it if missing or refreshes
#         its timestamp.
# params: dir -- destination for the freshly built bin/ directory, computed
#         by strip_path_level() from the configured ReAS path (presumably the
#         path with its last level removed -- confirm against the helper).
# threads: number of parallel make jobs.
#
# The build happens in a throw-away directory. An EXIT trap removes that
# directory on every exit path; previously it was only removed when all build
# steps succeeded, so a failed clone or make left the checkout behind.
# NOTE(review): mv and touch run from inside the temporary checkout, so
# params.dir and the output path presumably have to be absolute -- confirm.
rule reas_installation:
    output: CONFIG["executables"]["reas"] + 'reas_all.pl'
    params: dir = strip_path_level(CONFIG['executables']['reas'], 1)
    threads: 12
    shell:
        "TEMPDIR=$(mktemp -d) && "
        "trap 'cd / && rm -rf \"$TEMPDIR\"' EXIT && "
        "cd \"$TEMPDIR\" && "
        "git clone " + CONFIG["executable_sources"]['reas_URL'] + " && "
        "cd reas && "
        "mkdir bin && "
        "cd code && "
        "make -j {threads} && "
        "make install && "
        "mv -v ../bin {params.dir} && "
        "touch {output}"
# Remote provider used below to declare FTP-hosted files as rule inputs.
FTP = FTPRemoteProvider()
# File name (last path component) of the read set named by CONFIG["reads_URL"].
READS=os.path.basename(CONFIG["reads_URL"])
# File names of the two paired-end read files, keyed "1" and "2" in
# CONFIG["reads_pe_URLS"].
FORWARD_READS=os.path.basename(CONFIG["reads_pe_URLS"]["1"])
REVERSE_READS=os.path.basename(CONFIG["reads_pe_URLS"]["2"])
# If both download rules can produce a requested file, prefer the paired-end rule.
ruleorder: download_pe_data > download_data
# Fetch the read file at CONFIG["reads_URL"] through the FTP provider and
# move the fetched copy to READS, i.e. the bare file name of that URL.
rule download_data:
input: FTP.remote(CONFIG["reads_URL"], keep_local=True)
output : READS
shell : "mv {input} {output}"
# Fetch both paired-end read files listed in CONFIG["reads_pe_URLS"] through
# the FTP provider and move each fetched copy to the bare file name of its URL.
rule download_pe_data:
input:
forward = FTP.remote(CONFIG["reads_pe_URLS"]["1"], keep_local=True),
reverse = FTP.remote(CONFIG["reads_pe_URLS"]["2"], keep_local=True)
output :
FORWARD_READS=FORWARD_READS,
REVERSE_READS=REVERSE_READS
# Two mv commands, one per mate file, joined into a single shell string.
shell :
"mv {input.forward} {output.FORWARD_READS};"
"mv {input.reverse} {output.REVERSE_READS};"
# Placeholder rule: despite its name, the command below runs tophat2 and the
# declared output is a TopHat accepted_hits.bam, not a ReAS result.
#
# The shell script:
#   * creates a temporary directory and passes it to tophat2 as --tmp-dir;
#   * creates a conda environment named "reas" (packages: muscle, blast)
#     unless `conda info --envs` already lists a name starting with "reas";
#   * activates that environment, runs tophat2, then deactivates it.
#
# NOTE(review): the command uses {input.reference}, but only `reads` is
#   declared under input, so the command cannot be formatted as written.
# NOTE(review): UUID is assigned but never used.
# NOTE(review): the temporary directory is removed only when tophat2 succeeds
#   (the rm is chained with &&).
rule reas:
input:
reads = "reads/{sample}/{library}/{replicate}/{name}.fastq.gz",
output: "TOPHAT/{sample}/{library}/{replicate}/{name}/accepted_hits.bam"
message: """Run Tophat on unpaired data"""
threads: 8
# Extra command-line options, taken verbatim from the "reas" config section.
params:
options = CONFIG["reas"]["options"]
shell:
"""
TEMPDIR=$(mktemp -d);
UUID=$(uuidgen);
if conda info --envs | grep '^reas'
then
echo "Conda environment exists"
else
conda create --use-index-cache --yes --name reas muscle blast;
fi
source activate reas
tophat2 --num-threads {threads} {params.options} --tmp-dir $TEMPDIR --output-dir TOPHAT/{wildcards.sample}/{wildcards.library}/{wildcards.replicate}/{wildcards.name} {input.reference} {input.reads} &&
rm -rf $TEMPDIR;
source deactivate;
"""
......@@ -54,5 +54,7 @@ rule all:
input:
# CONFIG['base']['bashrc'], # Needs to be separate defined always !
CONFIG["executables"]["reas"] + 'reas_all.pl',
os.path.basename(CONFIG["reads_URL"])
os.path.basename(CONFIG["reads_URL"]),
os.path.basename(CONFIG["reads_pe_URLS"]["1"]),
os.path.basename(CONFIG["reads_pe_URLS"]["2"])
......@@ -17,23 +17,10 @@
"wget_options": " -c --proxy=off "
},
"reads_info" : "http://www.ebi.ac.uk/ena/data/view/ERX147330",
"reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz",
"samples":
{
"Pleurotus_ostreatus": {
"EP57": {
"readsets": {
"1": [
"/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R1.fastq",
"/home/lavri002/scratch/oudehome/lavri002/temp/S240_05A_CHG009446-0428lane1-EP57_L004_R2.fastq"
]
},
"type": "pe",
"insertSize": "300",
"insertSizeStDev": "",
"platform": "illumina"
}
}
"reads_URL" : "ftp.sra.ebi.ac.uk/vol1/fastq/DRR001/DRR001191/DRR001191_1.fastq.gz",
"reads_pe_URLS" : {
"1": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_1.fastq.gz",
"2": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR171/ERR171441/ERR171441_2.fastq.gz"
},
"reas":
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment