From de0a927ca7205fcda289a13db5f10b7404c2936a Mon Sep 17 00:00:00 2001
From: Jan van Haarst <jan.vanhaarst@wur.nl>
Date: Fri, 18 Nov 2016 14:15:38 +0100
Subject: [PATCH] Subsample reads, so that cross_match doesn't choke

---
 src/rules/fastqProcessors/Seqtk.rules  | 30 ++++++++++++++++++++++++++++++
 src/workflows/repeats/reas/Snakefile   |  5 +++++
 src/workflows/repeats/reas/config.json |  8 +++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 src/rules/fastqProcessors/Seqtk.rules

diff --git a/src/rules/fastqProcessors/Seqtk.rules b/src/rules/fastqProcessors/Seqtk.rules
new file mode 100644
index 0000000..e2c472f
--- /dev/null
+++ b/src/rules/fastqProcessors/Seqtk.rules
@@ -0,0 +1,30 @@
+"""
+@version: 0.1
+
+Use seqtk toolkit
+
+Required programs:
+* seqtk (bioconda)
+
+"""
+
+import os, subprocess
+import configparser
+
+__author__ = "Jan van Haarst"
+__email__ = "jan.vanhaarst@wur.nl"
+
+
+# Subsample "{name}.fasta" into "{name}.sampled.fasta" with `seqtk sample`.
+# params.number is passed as seqtk's sample-size argument and params.seed as
+# its -s value; both are read from CONFIG["seqtk"]["options"].
+rule seqtk_sample:
+    input: "{name}.fasta"
+    params:
+        number = CONFIG["seqtk"]["options"]["number"],
+        seed = CONFIG["seqtk"]["options"]["seed"]
+    output: "{name}.sampled.fasta"
+    shell:
+        """
+        seqtk sample -s{params.seed} {input} {params.number} > {output}
+        """
diff --git a/src/workflows/repeats/reas/Snakefile b/src/workflows/repeats/reas/Snakefile
index 43e5b87..783063d 100644
--- a/src/workflows/repeats/reas/Snakefile
+++ b/src/workflows/repeats/reas/Snakefile
@@ -50,6 +50,11 @@ workdir: CONFIG["base"]["working_dir"]
 # Include necessary rules
 include: ruleDir + "/repeats/reas.rules"
 include: ruleDir + "/fastqProcessors/Sra.rules"
+include: ruleDir + "/fastqProcessors/Seqtk.rules"
+
+# Runs every time the Snakefile is parsed, so it must tolerate an existing
+# directory; makedirs(exist_ok=True) also creates missing parent directories.
+os.makedirs(strip_path_level(CONFIG['executables']['reas'],1), exist_ok=True)
 
 rule all:
     input:
diff --git a/src/workflows/repeats/reas/config.json b/src/workflows/repeats/reas/config.json
index 16de7c2..9395c12 100644
--- a/src/workflows/repeats/reas/config.json
+++ b/src/workflows/repeats/reas/config.json
@@ -48,6 +48,12 @@
             "seg_file" : "seg.fa"
         }
     },
+    "seqtk":{
+        "options":{
+            "number" : "1000000",
+            "seed" : "314159"
+        }
+    },
     "sra":
     {
         "sra_download" :{
@@ -57,5 +63,5 @@
             "options" : "--split-files --fasta"
         }
     },
-    "reads" : "{download_dir}sra/ERR171441_1.fasta"
+    "reads" : "{download_dir}sra/ERR171441_1.sampled.fasta"
 }
-- 
GitLab