diff --git a/src/rules/fastqProcessors/Seqtk.rules b/src/rules/fastqProcessors/Seqtk.rules
new file mode 100644
index 0000000000000000000000000000000000000000..e2c472f08a4c3dd352904ac566429df5ce6f4c29
--- /dev/null
+++ b/src/rules/fastqProcessors/Seqtk.rules
@@ -0,0 +1,29 @@
+"""
+@version: 0.1
+
+Use seqtk toolkit
+
+Required programs:
+* seqtk (bioconda)
+
+"""
+
+import os, subprocess
+import configparser
+
+__author__ = "Jan van Haarst"
+__email__ = "jan.vanhaarst@wur.nl"
+
+
+# Run `seqtk sample` on {name}.fasta and write the result to {name}.sampled.fasta;
+# the seed (-s) and the number argument come from CONFIG["seqtk"]["options"].
+rule seqtk_sample:
+    input: "{name}.fasta"
+    params:
+        number = CONFIG["seqtk"]["options"]["number"],
+        seed = CONFIG["seqtk"]["options"]["seed"]
+    output: "{name}.sampled.fasta"
+    shell:
+        """
+        seqtk sample -s{params.seed} {input} {params.number} > {output}
+        """
diff --git a/src/workflows/repeats/reas/Snakefile b/src/workflows/repeats/reas/Snakefile
index 43e5b8740e26b71a6eb653a1be3a0ed5258cb96e..783063d02d1b392745530150314dbbc2f4ddf23c 100644
--- a/src/workflows/repeats/reas/Snakefile
+++ b/src/workflows/repeats/reas/Snakefile
@@ -50,6 +50,11 @@ workdir: CONFIG["base"]["working_dir"]
 # Include necessary rules
 include: ruleDir + "/repeats/reas.rules"
 include: ruleDir + "/fastqProcessors/Sra.rules"
+include: ruleDir + "/fastqProcessors/Seqtk.rules"
+
+# This statement runs every time the Snakefile is parsed, so the directory may
+# already exist; exist_ok avoids a FileExistsError on repeated invocations.
+os.makedirs(strip_path_level(CONFIG['executables']['reas'], 1), exist_ok=True)
 
 rule all:
     input:
diff --git a/src/workflows/repeats/reas/config.json b/src/workflows/repeats/reas/config.json
index 16de7c2f47b320fba062b2b3dc650683deebec94..9395c1247a62362fffb1a02ad907609ede0fecc6 100644
--- a/src/workflows/repeats/reas/config.json
+++ b/src/workflows/repeats/reas/config.json
@@ -48,6 +48,12 @@
             "seg_file" : "seg.fa"
         }
     },
+    "seqtk":{
+        "options":{
+            "number" : "1000000",
+            "seed" : "314159"
+        }
+    },
     "sra":
     {
         "sra_download" :{
@@ -57,5 +63,5 @@
             "options" : "--split-files --fasta"
         }
     },
-    "reads" : "{download_dir}sra/ERR171441_1.fasta"
+    "reads" : "{download_dir}sra/ERR171441_1.sampled.fasta"
 }