diff --git a/src/rules/repeats/Transposome.rules b/src/rules/repeats/Transposome.rules index 510c2bbac68a113e6557f5a9304d6b56b625f6d3..58a3bccc72c1fe88cfadd3a055450257cbdb45d2 100644 --- a/src/rules/repeats/Transposome.rules +++ b/src/rules/repeats/Transposome.rules @@ -7,41 +7,6 @@ import configparser from Transposome import * -# Install perl if version is too low (as on our cluster) -rule install_perl: - output: CONFIG["executables"]["perl"] - shell: - "wget --no-check-certificate -O - http://install.perlbrew.pl | bash && " - "" + CONFIG["base"]["perl_brew"] + "/bin/perlbrew " + CONFIG["base"]["perl_brew_options"] + " install " + CONFIG["base"]["perl_version"] + " && " - "ln -fs " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl" + CONFIG["base"]["perl_version"] + " " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl && " - "sed -i 's|export PERLBREW_BASHRC_VERSION|set +u\\nexport PERLBREW_BASHRC_VERSION|' " + CONFIG["base"]["perl_brew"] + "etc/bashrc && " - "source " + CONFIG["base"]["perl_brew"] + "etc/bashrc && " - "perlbrew switch " + CONFIG["base"]["perl_version"] + " && " - "cat " + CONFIG["base"]["perl_brew"] + "etc/bashrc >> {rules.bashrc.output} && " - "echo 'perlbrew switch " + CONFIG["base"]["perl_version"] + "' >> {rules.bashrc.output} && " - "touch {output};" - -# Create local Perl library -rule install_perl_cpanm: - input: - PERL=rules.install_perl.output - output: CONFIG["executables"]["cpanm"] - shell: - "source {rules.bashrc.output} && " - "perlbrew install-cpanm && " - "touch {output}" - -# Install necessary Perl libraries -rule install_perl_libs: - input: - rules.install_perl_cpanm.output, - rules.install_perl.output - output: CONFIG["base"]["perl_install"] + "lib/done" - run: - for lib in CONFIG["base"]["perl_libs"]: - shell("source {rules.bashrc.output} ; cpanm "+ CONFIG["base"]["perl_cpanm_options"] + " " + lib) - shell("touch " + CONFIG["base"]["perl_install"] + "lib/done") - rule install_transposome: input: rules.install_perl_libs.output, diff --git a/src/rules/repeats/masking.rules b/src/rules/repeats/masking.rules index 1302c9366a7b990640a78ad24e1334cb788f4ea1..fce35b29d46142de9f2fd9aa28ef4724af1b2d70 100644 --- a/src/rules/repeats/masking.rules +++ b/src/rules/repeats/masking.rules @@ -3,6 +3,41 @@ import os, subprocess import configparser +rule install_perl: + output: CONFIG["executables"]["perl"] + shell: + "wget --no-check-certificate -O - http://install.perlbrew.pl | bash && " + "" + CONFIG["base"]["perl_brew"] + "/bin/perlbrew " + CONFIG["base"]["perl_brew_options"] + " install " + CONFIG["base"]["perl_version"] + " && " + "ln -fs " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl" + CONFIG["base"]["perl_version"] + " " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl && " + "sed -i 's|export PERLBREW_BASHRC_VERSION|set +u\\nexport PERLBREW_BASHRC_VERSION|' " + CONFIG["base"]["perl_brew"] + "etc/bashrc && " + "source " + CONFIG["base"]["perl_brew"] + "etc/bashrc && " + "perlbrew switch " + CONFIG["base"]["perl_version"] + " && " + "cat " + CONFIG["base"]["perl_brew"] + "etc/bashrc >> {rules.bashrc.output} && " + "echo 'perlbrew switch " + CONFIG["base"]["perl_version"] + "' >> {rules.bashrc.output} && " + "touch {output};" + +# Create local Perl library +rule install_perl_cpanm: + input: + PERL=rules.install_perl.output + output: CONFIG["executables"]["cpanm"] + shell: + "source {rules.bashrc.output} && " + "perlbrew install-cpanm && " + "touch {output}" + +# Install necessary Perl libraries +rule install_perl_libs: + input: + rules.install_perl_cpanm.output, + rules.install_perl.output + output: CONFIG["base"]["perl_install"] + "lib/done" + run: + for lib in CONFIG["base"]["perl_libs"]: + shell("source {rules.bashrc.output} ; cpanm "+ CONFIG["base"]["perl_cpanm_options"] + " " + lib) + shell("touch " + CONFIG["base"]["perl_install"] + "lib/done") + + def strip_path_level(path, level = 0): head = path for i in range(0, level): @@ -116,7 +151,8 @@ rule repeatmasker_installation: "touch {output}" rule repeatmodeler_installation: - input: SCOUT = rules.repeatscout_installation.output, + input: rules.install_perl_libs.output, + SCOUT = rules.repeatscout_installation.output, TRF = rules.trf_installation.output, RECON = rules.recon_installation.output, RM = rules.rmblast_installation.output, diff --git a/src/rules/repeats/usearch.rules b/src/rules/repeats/usearch.rules new file mode 100644 index 0000000000000000000000000000000000000000..8f85c0f68aa92aa3c1daa8ace4ee9afb79a36a44 --- /dev/null +++ b/src/rules/repeats/usearch.rules @@ -0,0 +1,27 @@ +############# +## usearch ## +############# + +import os, subprocess +import configparser +from SnakeMakeVlpb.src.utils.StripPathLevel import strip_path_level + + +# Download and install +rule usearch_installation: + output: CONFIG["executables"]["usearch"] + shell: + "mkdir -p " + CONFIG["base"]["executables"] + " && " + "cd " + CONFIG["base"]["executables"] + " && " + "wget" + CONFIG["executable_sources"]["wget_options"] + CONFIG["executable_sources"]["usearch_URL"] + " && " + "mv upload* usearch && " + "chmod a+rx usearch && " + "touch {output}" + +rule usearch: + input: + rules.usearch_installation.output, + fasta = CONFIG["base"]["fasta"] + output: "" + CONFIG["base"]["output"] + shell: + "" + CONFIG["base"]["executables"] + "/usearch -cluster_fast {input.fasta} -consout " + CONFIG["base"]["output"] + " " + CONFIG["usearch_opts"]["cl_options"] diff --git a/src/workflows/repeats/modeler/config.json b/src/workflows/repeats/modeler/config.json index a13a93f86cb3fe58d36a2cdd10d87278760b26d9..3b2a3981dc68019dc04e6d71e2215e70a3b7a491 100644 --- a/src/workflows/repeats/modeler/config.json +++ b/src/workflows/repeats/modeler/config.json @@ -7,10 +7,39 @@ "rmlib" : "{base_dir}{databases}repeat_db.fasta", "cpus": "8", "output":"do not use (is for repeatmasker)", - "cl_options":"do not use (is for repeatmasker)" + "cl_options":"do not use (is for repeatmasker)", + + "perl_brew" : "{home_dir}perl5/perlbrew/", + "perl_install": "{home_dir}perl5/perlbrew/perls/perl-5.18.0/", + "perl_version": "5.18.0", + "perl_cpanm_options" : "--force --notest ", + "perl_brew_options" : "-j 6 --notest --noman --thread --force ", + "perl_libs": + ["Scalar::Util", + "Bio::SeqIO", + "Pod::Usage", + "File::Basename", + "threads", + "threads::shared", + "Thread::Queue", + "Getopt::Long", + "FindBin", + "File::Spec", + "File::Path", + "Data::Dumper", + "YAML", + "Carp", + "Hash::Merge", + "Logger::Simple", + "Parallel::ForkManager" + ] + }, "executables": { + "perl" : "{home_dir}perl5/perlbrew/perls/perl-5.18.0/bin/perl", + "cpanm" : "{home_dir}perl5/perlbrew/bin/cpanm", + "rmblast": "{base_dir}{executables}rmblast/bin/rmblastn", "makeblastdb": "{base_dir}{executables}blast_plus/bin/makeblastdb", "blastn": "{base_dir}{executables}blast_plus/bin/blastn", @@ -27,6 +56,7 @@ "executable_sources": { "wget_options": " -c --proxy=off ", + "perl_URL": "http://downloads.activestate.com/ActivePerl/releases/5.18.4.1803/ActivePerl-5.18.4.1803-x86_64-linux-glibc-2.5-298573.tar.gz", "trf_URL": "http://tandem.bu.edu/trf/downloads/trf407b.linux64", "blast+_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz", "rmblast_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/rmblast/2.2.28/ncbi-rmblastn-2.2.28-x64-linux.tar.gz ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.28/ncbi-blast-2.2.28+-x64-linux.tar.gz", diff --git a/src/workflows/repeats/modeler/paths.json b/src/workflows/repeats/modeler/paths.json index b59d0c905214eb8309da258143f4f1bcdaadf918..bd1a06e423a80ddafa7ee85ca70c7f7da7c85c46 100644 --- a/src/workflows/repeats/modeler/paths.json +++ b/src/workflows/repeats/modeler/paths.json @@ -2,5 +2,6 @@ "base_dir": "/tmp/", "executables": "bin/repeats/", "databases":"databases/", + "home_dir":"/home/vlpb/", "download_dir":"/tmp/" } diff --git a/src/workflows/repeats/transposome/Snakefile b/src/workflows/repeats/transposome/Snakefile index c9e89a510fe342771c8d97d78e2571e80bdc5387..404a96ee2274bf411d72f1ebe93739d5fbb3578e 100644 --- a/src/workflows/repeats/transposome/Snakefile +++ b/src/workflows/repeats/transposome/Snakefile @@ -37,8 +37,8 @@ with open("config.json") as conf: # Include necessary rules -include: ruleDir + "/repeats/Transposome.rules" include: ruleDir + "/repeats/masking.rules" +include: ruleDir + "/repeats/Transposome.rules" rule all: input: diff --git a/src/workflows/repeats/transposome/config.json b/src/workflows/repeats/transposome/config.json index 1315ef29648d302ff74eacd9fe2ad3dcfcc390c8..4c572a39c17d49742d0f4ad837d8bf078593977e 100644 --- a/src/workflows/repeats/transposome/config.json +++ b/src/workflows/repeats/transposome/config.json @@ -18,7 +18,8 @@ "perl_cpanm_options" : "--force --notest ", "perl_brew_options" : "-j 6 --notest --noman --thread --force ", "perl_libs": - ["Moose" + ["Moose", + "list::Util" ], "genome":"do not use (is for repeatmasker)", "cl_options":"do not use (is for repeatmasker)" diff --git a/src/workflows/repeats/usearch/Snakefile b/src/workflows/repeats/usearch/Snakefile new file mode 100644 index 0000000000000000000000000000000000000000..a409ef8659079bdd9c7d1bb9a17fa5010b62daa0 --- /dev/null +++ b/src/workflows/repeats/usearch/Snakefile @@ -0,0 +1,46 @@ +""" +Create a repeat libarary from reads +""" + +import json, os, inspect +import configparser +import io + +# Get lib configuration +# Set PYTHONPATH before running Snakemake : +# export PYTHONPATH=$PYTHONPATH:/home/lavri002/scratch/ + +from SnakeMakeVlpb import VLPB_LIB_PATH +from SnakeMakeVlpb.src.utils import CreateSymlinkReads + +ruleDir = VLPB_LIB_PATH + "rules/" + +""" +Recursive function to do a string format on each of the string values in the config. + Full replace on a JSON does not work: it uses '{}' for formatting, which results + in errors. +""" + +def replace_paths_in_json(conf, paths): + for item in conf: + if isinstance(conf[item], str): # Is it a string value? + conf[item] = conf[item].format(**paths) # Use json to replace values + elif isinstance(conf[item], dict): # Is this a json object (dict)? + replace_paths_in_json(conf[item], paths) # Parse object + +# Get the config file: +with open("config.json") as conf: + with open("paths.json") as paths: + PATHS = json.load(paths) + CONFIG = json.load(conf) + replace_paths_in_json(CONFIG, PATHS) + +# Create and change to the working directory +workdir: CONFIG["base"]["working_dir"] + +# Include necessary rules +include: ruleDir + "/repeats/usearch.rules" + +rule all: + input: + rules.usearch.output diff --git a/src/workflows/repeats/usearch/config.json b/src/workflows/repeats/usearch/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ef32b3a7b1853397d03918a56cd1ca91042e4b0 --- /dev/null +++ b/src/workflows/repeats/usearch/config.json @@ -0,0 +1,24 @@ +{ + "base": + { + "fasta": "/tmp/fasta.fa", + "output": "/tmp/consensus.fa", + "working_dir": "{base_dir}temp_repeats", + "download_dir": "{download_dir}", + "executables": "{base_dir}{executables}" + }, + + "executables": + { + "usearch": "{base_dir}{executables}/usearch" + }, + + "executable_sources": + { + "usearch_URL": "http://drive5.com/cgi-bin/upload3.py?license=2017012008363614880", + "wget_options": " -c --proxy=off " + }, + "usearch_opts": { + "cl_options": " -id 0.8 -threads 20 " + } +} diff --git a/src/workflows/repeats/usearch/paths.json b/src/workflows/repeats/usearch/paths.json new file mode 100644 index 0000000000000000000000000000000000000000..aa24db5d5796596757777a8061124eb668e9a1de --- /dev/null +++ b/src/workflows/repeats/usearch/paths.json @@ -0,0 +1,5 @@ +{ + "base_dir": "/tmp/", + "executables": "bin/repeats/", + "download_dir":"/tmp/" +}