Skip to content
Snippets Groups Projects
Commit 6522b38b authored by Sven Warris's avatar Sven Warris
Browse files

Added perlbrew for transposome and created usearch workflow

parent 03b69666
Branches master-wur
No related tags found
No related merge requests found
...@@ -7,41 +7,6 @@ import configparser ...@@ -7,41 +7,6 @@ import configparser
from Transposome import * from Transposome import *
# Install perl if version is too low (as on our cluster)
#
# The shell command is a single "&&"-chained pipeline that:
#   1. downloads the perlbrew installer with wget and pipes it into bash;
#   2. runs `perlbrew install` for CONFIG["base"]["perl_version"] with the
#      flags from CONFIG["base"]["perl_brew_options"];
#   3. creates the symlink <perl_brew>perls/perl-<version>/bin/perl pointing
#      at the versioned binary bin/perl<version>;
#   4. uses sed to insert a `set +u` line in front of the
#      `export PERLBREW_BASHRC_VERSION` line of <perl_brew>etc/bashrc
#      (NOTE(review): presumably so that sourcing the file does not abort when
#      the shell runs with `set -u` -- confirm);
#   5. sources that bashrc and switches perlbrew to the requested version;
#   6. appends the perlbrew bashrc plus a `perlbrew switch <version>` line to
#      the output of the `bashrc` rule (defined outside this section) so that
#      later rules can source it;
#   7. touches the declared output file.
# CONFIG["base"]["perl_brew"] is concatenated directly with "perls/..." and
# "etc/bashrc", so it has to end with a "/".
rule install_perl:
    output: CONFIG["executables"]["perl"]
    shell:
        "wget --no-check-certificate -O - http://install.perlbrew.pl | bash && "
        "" + CONFIG["base"]["perl_brew"] + "/bin/perlbrew " + CONFIG["base"]["perl_brew_options"] + " install " + CONFIG["base"]["perl_version"] + " && "
        "ln -fs " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl" + CONFIG["base"]["perl_version"] + " " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl && "
        "sed -i 's|export PERLBREW_BASHRC_VERSION|set +u\\nexport PERLBREW_BASHRC_VERSION|' " + CONFIG["base"]["perl_brew"] + "etc/bashrc && "
        "source " + CONFIG["base"]["perl_brew"] + "etc/bashrc && "
        "perlbrew switch " + CONFIG["base"]["perl_version"] + " && "
        "cat " + CONFIG["base"]["perl_brew"] + "etc/bashrc >> {rules.bashrc.output} && "
        "echo 'perlbrew switch " + CONFIG["base"]["perl_version"] + "' >> {rules.bashrc.output} && "
        "touch {output};"
# Create local Perl library
#
# Depends on the perl binary produced by install_perl (named input PERL).
# The command sources the file produced by the `bashrc` rule (defined outside
# this section), runs `perlbrew install-cpanm` and then touches the path
# configured as CONFIG["executables"]["cpanm"].
rule install_perl_cpanm:
    input:
        PERL=rules.install_perl.output
    output: CONFIG["executables"]["cpanm"]
    shell:
        "source {rules.bashrc.output} && "
        "perlbrew install-cpanm && "
        "touch {output}"
# Install necessary Perl libraries
#
# Runs one cpanm invocation per entry of CONFIG["base"]["perl_libs"], each in
# a shell that first sources the file produced by the `bashrc` rule, and
# finally touches the "lib/done" marker that is this rule's output.
rule install_perl_libs:
    input:
        rules.install_perl_cpanm.output,
        rules.install_perl.output
    output: CONFIG["base"]["perl_install"] + "lib/done"
    run:
        # The command prefix is the same for every module, so build it once.
        cpanm_prefix = "source {rules.bashrc.output} ; cpanm " + CONFIG["base"]["perl_cpanm_options"] + " "
        for module_name in CONFIG["base"]["perl_libs"]:
            shell(cpanm_prefix + module_name)
        # Marker file telling Snakemake that all modules were processed.
        shell("touch " + CONFIG["base"]["perl_install"] + "lib/done")
rule install_transposome: rule install_transposome:
input: input:
rules.install_perl_libs.output, rules.install_perl_libs.output,
......
...@@ -3,6 +3,41 @@ ...@@ -3,6 +3,41 @@
import os, subprocess import os, subprocess
import configparser import configparser
# Install a private Perl through perlbrew.
#
# The shell command is a single "&&"-chained pipeline that:
#   1. downloads the perlbrew installer with wget and pipes it into bash;
#   2. runs `perlbrew install` for CONFIG["base"]["perl_version"] with the
#      flags from CONFIG["base"]["perl_brew_options"];
#   3. creates the symlink <perl_brew>perls/perl-<version>/bin/perl pointing
#      at the versioned binary bin/perl<version>;
#   4. uses sed to insert a `set +u` line in front of the
#      `export PERLBREW_BASHRC_VERSION` line of <perl_brew>etc/bashrc
#      (NOTE(review): presumably so that sourcing the file does not abort when
#      the shell runs with `set -u` -- confirm);
#   5. sources that bashrc and switches perlbrew to the requested version;
#   6. appends the perlbrew bashrc plus a `perlbrew switch <version>` line to
#      the output of the `bashrc` rule (defined outside this section) so that
#      later rules can source it;
#   7. touches the declared output file.
# CONFIG["base"]["perl_brew"] is concatenated directly with "perls/..." and
# "etc/bashrc", so it has to end with a "/".
rule install_perl:
    output: CONFIG["executables"]["perl"]
    shell:
        "wget --no-check-certificate -O - http://install.perlbrew.pl | bash && "
        "" + CONFIG["base"]["perl_brew"] + "/bin/perlbrew " + CONFIG["base"]["perl_brew_options"] + " install " + CONFIG["base"]["perl_version"] + " && "
        "ln -fs " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl" + CONFIG["base"]["perl_version"] + " " + CONFIG["base"]["perl_brew"] + "perls/perl-" + CONFIG["base"]["perl_version"] + "/bin/perl && "
        "sed -i 's|export PERLBREW_BASHRC_VERSION|set +u\\nexport PERLBREW_BASHRC_VERSION|' " + CONFIG["base"]["perl_brew"] + "etc/bashrc && "
        "source " + CONFIG["base"]["perl_brew"] + "etc/bashrc && "
        "perlbrew switch " + CONFIG["base"]["perl_version"] + " && "
        "cat " + CONFIG["base"]["perl_brew"] + "etc/bashrc >> {rules.bashrc.output} && "
        "echo 'perlbrew switch " + CONFIG["base"]["perl_version"] + "' >> {rules.bashrc.output} && "
        "touch {output};"
# Create local Perl library
#
# Depends on the perl binary produced by install_perl (named input PERL).
# The command sources the file produced by the `bashrc` rule (defined outside
# this section), runs `perlbrew install-cpanm` and then touches the path
# configured as CONFIG["executables"]["cpanm"].
rule install_perl_cpanm:
    input:
        PERL=rules.install_perl.output
    output: CONFIG["executables"]["cpanm"]
    shell:
        "source {rules.bashrc.output} && "
        "perlbrew install-cpanm && "
        "touch {output}"
# Install necessary Perl libraries
#
# Runs one cpanm invocation per entry of CONFIG["base"]["perl_libs"], each in
# a shell that first sources the file produced by the `bashrc` rule, and
# finally touches the "lib/done" marker that is this rule's output.
rule install_perl_libs:
    input:
        rules.install_perl_cpanm.output,
        rules.install_perl.output
    output: CONFIG["base"]["perl_install"] + "lib/done"
    run:
        # The command prefix is the same for every module, so build it once.
        cpanm_prefix = "source {rules.bashrc.output} ; cpanm " + CONFIG["base"]["perl_cpanm_options"] + " "
        for module_name in CONFIG["base"]["perl_libs"]:
            shell(cpanm_prefix + module_name)
        # Marker file telling Snakemake that all modules were processed.
        shell("touch " + CONFIG["base"]["perl_install"] + "lib/done")
def strip_path_level(path, level = 0): def strip_path_level(path, level = 0):
head = path head = path
for i in range(0, level): for i in range(0, level):
...@@ -116,7 +151,8 @@ rule repeatmasker_installation: ...@@ -116,7 +151,8 @@ rule repeatmasker_installation:
"touch {output}" "touch {output}"
rule repeatmodeler_installation: rule repeatmodeler_installation:
input: SCOUT = rules.repeatscout_installation.output, input: rules.install_perl_libs.output,
SCOUT = rules.repeatscout_installation.output,
TRF = rules.trf_installation.output, TRF = rules.trf_installation.output,
RECON = rules.recon_installation.output, RECON = rules.recon_installation.output,
RM = rules.rmblast_installation.output, RM = rules.rmblast_installation.output,
......
#############
## usearch ##
#############
import os, subprocess
import configparser
from SnakeMakeVlpb.src.utils.StripPathLevel import strip_path_level
# Download and install
#
# Creates the executables directory, downloads usearch into it with wget,
# renames the downloaded "upload*" file to "usearch", makes it readable and
# executable for everyone and touches the declared output.
#
# The download URL is wrapped in single quotes: it carries a query string
# ("?license=..."), and an unquoted "?" is a shell glob character (a "&" in
# such a URL would even split the command).  Quoting hands the URL to wget
# verbatim.
#
# CONFIG["executable_sources"]["wget_options"] is concatenated without any
# separator, so it must start and end with a space.
rule usearch_installation:
    output: CONFIG["executables"]["usearch"]
    shell:
        "mkdir -p " + CONFIG["base"]["executables"] + " && "
        "cd " + CONFIG["base"]["executables"] + " && "
        "wget" + CONFIG["executable_sources"]["wget_options"] + "'" + CONFIG["executable_sources"]["usearch_URL"] + "' && "
        "mv upload* usearch && "
        "chmod a+rx usearch && "
        "touch {output}"
# Cluster the sequences of the input FASTA with `usearch -cluster_fast` and
# write the consensus sequences (-consout) to CONFIG["base"]["output"].
# Additional command-line options are appended from
# CONFIG["usearch_opts"]["cl_options"].
#
# The unnamed input makes this rule depend on the output of
# usearch_installation; the binary itself is addressed as
# CONFIG["base"]["executables"] + "/usearch" rather than through {input}.
# NOTE(review): the leading `"" +` in the shell directive may be there so the
# command starts with a string literal -- confirm before removing it.
rule usearch:
    input:
        rules.usearch_installation.output,
        fasta = CONFIG["base"]["fasta"]
    output: "" + CONFIG["base"]["output"]
    shell:
        "" + CONFIG["base"]["executables"] + "/usearch -cluster_fast {input.fasta} -consout " + CONFIG["base"]["output"] + " " + CONFIG["usearch_opts"]["cl_options"]
...@@ -7,10 +7,39 @@ ...@@ -7,10 +7,39 @@
"rmlib" : "{base_dir}{databases}repeat_db.fasta", "rmlib" : "{base_dir}{databases}repeat_db.fasta",
"cpus": "8", "cpus": "8",
"output":"do not use (is for repeatmasker)", "output":"do not use (is for repeatmasker)",
"cl_options":"do not use (is for repeatmasker)" "cl_options":"do not use (is for repeatmasker)",
"perl_brew" : "{home_dir}perl5/perlbrew/",
"perl_install": "{home_dir}perl5/perlbrew/perls/perl-5.18.0/",
"perl_version": "5.18.0",
"perl_cpanm_options" : "--force --notest ",
"perl_brew_options" : "-j 6 --notest --noman --thread --force ",
"perl_libs":
["Scalar::Util",
"Bio::SeqIO",
"Pod::Usage",
"File::Basename",
"threads",
"threads::shared",
"Thread::Queue",
"Getopt::Long",
"FindBin",
"File::Spec",
"File::Path",
"Data::Dumper",
"YAML",
"Carp",
"Hash::Merge",
"Logger::Simple",
"Parallel::ForkManager"
]
}, },
"executables": "executables":
{ {
"perl" : "{home_dir}perl5/perlbrew/perls/perl-5.18.0/bin/perl",
"cpanm" : "{home_dir}perl5/perlbrew/bin/cpanm",
"rmblast": "{base_dir}{executables}rmblast/bin/rmblastn", "rmblast": "{base_dir}{executables}rmblast/bin/rmblastn",
"makeblastdb": "{base_dir}{executables}blast_plus/bin/makeblastdb", "makeblastdb": "{base_dir}{executables}blast_plus/bin/makeblastdb",
"blastn": "{base_dir}{executables}blast_plus/bin/blastn", "blastn": "{base_dir}{executables}blast_plus/bin/blastn",
...@@ -27,6 +56,7 @@ ...@@ -27,6 +56,7 @@
"executable_sources": "executable_sources":
{ {
"wget_options": " -c --proxy=off ", "wget_options": " -c --proxy=off ",
"perl_URL": "http://downloads.activestate.com/ActivePerl/releases/5.18.4.1803/ActivePerl-5.18.4.1803-x86_64-linux-glibc-2.5-298573.tar.gz",
"trf_URL": "http://tandem.bu.edu/trf/downloads/trf407b.linux64", "trf_URL": "http://tandem.bu.edu/trf/downloads/trf407b.linux64",
"blast+_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz", "blast+_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz",
"rmblast_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/rmblast/2.2.28/ncbi-rmblastn-2.2.28-x64-linux.tar.gz ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.28/ncbi-blast-2.2.28+-x64-linux.tar.gz", "rmblast_URL": "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/rmblast/2.2.28/ncbi-rmblastn-2.2.28-x64-linux.tar.gz ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.28/ncbi-blast-2.2.28+-x64-linux.tar.gz",
......
...@@ -2,5 +2,6 @@ ...@@ -2,5 +2,6 @@
"base_dir": "/tmp/", "base_dir": "/tmp/",
"executables": "bin/repeats/", "executables": "bin/repeats/",
"databases":"databases/", "databases":"databases/",
"home_dir":"/home/vlpb/",
"download_dir":"/tmp/" "download_dir":"/tmp/"
} }
...@@ -37,8 +37,8 @@ with open("config.json") as conf: ...@@ -37,8 +37,8 @@ with open("config.json") as conf:
# Include necessary rules # Include necessary rules
include: ruleDir + "/repeats/Transposome.rules"
include: ruleDir + "/repeats/masking.rules" include: ruleDir + "/repeats/masking.rules"
include: ruleDir + "/repeats/Transposome.rules"
rule all: rule all:
input: input:
......
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
"perl_cpanm_options" : "--force --notest ", "perl_cpanm_options" : "--force --notest ",
"perl_brew_options" : "-j 6 --notest --noman --thread --force ", "perl_brew_options" : "-j 6 --notest --noman --thread --force ",
"perl_libs": "perl_libs":
["Moose" ["Moose",
"list::Util"
], ],
"genome":"do not use (is for repeatmasker)", "genome":"do not use (is for repeatmasker)",
"cl_options":"do not use (is for repeatmasker)" "cl_options":"do not use (is for repeatmasker)"
......
"""
Create a repeat library from reads
"""
import json, os, inspect
import configparser
import io
# Get lib configuration
# Set PYTHONPATH before running Snakemake :
# export PYTHONPATH=$PYTHONPATH:/home/lavri002/scratch/
from SnakeMakeVlpb import VLPB_LIB_PATH
from SnakeMakeVlpb.src.utils import CreateSymlinkReads
ruleDir = VLPB_LIB_PATH + "rules/"
"""
Recursive function to do a string format on each of the string values in the config.
Full replace on a JSON does not work: it uses '{}' for formatting, which results
in errors.
"""
def replace_paths_in_json(conf, paths):
    """Expand ``{name}`` placeholders in every string value of *conf*, in place.

    The structure is walked recursively: nested dicts and lists are descended
    into, and each string value is replaced by ``value.format(**paths)``.
    Values of any other type (numbers, booleans, ``None``) are left untouched.
    Strings that sit inside a list (for example a list of paths) are expanded
    as well, not only strings stored directly under a dict key.

    Args:
        conf: A dict or list as produced by ``json.load``; modified in place.
        paths: Mapping of placeholder name to replacement text.

    Returns:
        None. The result is the mutated *conf*.

    Raises:
        KeyError: If a string refers to a placeholder missing from *paths*.
    """
    # Dicts are addressed by key and lists by index, so both kinds of
    # container can be updated in place through the same subscript syntax.
    keys = conf if isinstance(conf, dict) else range(len(conf))
    for key in keys:
        value = conf[key]
        if isinstance(value, str):
            conf[key] = value.format(**paths)
        elif isinstance(value, (dict, list)):
            replace_paths_in_json(value, paths)
# Load the workflow configuration and the path definitions from the current
# directory, then expand the path placeholders inside the configuration.
with open("config.json") as conf, open("paths.json") as paths:
    PATHS = json.load(paths)
    CONFIG = json.load(conf)
replace_paths_in_json(CONFIG, PATHS)
# Create and change to the working directory
workdir: CONFIG["base"]["working_dir"]
# Include necessary rules
# (ruleDir is built from VLPB_LIB_PATH + "rules/" earlier in this file.)
include: ruleDir + "/repeats/usearch.rules"
# Aggregate target: its only input is the output of the `usearch` rule, so
# building `all` requires the usearch clustering to have run.
rule all:
    input:
        rules.usearch.output
{
"base":
{
"fasta": "/tmp/fasta.fa",
"output": "/tmp/consensus.fa",
"working_dir": "{base_dir}temp_repeats",
"download_dir": "{download_dir}",
"executables": "{base_dir}{executables}"
},
"executables":
{
"usearch": "{base_dir}{executables}/usearch"
},
"executable_sources":
{
"usearch_URL": "http://drive5.com/cgi-bin/upload3.py?license=2017012008363614880",
"wget_options": " -c --proxy=off "
},
"usearch_opts": {
"cl_options": " -id 0.8 -threads 20 "
}
}
{
"base_dir": "/tmp/",
"executables": "bin/repeats/",
"download_dir":"/tmp/"
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment