Commit a2ed31da authored by Jasper Koehorst's avatar Jasper Koehorst
Browse files

build improvements and conda deactivation added

parent b5abb1b7
# Base image: Ubuntu 18.04
FROM ubuntu:18.04
# Image metadata, declared with a single LABEL instruction
LABEL base.image="docker-registry.wur.nl/unlock/docker" \
      version="1" \
      software="BASE 1.0" \
      software.version="0.0.0" \
      description="IRODS base image for UNLOCK" \
      website="https://m-unlock.gitlab.io" \
      documentation="NA" \
      license="NA" \
      tags="Base"
# ARG cwltool_version=3.0.20200720165847
# Non-interactive apt and UTF-8 locale settings for the build steps below.
# These are ENV values, so they are also present in the running container.
ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=C.UTF-8 LANGUAGE=en_US.UTF-8
# Some default needed stuff
# RUN apt-get update && apt-get install -y git build-essential curl wget nano htop pigz zip unzip
# Install the system tools, Python, pip and the CWL runner (cwltool).
# curl runs with --fail so an HTTP error stops the build instead of saving an
# error page as get-pip.py; the bootstrap script is deleted in the same layer
# once pip is installed. `cwltool --version` doubles as an install smoke test.
RUN apt-get update && apt-get install -y \
        build-essential \
        cpanminus \
        curl \
        git \
        gnupg \
        htop \
        libfontconfig1 \
        locales \
        nano \
        nodejs \
        pigz \
        python3-dev \
        python3-distutils \
        python3.8 \
        raptor2-utils \
        unzip \
        wget \
        zip \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
    && python3 get-pip.py \
    && rm -f get-pip.py \
    && pip3 install cwltool \
    && cwltool --version \
    && pip install html5lib
# iRODS access: upgrade the Python iRODS client, register the iRODS signing
# key and apt repository, then install the icommands command line tools
RUN pip install python-irodsclient --upgrade \
    && apt-get update \
    && wget -qO - https://packages.irods.org/irods-signing-key.asc | apt-key add - \
    && echo "# /etc/apt/sources.list.d/renci-irods.list" | tee -a /etc/apt/sources.list.d/renci-irods.list \
    && echo "deb [arch=amd64] https://packages.irods.org/apt/ xenial main" | tee -a /etc/apt/sources.list.d/renci-irods.list \
    && apt-get update \
    && apt-get install -y irods-icommands
# Point /bin/sh at bash, so the shell-form RUN steps below can use bash
# built-ins such as `source`
RUN rm /bin/sh && ln -s /bin/bash /bin/sh
# Java toolchain, installed through SDKMAN
RUN curl -s https://get.sdkman.io | bash
# Make the SDKMAN init script executable
RUN chmod a+x "$HOME/.sdkman/bin/sdkman-init.sh"
# Switch off SDKMAN shell auto-completion
RUN echo "sdkman_auto_complete=false" >> "$HOME/.sdkman/etc/config"
# Load SDKMAN into this build shell and install Java, Gradle and Maven
RUN source "$HOME/.sdkman/bin/sdkman-init.sh" \
    && sdk install java 11.0.11.hs-adpt \
    && sdk install gradle 7.1 \
    && sdk install maven 3.6.3
# Other packages
# R: register the CRAN signing key and the bionic-cran40 repository, then
# install r-base. apt-get is used for every step because the `apt` front end
# does not offer a stable command line interface for scripted use.
RUN apt-get update && apt-get install -y software-properties-common \
    && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \
    && add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' \
    && apt-get update \
    && apt-get install -y r-base
# Miniconda: download the installer to an explicit path (independent of the
# current working directory), run it in batch mode (-b) with prefix
# $HOME/miniconda, and delete the installer in the same layer so it does not
# stay in the image.
RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh \
    && bash /tmp/miniconda.sh -b -p "$HOME/miniconda" \
    && rm -f /tmp/miniconda.sh
######################################
####### ANALYSIS TOOLS SECTION #######
## Biom format and QUAST
RUN pip install biom-format quast
# Fix locale issue with quast: generate en_US.UTF-8 and reconfigure locales
RUN apt-get clean && apt-get update && locale-gen en_US.UTF-8 && dpkg-reconfigure locales
## CheckM (plus GTDB-Tk); point CheckM at its reference data directory
RUN python3 -m pip install pysam numpy matplotlib checkm-genome gtdbtk \
    && checkm data setRoot /unlock/references/databases/CheckM/
## Needed for faTools (used in the metagenomics workflow):
## make libpng16 available under the libpng12 file name.
# No sudo here: `sudo` is not among the packages this file installs, and the
# build steps already run as root (files are placed under /root throughout).
RUN ln -s /usr/lib/x86_64-linux-gnu/libpng16.so.16.34.0 /usr/lib/x86_64-linux-gnu/libpng12.so.0
## picrust2 (conda)
# The environment file is copied in first so `conda env update` can read it.
COPY conda/picrust2_env.yml /picrust2_env.yml
# Activate the Miniconda base environment, update conda, create the picrust2
# environment and apply the environment file to it.
# -y answers conda's confirmation prompt, so the step never depends on
# interactive input during the build.
RUN source /root/miniconda/bin/activate \
    && conda update -y -n base -c defaults conda \
    && conda create -y -v -n picrust2 -c bioconda -c conda-forge picrust2=2.4.1 \
    && conda env update --name picrust2 --file /picrust2_env.yml \
    && conda deactivate
## HUMAnN meta-omics
# Install HUMAnN and MetaPhlAn from source distributions (--no-binary :all:)
# and point HUMAnN at its database folders under /unlock/references.
RUN pip install humann --no-binary :all: \
    && pip install metaphlan --no-binary :all: \
    && humann_config --update database_folders nucleotide /unlock/references/databases/HUMAnN/chocophlan \
    && humann_config --update database_folders utility_mapping /unlock/references/databases/HUMAnN/utility_mapping \
    && humann_config --update database_folders protein /unlock/references/databases/HUMAnN/uniref
## SET PATH ##
# SDKMAN-managed Maven, Java and Gradle binaries
ENV SDKMAN=/root/.sdkman/candidates/maven/current/bin:/root/.sdkman/candidates/java/current/bin:/root/.sdkman/candidates/gradle/current/bin
# Binaries used by CheckM: hmmer, pplacer and prodigal
ENV CHECKM=/unlock/infrastructure/binaries/hmmer/hmmer-3.3.2/bin:/unlock/infrastructure/binaries/pplacer/pplacer-Linux-v1.1.alpha17/bin:/unlock/infrastructure/binaries/prodigal/v2.6.3
# Binaries used by GTDB-Tk: Mash, FastTree and fastANI.
# note: hmmer and prodigal are shared with CheckM and are therefore listed
# only in CHECKM. Each directory appears once (Mash was previously repeated).
ENV GTDBTK=/unlock/infrastructure/binaries/Mash/mash-Linux64-v2.3:/unlock/infrastructure/binaries/FastTree/FastTree_v2.1.11:/unlock/infrastructure/binaries/fastANI/fastANI_v1.33
## GTDB-Tk reference data location
ENV GTDBTK_DATA_PATH=/unlock/references/databases/GTDBTK/release202/
# All paths combined: the tool directories are appended after the existing PATH
ENV PATH=$PATH:$SDKMAN:$CHECKM:$GTDBTK
######################################
######################################
# Add anonymous access (iRODS client configuration for root)
COPY irods /root/.irods
# Git config file for the runner?
COPY gitconfig /root/.gitconfig
# Add the startup script
COPY run.sh run.sh
# Make it executable
RUN chmod +x run.sh
# Install sshpass (used together with the upload script below).
# apt-get update runs in the same layer as the install so the package index
# is never a stale one left behind by an earlier, cached layer.
RUN apt-get update && apt-get -y install sshpass
# Add the upload script
COPY /scripts/upload.sh upload.sh
# Scripts for small management tasks
COPY scripts /scripts
# Add files for the SRA toolkit
# COPY /files/user-settings.mkfg /root/.ncbi/user-settings.mkfg
# Testing to upgrade nodejs: run the NodeSource 14.x setup script, install
# nodejs from that repository and remove the setup script again.
# curl --fail stops the build on an HTTP error instead of saving an error
# page; apt-get replaces `apt`, which has no stable scripting interface.
RUN curl -fsSL https://deb.nodesource.com/setup_14.x -o nodesource_setup.sh \
    && sh ./nodesource_setup.sh \
    && apt-get install -y nodejs \
    && rm -f nodesource_setup.sh
# Set the startup entrypoint
# ENTRYPOINT ["bash", "/run.sh"]
......@@ -17,6 +17,6 @@ git -C $DIR pull
# Build the docker file
#============================================================================
docker build -t docker-registry.wur.nl/unlock/docker .
docker build -t docker-registry.wur.nl/unlock/docker:kubernetes .
docker push docker-registry.wur.nl/unlock/docker
\ No newline at end of file
docker push docker-registry.wur.nl/unlock/docker:kubernetes
\ No newline at end of file
......@@ -75,6 +75,12 @@ fi
# Check exit status of cwl
exit_status=$?
# Deactivate the conda environment again when one was used
conda_env=`grep conda $yaml | awk -F": " '{print $2}'`
if ! [[ -z "$conda_env" ]]; then
source /root/miniconda/bin/activate && conda deactivate
fi
# When success, update avu on yaml file and upload results
if [ $exit_status -eq 0 ]; then
# Remove provenance data file
......
#!/bin/bash
# Command line handling for the workflow runner.
#   -c <cwl file path>   CWL workflow that is passed to cwltool
#   -y <yaml file path>  yaml job file that is passed to cwltool
#   -p <true|false>      run cwltool with (true) or without (false) provenance
# All three options are required. A missing option, an unknown option or an
# invalid -p value prints the usage text on stderr and exits with status 1.
usage() { echo "Usage: $0 [-c <cwl file path>] [-y <yaml file path>] [-p <true|false>]" 1>&2; exit 1; }
while getopts ":c:y:p:" o; do
    case "${o}" in
        c)
            cwl=${OPTARG}
            ;;
        y)
            yaml=${OPTARG}
            ;;
        p)
            prov=${OPTARG}
            # Compare as strings. An arithmetic test such as
            # ((prov == true)) treats both sides as (unset) variable names,
            # evaluates them to 0 and therefore accepts almost any value.
            [[ "${prov}" == "true" || "${prov}" == "false" ]] || usage
            ;;
        *)
            usage
            ;;
    esac
done
# Drop the parsed options so that "$@" only holds the remaining arguments
shift $((OPTIND-1))
if [ -z "${cwl}" ] || [ -z "${yaml}" ] || [ -z "${prov}" ]; then
    usage
fi
# Echo the effective settings and any leftover arguments to the log
echo "c = ${cwl}"
echo "y = ${yaml}"
echo "p = ${prov}"
echo "$@"
#######################################
# Prepare the job: mark it as running, fetch the yaml job file and download
# every input file the yaml refers to.
yaml_filepath=$yaml
# Obtain parent directory (with trailing slash)
yaml_dir="$(dirname "$yaml")"/
# Obtain the yaml name
yaml_file_name="$(basename "$yaml")"
# Make sure the local directory for the yaml file exists
mkdir -p "$yaml_dir"
# Delete the yaml file if it already exists; -f keeps this silent when there
# is no earlier copy
rm -f "$yaml"
# Updating metadata by removing all cwl and cwl path status
python3 /scripts/metadata.py -key cwl -value "$cwl" -remove -file "$yaml"
# Adding the running status
python3 /scripts/metadata.py -key cwl -value "$cwl" -unit running -add -file "$yaml"
# Download latest yaml file (--pull spelled as in the other IRODSTransfer calls)
java -jar /unlock/infrastructure/binaries/irods/IRODSTransfer.jar --pull --files "$yaml" --local "$yaml_dir" --force
# Obtain destination path: the value after ": " on the line(s) containing "destination"
destination_path=$(grep destination "$yaml" | awk -F": " '{print $2}')
# Download run files from iRODS: for every unique "<number>_irods:" value,
# create its parent folder and pull the file to the same local path.
# The paths are passed as quoted arguments rather than being pasted into a
# generated shell script, and an empty list simply runs no iterations.
grep "[0-9]\+_irods:" "$yaml" | awk '{print $2}' | sort -u | while IFS= read -r irods_file; do
    # Skip entries without a value
    if [ -z "$irods_file" ]; then
        continue
    fi
    mkdir -p "$(dirname "$irods_file")"
    # stdin comes from /dev/null so the transfer cannot consume the path list
    # that feeds this loop
    java -jar /unlock/infrastructure/binaries/irods/IRODSTransfer.jar --pull --local "$irods_file" --files "$irods_file" --force < /dev/null
done
# Start a conda environment when the yaml file names one
conda_env=$(grep conda "$yaml" | awk -F": " '{print $2}')
if [[ -n "$conda_env" ]]; then
    source /root/miniconda/bin/activate && conda activate "$conda_env"
fi
# Run workflow with or without provenance.
# Options shared by both modes are collected once in an array.
cwltool_args=(--tmpdir-prefix /unlock/rancher/unlockcwl --no-container --preserve-entire-environment --outdir "$destination_path")
# $prov is compared with the literal "true"; it is never executed as a command
if [ "$prov" = "true" ]; then
    cwltool_args+=(--provenance "$destination_path/PROVENANCE")
fi
if [[ -z "$destination_path" ]]; then
    # Without a destination there is no valid --outdir; report a failed run so
    # the failure handling that follows still takes place
    echo "No destination path found in $yaml" 1>&2
    exit_status=1
else
    cwltool "${cwltool_args[@]}" "$cwl" "$yaml"
    # Check exit status of cwl, captured directly after the cwltool call
    exit_status=$?
fi
# When success, update avu on yaml file and upload results
if [ "$exit_status" -eq 0 ]; then
    # Remove provenance data file. $prov is compared with the literal "true"
    # (never executed), and an empty destination is skipped so the path can
    # never collapse to /PROVENANCE/data/.
    if [ "$prov" = "true" ] && [ -n "$destination_path" ]; then
        rm -r "$destination_path/PROVENANCE/data/"
    fi
    # Remove cache data
    # rm -r $destination_path/CACHE_$yaml_file_name
    # Upload the results (and provenance) to the same path in iRODS
    java -jar /unlock/infrastructure/binaries/irods/IRODSTransfer.jar --push --files "$destination_path" --irods "$destination_path"
    # Update metadata field: replace the current cwl status with "finished"
    python3 /scripts/metadata.py -key cwl -value "$cwl" -remove -file "$yaml"
    python3 /scripts/metadata.py -key cwl -value "$cwl" -unit finished -add -file "$yaml"
    # Remove local destination results
    if [ -n "$destination_path" ]; then
        rm -rf -- "$destination_path"
    fi
else
    # When CWL execution fails update metadata: replace the status with "failed"
    echo "CWL execution failed..."
    python3 /scripts/metadata.py -key cwl -value "$cwl" -remove -file "$yaml"
    python3 /scripts/metadata.py -key cwl -value "$cwl" -unit failed -add -file "$yaml"
    exit 1
fi
# Remove downloaded files: every "<number>_irods:" value in the yaml file.
# Paths are handed to rm as quoted arguments instead of being piped into sh.
echo "Removing irods files"
grep "[0-9]\+_irods:" "$yaml" | awk '{print $2}' | while IFS= read -r irods_file; do
    if [ -n "$irods_file" ]; then
        rm -f -- "$irods_file"
    fi
done
exit 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment