Skip to content
Snippets Groups Projects
Commit 6a857caa authored by Wijfjes, Raul's avatar Wijfjes, Raul
Browse files

Added the option to exclude certain chromosomes

parent d48739a1
Branches
Tags
No related merge requests found
......@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [0.2.0] - 2019-04-25
### Added
- Hecaton can now be run with pre-generated BWA mem alignments
- Certain chromosomal regions can now be excluded from CNV calling
## [0.1.0] - 2019-03-16
### Added
......
......@@ -4,7 +4,6 @@ name: hecaton_py2
channels:
- bioconda
- conda-forge
- defaults
dependencies:
# python
- python=2.7
......
......@@ -4,7 +4,6 @@ name: hecaton_py3
channels:
- bioconda
- conda-forge
- r
- defaults
dependencies:
# python
......
......@@ -9,6 +9,7 @@
params.genome_file = ""
params.reads = "prefix_{1,2}.fastq"
params.alignment_exclude = 'NO_EXCLUDE'
params.model_file = ""
params.cutoff = 0.7
params.extra_filtering = false
......@@ -36,6 +37,7 @@ def helpMessage() {
--output_dir: Output directory to which all results will be written
Optional arguments:
--alignment_exclude: BED file containing chromosomal regions that will be excluded from CNV calling
-w: Working directory to which intermediate results will be written. Default: work
-c: Config file specifying the number of CPU cores and memory that will be assigned to Hecaton
""".stripIndent()
......@@ -80,6 +82,7 @@ genome_N_file = file(params.genome_file + ".N.bed")
genome_bed = file(params.genome_file + ".genome")
manta_config_file = file(params.manta_config)
model_file = file(params.model_file)
exclude_file = file(params.alignment_exclude)
/*
* Align reads to reference genome using speedseq
......@@ -98,6 +101,7 @@ process align_reads {
file genome_bwa_bwt_file
file genome_bwa_pac_file
file genome_bwa_sa_file
file exclude_file
output:
set val(prefix), file("${prefix}.bam") into bwa_bams
......@@ -108,12 +112,32 @@ process align_reads {
set val(prefix), file("${prefix}.splitters.bam.bai") into bwa_splitters_bam_indices
script:
"""
source activate hecaton_py3
speedseq align -t ${task.cpus} -o ${prefix} -R \"@RG\tID:${prefix}\tSM:${prefix}\tLB:${prefix}\" \
${genome_file} ${reads[0]} ${reads[1]}
source deactivate
"""
if( exclude_file.name == 'NO_EXCLUDE' )
"""
source activate hecaton_py3
speedseq align -t ${task.cpus} -o ${prefix} -R \"@RG\tID:${prefix}\tSM:${prefix}\tLB:${prefix}\" \
${genome_file} ${reads[0]} ${reads[1]}
source deactivate
"""
else
"""
source activate hecaton_py3
speedseq align -t ${task.cpus} -o ${prefix} -R \"@RG\tID:${prefix}\tSM:${prefix}\tLB:${prefix}\" \
${genome_file} ${reads[0]} ${reads[1]} && \
samtools view -h -b -L ${exclude_file} ${prefix}.bam > ${prefix}_excluded.bam && \
samtools index ${prefix}_excluded.bam && \
mv ${prefix}_excluded.bam ${prefix}.bam && \
mv ${prefix}_excluded.bam.bai ${prefix}.bam.bai && \
samtools view -h -b -L ${exclude_file} ${prefix}.discordants.bam > ${prefix}.discordants_excluded.bam && \
samtools index ${prefix}.discordants_excluded.bam && \
mv ${prefix}.discordants_excluded.bam ${prefix}.discordants.bam && \
mv ${prefix}.discordants_excluded.bam.bai ${prefix}.discordants.bam.bai && \
samtools view -h -b -L ${exclude_file} ${prefix}.splitters.bam > ${prefix}.splitters_excluded.bam && \
samtools index ${prefix}.splitters_excluded.bam && \
mv ${prefix}.splitters_excluded.bam ${prefix}.splitters.bam && \
mv ${prefix}.splitters_excluded.bam.bai ${prefix}.splitters.bam.bai && \
source deactivate
"""
}
/*
......
......@@ -9,6 +9,7 @@
params.genome_file = ""
params.bwa_bams = "*.bam"
params.alignment_exclude = 'NO_EXCLUDE'
params.model_file = ""
params.cutoff = 0.7
params.extra_filtering = false
......@@ -36,6 +37,7 @@ def helpMessage() {
--output_dir: Output directory to which all results will be written
Optional arguments:
--alignment_exclude: BED file containing chromosomal regions that will be excluded from CNV calling
-w: Working directory to which intermediate results will be written. Default: work
-c: Config file specifying the number of CPU cores and memory that will be assigned to Hecaton
""".stripIndent()
......@@ -80,13 +82,50 @@ genome_file = file(params.genome_file)
genome_index_file = file(params.genome_file + ".fai")
genome_N_file = file(params.genome_file + ".N.bed")
genome_bed = file(params.genome_file + ".genome")
manta_config_file = file(params.manta_config)
model_file = file(params.model_file)
exclude_file = file(params.alignment_exclude)
/*
* Exclude chromosomal regions
*/
process exclude_reads {
tag "Excluding reads"
input:
set val(prefix), file(alignment_file), file(alignment_file_index) from bwa_bams
file exclude_file
output:
set val(prefix), file("${prefix}.bam"), file("${prefix}.bam.bai") into bwa_bams_excluded
script:
if( exclude_file.name != 'NO_EXCLUDE' )
"""
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
samtools view -h -b -L ${exclude_file} ${alignment_file} > ${prefix}_excluded.bam && \
samtools index ${prefix}_excluded.bam && \
mv ${prefix}_excluded.bam ${prefix}.bam && \
mv ${prefix}_excluded.bam.bai ${prefix}.bam.bai && \
conda deactivate
"""
else
"""
export PS1=\${PS1:-''}
source ~/.bashrc
mv ${alignment_file} ${prefix}.bam
mv ${alignment_file_index} ${prefix}.bam.bai
"""
}
/*
* Create channels for all callers
*/
bwa_bams.into{delly_bams; gridss_bams; lumpy_bams; manta_bams; duphold_bams}
bwa_bams_excluded.into{delly_bams; gridss_bams; lumpy_bams; manta_bams; duphold_bams}
/*
* Call CNVs with LUMPY
......@@ -94,6 +133,7 @@ bwa_bams.into{delly_bams; gridss_bams; lumpy_bams; manta_bams; duphold_bams}
process call_lumpy {
label "multithreading"
label "lumpy"
publishDir "${params.output_dir}/lumpy_calls", mode: 'copy'
tag "LUMPY calling: ${prefix}"
......@@ -110,7 +150,9 @@ process call_lumpy {
script:
"""
source activate hecaton_py3 &&
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3 &&
speedseq sv -t ${task.cpus} -o ${prefix} \
-x ${genome_N_file} \
-B ${alignment_file} \
......@@ -127,7 +169,7 @@ process call_lumpy {
-b ${prefix}_post_processed.bedpe \
-f ${genome_index_file} \
-o ${prefix}_post_processed_collapsed.bedpe
source deactivate
conda deactivate
"""
}
......@@ -138,6 +180,7 @@ process call_lumpy {
process call_delly {
publishDir "${params.output_dir}/delly_calls", mode: 'copy'
tag "DELLY calling: ${prefix}"
label "delly"
input:
set val(prefix), file(alignment_file), file(alignment_file_index) from delly_bams
......@@ -152,7 +195,9 @@ process call_delly {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
delly call -g ${genome_file} -o ${prefix}_delly_cnvs.bcf ${alignment_file} &&
bcftools view ${prefix}_delly_cnvs.bcf > ${prefix}_delly_cnvs.vcf &&
bgzip -f ${prefix}_delly_cnvs.vcf &&
......@@ -169,7 +214,7 @@ process call_delly {
-b ${prefix}_post_processed.bedpe \
-f ${genome_index_file} \
-o ${prefix}_post_processed_collapsed.bedpe
source deactivate
conda deactivate
"""
}
......@@ -179,6 +224,7 @@ process call_delly {
process call_gridss {
label "multithreading"
label "gridss"
publishDir "${params.output_dir}/gridss_calls", mode: 'copy'
tag "GRIDSS calling: ${prefix}"
......@@ -192,7 +238,9 @@ process call_gridss {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
java -ea -Xmx31g \
-Dreference_fasta=${genome_file} \
-Dsamjdk.create_index=true \
......@@ -217,7 +265,7 @@ process call_gridss {
-b ${prefix}_post_processed.bedpe \
-f ${genome_index_file} \
-o ${prefix}_post_processed_collapsed.bedpe
source deactivate
conda deactivate
"""
}
......@@ -229,6 +277,7 @@ process call_manta {
label "multithreading"
publishDir "${params.output_dir}/manta_calls", mode: 'copy'
tag "Manta calling: ${prefix}"
label "manta"
input:
set val(prefix), file(alignment_file), file(alignment_file_index) from manta_bams
......@@ -243,7 +292,9 @@ process call_manta {
script:
"""
source activate hecaton_py2
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py2
configManta.py --tumorBam ${alignment_file} \
--config ${manta_config_file} \
--referenceFasta ${genome_file} --runDir ${prefix}_rundir &&
......@@ -252,6 +303,8 @@ process call_manta {
mv results/variants/tumorSV.vcf.gz ../${prefix}_tumorSV.vcf.gz &&
mv results/variants/tumorSV.vcf.gz.tbi ../${prefix}_tumorSV.vcf.gz.tbi &&
cd .. &&
conda deactivate
conda activate hecaton_py3
vcf_to_bedpe.py -i ${prefix}_tumorSV.vcf.gz \
-o ${prefix}.bedpe -t Manta &&
process_simple_cnvs.py -i ${prefix}_tumorSV.vcf.gz \
......@@ -264,7 +317,7 @@ process call_manta {
-b ${prefix}_post_processed.bedpe \
-f ${genome_index_file} \
-o ${prefix}_post_processed_collapsed.bedpe
source deactivate
conda deactivate
"""
}
......@@ -287,7 +340,9 @@ process intersecting_calls {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
intersecting_bedpe_intervals.py \
-a lumpy_file.bedpe \
-b manta_file.bedpe \
......@@ -303,7 +358,7 @@ process intersecting_calls {
-b gridss_file.bedpe \
-f ${genome_index_file} \
-o ${prefix}_intersected.bedpe
source deactivate
conda deactivate
"""
}
......@@ -324,11 +379,13 @@ process prepare_features {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
intersect_bedpe_to_feature_bedpe.py -i ${intersection_file} \
-o ${prefix}_features.bedpe
split_feature_set_non_insertions.py -f ${prefix}_features.bedpe -i ${prefix}_insertion_features.bedpe -n ${prefix}_non_insertion_features.bedpe
source deactivate
conda deactivate
"""
}
......@@ -349,7 +406,9 @@ process apply_random_forest {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
predict_cnvs_model_insertions_probabilities.py \
-i ${insertion_file} \
-m ${model_file} \
......@@ -362,7 +421,7 @@ process apply_random_forest {
-i ${prefix}_insertion_probabilities.bedpe \
-n ${prefix}_non_insertion_probabilities.bedpe \
-o ${prefix}_probabilities_unfiltered.bedpe
source deactivate
conda deactivate
"""
}
......@@ -382,12 +441,14 @@ process filter_calls_cutoff {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
filter_calls_by_query.py \
-i ${probability_file} \
-q \"SIZE >= 50 & PREDICTION_1 >= ${params.cutoff}\" \
-o ${prefix}_probabilities_filtered_cutoff_${params.cutoff}.bedpe
source deactivate
conda deactivate
"""
}
......@@ -412,7 +473,9 @@ process filter_calls_median_depth {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
bedpe_to_vcf.py \
-i ${probability_file} \
-o ${prefix}.vcf \
......@@ -430,7 +493,7 @@ process filter_calls_median_depth {
-i ${prefix}_probabilities_duphold.bedpe \
-q \"(Chrom_norm_depth < 4 & GC_norm_depth < 4 & Flank_norm_depth < 4) & ((Chrom_norm_depth < 0.7 & GC_norm_depth < 0.7 & Flank_norm_depth < 0.7 & TYPE == 'DEL') | (Chrom_norm_depth > 1.3 & GC_norm_depth > 1.3 & Flank_norm_depth > 1.3 & (TYPE == 'DUP:TANDEM' | TYPE == 'DUP:DISPERSED')) | TYPE == 'INS')\" \
-o ${prefix}_probabilities_filtered_cutoff_${params.cutoff}_depth.bedpe
source deactivate
conda deactivate
"""
}
......@@ -453,7 +516,9 @@ process filter_calls_flanking_Ns {
script:
"""
source activate hecaton_py3
export PS1=\${PS1:-''}
source ~/.bashrc
conda activate hecaton_py3
breakpoint_slop.py -i ${probability_file} \
-o tmp_${prefix}_slop_200.bedpe -s 200 && \
bedtools flank -i tmp_${prefix}_slop_200.bedpe \
......@@ -471,6 +536,6 @@ process filter_calls_flanking_Ns {
-i ${prefix}_probabilities_flanking_Ns.bedpe \
-q \"Flanking_Ns < 0.1\" \
-o ${prefix}_probabilities_filtered_cutoff_${params.cutoff}_depth_flanking_Ns.bedpe \
source deactivate
conda deactivate
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment