From d169fc216f64736d5f3159c2bd9cf36c45328650 Mon Sep 17 00:00:00 2001
From: Raul Wijfjes <raul.wijfjes@wur.nl>
Date: Mon, 21 Mar 2022 09:59:51 +0100
Subject: [PATCH] Fixed tests

---
 CHANGELOG.md                           |  7 +++++++
 docker/Dockerfile                      |  2 +-
 docker/docker_build.sh                 |  2 +-
 nextflow/hecaton.nf                    |  2 +-
 nextflow/hecaton_no_align.nf           |  2 +-
 scripts/filter/filter_ref_sites_vcf.py |  1 +
 tests/functional_test.sh               |  2 +-
 tests/genotyping/expected_output.tsv   |  8 +++----
 tests/genotyping/expected_output.vcf   | 29 +++++++++++++++-----------
 9 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5472f8c..61e3dba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+## [0.5.1] - 2022-03-21
+### Fixed
+- Updated the expected test output files
+- Changed the minimum number of samples in the filter_ref_sites_vcf.py test to 1
+
 ## [0.5.0] - 2021-01-08
 ### Changed
 - merge_vcf_files.py now sets genotypes with a DHFFC of Inf, Nan, or higher than 4 to missing
diff --git a/docker/Dockerfile b/docker/Dockerfile
index d836bca..667c488 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -64,7 +64,7 @@ RUN source activate hecaton_py2 && \
 # RUN git clone https://git.wur.nl/wijfj001/hecaton.git && cd hecaton && \
 #	git checkout e85bba0c && cd .. && \
 RUN git clone https://git.wur.nl/bioinformatics/hecaton.git && \
-	echo "aeb7ff36" && \
+	echo "5efce90c" && \
 	chmod +x hecaton/scripts/collapse/* && \
 	chmod +x hecaton/scripts/convert/* && \
 	chmod +x hecaton/scripts/filter/* && \
diff --git a/docker/docker_build.sh b/docker/docker_build.sh
index c29dc3f..e486265 100644
--- a/docker/docker_build.sh
+++ b/docker/docker_build.sh
@@ -2,4 +2,4 @@
 
 set -euo pipefail
 
-docker build -t hecaton:v0.5.0 .
\ No newline at end of file
+docker build -t hecaton:v0.5.1 .
\ No newline at end of file
diff --git a/nextflow/hecaton.nf b/nextflow/hecaton.nf
index 47e2b2f..def890c 100755
--- a/nextflow/hecaton.nf
+++ b/nextflow/hecaton.nf
@@ -25,7 +25,7 @@ params.help = false
 def helpMessage() {
     log.info"""
     =========================================
-     Hecaton v0.5.0
+     Hecaton v0.5.1
     =========================================
     Usage:
     nextflow run hecaton --genome_file reference.fa --reads "prefix_{1,2}.fastq" --manta_config configManta_weight_1.py.ini --model_file model_file.pkl --output_dir results
diff --git a/nextflow/hecaton_no_align.nf b/nextflow/hecaton_no_align.nf
index 76e4042..e4ca11e 100755
--- a/nextflow/hecaton_no_align.nf
+++ b/nextflow/hecaton_no_align.nf
@@ -25,7 +25,7 @@ params.help = false
 def helpMessage() {
     log.info"""
     =========================================
-     Hecaton v0.5.0
+     Hecaton v0.5.1
     =========================================
     Usage:
     nextflow run hecaton --genome_file reference.fa --bwa_bams "*.bam" --manta_config configManta_weight_1.py.ini --model_file model_file.pkl --output_dir results
diff --git a/scripts/filter/filter_ref_sites_vcf.py b/scripts/filter/filter_ref_sites_vcf.py
index 916ef66..6522f37 100755
--- a/scripts/filter/filter_ref_sites_vcf.py
+++ b/scripts/filter/filter_ref_sites_vcf.py
@@ -53,6 +53,7 @@ def filter_ref_sites(input_fn, min_samples, output_fn):
             else:
                 for sample in samples:
                     genotype = record.samples[sample]["GT"]
+                    print(genotype)
                     non_variants = [(0, 0), (None, None), (None, 0)]
                     if genotype not in non_variants:
                         var_calls += 1
diff --git a/tests/functional_test.sh b/tests/functional_test.sh
index 3223b59..49af303 100644
--- a/tests/functional_test.sh
+++ b/tests/functional_test.sh
@@ -57,7 +57,7 @@ else
 	echo "Converting VCF to table test successful"
 fi
 
-filter_ref_sites_vcf.py -v ../tests/genotyping/test_filter_ref_input.vcf -n 0 -o test_filter_ref_output.vcf
+filter_ref_sites_vcf.py -v ../tests/genotyping/test_filter_ref_input.vcf -n 1 -o test_filter_ref_output.vcf
 
 cmp test_filter_ref_output.vcf ../tests/genotyping/test_filter_ref_expected_output.vcf
 if [ $? -ne 0 ] 
diff --git a/tests/genotyping/expected_output.tsv b/tests/genotyping/expected_output.tsv
index cc7e152..253ab58 100644
--- a/tests/genotyping/expected_output.tsv
+++ b/tests/genotyping/expected_output.tsv
@@ -1,11 +1,11 @@
 CHROM	POS	REF	ALT	HOM-VAR	VAR	1098.RQ	2098.RQ
 1	43994	N	<DEL>	1	1	0.0	0.9
 1	44995	N	<DEL>	1	1	0.86	0.0
-1	75823	N	<DEL>	1	1	0.0	0.81
-1	76823	N	<DEL>	1	1	0.81	0.0
-1	484431	N	<DUP>	2	2	0.76	0.76
+1	75823	N	<DEL>	0	1	0.0	0.81
+1	76823	N	<DEL>	0	1	0.81	0.0
+1	484431	N	<DUP>	0	1	0.76	0.76
 1	4618775	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCTTT	0	2	0.71	0.71
 1	4618785	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCT	0	1	0.0	0.72
-1	6134670	N	<DUP>	2	2	0.8	0.8
+1	6134670	N	<DUP>	0	2	0.8	0.8
 1	6134670	N	<DUP>	1	1	0.0	0.7
 2	9057242	N	<DEL>	1	1	0.83	0.83
diff --git a/tests/genotyping/expected_output.vcf b/tests/genotyping/expected_output.vcf
index 7f1747e..08f5e5e 100644
--- a/tests/genotyping/expected_output.vcf
+++ b/tests/genotyping/expected_output.vcf
@@ -1,10 +1,15 @@
-##fileformat=VCFv4.3
-##fileDate=20190207
+##fileformat=VCFv4.2
+##fileDate=20220320
 ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
 ##INFO=<ID=END,Number=.,Type=Integer,Description="End position of the variant described in this region">
 ##INFO=<ID=INSCHROM,Number=.,Type=String,Description="Chromosome on which insertion site of the dispersed duplication is located">
 ##INFO=<ID=INSPOS,Number=.,Type=Integer,Description="Position of insertion site of the dispersed duplication">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
 ##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
 ##ALT=<ID=DUP:DISPERSED,Description="Dispersed Duplication">
 ##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
 ##ALT=<ID=INS,Description="Insertion of novel sequence">
@@ -20,13 +25,13 @@
 ##contig=<ID=1,length=30427671>
 ##contig=<ID=2,length=19698289>
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	1098	2098
-1	43994	1	N	<DEL>	-1	PASS	SVTYPE=DEL;END=45232	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	1/1:18.0:0.0:18.0:DELLY,LUMPY,MANTA,GRIDSS:0.9:0.0:0.0:0.0
-1	44995	2	N	<DEL>	-1	PASS	SVTYPE=DEL;END=45231	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	1/1:17.0:0.0:17.0:DELLY,LUMPY,MANTA,GRIDSS:0.86:0.0:0.0:0.0	0/0:0:0:0:.:0:-1:-1:-1
-1	75823	3	N	<DEL>	-1	PASS	SVTYPE=DEL;END=77562	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	1/1:26.5:15.0:11.5:DELLY,LUMPY,MANTA,GRIDSS:0.81:0.5:0.5:0.5
-1	76823	4	N	<DEL>	-1	PASS	SVTYPE=DEL;END=77560	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	1/1:26.5:15.0:11.5:DELLY,LUMPY,MANTA,GRIDSS:0.81:0.25:0.25:0.25	0/0:0:0:0:.:0:-1:-1:-1
-1	484431	5	N	<DUP>	-1	PASS	SVTYPE=DUP:TANDEM;END=586498	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	1/1:14.0:0.0:14.0:GRIDSS:0.76:1.25:1.25:1.25	1/1:14.0:0.0:14.0:GRIDSS:0.76:1.5:1.5:1.5
-1	4618775	6	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCTTT	-1	PASS	SVTYPE=INS	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	./1:10.0:0.0:10.0:DELLY:0.71:0.9090909957885742:0.9090909957885742:1.111109972000122	./1:10.0:0.0:10.0:DELLY:0.71:0.9090909957885742:0.9090909957885742:1.111109972000122
-1	4618785	7	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCT	-1	PASS	SVTYPE=INS	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	./1:11.0:0.0:11.0:DELLY:0.72:0.9090909957885742:0.9090909957885742:1.111109972000122
-1	6134670	8	N	<DUP>	-1	PASS	SVTYPE=DUP:DISPERSED;END=6134722;INSCHROM=1;INSPOS=6134669	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	1/1:0.0:0.0:0.0:GRIDSS:0.8:1.5:1.5:1.5	1/1:0.0:0.0:0.0:GRIDSS:0.8:1.5:1.5:1.5
-1	6134670	9	N	<DUP>	-1	PASS	SVTYPE=DUP:DISPERSED;END=6134722;INSCHROM=2;INSPOS=6134669	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	1/1:0.0:0.0:0.0:GRIDSS:0.7:2.0:2.0:2.0
-2	9057242	10	N	<DEL>	-1	PASS	SVTYPE=DEL;END=9059624	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:23.0:12.0:11.0:DELLY,LUMPY,MANTA,GRIDSS:0.83:2.0:2.0:2.0	1/1:23.0:12.0:11.0:DELLY,LUMPY,MANTA,GRIDSS:0.83:0.0:0.0:0.0
+1	43994	1	N	<DEL>	-1	PASS	SVTYPE=DEL;END=45232;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	1/1:18.0:0.0:18.0:DELLY,LUMPY,MANTA,GRIDSS:0.9:0.0:0.0:0.0
+1	44995	2	N	<DEL>	-1	PASS	SVTYPE=DEL;END=45231;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	1/1:17.0:0.0:17.0:DELLY,LUMPY,MANTA,GRIDSS:0.86:0.0:0.0:0.0	0/0:0:0:0:.:0:-1:-1:-1
+1	75823	3	N	<DEL>	-1	PASS	SVTYPE=DEL;END=77562;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	0/1:26.5:15.0:11.5:DELLY,LUMPY,MANTA,GRIDSS:0.81:0.5:0.5:0.5
+1	76823	4	N	<DEL>	-1	PASS	SVTYPE=DEL;END=77560;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/1:26.5:15.0:11.5:DELLY,LUMPY,MANTA,GRIDSS:0.81:0.25:0.25:0.25	0/0:0:0:0:.:0:-1:-1:-1
+1	484431	5	N	<DUP>	-1	PASS	SVTYPE=DUP:TANDEM;END=586498;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:14.0:0.0:14.0:GRIDSS:0.76:1.25:1.25:1.25	0/1:14.0:0.0:14.0:GRIDSS:0.76:1.5:1.5:1.5
+1	4618775	6	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCTTT	-1	PASS	SVTYPE=INS;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	./1:10.0:0.0:10.0:DELLY:0.71:0.9090909957885742:0.9090909957885742:1.111109972000122	./1:10.0:0.0:10.0:DELLY:0.71:0.9090909957885742:0.9090909957885742:1.111109972000122
+1	4618785	7	N	NCTTACAGTATAATCCAAGGCATGTATATATATCTTTCTTAGAGTTTTTTTTTTTTTTTGAACACACTCT	-1	PASS	SVTYPE=INS;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	./1:11.0:0.0:11.0:DELLY:0.72:0.9090909957885742:0.9090909957885742:1.111109972000122
+1	6134670	8	N	<DUP>	-1	PASS	SVTYPE=DUP:DISPERSED;END=6134722;INSCHROM=1;INSPOS=6134669;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/1:0.0:0.0:0.0:GRIDSS:0.8:1.5:1.5:1.5	0/1:0.0:0.0:0.0:GRIDSS:0.8:1.5:1.5:1.5
+1	6134670	9	N	<DUP>	-1	PASS	SVTYPE=DUP:DISPERSED;END=6134722;INSCHROM=2;INSPOS=6134669;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:0:0:0:.:0:-1:-1:-1	1/1:0.0:0.0:0.0:GRIDSS:0.7:2.0:2.0:2.0
+2	9057242	10	N	<DEL>	-1	PASS	SVTYPE=DEL;END=9059624;CIPOS=-10,10;CIEND=-10,10;CIPOS95=-10,10;CIEND95=-10,10	GT:SUP:RP:SR:TOOL:RQ:DHFC:DHBFC:DHFFC	0/0:23.0:12.0:11.0:DELLY,LUMPY,MANTA,GRIDSS:0.83:2.0:2.0:2.0	1/1:23.0:12.0:11.0:DELLY,LUMPY,MANTA,GRIDSS:0.83:0.0:0.0:0.0
-- 
GitLab