Skip to content
Snippets Groups Projects
Commit 37f542fe authored by Wijfjes, Raul's avatar Wijfjes, Raul
Browse files

Created script that generates site-only vcf

parent 712adaff
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
"""
Convert VCF files to site-only VCF, removing all genotype information
"""
import argparse
import sys
from xopen import xopen
def parse_cl_args(in_args):
"""
Parse command line arguments
:param in_args: All command line arguments
:return: None
"""
description = "Convert VCF file to tabular file"
parser = argparse.ArgumentParser(description=description)
parser.add_argument("-v", "--vcf_fn", type=str,
help="File containing path to VCF file")
parser.add_argument("-o", "--output_fn", type=str,
help="Name of VCF output file")
args = parser.parse_args(in_args)
return args
def vcf_to_site_only_vcf(input_fn, output_fn):
"""
Convert VCF files to site-only VCF, removing all genotype information
:param input_fn: Path to VCF file
:param output_fn: Name of VCF output file
:return: 0 (integer)
"""
with xopen(input_fn) as input_file, xopen(output_fn, "w") as output_file:
for line in input_file:
# write initial header lines to output file
if line.startswith("##"):
output_file.write(line)
else:
# strip samples from line
output_line = '\t'.join(line.strip().split()[0:9])
output_file.write(output_line)
output_file.write("\n")
return 0
def main():
args = parse_cl_args(sys.argv[1:])
# write VCF fields to tabular output
vcf_to_site_only_vcf(args.vcf_fn, args.output_fn)
if __name__ == "__main__":
main()
#!/usr/bin/env python3
"""
Filter sites in a VCF file for which no variant alleles were found in the samples
Filter sites in a VCF file for which the number of samples carrying a
variant allele goes beyond a user-chosen threshold
"""
import argparse
......@@ -16,20 +17,24 @@ def parse_cl_args(in_args):
:param in_args: All command line arguments
:return: None
"""
description = "Filter sites in a VCF file for which no variant alleles were found in the samples"
description = "Filter sites in a VCF file for which a minimum number of samples were found carrying a variant allele"
parser = argparse.ArgumentParser(description=description)
parser.add_argument("-v", "--vcf_fn", type=str,
help="File containing path to VCF file")
parser.add_argument("-n", "--minimum_samples", type=int,
help="Minimum number of samples that should carry a variant allele")
parser.add_argument("-o", "--output_fn", type=str,
help="Name of output file")
args = parser.parse_args(in_args)
return args
def filter_ref_sites(input_fn, output_fn):
def filter_ref_sites(input_fn, min_samples, output_fn):
"""
Filter sites in a VCF file for which only reference alleles were found in the samples
Filter sites in a VCF file for which the number of samples carrying a
variant allele goes beyond a user-chosen threshold
:param input_fn: Path to VCF file
:param min_samples: Minimum number of samples carrying a variant allele
:param output_fn: Name of VCF output file
:return: 0 (integer)
"""
......@@ -52,14 +57,14 @@ def filter_ref_sites(input_fn, output_fn):
if genotype not in non_variants:
var_calls += 1
# write record to output if it has variant calls
if var_calls > 0:
if var_calls > min_samples:
output_file.write(str(record))
return 0
def main():
args = parse_cl_args(sys.argv[1:])
# filter ref sites from input file
filter_ref_sites(args.vcf_fn, args.output_fn)
filter_ref_sites(args.vcf_fn, args.minimum_samples, args.output_fn)
if __name__ == "__main__":
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment