Skip to content
Snippets Groups Projects
Commit f82885e8 authored by Wijfjes, Raul
Browse files

Added more options to vcf_to_table.py

parent 3428de6e
No related branches found
No related tags found
No related merge requests found
......@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Option in merge_vcf_files.py to merge VCF files of different samples without genotyping based on read depth computed by duphold
- Option in vcf_to_table.py to calculate number of deletions that are supported by a change in read depth compared to 1000 bp flanking regions
- Option in vcf_to_table.py to generate, for each variant, a list of identifiers of samples that had a non-reference call
- Option in vcf_to_table.py to generate ID field of each variant
### Fixed
- FILTER field is now correctly processed in vcf_to_table.py
......
......@@ -2,6 +2,7 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="38800356-6f91-47c6-8c6a-4d9b650fe843" name="Default" comment="">
<change beforePath="$PROJECT_DIR$/../CHANGELOG.md" afterPath="$PROJECT_DIR$/../CHANGELOG.md" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change beforePath="$PROJECT_DIR$/convert/vcf_to_table.py" afterPath="$PROJECT_DIR$/convert/vcf_to_table.py" />
</list>
......@@ -61,7 +62,7 @@
<file leaf-file-name="merge_vcf_files.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/genotype/merge_vcf_files.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="251">
<state relative-caret-position="1224">
<caret line="78" column="20" lean-forward="false" selection-start-line="78" selection-start-column="20" selection-end-line="78" selection-end-column="20" />
<folding>
<element signature="e#132#147#0" expanded="true" />
......@@ -73,8 +74,8 @@
<file leaf-file-name="vcf_to_table.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/../../../hecaton/scripts/convert/vcf_to_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="648">
<caret line="90" column="24" lean-forward="true" selection-start-line="90" selection-start-column="24" selection-end-line="90" selection-end-column="74" />
<state relative-caret-position="1566">
<caret line="90" column="24" lean-forward="false" selection-start-line="90" selection-start-column="24" selection-end-line="90" selection-end-column="74" />
<folding />
</state>
</provider>
......@@ -83,8 +84,8 @@
<file leaf-file-name="vcf_to_table.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/convert/vcf_to_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="380">
<caret line="80" column="28" lean-forward="true" selection-start-line="80" selection-start-column="28" selection-end-line="80" selection-end-column="28" />
<state relative-caret-position="612">
<caret line="82" column="60" lean-forward="false" selection-start-line="82" selection-start-column="60" selection-end-line="82" selection-end-column="60" />
<folding>
<element signature="e#108#123#0" expanded="true" />
</folding>
......@@ -203,8 +204,6 @@
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<expand>
......@@ -266,12 +265,14 @@
<select />
</subPane>
</pane>
<pane id="Scope" />
<pane id="Scratches" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/../../../hecaton/scripts/convert/vcf_to_table.py" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/../../../Dutch_accessions_analysis/scripts" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
......@@ -303,6 +304,7 @@
</component>
<component name="ToolWindowManager">
<frame x="65" y="24" width="1857" height="1092" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.3301887" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
......@@ -310,7 +312,7 @@
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32914045" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32914045" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.45213062" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.47537354" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Docker" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32982844" sideWeight="0.5125786" order="4" side_tool="true" content_ui="tabs" />
<window_info id="SciView" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32982844" sideWeight="0.4874214" order="3" side_tool="false" content_ui="tabs" />
......@@ -611,8 +613,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/../../../hecaton/scripts/convert/vcf_to_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="648">
<caret line="90" column="24" lean-forward="true" selection-start-line="90" selection-start-column="24" selection-end-line="90" selection-end-column="74" />
<state relative-caret-position="1566">
<caret line="90" column="24" lean-forward="false" selection-start-line="90" selection-start-column="24" selection-end-line="90" selection-end-column="74" />
<folding />
</state>
</provider>
......@@ -637,7 +639,7 @@
</entry>
<entry file="file://$PROJECT_DIR$/genotype/merge_vcf_files.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="251">
<state relative-caret-position="1224">
<caret line="78" column="20" lean-forward="false" selection-start-line="78" selection-start-column="20" selection-end-line="78" selection-end-column="20" />
<folding>
<element signature="e#132#147#0" expanded="true" />
......@@ -655,8 +657,8 @@
</entry>
<entry file="file://$PROJECT_DIR$/convert/vcf_to_table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="380">
<caret line="80" column="28" lean-forward="true" selection-start-line="80" selection-start-column="28" selection-end-line="80" selection-end-column="28" />
<state relative-caret-position="612">
<caret line="82" column="60" lean-forward="false" selection-start-line="82" selection-start-column="60" selection-end-line="82" selection-end-column="60" />
<folding>
<element signature="e#108#123#0" expanded="true" />
</folding>
......
......@@ -47,7 +47,7 @@ def vcf_to_table(input_fn, output_fn, fields=None, genotype_fields=None):
samples = list(vcf.header.samples)
samples.sort()
header_fields = []
standard_fields = ["CHROM", "POS", "REF", "ALT", "QUAL", "FILTER", "END", "HOM-VAR", "VAR", "DEL_SUPPORTED"]
standard_fields = ["CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "END", "HOM-VAR", "VAR", "SAMPLES-VAR", "DEL_SUPPORTED"]
standard_fields.extend(vcf.header.info)
if fields:
for field in fields:
......@@ -79,6 +79,8 @@ def vcf_to_table(input_fn, output_fn, fields=None, genotype_fields=None):
record_fields.append(str(record.chrom))
elif field == "POS":
record_fields.append(str(record.pos))
elif field == "ID":
record_fields.append(str(record.id))
elif field == "REF":
record_fields.append(str(record.ref))
elif field == "ALT":
......@@ -118,12 +120,25 @@ def vcf_to_table(input_fn, output_fn, fields=None, genotype_fields=None):
if genotype not in non_variants:
var_calls += 1
record_fields.append(str(var_calls))
elif field == "SAMPLES-VAR":
# get list of samples that contain a non-ref variant
samples_var = []
# collect the identifiers of samples whose genotype is a non-reference call
if "GT" not in record.format:
logging.warning("GT not in format, SAMPLES-VAR will be NA")
record_fields.append("NA")
else:
for sample in samples:
genotype = record.samples[sample]["GT"]
non_variants = [(0, 0), (None, None), (None, 0)]
if genotype not in non_variants:
samples_var.append(sample)
record_fields.append(str(samples_var))
elif field == "DEL_SUPPORTED":
del_supported_calls = 0
del_unsupported_calls = 0
nondel_supported_calls = 0
nondel_unsupported_calls = 0
if record.info["SVTYPE"] != "DEL":
record_fields.extend(["NA", "NA", "NA", "NA"])
else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment