Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Aflitos, Saulo Alves
ibrowser
Commits
b0a8450a
Commit
b0a8450a
authored
Jun 26, 2015
by
Aflitos, Saulo Alves
Browse files
BUGFIX: error in heterozygous SNP filtering
parent
21d1e839
Changes
2
Hide whitespace changes
Inline
Side-by-side
vcfmerger/vcfmerger.py
View file @
b0a8450a
...
...
@@ -149,6 +149,9 @@ class vcfResult(object):
excludeSingleton
=
None
noncarefiles
=
None
simpliStats
=
None
prints
=
0
printsReal
=
0
print_every
=
100
def
__init__
(
self
,
simplify
=
SIMP_NO_SIMPLIFICATION
,
noncarefiles
=
[]):
self
.
result
=
None
...
...
@@ -158,6 +161,7 @@ class vcfResult(object):
self
.
posCount
=
None
self
.
simplify
=
simplify
if
vcfResult
.
simpliStats
is
None
:
vcfResult
.
simpliStats
=
{
'Heterozygous Dest'
:
0
,
...
...
@@ -183,6 +187,18 @@ class vcfResult(object):
print
"simplifying :: Heterozygous [%3d, %3d] %s"
%
(
SIMP_EXCL_HETEROZYGOUS
,
math
.
log
(
SIMP_EXCL_HETEROZYGOUS
,
2
),
str
(
vcfResult
.
excludHET
)
)
print
"simplifying :: Indel [%3d, %3d] %s"
%
(
SIMP_EXCL_INDEL
,
math
.
log
(
SIMP_EXCL_INDEL
,
2
),
str
(
vcfResult
.
excludeINDEL
)
)
print
"simplifying :: Singleton [%3d, %3d] %s"
%
(
SIMP_EXCL_SINGLETON
,
math
.
log
(
SIMP_EXCL_SINGLETON
,
2
),
str
(
vcfResult
.
excludeSingleton
)
)
print
'Progress Legend:'
print
' print every :'
,
vcfResult
.
print_every
print
' Heterozygous Dest : h'
print
' Heterozygous Indel: I'
print
' Homozygous Indel : i'
print
' Source Indel : s'
print
' Singleton 1 : 1'
print
' Singleton 2 : 2'
print
' Singleton 3 : 3'
print
' Ok : .'
#sys.exit(0)
#TODO: IMPLEMENT
...
...
@@ -232,6 +248,39 @@ class vcfResult(object):
return
simpl
#return srcs
def
printprogress
(
self
,
msg
,
key
=
None
,
skip
=
0
,
linelen
=
100
):
vcfResult
.
prints
+=
1
if
skip
!=
0
:
if
key
is
None
:
if
vcfResult
.
prints
%
skip
==
0
:
sys
.
stderr
.
write
(
msg
)
vcfResult
.
printsReal
+=
1
if
vcfResult
.
printsReal
%
linelen
==
0
:
sys
.
stderr
.
write
(
' %12d
\n
'
%
vcfResult
.
prints
)
else
:
if
vcfResult
.
simpliStats
[
key
]
%
skip
==
0
:
sys
.
stderr
.
write
(
msg
)
vcfResult
.
printsReal
+=
1
if
vcfResult
.
printsReal
%
linelen
==
0
:
sys
.
stderr
.
write
(
' %12d
\n
'
%
vcfResult
.
prints
)
else
:
vcfResult
.
prints
+=
1
vcfResult
.
printsReal
+=
1
sys
.
stderr
.
write
(
msg
)
if
vcfResult
.
printsReal
%
linelen
==
0
:
sys
.
stderr
.
write
(
' %12d
\n
'
%
vcfResult
.
prints
)
sys
.
stderr
.
flush
()
def
__str__
(
self
):
#SL2.40ch01 2118853 . T G 222 . DP=40;AF1=1;CI95=1,1;DP4=0,0,16,23;MQ=60;FQ=-144 GT:PL:DP:GQ 1/1:255,117,0:39:99
...
...
@@ -262,7 +311,8 @@ class vcfResult(object):
if
register
is
None
:
print
"register #%d is empty"
%
register
sys
.
exit
(
1
)
if
rcount
%
100
==
0
:
if
rcount
%
100
==
0
:
sys
.
stderr
.
write
(
"
\n
"
)
sys
.
stderr
.
flush
()
...
...
@@ -286,7 +336,7 @@ class vcfResult(object):
if
(
vcfResult
.
excludHET
)
and
(
desti
.
find
(
','
)
!=
-
1
):
vcfResult
.
simpliStats
[
'Heterozygous Dest'
]
+=
1
#print "heretozygous: %s" % desti
s
ys
.
stderr
.
write
(
'h'
)
s
elf
.
printprogress
(
'h'
,
key
=
'Heterozygous Dest'
,
skip
=
vcfResult
.
print_every
)
#return ""
continue
...
...
@@ -297,7 +347,7 @@ class vcfResult(object):
if
len
(
alt
)
>
1
:
vcfResult
.
simpliStats
[
'Heterozygous Indel'
]
+=
1
#print "has het indel: %s > %s" % ( desti, alt )
s
ys
.
stderr
.
write
(
'I'
)
s
elf
.
printprogress
(
'I'
,
key
=
'Heterozygous Indel'
,
skip
=
vcfResult
.
print_every
)
#return ""
continue
...
...
@@ -305,7 +355,7 @@ class vcfResult(object):
if
(
len
(
desti
)
>
1
):
vcfResult
.
simpliStats
[
'Homozygous Indel'
]
+=
1
#print "has hom indel: %s" % desti
s
ys
.
stderr
.
write
(
'i'
)
s
elf
.
printprogress
(
'i'
,
key
=
'Homozygous Indel'
,
skip
=
vcfResult
.
print_every
)
#return ""
continue
...
...
@@ -313,7 +363,7 @@ class vcfResult(object):
vcfResult
.
simpliStats
[
'Source Indel'
]
+=
1
# todo: if len(src) != len(tgt)
#print "not single nucleotide source: %s" % str(register)
s
ys
.
stderr
.
write
(
's'
)
s
elf
.
printprogress
(
's'
,
key
=
'Source Indel'
,
skip
=
vcfResult
.
print_every
)
#return ""
continue
...
...
@@ -357,7 +407,7 @@ class vcfResult(object):
if
(
vcfResult
.
excludeSingleton
)
and
(
ns
==
1
):
vcfResult
.
simpliStats
[
'Singleton 1'
]
+=
1
#print "singleton ns: %s" % str(srcs)
s
ys
.
stderr
.
write
(
'1'
)
s
elf
.
printprogress
(
'1'
,
key
=
'Singleton 1'
,
skip
=
vcfResult
.
print_every
)
return
""
for
src
in
sorted
(
srcs
):
...
...
@@ -378,7 +428,7 @@ class vcfResult(object):
if
(
vcfResult
.
excludeSingleton
)
and
(
nu
==
1
):
vcfResult
.
simpliStats
[
'Singleton 2'
]
+=
1
#print "singleton 1: %s" % str(srcs)
s
ys
.
stderr
.
write
(
'2'
)
s
elf
.
printprogress
(
'2'
,
key
=
'Singleton 2'
,
skip
=
vcfResult
.
print_every
)
continue
files_str
=
","
.
join
(
files
)
...
...
@@ -390,11 +440,11 @@ class vcfResult(object):
if
len
(
restr
)
==
0
:
vcfResult
.
simpliStats
[
'Singleton 3'
]
+=
1
#print "singleton l: %s" % str(srcs)
s
ys
.
stderr
.
write
(
'3'
)
s
elf
.
printprogress
(
'3'
,
key
=
'Singleton 3'
,
skip
=
vcfResult
.
print_every
)
else
:
vcfResult
.
simpliStats
[
'Ok'
]
+=
1
s
ys
.
stderr
.
write
(
'.'
)
s
elf
.
printprogress
(
'.'
,
key
=
'Ok'
,
skip
=
vcfResult
.
print_every
)
return
restr
...
...
@@ -495,10 +545,12 @@ class vcfFile(object):
if
len
(
cols
)
==
0
:
self
.
next
()
return
elif
currLine
[
0
]
==
"#"
:
if
currLine
.
startswith
(
'##sources='
):
self
.
names
=
currLine
[
10
:].
split
(
';'
)
pp
(
self
.
names
)
elif
currLine
.
startswith
(
'##numsources='
):
numsources
=
int
(
currLine
[
13
:])
if
numsources
!=
len
(
self
.
names
):
...
...
@@ -508,7 +560,6 @@ class vcfFile(object):
else
:
print
"num sources:"
,
numsources
self
.
next
()
return
...
...
@@ -518,15 +569,55 @@ class vcfFile(object):
self
.
register
[
'src'
]
=
cols
[
3
]
self
.
register
[
'dst'
]
=
cols
[
4
]
if
len
(
cols
)
>
9
:
self
.
register
[
'desc'
]
=
cols
[
9
]
else
:
self
.
register
[
'desc'
]
=
""
if
len
(
cols
)
>
9
:
self
.
register
[
'desc'
]
=
cols
[
9
]
try
:
#print "has desc"
info
=
cols
[
8
].
split
(
':'
)
except
:
info
=
[]
#print " info", info
if
len
(
info
)
>
1
:
try
:
gtpos
=
info
.
index
(
'GT'
)
except
:
gtpos
=
None
if
gtpos
is
not
None
:
#print " GT pos", gtpos
desc
=
self
.
register
[
'desc'
].
split
(
":"
)
#print " desc", desc
if
len
(
info
)
==
len
(
desc
):
#print " len info == len desc", info, desc
gtinfo
=
desc
[
gtpos
]
#print " gtinfo", gtinfo
#print " gt1", gtinfo[ 0]
#print " gt2", gtinfo[-1]
if
any
([
gt
==
'0'
for
gt
in
(
gtinfo
[
0
],
gtinfo
[
-
1
])]):
#print "has desc"
#print " info", info
#print " GT pos", gtpos
#print " desc", desc
#print " len info == len desc", info, desc
#print " gtinfo", gtinfo
#print " gt1", gtinfo[ 0]
#print " gt2", gtinfo[-1]
#print " adding src to dst", self.register['src' ], self.register['dst' ]
self
.
register
[
'dst'
]
=
","
.
join
(
sorted
(
list
(
set
([
self
.
register
[
'src'
]]
+
self
.
register
[
'dst'
].
split
(
","
)))))
#print " added src to dst", self.register['dst' ]
else
:
self
.
register
[
'desc'
]
=
""
self
.
register
[
'state'
]
=
self
.
state
except
(
RuntimeError
,
TypeError
,
NameError
):
print
RuntimeError
,
TypeError
,
NameError
print
RuntimeError
,
TypeError
,
NameError
print
"error getting colums"
,
cols
,
currLine
,
"
\n
"
#self.next()
#return
...
...
@@ -856,14 +947,14 @@ def main(incsv):
print
"error parsing"
sys
.
exit
(
1
)
print
cols
,
cols
[:
3
]
print
cols
,
cols
[:
3
]
data
.
addFile
(
*
cols
[:
3
])
mfh
=
openvcffile
(
outfile
,
'w'
,
compresslevel
=
1
)
mfh
=
openvcffile
(
outfile
+
'.tmp.vcf.gz'
,
'w'
,
compresslevel
=
1
)
mfh
.
write
(
data
.
getVcfHeader
()
)
...
...
@@ -885,6 +976,8 @@ def main(incsv):
mfh
.
close
()
os
.
rename
(
outfile
+
'.tmp.vcf.gz'
,
outfile
)
print
"Finished"
return
outfile
...
...
vcfmerger/vcfsimplify.py
View file @
b0a8450a
...
...
@@ -89,7 +89,7 @@ def main(args):
data
.
addFile
(
'1'
,
invcf
,
'merged'
)
with
vcfmerger
.
openvcffile
(
outfile
,
'w'
,
compresslevel
=
1
)
as
mfh
:
with
vcfmerger
.
openvcffile
(
outfile
+
'.tmp.vcf.gz'
,
'w'
,
compresslevel
=
1
)
as
mfh
:
mfh
.
write
(
data
.
getVcfHeader
()
)
lines
=
[]
...
...
@@ -108,7 +108,8 @@ def main(args):
mfh
.
write
(
""
.
join
(
lines
)
)
lines
=
[]
os
.
rename
(
outfile
+
'.tmp.vcf.gz'
,
outfile
)
print
print
data
.
getFilterStats
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment