Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Aflitos, Saulo Alves
mapper
Commits
4dde25c4
Commit
4dde25c4
authored
Jul 01, 2014
by
Aflitos, Saulo Alves
Browse files
fixed but stddev
parent
5514e136
Changes
1
Hide whitespace changes
Inline
Side-by-side
vcf_stats.py
View file @
4dde25c4
...
...
@@ -39,14 +39,14 @@ class stats( object ):
self
.
Raverage
=
v
else
:
self
.
Raverage
=
((
self
.
Raverage
*
self
.
count
)
+
v
)
/
(
self
.
count
+
1
)
self
.
variance
=
(
v
-
self
.
Raverage
)
**
2
self
.
Raverage
=
((
self
.
Raverage
*
self
.
count
)
+
v
)
/
(
self
.
count
+
1
)
self
.
variance
+
=
(
v
-
self
.
Raverage
)
**
2
#dev = ( self.variance'] / self.count']) ** 0.5
def
get
(
self
):
return
{
'average'
:
self
.
sum
/
self
.
count
,
'Aaverage'
:
self
.
Asum
/
self
.
count
,
'average'
:
self
.
sum
/
self
.
count
,
'Aaverage'
:
self
.
Asum
/
self
.
count
,
'stddev'
:
(
self
.
variance
/
self
.
count
)
**
0.5
,
'min'
:
self
.
min
,
'max'
:
self
.
max
,
...
...
@@ -58,196 +58,195 @@ class stats( object ):
}
def
main
():
infile
=
sys
.
argv
[
1
]
#$v=0;
#$c=0;
#$min=10000000;
#$max=0;
#$av=0;
#$var=0;
#
#$a=$v/$c;
#$dev=sqrt($var/$c);
#$mx=$a+(2*$dev);
#
#print "$c\t$v\t$a\t$min\t$max\t$av\t$var\t$dev\t$mx\n"; }
#
#$c+=1;
#$v+=$_;
#if ($_ < $min) { $min = $_; };
#if ($_ > $max) { $max = $_; };
#if ($c==1) {
# $av = $_; $var=0; }
#
#else {
# $av=(($av*$c)+$_)/($c+1);
# $var+=($_-$av)**2;
# $dev=sqrt($var/$c);
# print "$av\t$var\t$dev\n";}' | \
values
=
{
'count'
:
{
},
'info'
:
{
},
'format'
:
{
},
'qual'
:
{
'all'
:
stats
()
},
'dist'
:
{
'all'
:
stats
()
},
}
codes
=
{
'info'
:
{},
'format'
:
{}
}
count
=
0
with
openfile
(
infile
)
as
fhd
:
for
line
in
fhd
:
line
=
line
.
strip
()
if
len
(
line
)
==
0
:
continue
if
line
[
0
]
==
"#"
:
print
line
if
'##FORMAT'
in
line
:
lp
=
line
[
13
:]
code
=
lp
[:
2
]
nfo
=
lp
[
lp
.
find
(
'Description="'
)
+
13
:
-
2
]
print
'formal'
,
code
,
nfo
codes
[
'format'
][
code
]
=
nfo
elif
'##INFO'
in
line
:
lp
=
line
[
11
:]
code
=
lp
[:
lp
.
index
(
','
)]
nfo
=
lp
[
lp
.
find
(
'Description="'
)
+
13
:
-
2
]
print
'info'
,
code
,
nfo
codes
[
'info'
][
code
]
=
nfo
continue
#sys.exit(0)
count
+=
1
if
count
%
1000
!=
0
:
continue
pass
def
main
(
infiles
):
for
infile
in
infiles
:
#$v=0;
#$c=0;
#$min=10000000;
#$max=0;
#$av=0;
#$var=0;
#
#$a=$v/$c;
#$dev=sqrt($var/$c);
#$mx=$a+(2*$dev);
#
#print "$c\t$v\t$a\t$min\t$max\t$av\t$var\t$dev\t$mx\n"; }
#
#$c+=1;
#$v+=$_;
#if ($_ < $min) { $min = $_; };
#if ($_ > $max) { $max = $_; };
#if ($c==1) {
# $av = $_; $var=0; }
#
#else {
# $av=(($av*$c)+$_)/($c+1);
# $var+=($_-$av)**2;
# $dev=sqrt($var/$c);
# print "$av\t$var\t$dev\n";}' | \
values
=
{
'count'
:
{
},
'info'
:
{
},
'format'
:
{
},
'qual'
:
{
'all'
:
stats
()
},
'dist'
:
{
'all'
:
stats
()
},
}
#print line
codes
=
{
'info'
:
{},
'format'
:
{}
}
count
=
0
with
openfile
(
infile
)
as
fhd
:
for
line
in
fhd
:
line
=
line
.
strip
()
cols
=
line
.
split
()
chrom
=
cols
[
0
]
pos
=
int
(
cols
[
1
])
qual
=
float
(
cols
[
5
])
info
=
cols
[
7
].
split
(
";"
)
fmtLbl
=
cols
[
8
].
split
(
":"
)
fmtVal
=
cols
[
9
].
split
(
":"
)
if
len
(
line
)
==
0
:
continue
if
line
[
0
]
==
"#"
:
#print line
if
'##FORMAT'
in
line
:
lp
=
line
[
13
:]
code
=
lp
[:
2
]
nfo
=
lp
[
lp
.
find
(
'Description="'
)
+
13
:
-
2
]
print
'format
\t
'
,
code
,
"
\t
"
,
nfo
codes
[
'format'
][
code
]
=
nfo
elif
'##INFO'
in
line
:
lp
=
line
[
11
:]
code
=
lp
[:
lp
.
index
(
','
)]
nfo
=
lp
[
lp
.
find
(
'Description="'
)
+
13
:
-
2
]
print
'info
\t
'
,
code
,
"
\t
"
,
nfo
codes
[
'info'
][
code
]
=
nfo
continue
if
chrom
not
in
values
[
'dist'
]:
values
[
'dist'
][
chrom
]
=
stats
()
values
[
'qual'
][
chrom
]
=
stats
()
values
[
'count'
][
chrom
]
=
0
#sys.exit(0)
count
+=
1
if
count
%
1000
!=
0
:
#continue
pass
values
[
'count'
][
chrom
]
+=
1
#print line
if
values
[
'count'
][
chrom
]
==
1
:
print
'first'
,
chrom
values
[
'dist'
][
chrom
].
prev
=
pos
#print values[ 'dist' ][ chrom ].prev
cols
=
line
.
split
()
chrom
=
cols
[
0
]
pos
=
int
(
cols
[
1
])
qual
=
float
(
cols
[
5
])
info
=
cols
[
7
].
split
(
";"
)
fmtLbl
=
cols
[
8
].
split
(
":"
)
fmtVal
=
cols
[
9
].
split
(
":"
)
else
:
#print 'consecutive', values[ 'dist' ][ chrom ].prev, pos,
dist
=
pos
-
values
[
'dist'
][
chrom
].
prev
values
[
'dist'
][
'all'
].
add
(
dist
)
values
[
'dist'
][
chrom
].
add
(
dist
)
values
[
'dist'
][
chrom
].
prev
=
pos
#print values[ 'dist' ][ chrom ].prev
if
chrom
not
in
values
[
'dist'
]:
values
[
'dist'
][
chrom
]
=
stats
()
values
[
'qual'
][
chrom
]
=
stats
()
values
[
'count'
][
chrom
]
=
0
values
[
'qual'
][
'all'
].
add
(
qual
)
values
[
'qual'
][
chrom
].
add
(
qual
)
values
[
'count'
][
chrom
]
+=
1
if
values
[
'count'
][
chrom
]
==
1
:
print
'first'
,
chrom
values
[
'dist'
][
chrom
].
prev
=
pos
#print values[ 'dist' ][ chrom ].prev
else
:
#print 'consecutive', values[ 'dist' ][ chrom ].prev, pos,
dist
=
pos
-
values
[
'dist'
][
chrom
].
prev
values
[
'dist'
][
'all'
].
add
(
dist
)
values
[
'dist'
][
chrom
].
add
(
dist
)
values
[
'dist'
][
chrom
].
prev
=
pos
#print values[ 'dist' ][ chrom ].prev
for
i
in
info
:
if
"="
in
i
:
k
,
v
=
i
.
split
(
"="
)
#print "K %s V %s" % (k ,v)
if
k
in
codes
[
'info'
]:
k
=
codes
[
'info'
][
k
]
values
[
'qual'
][
'all'
].
add
(
qual
)
values
[
'qual'
][
chrom
].
add
(
qual
)
try
:
v
=
float
(
v
)
except
:
pass
if
type
(
v
)
is
float
:
#print "float", v
if
k
not
in
values
[
'info'
]:
values
[
'info'
][
k
]
=
{
'all'
:
stats
()
}
for
i
in
info
:
if
"="
in
i
:
k
,
v
=
i
.
split
(
"="
)
#print "K %s V %s" % (k ,v)
if
chrom
not
in
valu
es
[
'info'
]
[
k
]
:
valu
es
[
'info'
][
k
]
[
chrom
]
=
stats
()
if
k
in
cod
es
[
'info'
]:
k
=
cod
es
[
'info'
][
k
]
values
[
'info'
][
k
][
chrom
].
add
(
v
)
values
[
'info'
][
k
][
'all'
].
add
(
v
)
try
:
v
=
float
(
v
)
except
:
pass
if
type
(
v
)
is
float
:
#print "float", v
if
k
not
in
values
[
'info'
]:
values
[
'info'
][
k
]
=
{
'all'
:
stats
()
}
for
fp
in
range
(
len
(
fmtLbl
)):
f
=
fmtLbl
[
fp
]
v
=
fmtVal
[
fp
]
if
chrom
not
in
values
[
'info'
][
k
]:
values
[
'info'
][
k
][
chrom
]
=
stats
()
if
f
in
codes
[
'format'
]:
f
=
codes
[
'format'
][
f
]
values
[
'info'
][
k
][
chrom
].
add
(
v
)
values
[
'info'
][
k
][
'all'
].
add
(
v
)
#print f
if
'/'
in
v
:
continue
if
','
in
v
:
pass
for
fp
in
range
(
len
(
fmtLbl
)):
f
=
fmtLbl
[
fp
]
v
=
fmtVal
[
fp
]
else
:
v
=
float
(
v
)
#print fp, f, v
if
f
not
in
values
[
'format'
]:
values
[
'format'
][
f
]
=
{
'all'
:
stats
()
}
if
f
in
codes
[
'format'
]:
f
=
codes
[
'format'
][
f
]
if
chrom
not
in
values
[
'format'
][
f
]:
values
[
'format'
][
f
][
chrom
]
=
stats
()
#print f
values
[
'format'
][
f
][
chrom
].
add
(
v
)
values
[
'format'
][
f
][
'all'
].
add
(
v
)
if
'/'
in
v
:
continue
if
','
in
v
:
pass
else
:
v
=
float
(
v
)
#print fp, f, v
if
f
not
in
values
[
'format'
]:
values
[
'format'
][
f
]
=
{
'all'
:
stats
()
}
if
count
==
1000
:
#break
pass
if
chrom
not
in
values
[
'format'
][
f
]:
values
[
'format'
][
f
][
chrom
]
=
stats
()
values
[
'format'
][
f
][
chrom
].
add
(
v
)
values
[
'format'
][
f
][
'all'
].
add
(
v
)
if
count
==
1000
:
#break
pass
for
k
in
values
:
if
k
in
[
'count'
]:
pass
elif
k
in
[
'dist'
,
'qual'
]:
for
chrom
in
values
[
k
]:
#print "chrom", chrom
values
[
k
][
chrom
]
=
values
[
k
][
chrom
].
get
()
#elif k in []:
#values[k] = values[k].get()
else
:
for
chrom
in
values
[
k
]:
for
kk
in
values
[
k
][
chrom
]:
values
[
k
][
chrom
][
kk
]
=
values
[
k
][
chrom
][
kk
].
get
()
pp
(
values
)
json
.
dump
(
values
,
open
(
os
.
path
.
basename
(
infile
)
+
'.json'
,
'w'
),
sort_keys
=
True
,
indent
=
' '
)
for
k
in
values
:
if
k
in
[
'count'
]:
pass
elif
k
in
[
'dist'
,
'qual'
]:
for
chrom
in
values
[
k
]:
#print "chrom", chrom
values
[
k
][
chrom
]
=
values
[
k
][
chrom
].
get
()
#elif k in []:
#values[k] = values[k].get()
else
:
for
chrom
in
values
[
k
]:
for
kk
in
values
[
k
][
chrom
]:
values
[
k
][
chrom
][
kk
]
=
values
[
k
][
chrom
][
kk
].
get
()
pp
(
values
)
json
.
dump
(
values
,
open
(
os
.
path
.
basename
(
infile
)
+
'.json'
,
'w'
),
sort_keys
=
True
,
indent
=
' '
)
if
__name__
==
'__main__'
:
main
()
main
(
sys
.
argv
[
1
:]
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment