Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Siu, Pui Chung
bioinformatics algorithms
Commits
fd61f420
Commit
fd61f420
authored
Jul 21, 2021
by
Siu, Pui Chung
Browse files
Replace generatekmercomposition.py
parent
bada710e
Changes
1
Hide whitespace changes
Inline
Side-by-side
rosalind solutions/k-Mer Composition/generatekmercomposition.py
View file @
fd61f420
...
...
@@ -3,27 +3,65 @@
"""
Name = Siu Pui Chung Jacky
Student number = 1047527
Script for algorithm for bioinformatics preparation
input: integer k and string Text
output: composition of all kmers of text
Script for k-Mer Composition
input: A DNA string s in FASTA format (having length at most 100 kbp).
output: The 4-mer composition of s.
"""
from itertools import product
from sys import argv
def composition(k, string):
    """
    list every k-mer of string in order of occurrence
    input: k, positive integer size of the k-mers
           string, text to slide the window over
    output: list of the k-length substrings of string, left to right,
            duplicates kept; empty when string is shorter than k
    raises: ValueError when k is smaller than 1
    """
    if k < 1:
        # A zero or negative width would silently yield empty or wrapped
        # slices instead of real k-mers, so reject it up front.
        raise ValueError("k must be a positive integer, got {!r}".format(k))
    # sliding window: one slice of size k per valid start position
    return [string[i:i + k] for i in range(len(string) - k + 1)]
# Alphabet handed to basescombinations() by the script entry point to
# enumerate the k-mers; the character order fixes the order of the output.
bases = 'ACGT'
def fastaparser(filetext):
    """
    split FASTA-formatted lines into records
    input: filetext, iterable of text lines (e.g. an open file)
    output: list of (ID, sequence) tuples in file order; ID is the stripped
            header line including its leading ">", sequence is the
            concatenation of the stripped lines that follow it
    """
    records = []
    header = None
    chunks = []
    for raw in filetext:
        text = raw.strip()
        if not text.startswith(">"):
            # Sequence line: collect it for the current record. Lines seen
            # before the first header are discarded when that header resets
            # the buffer.
            chunks.append(text)
            continue
        if header:
            # A new header closes the record that was being collected.
            records.append((header, ''.join(chunks)))
        header, chunks = text, []
    if header:
        # Flush the last record, which has no following header to close it.
        records.append((header, ''.join(chunks)))
    return records
def basescombinations(bases, k=4):
    """
    generate all bases combinations of length k to form dictionary
    input: bases, iterable of single-character bases (e.g. 'ACGT')
           k, length of the combinations to build (default 4)
    output: kmers, list of combinations of bases; ordered with the first
                   position varying slowest, following the order of bases
            combinationdict, dictionary of combinations of bases, values all = 0
    """
    # product() iterates like k nested loops over bases, so the k-mers come
    # out in the same order as writing the loops by hand.
    kmers = [''.join(combo) for combo in product(bases, repeat=k)]
    combinationdict = dict.fromkeys(kmers, 0)
    return kmers, combinationdict
def frequency(string, combinationdict, k=4):
    """
    calculate the frequency of every k-mer in string
    input: string, sequence to scan with a sliding window of size k
           combinationdict, dictionary with one entry per possible k-mer;
                            the counts are added onto the values it holds
           k, size of the window (default 4); must match the key length
              of combinationdict
    output: combinationdict, the same dictionary object, updated in place
            with the frequency of each k-mer from string
    raises: KeyError when a window of string is not a key of combinationdict
    """
    for start in range(len(string) - k + 1):
        combinationdict[string[start:start + k]] += 1
    return combinationdict
if __name__ == "__main__":
    # NOTE(review): the two stages below read the same input file but expect
    # different layouts (an integer on line 1 vs. FASTA header lines).
    # Presumably they come from two versions of this script -- confirm which
    # one should remain.

    # Stage 1: line 1 holds the k-mer size, line 2 the text; every k-mer of
    # the text is written to answer.txt, one per line.
    filename = argv[1]
    with open(filename, 'r') as f:
        lines = f.readlines()
    kmer = int(lines[0].strip())
    string = lines[1].strip()
    comp = composition(kmer, string)
    # Context manager so the handle is closed even if the write fails.
    with open("answer.txt", "w") as f:
        f.write("\n".join(comp))

    # Stage 2: parse the file as FASTA, count the 4-mers of the first record
    # and print the counts space-separated in the order of kmers.
    with open(argv[1]) as f:
        fastalist = fastaparser(f)
    kmers, dictionary = basescombinations(bases)
    dictionary = frequency(fastalist[0][1], dictionary)
    print(' '.join(str(dictionary[kmer]) for kmer in kmers))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment