Commit fd61f420 authored by Siu, Pui Chung's avatar Siu, Pui Chung
Browse files


parent bada710e
......@@ -3,27 +3,65 @@
Name = Siu Pui Chung Jacky
Student number = 1047527
Script for algorithm for bioinformatics preparation
input: integer k and string Text
output: composition of all k-mers of Text
Script for k-Mer Composition
input: A DNA string s in FASTA format (having length at most 100 kbp).
output: The 4-mer composition of s.
from sys import argv
def composition(k, string):
    """Return the k-mer composition of string.

    input: k, length of each k-mer (positive integer)
           string, text to decompose
    output: list of every length-k substring of string, in order of
            occurrence (duplicates are kept); empty when string is
            shorter than k
    raises: ValueError if k is not positive
    """
    if k <= 0:
        # A non-positive window would only yield empty or meaningless slices.
        raise ValueError("k must be a positive integer")
    # Slide a window of size k across the string, one position at a time.
    return [string[i:i + k] for i in range(len(string) - k + 1)]
# Nucleotide alphabet; basescombinations() iterates it in this order, so it
# fixes the order in which the k-mers are enumerated and printed.
bases = 'ACGT'
def fastaparser(filetext):
    """Parse FASTA-formatted text into (header, sequence) records.

    input: filetext, iterable of lines (e.g. an open file handle)
    output: fastas, list of (ID, sequence) tuples in file order; ID is the
            whole header line including the leading ">", sequence is the
            record's sequence lines joined into a single string
    """
    ID, seq, fastas = None, [], []
    for line in filetext:
        line = line.strip()
        if not line:
            continue  # blank lines carry no sequence data
        if line.startswith(">"):
            # A new header closes the previous record, if there was one.
            if ID is not None:
                fastas.append((ID, ''.join(seq)))
            ID, seq = line, []
        elif ID is not None:
            # Sequence line of the current record; text that appears before
            # the first header belongs to no record and is ignored.
            seq.append(line)
    # Flush the last record, which has no following header to close it.
    if ID is not None:
        fastas.append((ID, ''.join(seq)))
    return fastas
def basescombinations(bases, k=4):
    """Generate all k-mers over bases, plus a zero-initialised count table.

    input: bases, the alphabet to combine (e.g. the four bases 'ACGT')
           k, length of each combination (default 4)
    output: kmers, list of all len(bases)**k combinations; the leftmost
                   position varies slowest, following the order of bases
            combinationdict, dictionary mapping every k-mer to 0
    """
    # Local import keeps this function self-contained.
    from itertools import product

    # product() enumerates in the same order as k nested loops over bases.
    kmers = [''.join(combo) for combo in product(bases, repeat=k)]
    combinationdict = {kmer: 0 for kmer in kmers}
    return kmers, combinationdict
def frequency(string, combinationdict, k=4):
    """Count the occurrences of every k-mer of string.

    input: string, sequence to scan
           combinationdict, dictionary whose keys are the k-mers to count,
                            normally with all values = 0
           k, window length (default 4); must match the key length
    output: combinationdict, the same dictionary object, updated in place so
            that each k-mer maps to its number of (overlapping) occurrences
    raises: KeyError if string contains a k-mer that is not a key of
            combinationdict (e.g. a base outside the alphabet)
    """
    # Overlapping sliding window; yields nothing when string is shorter than k.
    for start in range(len(string) - k + 1):
        combinationdict[string[start:start + k]] += 1
    return combinationdict
if __name__ == "__main__":
    # Usage: python <script> <fasta_file>
    # Prints the 4-mer composition of the first FASTA record: one count per
    # 4-mer, space separated, in the order produced by basescombinations().
    if len(argv) < 2:
        raise SystemExit("usage: python {} <fasta_file>".format(argv[0]))

    with open(argv[1]) as f:
        fastalist = fastaparser(f)
    if not fastalist:
        raise SystemExit("no FASTA records found in {}".format(argv[1]))

    kmers, dictionary = basescombinations(bases)
    # Only the first record's sequence is analysed.
    dictionary = frequency(fastalist[0][1], dictionary)
    print(' '.join(str(dictionary[kmer]) for kmer in kmers))
