def debruijn(patterns):
    """Build a de Bruijn adjacency mapping with comma-joined successors.

    Each pattern contributes one edge from its prefix (all characters but
    the last) to its suffix (all characters but the first).  Repeated
    patterns contribute repeated edges; nothing is de-duplicated.

    Args:
        patterns: Iterable of strings (k-mers).

    Returns:
        A dict mapping each prefix to a single string holding its suffixes
        joined by ``','``, in the order the patterns were encountered.
    """
    # NOTE(review): this file also contains a `debruijn(kmer_list)` definition
    # that returns lists instead of strings; whichever is defined later wins.
    # Confirm which one callers are meant to use.
    successors = {}
    for pattern in patterns:
        # Collect suffixes per prefix first; joining once at the end avoids
        # growing a string with repeated `+=`.
        successors.setdefault(pattern[:-1], []).append(pattern[1:])
    return {prefix: ",".join(tails) for prefix, tails in successors.items()}
# Name = Siu Pui Chung Jacky
# Student number = 1047527
# Script for constructing a de Bruijn graph.
# Input: a collection of up to 1000 (possibly repeating) DNA strings of
# equal length (not exceeding 50 bp) corresponding to a set S of (k+1)-mers.
# Output: the adjacency list of the de Bruijn graph corresponding to S ∪ S^rc,
# where S^rc is the set of reverse complements of the strings in S.
from sys import argv
# Translation table mapping each nucleotide to its complement; characters
# not listed here are left unchanged by str.translate.
_COMPLEMENT_TABLE = str.maketrans("ACGT", "TGCA")


def reversecompliment(DNA):
    """Return the reverse complement of a DNA string.

    The input is upper-cased, reversed, and each of A/C/G/T is swapped for
    its complement.  Any other character is passed through unchanged.

    Args:
        DNA: String of nucleotides (any case).

    Returns:
        The upper-case reverse-complemented string.
    """
    return DNA.upper()[::-1].translate(_COMPLEMENT_TABLE)
def debruijn(kmer_list):
    """Convert a list of k-mers into a de Bruijn adjacency list.

    Each k-mer contributes an edge from its prefix (all characters but the
    last) to its suffix (all characters but the first).  An edge that is
    already recorded for a prefix is not added again.

    Args:
        kmer_list: Iterable of DNA strings.

    Returns:
        A dict mapping each prefix node to the list of its distinct suffix
        nodes, in first-seen order.
    """
    # NOTE(review): this file also contains a `debruijn(patterns)` definition
    # that returns comma-joined strings; whichever is defined later wins.
    # Confirm which one callers are meant to use.
    adjlist = {}
    for kmer in kmer_list:
        prefix, suffix = kmer[:-1], kmer[1:]
        neighbours = adjlist.setdefault(prefix, [])
        # Keep each edge once so repeated k-mers do not duplicate output.
        if suffix not in neighbours:
            neighbours.append(suffix)
    return adjlist
# NOTE(review): these two statements use `result` and `i`, which are the
# locals of `debruijn(patterns)` at the top of the file.  They look like that
# function's displaced "append to existing entry, then return" tail (the
# `else:` that would introduce the first line is not present).  As placed,
# the `return` is outside any function -- confirm and relocate or remove.
result[i[:-1]] += ','+ i[1:]
return result
if __name__ == '__main__':
    # Script entry point: read one pattern per line from the fixed input
    # file, build the graph, and print each node with its successors.
    # NOTE(review): the string concatenation below requires debruijn() to
    # return comma-joined strings; the list-valued `debruijn(kmer_list)`
    # defined elsewhere in this file would raise TypeError here -- confirm
    # which definition is bound when this runs.
    with open('rosalind_dbru.txt', 'r') as f:
        # Skip blank lines so they do not become empty-string patterns.
        p = [line.strip() for line in f if line.strip()]
    res = debruijn(p)
    for i in sorted(res):
        print(i + ' -> ' + res[i])
# NOTE(review): these statements use `kmer` and `adjlist`, the locals of
# `debruijn(kmer_list)` defined earlier in the file.  They look like that
# function's displaced tail; the body of the `if` (presumably an append of
# kmer[1:] to adjlist[kmer[:-1]]) is not present.  As placed, the `return`
# is outside any function -- confirm and relocate or remove.
if kmer[1:] not in adjlist[kmer[:-1]]:
return adjlist
if __name__ == "__main__":
    # Script entry point: argv[1] names a file with one DNA string per line.
    with open(argv[1]) as f:
        # Skip blank lines so they do not become empty-string k-mers.
        kmer_list = [line.strip() for line in f if line.strip()]

    # The graph is built over the input strings together with their
    # reverse complements.
    reverse_compli_kmer_list = [reversecompliment(kmer) for kmer in kmer_list]
    total_kmer_list = kmer_list + reverse_compli_kmer_list
    adjlist = debruijn(total_kmer_list)

    # Write one "(node,successor)" edge per line, nodes in sorted order and
    # successors in the order debruijn() recorded them.
    with open('answer.txt', 'w') as f:
        for key in sorted(adjlist):
            for value in adjlist[key]:
                f.write(f"({key},{value})\n")
