Commit d6750788 authored by sauloal
Browse files

fasta gap

parent e3d73b88
......@@ -45,7 +45,7 @@ def main(args):
with open(in_agp, 'r') as fhd_in:
with open(ou_gff, 'w') as fhd_ou:
fhd_ou.write("##gff-version 3\n")
fhd_ou.write("#infile: %s\n" %in_agp)
fhd_ou.write("#infile: %s\n" % in_agp)
for line_in in fhd_in:
line_in = line_in.strip()
......@@ -92,6 +92,9 @@ def main(args):
fhd_ou.write(";".join( ["=".join([x, cols_in[x]]) for x in sorted(cols_in) if x not in gff_cols] ) )
fhd_ou.write("\n")
if __name__ == '__main__':
main(sys.argv[1:])
#1 2 3 4 5 6 7 8 9
#SL2.50ch00 1 2191949 1 W SL2.40sc05082 1 2191949 0
......@@ -206,6 +209,3 @@ def main(args):
# listed using a ';' delimiter (e.g. paired-ends;align_xgenus).
if __name__ == '__main__':
main(sys.argv[1:])
\ No newline at end of file
#!/usr/bin/python
import os
import sys
import re
# Constant values for the GFF3 columns that are identical on every line
# written by parse_seq.
source_name = "fasta"   # column 2: source
source_type = "gap"     # column 3: feature type
score = '.'             # column 6: no score
orientation = '.'       # column 7: no strand
phase = '.'             # column 8: no phase

# Matches one run of consecutive 'n' characters. The pattern is lower-case
# only; parse_seq lower-cases the sequence before searching with it.
re_ns = re.compile('(n+)')
def parse_seq(ofh, seq_name, seq_seq):
    """Write one GFF3 feature line for every run of N bases in a sequence.

    Parameters:
        ofh      -- writable file-like object receiving the GFF3 lines.
        seq_name -- sequence identifier; used as the GFF3 seqid (column 1)
                    and as the prefix of each feature ID.
        seq_seq  -- the full sequence as a single string. Matching is
                    case-insensitive (the sequence is lower-cased first).

    Nothing is written for an empty sequence. Features are numbered from 1
    in order of appearance and get the ID "<seq_name>_<number>".
    """
    if not seq_seq:
        return

    # Single pre-formatted string so the statement prints the same text
    # under both Python 2 and Python 3.
    print("saving chromosome %s len %d" % (seq_name, len(seq_seq)))

    for hit_num, m in enumerate(re_ns.finditer(seq_seq.lower()), 1):
        gap_len = m.end() - m.start()

        # GFF3 coordinates are 1-based and inclusive, whereas the regex
        # match span is 0-based and end-exclusive: shift the start by one,
        # the exclusive end already equals the inclusive 1-based end.
        start_pos = m.start() + 1
        end_pos = m.end()

        row_id = "%s_%d" % (seq_name, hit_num)
        attributes = "ID=%s;Name=%s;length=%d" % (row_id, row_id, gap_len)
        cols = [seq_name, source_name, source_type, start_pos, end_pos,
                score, orientation, phase, attributes]
        ofh.write("\t".join(str(x) for x in cols) + "\n")
def main(args):
    """Find runs of N in a FASTA file and write them to a GFF3 file.

    Parameters:
        args -- list of command-line arguments; args[0] is the path of the
                input FASTA file.

    The output is written to "<input path>.gff3". It starts with the GFF3
    version pragma and a comment naming the input file, followed by the
    lines produced by parse_seq for each record, in file order.

    Raises IndexError if args is empty.
    """
    infasta = args[0]
    outgff = infasta + '.gff3'

    with open(infasta, 'r') as ifh:
        with open(outgff, 'w') as ofh:
            ofh.write("##gff-version 3\n")
            ofh.write("#infile: %s\n" % infasta)

            seq_name = None
            # Sequence lines are collected and joined once per record
            # instead of growing one string with repeated concatenation.
            seq_chunks = []

            for line in ifh:
                line = line.strip()
                if not line:
                    continue

                if line.startswith(">"):
                    # A new header closes the previous record, if any.
                    if seq_name is not None:
                        parse_seq(ofh, seq_name, "".join(seq_chunks))
                    # Always start the new record with an empty buffer so
                    # stray lines before the first header are not glued
                    # onto the first sequence.
                    seq_chunks = []

                    # Keep only the identifier (first whitespace-delimited
                    # token) of the header: the free-text description
                    # would put unescaped whitespace into the GFF3 seqid
                    # column and the ID attribute.
                    header_fields = line[1:].split()
                    seq_name = header_fields[0] if header_fields else ""
                else:
                    seq_chunks.append(line)

            # Flush the last record, which has no following header.
            if seq_name is not None:
                parse_seq(ofh, seq_name, "".join(seq_chunks))
# Script entry point: pass the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == '__main__':
    main(sys.argv[1:])
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment