Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Aflitos, Saulo Alves
opticalmapping
Commits
d6750788
Commit
d6750788
authored
Mar 17, 2015
by
sauloal
Browse files
fasta gap
parent
e3d73b88
Changes
2
Hide whitespace changes
Inline
Side-by-side
agp_to_gff.py
View file @
d6750788
...
...
@@ -45,7 +45,7 @@ def main(args):
with
open
(
in_agp
,
'r'
)
as
fhd_in
:
with
open
(
ou_gff
,
'w'
)
as
fhd_ou
:
fhd_ou
.
write
(
"##gff-version 3
\n
"
)
fhd_ou
.
write
(
"#infile: %s
\n
"
%
in_agp
)
fhd_ou
.
write
(
"#infile: %s
\n
"
%
in_agp
)
for
line_in
in
fhd_in
:
line_in
=
line_in
.
strip
()
...
...
@@ -92,6 +92,9 @@ def main(args):
fhd_ou
.
write
(
";"
.
join
(
[
"="
.
join
([
x
,
cols_in
[
x
]])
for
x
in
sorted
(
cols_in
)
if
x
not
in
gff_cols
]
)
)
fhd_ou
.
write
(
"
\n
"
)
if
__name__
==
'__main__'
:
main
(
sys
.
argv
[
1
:])
#1 2 3 4 5 6 7 8 9
#SL2.50ch00 1 2191949 1 W SL2.40sc05082 1 2191949 0
...
...
@@ -206,6 +209,3 @@ def main(args):
# listed using a ';' delimiter (e.g. paired-ends;align_xgenus).
if
__name__
==
'__main__'
:
main
(
sys
.
argv
[
1
:])
\ No newline at end of file
fasta_gap_to_gff.py
0 → 100755
View file @
d6750788
#!/usr/bin/python
import
os
import
sys
import
re
# Matches every maximal run of N/n bases. Case-insensitive so that the
# (possibly chromosome-sized) sequence never has to be copied via .lower().
re_ns = re.compile(r'n+', re.IGNORECASE)

# Constant GFF3 columns shared by every emitted gap feature.
source_name = "fasta"                        # column 2: source
source_type = "gap"                          # column 3: feature type
score, orientation, phase = ['.', '.', '.']  # columns 6-8: undefined


def parse_seq(ofh, seq_name, seq_seq):
    """Write one GFF3 "gap" feature line per run of N/n found in a sequence.

    Parameters:
        ofh      -- writable text file-like object receiving the GFF3 lines.
        seq_name -- sequence identifier; used as the GFF3 seqid (column 1)
                    and as the prefix of each feature ID ("<seq_name>_<k>").
        seq_seq  -- the full sequence as a single string.

    Returns None. Nothing is written (and nothing is printed) when
    seq_seq is empty.

    Coordinates follow the GFF3 convention: 1-based, with the end position
    inclusive. The regex match offsets are 0-based half-open, so the start is
    shifted by one while the end can be used as-is.
    """
    if len(seq_seq) == 0:
        return

    # Single pre-formatted argument: prints identically under Python 2 and 3.
    print("saving chromosome %s len %d" % (seq_name, len(seq_seq)))

    for hit_num, m in enumerate(re_ns.finditer(seq_seq), 1):
        start_pos = m.start() + 1   # 0-based -> 1-based
        end_pos = m.end()           # exclusive 0-based == inclusive 1-based
        gap_len = end_pos - start_pos + 1

        row_id = seq_name + '_' + str(hit_num)
        attributes = "ID=%s;Name=%s;length=%d" % (row_id, row_id, gap_len)

        cols = [
            seq_name,
            source_name,
            source_type,
            start_pos,
            end_pos,
            score,
            orientation,
            phase,
            attributes,
        ]

        ofh.write("\t".join([str(x) for x in cols]) + "\n")
def main(args):
    """Convert the N-gaps of a FASTA file into a GFF3 file.

    Parameters:
        args -- command-line arguments; args[0] is the path of the input
                FASTA file. The output is written next to it as
                "<input>.gff3".

    Every record of the FASTA file is handed to parse_seq(), which writes
    one "gap" feature per run of N/n. Blank lines are ignored, and any
    sequence lines appearing before the first ">" header are discarded.

    Raises IndexError when args is empty, and whatever open() raises when
    either file cannot be opened.
    """
    infasta = args[0]
    outgff = infasta + '.gff3'

    with open(infasta, 'r') as ifh:
        with open(outgff, 'w') as ofh:
            ofh.write("##gff-version 3\n")
            ofh.write("#infile: %s\n" % infasta)

            seq_name = None
            # Collect the lines of the current record and join once per
            # record instead of growing one string with repeated +=.
            seq_parts = []

            for line in ifh:
                line = line.strip()

                if not line:
                    continue

                if line[0] == ">":
                    # A new header closes the previous record, if any.
                    if seq_name is not None:
                        parse_seq(ofh, seq_name, "".join(seq_parts))

                    seq_parts = []
                    # Only the first whitespace-delimited token of the header
                    # is the sequence identifier; the rest is a free-text
                    # description, and whitespace is not allowed in the GFF3
                    # seqid column.
                    header_fields = line[1:].split(None, 1)
                    seq_name = header_fields[0] if header_fields else ""

                else:
                    seq_parts.append(line)

            # Flush the last record of the file.
            if seq_name is not None:
                parse_seq(ofh, seq_name, "".join(seq_parts))
# Script entry point: forward everything after the program name to main().
if '__main__' == __name__:
    cli_args = sys.argv[1:]
    main(cli_args)
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment