Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Aflitos, Saulo Alves
opticalmapping
Commits
9a88e383
Commit
9a88e383
authored
Mar 17, 2015
by
sauloal
Browse files
better gff
parent
d6750788
Changes
4
Hide whitespace changes
Inline
Side-by-side
agp_to_gff.py
View file @
9a88e383
...
...
@@ -4,6 +4,7 @@ import os
import
sys
from
sqlalchemy.util
import
KeyedTuple
flags_gap
=
(
'N'
,
'U'
)
col_names_shared
=
[
'object_id'
,
'object_beg'
,
'object_end'
,
'part_number'
,
'component_type'
]
col_names_gap_Y
=
col_names_shared
+
[
'gap_length'
,
'gap_type'
,
'linkage'
,
'linkage_evidence'
]
...
...
@@ -36,6 +37,7 @@ type_mapper = {
gff_cols
=
[
'object_id'
,
'source_name'
,
'type'
,
'object_beg'
,
'object_end'
,
'score'
,
'orientation'
,
'phase'
]
source_name
=
'agp'
id_prefix
=
'agp_'
def
main
(
args
):
in_agp
=
args
[
0
]
...
...
@@ -84,8 +86,8 @@ def main(args):
cols_in
[
'phase'
]
=
'.'
print
cols_in
row_id
=
cols_in
[
'object_id'
]
+
'_'
+
cols_in
[
'part_number'
]
#
print cols_in
row_id
=
id_prefix
+
cols_in
[
'object_id'
]
+
'_'
+
cols_in
[
'part_number'
]
fhd_ou
.
write
(
"
\t
"
.
join
(
[
cols_in
[
x
]
for
x
in
gff_cols
]
)
)
fhd_ou
.
write
(
"
\t
ID=%s;Name=%s;"
%
(
row_id
,
row_id
)
)
...
...
fasta_gap_to_gff.py
View file @
9a88e383
...
...
@@ -8,12 +8,15 @@ re_ns = re.compile('(n+)')
source_name
=
"fasta"
source_type
=
"gap"
id_prefix
=
"fastagap_"
score
,
orientation
,
phase
=
[
'.'
,
'.'
,
'.'
]
def
parse_seq
(
ofh
,
seq_name
,
seq_seq
):
if
len
(
seq_seq
)
==
0
:
return
ofh
.
write
(
"##sequence-region %s 0 %d
\n
"
%
(
seq_name
,
len
(
seq_seq
)))
print
"saving chromosome"
,
seq_name
,
"len"
,
len
(
seq_seq
)
hit_num
=
1
for
m
in
re_ns
.
finditer
(
seq_seq
.
lower
()):
...
...
@@ -24,7 +27,7 @@ def parse_seq(ofh, seq_name, seq_seq):
match_len
=
len
(
match_seq
)
#print seq_name, start_pos, end_pos, diff_pos, match_len, match_seq
row_id
=
seq_name
+
'_'
+
str
(
hit_num
)
row_id
=
id_prefix
+
seq_name
+
'_'
+
str
(
hit_num
)
attributes
=
"ID=%s;Name=%s;length=%d"
%
(
row_id
,
row_id
,
diff_pos
)
cols
=
[
seq_name
,
source_name
,
source_type
,
start_pos
,
end_pos
,
score
,
orientation
,
phase
,
attributes
]
...
...
om_to_gff.py
View file @
9a88e383
...
...
@@ -94,6 +94,8 @@ def main(args):
feature_name_full1
=
"gene"
feature_name_full2
=
"mRNA"
feature_name_piece
=
"CDS"
id_prefix
=
"om_"
if
not
os
.
path
.
exists
(
infile
):
...
...
@@ -189,8 +191,8 @@ def main(args):
chromosome_name
=
chromosome_names
[
RefContigID
-
1
]
attributes_keys_G
=
[
[
'ID'
,
"%s_%d"
%
(
chromosome_name
,
QryContigID
)
],
[
'Name'
,
"%s_%d"
%
(
chromosome_name
,
QryContigID
)
]
[
'ID'
,
"%s
%s
_%d"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
],
[
'Name'
,
"%s
%s
_%d"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
]
]
for
filter_data
in
filters
:
...
...
@@ -205,9 +207,9 @@ def main(args):
attributes_keys_G
=
[
[
'ID'
,
"%s_%d_m"
%
(
chromosome_name
,
QryContigID
)
],
[
'Name'
,
"%s_%d_m"
%
(
chromosome_name
,
QryContigID
)
],
[
'Parent'
,
"%s_%d"
%
(
chromosome_name
,
QryContigID
)
]
[
'ID'
,
"%s
%s
_%d_m"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
],
[
'Name'
,
"%s
%s
_%d_m"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
],
[
'Parent'
,
"%s
%s
_%d"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
]
]
for
filter_data
in
filters
:
...
...
@@ -238,9 +240,9 @@ def main(args):
HitEnum
=
qry_row
[
"HitEnum"
]
attributes_keys
=
[
[
'ID'
,
"%s_%d_m_%06d_c"
%
(
chromosome_name
,
QryContigID
,
qry_num
)
],
[
'Name'
,
"%s_%d_m_%06d_c"
%
(
chromosome_name
,
QryContigID
,
qry_num
)
],
[
'Parent'
,
"%s_%d_m"
%
(
chromosome_name
,
QryContigID
)
],
[
'ID'
,
"%s
%s
_%d_m_%06d_c"
%
(
id_prefix
,
chromosome_name
,
QryContigID
,
qry_num
)
],
[
'Name'
,
"%s
%s
_%d_m_%06d_c"
%
(
id_prefix
,
chromosome_name
,
QryContigID
,
qry_num
)
],
[
'Parent'
,
"%s
%s
_%d_m"
%
(
id_prefix
,
chromosome_name
,
QryContigID
)
],
[
'Gap'
,
HitEnum
],
]
...
...
run.sh
View file @
9a88e383
...
...
@@ -2,12 +2,12 @@ set -xeu
infile
=
data/S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt
augmented
=
$infile
.augmented.tsv
filtered
=
${
augmented
}
_Confidence_ge_10
.0
__meta_num_orientations_gt_1__meta_is_max_confidence_for_qry_chrom_eq_T
rue
.report.tsv
filtered
=
${
augmented
}
_Confidence_ge_10__meta_num_orientations_gt_1__meta_is_max_confidence_for_qry_chrom_eq_T.report.tsv
delta
=
$filtered
.delta
gff
=
$filtered
.gff3
cols_to_escape
=
info/gff_cols_to_escape.txt
chromosome_names
=
S_lycopersicum_chromosomes.2.50.chromosome_names.txt
chromosome_names
=
info/
S_lycopersicum_chromosomes.2.50.chromosome_names.txt
#rm $augmented || true
if
[[
!
-f
"
$augmented
"
]]
;
then
...
...
@@ -32,5 +32,7 @@ if [[ ! -f "$gff" ]]; then
./om_to_gff.py
--names-from-file
$chromosome_names
--exclude-cols-from-file
$cols_to_escape
$filtered
fi
./om_to_gff.py
--names-from-file
$chromosome_names
--exclude-cols-from-file
$cols_to_escape
S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv
./om_to_gff.py
--names-from-file
$chromosome_names
--exclude-cols-from-file
$cols_to_escape
S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv_Confidence_ge_10.0.report.tsv
./om_to_gff.py
--names-from-file
$chromosome_names
--exclude-cols-from-file
$cols_to_escape
${
augmented
}
./om_filter.py
$augmented
--filter
Confidence:ge:10
./om_to_gff.py
--names-from-file
$chromosome_names
--exclude-cols-from-file
$cols_to_escape
${
augmented
}
_Confidence_ge_10.report.tsv
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment