Commit dc899ca0 authored by sauloal's avatar sauloal
Browse files

with help

parent 3f3af440
*.txt
*.delta
*.tsv
*.pyc
out*
~assembly/tomato150/programs/mummer/MUMmer3.23/mummerplot S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.delta
~assembly/tomato150/programs/mummer/MUMmer3.23/mummerplot $1
#S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.delta
......@@ -2,17 +2,9 @@
import os
import sys
import re
import operator
import argparse
from collections import defaultdict
from sqlalchemy.util import KeyedTuple
"""
#h XmapEntryID QryContigID RefContigID QryStartPos QryEndPos RefStartPos RefEndPos Orientation Confidence HitEnum QryLen RefLen LabelChannel Alignment
#f int int int float float float float string float string float float int string
1 141 1 528400.6 571697.5 10672 54237.5 + 6.65 4M2D2M 1439123.5 21805821 1 "(1,34)(2,34)(3,35)(4,36)(5,37)(6,38)(8,38)(9,39)"
"""
from om_shared import *
......@@ -27,76 +19,17 @@ group_by = [
["QryContigID", "XmapEntryID"],
["XmapEntryID", "Confidence" ],
]
# Pre-compiled patterns for the HitEnum pseudo-CIGAR runs (e.g. "4M2D2M")
# and the Alignment "(ref,qry)" label pairs.  Raw strings are used so the
# "\d" escape is not flagged as invalid by newer Python versions; the
# pattern values are byte-identical to the originals.
re_matches    = re.compile(r"(\d+)M")
re_insertions = re.compile(r"(\d+)I")
re_deletions  = re.compile(r"(\d+)D")
re_alignment  = re.compile(r"\((\d+),(\d+)\)")
# Whitelist of column names accepted by --filter expressions and shown by
# --list: the 14 raw XMAP columns plus the derived "_meta_*" statistics
# attached to each row during processing.  Kept sorted alphabetically.
valid_fields = [
    "Alignment",
    "Confidence",
    "HitEnum",
    "LabelChannel",
    "Orientation",
    "QryContigID",
    "QryEndPos",
    "QryLen",
    "QryStartPos",
    "RefContigID",
    "RefEndPos",
    "RefLen",
    "RefStartPos",
    "XmapEntryID",
    # derived columns (computed, not read from the XMAP file)
    "_meta_alignment_count_queries",
    "_meta_alignment_count_queries_colapses",
    "_meta_alignment_count_refs",
    "_meta_alignment_count_refs_colapses",
    "_meta_cigar_deletions",
    "_meta_cigar_insertions",
    "_meta_cigar_matches",
    "_meta_is_max_confidence_for_qry_chrom",
    "_meta_len_qry_match_gapped",
    "_meta_len_qry_match_no_gap",
    "_meta_len_ref_match_gapped",
    "_meta_len_ref_match_no_gap",
    "_meta_max_confidence_for_qry",
    "_meta_max_confidence_for_qry_chrom",
    "_meta_num_orientations",
    "_meta_num_qry_matches",
    "_meta_proportion_query_len_gapped",
    "_meta_proportion_query_len_no_gap",
    "_meta_proportion_sizes_gapped",
    "_meta_proportion_sizes_no_gap"
]
def col_parse_orientation(val):
    """Validate an XMAP Orientation column value.

    Only the two strand symbols "+" and "-" are legal; the value is
    returned unchanged.
    """
    assert val in ("+", "-")
    return val
def col_parse_hit_enum(val):
    """Pass a HitEnum pseudo-CIGAR value (e.g. "4M2D2M") through unchanged."""
    return val
def col_parse_alignment(val):
    """Strip the surrounding double quotes from an Alignment column value.

    e.g. '"(1,34)(2,34)"' -> '(1,34)(2,34)'
    """
    return val.strip('"')
def parse_file(infile):
def parse_file(infile, valid_fields):
data = []
names = []
seman = {}
types = []
indexer = {}
headers = []
groups = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
ref_maps_from = ""
query_maps_from = ""
......@@ -113,6 +46,8 @@ def parse_file(infile):
continue
if line[0] == "#":
headers.append(line)
if line[1] == "h":
line = line[3:]
names = [x.strip() for x in line.split("\t")]
......@@ -125,14 +60,14 @@ def parse_file(infile):
line = line[3:]
types = [x.strip() for x in line.split("\t")]
for tp in xrange(len(types)):
t = types[tp]
if t == "int":
types[tp] = int
elif t == "float":
types[tp] = float
elif t == "string":
types[tp] = col_parsers[ names[tp] ]
#for tp in xrange(len(types)):
# t = types[tp]
# if t == "int":
# types[tp] = int
# elif t == "float":
# types[tp] = float
# elif t == "string":
# types[tp] = col_parsers[ names[tp] ]
assert(len(types) == len(names))
......@@ -148,8 +83,8 @@ def parse_file(infile):
continue
cols = [x.strip() for x in line.split("\t") ]
vals = [types[p](cols[p]) for p in xrange(len(cols))]
cols = [x.strip() for x in line.split("\t") ]
vals = [valid_fields['parsers'][names[p]](cols[p]) for p in xrange(len(cols))]
for ind in indexer:
indexer[ ind ][ vals[seman[ind]] ].add(len(data))
......@@ -161,62 +96,12 @@ def parse_file(infile):
data.append(vals)
return data, names, seman, types, indexer, groups, ref_maps_from, query_maps_from
def process_cigar(cigar):
    """Sum the run lengths of a HitEnum pseudo-CIGAR string.

    e.g. "4M2D2M" -> (6, 0, 2), returned as the tuple
    (matches, insertions, deletions).
    """
    matches    = sum(int(n) for n in re.findall(r"(\d+)M", cigar))
    insertions = sum(int(n) for n in re.findall(r"(\d+)I", cigar))
    deletions  = sum(int(n) for n in re.findall(r"(\d+)D", cigar))
    return matches, insertions, deletions
def process_alignment(alignment):
    """Tally the "(ref,qry)" label pairs of an XMAP Alignment string.

    Returns (unique_ref_labels, unique_qry_labels, ref_colapses,
    qry_colapses), where each "colapses" value is the summed multiplicity
    of the labels on that side that were matched more than once.
    """
    ref_counts = defaultdict(int)
    qry_counts = defaultdict(int)
    for ref_id, qry_id in re.findall(r"\((\d+),(\d+)\)", alignment):
        ref_counts[int(ref_id)] += 1
        qry_counts[int(qry_id)] += 1
    ref_colapses = sum(c for c in ref_counts.values() if c > 1)
    qry_colapses = sum(c for c in qry_counts.values() if c > 1)
    return len(ref_counts), len(qry_counts), ref_colapses, qry_colapses
return data, headers, names, seman, types, indexer, groups, ref_maps_from, query_maps_from
def parse_args(args):
parser = argparse.ArgumentParser(description="Bionano Genomics MAP parser")
parser.add_argument('infile', help="MAP file" )
parser.add_argument('-f' , '--filter' , action='append' , help="Filters [Field:Function(lt, le, eq, ne, ge, gt):Value:Type(i (INT), f (FLOAT), s (STRING), b (BOOL))]")
parser.add_argument('-f' , '--filter' , action='append' , help="Filters [Field:Function(lt, le, eq, ne, ge, gt):Value]")
parser.add_argument('-l' , '--list' , action='store_true' , help="List Values" )
parser.add_argument('-g' , '--count' , action='store_false', help="Do NOT do global count" )
parser.add_argument('-c' , '--conf' , action='store_false', help="Do NOT do confidence stats" )
......@@ -229,20 +114,14 @@ def parse_args(args):
return args
# Maps the "string"-typed XMAP column names (from the "#h" header line) to
# the function that validates/normalises each raw text value.
col_parsers = {
    "Orientation": col_parse_orientation,
    "HitEnum"    : col_parse_hit_enum,
    "Alignment"  : col_parse_alignment
}
def main(args):
args = parse_args(args)
valid_fields = gen_valid_fields(valid_fields_g)
args = parse_args(args)
if args.list:
print "LIST OF FIELDS"
print "", "\n ".join( valid_fields )
print "", "\n ".join( ["%-41s: %-6s : %s"% (valid_field_name, valid_fields['types' ][valid_field_name], valid_fields['helps' ][valid_field_name]) for valid_field_name in valid_fields['names' ]] )
sys.exit(0)
infile = args.infile
......@@ -261,55 +140,21 @@ def main(args):
sys.exit(1)
filters = []
if args.filter is not None:
for filter_data in args.filter:
filter_cols = filter_data.split(":")
if len(filter_cols) != 4:
print "filter has to have 4 parts <field>:<function>:<value>:<type>"
sys.exit(0)
field_name, operator_name, value, val_type = filter_cols
assert val_type in ("i", "f", "s", "b"), "acceptable values for value type are i,f,s,b"
assert field_name in valid_fields , "invalid value for field name"
assert operator_name in dir(operator) , "operator %s does not exists. acceptable values are: lt, le, eq, ne, ge, gt" % operator_name
if val_type == "i":
value = int( value)
elif val_type == "f":
value = float(value)
elif val_type == "s":
value = str( value)
elif val_type == "b":
assert value in ['T', 'F'], 'Bool is either T or F'
value = value == 'T'
else:
print "unknown type %s. options are i (INT), f (FLOAT), s (STRING)" % (val_type)
sys.exit(1)
operator_name = getattr(operator, operator_name)
filter_cols[1] = operator_name
filter_cols[2] = value
filters.append(filter_cols)
filters = gen_filter(args, valid_fields)
oufile = infile
for field_name, operator_name, value, value_type in filters:
oufile += '_' + field_name + '_' + operator_name.__name__ + '_' + str(value)
for field_name, field_operator_name, field_operator, field_value in filters:
oufile += '_' + field_name + '_' + field_operator_name + '_' + str(field_value)
print "saving to %s" % oufile
data, names, seman, types, indexer, groups, ref_maps_from, query_maps_from = parse_file(infile)
data, headers, names, seman, types, indexer, groups, ref_maps_from, query_maps_from = parse_file(infile, valid_fields)
print "NAMES", names
print "TYPES", types
print "NAMES" , names
#print "HEADERS", "\n".join( headers )
print "TYPES" , types
#print "DATA" , data[1]
#print "INDEX", indexer.keys()[0], indexer[indexer.keys()[0]]
......@@ -342,10 +187,12 @@ def main(args):
reporter = None
if DO_REPORT:
print "SAVING REPORT"
print "CREATING REPORT:", oufile + ".report.tsv"
reporter = open(oufile + ".report.tsv", "w")
linecount = 0
for RefContigID in sorted(groups["RefContigID_QryContigID"]):
QryContigIDs = groups["RefContigID_QryContigID"][RefContigID]
......@@ -387,11 +234,13 @@ def main(args):
if isinstance(data[confidence_pos], dict):
pass
else:
data[confidence_pos] = KeyedTuple(data[confidence_pos], labels=names)._asdict()
max_confidence_chrom = data[confidence_pos]["RefContigID"]
#print "RefContigID %4d QryContigID %6d" % ( RefContigID, QryContigID )
for data_pos in dataPoses:
dataVal = data[data_pos]
......@@ -402,40 +251,52 @@ def main(args):
Alignment = dataVal["Alignment"]
alignment_count_queries, alignment_count_refs, alignment_count_refs_colapses, alignment_count_queries_colapses = process_alignment(Alignment)
dataVal["_meta_num_orientations" ] = num_orientations
dataVal["_meta_num_qry_matches" ] = num_qry_matches
dataVal["_meta_num_good_confidence_%d"%MIN_CONFIDENCE] = num_good_quals
dataVal["_meta_len_ref_match_no_gap" ] = ref_no_gap_len
dataVal["_meta_alignment_count_queries" ] = alignment_count_queries
dataVal["_meta_alignment_count_queries_colapses" ] = alignment_count_refs_colapses
dataVal["_meta_alignment_count_refs" ] = alignment_count_refs
dataVal["_meta_alignment_count_refs_colapses" ] = alignment_count_queries_colapses
dataVal["_meta_cigar_deletions" ] = cigar_deletions
dataVal["_meta_cigar_insertions" ] = cigar_insertions
dataVal["_meta_cigar_matches" ] = cigar_matches
dataVal["_meta_is_max_confidence_for_qry_chrom" ] = max_confidence_chrom == RefContigID
dataVal["_meta_len_ref_match_gapped" ] = ref_gap_len
dataVal["_meta_len_qry_match_no_gap" ] = qry_no_gap_len
dataVal["_meta_len_ref_match_no_gap" ] = ref_no_gap_len
dataVal["_meta_len_qry_match_gapped" ] = qry_gap_len
dataVal["_meta_proportion_sizes_no_gap" ] = (ref_no_gap_len * 1.0)/ qry_no_gap_len
dataVal["_meta_proportion_sizes_gapped" ] = (ref_gap_len * 1.0)/ qry_gap_len
dataVal["_meta_proportion_query_len_no_gap" ] = (qry_no_gap_len * 1.0)/ dataVal["QryLen"]
dataVal["_meta_proportion_query_len_gapped" ] = (qry_gap_len * 1.0)/ dataVal["QryLen"]
dataVal["_meta_len_qry_match_no_gap" ] = qry_no_gap_len
dataVal["_meta_max_confidence_for_qry" ] = max_confidence
dataVal["_meta_max_confidence_for_qry_chrom" ] = max_confidence_chrom
dataVal["_meta_is_max_confidence_for_qry_chrom" ] = max_confidence_chrom == RefContigID
dataVal["_meta_cigar_matches" ] = cigar_matches
dataVal["_meta_cigar_insertions" ] = cigar_insertions
dataVal["_meta_cigar_deletions" ] = cigar_deletions
dataVal["_meta_alignment_count_queries" ] = alignment_count_queries
dataVal["_meta_alignment_count_refs" ] = alignment_count_refs
dataVal["_meta_alignment_count_queries_colapses" ] = alignment_count_refs_colapses
dataVal["_meta_alignment_count_refs_colapses" ] = alignment_count_queries_colapses
dataVal["_meta_num_good_confidence" ] = num_good_quals
dataVal["_meta_num_orientations" ] = num_orientations
dataVal["_meta_num_qry_matches" ] = num_qry_matches
dataVal["_meta_proportion_query_len_gapped" ] = (qry_gap_len * 1.0)/ dataVal["QryLen"]
dataVal["_meta_proportion_query_len_no_gap" ] = (qry_no_gap_len * 1.0)/ dataVal["QryLen"]
dataVal["_meta_proportion_sizes_gapped" ] = (ref_gap_len * 1.0)/ qry_gap_len
dataVal["_meta_proportion_sizes_no_gap" ] = (ref_no_gap_len * 1.0)/ qry_no_gap_len
data[data_pos] = dataVal
filter_res = all([ x[1]( dataVal[x[0]], x[2] ) for x in filters])
filter_res = all([ field_operator( dataVal[field_name], field_value ) for field_name, field_operator_name, field_operator, field_value in filters])
if not filter_res:
continue
if DO_REPORT:
if linecount == 0:
reporter.write("\t".join(sorted(dataVal)) + "\n")
reporter.write("\n".join(headers[:-2]) + "\n\n")
reporter.write("# META_MIN_CONFIDENCE: %d\n\n" % MIN_CONFIDENCE)
reporter.write( "\n".join( [ "# %-39s: %s" % ( x, valid_fields['helps_t'][x] ) for x in valid_fields['names' ] ] ) + "\n\n")
reporter.write("#h " + "\t".join( [ "%-39s" % ( x ) for x in valid_fields['names' ] ] ) + "\n")
reporter.write("#f " + "\t".join( [ "%-39s" % ( valid_fields['types' ][x] ) for x in valid_fields['names' ] ] ) + "\n")
#print " ", " ".join( ["%s %s" % (x, str(dataVal[x])) for x in sorted(dataVal)] )
reporter.write("\t".join( [ str(dataVal[x]) for x in sorted(dataVal)]) + "\n")
reporter.write( "\t".join( [ str(dataVal[x]) for x in valid_fields['names' ] ] ) + "\n")
linecount += 1
print
......@@ -443,7 +304,7 @@ def main(args):
if DO_DELTA:
print "CREATING DELTA"
print "CREATING DELTA: ", oufile + ".delta"
with open(oufile + ".delta", "w") as fhd:
linecount = 0
#fhd.write("/home/assembly/nobackup/mummer/MUMmer3.23/1502/solanum_lycopersicum_heinz/SL2.40ch12.fa /home/assembly/nobackup/mummer/MUMmer3.23/1502/solanum_pennellii_scaffold/final.assembly.fasta\n")
......@@ -464,7 +325,7 @@ def main(args):
for pos_row_pos in pos_rows:
pos_row = data[pos_row_pos]
filter_res = all([ x[1]( pos_row[x[0]], x[2] ) for x in filters])
filter_res = all([ field_operator( pos_row[field_name], field_value ) for field_name, field_operator_name, field_operator, field_value in filters])
if not filter_res:
continue
......@@ -494,7 +355,7 @@ def main(args):
for qry_row_pos in qry_rows:
qry_row = data[qry_row_pos]
filter_res = all([ x[1]( qry_row[x[0]], x[2] ) for x in filters])
filter_res = all([ field_operator( qry_row[field_name], field_value ) for field_name, field_operator_name, field_operator, field_value in filters])
if not filter_res:
continue
......
import re
import operator
import argparse
import textwrap
from collections import defaultdict
from sqlalchemy.util import KeyedTuple
"""
#h XmapEntryID QryContigID RefContigID QryStartPos QryEndPos RefStartPos RefEndPos Orientation Confidence HitEnum QryLen RefLen LabelChannel Alignment
#f int int int float float float float string float string float float int string
1 141 1 528400.6 571697.5 10672 54237.5 + 6.65 4M2D2M 1439123.5 21805821 1 "(1,34)(2,34)(3,35)(4,36)(5,37)(6,38)(8,38)(9,39)"
"""
# Pre-compiled patterns for the HitEnum pseudo-CIGAR runs (e.g. "4M2D2M")
# and the Alignment "(ref,qry)" label pairs.  Raw strings are used so the
# "\d" escape is not flagged as invalid by newer Python versions; the
# pattern values are byte-identical to the originals.
re_matches    = re.compile(r"(\d+)M")
re_insertions = re.compile(r"(\d+)I")
re_deletions  = re.compile(r"(\d+)D")
re_alignment  = re.compile(r"\((\d+),(\d+)\)")
def col_parse_orientation(val):
    """Validate an XMAP Orientation column value.

    Only the two strand symbols "+" and "-" are legal; the value is
    returned unchanged.
    """
    assert val in ("+", "-")
    return val
def col_parse_hit_enum(val):
    """Validate a HitEnum pseudo-CIGAR value (e.g. "4M2D2M").

    Raises AssertionError if the value contains anything other than digits
    and the M/I/D operation codes; otherwise returns it unchanged.
    """
    # set(val) <= set(...) replaces the original set([x for x in val])
    # throwaway-list construction; the membership check is identical.
    assert set(val) <= set("MDI0123456789")
    return val
def col_parse_alignment(val):
    """Validate an Alignment column value and strip its surrounding quotes.

    e.g. '"(1,34)(2,34)"' -> '(1,34)(2,34)'.  Raises AssertionError if the
    unquoted value contains anything other than digits, parentheses and
    commas.
    """
    val = val.strip('"')
    # set(val) <= set(...) replaces the original set([x for x in val])
    # throwaway-list construction; the membership check is identical.
    assert set(val) <= set("(),0123456789")
    return val
def col_parse_bool(val):
    """Parse a boolean column value, case-insensitively.

    "t"/"true"/"1" -> True, "f"/"false"/"0" -> False.  Anything else
    reports the error and exits with status 1 (same terminal behaviour as
    before, but the message now goes to stderr instead of stdout).
    """
    lowered = val.lower()  # computed once instead of per-branch
    if lowered in ("t", "true", "1"):
        return True
    elif lowered in ("f", "false", "0"):
        return False
    else:
        sys.stderr.write("could not parse bool %s\n" % val)
        sys.exit(1)
def process_cigar(cigar):
    """Sum the run lengths of a HitEnum pseudo-CIGAR string.

    e.g. "4M2D2M" -> (6, 0, 2), returned as the tuple
    (matches, insertions, deletions).
    """
    # only digits and the M/I/D operation codes are legal here
    assert set(cigar) <= set("MDI0123456789")
    matches    = sum(int(n) for n in re.findall(r"(\d+)M", cigar))
    insertions = sum(int(n) for n in re.findall(r"(\d+)I", cigar))
    deletions  = sum(int(n) for n in re.findall(r"(\d+)D", cigar))
    return matches, insertions, deletions
def process_alignment(alignment):
    """Tally the "(ref,qry)" label pairs of an XMAP Alignment string.

    Returns (unique_ref_labels, unique_qry_labels, ref_colapses,
    qry_colapses), where each "colapses" value is the summed multiplicity
    of the labels on that side that were matched more than once.
    """
    # only digits, parentheses and commas are legal here
    assert set(alignment) <= set("(),0123456789")
    ref_counts = defaultdict(int)
    qry_counts = defaultdict(int)
    for ref_id, qry_id in re.findall(r"\((\d+),(\d+)\)", alignment):
        ref_counts[int(ref_id)] += 1
        qry_counts[int(qry_id)] += 1
    ref_colapses = sum(c for c in ref_counts.values() if c > 1)
    qry_colapses = sum(c for c in qry_counts.values() if c > 1)
    return len(ref_counts), len(qry_counts), ref_colapses, qry_colapses
def gen_valid_fields(valid_fields):
    """Derive per-field lookup tables from valid_fields['data'], in place.

    Each 'data' row is [name, type-name, parser, help].  Adds to the dict:
      names   - field names in declaration order
      parsers - name -> value-parsing callable
      types   - name -> type-name string
      poses   - name -> column position
      helps   - name -> help text wrapped/indented for the argparse listing
      helps_t - name -> help text wrapped/indented for '#'-prefixed report headers
    Returns the same (mutated) dict.
    """
    names   = []
    parsers = {}
    types   = {}
    poses   = {}
    helps   = {}
    helps_t = {}
    for pos, (name, type_name, parser, help_text) in enumerate(valid_fields['data']):
        names.append(name)
        parsers[name] = parser
        types[name]   = type_name
        poses[name]   = pos
        wrapped = textwrap.wrap(help_text, 80)
        helps[name]   = ("\n" + (" " * 53)).join(wrapped)
        helps_t[name] = ("\n#" + (" " * 42)).join(wrapped)
    valid_fields['names'  ] = names
    valid_fields['parsers'] = parsers
    valid_fields['types'  ] = types
    valid_fields['poses'  ] = poses
    valid_fields['helps'  ] = helps
    valid_fields['helps_t'] = helps_t
    return valid_fields
def gen_filter(args, valid_fields):
    """Build filter specs from the parsed command-line arguments.

    Each entry of args.filter must look like "<field>:<function>:<value>";
    the value is parsed with the field's own parser from
    valid_fields['parsers'].  Returns a list of
    [field_name, operator_name, operator_callable, parsed_value].

    A malformed expression now reports to stderr and exits with status 1
    (previously the message went to stdout and the exit status was 0,
    i.e. success, which hid the failure from shell scripts).  Unknown
    fields/operators still raise AssertionError, as before.
    """
    filters = []
    if args.filter is None:
        return filters
    for filter_data in args.filter:
        filter_cols = filter_data.split(":")
        if len(filter_cols) != 3:
            sys.stderr.write("filter has to have 3 parts <field>:<function>:<value>, %d found in %s\n" % (len(filter_cols), filter_data))
            sys.exit(1)
        field_name, operator_name, value = filter_cols
        assert field_name    in valid_fields['names'], "invalid value for field name"
        assert operator_name in dir(operator)        , "operator %s does not exists. acceptable values are: lt, le, eq, ne, ge, gt" % operator_name
        # convert the raw text with the same parser used for file columns
        value = valid_fields['parsers'][field_name](value)
        filters.append([field_name, operator_name, getattr(operator, operator_name), value])
    return filters
valid_fields_g = {
'data':
[
#colum name type parser order help
[ "XmapEntryID" , 'int' , int , 'A unique line number for the data lines in the XMAP file. Note: For 2-color, the XmapEntryID will begin with the number 2.' ],
[ "QryContigID" , 'int' , int , 'Map ID of query map (Contig ID from .cmap file for query)' ],
[ "RefContigID" , 'int' , int , 'Map ID of the reference map from the .cmap reference file (the .cmap file may contain multiple reference maps). Note: RefContigIDs must be integers, but they need not be sequential.' ],
[ "QryStartPos" , 'float' , float , 'Coordinates of the first aligned label on the query map (Start position of hit on query map)' ],
[ "QryEndPos" , 'float' , float , 'Coordinates of the last aligned label on the query map (Stop position of hit on query map)' ],
[ "RefStartPos" , 'float' , float , 'Coordinates of the first aligned label on the reference or anchor map' ],
[ "RefEndPos" , 'float' , float , 'Coordinates of the last aligned label on the reference or anchor map' ],
[ "Orientation" , 'string', col_parse_orientation, 'The relative orientation of the query map relative to the reference: forward (+) or reverse (-). The convention is that the reference is always positive orientation, so if the query aligns in reverse, it is shown as having negative (-) orientation. Note: For 2-color, the orientation will be the same.' ],
[ "Confidence" , 'float' , float , 'Statistical Confidence of result: Negative Log10 of p-value of alignment (without Bonferroni Correction for multiple experiments). Note: For 2-color, the confidence number is the combined confidence of the alignment for both colors.' ],
[ "HitEnum" , 'string', col_parse_hit_enum , 'Pseudo-CIGAR string representing matches (M), insertions (I), or deletions (D) of label sites with respect to the reference or anchor map. Count begins at the leftmost anchor label of that color. Note: When 2 or more anchor sites resolve into a single query site, only the rightmost anchor site is shown matched with the query site and the leftmost associated anchor sites are shown as deletions.' ],
[ "QryLen" , 'float' , float , 'Length of query map from _q.cmap.' ],
[ "RefLen" , 'float' , float , 'Length of reference map from _r.cmap.' ],
[ "LabelChannel" , 'int' , int , 'Color channel of alignment from cmap files. For 1-color data, LabelChannel is 1. For 2-color data: Using -usecolor N, the LabelChannel is N (N = 1 or 2), and there is only one XMAP entry per alignment for the color channel specified by N. Without -usecolor N, LabelChannel is 1 or 2. In this case, there are two XMAP entries (two lines), one for each color channel.' ],
[ "Alignment" , 'string', col_parse_alignment , 'Indices of the aligned site ID pairs. (When the query orientation is reversed ("-"), the query IDs are in descending order.) Count begins at the leftmost anchor label of that color. Note: When two sites in the reference align with the same site in the query, it is an indication that the two sites in the reference failed to resolve. Alignment provides a view of aligned pairs which would normally be ignored by HitEnum (CIGAR string).' ],
[ "_meta_alignment_count_queries" , 'int' , int , 'Number of query labels in alignment' ],
[ "_meta_alignment_count_queries_colapses", 'int' , int , 'Number of query label collapses in alignment. A collapse happens when a label matches more than once a reference label' ],
[ "_meta_alignment_count_refs" , 'int' , int , 'Number of reference labels in alignment' ],
[ "_meta_alignment_count_refs_colapses" , 'int' , int , 'Number of reference label collapses in alignment. A collapse happens when a label matches more than once a query label' ],
[ "_meta_cigar_deletions" , 'int' , int , 'Number of deleted labels in CIGAR string' ],
[ "_meta_cigar_insertions" , 'int' , int , 'Number of inserted labels in CIGAR string' ],
[ "_meta_cigar_matches" , 'int' , int , 'Number of match labels in CIGAR string' ],
[ "_meta_is_max_confidence_for_qry_chrom" , 'bool' , col_parse_bool , 'Whether the current RefContigID is the highest confidence match for this QryContigID' ],