Commit cb154428 authored by sauloal's avatar sauloal
Browse files

in directive

parent df5135ad
......@@ -3,5 +3,10 @@
*.tsv
*.pyc
*.gff
*.gff3
*.agp
*.tar.gz
.~*
out*
data/
info/
......@@ -8,9 +8,9 @@ from om_shared import *
def parse_args(args):
parser = argparse.ArgumentParser(description="Bionano Genomics augmented MAP filter")
parser.add_argument( 'infile', help="AUGMENTED file" )
parser.add_argument( '-l' , '--list' , action='store_true' , help="List Values" )
parser.add_argument( '-f' , '--filter' , action='append' , help="Filters [Field:Function(lt, le, eq, ne, ge, gt):Value]" )
parser.add_argument( 'infile', help="AUGMENTED file" )
parser.add_argument( '-l' , '--list' , action='store_true' , help="List Fields and Operators" )
parser.add_argument( '-f' , '--filter' , action='append' , help="Filters [Field:Function(%s):Value]" % ", ".join(sorted(valid_operators.keys())))
args = parser.parse_args(args=args)
......@@ -23,6 +23,8 @@ def main(args):
if args.list:
print "LIST OF FIELDS"
print "", "\n ".join( ["%-41s: %-6s : %s"% (valid_field_name, valid_fields['types' ][valid_field_name], valid_fields['helps' ][valid_field_name]) for valid_field_name in valid_fields['names' ]] )
print "LIST OF OPERATORS"
print "", "\n ".join( [ "%-9s: %s" % (x,valid_operators[x][1]) for x in sorted(valid_operators) ] )
sys.exit(0)
if not os.path.exists(infile):
......@@ -37,8 +39,8 @@ def main(args):
filters = gen_filter(args.filter, valid_fields)
oufile = infile
for field_name, field_operator_name, field_operator, field_value in filters:
oufile += '_' + field_name + '_' + field_operator_name + '_' + str(field_value)
for field_name, field_operator_name, field_operator, field_value_str, field_value in filters:
oufile += ('_' + '_'.join( [ field_name, field_operator_name, field_value_str ] )).replace(',', '_')
print "saving to %s" % oufile
......@@ -146,7 +148,12 @@ def main(args):
data[data_pos] = dataVal
filter_res = all([ field_operator( dataVal[field_name], field_value ) for field_name, field_operator_name, field_operator, field_value in filters])
#print filters
#for field_name, field_operator_name, field_operator, field_value_str, field_value in filters:
# print "filter", field_name, field_operator_name, field_operator, field_value_str, field_value, "val", dataVal[field_name]
# print " ", field_operator( dataVal[field_name], field_value )
filter_res = all([ field_operator( dataVal[field_name], field_value ) for field_name, field_operator_name, field_operator, field_value_str, field_value in filters])
if not filter_res:
continue
......@@ -154,9 +161,8 @@ def main(args):
reporter.write("\n".join(headers[:-2]) + "\n#\n")
reporter.write("# FILTERS:\n")
for field_name, field_operator_name, field_operator, field_value in filters:
oufile += '_' + field_name + '_' + field_operator_name + '_' + str(field_value)
reporter.write( "# FILTER : %-39s: %3s : %s\n" % (field_name, field_operator_name, str(field_value) ) )
for field_name, field_operator_name, field_operator, field_value_str, field_value in filters:
reporter.write( "# FILTER : %-39s: %3s : %s\n" % ( field_name, field_operator_name, field_value_str ) )
reporter.write( "\n\n" )
reporter.write("#h " + "\t".join( [ "%-39s" % ( x ) for x in valid_fields['names' ] ] ) + "\n")
......
......@@ -134,6 +134,35 @@ def gen_valid_fields(valid_fields):
return valid_fields
def parser_in(orig_parser, value, sep=","):
    """Parse a separator-delimited string into a set of parsed items.

    orig_parser -- callable applied to every piece of the split string
    value       -- raw string holding one or more items
    sep         -- delimiter between items (defaults to a comma)

    Returns a set, so duplicated items collapse into one.
    """
    parsed_items = set()
    for piece in value.split(sep):
        parsed_items.add(orig_parser(piece))
    return parsed_items
def function_in(val, qry):
    """Return True when val is a member of qry, False otherwise.

    The argument order (candidate first, container second) is the reverse
    of operator.contains(container, candidate).
    """
    is_member = val in qry
    return is_member
# Registry of the operators accepted in a filter expression
# (<field>:<operator>:<value>).  Each operator name maps to a three-item list:
#   [0] the callable, invoked as func(row_value, filter_value);
#   [1] an optional override parser, called as parser(field_parser, value_str)
#       instead of the plain field parser, or None to use the field parser;
#   [2] the help text describing the operator.
valid_operators = {
    #op name     op func            override parser  help
    'eq'      : [ operator.eq      , None           , "value <Equal> to filter"                 ],
    'ge'      : [ operator.ge      , None           , "value <Greater than or Equal> to filter" ],
    'gt'      : [ operator.gt      , None           , "value <Greater than> filter"             ],
    #'is_not' : [ operator.is_not  , None           , "value <Is not> filter [class]"           ],
    #'is'     : [ operator.is_     , None           , "value <Is> filter [class]"               ],
    'le'      : [ operator.le      , None           , "value <Less than or Equal> to filter"    ],
    'lt'      : [ operator.lt      , None           , "value <Less than> filter"                ],
    'ne'      : [ operator.ne      , None           , "value <Not equal> to filter"             ],
    #'truth'  : [ operator.truth   , None           , "value is <Truth>"                        ],
    # operator.contains(a, b) evaluates "b in a", i.e. "filter_value in
    # row_value".  The filter value must therefore stay a single parsed value:
    # a set built by parser_in is not a usable left operand (a str row value
    # raises TypeError), so no override parser is used here.
    'contains': [ operator.contains, None           , "value <Contains> filter"                 ],
    # function_in(row_value, filter_value) evaluates "row_value in
    # filter_value"; parser_in turns the comma separated string into a set.
    'in'      : [ function_in      , parser_in      , "value <In> filter [comma separated]"     ]
}
#for operator_name in sorted(dir(operator)):
# if operator_name[:2] == "__":
# continue
# if operator_name[-1] == "_":
# continue
# print "adding operator name %s" % operator_name
# valid_operators[operator_name] = getattr(operator, operator_name)
def gen_filter(filter_datas, valid_fields):
filters = []
......@@ -145,13 +174,17 @@ def gen_filter(filter_datas, valid_fields):
print "filter has to have 3 parts <field>:<function>:<value>, %d found in %s" % ( len(filter_cols), filter_data )
sys.exit(0)
field_name, operator_name, value = filter_cols
field_name, operator_name, value_str = filter_cols
assert field_name in valid_fields['names' ], "invalid value for field name"
assert operator_name in dir(operator) , "operator %s does not exists. acceptable values are: lt, le, eq, ne, ge, gt" % operator_name
assert operator_name in valid_operators , "operator %s does not exists. acceptable values are: lt, le, eq, ne, ge, gt" % operator_name
if valid_operators[operator_name][1] is None:
value = valid_fields['parsers'][field_name]( value_str )
else:
value = valid_operators[operator_name][1]( valid_fields['parsers'][field_name], value_str )
value = valid_fields['parsers'][field_name]( value )
operator_val = getattr(operator, operator_name)
filter_res = [field_name, operator_name, operator_val, value]
operator_val = valid_operators[operator_name][0]
filter_res = [field_name, operator_name, operator_val, value_str, value]
filters.append(filter_res)
......
set -xeu
infile=S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt
infile=data/S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt
augmented=$infile.augmented.tsv
filtered=${augmented}_Confidence_ge_10.0__meta_num_orientations_gt_1__meta_is_max_confidence_for_qry_chrom_eq_True.report.tsv
delta=$filtered.delta
gff=$filtered.gff
cols_to_escape=info/gff_cols_to_escape.txt
chromosome_names=S_lycopersicum_chromosomes.2.50.chromosome_names.txt
#rm $augmented || true
if [[ ! -f "$augmented" ]]; then
......@@ -27,8 +29,8 @@ fi
rm $gff || true
if [[ ! -f "$gff" ]]; then
./om_to_gff.py --names-from-file S_lycopersicum_chromosomes.2.50.chromosome_names.txt --exclude-cols-from-file gff_cols_to_escape.txt $filtered
./om_to_gff.py --names-from-file $chromosome_names --exclude-cols-from-file $cols_to_escape $filtered
fi
./om_to_gff.py --names-from-file S_lycopersicum_chromosomes.2.50.chromosome_names.txt --exclude-cols-from-file gff_cols_to_escape.txt S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv
./om_to_gff.py --names-from-file S_lycopersicum_chromosomes.2.50.chromosome_names.txt --exclude-cols-from-file gff_cols_to_escape.txt S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv_Confidence_ge_10.0.report.tsv
\ No newline at end of file
./om_to_gff.py --names-from-file $chromosome_names --exclude-cols-from-file $cols_to_escape S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv
./om_to_gff.py --names-from-file $chromosome_names --exclude-cols-from-file $cols_to_escape S_lycopersicum_chromosomes.2.50.BspQI_to_EXP_REFINEFINAL1_xmap.txt.augmented.tsv_Confidence_ge_10.0.report.tsv
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment