From 6df8e61f5066cf3e6fef8c4d83fc538bae6525f9 Mon Sep 17 00:00:00 2001 From: Saulo Aflitos <sauloalves.aflitos@wur.nl> Date: Fri, 29 Apr 2016 00:17:52 +0200 Subject: [PATCH] mods --- newest/vcfconcat.py | 318 ++++++++++++++----------- newest/vcffiltergff.py | 57 +++-- newest/vcfmerger.py | 299 ++++++++++------------- scripts/median_polish/median_polish.py | 168 ++++++++----- 4 files changed, 456 insertions(+), 386 deletions(-) diff --git a/newest/vcfconcat.py b/newest/vcfconcat.py index 011387a..82710b6 100755 --- a/newest/vcfconcat.py +++ b/newest/vcfconcat.py @@ -3,44 +3,54 @@ Concatenates all SNPs from a VCF file in either FASTA or aln (clustal) format """ import os, sys -from pprint import pprint as pp import copy import argparse import multiprocessing +from pprint import pprint as pp from collections import defaultdict, Counter sys.path.insert(0, '.') import vcfmerger import editdist - - +from treemanager import fixsppname #GZ=SL2.40ch06g50000_000100001_000150000.vcf.gz.raw.vcf.gz; FA=$GZ.SL2.40ch06.fasta; ../vcfconcat.py -f -RIL -Rg -Rd -i $GZ; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree #FA=SL2.40ch06g50000_000100001_000150000.vcf.gz.SL2.40ch06.fasta; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree -def main(): - parser = argparse.ArgumentParser(description='Concatenate SNPs as a single sequence for each species.') - parser.add_argument('-c', '--chrom', '--chromosome', dest='chromosome', default=None , action='store' , nargs='?', type=str , help='Chromosome to filter [all]') - parser.add_argument('-I', '--ignore', '--skip' , dest='ignore' , default=[] , action='append' , nargs='*', type=str , help='Chromosomes to skip') - parser.add_argument('-s', '--start' , dest='start' , default=None , action='store' , nargs='?', type=int , help='Chromosome start position to filter [0]') - parser.add_argument('-e', '--end' , dest='end' , default=None , action='store' , nargs='?', type=int , help='Chromosome end position to filter [-1]') - parser.add_argument('-t', '--threads' , dest='threads' , default=0 , action='store' , nargs='?', type=int , help='Number of threads [num chromosomes]') - parser.add_argument('-f', '--fasta' , dest='fasta' , action='store_true' , help='Output in fasta format [default: clustal alignment .aln format]') - parser.add_argument('-r', '--noref' , dest='noref' , action='store_false', help='Do not print reference [default: true]') - parser.add_argument('-R', '--RIL' , dest='RIL' , action='store_true' , help='RIL mode: false]') - parser.add_argument('-Rm','--RIL-mads' , dest='RILmads' , default=0.25 , action='store' , nargs='?', type=float, help='RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]') - parser.add_argument('-Rs','--RIL-minsim' , dest='RILminsim' , default=0.75 , action='store' , nargs='?', type=float, help='RIL percentage of nucleotides identical to reference to classify as reference: 0.75]') - parser.add_argument('-Rg','--RIL-greedy' , dest='RILgreedy' , action='store_true' , help='RIL greedy convert nucleotides to either the reference sequence or the alternative sequence: false]') - parser.add_argument('-Rd','--RIL-delete' , dest='RILdelete' , action='store_true' , help='RIL delete invalid sequences: false]') +def main(args): + methods = { + 'median' : grouper_median, + 'linkage': grouper_linkage + } - parser.add_argument('-i', '--input' , dest='iinput' , default=None , type=str , help='Input file') - parser.add_argument('input' , default=None , action='store' , nargs='?', metavar='input file', type=str , help='Input file') - options = parser.parse_args() + dflmethod = methods.keys()[0] + parser = argparse.ArgumentParser(description='Concatenate SNPs as a single sequence for each species.') + parser.add_argument('-c', '--chrom', '--chromosome', dest='chromosome' , default=None , action='store' , nargs='?', type=str , help='Chromosome to filter [all]') + parser.add_argument('-I', '--ignore', '--skip' , dest='ignore' , default=[] , action='append' , nargs='*', type=str , help='Chromosomes to skip') + parser.add_argument('-s', '--start' , dest='start' , default=None , action='store' , nargs='?', type=int , help='Chromosome start position to filter [0]') + parser.add_argument('-e', '--end' , dest='end' , default=None , action='store' , nargs='?', type=int , help='Chromosome end position to filter [-1]') + parser.add_argument('-t', '--threads' , dest='threads' , default=0 , action='store' , nargs='?', type=int , help='Number of threads [num chromosomes]') + parser.add_argument('-f', '--fasta' , dest='fasta' , action='store_true' , help='Output in fasta format [default: clustal alignment .aln format]') + parser.add_argument('-r', '--noref' , dest='noref' , action='store_false', help='Do not print reference [default: true]') + parser.add_argument('-R', '--RIL' , dest='RIL' , action='store_true' , help='RIL mode: false]') + parser.add_argument('-Rm','--RIL-mads' , dest='RILmads' , default=0.25 , action='store' , nargs='?', type=float, help='RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]') + parser.add_argument('-Rs','--RIL-minsim' , dest='RILminsim' , default=0.75 , action='store' , nargs='?', type=float, help='RIL percentage of nucleotides identical to reference to classify as reference: 0.75]') + parser.add_argument('-Rg','--RIL-greedy' , dest='RILgreedy' , action='store_true' , help='RIL greedy convert nucleotides to either the reference sequence or the alternative sequence: false]') + parser.add_argument('-Rd','--RIL-delete' , dest='RILdelete' , action='store_true' , help='RIL delete invalid sequences: false]') + parser.add_argument('-M' ,'--RIL-method' , dest='groupMethod', default=dflmethod, action='store' , nargs='?', choices=methods.keys(), type=str , help='Clustering method for RIL selection of good and bad sequences [' + ','.join(methods.keys()) + ']') + + parser.add_argument('-i', '--input' , dest='input' , default=None , nargs='?', type=str , help='Input file') + #parser.add_argument('input' , default=None , action='store' , nargs='?', metavar='input file', type=str , help='Input file') + + options = parser.parse_args(args) + + print options + indexFile = None parallel = False @@ -67,20 +77,37 @@ def main(): - config['infile' ] = getOptionInFile(parser, options) - config['infile' ] = vcfmerger.checkfile(config['infile']) + config['infile' ] = options.input + config['ignore' ] = options.ignore + config['inchr' ] = options.chromosome + config['inend' ] = options.end + config['instart' ] = options.start + config['noref' ] = options.noref + config['threads' ] = options.threads + config['RIL' ] = options.RIL + config['RILmads' ] = options.RILmads + config['RILminsim' ] = options.RILminsim + config['RILgreedy' ] = options.RILgreedy + config['RILdelete' ] = options.RILdelete + config['groupMethod'] = options.groupMethod + + + + if config['groupMethod'] not in methods: + print "%s not a valid method" % config['groupMethod'] + sys.exit(1) + + config[ 'grouper' ] = methods[ config['groupMethod'] ] - config['ignore' ] = options.ignore - config['inchr' ] = options.chromosome - config['inend' ] = options.end - config['instart' ] = options.start - config['noref' ] = options.noref - config['threads' ] = options.threads - config['RIL' ] = options.RIL - config['RILmads' ] = options.RILmads - config['RILminsim'] = options.RILminsim - config['RILgreedy'] = options.RILgreedy - config['RILdelete'] = options.RILdelete + + + if options.input is None: + print "no input file defined" + sys.exit(1) + + if not os.path.exists(options.input): + print "input file %s does not exists" % options.input + sys.exit(1) if options.fasta: @@ -123,6 +150,8 @@ def main(): else: readData(config) + return config + def getOptionInFile(pars, opts): """ @@ -196,51 +225,50 @@ def readSources(config): """ print "reading sources" print "opening", - infhd = vcfmerger.openvcffile(config['infile'], 'r') - print "ok" - - print "reading" - - header = "" - - sources = {} - names = None - for line in infhd: - line = line.strip() - if len(line) == 0: continue - if line[0] == '#': - header += line + "\n" - if line.startswith('##sources='): - names = line[10:].split(';') - for name in names: - sources[name] = [] - #pp(sources) - - elif line.startswith('##numsources='): - numsources = int(line[13:]) - if numsources != len(sources): - print "error parsing sources" - sys.exit(1) - else: - print "num sources:",numsources + with vcfmerger.openvcffile(config['infile'], 'r') as infhd: + print "ok" + + print "reading" + + header = "" + + sources = {} + names = None + for line in infhd: + line = line.strip() + if len(line) == 0: continue + if line[0] == '#': + header += line + "\n" + if line.startswith('##sources='): + names = line[10:].split(';') + for name in names: + sources[name] = [] + #pp(sources) + + elif line.startswith('##numsources='): + numsources = int(line[13:]) + if numsources != len(sources): + print "error parsing sources", numsources, len(sources),sources, len(names), names + sys.exit(1) + else: + print "num sources:",numsources - continue - break + continue + break - sppmaxlength = 0 - for spp in sorted(sources): - sppname = fixsppname( spp ) - if len(sppname) > sppmaxlength: sppmaxlength = len(sppname) + sppmaxlength = 0 + for spp in sorted(sources): + sppname = fixsppname( spp ) + if len(sppname) > sppmaxlength: sppmaxlength = len(sppname) - sppmaxlength += 2 - sppmaxlength = "%-"+str(sppmaxlength)+"s" + sppmaxlength += 2 + sppmaxlength = "%-"+str(sppmaxlength)+"s" - infhd.close() - config['sppmaxlength'] = sppmaxlength - config['sources' ] = sources - config['names' ] = names - config['header' ] = header + config['sppmaxlength'] = sppmaxlength + config['sources' ] = sources + config['names' ] = names + config['header' ] = header def readData(config): @@ -461,13 +489,23 @@ def parse(config, refs, chro): printfilecoords(config, chro) - if config['oufhd'] is not None: + if config['oufhd'] is not None: config['oufhd'].write('\n\n') config['oufhd'].close() config['oufhd'] = None def RIL( config, refsStrs, sourcesStrs, emptyStr ): + high, low, valids, invalids = config[ 'grouper' ]( config, refsStrs, sourcesStrs, emptyStr ) + + #valids, invalids = filterSimilarity( valids, invalids, config, refsStrs, sourcesStrs, emptyStr ) + + refsStrs, sourcesStrs, emptyStr = filter( high, low, valids, invalids, config, refsStrs, sourcesStrs, emptyStr ) + + return ( refsStrs, sourcesStrs, emptyStr ) + + +def grouper_median( config, refsStrs, sourcesStrs, emptyStr ): distMatrix = {} mindist = sys.maxint @@ -514,18 +552,18 @@ def RIL( config, refsStrs, sourcesStrs, emptyStr ): good_low = [ k for k in distMatrix if distMatrix[k] <= threshold_low ] good_hig = [ k for k in distMatrix if distMatrix[k] >= threshold_hig ] - bad = [ k for k in distMatrix if distMatrix[k] > threshold_low and distMatrix[k] < threshold_hig ] - good = good_low + good_hig + invalids = [ k for k in distMatrix if distMatrix[k] > threshold_low and distMatrix[k] < threshold_hig ] + valids = good_low + good_hig good_low.sort() good_hig.sort() - bad.sort() - good.sort() + invalids.sort() + valids.sort() print 'GOOD LOW ', len(good_low), good_low print 'GOOD HIGH ', len(good_hig), good_hig - print 'BAD ', len(bad ), bad - print 'GOOD ', len(good ), good + print 'INVALIDS ', len(invalids), invalids + print 'VALIDS ', len(valids ), valids #print 'DISTS ', len(dists), " ".join(["%3d" % x for x in dists]) # @@ -563,11 +601,11 @@ def RIL( config, refsStrs, sourcesStrs, emptyStr ): #print 'GOOD HIGH ', len(good_hig), good_hig #print 'BAD ', len(bad ), bad - return refsStrs, sourcesStrs, emptyStr + return ( good_hig, good_low, valids, invalids ) -def RILo( config, refsStrs, sourcesStrs, emptyStr ): +def grouper_linkage( config, refsStrs, sourcesStrs, emptyStr ): #entropy_cutoff = config[ 'RILentro' ] # ##print "len names", len(names) @@ -596,8 +634,6 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): tolerance = config[ 'RILmads' ] - greedy = config[ 'RILgreedy' ] - minsim = config[ 'RILminsim' ] distMatrix = {} @@ -695,7 +731,7 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): print 'MAD min before', tolvalsminMAD print 'MAD max before', tolvalsmaxMAD - print 'tolerance', tolerance + print 'tolerance ', tolerance if tolerance > 0: tolvalsminMAD = tolvalsminMAD * tolerance tolvalsmaxMAD = tolvalsmaxMAD * tolerance @@ -757,8 +793,8 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): simonly = similar.difference( dissimilar ) disonly = dissimilar.difference( similar ) - valids = set().union(similar, dissimilar) - invalids = set(sourceskeys).difference(valids) + valids = set().union(similar, dissimilar) + invalids = set(sourceskeys).difference(valids) print "Similar ", len(sorted(list( similar ))), sorted(list( similar )) print "Disimilar", len(sorted(list( dissimilar ))), sorted(list( dissimilar )) @@ -780,9 +816,59 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): #Valids 45 ['609', '610', '612', '614', '615', '618', '623', '625', '626', '634', '644', '649', '651', '653', '654', '659', '660', '665', '666', '667', '668', '670', '674', '676', '679', '684', '685', '688', '691', '692', '693', '694', '696', '697', '702', '705', '706', '707', '710', '711', 'Slycopersicum MoneyMaker', 'Spimpinellifolium LA1578', 'Spimpinellifolium LA1584', 'Spimpinellifolium LYC2740', 'Spimpinellifolium LYC2798'] #Invalids 20 ['601', '603', '608', '611', '619', '622', '624', '630', '631', '639', '643', '646', '648', '656', '658', '669', '675', '678', '682', '701'] + return ( sorted(list(dissimilar)), sorted(list(similar)), sorted(list( valids )), sorted(list( invalids)) ) +def filterSimilarity( high, low, valids, invalids, config, refsStrs, sourcesStrs, emptyStr ): + minsim = config[ 'RILminsim' ] + + # CALCULATE PERCENTAGE OF SIMILARITY TO REFERENCE + refsims = {} + + for spp in valids: + sppseq = sourcesStrs[ spp ] + refsim = 0 + + for nucPos, nucSeq in enumerate( sppseq ): + refSeq = refsStrs[ nucPos ] + + if refSeq == nucSeq: + refsim += 1 + + refsims[ spp ] = ( float( refsim ) / float( len( sppseq ) ) ) + + + #print 'REF', refsStrs + #print 'ALT', altsStrs + + + # REPLACE SEQUENCES WITH EITHER REFERENCE OR ALTERNATIVE + modsCount = defaultdict( int ) + for spp in sourcesStrs: + if spp in refsims: + if refsims[ spp ] > minsim: + modsCount[ 'REF' ] += 1 + + else: + modsCount[ 'ALT' ] += 1 + invalids[ spp ] = valids[ spp ] + del valids[ spp ] + + + #for sppPos, sppNucs in enumerate( nucs ): + #print sppNucs, list(sppNucs), dict(sppNucs), sppNucs.values() + + for key in modsCount: + print "count", key, modsCount[ key ] + print "count NNN", len( invalids ) + + return ( valids, invalids, config, refsStrs, sourcesStrs, emptyStr ) + + +def filter( high, low, valids, invalids, config, refsStrs, sourcesStrs, emptyStr ): + greedy = config[ 'RILgreedy' ] + # IF IN GREEDY MODE, FIX NUCLEOTIDES FROM VALIDs if greedy: # COUNT NUCLEOTIDES IN EACH POSITION @@ -792,9 +878,7 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): for nucPos, nuc in enumerate( sppseq ): if len( nucs ) <= nucPos: nucs.append( Counter() ) - nucs[ nucPos ].update( [ nuc ]) - - + nucs[ nucPos ].update( [ nuc ] ) # GENERATE ALTERNATIVE STRING altsStrs = "" @@ -812,7 +896,7 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): else: #print 'pos %3d data %-10s ref %s' % ( nucPos, str(nucData), nucRef ), nucData.remove( nucRef ) - nucAlt = nucData[0] + nucAlt = nucData[0] #print 'alt', nucAlt altsStrs += nucAlt refsStrsN += nucRef @@ -821,47 +905,19 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): refsStrs = refsStrsN - # CALCULATE PERCENTAGE OF SIMILARITY TO REFERENCE - refsims = {} - - for spp in valids: - sppseq = sourcesStrs[ spp ] - refsim = 0 - - for nucPos, nucSeq in enumerate( sppseq ): - refSeq = refsStrs[ nucPos ] - - if refSeq == nucSeq: - refsim += 1 - refsims[ spp ] = ( float( refsim ) / float( len( sppseq ) ) ) - - #print 'REF', refsStrs - #print 'ALT', altsStrs - - - - # REPLACE SEQUENCES WITH EITHER REFERENCE OR ALTERNATIVE - modsCount = defaultdict( int ) for spp in sourcesStrs: - if spp in refsims: - if refsims[ spp ] > minsim: - #print " fixing spp %-30s to REF" % ( spp ) - sourcesStrs[ spp ] = refsStrs - modsCount[ 'REF' ] += 1 + if spp in low: + sourcesStrs[ spp ] = refsStrs + #print " fixing spp %-30s to REF" % ( spp ) + #print " fixing spp %-30s to ALT" % ( spp ) + #sourcesStrs[ spp ] = refsStrs + #sourcesStrs[ spp ] = altsStrs + elif spp in high: + sourcesStrs[ spp ] = altsStrs - else: - #print " fixing spp %-30s to ALT" % ( spp ) - sourcesStrs[ spp ] = altsStrs - modsCount[ 'ALT' ] += 1 - #for sppPos, sppNucs in enumerate( nucs ): - #print sppNucs, list(sppNucs), dict(sppNucs), sppNucs.values() - - for key in modsCount: - print "count", key, modsCount[ key ] - print "count NNN", len( invalids ) if config['RILdelete']: for spp in sourcesStrs.keys(): @@ -890,12 +946,6 @@ def RILo( config, refsStrs, sourcesStrs, emptyStr ): -def fixsppname(spp): - """ - Fix species name to be tree printable - """ - return spp.replace(' ', '_').replace('(', '').replace(')', '').replace("'", '') - def printfilename(config, chro, coords=False): inchr = config['inchr' ] @@ -991,4 +1041,4 @@ def printfilecoords(config, chro): if __name__ == '__main__': - main() + main(sys.argv[1:]) diff --git a/newest/vcffiltergff.py b/newest/vcffiltergff.py index 99d307a..6ab51b1 100755 --- a/newest/vcffiltergff.py +++ b/newest/vcffiltergff.py @@ -11,12 +11,12 @@ import argparse #import multiprocessing sys.path.insert(0, '.') -import vcfmerger +import filemanager import vcfconcat -def main(): +def main(args): parser = argparse.ArgumentParser(description='Filters VCF file according to parameters') parser.add_argument('-c', '--chrom', '--chromosome', dest='chromosome', default=None, action='store' , nargs='?', type=str, help='Chromosome to filter [all]') parser.add_argument('-g', '--gff' , dest='gff' , default=None, action='store' , nargs='?', type=str, help='Gff Coordinate file') @@ -31,11 +31,11 @@ def main(): parser.add_argument('-f', '--folder' , dest='outfolder' , default=None, action='store' , nargs='?', type=str, help='Overrride default output folder name') parser.add_argument('-o', '--out', '--output' , dest='outfile' , default=None, action='store' , nargs='?', type=str, help='Overrride default output name') - parser.add_argument('-i', '--input' , dest='iinput' , default=None, type=str, help='Input file') - parser.add_argument('input' , default=None, action='store' , nargs='?', metavar='input file', type=str, help='Input file') + parser.add_argument('-i', '--input' , dest='input' , default=None , nargs='?', type=str, help='Input file') + #parser.add_argument('input' , default=None, action='store' , nargs='?', metavar='input file', type=str, help='Input file') - options = parser.parse_args() + options = parser.parse_args(args) config = { 'infile' : None, @@ -56,10 +56,16 @@ def main(): print options - config['infile' ] = vcfconcat.getOptionInFile(parser, options) - config['infile' ] = vcfmerger.checkfile(config['infile']) + config['infile' ] = options.input config['outfile' ] = options.outfile + if options.input is None: + print "no input file defined" + sys.exit(1) + + if not os.path.exists(options.input): + print "input file %s does not exists" % options.input + sys.exit(1) config['outfolder'] = options.outfolder @@ -104,9 +110,9 @@ def main(): """ Creates index file for VCF file """ - vcfmerger.makeIndexFile( indexFile, config['infile'] ) + filemanager.makeIndexFile( indexFile, config['infile'] ) - config['idx'] = vcfmerger.readIndex(indexFile) + config['idx'] = filemanager.readIndex(indexFile) if config['ingff'] is not None: """ @@ -130,6 +136,11 @@ def main(): """ readData(config, verbose=verbose) + with open(config['outfolder']+'.ok', 'w') as fhd: + fhd.write(str(config)) + + return config + def readData(config, verbose=False): """ @@ -141,7 +152,7 @@ def readData(config, verbose=False): """ Creates a VCFMERGER instance to read the VCF file """ - config['infhd'] = vcfmerger.openvcffile(config['infile'], 'r') + config['infhd'] = filemanager.openvcffile(config['infile'], 'r') if config['outfile'] is None: @@ -169,7 +180,7 @@ def readData(config, verbose=False): """ Creates a VCFMERGER to save the output """ - config['oufhd'] = vcfmerger.openvcffile(config['outfn'], 'w') + config['oufhd'] = filemanager.openvcffile(config['outfn'], 'w') config['oufhd'].write( config['header'] ) runName = "all" @@ -223,6 +234,7 @@ def readData(config, verbose=False): finalChro = False numSnps = {} valids = {} + validst = 0 for line in config['infhd']: """ @@ -251,6 +263,7 @@ def readData(config, verbose=False): #src = cols[3] #dst = cols[4] + #print cols if lastChro != chro: print chro @@ -345,9 +358,10 @@ def readData(config, verbose=False): if chro not in valids: valids[chro] = 0 valids[chro] += 1 + validst += 1 if valids[chro] % 1000 == 0: - print "reading data :: %s :: %s %12d SNPs valid" % (runName, chro, valids[chro]) + print "reading data :: %s :: %s %12d SNPs valid %12d SNPs valid in total" % (runName, chro, valids[chro], validst) lastPosVal = posi @@ -356,14 +370,16 @@ def readData(config, verbose=False): If separated in different files """ + #print "getting last register" last_register = gffreader.get_last_register() - last_register_key = ( last_register['lastChrom'], last_register['lastStart'], last_register['lastEnd'], last_register['lastName'] ) + #print last_register + last_register_key = ( last_register['lastChrom'], last_register['lastStart'], last_register['lastEnd'], last_register['lastName'] ) - if last_register['prevStart'] < 0: + if last_register['lastStart'] < 0: """ If first register (no previous), skip """ - #print "continuing" + print "If first register (no previous), skip. continuing" continue if config['knife_last_register_key'] != last_register_key: @@ -408,10 +424,11 @@ def readData(config, verbose=False): knife_out_fn += ".vcf.gz" print " saving to knifed %s" % knife_out_fn + #sys.exit(1) config['knife_last_register_key'] = last_register_key config['knife_out_fn'] = knife_out_fn - config['knife_out_fhd'] = vcfmerger.openvcffile(knife_out_fn, 'w') + config['knife_out_fhd'] = filemanager.openvcffile(knife_out_fn, 'w') knife_extra = [ "##KNIFE-chromosome=%s" % last_register_key[0] , @@ -478,9 +495,9 @@ class gffReader(object): gffFileIdx = gffFile + ".idx" if not os.path.exists(gffFileIdx): - vcfmerger.makeIndexFile(gffFileIdx, gffFile) + filemanager.makeIndexFile(gffFileIdx, gffFile) - self.index = vcfmerger.readIndex(gffFileIdx) + self.index = filemanager.readIndex(gffFileIdx) self.lastChrom = None self.lastName = "-" self.lastStart = None @@ -541,7 +558,7 @@ class gffReader(object): """ Open GFF file """ - self.fhd = vcfmerger.openfile(self.infile, 'r') + self.fhd = filemanager.openfile(self.infile, 'r') def readRegister(self): """ @@ -982,4 +999,4 @@ class fasta(object): return self.index[chro]['len'] if __name__ == '__main__': - main() + main(sys.argv[1:]) diff --git a/newest/vcfmerger.py b/newest/vcfmerger.py index 79ee15f..df18ab0 100755 --- a/newest/vcfmerger.py +++ b/newest/vcfmerger.py @@ -11,6 +11,9 @@ import math import copy from pprint import pprint as pp +sys.path.insert(0, '.') +from filemanager import getFh,openvcffile,openfile,checkfile,makeIndexFile,readIndex + #try: # sys.path.insert(0, 'aux/pypy-2.0/pypy-2.0-beta2/pypy-pypy-4b60269153b5/') # from rpython.rlib.jit import JitDriver, purefunction @@ -132,131 +135,6 @@ def getBits(val): return res -def checkfile(infile): - """ - Checks whether file exists - """ - if infile is None: - print "no input file given" - sys.exit(1) - - if isinstance(infile, list): - if len(infile) > 1: - print "more than one file given" - print infile - sys.exit(1) - - infile = infile[0] - - if not os.path.exists(infile): - print "input file %s does not exists" % infile - sys.exit(1) - - return infile - - -def openfile(infile, method, compresslevel=1): - """ - Open file handling if compressed or not - """ - fhd = None - - if infile.endswith('.gz'): - fhd = gzip.open(infile, method+'b', compresslevel) - - else: - fhd = open(infile, method) - - return fhd - - -def openvcffile(infile, method, **kwargs): - """ - Open vcf file checking for extension and handling compression - """ - fhd = openfile(infile, method, **kwargs) - if infile.endswith('.vcf.gz'): - pass - - elif infile.endswith('.vcf'): - pass - - else: - print "unknown file type" - sys.exit(1) - return fhd - - -def getFh(infile): - """ - Returns file handler. Handles compression given the .gz extension - """ - ifh = None - if infile.endswith('gz'): - ifh = gzip.open(infile, 'rb') - else: - ifh = open(infile, 'r') - return ifh - - -def makeIndexFile( indexFile, infile ): - """ - Indexes the start position of each chromosome and creates a index file - """ - print "creating index" - chroms = {} - - ifh = getFh(infile) - - for line in iter(ifh.readline, ''): - if line[0] == "#": - continue - - line = line.strip() - if len(line) == 0: continue - - chro = line.split("\t")[0] - - if chro not in chroms: - beginPos = ifh.tell() - len(line) - chroms[chro] = beginPos - print "creating index :: chrom %s curr pos %15d begin pos %15d" % (chro, ifh.tell(), beginPos) - - for chro in sorted(chroms): - pos = chroms[chro] - ifh.seek(pos) - print ifh.readline(), - - ifh.close() - - with open(indexFile, 'w') as ofh: - for chrom in sorted(chroms): - ofh.write( "%s\t%d\n" % ( chrom, chroms[chrom] ) ) - - -def readIndex(indexFile): - """ - Reads VCF index file - """ - print "reading index",indexFile - chroms = {} - with open(indexFile, 'r') as ifh: - for line in ifh: - line = line.strip() - if len(line) == 0: continue - print "INDEX LINE", line, - chrom, pos = line.strip().split("\t") - pos = int(pos) - if pos > 0: - pos -= 1 - chroms[chrom] = pos - print "reading index :: chrom %s pos %15d" % ( chrom, pos ) - return chroms - - - - - class vcfResult(object): """ @@ -269,6 +147,11 @@ class vcfResult(object): excludeSingleton = None noncarefiles = None simpliStats = None + prints = 0 + printsReal = 0 + printsRealLast = 0 + print_every = 1000 + linelen = 100 def __init__(self, simplify=SIMP_NO_SIMPLIFICATION, noncarefiles=[]): self.result = None @@ -278,6 +161,7 @@ class vcfResult(object): self.posCount = None self.simplify = simplify + if vcfResult.simpliStats is None: vcfResult.simpliStats = { 'Heterozygous Dest' : 0, @@ -303,13 +187,24 @@ class vcfResult(object): print "simplifying :: Heterozygous [%3d, %3d] %s" % ( SIMP_EXCL_HETEROZYGOUS, math.log(SIMP_EXCL_HETEROZYGOUS , 2 ), str(vcfResult.excludHET ) ) print "simplifying :: Indel [%3d, %3d] %s" % ( SIMP_EXCL_INDEL , math.log(SIMP_EXCL_INDEL , 2 ), str(vcfResult.excludeINDEL ) ) print "simplifying :: Singleton [%3d, %3d] %s" % ( SIMP_EXCL_SINGLETON , math.log(SIMP_EXCL_SINGLETON , 2 ), str(vcfResult.excludeSingleton) ) + + print 'Progress Legend:' + print ' print every :', vcfResult.print_every + print ' Heterozygous Dest : h' + print ' Heterozygous Indel: I' + print ' Homozygous Indel : i' + print ' Source Indel : s' + print ' Singleton 1 : 1' + print ' Singleton 2 : 2' + print ' Singleton 3 : 3' + print ' Ok : .' + #sys.exit(0) #TODO: IMPLEMENT if vcfResult.noncarefiles is None: vcfResult.noncarefiles = noncarefiles - def simplifier(self, srcs): """ Reads each register and simplifies it @@ -352,6 +247,29 @@ class vcfResult(object): return simpl #return srcs + def printprogress(self, msg, key=None, skip=0): + vcfResult.prints += 1 + + if skip != 0: + if key is None: + if vcfResult.prints % skip == 0: + sys.stderr.write(msg) + vcfResult.printsReal += 1 + + else: + if vcfResult.simpliStats[key] % skip == 0: + sys.stderr.write(msg) + vcfResult.printsReal += 1 + else: + sys.stderr.write(msg) + vcfResult.printsReal += 1 + + if vcfResult.printsReal % vcfResult.linelen == 0 and vcfResult.printsReal != vcfResult.printsRealLast: + vcfResult.printsRealLast = vcfResult.printsReal + sys.stderr.write(' {:14,d}\n'.format( vcfResult.prints ) ) + + sys.stderr.flush() + def __str__(self): #SL2.40ch01 2118853 . T G 222 . DP=40;AF1=1;CI95=1,1;DP4=0,0,16,23;MQ=60;FQ=-144 GT:PL:DP:GQ 1/1:255,117,0:39:99 @@ -374,13 +292,19 @@ class vcfResult(object): #outFileHandle << "\n"; - srcs = {} + srcs = {} + rcount = 0 #print "RESULTS", self.result for register in self.result: + rcount += 1 if register is None: print "register #%d is empty" % register sys.exit( 1 ) + #if rcount % 100 == 0: + # sys.stderr.write("\n") + # sys.stderr.flush() + chrom = register['chrom' ] posit = register['pos' ] sourc = register['src' ] @@ -401,7 +325,7 @@ class vcfResult(object): if ( vcfResult.excludHET ) and ( desti.find(',') != -1 ): vcfResult.simpliStats['Heterozygous Dest'] += 1 #print "heretozygous: %s" % desti - sys.stderr.write('h') + self.printprogress('h', key='Heterozygous Dest', skip=vcfResult.print_every) #return "" continue @@ -412,7 +336,7 @@ class vcfResult(object): if len(alt) > 1: vcfResult.simpliStats['Heterozygous Indel'] += 1 #print "has het indel: %s > %s" % ( desti, alt ) - sys.stderr.write('I') + self.printprogress('I', key='Heterozygous Indel', skip=vcfResult.print_every) #return "" continue @@ -420,7 +344,7 @@ class vcfResult(object): if ( len( desti ) > 1 ): vcfResult.simpliStats['Homozygous Indel'] += 1 #print "has hom indel: %s" % desti - sys.stderr.write('i') + self.printprogress('i', key='Homozygous Indel', skip=vcfResult.print_every) #return "" continue @@ -428,7 +352,7 @@ class vcfResult(object): vcfResult.simpliStats['Source Indel'] += 1 # todo: if len(src) != len(tgt) #print "not single nucleotide source: %s" % str(register) - sys.stderr.write('s') + self.printprogress('s', key='Source Indel', skip=vcfResult.print_every) #return "" continue @@ -467,12 +391,13 @@ class vcfResult(object): for dst in srcs[src]: nv += 1 allspps.extend( srcs[src][dst] ) + ns = len( set(allspps) ) if (vcfResult.excludeSingleton) and (ns == 1): vcfResult.simpliStats['Singleton 1'] += 1 #print "singleton ns: %s" % str(srcs) - sys.stderr.write('1') + self.printprogress('1', key='Singleton 1', skip=vcfResult.print_every) return "" for src in sorted( srcs ): @@ -493,7 +418,7 @@ class vcfResult(object): if (vcfResult.excludeSingleton) and (nu == 1): vcfResult.simpliStats['Singleton 2'] += 1 #print "singleton 1: %s" % str(srcs) - sys.stderr.write('2') + self.printprogress('2', key='Singleton 2', skip=vcfResult.print_every) continue files_str = ",".join( files ) @@ -505,11 +430,11 @@ class vcfResult(object): if len(restr) == 0: vcfResult.simpliStats['Singleton 3'] += 1 #print "singleton l: %s" % str(srcs) - sys.stderr.write('3') - + self.printprogress('3', key='Singleton 3', skip=vcfResult.print_every) + else: vcfResult.simpliStats['Ok'] += 1 - sys.stderr.write('.') + self.printprogress('.', key='Ok', skip=vcfResult.print_every) return restr @@ -550,15 +475,6 @@ class vcfRegister(dict): ) return res - #chrom = None - #pos = None - #src = None - #dst = None - #desc = None - #state = None - #filename = None - #filedesc = None - #filecare = None class vcfFile(object): @@ -610,19 +526,21 @@ class vcfFile(object): if len(cols) == 0: self.next() return + elif currLine[0] == "#": if currLine.startswith('##sources='): self.names = currLine[10:].split(';') pp(self.names) + elif currLine.startswith('##numsources='): numsources = int(currLine[13:]) if numsources != len(self.names): print "error parsing sources" + print "num sources", numsources,"!=",len(self.names),sorted(self.names) sys.exit(1) else: print "num sources:",numsources - self.next() return @@ -632,15 +550,55 @@ class vcfFile(object): self.register['src' ] = cols[3] self.register['dst' ] = cols[4] - if len( cols ) > 9: - self.register['desc' ] = cols[9] - else: - self.register['desc' ] = "" + if len( cols ) > 9: + self.register['desc' ] = cols[9] + + try: + #print "has desc" + info = cols[8].split(':') + + except: + info = [] + + #print " info", info + if len(info) > 1: + try: + gtpos = info.index('GT') + + except: + gtpos = None + + if gtpos is not None: + #print " GT pos", gtpos + desc = self.register['desc' ].split(":") + #print " desc", desc + if len(info) == len(desc): + #print " len info == len desc", info, desc + gtinfo = desc[gtpos] + #print " gtinfo", gtinfo + #print " gt1", gtinfo[ 0] + #print " gt2", gtinfo[-1] + if any([gt == '0' for gt in (gtinfo[ 0], gtinfo[-1])]): + #print "has desc" + #print " info", info + #print " GT pos", gtpos + #print " desc", desc + #print " len info == len desc", info, desc + #print " gtinfo", gtinfo + #print " gt1", gtinfo[ 0] + #print " gt2", gtinfo[-1] + #print " adding src to dst", self.register['src' ], self.register['dst' ] + + self.register['dst' ] = ",".join(sorted(list(set([self.register['src' ]] + self.register['dst' ].split(","))))) + #print " added src to dst", self.register['dst' ] + + else: + self.register['desc' ] = "" self.register['state'] = self.state except (RuntimeError, TypeError, NameError): - print RuntimeError, TypeError, NameError + print RuntimeError, TypeError, NameError print "error getting colums", cols, currLine, "\n" #self.next() #return @@ -707,7 +665,7 @@ class vcfHeap(object): """ Adds file to be taken track of """ - print "adding file to heap" + print "adding file to heap" filecare = filecare == '1' self.filePos[ (fileName, filedesc, filecare) ] = len( self.fileNames ) @@ -716,18 +674,18 @@ class vcfHeap(object): self.fileNames.append( (fileName, filedesc, filecare) ) - print "creating vcf handler" + print "creating vcf handler" self.filehandle.append( vcfFile( fileName, filedesc, filecare) ); - print "created vcf handler" + print "created vcf handler" self.fileStates.append( True ); self.numOpenFiles += 1 - print "getting first register" + print "getting first register" firstRegister = self.getRegister( self.filePos[ ( fileName, filedesc, filecare ) ] ); - print "got first register" + print "got first register" self.heapHead.append( firstRegister ); - print "added to heap" + print "added to heap" def getFileNames(self): """ @@ -917,6 +875,7 @@ class vcfHeap(object): """ Returns a VCF header """ + header = """\ ##fileformat=VCFv4.1 ##fileDate=%s @@ -930,18 +889,13 @@ class vcfHeap(object): ##INFO=<ID=NU,Number=1,Type=Integer,Description=\"Number of Species Having Source and Target Nucleotides\"> ##FORMAT=<ID=FI,Number=1,Type=String,Description=\"Source Files\"> #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tFILENAMES -""" % (self.ctime, ";".join( self.getFileDesc() ), self.getNumOpenFiles() ) +""" % (self.ctime, ";".join( self.getFileDesc() ), self.getNumFiles() ) return header -def main(): - if len(sys.argv) != 2: - print "wrong number of arguments. %s <input list>" % sys.argv[0] - sys.exit(1) - +def main(incsv): data = vcfHeap() - incsv = sys.argv[ 1 ] outfile = incsv + '.vcf.gz' if not os.path.exists( incsv ): @@ -974,13 +928,14 @@ def main(): print "error parsing" sys.exit(1) - data.addFile(*cols) + print cols, cols[:3] + data.addFile(*cols[:3]) - mfh = openvcffile(outfile, 'w', compresslevel=1) + mfh = openvcffile(outfile + '.tmp.vcf.gz', 'w', compresslevel=1) mfh.write( data.getVcfHeader() ) @@ -1002,7 +957,17 @@ def main(): mfh.close() + os.rename(outfile + '.tmp.vcf.gz', outfile) + print "Finished" + return outfile + if __name__ == '__main__': - main() + if len(sys.argv) != 2: + print "wrong number of arguments. %s <input list>" % sys.argv[0] + sys.exit(1) + + incsv = sys.argv[ 1 ] + + main(incsv) diff --git a/scripts/median_polish/median_polish.py b/scripts/median_polish/median_polish.py index 79c84ad..945adf7 100755 --- a/scripts/median_polish/median_polish.py +++ b/scripts/median_polish/median_polish.py @@ -5,6 +5,7 @@ import pylab from scipy.stats import norm import sys import os +import copy import argparse @@ -62,7 +63,8 @@ class MedianPolish: self.x_axis_name = x_axis_name self.y_axis_name = y_axis_name self.reorder = reorder - + self.rounds = [] + self.get_fn_props() if title is None: self.title = os.path.basename( self.fname ).replace( os.path.basename( sys.argv[0] ), "" ).replace( '.', ' ' ).replace( '_', ' ' )[:-4] @@ -125,8 +127,9 @@ class MedianPolish: self.col_effects = np.zeros(shape=self.tbl.shape[1]) median_row_effects = 0 median_col_effects = 0 - iter = 0 - sm = 0 + iternum = 0 + self.sm = 0 + self.sm_delta = sys.maxint prev_sm = -sys.maxint - 1 func = np.median @@ -135,7 +138,7 @@ class MedianPolish: print "method", method - while ((iter < max_iterations) & (abs(sm - prev_sm) >= eps)): + while ((iternum < max_iterations) & (self.sm_delta >= eps)): row_medians = func( self.tbl , 1 ) self.row_effects += row_medians median_row_effects = func( self.row_effects ) @@ -151,24 +154,27 @@ class MedianPolish: self.tbl -= col_medians self.grand_effect += median_col_effects - prev_sm = sm - sm = np.sum(np.absolute(self.tbl)) - print "method %7s #%3d prev sm %16.4f sm %16.4f eps %5.4f diff %16.4f valid %r" % ( method, iter, prev_sm, sm, eps, abs(sm - prev_sm), (abs(sm - prev_sm) < eps)) - iter += 1 + self.sm = np.sum(np.absolute(self.tbl)) + self.sm_delta = abs(self.sm - prev_sm) + print "method %7s #%3d prev sm %16.4f sm %16.4f eps %5.4f diff %16.4f valid %r" % ( method, iternum, prev_sm, self.sm, eps, self.sm_delta, (self.sm_delta < eps)) + iternum += 1 + prev_sm = self.sm - if str(intermediates) != str(False): + if intermediates: self.getValid() - np.savetxt(intermediates+".median.valids_vals.round.%03d.tsv" % iter, self.res_valid_tbl, delimiter="\t") - np.savetxt(intermediates+".median.valids_bool.round.%03d.tsv" % iter, self.res_valid , delimiter="\t") + tables, graphs = self.get_vars() + self.rounds.append([copy.deepcopy(tables), copy.deepcopy(graphs)]) + #np.savetxt(str(intermediates)+".median.valids_vals.round.%03d.tsv" % iternum, self.res_valid_tbl, delimiter="\t") + #np.savetxt(str(intermediates)+".median.valids_bool.round.%03d.tsv" % iternum, self.res_valid , delimiter="\t") self.getValid() def getValid(self): - threshold = abs(norm.ppf(self.pval)) + self.threshold = abs(norm.ppf(self.pval)) #print "threshold", threshold - res_mean = np.average( self.tbl ) - res_stddev = np.std( self.tbl, dtype=np.float32 ) + self.res_mean = np.average( self.tbl ) + self.res_stddev = np.std( self.tbl, dtype=np.float32 ) #print "mean", res_mean, "stddev", res_stddev @@ -180,32 +186,29 @@ class MedianPolish: #print "table" #print self.tbl - res_valid_tbl = self.tbl - res_mean + self.res_valid_tbl = self.tbl - self.res_mean #print "table - mean" #print res_valid_tbl - res_valid_tbl = res_valid_tbl / res_stddev + self.res_valid_tbl = self.res_valid_tbl / self.res_stddev #print "(table - mean) / stddev" #print res_valid_tbl - res_valid = np.asarray( res_valid_tbl.copy() ) + self.res_valid = np.asarray( self.res_valid_tbl.copy() ) #print "mean tbl\n" , res_mean_tbl #print "stddev tbl\n", res_stddev_tbl #print "valid tbl\n" , res_valid_tbl - for i in range(res_valid_tbl.shape[0]): - for j in range(res_valid_tbl.shape[1]): - if abs(res_valid[i,j]) > threshold: - if res_valid[i,j] > 0: - res_valid[i,j] = 1 + for i in range(self.res_valid_tbl.shape[0]): + for j in range(self.res_valid_tbl.shape[1]): + if abs(self.res_valid[i,j]) > self.threshold: + if self.res_valid[i,j] > 0: + self.res_valid[i,j] = 1 else: - res_valid[i,j] = 1 + self.res_valid[i,j] = 1 else: - res_valid[i,j] = 0 - - self.res_valid_tbl = res_valid_tbl - self.res_valid = res_valid + self.res_valid[i,j] = 0 def do_reorder(self, tbl): if self.reorder is not None: @@ -400,52 +403,78 @@ class MedianPolish: def get_vars(self): tables = [ - [ "grand_effect", "grand_effect", 'float'], - [ "col_effects" , "col_effects" , 'list' ], - [ "row_effects" , "row_effects" , 'list' ] + [ "grand_effect", "grand_effect", 'float', copy.deepcopy(self.grand_effect) ], + [ "col_effects" , "col_effects" , 'list' , copy.deepcopy(self.col_effects ) ], + [ "row_effects" , "row_effects" , 'list' , copy.deepcopy(self.row_effects ) ], + [ "sm" , "sm" , 'float', copy.deepcopy(self.sm ) ], + [ "sm_delta" , "sm_delta" , 'float', copy.deepcopy(self.sm_delta ) ], + [ "pval" , "pval" , 'float', copy.deepcopy(self.pval ) ], + [ "threshold" , "threshold" , 'float', copy.deepcopy(self.threshold ) ], + [ "res_mean" , "res_mean" , 'float', copy.deepcopy(self.res_mean ) ], + [ "res_stddev" , "res_stddev" , 'float', copy.deepcopy(self.res_stddev ) ], ] - # class property file name title scale name position position clean + # class property file name title scale name position position clean graphs = [ - [ "tbl_org" , "original" , "Raw Values" , "Number of SNPs", 141, 121 ], - [ "tbl" , "residuals" , "Residuals" , "" , 142, None ], - [ "res_valid_tbl", "valids_vals", "Residuals Norm" , "Sigmas" , 143, None ], - [ "res_valid" , "valids_bool", "Residuals Valids ( Alpha = " + str( self.pval ) + " )", None , 144, 122 ] + [ "tbl_org" , "original" , "Raw Values" , "Number of SNPs", 141, 121 , copy.deepcopy(self.tbl_org ) ], + [ "tbl" , "residuals" , "Residuals" , "" , 142, None , copy.deepcopy(self.tbl ) ], + [ "res_valid_tbl", "valids_vals", "Residuals Norm" , "Sigmas" , 143, None , copy.deepcopy(self.res_valid_tbl) ], + [ "res_valid" , "valids_bool", "Residuals Valids ( Alpha = " + str( self.pval ) + " )", None , 144, 122 , copy.deepcopy(self.res_valid ) ] ] return tables, graphs def export_tables(self, tables, graphs, basename): - for var in tables: - valN = var[0] - name = var[1] - eType = var[2] - val = self.__dict__[ valN ] - - bn = "%s.%s.%s%s" % ( basename, self.method, name, self.ext ) - - print "exporting %s" % bn - - if eType == 'list': - np.savetxt( bn, val, delimiter=self.sep ) - - elif eType == 'float': - with open(bn, 'w') as fhd: - fhd.write(str(val)) - - for var in graphs: - valN = var[0] - name = var[1] - name_ptr = var[2] - do_scale = var[3] - val = self.__dict__[ valN ] - - - bn = "%s.%s.%s%s" % ( basename, self.method, name, self.ext ) - - print "exporting %s" % bn - - np.savetxt( bn, val, delimiter=self.sep ) + gbn = "%s.%s.%s%s" % ( basename, self.method, 'global', self.ext ) + print 'Saving global %s' % gbn + + with open(gbn, 'w') as gbhd: + gbhd.write('\n%s\n%s\n' % ('method' ,self.method)) + + for var in tables: + valN = var[0] + name = var[1] + eType = var[2] + val = var[3] + #val = self.__dict__[ valN ] + + bn = "%s.%s.%s%s" % ( basename, self.method, name, self.ext ) + + print "exporting table %s" % bn + + gbhd.write('\n%s\n' % name) + if eType == 'list': + np.savetxt( bn , val, delimiter=self.sep ) + np.savetxt( gbhd, val, delimiter=self.sep ) + + elif eType == 'float': + with open(bn, 'w') as fhd: + fhd.write( str(val)) + gbhd.write(str(val)) + + gbhd.write('\n') + + for var in graphs: + valN = var[0] #class property + name = var[1] #file name + #name_ptr = var[2] #title + #do_scale = var[3] #scale name + #position = var[4] # position + #positionC = var[5] # position clean + val = var[6] # val + #print "VAL", valN, val + #val = self.__dict__[ valN ] + + + bn = "%s.%s.%s%s" % ( basename, self.method, name, self.ext ) + + print "exporting graph %s" % bn + sys.stdout.flush() + + gbhd.write( '\n%s\n' % name ) + np.savetxt( bn , val, delimiter=self.sep ) + np.savetxt( gbhd, val, delimiter=self.sep ) + gbhd.write( '\n' ) def export_graph(self, graphs, basename, epitope=None, font_size_ttl=18, font_size_ax=16,font_size_lbl=18, sz=14, szw=14, szh=14, DPI=1200, x_rotate=0, row_names=[], col_names=[], do_pdf=False ): #szw = sz*3*3 @@ -499,10 +528,19 @@ class MedianPolish: pylab.clf() def export( self, basename, font_size_ttl=18, font_size_ax=16, font_size_lbl=18, sz=14, DPI=1200, x_rotate=0, row_names=[], col_names=[], do_pdf=False, do_image=True, do_tables=True ): + if len(self.rounds) > 0: + for r, v in enumerate(self.rounds): + tables, graphs = v + self._export( basename + ('_round_%d'%r), tables, graphs, font_size_ttl=font_size_ttl, font_size_ax=font_size_ax, font_size_lbl=font_size_lbl, sz=sz, DPI=DPI, x_rotate=x_rotate, row_names=row_names, col_names=col_names, do_pdf=do_pdf, do_image=do_image, do_tables=do_tables) + tables, graphs = self.get_vars() + + self._export( basename, tables, graphs, font_size_ttl=font_size_ttl, font_size_ax=font_size_ax, font_size_lbl=font_size_lbl, sz=sz, DPI=DPI, x_rotate=x_rotate, row_names=row_names, col_names=col_names, do_pdf=do_pdf, do_image=do_image, do_tables=do_tables) + def _export(self, basename, tables, graphs, font_size_ttl=18, font_size_ax=16, font_size_lbl=18, sz=14, DPI=1200, x_rotate=0, row_names=[], col_names=[], do_pdf=False, do_image=True, do_tables=True): if do_tables: self.export_tables(tables, graphs, basename) + else: print "SKIPPING EXPORTING TABLES" -- GitLab