Skip to content
Snippets Groups Projects
Commit 398ab7eb authored by Aflitos, Saulo Alves's avatar Aflitos, Saulo Alves
Browse files

converter for multicolumn vcf. again

parent e7aeb5bd
No related branches found
No related tags found
No related merge requests found
......@@ -10,62 +10,62 @@ timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
#./vcfmerger/aux/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_50k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_50k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_50k_sing
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_10k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_10k_sing
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
#./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_genes
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k_introgression
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k_introgression
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_50k
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_greedy
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete_greedy
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_10k
SCRIPT_DIR = 'vcfmerger'
AUX_DIR = os.path.join(SCRIPT_DIR, 'aux')
AUX_DIR = os.path.join(SCRIPT_DIR)
merger = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcfmerger.py' ) )
......@@ -76,8 +76,8 @@ walk_ram = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_ram.py' ) )
walk_sql = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_sql.py' ) )
cluster = os.path.abspath( os.path.join( SCRIPT_DIR, 'cluster.py' ) )
topng = os.path.abspath( os.path.join( SCRIPT_DIR, 'newick_to_png.py') )
fasta_spacer = os.path.abspath( os.path.join( AUX_DIR , 'fasta_spacer.py' ) )
tree_maker = os.path.abspath( os.path.join( AUX_DIR , 'FastTreeMP' ) )
fasta_spacer = os.path.abspath( os.path.join( SCRIPT_DIR, 'fasta_spacer.py' ) )
tree_maker = os.path.abspath( os.path.join( SCRIPT_DIR, 'FastTreeMP' ) )
class makewriter(object):
......@@ -372,7 +372,7 @@ def main(args):
if infasta:
#vcfmerger/aux/fasta_spacer.py GENOME.fa 50000
#vcfmerger/fasta_spacer.py GENOME.fa 50000
gff_cmd = "%s %s %s" % (fasta_spacer, infasta, size)
writer.write( infasta, filter_gff, gff_cmd, nick='gff' )
......
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
./vcfmerger/aux/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_50k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_50k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_50k_sing
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_10k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_10k_sing
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k
./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_genes
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k_introgression
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k_introgression
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_50k
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
make -f makefile_RIL_50k_mode_ril
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_greedy
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete_greedy
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_10k
......
#!/usr/bin/python
import os
import sys
import string
# Genotype prefixes whose calls are not exported to the per-sample files:
# '0/0' is the reference call and './.' marks no coverage.
ignores = ['0/0', './.']

# Characters allowed verbatim in generated file names: underscore, ASCII
# letters and digits.
valid_chars = frozenset("_" + string.ascii_letters + string.digits)


def sanitize(name):
    """Return *name* with every character outside ``valid_chars`` turned into '_'.

    Used to derive a file-system friendly token from a sample column name.
    """
    cleaned = []
    for char in name:
        if char not in valid_chars:
            char = '_'
        cleaned.append(char)
    return ''.join(cleaned)
def _open_outputs(infile, defs, shared, names, outfiles):
    """Create one VCF per sample column and the "<infile>.lst" index file.

    infile   -- path of the multicolumn file being split
    defs     -- "##" definition lines collected so far (copied to each output)
    shared   -- the first nine header columns, common to all outputs
    names    -- sample column names (header columns after the ninth)
    outfiles -- list extended in place with one
                [name, path, handle, skipped, valid] entry per sample

    Entries are appended as soon as each handle is opened so that the caller
    can close everything even if creation fails half-way through.
    """
    # Formats depend only on the number and length of the names: build once.
    num_width = len("%d" % len(names))
    name_width = max([len(x) for x in names] or [0])
    file_fmt = "%s_%0" + str(num_width) + "d_%s.vcf"
    log_fmt = "creating %" + str(num_width) + "d %-" + str(name_width) + "s to %s"
    source = os.path.abspath(infile)

    with open("%s.lst" % infile, 'w') as outlist:
        for idx, name in enumerate(names):
            nof = file_fmt % (infile, idx + 1, sanitize(name))
            print(log_fmt % (idx + 1, name, nof))

            nop = open(nof, 'w')
            #                name  path handle skipped valid
            outfiles.append([name, nof, nop, 0, 0])

            outlist.write("1\t%s\t%s\n" % (os.path.abspath(nof), name))

            nop.write("\n".join(defs) + "\n")
            nop.write("##Split from: %s column %d\n" % (source, idx + 1))
            nop.write("\t".join(shared))
            nop.write("\t%s\n" % name)
            nop.flush()


def main():
    """Split a multi-sample (multicolumn) VCF into one VCF per sample.

    The input path is taken from ``sys.argv[1]``.  For every sample column
    found in the single-'#' header line a file
    ``<input>_<NN>_<sanitized sample name>.vcf`` is written containing the
    "##" definition lines, a "##Split from" line, the nine shared columns and
    that sample's column.  Calls starting with one of the prefixes in
    ``ignores`` are counted as skipped and not written.  A ``<input>.lst``
    file listing the generated files is written as well.

    Exits with status 1 (after printing a message) when no input is given,
    the input does not exist or is a directory, a data line appears before
    the column header, or a data line has a different number of columns than
    the header.

    All messages use single-argument ``print(...)`` so the script runs
    unchanged on Python 2 and Python 3.
    """
    try:
        infile = sys.argv[1]
    except IndexError:
        print("no input file given")
        print("%s %s" % (sys.argv[0], "<INPUT MULTICOLUMN CSV>"))
        sys.exit(1)

    if not os.path.exists(infile):
        print("input file %s does not exists" % infile)
        sys.exit(1)

    if os.path.isdir(infile):
        print("input file %s is a folder" % infile)
        sys.exit(1)

    print("splitting %s" % infile)

    defs = []
    outfiles = []
    num_cols = None
    # str.startswith accepts a tuple, so all prefixes are tested in one call.
    skip_prefixes = tuple(ignores)

    try:
        with open(infile) as fhd:
            for line_num, line in enumerate(fhd, 1):
                line = line.strip()
                if not line:
                    continue

                if line.startswith("##"):  # definition lines
                    defs.append(line)
                    continue

                cols = line.split("\t")

                if line.startswith("#"):  # column description
                    # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT + samples
                    num_cols = len(cols)
                    # A repeated header starts over: release the handles
                    # opened for the previous one before re-creating them.
                    for entry in outfiles:
                        entry[2].close()
                    del outfiles[:]
                    _open_outputs(infile, defs, cols[:9], cols[9:], outfiles)
                    continue

                # Explicit validation instead of assert (stripped under -O).
                if num_cols is None:
                    print("line %d of %s: data found before the column header line"
                          % (line_num, infile))
                    sys.exit(1)

                if len(cols) != num_cols:
                    print("line %d of %s: expected %d columns, found %d"
                          % (line_num, infile, num_cols, len(cols)))
                    sys.exit(1)

                prefix = "\t".join(cols[:9])
                for pos, ndata in enumerate(cols[9:]):
                    entry = outfiles[pos]
                    if ndata.startswith(skip_prefixes):
                        entry[3] += 1  # skipped
                        continue

                    entry[4] += 1  # valid
                    entry[2].write(prefix + "\t%s\n" % ndata)
    finally:
        # Always release the per-sample handles, also on error exits.
        for entry in outfiles:
            entry[2].close()

    summary_fmt = ("closing %" + str(len("%d" % len(outfiles)))
                   + "d %-" + str(max([len(x[0]) for x in outfiles] or [0]))
                   + "s :: %-" + str(max([len(x[1]) for x in outfiles] or [0]))
                   + "s :: skipped %6d exported %6d total %7d")

    for idx, entry in enumerate(outfiles):
        print(summary_fmt % (idx + 1, entry[0], entry[1],
                             entry[3], entry[4], entry[3] + entry[4]))


if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment