diff --git a/.gitignore b/.gitignore index 1a22bef91de95c3e3895a1481d166162daccc04a..87b9045e5ea387a3bbff850d38b2908e17066524 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ *.dot *.png *.html +*.out diff --git a/SConstruct.run b/SConstruct.run index f72eaeb8869db30461986737e359f59ce2d73aad..166d31bf353f2c012a5b4ab3fcdd665b8543b747 100755 --- a/SConstruct.run +++ b/SConstruct.run @@ -1,4 +1,5 @@ #!/bin/bash +set -xeu LOGFILE="SConstruct.run.log."`date +%Y%m%d_%H%M%S`".out" #echo "LOGFILE $LOGFILE" python scons/scons.py -n --stack-size=1024 --diskcheck=none $@ 2>&1 | tee $LOGFILE diff --git a/SConstruct.tree.sh b/SConstruct.tree.sh index 8ec0be4571cf23a5ff81ef12a1c36f08fda5fc4c..baf0808232b9ea02cc455e2f0c26c94b93916919 100755 --- a/SConstruct.tree.sh +++ b/SConstruct.tree.sh @@ -4,7 +4,7 @@ rm SConstruct.tree.png 2>/dev/null rm SConstruct.tree.gif 2>/dev/null rm SConstruct.tree.html 2>/dev/null -bash SConstruct.run -n --tree=prune,status $@ > SConstruct.tree.dat +bash SConstruct.run -n --tree=prune,status $@ | grep -v '+-/' > SConstruct.tree.dat #,derived cat SConstruct.tree.dat | python SConstruct.graph.py $@ diff --git a/all.csv b/all.csv new file mode 100644 index 0000000000000000000000000000000000000000..4c2e48e937010255b0c370e60b09aa9eddc96c8e --- /dev/null +++ b/all.csv @@ -0,0 +1,13 @@ +#in fasta in csv +sa1/sa.fa sa1/sa.csv +#sa2/sa.fa sa2/sa.csv + +sh1/sh.fa sh1/sh.csv +#sh2/sh.fa sh2/sh.csv + +#sl1/sl.fa sl1/sl.csv +#sl2/sl.fa sl2/sl.csv + +sp1/sp.fa sp1/sp.csv +#sp2/sp.fa sp2/sp.csv + diff --git a/run2.py b/run2.py index 39e7fe47543063326730cea1bc45b89f94f39bab..9332f7bf9b529e2deacdd273eb0731d41916e6d8 100755 --- a/run2.py +++ b/run2.py @@ -25,7 +25,7 @@ MAP_FOLDER = 'map' OUT_FOLDER = 'output' GENOME_SIZE = 950000000 TMP_FOLDER = '/run/shm' -TMP_PREFIX = os.path.basename(sys.argv[0])+'_'+str(os.getpid())+'_' +TMP_PREFIX = 'bwa_scons_mapping_'+str(os.getpid())+'_' FRC_EXE = '/home/assembly/nobackup/metrics/FRC_align/src/FRC' BWA_EXE = '/usr/bin/bwa' SAMTOOLS_EXE = 'samtools' @@ -35,30 +35,20 @@ num_threads = 1 ONLY_PROCESS_IF_DONT_EXISTS = True -CONTIG = None -CSV_GROUP = None -env = None +CSV_PROJECT = None +env = None if __name__ == '__main__': if len(sys.argv) < 3: print "no contig given" - print sys.argv[0], "<IN FASTA> <IN CSV CONFIG>" + print sys.argv[0], "<IN PROJECT CSV>" sys.exit(1) - CONTIG = sys.argv[1] - CSV_GROUP = sys.argv[2] + CSV_PROJECT = sys.argv[1] elif __name__ == 'SCons.Script': - AddOption('--infasta', - dest='CONTIG', - type='string', - nargs=1, - action='store', - metavar='INFASTA', - help='input fasta' - ) AddOption('--incsv', - dest='CSV_GROUP', + dest='CSV_PROJECT', type='string', nargs=1, action='store', @@ -66,59 +56,39 @@ elif __name__ == 'SCons.Script': help='input csv' ) + AddOption('--threads', + dest='num_threads', + type='int', + nargs=1, + action='store', + metavar='threads', + default=0, + help='number of threads' + ) + os.environ['SHELL'] = '/bin/bash' #env['SHELL'] = os.environ['SHELL'] - env = Environment( CONTIG=GetOption('CONTIG'), CSV_GROUP=GetOption('CSV_GROUP'), ENV = os.environ, SHELL='/bin/bash' ) - - print "SHELL" , os.environ['SHELL'] - print "SCONS SHELL", env['SHELL'] + env = Environment( CSV_PROJECT=GetOption('CSV_PROJECT'), ENV = os.environ, SHELL='/bin/bash' ) - CONTIG = env.GetOption('CONTIG') - CSV_GROUP = env.GetOption('CSV_GROUP') + CSV_PROJECT = env.GetOption('CSV_PROJECT') + if CSV_PROJECT is None: + print "input project csv not defined" + print "scons --incsv=<IN PROJECT FILE>" + sys.exit(1) + print "SHELL" , os.environ['SHELL'] + print "SCONS SHELL", env['SHELL'] -if CONTIG is None: - print "no fasta file given: %s --infasta <fasta> --incsv <csv> [-j <num threads>]" % sys.argv[0] - sys.exit(1) - -if CSV_GROUP is None: - print "no csv file given: %s --infasta <fasta> --incsv <csv> [-j <num threads>]" % sys.argv[0] - sys.exit(1) - -if (not os.path.exists( CONTIG )) or (not os.path.exists( CSV_GROUP )): - if (not os.path.exists( CONTIG )): - print "contig file %s does not exists" % CONTIG - if (not os.path.exists( CSV_GROUP )): - print "csv group file %s does not exists" % CSV_GROUP - print sys.argv[0], "<IN FASTA> <IN CSV GROUP>" +if not os.path.exists(CSV_PROJECT): + print "input project csv %s does not exists" % CSV_PROJECT sys.exit(1) -outfolder = os.path.dirname( os.path.abspath(CSV_GROUP) ) -DB_FOLDER = os.path.join(outfolder, DB_FOLDER ) -MAP_FOLDER = os.path.join(outfolder, MAP_FOLDER ) -OUT_FOLDER = os.path.join(outfolder, OUT_FOLDER ) - -print "BASE FOLDER: %s" % outfolder -print "DB FOLDER: %s" % DB_FOLDER -print "MAP FOLDER: %s" % MAP_FOLDER -print "OUT FOLDER: %s" % OUT_FOLDER - - -#RND_PREFIX = os.path.join(TMP_FOLDER, os.path.basename(CONTIG) + os.path.basename(CSV_GROUP)) -if not dry_run: - if not os.path.exists(DB_FOLDER): - os.makedirs(DB_FOLDER) - if not os.path.exists(MAP_FOLDER): - os.makedirs(MAP_FOLDER) - - if not os.path.exists(OUT_FOLDER): - os.makedirs(OUT_FOLDER) def my_decider(dependency, target, prev_ni): @@ -151,19 +121,23 @@ if __name__ == 'SCons.Script': # How many CPU's can we use ? #env.SetOption("SHELL", "/bin/bash") - num_cpu = multiprocessing.cpu_count() - num_jobs = int( env.GetOption('num_jobs') ) - print "num cpu %d" % num_cpu - print "num jobs %d" % num_jobs + num_cpu = multiprocessing.cpu_count() + num_jobs = int( env.GetOption('num_jobs' ) ) + num_threads = int( env.GetOption('num_threads') ) + print "num cpu %d" % num_cpu + print "num jobs %d" % num_jobs + print "num threads %d" % num_threads if num_jobs <= 1: - num_threads = num_cpu/4 - env.SetOption('num_jobs', num_threads) + num_jobs = int((num_cpu/8) + 1) - else: - num_threads = num_jobs + if num_threads == 0: + num_threads = (num_jobs*2) + + env.SetOption('num_jobs' , num_jobs ) - print "running with -j", env.GetOption('num_jobs') + print "running with %d jobs" % num_jobs + print "running with %d threads" % num_threads # Set number of simultaneous threads to something less than the number of CPUs # /CPUs @@ -257,7 +231,7 @@ def move(*args, **kwargs): ########################## ## ADD COMMAND WRAPPER ########################## -#addCommand('bwa indexing', CMD_BWA_INDEX, TARGET = bwt_index, SOURCE = CONTIG, DEPS = [] }) +#addCommand('bwa indexing', CMD_BWA_INDEX, TARGET = bwt_index, SOURCE = setup['in_fasta'], DEPS = [] }) def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRECIOUS=False, USETMP=False, TMP_SUFFIX=""): if TARGET is None: print "no target defined:", str(locals()) @@ -334,17 +308,20 @@ def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRE - pdata = { 'SOURCE': SOURCE, 'TARGET': TARGET, 'DEPS': DEPS, 'REQS': REQS } + pdata = { 'SOURCE': SOURCE, 'TARGET': TARGET, 'RESULTS': TARGET, 'DEPS': DEPS, 'REQS': REQS } + datan = copy.copy(DATA) for key in pdata: datan[key] = pdata[key] + + + outFile = None tmpFile = None - if USETMP: + if (not allOk) and USETMP: tmpFile = tempfile.mktemp(dir=TMP_FOLDER, prefix=TMP_PREFIX, suffix=TMP_SUFFIX) - print " USING TMP :: %s as %s" % ( datan['TARGET'], tmpFile ) - outFile = datan['TARGET'] + print " USING TMP :: %s as %s" % ( datan['TARGET'][0], tmpFile ) datan['TARGET'] = tmpFile @@ -372,8 +349,6 @@ def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRE elif __name__ == 'SCons.Script': print " adding" - datan['RESULTS'] = datan['TARGET'] - if ONLY_PROCESS_IF_DONT_EXISTS: if allOk: print " all ok. skipping" @@ -386,26 +361,22 @@ def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRE else: print " not ok. adding" - datan['TARGET'] = env.Command(datan['TARGET'], datan['SOURCE'], cmdCall) + env.Command(datan['TARGET'], datan['SOURCE'], cmdCall) else: print " appending always" - datan['RESULTS'] = env.Command(datan['TARGET'], datan['SOURCE'], cmdCall) + env.Command(datan['TARGET'], datan['SOURCE'], cmdCall) - for dp in DEPS: env.Depends( datan['RESULTS'], dp ) - for rq in REQS: env.Requires( datan['RESULTS'], rq ) + if (not allOk) and USETMP: + print " USING TMP: %s to %s" % (datan['TARGET'], datan['RESULTS'][0]) + env.Command( datan['RESULTS'][0] , datan['TARGET'], move ) + for dp in DEPS: env.Depends( datan['TARGET'], dp ) + for rq in REQS: env.Requires( datan['TARGET'], rq ) - if USETMP: - print " USING TMP: %s to %s" % (datan['RESULTS'][0], outFile[0] ) - env.Command( outFile , datan['RESULTS'], move ) - #env.Depends( datan['RESULTS'], outFile ) - if PRECIOUS: env.Precious( outFile ) - - else: - if PRECIOUS: env.Precious( datan['RESULTS'] ) + if PRECIOUS: env.Precious( datan['RESULTS'] ) print "\n\n" @@ -440,14 +411,91 @@ def run_cmd( CMD , name ): print " DRY RUN:: skipping %s :: %s\n" % ( name , CMD ) return 0 - def main(): + setup = [] + with open(CSV_PROJECT, 'r') as csvfhd: + for line in csvfhd: + line = line.strip() + + if len(line) == 0: continue + if line[0] == "#": continue + + in_fasta, in_csv = line.split("\t") + + if not os.path.exists(in_fasta): + print "input fasta %s does not exists" % in_fasta + sys.exit(1) + + if not os.path.exists(in_csv): + print "input csv %s does not exists" % in_csv + sys.exit(1) + + + base_folder = os.path.dirname( os.path.abspath(in_csv) ) + proj_name = os.path.basename(base_folder) + db_folder = os.path.join(base_folder, DB_FOLDER ) + map_folder = os.path.join(base_folder, MAP_FOLDER ) + out_folder = os.path.join(base_folder, OUT_FOLDER ) + + if 'all' not in COMMAND_LINE_TARGETS: + if proj_name not in COMMAND_LINE_TARGETS: + print "project %s not to be built. skipping parsing" % proj_name + continue + + print "PROJECT NAME: %s" % proj_name + print "FASTA FILE : %s" % in_fasta + print "CSV FILE : %s" % in_csv + print "BASE FOLDER: %s" % base_folder + print "DB FOLDER: %s" % db_folder + print "MAP FOLDER: %s" % map_folder + print "OUT FOLDER: %s" % out_folder + print + + if not dry_run: + if not os.path.exists(db_folder): + os.makedirs(db_folder) + + if not os.path.exists(map_folder): + os.makedirs(map_folder) + + if not os.path.exists(out_folder): + os.makedirs(out_folder) + + projsetup = { + 'proj_name' : proj_name, + 'in_fasta' : in_fasta, + 'in_csv' : in_csv, + 'base_folder': base_folder, + 'db_folder' : db_folder, + 'map_folder' : map_folder, + 'out_folder' : out_folder + } + pp(projsetup) + setup.append(projsetup) + + projout = [] + for projsetup in setup: + projout.extend( project(projsetup) ) + + projout = [ x for x in sorted(list(set(projout))) if x is not None ] + + if __name__ == 'SCons.Script': + + env.Alias( 'all', projout ) + + print "target ALL:", "\n ".join( projout ) + for projsetup in setup: + print "target %s: %s" % (projsetup['proj_name'].upper(), projsetup['outfile']) + print "\n\n\n" + +def project( setup ): + datas = {} ########################## ## READ CSV ########################## - with open(CSV_GROUP, 'r') as fhd: + with open(setup['in_csv'], 'r') as fhd: line_count = 0 for line in fhd: line_count += 1 @@ -513,9 +561,9 @@ def main(): for plat in sorted(datas): data = datas[plat] if plat == 'illumina': - datas[plat] = processIllumina(data) + datas[plat] = processIllumina(data, setup) elif plat == '454' : - datas[plat] = process454(data) + datas[plat] = process454(data, setup) @@ -538,10 +586,11 @@ def main(): dataf[name] = datas[plat][name] - run_bwa(dataf) + finalout = run_bwa(dataf, setup) + return finalout -def process454(data): +def process454(data, setup): for comm_name in sorted(data): print "parsing 454 :: %s" % comm_name @@ -554,7 +603,7 @@ def process454(data): sys.exit(1) - out_prefix = os.path.join( MAP_FOLDER , comm_name ) + out_prefix = os.path.join( setup['map_folder'] , comm_name ) print "parsing 454 :: %s :: %s :: out prefix %s" % ( comm_name, infile, out_prefix ) out_fastq = out_prefix + '.fastq' @@ -581,11 +630,11 @@ def process454(data): addCommand('extracting sff', CMD_EXTRACT, TARGET = data[comm_name]['file_names_454_q'], SOURCE = data[comm_name]['file_names_454' ], DATA = data[comm_name], REQS = [ ]) addCommand('splitting sff' , CMD_SPLIT , TARGET = data[comm_name]['file_names' ], SOURCE = data[comm_name]['file_names_454_q'], DATA = data[comm_name], REQS = [ ]) - data = processIllumina(data) + data = processIllumina(data, setup) return data -def processIllumina(data): +def processIllumina(data, setup): ########################## ## GENERATE FILE NAMES ########################## @@ -594,18 +643,18 @@ def processIllumina(data): RND_NAME2 = tempfile.mktemp(dir=TMP_FOLDER, prefix='bwa_') - out_sam = os.path.join( MAP_FOLDER , comm_name + '.sam' ) - out_bam_tmp = os.path.join( MAP_FOLDER , comm_name + '.bam' ) + out_sam = os.path.join( setup['map_folder'] , comm_name + '.sam' ) + out_bam_tmp = os.path.join( setup['map_folder'] , comm_name + '.bam' ) - out_bam_pfx = os.path.join( OUT_FOLDER , comm_name ) - out_bam = os.path.join( OUT_FOLDER , comm_name + '.bam' ) + out_bam_pfx = os.path.join( setup['out_folder'] , comm_name ) + out_bam = os.path.join( setup['out_folder'] , comm_name + '.bam' ) out_bai = out_bam + '.bai' - out_cov = os.path.join( OUT_FOLDER , comm_name + '.cov' ) - out_stats = os.path.join( OUT_FOLDER , comm_name + '.stats' ) - out_flag = os.path.join( OUT_FOLDER , comm_name + '.flagstats') - out_ok = os.path.join( OUT_FOLDER , comm_name + '.ok' ) - out_frc = os.path.join( OUT_FOLDER , comm_name + '_FRC__CEstats_PE.txt') - out_frcB = os.path.join( OUT_FOLDER , comm_name + '_FRC_' ) + out_cov = os.path.join( setup['out_folder'] , comm_name + '.cov' ) + out_stats = os.path.join( setup['out_folder'] , comm_name + '.stats' ) + out_flag = os.path.join( setup['out_folder'] , comm_name + '.flagstats') + out_ok = os.path.join( setup['out_folder'] , comm_name + '.ok' ) + out_frc = os.path.join( setup['out_folder'] , comm_name + '_FRC__CEstats_PE.txt') + out_frcB = os.path.join( setup['out_folder'] , comm_name + '_FRC_' ) filenames = data[comm_name]['file_names'] @@ -634,11 +683,11 @@ def processIllumina(data): data[comm_name]['file_names_str' ] = " ".join( filenames ) data[comm_name]['fastq1' ] = filenames[0] - data[comm_name]['sai1' ] = os.path.join( MAP_FOLDER , os.path.basename( filenames[0] ) + '.sai' ) + data[comm_name]['sai1' ] = os.path.join( setup['map_folder'] , os.path.basename( filenames[0] ) + '.sai' ) data[comm_name]['fastq1cmd' ] = data[comm_name]['fastq1'] data[comm_name]['fastq2' ] = filenames[1] - data[comm_name]['sai2' ] = os.path.join( MAP_FOLDER , os.path.basename( filenames[1] ) + '.sai' ) + data[comm_name]['sai2' ] = os.path.join( setup['map_folder'] , os.path.basename( filenames[1] ) + '.sai' ) data[comm_name]['fastq2cmd' ] = data[comm_name]['fastq2'] @@ -669,19 +718,24 @@ def processIllumina(data): ########################## ## GENERATE COMMANDS ########################## -def run_bwa(data): - db_name = os.path.join(DB_FOLDER , os.path.basename( CONTIG ).replace('/', '_')) - bam_name = os.path.join(OUT_FOLDER, os.path.basename( CONTIG ).replace('/', '_')) +def run_bwa(data, setup): + in_fasta_bn = os.path.basename( setup['in_fasta'] ).replace('/', '_') + db_name = os.path.join( setup['db_folder' ], in_fasta_bn ) + bam_name = os.path.join( setup['out_folder'], in_fasta_bn ) + pre_ok = os.path.join( setup['out_folder'], in_fasta_bn + '.ok' ) + bwt_index = db_name + '.bwt' + cmds = [] bams = [] deps = [] + oks = [] sum_process = 0 - index_data = { 'db_name': db_name, 'contig' : CONTIG, 'bwa_exe': BWA_EXE} + index_data = { 'db_name': db_name, 'contig' : setup['in_fasta'], 'bwa_exe': BWA_EXE} CMD_BWA_INDEX = "%(bwa_exe)s index -p %(db_name)s -a is %(contig)s" - addCommand( 'bwa indexing', CMD_BWA_INDEX, TARGET=bwt_index, SOURCE=CONTIG, DATA=index_data, DEPS=[CONTIG], PRECIOUS=True ) + addCommand( 'bwa indexing', CMD_BWA_INDEX, TARGET=bwt_index, SOURCE=setup['in_fasta'], DATA=index_data, DEPS=[], PRECIOUS=True ) ########################## @@ -705,7 +759,7 @@ def run_bwa(data): CMD_CONVERT = "%(samtools_exe)s view -S -b -q 30 -1 %(sam)s > %(TARGET)s" # TARGET = bam_tmp #CMD_MV1 = "mv %(bam_tmp_rnd)s %(bam_tmp)s" - CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -o %(bam_tmp)s > %(TARGET)s" + CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -f %(bam_tmp)s %(TARGET)s" #CMD_MV2 = "mv %(bam_pfx_rnd_bam)s %(bam)s" CMD_INDEX = '%(samtools_exe)s index %(bam)s' CMD_COV = '%(samtools_exe)s depth %(bam)s > %(cov)s' @@ -735,8 +789,8 @@ def run_bwa(data): addCommand('coverage' , CMD_COV , TARGET = info['cov'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] ) addCommand('stats' , CMD_STATS , TARGET = info['stats'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] ) addCommand('flag' , CMD_FLAG , TARGET = info['flag'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] ) - addCommand('oking' , CMD_OK , TARGET = info['ok'] , SOURCE = [ info['bai'], info['cov'], info['stats'], info['flag'] ], DATA = info ) + deps = [ info['bai'], info['cov'], info['stats'], info['flag'] ] if FRC: if 'gsize' in info: @@ -754,23 +808,26 @@ def run_bwa(data): else: print " SKIPPING FRC %(frc)s. disabled\n" % info + addCommand('oking' , CMD_OK , TARGET = info['ok'] , SOURCE = deps, DATA = info ) - deps.append( info['ok'] ) + oks.append(info['ok']) bams.append( info['bam'] ) - - + CMD_PRE_OK = touch + addCommand('preoking' , CMD_PRE_OK , TARGET = pre_ok , SOURCE = oks, DATA = data ) ########################## ## IF NOT TO MERGE, STOPS HERE ########################## + setup['data'] = data if not do_merge: - print "FINISHED" - if __name__ == 'SCons.Script': - env.Alias( 'all', deps ) - return + print "FINISHED", setup['proj_name'] + env.Alias( setup['proj_name'], pre_ok ) + setup['bamdata'] = "" + setup['outfile'] = pre_ok + return [ pre_ok ] @@ -854,7 +911,8 @@ def run_bwa(data): CMD_MERGE_tmp = False if (not os.path.exists( bam_data['bam'] )) or ( os.path.getsize(bam_data['bam']) == 0 ): CMD_MERGE_tmp = True - addCommand('merging' , CMD_MERGE , TARGET = bam_data['bam'] , SOURCE = bams , DATA = bam_data, DEPS = deps , USETMP=CMD_MERGE_tmp ) + + addCommand('merging' , CMD_MERGE , TARGET = bam_data['bam'] , SOURCE = bams , DATA = bam_data, DEPS = [ pre_ok ] , USETMP=CMD_MERGE_tmp ) #addCommand('moving' , CMD_MV1 , TARGET = bam_data['bam'] , SOURCE = bam_data['bamrnd1'], DATA = bam_data ) @@ -887,7 +945,11 @@ def run_bwa(data): if __name__ == 'SCons.Script': - env.Alias( 'all', [ bam_data['fok'] ] ) + print "FINISHED", setup['proj_name'] + env.Alias( setup['proj_name'], bam_data['fok'] ) + setup['bamdata'] = bam_data + setup['outfile'] = bam_data['fok'] + return [ bam_data['fok'] ] diff --git a/sp1/sp.csv b/sp1/sp.csv index a9db75d706ca0ca5915532a8d430b713ac52f295..f1dacf430d315280df1e55810be903e76f9fdfd3 100644 --- a/sp1/sp.csv +++ b/sp1/sp.csv @@ -1,13 +1,13 @@ #group name platform library name library size library size min library size max file names -120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79 illumina pe 500 450 550 /home/assembly/tomato150/reseq/raw/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/raw/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_2.fq.gz.clean.dup.clean.gz -120524_I238_FCD1174ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz -120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz -120530_I235_FCD117YACXX_L1_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz -120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz -120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz -120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz -120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz -120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/raw/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz -20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/pennellii/filtered/454/20000/20000.dedup.sff -8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/pennellii/filtered/454/8000/8000.dedup.sff +120524_I238_FCD1174ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz +120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz +120530_I235_FCD117YACXX_L1_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz +120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_2.fq.gz.clean.dup.clean.gz +120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz +120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz +120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz +120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz +120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz +20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/pennellii/filtered/454/20000/20000.dedup.sff +8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/pennellii/filtered/454/8000/8000.dedup.sff 3000.dedup 454 mp 3000 2700 3300 /home/assembly/tomato150/denovo/pennellii/filtered/454/3000/3000.dedup.sff