Commit f7a0f9be authored by Aflitos, Saulo Alves
Browse files

ok

parent 9b2ca054
#!/bin/bash
# Wrapper around the bundled SCons that invokes it with -n (SCons'
# no-execute mode) and copies the combined stdout/stderr to a timestamped
# log file under .log/.
#
# Usage: <this script> [extra scons arguments...]
#
# -x traces each command, -e aborts on failure, -u rejects unset variables.
set -xeu
# Without pipefail the pipeline's status is tee's, which hides a scons failure.
set -o pipefail

LOG_DIR=".log"
LOGFILE="${LOG_DIR}/SConstruct.run.log.$(date +%Y%m%d_%H%M%S).out"
#echo "LOGFILE $LOGFILE"

# tee does not create missing directories, so make sure the log dir exists.
mkdir -p -- "$LOG_DIR"

# "$@" forwards every caller argument as exactly one word (no splitting/globbing).
python scons/scons.py -n --stack-size=1024 --diskcheck=none "$@" 2>&1 | tee "$LOGFILE"
#!/bin/bash
# Wrapper around the bundled SCons that runs the build and copies the
# combined stdout/stderr to a timestamped log file under .log/.
#
# Usage: <this script> [extra scons arguments...]
#
# Strict mode: abort on errors and unset variables; pipefail makes the
# script's exit status reflect a scons failure instead of tee's success.
set -euo pipefail

LOG_DIR=".log"
LOGFILE="${LOG_DIR}/SConstruct.run.log.$(date +%Y%m%d_%H%M%S).out"
#echo "LOGFILE $LOGFILE"

# tee does not create missing directories, so make sure the log dir exists.
mkdir -p -- "$LOG_DIR"

# "$@" forwards every caller argument as exactly one word (no splitting/globbing).
python scons/scons.py --stack-size=1024 --diskcheck=none "$@" 2>&1 | tee "$LOGFILE"
#in fasta in csv
#sa1/sa.fa sa1/sa.csv
sa2/sa.fa sa2/sa.csv
#sh1/sh.fa sh1/sh.csv
sh2/sh.fa sh2/sh.csv
#sl1/sl.fa sl1/sl.csv
#sp1/sp.fa sp1/sp.csv
sp2/sp.fa sp2/sp.csv
#in fasta in csv
#sa1/sa.fa sa1/sa.csv
#sa2/sa.fa sa2/sa.csv
sa3/sa.fa sa3/sa.csv
#sh1/sh.fa sh1/sh.csv
#sh2/sh.fa sh2/sh.csv
sh3/sh.fa sh3/sh.csv
#sl1/sl.fa sl1/sl.csv
sl2/sl.fa sl2/sl.csv
#sp1/sp.fa sp1/sp.csv
#sp2/sp.fa sp2/sp.csv
sp3/sp.fa sp3/sp.csv
......@@ -17,7 +17,6 @@ do_merge = True
FRC = False
LINKER = '/home/assembly/dev_150/assemblies/mapping/sff_linker.fasta'
LINKER = '/home/assembly/dev_150/assemblies/mapping/sff_linker.fasta.all'
DB_FOLDER = 'db'
......@@ -29,6 +28,7 @@ TMP_PREFIX = 'bwa_scons_mapping_'+str(os.getpid())+'_'
FRC_EXE = '/home/assembly/nobackup/metrics/FRC_align/src/FRC'
BWA_EXE = '/usr/bin/bwa'
SAMTOOLS_EXE = 'samtools'
USE_PIGZ = False
num_threads = 1
......@@ -237,15 +237,15 @@ def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRE
print "no target defined:", str(locals())
sys.exit(1)
if not isinstance(SOURCE, list): SOURCE = [ SOURCE ]
if not isinstance(TARGET, list): TARGET = [ TARGET ]
if not isinstance(DEPS , list): DEPS = [ DEPS ]
if not isinstance(REQS , list): REQS = [ REQS ]
if not isinstance( SOURCE, list ): SOURCE = [ SOURCE ]
if not isinstance( TARGET, list ): TARGET = [ TARGET ]
if not isinstance( DEPS , list ): DEPS = [ DEPS ]
if not isinstance( REQS , list ): REQS = [ REQS ]
SOURCE = list(set(SOURCE))
TARGET = list(set(TARGET))
DEPS = list(set(DEPS ))
REQS = list(set(REQS ))
SOURCE = list( set(SOURCE) )
TARGET = list( set(TARGET) )
DEPS = list( set(DEPS ) )
REQS = list( set(REQS ) )
if __name__ == 'SCons.Script':
allOk = True
......@@ -699,7 +699,9 @@ def processIllumina(data, setup):
opener = 'cat'
if fn.endswith('.gz'):
opener = 'pigz -d -p3 -c'
opener = 'gunzip -c'
if USE_PIGZ:
opener = 'pigz -d -p3 -c'
data[comm_name]['fastq1cmd' ] = "<(%(opener)s %(fn)s | fastx_reverse_complement )" % { 'opener': opener, 'fn': fn }
......@@ -709,7 +711,11 @@ def processIllumina(data, setup):
opener = 'cat'
if fn.endswith('.gz'):
opener = 'pigz -d -p3 -c'
opener = 'gunzip -c'
if USE_PIGZ:
opener = 'pigz -d -p3 -c'
data[comm_name]['fastq2cmd' ] = "<(%(opener)s %(fn)s | fastx_reverse_complement )" % { 'opener': opener, 'fn': fn }
return data
......@@ -749,46 +755,47 @@ def run_bwa(data, setup):
#CMD_ALIGNER = "%(bwa_exe)s samse -f %(sam)s %(db_name)s %(sai1)s %(fastq1cmd)s "
#addCommand('align', CMD_ALIGNER, TARGET = info['sam'], SOURCE = info['fastq1'], DEPS = [bwt_index, info['sai1']] })
CMD_MAPPER1 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq1cmd)s > %(sai1)s"
CMD_MAPPER2 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq2cmd)s > %(sai2)s"
CMD_MAPPER1 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq1cmd)s > %(TARGET)s"
CMD_MAPPER2 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq2cmd)s > %(TARGET)s"
CMD_ALIGNER = "%(bwa_exe)s sampe -o 20 -P -f %(sam)s "
CMD_ALIGNER = "%(bwa_exe)s sampe -o 20 -P -f %(TARGET)s "
if 'gsize' in info:
CMD_ALIGNER += " -a %(gmax)s "
CMD_ALIGNER += "%(db_name)s %(sai1)s %(sai2)s %(fastq1cmd)s %(fastq2cmd)s"
CMD_CONVERT = "%(samtools_exe)s view -S -b -q 30 -1 %(sam)s > %(TARGET)s" # TARGET = bam_tmp
#CMD_MV1 = "mv %(bam_tmp_rnd)s %(bam_tmp)s"
CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -f %(bam_tmp)s %(TARGET)s"
CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -o -l 1 -@ 20 -m 2G %(bam_tmp)s %(TARGET)s > %(TARGET)s"
#samtools sort -m 53687091200 -o -l 1 -@ 20 -m 2G 136.bam /run/shm/bwa_scons_mapping_13117_J9DFw5 > /run/shm/bwa_scons_mapping_13117_J9DFw5
#CMD_MV2 = "mv %(bam_pfx_rnd_bam)s %(bam)s"
CMD_INDEX = '%(samtools_exe)s index %(bam)s'
CMD_COV = '%(samtools_exe)s depth %(bam)s > %(cov)s'
CMD_STATS = '%(samtools_exe)s idxstats %(bam)s > %(stats)s'
CMD_FLAG = '%(samtools_exe)s flagstat %(bam)s > %(flag)s'
CMD_COV = '%(samtools_exe)s depth %(bam)s > %(TARGET)s'
CMD_STATS = '%(samtools_exe)s idxstats %(bam)s > %(TARGET)s'
CMD_FLAG = '%(samtools_exe)s flagstat %(bam)s > %(TARGET)s'
CMD_OK = touch
addCommand('map' , CMD_MAPPER1, TARGET = info['sai1'] , SOURCE = info['fastq1'] , DATA = info, REQS = [ info['bwt_index'] ] )
addCommand('map' , CMD_MAPPER2, TARGET = info['sai2'] , SOURCE = info['fastq2'] , DATA = info, REQS = [ info['bwt_index'] ] )
addCommand('align' , CMD_ALIGNER, TARGET = info['sam'] , SOURCE = [ info['fastq1'], info['fastq2'] ] , DATA = info, DEPS = [ bwt_index, info['sai1'], info['sai2'] ] )
addCommand('map' , CMD_MAPPER1, TARGET = info['sai1'] , SOURCE = info['fastq1'] , DATA = info, REQS = [ info['bwt_index'] ] , USETMP=True )
addCommand('map' , CMD_MAPPER2, TARGET = info['sai2'] , SOURCE = info['fastq2'] , DATA = info, REQS = [ info['bwt_index'] ] , USETMP=True )
addCommand('align' , CMD_ALIGNER, TARGET = info['sam'] , SOURCE = [ info['fastq1'], info['fastq2'] ] , DATA = info, DEPS = [ bwt_index, info['sai1'], info['sai2'] ], USETMP=True )
CMD_CONVERT_tmp = False
if (not os.path.exists(info['bam'])) or ( os.path.getsize(info['bam']) == 0 ):
CMD_CONVERT_tmp = True
addCommand('convert' , CMD_CONVERT, TARGET = info['bam_tmp'] , SOURCE = info['sam' ] , DATA = info, USETMP=CMD_CONVERT_tmp )
addCommand('convert' , CMD_CONVERT, TARGET = info['bam_tmp'] , SOURCE = info['sam' ] , DATA = info , USETMP=True )
#addCommand('moving' , CMD_MV1 , TARGET = info['bam_tmp' ] , SOURCE = info['bam_tmp_rnd'] , DATA = info)
CMD_SORT_tmp = False
if (not os.path.exists(info['bam'])) or ( os.path.getsize(info['bam']) == 0 ):
CMD_SORT_tmp = True
addCommand('sort' , CMD_SORT , TARGET = info['bam'] , SOURCE = info['bam_tmp' ] , DATA = info, USETMP=CMD_SORT_tmp )
addCommand('sort' , CMD_SORT , TARGET = info['bam'] , SOURCE = info['bam_tmp' ] , DATA = info , USETMP=True)
#addCommand('moving' , move , TARGET = info['bam'] , SOURCE = info['bam_pfx_rnd_bam'] , DATA = info)
addCommand('index' , CMD_INDEX , TARGET = info['bai'] , SOURCE = info['bam'] , DATA = info)
addCommand('coverage' , CMD_COV , TARGET = info['cov'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] )
addCommand('stats' , CMD_STATS , TARGET = info['stats'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] )
addCommand('flag' , CMD_FLAG , TARGET = info['flag'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] )
addCommand('index' , CMD_INDEX , TARGET = info['bai'] , SOURCE = info['bam'] , DATA = info )
addCommand('coverage' , CMD_COV , TARGET = info['cov'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] , USETMP=True)
addCommand('stats' , CMD_STATS , TARGET = info['stats'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] , USETMP=True)
addCommand('flag' , CMD_FLAG , TARGET = info['flag'] , SOURCE = info['bam'] , DATA = info, DEPS = [ info['bai'] ] , USETMP=True)
deps = [ info['bai'], info['cov'], info['stats'], info['flag'] ]
......@@ -893,18 +900,18 @@ def run_bwa(data, setup):
CMD_MERGE = '%(samtools_exe)s merge -r -1 %(TARGET)s %(bams)s'
#CMD_MV1 = "mv %(bamrnd1)s %(bam)s"
CMD_INDEX = '%(samtools_exe)s index %(bam)s'
CMD_COV = '%(samtools_exe)s depth %(bam)s > %(cov)s'
CMD_STAT = '%(samtools_exe)s idxstats %(bam)s > %(stat)s'
CMD_FLAG = '%(samtools_exe)s flagstat %(bam)s > %(flag)s'
CMD_COV = '%(samtools_exe)s depth %(bam)s > %(TARGET)s'
CMD_STAT = '%(samtools_exe)s idxstats %(bam)s > %(TARGET)s'
CMD_FLAG = '%(samtools_exe)s flagstat %(bam)s > %(TARGET)s'
CMD_BOK = touch
CMD_FILTER = '%(samtools_exe)s view %(sam_filter)s -b %(bam)s > %(TARGET)s'
#CMD_MV2 = 'mv %(bamrnd2)s %(filter)s'
#CMD_FIX = '%(samtools_exe)s fixmate %(filter)s %(fix)s'
#CMD_DEDUP = '%(samtools_exe)s rmdup -S %(filter)s %(dedup)s'
CMD_FINDEX = '%(samtools_exe)s index %(filter)s'
CMD_FCOV = '%(samtools_exe)s depth %(filter)s > %(fcov)s'
CMD_FSTAT = '%(samtools_exe)s idxstats %(filter)s > %(fstat)s'
CMD_FFLAG = '%(samtools_exe)s flagstat %(filter)s > %(fflag)s'
CMD_FCOV = '%(samtools_exe)s depth %(filter)s > %(TARGET)s'
CMD_FSTAT = '%(samtools_exe)s idxstats %(filter)s > %(TARGET)s'
CMD_FFLAG = '%(samtools_exe)s flagstat %(filter)s > %(TARGET)s'
CMD_FOK = touch
......@@ -912,14 +919,14 @@ def run_bwa(data, setup):
if (not os.path.exists( bam_data['bam'] )) or ( os.path.getsize(bam_data['bam']) == 0 ):
CMD_MERGE_tmp = True
addCommand('merging' , CMD_MERGE , TARGET = bam_data['bam'] , SOURCE = bams , DATA = bam_data, DEPS = [ pre_ok ] , USETMP=CMD_MERGE_tmp )
addCommand('merging' , CMD_MERGE , TARGET = bam_data['bam'] , SOURCE = bams , DATA = bam_data, DEPS = [ pre_ok ] , USETMP=CMD_MERGE_tmp )
#addCommand('moving' , CMD_MV1 , TARGET = bam_data['bam'] , SOURCE = bam_data['bamrnd1'], DATA = bam_data )
addCommand('indexing' , CMD_INDEX , TARGET = bam_data['index'] , SOURCE = bam_data['bam'] , DATA = bam_data )
addCommand('coverage' , CMD_COV , TARGET = bam_data['cov'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ] )
addCommand('statistics' , CMD_STAT , TARGET = bam_data['stat'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ] )
addCommand('flag stats' , CMD_FLAG , TARGET = bam_data['flag'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ] )
addCommand('indexing' , CMD_INDEX , TARGET = bam_data['index'] , SOURCE = bam_data['bam'] , DATA = bam_data )
addCommand('coverage' , CMD_COV , TARGET = bam_data['cov'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ], USETMP=True )
addCommand('statistics' , CMD_STAT , TARGET = bam_data['stat'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ], USETMP=True )
addCommand('flag stats' , CMD_FLAG , TARGET = bam_data['flag'] , SOURCE = bam_data['bam'] , DATA = bam_data, DEPS = [ bam_data['index'] ], USETMP=True )
bdeps = [ bam_data['index'], bam_data['cov'], bam_data['stat'], bam_data['flag'] ]
addCommand('bam ok' , CMD_BOK , TARGET = bam_data['bok'] , SOURCE = bdeps )
......@@ -931,10 +938,10 @@ def run_bwa(data, setup):
#addCommand('moving' , CMD_MV2 , TARGET = bam_data['filter'] , SOURCE = bam_data['bamrnd2'], DATA = bam_data )
addCommand('Findexing' , CMD_FINDEX, TARGET = bam_data['findex'] , SOURCE = bam_data['filter'] , DATA = bam_data )
addCommand('Fcoverage' , CMD_FCOV , TARGET = bam_data['fcov'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ] )
addCommand('Fstatistics' , CMD_FSTAT , TARGET = bam_data['fstat'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ] )
addCommand('Fflag stats' , CMD_FFLAG , TARGET = bam_data['fflag'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ] )
addCommand('Findexing' , CMD_FINDEX, TARGET = bam_data['findex'] , SOURCE = bam_data['filter'] , DATA = bam_data )
addCommand('Fcoverage' , CMD_FCOV , TARGET = bam_data['fcov'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ], USETMP=True )
addCommand('Fstatistics' , CMD_FSTAT , TARGET = bam_data['fstat'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ], USETMP=True )
addCommand('Fflag stats' , CMD_FFLAG , TARGET = bam_data['fflag'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['findex'] ], USETMP=True )
addCommand('Fok' , CMD_FOK , TARGET = bam_data['fok'] , SOURCE = [
bam_data['findex' ],
bam_data['fcov' ],
......
#group name platform library name library size library size min library size max file names
120524_I238_FCD1174ACXX_L2_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz
120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz
120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz
120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/058/illumina/pairedend_500/120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/058/illumina/pairedend_500/120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46_2.fq.gz.clean.dup.clean.gz
120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz
120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz
120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz
120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz
20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/arcanum/filtered/454/20000/20000.dedup.sff
8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/arcanum/filtered/454/8000/8000.dedup.sff
../sa1/sa.csv
\ No newline at end of file
../../allpaths_lg_pennellii_raw/sp/data/run/ASSEMBLIES/test2/final.assembly.fasta
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment