diff --git a/.gitignore b/.gitignore index 87b9045e5ea387a3bbff850d38b2908e17066524..968fe90e6bdcbccf2a94b29a262eef1ee6cec906 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,9 @@ *.png *.html *.out +*/db +*/in +*/output +*/map +*.fa +scons/ diff --git a/all3.csv b/all3.csv index ac83f39e0192a82c28e0e0be366862504219a012..f414e6d8f6155d128d4511c7283c86a98077575c 100644 --- a/all3.csv +++ b/all3.csv @@ -1,16 +1,16 @@ #in fasta in csv -#sa1/sa.fa sa1/sa.csv -#sa2/sa.fa sa2/sa.csv -sa3/sa.fa sa3/sa.csv +sa1/sa.fa sa1/sa.csv +sa2/sa.fa sa2/sa.csv -#sh1/sh.fa sh1/sh.csv -#sh2/sh.fa sh2/sh.csv -sh3/sh.fa sh3/sh.csv +sh1/sh.fa sh1/sh.csv +sh2/sh.fa sh2/sh.csv -#sl1/sl.fa sl1/sl.csv -sl2/sl.fa sl2/sl.csv +sl1/sl.fa sl1/sl.csv -#sp1/sp.fa sp1/sp.csv -#sp2/sp.fa sp2/sp.csv +sp1/sp.fa sp1/sp.csv +sp2/sp.fa sp2/sp.csv sp3/sp.fa sp3/sp.csv +heinz/heinz.fa heinz/heinz.csv +pimp/heinz.fa pimp/pimp.csv + diff --git a/heinz/heinz.csv b/heinz/heinz.csv new file mode 100644 index 0000000000000000000000000000000000000000..d3da803946c908f494ce5c3fefaf9e3c308a53ba --- /dev/null +++ b/heinz/heinz.csv @@ -0,0 +1,24 @@ +#group name platform library name library size library size min library size max file names +30507AAXX_6 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_6_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_6_f.fastq +30507AAXX_7 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_7_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_7_f.fastq +30507AAXX_8 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_8_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/30507AAXX_8_f.fastq +314TYAAXX_1 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_1_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_1_r.fastq +314TYAAXX_2 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_2_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_2_r.fastq +314TYAAXX_3 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_3_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_3_r.fastq +314TYAAXX_4 illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_4_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_250/314TYAAXX_4_r.fastq +30507AAXX_1 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_1_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_1_f.fastq +30507AAXX_2 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_2_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_2_f.fastq +30507AAXX_3 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_3_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_3_f.fastq +30507AAXX_4 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_4_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/30507AAXX_4_f.fastq +314TYAAXX_6 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_6_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_6_r.fastq +314TYAAXX_7 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_7_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_7_r.fastq +314TYAAXX_8 illumina pe 215 193 237 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_8_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_300/314TYAAXX_8_r.fastq +#sample_R illumina pe 500 450 550 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_500/sample_R1_001.64.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/Heinz_Illumina_pairedend_500/sample_R2_001.64.fastq +#3018DAAXX_1 illumina mp 1640 1440 1840 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_2000/3018DAAXX_1_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_2000/3018DAAXX_1_f.fastq +#3018DAAXX_2 illumina mp 2430 2180 2680 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_3000/3018DAAXX_2_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_3000/3018DAAXX_2_r.fastq +#3018DAAXX_3 illumina mp 2850 2550 3150 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_4000/3018DAAXX_3_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_4000/3018DAAXX_3_f.fastq +#3018DAAXX_4 illumina mp 3000 2500 3500 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_5000/3018DAAXX_4_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_5000/3018DAAXX_4_f.fastq +#30THBAAXX_1 illumina mp 1640 1440 1840 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_2000/30THBAAXX_1_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_2000/30THBAAXX_1_r.fastq +#30THBAAXX_2 illumina mp 2430 2180 2680 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_3000/30THBAAXX_2_r.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_3000/30THBAAXX_2_f.fastq +#30THBAAXX_3 illumina mp 2850 2550 3150 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_4000/30THBAAXX_3_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_4000/30THBAAXX_3_r.fastq +#30THBAAXX_4 illumina mp 3000 2500 3500 /home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_5000/30THBAAXX_4_f.fastq;/home/assembly/tomato150/dev150/assemblies/mapping/heinz/in/MP_5000/30THBAAXX_4_r.fastq diff --git a/heinz/mean_coverage.pl b/heinz/mean_coverage.pl new file mode 100644 index 0000000000000000000000000000000000000000..c6ff42ebc4aacdcde1b56f87efc0932c2384b919 --- /dev/null +++ b/heinz/mean_coverage.pl @@ -0,0 +1,30 @@ +($num,$den,$min,$max,$av,$var)=(0,0,10000000,0,0,0); +while ($cov=<STDIN>) { + if ( $cov >= 1 ) { + $num += $cov; + if ( $cov > $max ) { $max = $cov; }; + if ( $cov < $min ) { $min = $cov; }; + if ( $den == 0 ) { $av=$cov; $var=0; } + else { + $av = (($av*$den)+$cov)/($den+1); + $var += ($cov-$av)**2; + $dev = sqrt($var/$den); + if ( $den % 10000 == 0 ) { + print "$den\t$av\t$var\t$dev\n"; + } + }; + $den++; + } +} +$cov=$num/$den; +$dev=sqrt($var/$den); +$mx=$cov+(2*$dev); +print "Length = $den\n"; +print "Bases = $num\n"; +print "Mean Coverage = $cov\n"; +print "Min Coverage = $min\n"; +print "Max Coverage = $max\n"; +print "Var Coverage = $var\n"; +print "Dev Coverage = $dev\n"; +print "MX Coverage = $mx\n"; + diff --git a/heinz/run.sh b/heinz/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..632e186364f892e182d4fb9114fffa849c569f73 --- /dev/null +++ b/heinz/run.sh @@ -0,0 +1 @@ +./SConstruct.run --incsv=all3.csv --threads=10 -j 5 heinz diff --git a/heinz/snpcalling.txt b/heinz/snpcalling.txt new file mode 100644 index 0000000000000000000000000000000000000000..62b2b91f2a0eecd2679c28c6e4dab0d0b725aea1 --- /dev/null +++ b/heinz/snpcalling.txt @@ -0,0 +1,121 @@ +#http://samtools.sourceforge.net/mpileup.shtml + +For variant calling by Samtools, we used following commands. + +samtools calmd | samtools mpileup | bcftools view: to create raw variant bcf files +Options "-AEur" used for calmd. +generates extra tags (MD tags) with high sensitivity (-Ar), capping it (I dont know what it means) and exporting as uncompresse (-u) bam. + +Options "-d 99999 -L 99999 -Dugf" were used for mpileup. +For each position read at most 99999 reads (-d), dont skip indels (-L 99999), output per sample read depth (-D), export as uncompressed BCF (-ug) + +Options "-bvcg" were used for view. + output as BCF (-b) showing only variant sites (-vc) for each sample (-g) + + +vcfutils.pl varFilter: to filter and create variant vcf files +Options "-W 5 -Q 20 -d 4 -D xxx" were used. Upper depth (-D) of each sample was set based on depth distribution of the sample. +Filter adjacent gaps with a window size of 5 (-W) minimum RMS quality of 20 (-Q, i dont know what it means), minimum depth of 4 (-d) and a custom maximum depth depending on the actual depth (i guess average) + +/usr/share/samtools/vcfutils.pl + + + + +INBAM1=output/heinz.fa.bam.filtered.bam +RUNNAME=output/heinz.fa.bam.filtered.bam +REF=heinz.fa + + +samtools rmdup $INBAM1 $RUNNAME.dedup.bam +samtools calmd -AEur $RUNNAME.dedup.bam $REF | samtools mpileup -d 99999 -L 99999 -Dugf $REF - | bcftools view -bvcg - > $RUNNAME.dedup.bam.bcf +bcftools view $RUNNAME.dedup.bam.bcf | /usr/share/samtools/vcfutils.pl varFilter -W 5 -Q 20 -d 4 -D100 > $RUNNAME.dedup.bam.bcf.vcf + + +samtools mpileup $RUNNAME.dedup.bam | awk '{print $4}' | perl mean_coverage.pl +cat output/heinz.fa.bam.filtered.bam.dedup.bam.bcf.vcf | grep -v \# | cut -f 8 | cut -d ';' -f 1 | grep -v INDEL | cut -d '=' -f 2 | perl -ne 'BEGIN{$v=0;$c=0;$min=10000000;$max=0;$av=0;$var=0;} END{ $a=$v/$c; $dev=sqrt($var/$c); $mx=$a+(2*$dev); print "$c\t$v\t$a\t$min\t$max\t$av\t$var\t$dev\t$mx\n"; } $c+=1; $v+=$_; if ($_ < $min) { $min = $_; }; if ($_ > $max) { $max = $_; }; if ($c==1) { $av = $_; $var=0; } else { $av=(($av*$c)+$_)/($c+1); $var+=($_-$av)**2; $dev=sqrt($var/$c); print "$av\t$var\t$dev\n";}' + +heinz dedup : BAM : Length = 725,678,368 +heinz dedup : BAM : Bases = 15,270,968,181 +heinz dedup : BAM : Mean Coverage = 21.04 +heinz dedup : BAM : Min Coverage = 1 +heinz dedup : BAM : Max Coverage = 8,010 +heinz dedup : BAM : Var Coverage = 934,577,172,304.24 +heinz dedup : BAM : Dev Coverage = 35.89 +heinz dedup : BAM : MX Coverage = 92.82 + +heinz dedup : VCF : POSITION = 370,742 +heinz dedup : VCF : SUM COV = 7,425,739 +heinz dedup : VCF : AVG COV = 20.03 +heinz dedup : VCF : MIN COV = 4 +heinz dedup : VCF : MAX COV = 100 +heinz dedup : VCF : VAR COV = 75,974,225.29 +heinz dedup : VCF : STDDEV COV = 14.31 +heinz dedup : VCF : 2DEV COV = 48.66 + +RF_001 : BAM : Length = 733,900,334 +RF_001 : BAM : Bases = 28,464,215,562 +RF_001 : BAM : Mean Coverage = 38.78 +RF_001 : BAM : Min Coverage = 1 +RF_001 : BAM : Max Coverage = 7,861 +RF_001 : BAM : Var Coverage = 1,355,682,620,730.03 +RF_001 : BAM : Dev Coverage = 42.98 +RF_001 : BAM : MX Coverage = 124.74 + +RF_001 : VCF : POSITION = 237,532 +RF_001 : VCF : SUM COV = 10,093,075 +RF_001 : VCF : AVG COV = 42.49 +RF_001 : VCF : MIN COV = 4 +RF_001 : VCF : MAX COV = 105 +RF_001 : VCF : VAR COV = 97,797,311.67 +RF_001 : VCF : STDDEV COV = 20.29 +RF_001 : VCF : 2DEV COV = 82.58 + + +samtools mpileup ~/tomato150/Disk02_bam_ass_nobackup/reseq/mapped/Heinz/RF_001_SZAXPI008746-45.bam | awk '{print $4}' | perl mean_coverage.pl | tee RF_001.bam.cov_stats +gunzip -c ~/tomato150/programs/snpeff/newest/datain/RF_001_SZAXPI008746-45.vcf.gz | grep -v \# | cut -f 8 | cut -d ';' -f 1 | grep -v INDEL | cut -d '=' -f 2 | perl -ne 'BEGIN{$v=0;$c=0;$min=10000000;$max=0;$av=0;$var=0;} END{ $a=$v/$c; $dev=sqrt($var/$c); $mx=$a+(2*$dev); print "$c\t$v\t$a\t$min\t$max\t$av\t$var\t$dev\t$mx\n"; } $c+=1; $v+=$_; if ($_ < $min) { $min = $_; }; if ($_ > $max) { $max = $_; }; if ($c==1) { $av = $_; $var=0; } else { $av=(($av*$c)+$_)/($c+1); $var+=($_-$av)**2; $dev=sqrt($var/$c); print "$av\t$var\t$dev\n";}' + + + +wc -l *.vcf + 410.804 heinz.fa.bam.filtered.bam.dedup.bam.bcf.vcf + 200.015 heinz.fa.bam.filtered.bam.dedup.bam.bcf.vcf.qual20.vcf + 610.819 total + + + +========================================================================================= +($num,$den,$min,$max,$av,$var)=(0,0,10000000,0,0,0); +while ($cov=<STDIN>) { + if ( $cov >= 1 ) { + $c += 1; + $num += $cov; + if ( $cov > $max ) { $max = $cov; }; + if ( $cov < $min ) { $min = $cov; }; + if ( $c == 0 ) { $av=$cov; $var=0; } + else { + $av = (($av*$den)+$cov)/($den+1); + $var += ($cov-$av)**2; + $dev = sqrt($var/$den); + if ( $den % 10000 == 0 ) { + print "$den\t$av\t$var\t$dev\n"; + } + }; + $den++; + } +} +$cov=$num/$den; +$dev=sqrt($var/$c); +$mx=$cov+(2*$dev); +print "Length = $den\n"; +print "Bases = $num\n"; +print "Mean Coverage = $cov\n"; +print "Min Coverage = $min\n"; +print "Max Coverage = $max\n"; +print "Var Coverage = $var\n"; +print "Dev Coverage = $dev\n"; +print "MX Coverage = $mx\n"; + + + + diff --git a/pimp/in/mkreport.sh b/pimp/in/mkreport.sh new file mode 100755 index 0000000000000000000000000000000000000000..6f24d1f5974bd8c049fe46397a762223361ba5a5 --- /dev/null +++ b/pimp/in/mkreport.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +set -xeu + +WID=800 +HEI=600 +NUM=9 + + +F1=$1 +F2=$2 + + +if [[ ! -f "white.png" ]]; then + convert -size ${WID}x${HEI} xc:white white.png +fi + +function do_report { + reportfolderOut=$1 + reportfolder=${reportfolderOut}_fastqc + + if [[ ! -f "$reportfolder/Images/kmer_profiles.png" ]]; then + #kmer_profiles.png + cp white.png $reportfolder/Images/kmer_profiles.png + fi + + images=`find $reportfolder/Images -type f -name '*.png' | sort | tr '\n' ' '` + #echo " IMAGES $images" + IMC=`find $reportfolder/Images -type f -name '*.png' | wc -l` + + if [[ "$IMC" -ne "$NUM" ]]; then + echo "wrong number of images $IMC (should be $NUM)" + exit 1 + fi + + if [[ -f "$reportfolderOut.png" ]]; then + #rm $reportfolder.png + echo -n + fi + + WIDT=$(($WID*1 )) + HEIT=$(($HEI*$NUM)) + CMD="montage -verbose -label '%f' -font Helvetica -pointsize 14 -background 'white' -fill 'black' -define png:size=${WIDT}x${HEIT} -geometry ${WID}x${HEI}+2+2 -tile 1x$NUM -auto-orient $images $reportfolderOut.png" + echo $CMD + eval $CMD +} + + +N1=${F1%%.*} +N2=${F2%%.*} + +O1=$N1.png +O2=$N2.png + +do_report $N1 +do_report $N2 + + +LCN=`python -c "import os,sys; print os.path.commonprefix([sys.argv[1], sys.argv[2]])" $N1 $N2`.png +LCN=`basename $LCN` +WIDT=$(($WID*2 )) +HEIT=$(($HEI*$NUM)) + +CMD="montage -verbose -label '%f' -font Helvetica -pointsize 14 -background 'white' -fill 'black' -define png:size=${WIDT}x${HEIT} -geometry ${WID}x${HEIT}+2+2 -tile 2x1 -auto-orient $O1 $O2 $LCN" +echo " LCN $LCN" +echo " width $WID height $HEI num $NUM images 2" +echo " width total $WIDT height total $HEIT" +#echo " CMD $CMD" +eval $CMD diff --git a/pimp/mean_coverage.pl b/pimp/mean_coverage.pl new file mode 120000 index 0000000000000000000000000000000000000000..306ab2e243556f446bcd2b0a34996722ccddcc1c --- /dev/null +++ b/pimp/mean_coverage.pl @@ -0,0 +1 @@ +../heinz/mean_coverage.pl \ No newline at end of file diff --git a/pimp/pimp.csv b/pimp/pimp.csv new file mode 100644 index 0000000000000000000000000000000000000000..2629fb6e1709e889b4e3786099a0959ade58c7b5 --- /dev/null +++ b/pimp/pimp.csv @@ -0,0 +1,2 @@ +#group name platform library name library size library size min library size max file names +C3VVEACXX illumina pe 160 144 176 /home/assembly/tomato150/dev150/assemblies/mapping/pimp/in/pimpinellifolium_NoIndex_L003_R1_001.fastq.gz;/home/assembly/tomato150/dev150/assemblies/mapping/pimp/in/pimpinellifolium_NoIndex_L003_R2_001.fastq.gz diff --git a/pimp/run.sh b/pimp/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..f03253ff53356b88a0a4838bfb0447b61afd6c4d --- /dev/null +++ b/pimp/run.sh @@ -0,0 +1 @@ +./SConstruct.run --incsv=all3.csv --threads=10 -j 5 pimp diff --git a/pimp/snp.sh b/pimp/snp.sh new file mode 100644 index 0000000000000000000000000000000000000000..38e0cca934a5e589fc2cd2963ec5528d1109eb82 --- /dev/null +++ b/pimp/snp.sh @@ -0,0 +1,27 @@ +INBAM1=/home/assembly/tomato150/dev150/assemblies/mapping/pimp/output/C3VVEACXX.bam.filtered.bam +RUNNAME=/home/assembly/tomato150/dev150/assemblies/mapping/pimp/output/C3VVEACXX.bam.filtered.bam +REF=heinz.fa + + +samtools rmdup $INBAM1 $RUNNAME.dedup.bam +samtools calmd -AEur $RUNNAME.dedup.bam $REF | samtools mpileup -d 99999 -L 99999 -Dugf $REF - | bcftools view -bvcg - > $RUNNAME.dedup.bam.bcf +bcftools view $RUNNAME.dedup.bam.bcf | /usr/share/samtools/vcfutils.pl varFilter -W 5 -Q 20 -d 4 -D100 > $RUNNAME.dedup.bam.bcf.vcf + + +samtools mpileup $RUNNAME.dedup.bam | awk '{print $4}' | perl mean_coverage.pl + +cat $RUNNAME.dedup.bam.bcf.vcf | \ + grep -v \# | \ + cut -f 8 | \ + cut -d ';' -f 1 | \ + grep -v INDEL | \ + cut -d '=' -f 2 | \ + perl -ne 'BEGIN{$v=0;$c=0;$min=10000000;$max=0;$av=0;$var=0;} END{ $a=$v/$c; $dev=sqrt($var/$c); $mx=$a+(2*$dev); print "$c\t$v\t$a\t$min\t$max\t$av\t$var\t$dev\t$mx\n"; } $c+=1; $v+=$_; if ($_ < $min) { $min = $_; }; if ($_ > $max) { $max = $_; }; if ($c==1) { $av = $_; $var=0; } else { $av=(($av*$c)+$_)/($c+1); $var+=($_-$av)**2; $dev=sqrt($var/$c); print "$av\t$var\t$dev\n";}' | \ + tee $RUNNAME.dedup.bam.bcf.vcf.stats + + +samtools index $RUNNAME.dedup.bam +samtools depth $RUNNAME.dedup.bam > $RUNNAME.dedup.bam.cov +samtools idxstats $RUNNAME.dedup.bam > $RUNNAME.dedup.bam.idxflags +samtools flagstat $RUNNAME.dedup.bam > $RUNNAME.dedup.bam.stats + diff --git a/pimp/snpcalling.txt b/pimp/snpcalling.txt new file mode 120000 index 0000000000000000000000000000000000000000..a25eb1917e49b89b44f184d54c41146a781a0c30 --- /dev/null +++ b/pimp/snpcalling.txt @@ -0,0 +1 @@ +../heinz/snpcalling.txt \ No newline at end of file diff --git a/run2.py b/run2.py index 302f37726aef70422dbccfb229a4315a90187fb1..08dd184d68f901f032ca116b62d2031631357ea0 100755 --- a/run2.py +++ b/run2.py @@ -9,6 +9,8 @@ import glob import atexit from pprint import pprint as pp +#./SConstruct.runReal --incsv=all3.csv --threads=20 -j 5 all + dry_run = False #dry_run = True @@ -255,7 +257,7 @@ def addCommand(name, cmd, TARGET=None, SOURCE=[], DATA={}, DEPS=[], REQS=[], PRE valStr = " ".join([str(x) for x in val]) valindex = data.index(val) data[valindex] = valStr - print " CONVERTING %s FROM %s TO %s" % (key, val, valStr) + print " CONVERTING FROM %s TO %s" % (val, valStr) #print "checking target" @@ -462,13 +464,13 @@ def main(): os.makedirs(out_folder) projsetup = { - 'proj_name' : proj_name, - 'in_fasta' : in_fasta, - 'in_csv' : in_csv, - 'base_folder': base_folder, - 'db_folder' : db_folder, - 'map_folder' : map_folder, - 'out_folder' : out_folder + 'proj_name' : proj_name, + 'in_fasta' : in_fasta, + 'in_csv' : in_csv, + 'base_folder': base_folder, + 'db_folder' : db_folder, + 'map_folder' : map_folder, + 'out_folder' : out_folder } pp(projsetup) setup.append(projsetup) @@ -488,8 +490,8 @@ def main(): print "target %s: %s" % (projsetup['proj_name'].upper(), projsetup['outfile']) print "\n\n\n" -def project( setup ): +def project( setup ): datas = {} ########################## @@ -608,10 +610,10 @@ def process454(data, setup): out_fastq = out_prefix + '.fastq' + out_fastq_0 = out_fastq.replace('.fastq', '_0.fastq') out_fastq_f = out_fastq.replace('.fastq', '_1.fastq') out_fastq_r = out_fastq.replace('.fastq', '_2.fastq') out_fastq_n = out_fastq.replace('.fastq', '_n.fastq') - out_fastq_0 = out_fastq.replace('.fastq', '_0.fastq') data[comm_name]['file_names' ] = [out_fastq_f, out_fastq_r] data[comm_name]['file_names_454' ] = infile @@ -758,16 +760,22 @@ def run_bwa(data, setup): CMD_MAPPER1 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq1cmd)s > %(TARGET)s" CMD_MAPPER2 = "%(bwa_exe)s aln -t %(threads)d %(db_name)s %(fastq2cmd)s > %(TARGET)s" - CMD_ALIGNER = "%(bwa_exe)s sampe -o 20 -P -f %(TARGET)s " + CMD_ALIGNER = "%(bwa_exe)s sampe -o 20 -P " + #CMD_ALIGNER = "%(bwa_exe)s sampe -o 20 -P -f %(TARGET)s " + if 'gsize' in info: CMD_ALIGNER += " -a %(gmax)s " CMD_ALIGNER += "%(db_name)s %(sai1)s %(sai2)s %(fastq1cmd)s %(fastq2cmd)s" - CMD_CONVERT = "%(samtools_exe)s view -S -b -q 30 -1 %(sam)s > %(TARGET)s" # TARGET = bam_tmp + CMD_ALIGNER += " | %(samtools_exe)s view -Su -b -q 30 -1 -" + #CMD_CONVERT = "%(samtools_exe)s view -S -b -q 30 -1 %(sam)s > %(TARGET)s" # TARGET = bam_tmp #CMD_MV1 = "mv %(bam_tmp_rnd)s %(bam_tmp)s" - CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -o -l 1 -@ 20 -m 2G %(bam_tmp)s %(TARGET)s > %(TARGET)s" + + CMD_ALIGNER += " | %(samtools_exe)s sort -m 53687091200 -o -l 1 -@ 20 -m 2G - %(TARGET)s > %(TARGET)s" + #CMD_SORT = "%(samtools_exe)s sort -m 53687091200 -o -l 1 -@ 20 -m 2G %(bam_tmp)s %(TARGET)s > %(TARGET)s" #samtools sort -m 53687091200 -o -l 1 -@ 20 -m 2G 136.bam /run/shm/bwa_scons_mapping_13117_J9DFw5 > /run/shm/bwa_scons_mapping_13117_J9DFw5 #CMD_MV2 = "mv %(bam_pfx_rnd_bam)s %(bam)s" + CMD_INDEX = '%(samtools_exe)s index %(bam)s' CMD_COV = '%(samtools_exe)s depth %(bam)s > %(TARGET)s' CMD_STATS = '%(samtools_exe)s idxstats %(bam)s > %(TARGET)s' @@ -777,19 +785,21 @@ def run_bwa(data, setup): addCommand('map' , CMD_MAPPER1, TARGET = info['sai1'] , SOURCE = info['fastq1'] , DATA = info, REQS = [ info['bwt_index'] ] , USETMP=True ) addCommand('map' , CMD_MAPPER2, TARGET = info['sai2'] , SOURCE = info['fastq2'] , DATA = info, REQS = [ info['bwt_index'] ] , USETMP=True ) - addCommand('align' , CMD_ALIGNER, TARGET = info['sam'] , SOURCE = [ info['fastq1'], info['fastq2'] ] , DATA = info, DEPS = [ bwt_index, info['sai1'], info['sai2'] ], USETMP=True ) + #addCommand('align' , CMD_ALIGNER, TARGET = info['sam'] , SOURCE = [ info['fastq1'], info['fastq2'] ] , DATA = info, DEPS = [ bwt_index, info['sai1'], info['sai2'] ], USETMP=True ) + addCommand('align' , CMD_ALIGNER, TARGET = info['bam'] , SOURCE = [ info['fastq1'], info['fastq2'] ] , DATA = info, DEPS = [ bwt_index, info['sai1'], info['sai2'] ], USETMP=True ) + CMD_CONVERT_tmp = False if (not os.path.exists(info['bam'])) or ( os.path.getsize(info['bam']) == 0 ): CMD_CONVERT_tmp = True - addCommand('convert' , CMD_CONVERT, TARGET = info['bam_tmp'] , SOURCE = info['sam' ] , DATA = info , USETMP=True ) + #addCommand('convert' , CMD_CONVERT, TARGET = info['bam_tmp'] , SOURCE = info['sam' ] , DATA = info , USETMP=True ) #addCommand('moving' , CMD_MV1 , TARGET = info['bam_tmp' ] , SOURCE = info['bam_tmp_rnd'] , DATA = info) - CMD_SORT_tmp = False - if (not os.path.exists(info['bam'])) or ( os.path.getsize(info['bam']) == 0 ): - CMD_SORT_tmp = True - addCommand('sort' , CMD_SORT , TARGET = info['bam'] , SOURCE = info['bam_tmp' ] , DATA = info , USETMP=True) + #CMD_SORT_tmp = False + #if (not os.path.exists(info['bam'])) or ( os.path.getsize(info['bam']) == 0 ): + # CMD_SORT_tmp = True + #addCommand('sort' , CMD_SORT , TARGET = info['bam'] , SOURCE = info['bam_tmp' ] , DATA = info , USETMP=True) #addCommand('moving' , move , TARGET = info['bam'] , SOURCE = info['bam_pfx_rnd_bam'] , DATA = info) addCommand('index' , CMD_INDEX , TARGET = info['bai'] , SOURCE = info['bam'] , DATA = info ) @@ -825,29 +835,30 @@ def run_bwa(data, setup): addCommand('preoking' , CMD_PRE_OK , TARGET = pre_ok , SOURCE = oks, DATA = data ) - ########################## - ## IF NOT TO MERGE, STOPS HERE - ########################## - setup['data'] = data - if not do_merge: - print "FINISHED", setup['proj_name'] - env.Alias( setup['proj_name'], pre_ok ) - setup['bamdata'] = "" - setup['outfile'] = pre_ok - return [ pre_ok ] + RND_NAME1 = None + RND_NAME2 = None + bam_tmp = '%s.tmp.bam' % bam_name + bam_sht = '%s' % bam_name + bam_out = '%s.bam' % bam_name + if ( not do_merge ) or ( len(bams) == 1 ): + bam_name = bams[0] + RND_NAME1 = "" + RND_NAME2 = "" + bam_tmp = '%s.tmp.bam' % bam_name + bam_sht = '%s' % bam_name + bam_out = '%s' % bam_name + + else: + RND_NAME1 = tempfile.mktemp(dir=TMP_FOLDER, prefix='bwa_', suffix='.bam') + RND_NAME2 = tempfile.mktemp(dir=TMP_FOLDER, prefix='bwa_', suffix='.bam') + bam_tmp = '%s.tmp.bam' % bam_name + bam_sht = '%s' % bam_name + bam_out = '%s.bam' % bam_name - ########################## - ## MERGE BAMS - ########################## - bam_tmp = '%s.tmp.bam' % bam_name - bam_sht = '%s' % bam_name - bam_out = '%s.bam' % bam_name - RND_NAME1 = tempfile.mktemp(dir=TMP_FOLDER, prefix='bwa_', suffix='.bam') - RND_NAME2 = tempfile.mktemp(dir=TMP_FOLDER, prefix='bwa_', suffix='.bam') bam_data = { 'samtools_exe': SAMTOOLS_EXE, @@ -864,6 +875,7 @@ def run_bwa(data, setup): 'stat' : bam_out + '.stats', 'bok' : bam_out + '.ok', 'flag' : bam_out + '.flagstats', + 'filter' : bam_out + '.filtered.bam', #'fix' : bam_out + '.filtered.bam.fixmate.bam', #'dedup' : bam_out + '.filtered.bam.dedup.bam', @@ -872,6 +884,16 @@ def run_bwa(data, setup): 'fstat' : bam_out + '.filtered.bam.stats', 'fflag' : bam_out + '.filtered.bam.flagstats', 'fok' : bam_out + '.filtered.bam.ok', + + 'dedup' : bam_out + '.filtered.bam.dedup.bam', + #'fix' : bam_out + '.filtered.bam.fixmate.bam', + #'dedup' : bam_out + '.filtered.bam.dedup.bam', + 'dindex' : bam_out + '.filtered.bam.dedup.bam.bai', + 'dcov' : bam_out + '.filtered.bam.dedup.bam.cov', + 'dstat' : bam_out + '.filtered.bam.dedup.bam.stats', + 'dflag' : bam_out + '.filtered.bam.dedup.bam.flagstats', + 'dok' : bam_out + '.filtered.bam.dedup.bam.ok', + 'bams' : " ".join(bams) } @@ -896,6 +918,26 @@ def run_bwa(data, setup): + ########################## + ## IF NOT TO MERGE, STOPS HERE + ########################## + setup['data'] = data + if ( not do_merge ) or ( len(bams) == 1 ): + bam_data['bok'] = info['ok'] + + filterbam(bam_data) + + print "FINISHED", setup['proj_name'] + env.Alias( setup['proj_name'], bam_data['dok'] ) + setup['bamdata'] = bam_data + setup['outfile'] = bam_data['dok'] + return [ bam_data['dok'] ] + + + + ########################## + ## MERGE BAMS + ########################## # attatch RG tag (source), min compression CMD_MERGE = '%(samtools_exe)s merge -r -1 %(TARGET)s %(bams)s' #CMD_MV1 = "mv %(bamrnd1)s %(bam)s" @@ -904,15 +946,6 @@ def run_bwa(data, setup): CMD_STAT = '%(samtools_exe)s idxstats %(bam)s > %(TARGET)s' CMD_FLAG = '%(samtools_exe)s flagstat %(bam)s > %(TARGET)s' CMD_BOK = touch - CMD_FILTER = '%(samtools_exe)s view %(sam_filter)s -b %(bam)s > %(TARGET)s' - #CMD_MV2 = 'mv %(bamrnd2)s %(filter)s' - #CMD_FIX = '%(samtools_exe)s fixmate %(filter)s %(fix)s' - #CMD_DEDUP = '%(samtools_exe)s rmdup -S %(filter)s %(dedup)s' - CMD_FINDEX = '%(samtools_exe)s index %(filter)s' - CMD_FCOV = '%(samtools_exe)s depth %(filter)s > %(TARGET)s' - CMD_FSTAT = '%(samtools_exe)s idxstats %(filter)s > %(TARGET)s' - CMD_FFLAG = '%(samtools_exe)s flagstat %(filter)s > %(TARGET)s' - CMD_FOK = touch CMD_MERGE_tmp = False @@ -930,6 +963,26 @@ def run_bwa(data, setup): bdeps = [ bam_data['index'], bam_data['cov'], bam_data['stat'], bam_data['flag'] ] addCommand('bam ok' , CMD_BOK , TARGET = bam_data['bok'] , SOURCE = bdeps ) + filterbam(bam_data) + + if __name__ == 'SCons.Script': + print "FINISHED", setup['proj_name'] + env.Alias( setup['proj_name'], bam_data['dok'] ) + setup['bamdata'] = bam_data + setup['outfile'] = bam_data['dok'] + return [ bam_data['dok'] ] + +def filterbam( bam_data ): + CMD_FILTER = '%(samtools_exe)s view %(sam_filter)s -b %(bam)s > %(TARGET)s' + #CMD_MV2 = 'mv %(bamrnd2)s %(filter)s' + #CMD_FIX = '%(samtools_exe)s fixmate %(filter)s %(fix)s' + #CMD_DEDUP = '%(samtools_exe)s rmdup -S %(filter)s %(dedup)s' + CMD_FINDEX = '%(samtools_exe)s index %(filter)s' + CMD_FCOV = '%(samtools_exe)s depth %(filter)s > %(TARGET)s' + CMD_FSTAT = '%(samtools_exe)s idxstats %(filter)s > %(TARGET)s' + CMD_FFLAG = '%(samtools_exe)s flagstat %(filter)s > %(TARGET)s' + CMD_FOK = touch + CMD_FILTER_tmp = False if ( not os.path.exists( bam_data['filter'] )) or ( os.path.getsize(bam_data['filter']) == 0 ): CMD_FILTER_tmp = True @@ -951,13 +1004,35 @@ def run_bwa(data, setup): ] , DATA = bam_data ) - if __name__ == 'SCons.Script': - print "FINISHED", setup['proj_name'] - env.Alias( setup['proj_name'], bam_data['fok'] ) - setup['bamdata'] = bam_data - setup['outfile'] = bam_data['fok'] - return [ bam_data['fok'] ] + CMD_DEDUP = '%(samtools_exe)s rmdup %(filter)s %(TARGET)s' + #CMD_MV2 = 'mv %(bamrnd2)s %(filter)s' + #CMD_FIX = '%(samtools_exe)s fixmate %(filter)s %(fix)s' + #CMD_DEDUP = '%(samtools_exe)s rmdup -S %(filter)s %(dedup)s' + CMD_DINDEX = '%(samtools_exe)s index %(filter)s' + CMD_DCOV = '%(samtools_exe)s depth %(filter)s > %(TARGET)s' + CMD_DSTAT = '%(samtools_exe)s idxstats %(filter)s > %(TARGET)s' + CMD_DFLAG = '%(samtools_exe)s flagstat %(filter)s > %(TARGET)s' + CMD_DOK = touch + + CMD_DEDUP_tmp = False + if ( not os.path.exists( bam_data['dedup'] )) or ( os.path.getsize(bam_data['dedup']) == 0 ): + CMD_DEDUP_tmp = True + addCommand('deduping' , CMD_DEDUP , TARGET = bam_data['dedup'] , SOURCE = bam_data['filter'] , DATA = bam_data, DEPS = [ bam_data['fok'] ], USETMP=CMD_DEDUP_tmp ) + #addCommand('moving' , CMD_MV2 , TARGET = bam_data['filter'] , SOURCE = bam_data['bamrnd2'], DATA = bam_data ) + + + addCommand('Dindexing' , CMD_DINDEX, TARGET = bam_data['dindex'] , SOURCE = bam_data['dedup'] , DATA = bam_data ) + addCommand('Dcoverage' , CMD_DCOV , TARGET = bam_data['dcov'] , SOURCE = bam_data['dedup'] , DATA = bam_data, DEPS = [ bam_data['dindex'] ], USETMP=True ) + addCommand('Dstatistics' , CMD_DSTAT , TARGET = bam_data['dstat'] , SOURCE = bam_data['dedup'] , DATA = bam_data, DEPS = [ bam_data['dindex'] ], USETMP=True ) + addCommand('Dflag stats' , CMD_DFLAG , TARGET = bam_data['dflag'] , SOURCE = bam_data['dedup'] , DATA = bam_data, DEPS = [ bam_data['dindex'] ], USETMP=True ) + addCommand('Dok' , CMD_DOK , TARGET = bam_data['dok'] , SOURCE = [ + bam_data['dindex' ], + bam_data['dcov' ], + bam_data['dstat' ], + bam_data['dflag' ], + bam_data['fok'] + ] , DATA = bam_data ) def bf_to_str(bf): diff --git a/sa1/sa.csv b/sa1/sa.csv index d23dc9ff3277dfd259c8e8aa7eb3866b69f72d36..295472f899632462202552d2a9264cc5915cd2c6 100644 --- a/sa1/sa.csv +++ b/sa1/sa.csv @@ -1,12 +1,12 @@ #group name platform library name library size library size min library size max file names -120524_I238_FCD1174ACXX_L2_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz -120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz -120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz +120524_I238_FCD1174ACXX_L2_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L2_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz +120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120527_I247_FCC0U0NACXX_L6_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz +120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133 illumina pe 170 153 187 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009264-133_2.fq.gz.clean.dup.clean.gz 120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/058/illumina/pairedend_500/120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/058/illumina/pairedend_500/120530_I649_FCC0U6WACXX_L4_SZAXPI009359-46_2.fq.gz.clean.dup.clean.gz 120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz 120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120530_I235_FCC0U06ACXX_L1_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz -120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz -120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz +120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L7_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz +120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/arcanum/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L5_TOMxitDGJDWAAPEI-93_2.fq.gz.clean.dup.clean.gz 20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/arcanum/filtered/454/20000/20000.dedup.sff 8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/arcanum/filtered/454/8000/8000.dedup.sff diff --git a/sa1/sa.groups.csv b/sa1/sa.groups.csv deleted file mode 120000 index 44be302312b82f8bb30236e770e6e20e7f9b4510..0000000000000000000000000000000000000000 --- a/sa1/sa.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_arcanum_raw/in_groups2.csv \ No newline at end of file diff --git a/sa1/sa.libs.csv b/sa1/sa.libs.csv deleted file mode 120000 index dba8698185d5c22f5a8a61bb55f0a0520ffa7484..0000000000000000000000000000000000000000 --- a/sa1/sa.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_arcanum_raw/in_libs2.csv \ No newline at end of file diff --git a/sa2/run.py b/sa2/run.py deleted file mode 120000 index b2af2326060ab4d54b535a1beba01ad0228427e8..0000000000000000000000000000000000000000 --- a/sa2/run.py +++ /dev/null @@ -1 +0,0 @@ -../run.py \ No newline at end of file diff --git a/sa2/sa.fa b/sa2/sa.fa index 756e5ce87476ea222287411c43b5e4301925990f..28265c306598cf333d80f3fb2b370cbc42191502 120000 --- a/sa2/sa.fa +++ b/sa2/sa.fa @@ -1 +1 @@ -../../allpaths_lg_sample_arcanum_raw/sa/data/run/ASSEMBLIES/test2/final.assembly.fasta \ No newline at end of file +../../allpaths_lg_sample_arcanum_raw/sa/data/run/ASSEMBLIES/test2/final.contigs.fasta \ No newline at end of file diff --git a/sa2/sa.groups.csv b/sa2/sa.groups.csv deleted file mode 120000 index 44be302312b82f8bb30236e770e6e20e7f9b4510..0000000000000000000000000000000000000000 --- a/sa2/sa.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_arcanum_raw/in_groups2.csv \ No newline at end of file diff --git a/sa2/sa.libs.csv b/sa2/sa.libs.csv deleted file mode 120000 index dba8698185d5c22f5a8a61bb55f0a0520ffa7484..0000000000000000000000000000000000000000 --- a/sa2/sa.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_arcanum_raw/in_libs2.csv \ No newline at end of file diff --git a/sh1/sh.csv b/sh1/sh.csv index 81a3c10ec628a2f15890e720c4b971721b173cd9..f3e73d4583abeaf853f757b3053c316e88aac2a3 100644 --- a/sh1/sh.csv +++ b/sh1/sh.csv @@ -2,10 +2,10 @@ 120524_I238_FCD1174ACXX_L3_SZAXPI009265-136 illumina pe 170 153 187 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L3_SZAXPI009265-136_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L3_SZAXPI009265-136_2.fq.gz.clean.dup.clean.gz 120530_I235_FCC0U06ACXX_L8_SZAXPI009265-136 illumina pe 170 153 187 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009265-136_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L8_SZAXPI009265-136_2.fq.gz.clean.dup.clean.gz 120530_I235_FCD117YACXX_L7_SZAXPI009265-136 illumina pe 170 153 187 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L7_SZAXPI009265-136_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L7_SZAXPI009265-136_2.fq.gz.clean.dup.clean.gz -120512_I238_FCD115HACXX_L1_SZAXPI008752-75 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120512_I238_FCD115HACXX_L1_SZAXPI008752-75_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120512_I238_FCD115HACXX_L1_SZAXPI008752-75_1.fq.gz.clean.dup.clean.gz -120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75_1.fq.gz.clean.dup.clean.gz +120512_I238_FCD115HACXX_L1_SZAXPI008752-75 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120512_I238_FCD115HACXX_L1_SZAXPI008752-75_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120512_I238_FCD115HACXX_L1_SZAXPI008752-75_2.fq.gz.clean.dup.clean.gz +120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/072/illumina/pairedend_500/120613_I246_FCC0WM1ACXX_L7_SZAXPI008752-75_2.fq.gz.clean.dup.clean.gz 120524_I238_FCD1174ACXX_L7_TOMxitDGKDWAAPEI-90 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L7_TOMxitDGKDWAAPEI-90_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L7_TOMxitDGKDWAAPEI-90_2.fq.gz.clean.dup.clean.gz 120527_I247_FCC0U0NACXX_L7_TOMxitDGKDWAAPEI-90 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGKDWAAPEI-90_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L7_TOMxitDGKDWAAPEI-90_2.fq.gz.clean.dup.clean.gz -120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90_1.fq.gz.clean.dup.clean.gz +120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/habrochaites/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGKDWAAPEI-90_2.fq.gz.clean.dup.clean.gz 20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/habrochaites/filtered/454/20000/20000.dedup.sff 8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/habrochaites/filtered/454/8000/8000.dedup.sff diff --git a/sh1/sh.groups.csv b/sh1/sh.groups.csv deleted file mode 120000 index 78bb6a49bbed9363443b4178b957dd215eb50eaf..0000000000000000000000000000000000000000 --- a/sh1/sh.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_habrochaites_raw/in_groups.csv \ No newline at end of file diff --git a/sh1/sh.libs.csv b/sh1/sh.libs.csv deleted file mode 120000 index 3376ea01c874b1843660c511c338168cd16c7c93..0000000000000000000000000000000000000000 --- a/sh1/sh.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_habrochaites_raw/in_libs.csv \ No newline at end of file diff --git a/sh2/sh.fa b/sh2/sh.fa index 5e6914cc0f71faa2563618a424638a0c91f76e3a..870d516e2c7c2dd1679d8d03e093cc2516589ce8 120000 --- a/sh2/sh.fa +++ b/sh2/sh.fa @@ -1 +1 @@ -../../allpaths_lg_habrochaites_raw/sh/data/run/ASSEMBLIES/test2/final.assembly.fasta \ No newline at end of file +../../allpaths_lg_habrochaites_raw/sh/data/run/ASSEMBLIES/test2/final.contigs.fasta \ No newline at end of file diff --git a/sh2/sh.groups.csv b/sh2/sh.groups.csv deleted file mode 120000 index 78bb6a49bbed9363443b4178b957dd215eb50eaf..0000000000000000000000000000000000000000 --- a/sh2/sh.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_habrochaites_raw/in_groups.csv \ No newline at end of file diff --git a/sl1/run.py b/sl1/run.py deleted file mode 120000 index b2af2326060ab4d54b535a1beba01ad0228427e8..0000000000000000000000000000000000000000 --- a/sl1/run.py +++ /dev/null @@ -1 +0,0 @@ -../run.py \ No newline at end of file diff --git a/sl1/sl.fa b/sl1/sl.fa index e91afcfcb27dece25aeed564c55d0a48a55f710d..713a313b68c05f526423341746a7102d32e7bbf9 120000 --- a/sl1/sl.fa +++ b/sl1/sl.fa @@ -1 +1 @@ -../../allpaths_lg_sample_heinz_raw/sl/data/run/ASSEMBLIES/test/final.assembly.fasta \ No newline at end of file +../../allpaths_lg_sample_heinz_raw/sl/data/run/ASSEMBLIES/test/final.contigs.fasta \ No newline at end of file diff --git a/sl1/sl.groups.csv b/sl1/sl.groups.csv deleted file mode 120000 index bcbf57e362bbc7ae32c34ef39e47d1bbbff2c1a7..0000000000000000000000000000000000000000 --- a/sl1/sl.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_heinz_raw/in_groups_heinz.csv \ No newline at end of file diff --git a/sl1/sl.libs.csv b/sl1/sl.libs.csv deleted file mode 120000 index af27025951551d9feb21b4d6e40458128678806f..0000000000000000000000000000000000000000 --- a/sl1/sl.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_sample_heinz_raw/in_libs_heinz.csv \ No newline at end of file diff --git a/sp1/run.py b/sp1/run.py deleted file mode 120000 index b2af2326060ab4d54b535a1beba01ad0228427e8..0000000000000000000000000000000000000000 --- a/sp1/run.py +++ /dev/null @@ -1 +0,0 @@ -../run.py \ No newline at end of file diff --git a/sp1/sp.csv b/sp1/sp.csv index f1dacf430d315280df1e55810be903e76f9fdfd3..bb7c546e9881bd0b8692cd211c280214f5e14b14 100644 --- a/sp1/sp.csv +++ b/sp1/sp.csv @@ -1,13 +1,13 @@ #group name platform library name library size library size min library size max file names -120524_I238_FCD1174ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz +120524_I238_FCD1174ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120524_I238_FCD1174ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz 120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCC0U06ACXX_L4_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz -120530_I235_FCD117YACXX_L1_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz +120530_I235_FCD117YACXX_L1_SZAXPI009266-140 illumina pe 170 153 187 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/pairedend_170/120530_I235_FCD117YACXX_L1_SZAXPI009266-140_2.fq.gz.clean.dup.clean.gz 120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79 illumina pe 500 450 550 /home/assembly/tomato150/reseq/filtered/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/reseq/filtered/074/illumina/pairedend_500/120512_I238_FCC0U42ACXX_L1_SZAXPI008753-79_2.fq.gz.clean.dup.clean.gz -120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz +120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120530_I235_FCD117YACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz 120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120615_I248_FCD14ADACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz 120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120624_I232_FCC0U3HACXX_L6_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz -120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz -120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz +120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120524_I238_FCD1174ACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz +120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89 illumina mp 2000 1800 2200 /home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_1.fq.gz.clean.dup.clean.gz;/home/assembly/tomato150/denovo/pennellii/filtered/illumina/matepair_2000/120527_I247_FCC0U0NACXX_L8_TOMxitDGLDWAAPEI-89_2.fq.gz.clean.dup.clean.gz 20000.dedup 454 mp 20000 18000 22000 /home/assembly/tomato150/denovo/pennellii/filtered/454/20000/20000.dedup.sff 8000.dedup 454 mp 8000 7200 8800 /home/assembly/tomato150/denovo/pennellii/filtered/454/8000/8000.dedup.sff 3000.dedup 454 mp 3000 2700 3300 /home/assembly/tomato150/denovo/pennellii/filtered/454/3000/3000.dedup.sff diff --git a/sp1/sp.groups.csv b/sp1/sp.groups.csv deleted file mode 120000 index 092a6fdfeba79fa70aaa5d987d37c21fae192808..0000000000000000000000000000000000000000 --- a/sp1/sp.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_pennellii_raw/in_groups2.csv \ No newline at end of file diff --git a/sp1/sp.libs.csv b/sp1/sp.libs.csv deleted file mode 120000 index 945ecca20e6aaae2c5299be8966c2aea4a6bb359..0000000000000000000000000000000000000000 --- a/sp1/sp.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_pennellii_raw/in_libs2.csv \ No newline at end of file diff --git a/sp2/sp.groups.csv b/sp2/sp.groups.csv deleted file mode 120000 index 092a6fdfeba79fa70aaa5d987d37c21fae192808..0000000000000000000000000000000000000000 --- a/sp2/sp.groups.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_pennellii_raw/in_groups2.csv \ No newline at end of file diff --git a/sp2/sp.libs.csv b/sp2/sp.libs.csv deleted file mode 120000 index 945ecca20e6aaae2c5299be8966c2aea4a6bb359..0000000000000000000000000000000000000000 --- a/sp2/sp.libs.csv +++ /dev/null @@ -1 +0,0 @@ -../../allpaths_lg_pennellii_raw/in_libs2.csv \ No newline at end of file