Commit 71f407d4 authored by Aflitos, Saulo Alves's avatar Aflitos, Saulo Alves
Browse files

First import of FRC runscript for Allpaths of Heinz

parents
#!/bin/bash
# Activate debugging from here
#set -o xtrace
#set -o verbose
# Safeguards
set -o nounset
set -o errexit
# Grab script path (from http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-in )
SOURCE="${BASH_SOURCE[0]}"
DIR="$( dirname "$SOURCE" )"
while [ -h "$SOURCE" ]
do
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
done
DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
# Use a single file for logging, use the starttime as identifier
LOGFILE="kmerfreq_heinz."`date +%Y%m%d_%k%M%S`.out
# Input assembly
#allpaths /home/assembly/dev_150/assemblies/allpaths_lg_sample_heinz_raw/sl/data/run/ASSEMBLIES/test/final.assembly.fasta
#allpaths_454 /home/assembly/dev_150/assemblies/allpaths_lg_sample_heinz_raw_with454/sl/data/run/ASSEMBLIES/test/final.assembly.fasta
#clc /home/assembly/dev_150/assemblies/clc-default/clc_contigs.fa
#heinz /home/assembly/dev_150/assemblies/S_lycopersicum_scaffolds.2.40.fa
#fermi /home/assembly/progs/fermi/heinz/fmdef.p4.fa
ASSEMBLY=/home/assembly/dev_150/assemblies/allpaths_lg_sample_heinz_raw/sl/data/run/ASSEMBLIES/test/final.assembly.fasta
PREFIX=allpaths_lg
$MIN_PE_INS=320
$MAX_PE_INS=430
$MIN_MP_INS=2435
$MAX_MP_INS=3555
$ESTIMATED_GENOME_SIZE=760000000
$OUTPUT_HEADER=${PREFIX}.FRC.
# Map reads against assembly with BWA
# Create index
if [ ! -f ${PREFIX}.bwt ]
then
bwa index -a is -p ${PREFIX} $ASSEMBLY
fi
# MP data
# Convert the mate pair data into paired-end format.
if [ ! -f rc_3018DAAXX_2_f.fastq ]
then
revseq -sequence /home/assembly/dev_150/sample_heinz/raw/illumina/MP_3000/3018DAAXX_2_f.fastq -outseq rc_3018DAAXX_2_f.fastq -notag
fi
if [ ! -f rc_3018DAAXX_2_r.fastq ]
then
revseq -sequence /home/assembly/dev_150/sample_heinz/raw/illumina/MP_3000/3018DAAXX_2_r.fastq -outseq rc_3018DAAXX_2_r.fastq -notag
fi
if [ ! -f rc_30THBAAXX_2_f.fastq ]
then
revseq -sequence /home/assembly/dev_150/sample_heinz/raw/illumina/MP_3000/30THBAAXX_2_f.fastq -outseq rc_30THBAAXX_2_f.fastq -notag
fi
if [ ! -f rc_30THBAAXX_2_r.fastq ]
then
revseq -sequence /home/assembly/dev_150/sample_heinz/raw/illumina/MP_3000/30THBAAXX_2_r.fastq -outseq rc_30THBAAXX_2_r.fastq -notag
fi
# Map MP reads
for file in rc_3018DAAXX_2_f.fastq rc_3018DAAXX_2_r.fastq rc_30THBAAXX_2_f.fastq rc_30THBAAXX_2_r.fastq
do
BASENAME=`basename $file .fastq`
bwa aln -t 80 ${PREFIX} ${file} > ${PREFIX}.${BASENAME}.sai
done
# Create SAM files of the paired mappings
for name in rc_3018DAAXX_2 rc_30THBAAXX_2
do
bwa sampe -P ${PREFIX} ${PREFIX}.${name}_f.sai ${PREFIX}.${name}_r.sai ${name}_f.fastq ${name}_r.fastq > ${PREFIX}.${name}.sam
# Convert result to BAM
samtools view -S ${PREFIX}.${name}.sam -b > ${PREFIX}.${name}.bam
# Sort the BAM
samtools sort ${PREFIX}.${name}.bam ${PREFIX}.${name}.sorted
# Index the BAM
samtools index ${PREFIX}.${name}.sorted.bam
done
# Merge the created BAMs
samtools merge -r -1 ${PREFIX}.MP.bam ${PREFIX}.rc_3018DAAXX_2.sorted.bam ${PREFIX}.rc_30THBAAXX_2.sorted.bam
# The PE data : /home/assembly/dev_150/sample_heinz/raw/illumina/PE_500/*.64.fastq
# Map PE reads
for file in /home/assembly/dev_150/sample_heinz/raw/illumina/PE_500/sample_R1_001.64.fastq /home/assembly/dev_150/sample_heinz/raw/illumina/PE_500/sample_R2_001.64.fastq
do
BASENAME=`basename $file .fastq`
bwa aln -t 80 ${PREFIX} ${file} > ${PREFIX}.${BASENAME}.sai
done
# Create SAM files of the paired mappings
bwa sampe -P ${PREFIX} ${PREFIX}.sample_R1_001.64.sai \
${PREFIX}.sample_R2_001.64.sai \
/home/assembly/dev_150/sample_heinz/raw/illumina/PE_500/sample_R1_001.64.fastq \
/home/assembly/dev_150/sample_heinz/raw/illumina/PE_500/sample_R2_001.64.fastq \
> ${PREFIX}.PE.sam
# Convert result to BAM
samtools view -S ${PREFIX}.PE.sam -b > ${PREFIX}.PE.bam
# Sort the BAM
samtools sort ${PREFIX}.PE.bam ${PREFIX}.PE.sorted
# Index the BAM
samtools index ${PREFIX}.PE.sorted.bam
# run FRC
FRC --pe-sam ${PREFIX}.PE.sorted.ba \
--pe-min-insert $MIN_PE_INS \
--pe-max-insert $MAX_PE_INS \
--mp-sam ${PREFIX}.MP.bam \
--mp-min-insert $MIN_MP_INS \
--mp-max-insert $MAX_MP_INS \
--genome-size $ESTIMATED_GENOME_SIZE \
--output $OUTPUT_HEADER \
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment