From 73e07d707194809d372a68d2786ac4a65c573130 Mon Sep 17 00:00:00 2001
From: bart <bart.nijsse@wur.nl>
Date: Mon, 16 May 2022 16:43:02 +0200
Subject: [PATCH] moved to metagenomics

---
Notes (free-form text between the '---' marker and the diffstat is ignored
by git am):

This patch only deletes two helper scripts; it adds nothing. Per the
subject they were moved elsewhere (presumably a "metagenomics" repository
or directory - confirm the destination before applying).

Leading whitespace inside the removed for-loop body is written here as two
spaces. If the blob uses different indentation, apply with
"git apply --ignore-whitespace" or "git am -3".

Observations on the removed code, for whoever maintains the moved copies:

 * bin_assembly_stats.sh iterates over the output of `ls $1/*.fa*` and
   leaves every expansion unquoted, so paths containing whitespace are
   split into several words.
 * bin_assembly_stats.sh computes GC with `grep -v ">" $filepath` on the
   file as-is, so for a gzipped input the GC count is taken from compressed
   bytes, although the header comment says gzipped fasta files work.
 * bin_assembly_stats.sh counts only upper-case G and C
   (`grep -c "G\|C"`); lower-case bases are not counted.
 * get_unbinned_contigs.sh documents three arguments in its usage comment
   but also reads $4 as the pigz thread count.
 * get_unbinned_contigs.sh writes binned.headers into the current directory
   and does not remove it afterwards.

 bin_assembly_stats.sh   | 19 -------------------
 get_unbinned_contigs.sh |  7 -------
 2 files changed, 26 deletions(-)
 delete mode 100755 bin_assembly_stats.sh
 delete mode 100755 get_unbinned_contigs.sh

diff --git a/bin_assembly_stats.sh b/bin_assembly_stats.sh
deleted file mode 100755
index 93fd72c..0000000
--- a/bin_assembly_stats.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-# Loops through a folder of fasta files and calculates #contigs, total length, N50, largst contig and GC%.
-# (also works on gzipped fasta files)
-#
-# The raw_n50 executeable comes from the idba_ud assembler.
-
-echo -e 'bin\tcontigs\ttotal_length\tN50\tlargest\tGC%'
-for filepath in `ls $1/*.fa*`; do
-  bin=$(basename $filepath | sed s'/.fa.*//'g)
-  raw_n50_stats=$(/unlock/infrastructure/binaries/raw_n50 $filepath)
-
-  GC=$(grep -v ">" $filepath | sed -e 's/\(.\)/\1\n/g' | grep -c "G\|C")
-  size=$(echo $raw_n50_stats | awk '{print $11}')
-  GCcontent=$(echo | awk -v GC=$GC -v size=$size '{printf "%.2f", GC/size*100}')
-
-  stats=$(echo $raw_n50_stats | awk '{print $2"\t"$11"\t"$4"\t"$6}')
-  echo -e $bin"\t"$stats"\t"$GCcontent | sed 's/ /\t/g'
-done
diff --git a/get_unbinned_contigs.sh b/get_unbinned_contigs.sh
deleted file mode 100755
index 22ab695..0000000
--- a/get_unbinned_contigs.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-# sh get_unbinned_contigs.sh <contigs.fasta> <folder containing .fa bins> <identifier>
-
-cat $2/*.fa | grep "^>" | sed 's/>//g' | sort > binned.headers
-/unlock/infrastructure/binaries/faTools/faSomeRecords $1 -exclude binned.headers $3_unbinned.fasta
-pigz -p $4 $3_unbinned.fasta
-- 
GitLab