Skip to content
Snippets Groups Projects
Commit eb7ced6e authored by Bart's avatar Bart
Browse files

general statistics about read mapping of bins and the assembly

parent db081f1f
No related branches found
No related tags found
No related merge requests found
#!/bin/python3
'''
Generates a table of the metagenomic assembly mapping and binning statistics:
input:
1. binContigsFile, format: bin_name<tab>contig
2. samtools idxstats output file
3. BBMap mapping stats stdErr file
4. An identifier (string)
standard out:
ID
Reads
Mapped reads %
Assembly size
Bins
Total bin size
Binned %
Reads mapped to bins %
'''
import os
import sys
import re
binContigsFile = sys.argv[1]
samstatsFile = sys.argv[2]
bbmap = sys.argv[3]
identifier = sys.argv[4]
# Get total reads from BBmap stats
for line in open(bbmap,"r").readlines():
hit = re.search("^Reads:", line)
if hit:
total_reads = int(line.strip().split()[-1])
# Get totals bins and all contigs with in bin from binContigsFile
binContigs = set()
bins = 0
prev_bin = ""
for line in open(binContigsFile).readlines():
if line.split()[0] != prev_bin: bins+=1
binContigs.add(line.strip().split()[1])
prev_bin = line.split()[0]
total_assembly_size = 0
mapped_reads = 0
total_bin_size = 0
mapped_bin_reads = 0
# Read through the samtools idxstats file
for line in open(samstatsFile,"r").readlines():
sline = line.split()
contig = sline[0]
contigLen = int(sline[1])
contigReads = int(sline[2])
total_assembly_size += contigLen
mapped_reads += contigReads
if contig in binContigs:
total_bin_size += contigLen
mapped_bin_reads += contigReads
mapped_reads_perc = round(mapped_reads/total_reads*100,2)
mapped_bin_reads_perc = round(mapped_bin_reads/mapped_reads*100,2)
binned_perc = round(total_bin_size/total_assembly_size*100,2)
print("ID"+"\t"+identifier)
print("Reads"+"\t"+str(total_reads))
print("Mapped reads"+"\t"+str(mapped_reads_perc)+"%")
print("Assembly size"+"\t"+str(total_assembly_size)+" bp")
print("Bins"+"\t"+str(bins))
print("Total bin size"+"\t"+str(total_bin_size)+" bp")
print("Binned"+"\t"+str(binned_perc)+"%")
print("Reads mapped to bins"+"\t"+str(mapped_bin_reads_perc)+"%")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment