size bug fix

af4dae74 · Jasper Koehorst · 39167e17 · af4dae74
Commit af4dae74 authored 3 years ago by Jasper Koehorst
--- a/medaka/medaka.py
+++ b/medaka/medaka.py
@@ -20,9 +20,10 @@ def parse_arguments():
 	parser.add_argument("-i", "--input", help = "input fast(a/q) file", required=True)
 	parser.add_argument("-r", "--reference", help = "reference fasta file", required=True)
 	parser.add_argument("-t", "--threads", help = "Number of threads to use", nargs='?', default=2, type=int)
-	parser.add_argument("-m", help = "fill MD tag")
+	# parser.add_argument("-m", help = "fill MD tag")
 	parser.add_argument("-p", help = "output file prefix (default: reads)",nargs='?', default="reads", type=str)
 	parser.add_argument("-c", "--contig-chunks", help = "Number of contigs per medaka_consensus step", nargs='?',default=100, type=int)
+	parser.add_argument("-s", "--size-chunks", help = "Maximum size in bases for a chunk ", nargs='?',default=10000, type=int)
 	parser.add_argument("--model", help = "Model definition, default is equivalent to r941_min_fast_g303.", default="r941_min_fast_g303")
 	# Adding optional argument for ....
 	# ...
@@ -74,19 +75,25 @@ def medaka_consensus(args):
 	else:
 		reader = open(args.reference)

+	# Chunk based on number of chunks and genome size
+	size = 0
 	chunk = 0
-
+	
 	for line in reader:
 		if type(line) != str:
-			line = line.decode('ascii').strip()
+			line = line.decode('ascii')
+		line = line.strip()
 		if line.startswith(">"):
 			header = line.strip(">").strip()			
 			if chunk not in chunks:
 				chunks[chunk] = set()
-			if len(chunks[chunk]) > args.contig_chunks:
+			if len(chunks[chunk]) > args.contig_chunks or size > args.size_chunks:
+				size = 0
 				chunk = chunk + 1
 				chunks[chunk] = set()
 			chunks[chunk].add(header)
+		else:
+			size = size + len(line)
 	reader.close()

 	# Run this in parallel for each chunk by number of threads / 2
@@ -120,7 +127,7 @@ def medaka_consensus(args):
 # # wait for jobs, then collate results
 # medaka stitch results/*.hdf polished.assembly.fasta
 def medaka_stich(args):
-	arguments = ["medaka", "stitch", "--threads", str(args.threads), "results/*.hdf", args.reference, "polished_" + args.reference]
+	arguments = ["medaka", "stitch", "--threads", str(args.threads), "results/*.hdf", args.reference, "polished_" + os.path.basename(args.reference)]
 	print(arguments)
 	command = ' '.join(map(str,arguments))
 	print(">", command)