Skip to content
Snippets Groups Projects
Commit af4dae74 authored by Jasper Koehorst's avatar Jasper Koehorst
Browse files

size bug fix

parent 39167e17
Branches
No related tags found
No related merge requests found
......@@ -20,9 +20,10 @@ def parse_arguments():
parser.add_argument("-i", "--input", help = "input fast(a/q) file", required=True)
parser.add_argument("-r", "--reference", help = "reference fasta file", required=True)
parser.add_argument("-t", "--threads", help = "Number of threads to use", nargs='?', default=2, type=int)
parser.add_argument("-m", help = "fill MD tag")
# parser.add_argument("-m", help = "fill MD tag")
parser.add_argument("-p", help = "output file prefix (default: reads)",nargs='?', default="reads", type=str)
parser.add_argument("-c", "--contig-chunks", help = "Number of contigs per medaka_consensus step", nargs='?',default=100, type=int)
parser.add_argument("-s", "--size-chunks", help = "Maximum size in bases for a chunk ", nargs='?',default=10000, type=int)
parser.add_argument("--model", help = "Model definition, default is equivalent to r941_min_fast_g303.", default="r941_min_fast_g303")
# Adding optional argument for ....
# ...
......@@ -74,19 +75,25 @@ def medaka_consensus(args):
else:
reader = open(args.reference)
# Chunk based on number of chunks and genome size
size = 0
chunk = 0
for line in reader:
if type(line) != str:
line = line.decode('ascii').strip()
line = line.decode('ascii')
line = line.strip()
if line.startswith(">"):
header = line.strip(">").strip()
if chunk not in chunks:
chunks[chunk] = set()
if len(chunks[chunk]) > args.contig_chunks:
if len(chunks[chunk]) > args.contig_chunks or size > args.size_chunks:
size = 0
chunk = chunk + 1
chunks[chunk] = set()
chunks[chunk].add(header)
else:
size = size + len(line)
reader.close()
# Run this in parallel for each chunk by number of threads / 2
......@@ -120,7 +127,7 @@ def medaka_consensus(args):
# # wait for jobs, then collate results
# medaka stitch results/*.hdf polished.assembly.fasta
def medaka_stich(args):
arguments = ["medaka", "stitch", "--threads", str(args.threads), "results/*.hdf", args.reference, "polished_" + args.reference]
arguments = ["medaka", "stitch", "--threads", str(args.threads), "results/*.hdf", args.reference, "polished_" + os.path.basename(args.reference)]
print(arguments)
command = ' '.join(map(str,arguments))
print(">", command)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment