Skip to content
Snippets Groups Projects
Commit f67ec03b authored by Noordijk, Ben's avatar Noordijk, Ben
Browse files

DB creation can now handle edge cases

parent c188067d
No related branches found
No related tags found
1 merge request: !5 compare_accuracy.py now saves confusion matrices and can be called on the...
......@@ -196,6 +196,15 @@ class TrainingRead(Persistent):
width_l -= random_offset
width_r += random_offset
mid_idx = ch[1] + len(ch[2])//2
# Handle edge cases
if width_l > mid_idx:
# Cannot cut off enough on left side
width_l = mid_idx
width_r = width - width_l
elif width_r > len(self.raw) - mid_idx:
# Cannot cut off enough on right side
width_r = len(self.raw) - mid_idx
width_l = width - width_r
candidate_raw = self.raw[mid_idx - width_l:
mid_idx + width_r]
kmer_in_read = np.all(np.in1d(ch[2], candidate_raw))
......@@ -221,14 +230,16 @@ class TrainingRead(Persistent):
pos_kmer_seqs = [pos_event[2] for pos_event in pos_events]
# Make sure the negative examples are far enough away from
# the target k-mer (manually add 30 to be extra safe).
idx_list.remove(cur_idx)
if np.all(np.array(distances_to_kmer) > width):
mid_idx = cur_condensed_event[1] + len(cur_condensed_event[2])//2
candidate_raw = self.raw[mid_idx - width_l:mid_idx + width_r]
target_kmer_in_read = np.all([np.in1d(pos_raw, candidate_raw)
for pos_raw in pos_kmer_seqs])
assert not target_kmer_in_read, 'Target kmer in negative read'
if target_kmer_in_read:
# Something went wrong (probably an edge case that still needs to be fixed); try again
continue
raw_hits_out.append(candidate_raw)
raw_kmers_out.append(cur_condensed_event[0])
idx_list.remove(cur_idx)
return raw_hits_out, raw_kmers_out
import random
import re
from collections import Counter
import yaml
import pickle
import importlib
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment