Commit 818f3dd1 authored by Noordijk, Ben

Some bloody messy files and changes that were used for debugging

parent 500c7110
#########################################
#      CONVOLUTIONAL NEURAL NETWORK     #
#            PARAMETER FILE             #
#########################################
# CNN ARCHITECTURE
nn_class: Cnn_test            # neural network class to train
batch_norm: 0                 # 0: batch normalization disabled
batch_size: 32
dropout_keep_prob: 0.69       # probability of keeping a unit in dropout layers
eps_per_kmer_switch: 25
max_sequence_length: 50000
filter_width: 1000
filters: 6                    # number of convolutional filters
kernel_size: 15               # width of each convolution kernel
learning_rate: 0.001
num_batches: 320
num_kmer_switches: 1
num_layers: 3                 # number of convolutional layers
pool_size: 8                  # pool size to use for 1d maxpool
threshold: 0.0
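The file above parses as plain YAML (scalar key: value pairs with # comments), so it can be loaded with PyYAML. A minimal sketch; the file name and the build_network() consumer are illustrative assumptions, not part of this commit:

# Minimal sketch: parse the parameter file above as YAML. The file name
# and build_network() are illustrative assumptions, not part of this commit.
import yaml

with open('cnn_parameters.yaml') as f:  # hypothetical file name
    params = yaml.safe_load(f)

assert params['nn_class'] == 'Cnn_test'
assert params['dropout_keep_prob'] == 0.69
# network = build_network(**params)     # hypothetical consumer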
@@ -6,6 +6,7 @@ from os.path import isdir, dirname, basename, splitext
 from shutil import rmtree
 from pathlib import Path
 from random import shuffle
+import os

 __location__ = dirname(Path(__file__).resolve())
 sys.path.extend([__location__, f'{__location__}/..'])
@@ -19,9 +20,9 @@ def main(args):
     if isdir(out_path):
         rmtree(out_path)
     if args.read_index:
-        read_index_df = pd.read_csv(args.read_index, index_col=0)
+        read_index_df = pd.read_csv(args.read_index)
         if args.db_type == 'train':
-            file_list = list(read_index_df.query(f'fold').fn)
+            file_list = list(read_index_df.squeeze())
         else: # test
             file_list = list(read_index_df.query(f'fold == False').fn)
     else:
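The two replaced lines change what the read index means for training: before, the CSV carried a boolean fold column and the training file list kept only rows where it was true, taking names from the fn column; after, the CSV is read whole and squeezed into a single Series of file names. A toy frame makes the difference concrete (the data is made up; the pandas calls are the ones in the diff):

# Toy stand-ins for the real read-index CSV.
import pandas as pd

old_index = pd.DataFrame({'fn': ['a.fast5', 'b.fast5', 'c.fast5'],
                          'fold': [True, False, True]})
print(list(old_index.query('fold').fn))  # ['a.fast5', 'c.fast5']

new_index = pd.DataFrame({'fn': ['a.fast5', 'b.fast5']})
print(list(new_index.squeeze()))         # one column -> Series: ['a.fast5', 'b.fast5']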
@@ -37,16 +38,17 @@
     nb_files = len(file_list)
     count_pct_lim = 5
     for i, file in enumerate(file_list):
+        file = os.path.join(args.fast5_in, file)
         try:
             with h5py.File(file, 'r') as f:
-                # try:
-                    tr = TrainingRead(f, normalization=args.normalization,
-                                      hdf_path=args.hdf_path,
-                                      kmer_size=kmer_size)
-                    db.add_training_read(training_read=tr,
-                                         uncenter_kmer=args.uncenter_kmer)
+                tr = TrainingRead(f, normalization=args.normalization,
+                                  hdf_path=args.hdf_path,
+                                  kmer_size=kmer_size)
+                db.add_training_read(training_read=tr,
+                                     uncenter_kmer=args.uncenter_kmer)
             if args.store_example_reads:
-                np.savez(npz_path + splitext(basename(file))[0], base_labels=tr.events, raw=tr.raw)
+                np.savez(npz_path + splitext(basename(file))[0],
+                         base_labels=tr.events, raw=tr.raw)
             if not (i + 1) % 10:  # Every 10 reads remove history of transactions ('pack' the database) to reduce size
                 db.pack_db()
             if db.nb_pos > args.max_nb_examples:
......
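In the last hunk, the parentheses in `not (i + 1) % 10` do real work: `%` binds tighter than `+`, so the unparenthesised `not i+1 % 10` parses as `not (i + (1 % 10))`, which is falsy for every read index and would never pack the database. A quick check:

# `%` binds tighter than `+`, so `not i + 1 % 10` means `not (i + 1)`:
# False for every i >= 0, i.e. the database would never be packed.
for i in range(25):
    assert (not i + 1 % 10) is False    # unparenthesised form never fires
    if not (i + 1) % 10:                # fires on i = 9, 19, ...
        print(f'pack after read {i + 1}')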
import h5py
from db_building.TrainingRead import Read, TrainingRead
from pathlib import Path
from inference.InferenceModel import InferenceModel
from nns.Cnn_test import NeuralNetwork


def main():
    # Hunt for a positive read: one whose events contain the target k-mer
    input_length = 1000
    target_kmer = 'AGGAGAGT'
    # for file in Path('/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/demultiplexed_reads/files_for_initial_training/test').iterdir():
    #     print(f'Scanning {file}')
    #     with h5py.File(file, 'r') as h5_file:
    #         try:
    #             train_read = TrainingRead(h5_file, 'median',
    #                                       'Analyses/RawGenomeCorrected_000', 8)
    #             if [i for i in train_read.condensed_events
    #                     if i[0] == target_kmer]:
    #                 print(f"found in {file}")
    #                 break
    #         except KeyError:
    #             print('Got KeyError, continuing')
    # return
    pos_read_path = Path(
        '/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/demultiplexed_reads/files_for_initial_training/test/L0144169_20181212_FAK22428_MN19628_sequencing_run_16Srhizhome_2_99947_read_129882_ch_413_strand.fast5'
    )
    neg_read_path = Path(
        '/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/demultiplexed_reads/files_for_initial_training/test/L0144169_20181212_FAK22428_MN19628_sequencing_run_16Srhizhome_2_99947_read_34522_ch_344_strand.fast5'
    )
    with h5py.File(pos_read_path, 'r') as f:
        pos_read = Read(f, 'median')
        pos_train_read = TrainingRead(f, 'median',
                                      'Analyses/RawGenomeCorrected_000', 8)
    with h5py.File(neg_read_path, 'r') as f:
        neg_read = Read(f, 'median')
        neg_train_read = TrainingRead(f, 'median',
                                      'Analyses/RawGenomeCorrected_000', 8)

    # Window each read into input_length-sample chunks, 10 samples apart
    split_pos_read = pos_read.get_split_raw_read(input_length, stride=10)
    split_neg_read = neg_read.get_split_raw_read(input_length, stride=10)
    split_pos_train_read = pos_train_read.get_split_raw_read(input_length,
                                                             stride=10)
    split_neg_train_read = neg_train_read.get_split_raw_read(input_length,
                                                             stride=10)
    # compiled_model = InferenceModel('/home/noord087/lustre_link/mscthesis/'
    #                                 'baseless/baseless_2_on_16s/out_model_test'
    #                                 '.tar')
    cnn = NeuralNetwork(target=target_kmer,
                        weights='/home/noord087/lustre_link/mscthesis/baseless/'
                                'baseless_2_on_16s/nns/AGGAGAGT/nn.h5')
    # Events in the positive read where the target k-mer actually occurs
    true_idx = [ce for ce in pos_train_read.condensed_events
                if ce[0] == target_kmer]
    # Run the detector over positive and negative reads (output unused in
    # this debug script)
    cnn.predict(split_pos_read, target_kmer)
    cnn.predict(split_neg_read, target_kmer)
    cnn.predict(split_pos_train_read, target_kmer)
    cnn.predict(split_neg_train_read, target_kmer)


if __name__ == '__main__':
    main()
\ No newline at end of file
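Each read is windowed with get_split_raw_read(input_length, stride=10) before prediction. The method's body is not part of this commit; assuming it slices the raw signal into overlapping input_length-sample windows spaced stride apart, a standalone numpy sketch:

# Hypothetical stand-in for get_split_raw_read; only illustrates the
# overlapping windowing that the call signature suggests.
import numpy as np

def split_raw_read(raw, input_length, stride=10):
    # all input_length-sample windows, keeping one every `stride` samples
    return np.lib.stride_tricks.sliding_window_view(raw, input_length)[::stride]

raw = np.random.randn(5000)             # stand-in raw nanopore signal
print(split_raw_read(raw, 1000).shape)  # (401, 1000)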