Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lannoy, Carlos de
baseLess
Commits
818f3dd1
Commit
818f3dd1
authored
Nov 09, 2021
by
Noordijk, Ben
Browse files
Add assorted work-in-progress files and changes that were used for CNN debugging
parent
500c7110
Changes
3
Hide whitespace changes
Inline
Side-by-side
CnnParameterFile_large.yaml
0 → 100644
View file @
818f3dd1
#########################################
#      CONVOLUTIONAL NEURAL NETWORK     #
#             PARAMETER FILE            #
#########################################

# CNN ARCHITECTURE
nn_class: Cnn_test            # name of the network class to instantiate
batch_norm: 0                 # 0/1 flag: apply batch normalization
batch_size: 32
dropout_keep_prob: 0.69
eps_per_kmer_switch: 25       # epochs per k-mer switch — confirm against trainer
max_sequence_length: 50000
filter_width: 1000
filters: 6
kernel_size: 15
learning_rate: 0.001
num_batches: 320
num_kmer_switches: 1
num_layers: 3
pool_size: 8                  # Pool size to use for 1d maxpool
threshold: 0.0                # decision threshold — presumably on predicted probability; verify
db_building/build_db.py
View file @
818f3dd1
...
...
@@ -6,6 +6,7 @@ from os.path import isdir, dirname, basename, splitext
from
shutil
import
rmtree
from
pathlib
import
Path
from
random
import
shuffle
import
os
# Absolute directory containing this file (resolved so symlinks and a
# relative CWD don't matter).  NOTE: dirname() here comes from os.path,
# imported at the top of the file.
__location__ = dirname(Path(__file__).resolve())
# Add this package directory and its parent to the import search path so
# the script can be executed directly without installing the package.
sys.path.extend([__location__, f'{__location__}/..'])
...
...
@@ -19,9 +20,9 @@ def main(args):
if
isdir
(
out_path
):
rmtree
(
out_path
)
if
args
.
read_index
:
read_index_df
=
pd
.
read_csv
(
args
.
read_index
,
index_col
=
0
)
read_index_df
=
pd
.
read_csv
(
args
.
read_index
)
if
args
.
db_type
==
'train'
:
file_list
=
list
(
read_index_df
.
que
ry
(
f
'fold'
).
fn
)
file_list
=
list
(
read_index_df
.
s
que
eze
()
)
else
:
# test
file_list
=
list
(
read_index_df
.
query
(
f
'fold == False'
).
fn
)
else
:
...
...
@@ -37,16 +38,17 @@ def main(args):
nb_files
=
len
(
file_list
)
count_pct_lim
=
5
for
i
,
file
in
enumerate
(
file_list
):
file
=
os
.
path
.
join
(
args
.
fast5_in
,
file
)
try
:
with
h5py
.
File
(
file
,
'r'
)
as
f
:
# try:
tr
=
TrainingRead
(
f
,
normalization
=
args
.
normalization
,
hdf_path
=
args
.
hdf_path
,
kmer_size
=
kmer_size
)
db
.
add_training_read
(
training_read
=
tr
,
uncenter_kmer
=
args
.
uncenter_kmer
)
tr
=
TrainingRead
(
f
,
normalization
=
args
.
normalization
,
hdf_path
=
args
.
hdf_path
,
kmer_size
=
kmer_size
)
db
.
add_training_read
(
training_read
=
tr
,
uncenter_kmer
=
args
.
uncenter_kmer
)
if
args
.
store_example_reads
:
np
.
savez
(
npz_path
+
splitext
(
basename
(
file
))[
0
],
base_labels
=
tr
.
events
,
raw
=
tr
.
raw
)
np
.
savez
(
npz_path
+
splitext
(
basename
(
file
))[
0
],
base_labels
=
tr
.
events
,
raw
=
tr
.
raw
)
if
not
i
+
1
%
10
:
# Every 10 reads remove history of transactions ('pack' the database) to reduce size
db
.
pack_db
()
if
db
.
nb_pos
>
args
.
max_nb_examples
:
...
...
trash_debug_CNN.py
0 → 100644
View file @
818f3dd1
import
h5py
from
db_building.TrainingRead
import
Read
,
TrainingRead
from
pathlib
import
Path
from
inference.InferenceModel
import
InferenceModel
from
nns.Cnn_test
import
NeuralNetwork
def main():
    """Debug driver: run the single-k-mer CNN for AGGAGAGT on raw-signal
    windows cut from one positive and one negative fast5 read.

    NOTE(review): throw-away debugging code — the file paths, k-mer and
    network weights are hard-coded, and the predict() results are not
    returned or printed (presumably inspected in a debugger).
    """
    input_length = 1000
    target_kmer = 'AGGAGAGT'

    # The two reads below were picked beforehand — presumably by scanning
    # the test directory for reads containing the target k-mer (confirm).
    pos_read_path = Path(
        '/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/demultiplexed_reads/files_for_initial_training/test/L0144169_20181212_FAK22428_MN19628_sequencing_run_16Srhizhome_2_99947_read_129882_ch_413_strand.fast5')
    neg_read_path = Path(
        '/home/noord087/lustre_link/HoiCarlos/16Sreads_mockcommunity/demultiplexed_reads/files_for_initial_training/test/L0144169_20181212_FAK22428_MN19628_sequencing_run_16Srhizhome_2_99947_read_34522_ch_344_strand.fast5')

    def load_pair(fast5_path):
        # Build both read representations from a single open fast5 handle.
        with h5py.File(fast5_path, 'r') as handle:
            return (Read(handle, 'median'),
                    TrainingRead(handle, 'median',
                                 'Analyses/RawGenomeCorrected_000', 8))

    pos_read, pos_train_read = load_pair(pos_read_path)
    neg_read, neg_train_read = load_pair(neg_read_path)

    # Chop each read variant into overlapping raw-signal windows.
    split_reads = [r.get_split_raw_read(input_length, stride=10)
                   for r in (pos_read, neg_read,
                             pos_train_read, neg_train_read)]

    cnn = NeuralNetwork(
        target=target_kmer,
        weights='/home/noord087/lustre_link/mscthesis/baseless/baseless_2_on_16s/nns/AGGAGAGT/nn.h5')

    # Ground-truth occurrences of the target k-mer in the positive read.
    # Unused downstream; kept for inspection while debugging.
    true_idx = [ce for ce in pos_train_read.condensed_events
                if ce[0] == target_kmer]

    for split_read in split_reads:
        cnn.predict(split_read, target_kmer)
# Entry point when the debug script is executed directly.
if __name__ == '__main__':
    main()
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment