Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
baseLess
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lannoy, Carlos de
baseLess
Commits
5378f7bc
Commit
5378f7bc
authored
3 years ago
by
Noordijk, Ben
Browse files
Options
Downloads
Patches
Plain Diff
Added basic script that plots runtime of benchmark
parent
b0e5ed2f
No related branches found
Branches containing commit
No related tags found
1 merge request
!3
Added data preparation, hyperparameter optimisation, benchmarking code and k-mer library visualisation
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
compare_benchmark_performance/compare_accuracy.py
+6
-10
6 additions, 10 deletions
compare_benchmark_performance/compare_accuracy.py
compare_benchmark_performance/compare_run_times.py
+99
-1
99 additions, 1 deletion
compare_benchmark_performance/compare_run_times.py
with
105 additions
and
11 deletions
compare_benchmark_performance/compare_accuracy.py
+
6
−
10
View file @
5378f7bc
...
...
@@ -99,6 +99,7 @@ def parse_paf(paf_path, ground_truth, plot_cm=False):
def
main
():
# For debugging
pd
.
options
.
display
.
width
=
0
parser
=
argparse
.
ArgumentParser
(
description
=
"""
Plot accuracy of
benchmarked algorithms
"""
)
...
...
@@ -119,17 +120,16 @@ def main():
args
=
parser
.
parse_args
()
ground_truth
=
pd
.
read_csv
(
args
.
ground_truth
)
#
Get all
files
#
Parsing accuracy
files
deepnano_files
=
args
.
benchmark_path
.
glob
(
"
output_16s_files/deepnano/*/minimap2_deepnano_fold*.paf
"
)
uncalled_files
=
args
.
benchmark_path
.
glob
(
"
output_16s_files/uncalled/*/uncalled_out_fold*.paf
"
)
guppy_files
=
args
.
benchmark_path
.
glob
(
"
output_16s_files/guppy/*/fold?/sequencing_summary.txt
"
)
# Parse all files
with
Pool
(
30
)
as
p
:
args
=
[[
file
,
ground_truth
]
for
file
in
chain
(
deepnano_files
,
uncalled_files
)]
uncalled_deepnano_results
=
p
.
starmap
(
parse_paf
,
args
)
args
_uncalled_deepnano
=
[[
file
,
ground_truth
]
for
file
in
chain
(
deepnano_files
,
uncalled_files
)]
uncalled_deepnano_results
=
p
.
starmap
(
parse_paf
,
args
_uncalled_deepnano
)
args_guppy
=
[[
file
,
ground_truth
]
for
file
in
guppy_files
]
guppy_results
=
p
.
starmap
(
parse_sequencing_summary
,
args_guppy
)
...
...
@@ -146,10 +146,6 @@ def main():
all_species
=
df
.
species
.
unique
()
all_species
.
sort
()
# # Do basic plotting
# df.groupby(['species', 'tool']).mean().plot.bar()
# plt.tight_layout()
# plt.show()
sns
.
catplot
(
x
=
'
species
'
,
y
=
'
accuracy
'
,
ci
=
'
sd
'
,
data
=
df
,
hue
=
'
tool
'
,
kind
=
'
bar
'
,
order
=
all_species
,
legend_out
=
False
,
aspect
=
2
)
...
...
This diff is collapsed.
Click to expand it.
compare_benchmark_performance/compare_run_times.py
+
99
−
1
View file @
5378f7bc
"""
STUB
"""
\ No newline at end of file
import
argparse
from
datetime
import
timedelta
from
pathlib
import
Path
from
multiprocessing
import
Pool
from
itertools
import
chain
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
from
matplotlib.dates
import
date2num
import
seaborn
as
sns
def parse_benchmark_file(file_path):
    """Extract runtime from a benchmark file output by snakemake.

    The path encodes metadata as ``.../<tool>/<target_species>/<stem>.txt``:
    the tool name is the third-from-last path part, the target species the
    second-from-last, and the fold number is the last character of the stem.

    :param file_path: path to benchmark file
    :type file_path: Path
    :return: tuple of (tool, step, target_species, fold, walltime, cpu_time),
        where walltime and cpu_time are timedelta objects and fold is None
        for the uncalled index step (indexing is not fold-specific)
    """
    print(f'Processing {file_path}')
    fold = file_path.stem[-1]
    # Underscores in the species directory name become spaces for nicer
    # plot labels (assumes dir names like 'bacillus_subtilis' — TODO confirm)
    target_species = file_path.parts[-2].replace('_', ' ')
    tool = file_path.parts[-3]

    # Derive the pipeline step from the tool and file name; guppy benchmark
    # files carry no step marker, so step stays None for them.
    step = None
    if tool == 'deepnano':
        step = 'basecall' if 'basecall' in file_path.stem else 'minimap'
    elif tool == 'uncalled':
        if 'index' in file_path.stem:
            step = 'index'
            fold = None  # the index is built once, not per fold
        else:
            step = 'map'

    df = pd.read_csv(file_path, sep='\t')
    # Read the first row explicitly: float(Series) relies on the Series
    # holding exactly one element and is deprecated in recent pandas.
    walltime = timedelta(seconds=float(df['s'].iloc[0]))
    cpu_time = timedelta(seconds=float(df['cpu_time'].iloc[0]))
    return tool, step, target_species, fold, walltime, cpu_time
def main(args):
    """Collect snakemake benchmark files and plot per-tool CPU time.

    :param args: parsed CLI arguments; uses args.benchmark_path (Path).
        NOTE(review): args.out_dir is required by the CLI but unused here —
        the figure is only shown, never saved; confirm intended behavior.
    """
    # For debugging: let pandas auto-detect terminal width when printing
    pd.options.display.width = 0

    # Runtime parsing: collect the benchmark files each tool produced
    deepnano_basecall_benchmark_files = args.benchmark_path.glob(
        '**/deepnano_basecall_benchmark_fold?.txt')
    deepnano_minimap_benchmark_files = args.benchmark_path.glob(
        '**/minimap_deepnano_benchmark_fold?.txt')
    guppy_benchmark_files = args.benchmark_path.glob(
        '**/guppy_benchmark_fold?.txt')
    uncalled_map_benchmark_files = args.benchmark_path.glob(
        '**/uncalled_map_benchmark_fold?.txt')
    uncalled_index_benchmark_files = args.benchmark_path.glob(
        '**/uncalled_index_benchmark.txt')

    with Pool(30) as p:
        all_files = chain(deepnano_basecall_benchmark_files,
                          deepnano_minimap_benchmark_files,
                          guppy_benchmark_files,
                          uncalled_map_benchmark_files,
                          uncalled_index_benchmark_files)
        all_benchmarks = p.map(parse_benchmark_file, all_files)
    df = pd.DataFrame.from_records(
        all_benchmarks,
        columns=['tool', 'step', 'species', 'fold', 'walltime', 'cpu_time'])

    # Provisional plot:
    # TODO Split by processing step
    # catplot's x must be a single column name, so merge species and step
    # into one label column instead of passing a list (seaborn rejects it).
    df['species_step'] = df.species.str.cat(df.step, sep='\n', na_rep='')
    # Plot CPU time in seconds: date2num expects datetimes, not the
    # timedelta objects produced by parse_benchmark_file.
    df['cpu_seconds'] = df.cpu_time.apply(lambda t: t.total_seconds())
    sns.catplot(x='species_step', y='cpu_seconds', data=df, ci='sd',
                hue='tool', kind='bar', legend_out=False, aspect=2)
    plt.tight_layout()
    plt.xticks(rotation=90)
    plt.show()
if __name__ == '__main__':
    # Command-line entry point: parse arguments, then hand off to main().
    parser = argparse.ArgumentParser(
        description="""Plot runtime of
benchmarked algorithms""")
    parser.add_argument('--benchmark-path',
                        help='Path to folder that contains all benchmarks '
                             'output by the snakemake workflow. '
                             'Should contain a folder called output_16s_files',
                        required=True, type=Path)
    parser.add_argument('--out-dir',
                        help='Directory in which to save the figures and '
                             'csv with model performance',
                        required=True, type=Path)
    main(parser.parse_args())
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment