Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FLiTrak3D
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Cribellier, Antoine
FLiTrak3D
Commits
99deb744
Commit
99deb744
authored
2 years ago
by
Cribellier, Antoine
Browse files
Options
Downloads
Patches
Plain Diff
Added function to get unmatched_recordings + associated test functions
parent
d097db0d
No related branches found
No related tags found
1 merge request
!7
Improved architecture, documentation and flexibility, added working tests for preprocessing of images
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
images/recordings.py
+0
-218
0 additions, 218 deletions
images/recordings.py
tests/test_processes.py
+19
-5
19 additions, 5 deletions
tests/test_processes.py
with
19 additions
and
223 deletions
images/recordings.py
deleted
100644 → 0
+
0
−
218
View file @
d097db0d
import
os
,
re
from
typing
import
List
from
difflib
import
SequenceMatcher
from
datetime
import
datetime
import
numpy
as
np
__author__
=
"
C.J. Voesenek and A. Cribellier
"
__maintainer__
=
"
A. Cribellier
"
__email__
=
"
antoine.cribellier@wur.nl
"
def match_folders_by_name(folder_names: List[List[str]], threshold_ratio_diff: float = None,
                          threshold_nb_diff_char: int = None) -> List[List[str]]:
    """
    Match folder names across several lists by how similar the names are.

    The shortest list is used as the reference (the first one on ties). For each
    reference name, the most similar name of every other list is selected using
    difflib.SequenceMatcher.ratio(). A group (one name per list) is kept only if
    every selected name is similar enough to its reference name.

    Args:
        folder_names: List of lists of folder names (one inner list per camera).
        threshold_ratio_diff: Minimum similarity ratio (between 0 and 1) a match must reach.
            Defaults to 1.0 when neither threshold is given.
        threshold_nb_diff_char: Maximum number of differing characters allowed. The number of
            differing characters is estimated as round((1 - ratio) * len(reference name)).

    Returns:
        matched_directories: List of lists (# of matched recordings * # of cameras) with the
            matched folder names, in the order of the input lists. Empty if folder_names is empty.

    Raises:
        ValueError: If both threshold types are given instead of only one.
        TypeError: If the threshold is not a positive int or a float between 0.0 and 1.0.
    """
    if threshold_ratio_diff is not None and threshold_nb_diff_char is not None:
        raise ValueError("Only one threshold has to be procured")

    if threshold_ratio_diff is None and threshold_nb_diff_char is None:
        threshold_ratio_diff = 1.0

    # Explicit raises instead of assert: asserts are removed when Python runs with -O.
    threshold_error = 'Threshold has to be either a positive int or a float between 0.0 and 1.0'
    if threshold_ratio_diff is not None:
        if isinstance(threshold_ratio_diff, bool) or not isinstance(threshold_ratio_diff, (int, float)):
            raise TypeError(threshold_error)
        if not 0.0 <= threshold_ratio_diff <= 1.0:
            raise TypeError(threshold_error)
    else:
        if isinstance(threshold_nb_diff_char, float) and threshold_nb_diff_char.is_integer():
            threshold_nb_diff_char = int(threshold_nb_diff_char)
        if not threshold_nb_diff_char >= 0:
            raise TypeError(threshold_error)

    if not folder_names:
        return []

    # Index of the shortest list (first one on ties): used as reference so that
    # one reference folder is never matched to two folders of the same list.
    i_min = min(range(len(folder_names)), key=lambda k: len(folder_names[k]))
    reference_names = folder_names[i_min]

    matched_per_camera, similar_per_camera = [], []
    for i, candidates in enumerate(folder_names):
        if i == i_min:
            names = list(reference_names)
            ratios = [1.0] * len(reference_names)
        else:
            # Best (ratio, name) pair per reference name; ties on ratio are
            # broken by the name itself (tuple comparison).
            best = [max((SequenceMatcher(a=reference, b=candidate).ratio(), candidate)
                        for candidate in candidates)
                    for reference in reference_names]
            names = [name for _, name in best]
            ratios = [ratio for ratio, _ in best]

        if threshold_ratio_diff is not None:
            similar = [ratio >= threshold_ratio_diff for ratio in ratios]
        else:
            similar = [round((1 - ratio) * len(reference)) <= threshold_nb_diff_char
                       for reference, ratio in zip(reference_names, ratios)]

        matched_per_camera.append(names)
        similar_per_camera.append(similar)

    # Transpose from (cameras * recordings) to (recordings * cameras) and keep
    # only the groups in which every camera passed the threshold.
    return [list(group)
            for group, flags in zip(zip(*matched_per_camera), zip(*similar_per_camera))
            if all(flags)]
def match_recordings_by_name(directories: List[str], threshold_ratio_diff: float = None,
                             threshold_nb_diff_char: int = None) -> List[List[str]]:
    """
    Match the recordings (minimum 2 cameras) by how similar their folder names are.

    Lists the entries of each directory and forwards the resulting lists of names to
    match_folders_by_name.

    Args:
        directories: List of directory paths (one per camera), each containing the folders to be matched.
        threshold_ratio_diff: Minimum similarity ratio (between 0 and 1) a match must reach.
        threshold_nb_diff_char: Maximum number of differing characters allowed between names.
            When given, it is the threshold forwarded to match_folders_by_name.

    Returns:
        matched_directories: List of lists (# of recordings * # of cameras) as returned by
            match_folders_by_name.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the returned groups reproducible from one run/platform to the next.
    folder_names = [sorted(os.path.basename(os.path.normpath(folder)) for folder in os.listdir(directory))
                    for directory in directories]

    if threshold_nb_diff_char is not None:
        return match_folders_by_name(folder_names, threshold_nb_diff_char=threshold_nb_diff_char)

    return match_folders_by_name(folder_names, threshold_ratio_diff=threshold_ratio_diff)
def match_folders_by_date(folder_names: List[List[str]], expr: str = r"\d{8}_\d{6}",
                          format_date_str: str = '%Y%m%d_%H%M%S', threshold_s: int = 30) -> List[List[str]]:
    """
    Match folder names across several lists by how close the dates in their names are.

    Names in which expr is not found are ignored. The shortest remaining list is used as the
    reference (the first one on ties). For each reference folder, the folder with the closest
    date is selected in every other list. A group (one name per list) is kept only if every
    selected folder is within threshold_s seconds of its reference folder.
    The input lists are not modified.

    Args:
        folder_names: List of lists of folder names (one inner list per camera).
        expr: Regular expression pattern locating the date and time in a folder name.
        format_date_str: strptime format of the text matched by expr.
        threshold_s: Maximum difference (in seconds) allowed between the dates of matched folders.

    Returns:
        matched_directories: List of lists (# of matched recordings * # of cameras) with the
            matched folder names, in the order of the input lists. Empty if folder_names is empty.

    Raises:
        TypeError: If threshold_s is not a non-negative int (or a float with an integer value).
    """
    if isinstance(threshold_s, float) and threshold_s.is_integer():
        threshold_s = int(threshold_s)

    # Both conditions have to hold: being an int AND being >= 0.
    if type(threshold_s) is not int or threshold_s < 0:
        raise TypeError('threshold_s has to be a non-negative int (number of seconds)')

    pattern = re.compile(expr)

    # Keep only the names holding a date, in new lists so that the caller's
    # folder_names is left untouched.
    dated_names, dates = [], []
    for names in folder_names:
        kept_names, kept_dates = [], []
        for name in names:
            match = pattern.search(name)
            if match:
                kept_names.append(name)
                kept_dates.append(datetime.strptime(match.group(), format_date_str))
        dated_names.append(kept_names)
        dates.append(kept_dates)

    if not dated_names:
        return []

    # Index of the shortest list (first one on ties): used as reference so that
    # one reference folder is never matched to two folders of the same list.
    i_min = min(range(len(dated_names)), key=lambda k: len(dated_names[k]))
    reference_dates = dates[i_min]

    matched_per_camera, similar_per_camera = [], []
    for i, (names, camera_dates) in enumerate(zip(dated_names, dates)):
        if i == i_min:
            matched = list(names)
            diffs_s = [0] * len(names)
        else:
            # Closest (time difference, name) pair per reference date; ties on
            # the difference are broken by the name itself (tuple comparison).
            closest = [min((abs((reference_date - date).total_seconds()), name)
                           for date, name in zip(camera_dates, names))
                       for reference_date in reference_dates]
            matched = [name for _, name in closest]
            diffs_s = [diff_s for diff_s, _ in closest]

        matched_per_camera.append(matched)
        similar_per_camera.append([diff_s <= threshold_s for diff_s in diffs_s])

    # Transpose from (cameras * recordings) to (recordings * cameras) and keep
    # only the groups in which every camera passed the threshold.
    return [list(group)
            for group, flags in zip(zip(*matched_per_camera), zip(*similar_per_camera))
            if all(flags)]
def match_recordings_by_date(directories: List[str], expr: str = r"\d{8}_\d{6}",
                             format_date_str: str = '%Y%m%d_%H%M%S', threshold_s: int = 30) -> List[List[str]]:
    """
    Match the recordings (minimum 2 cameras) by how close the dates in their folder names are.

    Lists the entries of each directory and forwards the resulting lists of names to
    match_folders_by_date.

    Args:
        directories: List of directory paths (one per camera), each containing the folders to be matched.
        expr: Regular expression pattern locating the date and time in a folder name.
        format_date_str: Format of the date and time in the folder names.
        threshold_s: Maximum difference (in seconds) allowed between the dates of matched folders.

    Returns:
        matched_directories: List of lists (# of recordings * # of cameras) as returned by
            match_folders_by_date.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the returned groups reproducible from one run/platform to the next.
    folder_names = [sorted(os.path.basename(os.path.normpath(folder)) for folder in os.listdir(directory))
                    for directory in directories]

    return match_folders_by_date(folder_names, expr, format_date_str, threshold_s)
def get_unmatched_recordings(directories: List[str], matched_directories: List[str]) -> List[List[str]]:
    """
    Placeholder for the retrieval of the recordings that were not matched.

    Currently returns one empty list per entry of directories; matched_directories is not used yet.

    Args:
        directories: List of directory paths (one per camera).
        matched_directories: Matched recordings (currently ignored).

    Returns:
        unmatched_directories: List with one independent empty list per entry of directories.
    """
    # A comprehension creates a distinct list per directory; [[]] * n would
    # repeat the SAME list object n times, so filling one would fill them all.
    unmatched_directories = [[] for _ in directories]

    return unmatched_directories
def move_recordings_to_folder(sources: List[str], destinations: List[str]) -> None:
    """
    Placeholder: not implemented yet. Does nothing and returns None.

    Args:
        sources: Currently unused.
        destinations: Currently unused.
    """
    # TODO: implement. The previous body only assigned an unused local variable.
    return None
\ No newline at end of file
This diff is collapsed.
Click to expand it.
tests/test_processes.py
+
19
−
5
View file @
99deb744
from
images
import
process
,
recordings
from
images
import
process
,
utils_
recordings
def test_match_folders() -> None:
    """Check the matching of folder names by name similarity and by date."""
    folder_names = [['cam1_20220304_055123', 'cam1_20220304_101111', 'cam1_20220304_110140', 'cam1_20220304_120352'],
                    ['cam2_20220304_055123', 'cam2_20220304_101115', 'cam2_20220304_120402'],
                    ['cam3_20220304_055123', 'cam3_20220304_101111', 'cam3_20220304_111158', 'cam3_20220304_120352']]

    # Default threshold: nothing is expected to match.
    matched_directories = recordings.match_folders_by_name(folder_names)
    assert matched_directories == []

    # At most one differing character: only the first recording is expected to match.
    matched_directories = recordings.match_folders_by_name(folder_names, threshold_nb_diff_char=1)
    assert matched_directories == [['cam1_20220304_055123', 'cam2_20220304_055123', 'cam3_20220304_055123']]

    # Dates within 60 s of each other: three recordings are expected to match.
    matched_directories = recordings.match_folders_by_date(folder_names, threshold_s=60)
    assert matched_directories == [['cam1_20220304_055123', 'cam2_20220304_055123', 'cam3_20220304_055123'],
                                   ['cam1_20220304_101111', 'cam2_20220304_101115', 'cam3_20220304_101111'],
                                   ['cam1_20220304_120352', 'cam2_20220304_120402', 'cam3_20220304_120352']]
...
...
@@ -27,6 +27,20 @@ def test_match_recordings() -> None:
assert
len
(
matched_directories
)
==
2
def test_get_unmatched_folder_names() -> None:
    """Check that the folder names absent from the matched groups are returned per camera."""
    all_names = [
        ['cam1_20220304_055123', 'cam1_20220304_101111', 'cam1_20220304_110140', 'cam1_20220304_120352'],
        ['cam2_20220304_055123', 'cam2_20220304_101115', 'cam2_20220304_120402'],
        ['cam3_20220304_055123', 'cam3_20220304_101111', 'cam3_20220304_111158', 'cam3_20220304_120352'],
    ]
    matched_names = [
        ['cam1_20220304_055123', 'cam2_20220304_055123', 'cam3_20220304_055123'],
        ['cam1_20220304_101111', 'cam2_20220304_101115', 'cam3_20220304_101111'],
        ['cam1_20220304_120352', 'cam2_20220304_120402', 'cam3_20220304_120352'],
    ]
    expected_unmatched = [['cam1_20220304_110140'], [], ['cam3_20220304_111158']]

    unmatched_names = recordings.get_unmatched_folders(all_names, matched_names)

    assert unmatched_names == expected_unmatched
def
test_process
()
->
None
:
# TODO! Make test_process
print
(
'
TODO
'
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment