Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
U
uwb
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
nlas
uwb
Commits
ce864035
Commit
ce864035
authored
2 years ago
by
Adriaens, Ines
Browse files
Options
Downloads
Patches
Plain Diff
data exploration +correction areas
parent
3c7e6b8f
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
exploration.py
+129
-5
129 additions, 5 deletions
exploration.py
heatmap_visualisation.py
+176
-0
176 additions, 0 deletions
heatmap_visualisation.py
with
305 additions
and
5 deletions
exploration.py
+
129
−
5
View file @
ce864035
...
...
@@ -20,14 +20,25 @@ import seaborn as sns
#%% set paths and constants and load data
# path_out
svpath
=
r
"
C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration
"
# path to per barn directories
path
=
os
.
path
.
join
(
"
W:
"
,
"
\ASG
"
,
"
WLR_Dataopslag
"
,
"
DairyCampus
"
,
"
3406_Nlas
"
,
"
uwb_processed
"
)
path_zones
=
os
.
path
.
join
(
"
W:
"
,
"
\ASG
"
,
"
WLR_Dataopslag
"
,
"
DairyCampus
"
,
"
3406_Nlas
"
,
"
raw
"
,
"
dc_sewio
"
)
fna
=
"
\\
Barn_areas.xlsx
"
area_zones
=
pd
.
read_excel
(
path_zones
+
fna
,
sheet_name
=
"
areas
"
)
barn_edges
=
pd
.
read_excel
(
path_zones
+
fna
,
sheet_name
=
"
edges
"
)
del
fna
,
path_zones
# settings
settings
=
{
'
barn
'
:
[
60
,
61
,
62
,
70
,
71
,
72
,
73
],
# [60,61,62,70,71,72,73],
'
startdate
'
:
date
(
2022
,
10
,
1
),
'
enddate
'
:
date
(
2022
,
10
,
15
),
'
cows
'
:
[
907
,
89
,
98
,
517
,
1748
,
60
5
,
3107
],
# or specific cow number
'
cows
'
:
[
907
,
89
,
98
,
517
,
1748
,
49
5
,
3107
],
# or specific cow number
}
# files that comply with settings
...
...
@@ -65,16 +76,99 @@ for f in fn:
sub
[
"
date
"
]
=
pd
.
to_datetime
(
sub
[
"
date
"
],
format
=
"
%Y-%m-%d
"
)
data
=
pd
.
concat
([
data
,
sub
])
data
=
data
.
sort_values
(
by
=
[
"
cowid
"
,
"
date
"
,
"
t
"
])
data
=
data
.
reset_index
(
drop
=
1
)
# correct the areas
cows
=
data
[
"
cowid
"
].
drop_duplicates
().
reset_index
(
drop
=
1
)
for
cow
in
cows
:
print
(
cow
)
days
=
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
),[
"
date
"
]].
drop_duplicates
()
days
=
days
.
sort_values
(
by
=
[
"
date
"
]).
reset_index
(
drop
=
1
)
for
dd
in
days
[
"
date
"
]:
print
(
dd
)
barnnr
=
int
(
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
),
"
barn
"
].
mean
())
myarea
=
area_zones
.
loc
[(
area_zones
[
"
barn
"
]
==
barnnr
)
|
\
(
area_zones
[
"
barn
"
].
isna
()
==
True
)
\
,:].
reset_index
(
drop
=
1
)
for
j
in
range
(
0
,
len
(
myarea
)):
x1
,
y1
,
x2
,
y2
,
x3
,
y3
,
x4
,
y4
=
myarea
[[
"
x1
"
,
"
y1
"
,
"
x2
"
,
"
y2
"
,
"
x3
"
,
"
y3
"
,
"
x4
"
,
"
y4
"
]].
values
[
j
]
data
.
loc
[((
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
)
&
\
(
data
[
"
xnew
"
]
>=
x1
)
&
(
data
[
"
xnew
"
]
<=
x2
)
&
\
(
data
[
"
xnew
"
]
>=
x3
)
&
(
data
[
"
xnew
"
]
<=
x4
)
&
\
(
data
[
"
ynew
"
]
>=
y1
)
&
(
data
[
"
ynew
"
]
>=
y2
)
&
\
(
data
[
"
ynew
"
]
<=
y3
)
&
(
data
[
"
ynew
"
]
<=
y4
)),
"
area
"
]
=
myarea
[
"
area
"
][
j
]
data
.
loc
[((
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
)
&
\
(
data
[
"
xnew
"
]
>=
x1
)
&
(
data
[
"
xnew
"
]
<=
x2
)
&
\
(
data
[
"
xnew
"
]
>=
x3
)
&
(
data
[
"
xnew
"
]
<=
x4
)
&
\
(
data
[
"
ynew
"
]
>=
y1
)
&
(
data
[
"
ynew
"
]
>=
y2
)
&
\
(
data
[
"
ynew
"
]
<=
y3
)
&
(
data
[
"
ynew
"
]
<=
y4
)),
"
zone
"
]
=
myarea
[
"
zone
"
][
j
]
del
x1
,
y1
,
x2
,
y2
,
x3
,
y3
,
x4
,
y4
del
cows
,
dd
,
days
data
.
loc
[
data
[
"
zone
"
].
isna
(),
"
zone
"
]
=
8
data
.
loc
[
data
[
"
zone
"
]
==
8
,
"
area
"
]
=
"
unknown
"
data
[
"
date
"
]
=
data
[
"
date
"
].
dt
.
date
# check correctly read ids
print
(
data
[[
"
cowid
"
,
"
barn
"
]].
drop_duplicates
())
#%% summarize data
"""
areas
areas / zones
zone = 0 : walking area
zone = 1 : cublicles a, b, c
zone = 2 : feed_rack
zone = 3 : drink_trough
zone = 4 : concentrate feeder
zone = 5 : slatted area of barn 62
zone = 6 : resting area of barn 62
zone = 7 : waiting area
zone = 8 : no area assigned
"""
# summarize data per zone
zones
=
data
[[
"
cowid
"
,
"
date
"
,
"
area
"
,
"
t
"
]].
groupby
(
by
=
[
"
cowid
"
,
"
date
"
,
"
area
"
]).
count
().
reset_index
()
zones
=
zones
.
sort_values
(
by
=
[
"
cowid
"
,
"
area
"
])
# time spent per zone
cows
=
data
[
"
cowid
"
].
drop_duplicates
()
for
cow
in
cows
:
subset
=
data
.
loc
[
data
[
"
cowid
"
]
==
cow
,[
"
cowid
"
,
"
barn
"
,
"
date
"
,
"
t
"
,
"
xnew
"
,
"
ynew
"
,
"
area
"
,
"
zone
"
]]
barnnr
=
int
(
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
),
"
barn
"
].
mean
())
sumdata
=
subset
[[
"
date
"
,
"
zone
"
,
"
cowid
"
]].
groupby
(
by
=
[
"
date
"
,
"
zone
"
]).
count
().
reset_index
()
if
barnnr
!=
62
:
areas
=
pd
.
DataFrame
([
'
walk
'
,
'
cubicle
'
,
'
feed
'
,
'
drink
'
,
'
concentrate
'
,
'
waiting_area
'
,
'
unknown
'
],
columns
=
[
"
area
"
])
areas
[
"
zone
"
]
=
[
0
,
1
,
2
,
3
,
4
,
7
,
8
]
sumdata
=
sumdata
.
merge
(
areas
,
on
=
"
zone
"
)
else
:
areas
=
pd
.
DataFrame
([
'
feed
'
,
'
drink
'
,
'
concentrate
'
,
'
slatted
'
,
'
resting
'
,
'
waiting_area
'
,
'
unknown
'
],
columns
=
[
"
area
"
])
areas
[
"
zone
"
]
=
[
2
,
3
,
4
,
5
,
6
,
7
,
8
]
sumdata
=
sumdata
.
merge
(
areas
,
on
=
"
zone
"
)
sumdata
[
"
percentage
"
]
=
round
(
sumdata
[
"
cowid
"
]
/
864
,
2
)
sumdate
=
sumdata
.
sort_values
(
by
=
[
"
date
"
,
"
zone
"
]).
reset_index
(
drop
=
1
)
#sumdata2=sumdata.groupby(by="date").sum()
#fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
sns
.
set
(
style
=
"
ticks
"
)
sns
.
set_style
(
"
whitegrid
"
)
g
=
sns
.
catplot
(
data
=
sumdata
,
x
=
"
date
"
,
y
=
"
percentage
"
,
col
=
"
area
"
,
kind
=
"
bar
"
,
height
=
5.5
,
aspect
=
0.7
,
palette
=
'
magma
'
)
#ax.legend(["walk","cub_a","cub_b","cub_c","feed","drink","conc"])
g
.
set_axis_labels
(
""
,
"
Percentage area usage
"
)
g
.
set_xticklabels
(
sumdata
.
date
.
drop_duplicates
(),
rotation
=
45
,
horizontalalignment
=
'
right
'
)
g
.
set_titles
(
"
{col_name}
"
)
g
.
fig
.
suptitle
(
"
cow
"
+
str
(
cow
)
+
"
, barn
"
+
str
(
barnnr
))
g
.
fig
.
subplots_adjust
(
top
=
0.85
)
g
.
savefig
(
svpath
+
"
\\
timebudgets_cow
"
+
str
(
cow
)
+
"
_barn
"
+
str
(
barnnr
)
+
"
.png
"
)
plt
.
close
()
# summarize variables
data
[[
"
xnew
"
,
"
X
"
,
"
ynew
"
,
"
y
"
]].
describe
()
test
=
data
.
loc
[(
data
[
"
ynew
"
]
<
-
17
)
&
(
data
[
"
ynew
"
]
>
-
19
),:]
data
[
""
]
test
=
data
.
loc
[(
data
[
"
ynew
"
]
<
-
17
)
&
(
data
[
"
ynew
"
]
>
-
19
)
&
(
data
[
"
cowid
"
]
=
cow
),:]
sns
.
scatterplot
(
data
=
test
,
x
=
test
.
index
.
values
,
y
=
"
ynew
"
)
sns
.
scatterplot
(
data
=
test
,
x
=
test
.
index
.
values
,
y
=
"
y
"
)
...
...
@@ -88,7 +182,37 @@ sns.scatterplot(data=test, x = test.index.values, y = "y")
- explore when no areas are assigned and so, wrong barn is entered
- explore gaps and gapsize
"""
#gapsize
subset
=
data
.
loc
[
data
[
"
X
"
].
isna
(),[
"
cowid
"
,
"
barn
"
,
"
date
"
,
"
t
"
]]
sumdata
=
subset
.
groupby
(
by
=
[
"
cowid
"
,
"
barn
"
,
"
date
"
]).
count
()
subset
=
data
.
loc
[
data
[
"
xnew
"
].
isna
(),[
"
cowid
"
,
"
barn
"
,
"
date
"
,
"
t
"
]]
test
=
subset
.
groupby
(
by
=
[
"
cowid
"
,
"
barn
"
,
"
date
"
]).
count
()
sumdata
[
"
xnew
"
]
=
test
[
"
t
"
]
sumdata
[
"
gapperc
"
]
=
sumdata
[
"
t
"
]
/
864
sumdata
[
"
gap180s
"
]
=
sumdata
[
"
xnew
"
]
/
864
sumdata
=
sumdata
.
reset_index
()
sumdata
=
sumdata
.
sort_values
(
by
=
[
"
date
"
,
"
cowid
"
]).
reset_index
(
drop
=
1
)
sumdata
[
"
xlabel
"
]
=
np
.
arange
(
len
(
sumdata
))
plt
.
figure
()
ax
=
sns
.
scatterplot
(
data
=
sumdata
,
x
=
"
xlabel
"
,
y
=
"
gapperc
"
,
palette
=
"
tab10
"
,
hue
=
"
cowid
"
,
style
=
"
date
"
,
legend
=
False
)
plt
.
savefig
(
svpath
+
"
\\
gap_notimputed.png
"
)
plt
.
figure
()
ax
=
sns
.
scatterplot
(
data
=
sumdata
,
x
=
"
xlabel
"
,
y
=
"
gap180s
"
,
palette
=
"
tab10
"
,
hue
=
"
cowid
"
,
style
=
"
date
"
,
legend
=
False
)
plt
.
savefig
(
svpath
+
"
\\
gap_imputed.png
"
)
# average gap
This diff is collapsed.
Click to expand it.
heatmap_visualisation.py
0 → 100644
+
176
−
0
View file @
ce864035
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 6 11:20:43 2023
@author: adria036
"""
#%% import packages
import
pandas
as
pd
# from datetime import date,timedelta
import
numpy
as
np
# from read_data_sewio import start_end_dates, read_all_data, read_ids, combine_data_id
import
os
import
seaborn
as
sns
import
matplotlib
as
plt
# path_out
path_out
=
r
"
C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration
"
#%% visualisation functions
def
heatmap_barn
(
data_x
,
data_y
,
xstep
,
ystep
,
x_lim
,
y_lim
):
"""
Plot the frequency of positions registered, given data_x and data_y
xstep and ystep determine how fine these positions are taken
x_lim and y_lim set boundaries (e.g. incl feeding lane)
Parameters
----------
data_x : TYPE = Pandas series
Data of the x-coordinate as registered by the sewio system
data_y : TYPE = Pandas series
Data of the y-coordinate as registered by the sewio system
xstep : TYPE = float
How fine you want to determine location in x-direction (e.g. 1 is one
meter, 0.1 = 10 centimeters)
ystep : TYPE = float
How fine you want to determine location in x-direction (e.g. 1 is one
meter, 0.1 = 10 centimeters)
x_lim : TYPE = list of floats
Physical boundaries of plot you want to make (e.g. cut off feeding lane
then indicate lower boundary in y direction = -4)
y_lim : TYPE = list of floats
Physical boundaries of plot you want to make (e.g. cut off feeding lane
then indicate lower boundary in y direction = -4)
Returns
-------
ax : TYPE = sns heatmap
figure axes with heatmap
"""
# transform data_x and data_y for count
x_transf
=
(
data_x
*
(
1
/
xstep
)).
round
()
*
xstep
y_transf
=
(
data_y
*
(
1
/
ystep
)).
round
()
*
ystep
# put x_transf and y_transf in DataFrame
frame
=
{
'
xT
'
:
x_transf
.
round
(
decimals
=
4
),
'
yT
'
:
y_transf
.
round
(
decimals
=
4
)
}
df
=
pd
.
DataFrame
(
frame
)
# count and groupby
heatdata
=
df
.
groupby
([
"
xT
"
,
"
yT
"
]).
size
()
heatdata
=
heatdata
.
to_frame
(
name
=
"
counts
"
)
heatdata
=
heatdata
.
reset_index
()
# set limits to agree with steps
x_low
=
round
(
round
(
x_lim
[
0
]
*
(
1
/
xstep
))
*
xstep
,
ndigits
=
4
)
x_high
=
round
(
round
(
x_lim
[
1
]
*
(
1
/
xstep
))
*
xstep
,
ndigits
=
4
)
y_low
=
round
(
round
(
y_lim
[
0
]
*
(
1
/
ystep
))
*
ystep
,
ndigits
=
4
)
y_high
=
round
(
round
(
y_lim
[
1
]
*
(
1
/
ystep
))
*
ystep
,
ndigits
=
4
)
# delete the rows for which values are outside x_lim or y_lim
heatdata
=
heatdata
.
loc
[(
heatdata
[
"
xT
"
]
>=
x_lim
[
0
])
&
(
heatdata
[
"
xT
"
]
<=
x_lim
[
1
])
&
(
heatdata
[
"
yT
"
]
>=
y_lim
[
0
])
&
(
heatdata
[
"
yT
"
]
<=
y_lim
[
1
])]
heatdata
.
reset_index
(
drop
=
True
,
inplace
=
True
)
# expected axes limits based on lim and step
x
=
np
.
arange
(
x_low
,
x_high
+
xstep
,
xstep
,
dtype
=
float
)
y
=
np
.
arange
(
y_low
,
y_high
+
ystep
,
ystep
,
dtype
=
float
)
# fill array
heat_all
=
[]
for
i
in
y
:
for
ii
in
x
:
number_values
=
heatdata
.
loc
[(
heatdata
[
"
xT
"
]
==
round
(
ii
,
ndigits
=
4
))
&
(
heatdata
[
"
yT
"
]
==
round
(
i
,
ndigits
=
4
)),
[
"
counts
"
]]
if
number_values
.
empty
:
number_values
=
pd
.
DataFrame
(
data
=
[
0
],
columns
=
[
"
counts
"
])
number_values
=
number_values
.
reset_index
(
drop
=
1
)
# print(round(i,ndigits = 4),round(ii,ndigits = 4),number_values["counts"][0])
heat_all
.
append
(
number_values
[
"
counts
"
][
0
])
# make rectangular and convert list to numpy
heat_all
=
np
.
array
(
heat_all
)
heat_all
=
np
.
reshape
(
heat_all
,
[
len
(
y
),
len
(
x
)])
heat_all
=
heat_all
.
transpose
()
test
=
np
.
flip
(
heat_all
,
axis
=
1
)
# convert to df with right colnames and indices
xcols
=
np
.
array
(
x
.
round
(
decimals
=
3
),
dtype
=
str
)
ycols
=
np
.
flip
(
np
.
array
(
y
.
round
(
decimals
=
3
),
dtype
=
str
))
df
=
pd
.
DataFrame
(
test
,
columns
=
ycols
,
index
=
xcols
)
# make heatmap
ax
=
sns
.
heatmap
(
df
,
yticklabels
=
round
(
1
/
xstep
),
xticklabels
=
round
(
1
/
ystep
*
2
),
robust
=
True
)
return
ax
#%% usage
# ------------------------- individual cow heatmaps ---------------------------
# unique cows
cows
=
data
.
loc
[:,[
"
cowid
"
]].
drop_duplicates
()
cows
=
cows
.
sort_values
(
by
=
[
"
cowid
"
]).
reset_index
(
drop
=
1
)
# settings heatmap
xstep
=
0.1
ystep
=
0.1
#barn limits
barn
=
{
"
60
"
:[
64.8
,
75.6
],
"
61
"
:[
54
,
64.8
],
"
62
"
:[
32.4
,
54
],
"
70
"
:[
21.6
,
32.4
],
"
71
"
:[
10.8
,
21.6
],
"
72
"
:[
0
,
10.8
],
"
73
"
:[
-
10.8
,
0
],
}
# plots per cow-day
for
cow
in
cows
[
"
cowid
"
]:
# unique days
days
=
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
),[
"
date
"
]].
drop_duplicates
()
days
=
days
.
sort_values
(
by
=
[
"
date
"
]).
reset_index
(
drop
=
1
)
# select data and create heatmaps
for
dd
in
days
[
"
date
"
]:
print
(
cow
,
dd
)
# select data
data_x
=
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
),
"
X
"
]
data_y
=
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
),
"
y
"
]
# set limits (based on information barn)
barnnr
=
int
(
data
.
loc
[(
data
[
"
cowid
"
]
==
cow
)
&
(
data
[
"
date
"
]
==
dd
),
"
barn
"
].
mean
())
x_lim
=
barn
[
str
(
barnnr
)]
#[data_x.min(),data_x.max()]
y_lim
=
[
-
18
,
0
]
#[data_y.min(),data_y.max()]
# plot heatmap
plt
.
rcdefaults
()
fig
,
ax
=
plt
.
pyplot
.
subplots
()
ax
=
heatmap_barn
(
data_x
,
data_y
,
xstep
,
ystep
,
x_lim
,
y_lim
)
ax
.
set_xlabel
(
'
y coordinate
'
)
ax
.
set_ylabel
(
'
x coordinate
'
)
ax
.
set_title
(
"
barn =
"
+
str
(
barnnr
)
+
'
, cow =
'
+
str
(
cow
)
+
"
, date =
"
+
datetime
.
strftime
(
dd
,
"
%Y-%m-%d
"
).
replace
(
"
-
"
,
""
))
ax
.
xaxis
.
tick_top
()
plt
.
pyplot
.
savefig
(
path_out
+
"
//heatmap_
"
+
str
(
cow
)
+
"
_
"
+
datetime
.
strftime
(
dd
,
"
%Y-%m-%d
"
).
replace
(
"
-
"
,
""
))
plt
.
pyplot
.
close
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment