Skip to content
Snippets Groups Projects
Commit ce864035 authored by Adriaens, Ines's avatar Adriaens, Ines :v_tone2:
Browse files

data exploration +correction areas

parent 3c7e6b8f
No related branches found
No related tags found
No related merge requests found
......@@ -20,14 +20,25 @@ import seaborn as sns
#%% set paths and constants and load data
# path_out
svpath = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
# path to per barn directories
path = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","uwb_processed")
path_zones = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","raw","dc_sewio")
fna = "\\Barn_areas.xlsx"
area_zones = pd.read_excel(path_zones+fna, sheet_name = "areas")
barn_edges = pd.read_excel(path_zones+fna, sheet_name = "edges")
del fna, path_zones
# settings
settings = {'barn' : [60,61,62,70,71,72,73], # [60,61,62,70,71,72,73],
'startdate' : date(2022,10,1),
'enddate' : date(2022,10,15),
'cows' : [907,89,98,517,1748,605,3107], # or specific cow number
'cows' : [907,89,98,517,1748,495,3107], # or specific cow number
}
# files that comply with settings
......@@ -65,16 +76,99 @@ for f in fn:
sub["date"] = pd.to_datetime(sub["date"], format = "%Y-%m-%d")
data = pd.concat([data,sub])
data = data.sort_values(by = ["cowid","date","t"])
data = data.reset_index(drop=1)
# correct the areas
cows = data["cowid"].drop_duplicates().reset_index(drop=1)
for cow in cows:
print(cow)
days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates()
days = days.sort_values(by = ["date"]).reset_index(drop=1)
for dd in days["date"]:
print(dd)
barnnr = int(data.loc[(data["cowid"] == cow) &
(data["date"] == dd),"barn"].mean())
myarea = area_zones.loc[(area_zones["barn"] == barnnr) | \
(area_zones["barn"].isna() == True) \
,:].reset_index(drop=1)
for j in range(0,len(myarea)):
x1,y1,x2,y2,x3,y3,x4,y4 = myarea[["x1","y1","x2","y2","x3","y3","x4","y4"]].values[j]
data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
(data["xnew"] >= x1) & (data["xnew"] <= x2) & \
(data["xnew"] >= x3) & (data["xnew"] <= x4) & \
(data["ynew"] >= y1) & (data["ynew"] >= y2) & \
(data["ynew"] <= y3) & (data["ynew"] <= y4)),"area"] = myarea["area"][j]
data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
(data["xnew"] >= x1) & (data["xnew"] <= x2) & \
(data["xnew"] >= x3) & (data["xnew"] <= x4) & \
(data["ynew"] >= y1) & (data["ynew"] >= y2) & \
(data["ynew"] <= y3) & (data["ynew"] <= y4)),"zone"] = myarea["zone"][j]
del x1,y1,x2,y2,x3,y3,x4,y4
del cows, dd, days
data.loc[data["zone"].isna(),"zone"] = 8
data.loc[data["zone"] == 8 ,"area"] = "unknown"
data["date"] = data["date"].dt.date
# check correctly read ids
print(data[["cowid","barn"]].drop_duplicates())
#%% summarize data
""" areas
areas / zones
zone = 0 : walking area
zone = 1 : cublicles a, b, c
zone = 2 : feed_rack
zone = 3 : drink_trough
zone = 4 : concentrate feeder
zone = 5 : slatted area of barn 62
zone = 6 : resting area of barn 62
zone = 7 : waiting area
zone = 8 : no area assigned
"""
# summarize data per zone
zones = data[["cowid","date","area","t"]].groupby(by = ["cowid","date","area"]).count().reset_index()
zones = zones.sort_values(by = ["cowid","area"])
# time spent per zone
cows = data["cowid"].drop_duplicates()
for cow in cows:
subset = data.loc[data["cowid"]==cow,["cowid","barn","date","t","xnew","ynew","area","zone"]]
barnnr = int(data.loc[(data["cowid"] == cow),"barn"].mean())
sumdata = subset[["date","zone","cowid"]].groupby(by=["date","zone"]).count().reset_index()
if barnnr != 62:
areas = pd.DataFrame(['walk','cubicle','feed','drink','concentrate','waiting_area','unknown'],columns = ["area"])
areas["zone"] = [0,1,2,3,4,7,8]
sumdata = sumdata.merge(areas,on = "zone")
else:
areas = pd.DataFrame(['feed','drink','concentrate','slatted','resting','waiting_area','unknown'],columns = ["area"])
areas["zone"] = [2,3,4,5,6,7,8]
sumdata = sumdata.merge(areas,on = "zone")
sumdata["percentage"] = round(sumdata["cowid"]/864,2)
sumdate = sumdata.sort_values(by = ["date","zone"]).reset_index(drop=1)
#sumdata2=sumdata.groupby(by="date").sum()
#fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
sns.set(style="ticks")
sns.set_style("whitegrid")
g = sns.catplot(data=sumdata,x="date",y = "percentage", col="area",kind="bar",height=5.5,aspect=0.7,palette = 'magma')
#ax.legend(["walk","cub_a","cub_b","cub_c","feed","drink","conc"])
g.set_axis_labels("", "Percentage area usage")
g.set_xticklabels(sumdata.date.drop_duplicates(),rotation=45,horizontalalignment='right')
g.set_titles("{col_name}")
g.fig.suptitle("cow " + str(cow) + ", barn " + str(barnnr))
g.fig.subplots_adjust(top = 0.85)
g.savefig(svpath + "\\timebudgets_cow"+str(cow) + "_barn" + str(barnnr) + ".png")
plt.close()
# summarize variables
data[["xnew","X","ynew","y"]].describe()
test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19),:]
data[""]
test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19) & (data["cowid"] = cow),:]
sns.scatterplot(data=test, x = test.index.values, y = "ynew")
sns.scatterplot(data=test, x = test.index.values, y = "y")
......@@ -88,7 +182,37 @@ sns.scatterplot(data=test, x = test.index.values, y = "y")
- explore when no areas are assigned and so, wrong barn is entered
- explore gaps and gapsize
"""
#gapsize
subset = data.loc[data["X"].isna(),["cowid","barn","date","t"]]
sumdata = subset.groupby(by = ["cowid","barn","date"]).count()
subset = data.loc[data["xnew"].isna(),["cowid","barn","date","t"]]
test = subset.groupby(by = ["cowid","barn","date"]).count()
sumdata["xnew"] = test["t"]
sumdata["gapperc"] = sumdata["t"]/864
sumdata["gap180s"] = sumdata["xnew"]/864
sumdata = sumdata.reset_index()
sumdata = sumdata.sort_values(by = ["date","cowid"]).reset_index(drop=1)
sumdata["xlabel"] = np.arange(len(sumdata))
plt.figure()
ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gapperc",
palette = "tab10", hue = "cowid",
style = "date", legend = False)
plt.savefig(svpath + "\\gap_notimputed.png")
plt.figure()
ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gap180s",
palette = "tab10", hue = "cowid",
style = "date", legend = False)
plt.savefig(svpath + "\\gap_imputed.png")
# average gap
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 6 11:20:43 2023
@author: adria036
"""
#%% import packages
import pandas as pd
# from datetime import date,timedelta
import numpy as np
# from read_data_sewio import start_end_dates, read_all_data, read_ids, combine_data_id
import os
import seaborn as sns
import matplotlib as plt
# path_out
path_out = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
#%% visualisation functions
def heatmap_barn(data_x,data_y,xstep,ystep,x_lim,y_lim):
"""
Plot the frequency of positions registered, given data_x and data_y
xstep and ystep determine how fine these positions are taken
x_lim and y_lim set boundaries (e.g. incl feeding lane)
Parameters
----------
data_x : TYPE = Pandas series
Data of the x-coordinate as registered by the sewio system
data_y : TYPE = Pandas series
Data of the y-coordinate as registered by the sewio system
xstep : TYPE = float
How fine you want to determine location in x-direction (e.g. 1 is one
meter, 0.1 = 10 centimeters)
ystep : TYPE = float
How fine you want to determine location in x-direction (e.g. 1 is one
meter, 0.1 = 10 centimeters)
x_lim : TYPE = list of floats
Physical boundaries of plot you want to make (e.g. cut off feeding lane
then indicate lower boundary in y direction = -4)
y_lim : TYPE = list of floats
Physical boundaries of plot you want to make (e.g. cut off feeding lane
then indicate lower boundary in y direction = -4)
Returns
-------
ax : TYPE = sns heatmap
figure axes with heatmap
"""
# transform data_x and data_y for count
x_transf = (data_x*(1/xstep)).round()*xstep
y_transf = (data_y*(1/ystep)).round()*ystep
# put x_transf and y_transf in DataFrame
frame = {'xT' : x_transf.round(decimals = 4),
'yT' : y_transf.round(decimals = 4) }
df = pd.DataFrame(frame)
# count and groupby
heatdata = df.groupby(["xT","yT"]).size()
heatdata = heatdata.to_frame(name = "counts")
heatdata = heatdata.reset_index()
# set limits to agree with steps
x_low = round(round(x_lim[0]*(1/xstep))*xstep,ndigits = 4)
x_high = round(round(x_lim[1]*(1/xstep))*xstep,ndigits = 4)
y_low = round(round(y_lim[0]*(1/ystep))*ystep,ndigits = 4)
y_high = round(round(y_lim[1]*(1/ystep))*ystep,ndigits = 4)
# delete the rows for which values are outside x_lim or y_lim
heatdata = heatdata.loc[(heatdata["xT"] >= x_lim[0]) &
(heatdata["xT"] <= x_lim[1]) &
(heatdata["yT"] >= y_lim[0]) &
(heatdata["yT"] <= y_lim[1])]
heatdata.reset_index(drop = True, inplace = True)
# expected axes limits based on lim and step
x = np.arange(x_low,x_high+xstep,xstep,dtype = float)
y = np.arange(y_low,y_high+ystep,ystep,dtype = float)
# fill array
heat_all = []
for i in y:
for ii in x:
number_values = heatdata.loc[(heatdata["xT"] == round(ii,ndigits = 4)) &
(heatdata["yT"] == round(i,ndigits = 4)),
["counts"]]
if number_values.empty:
number_values = pd.DataFrame(data = [0], columns = ["counts"])
number_values = number_values.reset_index(drop=1)
# print(round(i,ndigits = 4),round(ii,ndigits = 4),number_values["counts"][0])
heat_all.append(number_values["counts"][0])
# make rectangular and convert list to numpy
heat_all = np.array(heat_all)
heat_all = np.reshape(heat_all, [len(y),len(x)])
heat_all = heat_all.transpose()
test = np.flip(heat_all, axis = 1)
# convert to df with right colnames and indices
xcols = np.array(x.round(decimals=3),dtype = str)
ycols = np.flip(np.array(y.round(decimals=3),dtype = str))
df = pd.DataFrame(test, columns = ycols, index = xcols)
# make heatmap
ax = sns.heatmap(df,
yticklabels = round(1/xstep),
xticklabels = round(1/ystep*2),
robust = True
)
return ax
#%% usage
# ------------------------- individual cow heatmaps ---------------------------
# unique cows
cows = data.loc[:,["cowid"]].drop_duplicates()
cows = cows.sort_values(by = ["cowid"]).reset_index(drop=1)
# settings heatmap
xstep = 0.1
ystep = 0.1
#barn limits
barn = {"60":[64.8,75.6],
"61":[54,64.8],
"62":[32.4,54],
"70":[21.6,32.4],
"71":[10.8,21.6],
"72":[0,10.8],
"73":[-10.8,0],
}
# plots per cow-day
for cow in cows["cowid"]:
# unique days
days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates()
days = days.sort_values(by = ["date"]).reset_index(drop=1)
# select data and create heatmaps
for dd in days["date"]:
print(cow, dd)
# select data
data_x = data.loc[(data["cowid"] == cow) &
(data["date"] == dd),"X"]
data_y = data.loc[(data["cowid"] == cow) &
(data["date"] == dd),"y"]
# set limits (based on information barn)
barnnr = int(data.loc[(data["cowid"] == cow) &
(data["date"] == dd),"barn"].mean())
x_lim = barn[str(barnnr)] #[data_x.min(),data_x.max()]
y_lim = [-18,0] #[data_y.min(),data_y.max()]
# plot heatmap
plt.rcdefaults()
fig, ax = plt.pyplot.subplots()
ax = heatmap_barn(data_x, data_y, xstep, ystep, x_lim, y_lim)
ax.set_xlabel('y coordinate')
ax.set_ylabel('x coordinate')
ax.set_title("barn = " +str(barnnr) + ', cow = ' + str(cow) + ", date = " + datetime.strftime(dd,"%Y-%m-%d").replace("-",""))
ax.xaxis.tick_top()
plt.pyplot.savefig(path_out + "//heatmap_" + str(cow) + "_" + datetime.strftime(dd,"%Y-%m-%d").replace("-",""))
plt.pyplot.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment