diff --git a/exploration.py b/exploration.py
index 5110a4b404dd22fa271fac936def1ea849a1a38e..82d8c5b188525896a0f315070d4ad9bb41998b60 100644
--- a/exploration.py
+++ b/exploration.py
@@ -20,14 +20,25 @@ import seaborn as sns
 
 #%% set paths and constants and load data
 
+# path_out
+svpath = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
+
+
+
 # path to per barn directories
 path = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","uwb_processed")
+path_zones = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","raw","dc_sewio")
+fna = "\\Barn_areas.xlsx"
+area_zones = pd.read_excel(path_zones+fna, sheet_name = "areas")
+barn_edges = pd.read_excel(path_zones+fna, sheet_name = "edges")
+del fna, path_zones
+
 # settings
 settings = {'barn' : [60,61,62,70,71,72,73], # [60,61,62,70,71,72,73],
             'startdate' : date(2022,10,1),
             'enddate' : date(2022,10,15),
-            'cows' : [907,89,98,517,1748,605,3107], # or specific cow number
+            'cows' : [907,89,98,517,1748,495,3107], # or specific cow number
             }
 
 # files that comply with settings
@@ -65,16 +76,99 @@ for f in fn:
     sub["date"] = pd.to_datetime(sub["date"], format = "%Y-%m-%d")
     data = pd.concat([data,sub])
 data = data.sort_values(by = ["cowid","date","t"])
-
+data = data.reset_index(drop=1)
+
+# correct the areas
+cows = data["cowid"].drop_duplicates().reset_index(drop=1)
+for cow in cows:
+    print(cow)
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates()
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+    for dd in days["date"]:
+        print(dd)
+        barnnr = int(data.loc[(data["cowid"] == cow) &
+                              (data["date"] == dd),"barn"].mean())
+        myarea = area_zones.loc[(area_zones["barn"] == barnnr) | \
+                                (area_zones["barn"].isna() == True) \
+                                ,:].reset_index(drop=1)
+        for j in range(0,len(myarea)):
+            x1,y1,x2,y2,x3,y3,x4,y4 = myarea[["x1","y1","x2","y2","x3","y3","x4","y4"]].values[j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"area"] = myarea["area"][j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"zone"] = myarea["zone"][j]
+            del x1,y1,x2,y2,x3,y3,x4,y4
+del cows, dd, days
+data.loc[data["zone"].isna(),"zone"] = 8
+data.loc[data["zone"] == 8,"area"] = "unknown"
+
+data["date"] = data["date"].dt.date
+
+# check correctly read ids
+print(data[["cowid","barn"]].drop_duplicates())
 
 #%% summarize data
+
+""" areas
+areas / zones
+zone = 0 : walking area
+zone = 1 : cubicles a, b, c
+zone = 2 : feed_rack
+zone = 3 : drink_trough
+zone = 4 : concentrate feeder
+zone = 5 : slatted area of barn 62
+zone = 6 : resting area of barn 62
+zone = 7 : waiting area
+zone = 8 : no area assigned
+"""
+
 # summarize data per zone
 zones = data[["cowid","date","area","t"]].groupby(by = ["cowid","date","area"]).count().reset_index()
 zones = zones.sort_values(by = ["cowid","area"])
 
+# time spent per zone
+cows = data["cowid"].drop_duplicates()
+for cow in cows:
+    subset = data.loc[data["cowid"]==cow,["cowid","barn","date","t","xnew","ynew","area","zone"]]
+    barnnr = int(data.loc[(data["cowid"] == cow),"barn"].mean())
+    sumdata = subset[["date","zone","cowid"]].groupby(by=["date","zone"]).count().reset_index()
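+    # note: the counts per (date, zone) are turned into a fraction of the day
+    # below by dividing by 864, presumably the number of records expected per
+    # cow-day in this dataset (the same divisor is used in the gap analysis
+    # further down)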
+    if barnnr != 62:
+        areas = pd.DataFrame(['walk','cubicle','feed','drink','concentrate','waiting_area','unknown'],columns = ["area"])
+        areas["zone"] = [0,1,2,3,4,7,8]
+        sumdata = sumdata.merge(areas,on = "zone")
+    else:
+        areas = pd.DataFrame(['feed','drink','concentrate','slatted','resting','waiting_area','unknown'],columns = ["area"])
+        areas["zone"] = [2,3,4,5,6,7,8]
+        sumdata = sumdata.merge(areas,on = "zone")
+    sumdata["percentage"] = round(sumdata["cowid"]/864,2)
+    sumdata = sumdata.sort_values(by = ["date","zone"]).reset_index(drop=1)
+    #sumdata2=sumdata.groupby(by="date").sum()
+
+    #fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
+    sns.set(style="ticks")
+    sns.set_style("whitegrid")
+    g = sns.catplot(data=sumdata,x="date",y = "percentage", col="area",kind="bar",height=5.5,aspect=0.7,palette = 'magma')
+    #ax.legend(["walk","cub_a","cub_b","cub_c","feed","drink","conc"])
+    g.set_axis_labels("", "Percentage area usage")
+    g.set_xticklabels(sumdata.date.drop_duplicates(),rotation=45,horizontalalignment='right')
+    g.set_titles("{col_name}")
+    g.fig.suptitle("cow " + str(cow) + ", barn " + str(barnnr))
+    g.fig.subplots_adjust(top = 0.85)
+    g.savefig(svpath + "\\timebudgets_cow" + str(cow) + "_barn" + str(barnnr) + ".png")
+    plt.close()
+
+
 # summarize variables
 data[["xnew","X","ynew","y"]].describe()
 
-test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19),:]
+test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19) & (data["cowid"] == cow),:]
 sns.scatterplot(data=test, x = test.index.values, y = "ynew")
 sns.scatterplot(data=test, x = test.index.values, y = "y")
 
@@ -88,7 +182,37 @@ sns.scatterplot(data=test, x = test.index.values, y = "y")
 - explore when no areas are assigned and so, wrong barn is entered
 - explore gaps and gapsize
 """
-
-
+
+# gapsize
+
+subset = data.loc[data["X"].isna(),["cowid","barn","date","t"]]
+sumdata = subset.groupby(by = ["cowid","barn","date"]).count()
+subset = data.loc[data["xnew"].isna(),["cowid","barn","date","t"]]
+test = subset.groupby(by = ["cowid","barn","date"]).count()
+
+sumdata["xnew"] = test["t"]
+
+sumdata["gapperc"] = sumdata["t"]/864
+sumdata["gap180s"] = sumdata["xnew"]/864
+sumdata = sumdata.reset_index()
+sumdata = sumdata.sort_values(by = ["date","cowid"]).reset_index(drop=1)
+sumdata["xlabel"] = np.arange(len(sumdata))
+
+plt.figure()
+ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gapperc",
+                     palette = "tab10", hue = "cowid",
+                     style = "date", legend = False)
+plt.savefig(svpath + "\\gap_notimputed.png")
+plt.figure()
+ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gap180s",
+                     palette = "tab10", hue = "cowid",
+                     style = "date", legend = False)
+plt.savefig(svpath + "\\gap_imputed.png")
+# average gap
+
+
diff --git a/heatmap_visualisation.py b/heatmap_visualisation.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cf37a0bdcf91e08d133354f0d7778521463a1d6
--- /dev/null
+++ b/heatmap_visualisation.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Feb 6 11:20:43 2023
+
+@author: adria036
+"""
+#%% import packages
+
+import pandas as pd
+# from datetime import date,timedelta
+import numpy as np
+# from read_data_sewio import start_end_dates, read_all_data, read_ids, combine_data_id
+import os
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+# path_out
+path_out = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
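+# note: the usage section below assumes that the processed UWB DataFrame
+# `data` (as built in exploration.py) is already present in the workspace;
+# it is not loaded in this script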
+
+
+#%% visualisation functions
+
+def heatmap_barn(data_x,data_y,xstep,ystep,x_lim,y_lim):
+    """
+    Plot the frequency of registered positions, given data_x and data_y.
+    xstep and ystep determine how finely these positions are binned;
+    x_lim and y_lim set the plotted boundaries (e.g. incl. feeding lane).
+
+    Parameters
+    ----------
+    data_x : TYPE = Pandas series
+        Data of the x-coordinate as registered by the sewio system
+    data_y : TYPE = Pandas series
+        Data of the y-coordinate as registered by the sewio system
+    xstep : TYPE = float
+        How finely the location is binned in the x-direction (e.g. 1 is one
+        meter, 0.1 = 10 centimeters)
+    ystep : TYPE = float
+        How finely the location is binned in the y-direction (e.g. 1 is one
+        meter, 0.1 = 10 centimeters)
+    x_lim : TYPE = list of floats
+        Physical boundaries of the plot in the x-direction [x_min, x_max]
+    y_lim : TYPE = list of floats
+        Physical boundaries of the plot in the y-direction [y_min, y_max]
+        (e.g. to cut off the feeding lane, set the lower y boundary to -4)
+
+    Returns
+    -------
+    ax : TYPE = matplotlib Axes
+        figure axes with the seaborn heatmap
+
+    """
+    # transform data_x and data_y for count
+    x_transf = (data_x*(1/xstep)).round()*xstep
+    y_transf = (data_y*(1/ystep)).round()*ystep
+
+    # put x_transf and y_transf in DataFrame
+    frame = {'xT' : x_transf.round(decimals = 4),
+             'yT' : y_transf.round(decimals = 4) }
+    df = pd.DataFrame(frame)
+
+    # count and groupby
+    heatdata = df.groupby(["xT","yT"]).size()
+    heatdata = heatdata.to_frame(name = "counts")
+    heatdata = heatdata.reset_index()
+
+    # set limits to agree with steps
+    x_low = round(round(x_lim[0]*(1/xstep))*xstep,ndigits = 4)
+    x_high = round(round(x_lim[1]*(1/xstep))*xstep,ndigits = 4)
+    y_low = round(round(y_lim[0]*(1/ystep))*ystep,ndigits = 4)
+    y_high = round(round(y_lim[1]*(1/ystep))*ystep,ndigits = 4)
+
+    # delete the rows for which values are outside x_lim or y_lim
+    heatdata = heatdata.loc[(heatdata["xT"] >= x_lim[0]) &
+                            (heatdata["xT"] <= x_lim[1]) &
+                            (heatdata["yT"] >= y_lim[0]) &
+                            (heatdata["yT"] <= y_lim[1])]
+    heatdata.reset_index(drop = True, inplace = True)
+
+    # expected axes limits based on lim and step
+    x = np.arange(x_low,x_high+xstep,xstep,dtype = float)
+    y = np.arange(y_low,y_high+ystep,ystep,dtype = float)
+
+    # fill array with the count for every (x,y) bin, 0 if no positions
+    heat_all = []
+    for i in y:
+        for ii in x:
+            number_values = heatdata.loc[(heatdata["xT"] == round(ii,ndigits = 4)) &
+                                         (heatdata["yT"] == round(i,ndigits = 4)),
+                                         ["counts"]]
+            if number_values.empty:
+                number_values = pd.DataFrame(data = [0], columns = ["counts"])
+            number_values = number_values.reset_index(drop=1)
+            # print(round(i,ndigits = 4),round(ii,ndigits = 4),number_values["counts"][0])
+            heat_all.append(number_values["counts"][0])
+
+    # make rectangular and convert list to numpy
+    heat_all = np.array(heat_all)
+    heat_all = np.reshape(heat_all, [len(y),len(x)])
+    heat_all = heat_all.transpose()
+    heat_flipped = np.flip(heat_all, axis = 1)
+
+    # convert to df with right colnames and indices
+    xcols = np.array(x.round(decimals=3),dtype = str)
+    ycols = np.flip(np.array(y.round(decimals=3),dtype = str))
+    df = pd.DataFrame(heat_flipped, columns = ycols, index = xcols)
+
+    # make heatmap
+    ax = sns.heatmap(df,
+                     yticklabels = round(1/xstep),
+                     xticklabels = round(1/ystep*2),
+                     robust = True
+                     )
+
+    return ax
+
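+# minimal call sketch (illustrative values only: cow 907 with the barn 60
+# limits and the y-range used in the usage section below):
+# data_x = data.loc[data["cowid"] == 907, "X"]
+# data_y = data.loc[data["cowid"] == 907, "y"]
+# ax = heatmap_barn(data_x, data_y, xstep=0.1, ystep=0.1,
+#                   x_lim=[64.8,75.6], y_lim=[-18,0])
+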
+#%% usage
+# ------------------------- individual cow heatmaps ---------------------------
+# unique cows
+cows = data.loc[:,["cowid"]].drop_duplicates()
+cows = cows.sort_values(by = ["cowid"]).reset_index(drop=1)
+
+
+# settings heatmap
+xstep = 0.1
+ystep = 0.1
+
+# barn limits
+barn = {"60":[64.8,75.6],
+        "61":[54,64.8],
+        "62":[32.4,54],
+        "70":[21.6,32.4],
+        "71":[10.8,21.6],
+        "72":[0,10.8],
+        "73":[-10.8,0],
+        }
+
+# plots per cow-day
+for cow in cows["cowid"]:
+
+    # unique days
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates()
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+
+    # select data and create heatmaps
+    for dd in days["date"]:
+        print(cow, dd)
+        # select data
+        data_x = data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"X"]
+        data_y = data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"y"]
+
+        # set limits (based on information barn)
+        barnnr = int(data.loc[(data["cowid"] == cow) &
+                              (data["date"] == dd),"barn"].mean())
+        x_lim = barn[str(barnnr)] #[data_x.min(),data_x.max()]
+        y_lim = [-18,0] #[data_y.min(),data_y.max()]
+
+        # plot heatmap
+        plt.rcdefaults()
+        fig, ax = plt.subplots()
+        ax = heatmap_barn(data_x, data_y, xstep, ystep, x_lim, y_lim)
+        ax.set_xlabel('y coordinate')
+        ax.set_ylabel('x coordinate')
+        ax.set_title("barn = " + str(barnnr) + ', cow = ' + str(cow) + ", date = " + dd.strftime("%Y%m%d"))
+        ax.xaxis.tick_top()
+        plt.savefig(path_out + "\\heatmap_" + str(cow) + "_" + dd.strftime("%Y%m%d"))
+        plt.close()
+
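+# pooled variant (sketch, commented out): the same function could also draw a
+# barn-level heatmap over all cows by passing pooled coordinates, e.g.
+# barnnr = 60
+# sel = data["barn"] == barnnr
+# ax = heatmap_barn(data.loc[sel,"X"], data.loc[sel,"y"], xstep, ystep,
+#                   barn[str(barnnr)], [-18,0])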