data exploration +correction areas

ce864035 · Adriaens, Ines · 3c7e6b8f · ce864035 · ce864035
Commit ce864035 authored 2 years ago by Adriaens, Ines
--- a/exploration.py
+++ b/exploration.py
@@ -20,14 +20,25 @@ import seaborn as sns

 #%% set paths and constants and load data

+# path_out 
+svpath = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
+
+
+
 # path to per barn directories
 path = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","uwb_processed")
+path_zones = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","raw","dc_sewio")
+fna = "\\Barn_areas.xlsx"
+area_zones = pd.read_excel(path_zones+fna, sheet_name = "areas")    
+barn_edges = pd.read_excel(path_zones+fna, sheet_name = "edges")
+del fna, path_zones
+

 # settings
 settings = {'barn' : [60,61,62,70,71,72,73], # [60,61,62,70,71,72,73],
            'startdate' : date(2022,10,1),
            'enddate' :  date(2022,10,15),
-            'cows' : [907,89,98,517,1748,605,3107], # or specific cow number
+            'cows' : [907,89,98,517,1748,495,3107], # or specific cow number
            }

 # files that comply with settings
@@ -65,16 +76,99 @@ for f in fn:
    sub["date"] = pd.to_datetime(sub["date"], format = "%Y-%m-%d") 
    data = pd.concat([data,sub])
 data = data.sort_values(by = ["cowid","date","t"])
-
+data = data.reset_index(drop=1)
+
+# correct the areas
+cows = data["cowid"].drop_duplicates().reset_index(drop=1)
+for cow in cows:
+    print(cow)
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates() 
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+    for dd in days["date"]:
+        print(dd)
+        barnnr = int(data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"barn"].mean()) 
+        myarea = area_zones.loc[(area_zones["barn"] == barnnr) | \
+                            (area_zones["barn"].isna() == True) \
+                            ,:].reset_index(drop=1)
+        for j in range(0,len(myarea)):
+            x1,y1,x2,y2,x3,y3,x4,y4 = myarea[["x1","y1","x2","y2","x3","y3","x4","y4"]].values[j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"area"] = myarea["area"][j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"zone"] = myarea["zone"][j]
+        del x1,y1,x2,y2,x3,y3,x4,y4
+del cows, dd, days
+data.loc[data["zone"].isna(),"zone"] = 8
+data.loc[data["zone"] == 8 ,"area"] = "unknown"
+
+data["date"] = data["date"].dt.date
+
+# check correctly read ids
+print(data[["cowid","barn"]].drop_duplicates())

 #%% summarize data
+
+""" areas
+areas / zones
+zone = 0 : walking area
+zone = 1 : cublicles a, b, c
+zone = 2 : feed_rack
+zone = 3 : drink_trough
+zone = 4 : concentrate feeder
+zone = 5 : slatted area of barn 62
+zone = 6 : resting area of barn 62
+zone = 7 : waiting area
+zone = 8 : no area assigned
+"""
+
 # summarize data per zone
 zones = data[["cowid","date","area","t"]].groupby(by = ["cowid","date","area"]).count().reset_index()
 zones = zones.sort_values(by = ["cowid","area"])

+# time spent per zone
+cows = data["cowid"].drop_duplicates()
+for cow in cows:
+    subset = data.loc[data["cowid"]==cow,["cowid","barn","date","t","xnew","ynew","area","zone"]]
+    barnnr = int(data.loc[(data["cowid"] == cow),"barn"].mean())                      
+    sumdata = subset[["date","zone","cowid"]].groupby(by=["date","zone"]).count().reset_index()
+    if barnnr != 62:
+        areas = pd.DataFrame(['walk','cubicle','feed','drink','concentrate','waiting_area','unknown'],columns = ["area"])
+        areas["zone"] = [0,1,2,3,4,7,8] 
+        sumdata = sumdata.merge(areas,on = "zone")
+    else:
+        areas = pd.DataFrame(['feed','drink','concentrate','slatted','resting','waiting_area','unknown'],columns = ["area"])
+        areas["zone"] = [2,3,4,5,6,7,8] 
+        sumdata = sumdata.merge(areas,on = "zone")
+    sumdata["percentage"] = round(sumdata["cowid"]/864,2)
+    sumdate = sumdata.sort_values(by = ["date","zone"]).reset_index(drop=1)
+    #sumdata2=sumdata.groupby(by="date").sum()
+    
+    
+    #fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(20,10))
+    sns.set(style="ticks")
+    sns.set_style("whitegrid")
+    g = sns.catplot(data=sumdata,x="date",y = "percentage", col="area",kind="bar",height=5.5,aspect=0.7,palette = 'magma')
+    #ax.legend(["walk","cub_a","cub_b","cub_c","feed","drink","conc"])
+    g.set_axis_labels("", "Percentage area usage")
+    g.set_xticklabels(sumdata.date.drop_duplicates(),rotation=45,horizontalalignment='right')
+    g.set_titles("{col_name}")
+    g.fig.suptitle("cow " + str(cow) + ", barn " + str(barnnr))
+    g.fig.subplots_adjust(top = 0.85)
+    g.savefig(svpath + "\\timebudgets_cow"+str(cow) + "_barn" + str(barnnr) + ".png")
+    plt.close()
+
+
 # summarize variables
 data[["xnew","X","ynew","y"]].describe()
-test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19),:]
+data[""]
+test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19) & (data["cowid"] = cow),:]
 sns.scatterplot(data=test, x = test.index.values, y = "ynew")
 sns.scatterplot(data=test, x = test.index.values, y = "y")

@@ -88,7 +182,37 @@ sns.scatterplot(data=test, x = test.index.values, y = "y")
     - explore when no areas are assigned and so, wrong barn is entered
     - explore gaps and gapsize
 """
-    
-    
+  
+#gapsize
+
+subset = data.loc[data["X"].isna(),["cowid","barn","date","t"]]
+sumdata = subset.groupby(by = ["cowid","barn","date"]).count()
+subset = data.loc[data["xnew"].isna(),["cowid","barn","date","t"]]
+test = subset.groupby(by = ["cowid","barn","date"]).count()
+
+sumdata["xnew"] = test["t"]
+
+sumdata["gapperc"] = sumdata["t"]/864
+sumdata["gap180s"] = sumdata["xnew"]/864
+sumdata = sumdata.reset_index()
+sumdata = sumdata.sort_values(by = ["date","cowid"]).reset_index(drop=1)
+sumdata["xlabel"] = np.arange(len(sumdata))
+
+
+plt.figure()
+ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gapperc", 
+                     palette = "tab10", hue = "cowid", 
+                     style = "date", legend = False)
+plt.savefig(svpath + "\\gap_notimputed.png")
+plt.figure()
+ax = sns.scatterplot(data=sumdata,x = "xlabel", y = "gap180s", 
+                     palette = "tab10", hue = "cowid", 
+                     style = "date", legend = False)
+plt.savefig(svpath + "\\gap_imputed.png")
+# average gap
+
+
+
+
    

--- a/heatmap_visualisation.py
+++ b/heatmap_visualisation.py
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Feb  6 11:20:43 2023
+
+@author: adria036
+"""
+#%% import packages
+
+import pandas as pd
+# from datetime import  date,timedelta
+import numpy as np
+# from read_data_sewio import start_end_dates, read_all_data, read_ids, combine_data_id
+import os
+import seaborn as sns
+import matplotlib as plt
+
+
+# path_out 
+path_out = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\exploration"
+
+
+#%% visualisation functions
+
+def heatmap_barn(data_x,data_y,xstep,ystep,x_lim,y_lim):
+    """
+    Plot the frequency of positions registered, given data_x and data_y
+    xstep and ystep determine how fine these positions are taken
+    x_lim and y_lim set boundaries (e.g. incl feeding lane)
+    
+    Parameters
+    ----------
+    data_x : TYPE = Pandas series
+        Data of the x-coordinate as registered by the sewio system
+    data_y : TYPE = Pandas series
+        Data of the y-coordinate as registered by the sewio system
+    xstep : TYPE = float
+        How fine you want to determine location in x-direction (e.g. 1 is one
+            meter, 0.1 = 10 centimeters)
+    ystep : TYPE = float
+        How fine you want to determine location in x-direction (e.g. 1 is one
+            meter, 0.1 = 10 centimeters)
+    x_lim : TYPE = list of floats
+        Physical boundaries of plot you want to make (e.g. cut off feeding lane
+            then indicate lower boundary in y direction = -4)
+    y_lim : TYPE = list of floats
+        Physical boundaries of plot you want to make (e.g. cut off feeding lane
+            then indicate lower boundary in y direction = -4)
+
+    Returns
+    -------
+    ax : TYPE = sns heatmap
+        figure axes with heatmap
+
+    """
+    # transform data_x and data_y for count
+    x_transf = (data_x*(1/xstep)).round()*xstep
+    y_transf = (data_y*(1/ystep)).round()*ystep
+    
+    # put x_transf and y_transf in DataFrame
+    frame = {'xT' : x_transf.round(decimals = 4),
+             'yT' : y_transf.round(decimals = 4) }
+    df = pd.DataFrame(frame)
+    
+    # count and groupby
+    heatdata = df.groupby(["xT","yT"]).size()
+    heatdata = heatdata.to_frame(name = "counts")
+    heatdata = heatdata.reset_index()
+    
+    # set limits to agree with steps
+    x_low = round(round(x_lim[0]*(1/xstep))*xstep,ndigits = 4)
+    x_high = round(round(x_lim[1]*(1/xstep))*xstep,ndigits = 4)
+    y_low = round(round(y_lim[0]*(1/ystep))*ystep,ndigits = 4)
+    y_high = round(round(y_lim[1]*(1/ystep))*ystep,ndigits = 4)
+    
+    # delete the rows for which values are outside x_lim or y_lim
+    heatdata = heatdata.loc[(heatdata["xT"] >= x_lim[0]) &
+                 (heatdata["xT"] <= x_lim[1]) &
+                 (heatdata["yT"] >= y_lim[0]) &
+                 (heatdata["yT"] <= y_lim[1])]
+    heatdata.reset_index(drop = True, inplace = True)
+    
+    # expected axes limits based on lim and step
+    x = np.arange(x_low,x_high+xstep,xstep,dtype = float)
+    y = np.arange(y_low,y_high+ystep,ystep,dtype = float)
+    
+    # fill array
+    heat_all = []
+    for i in y:
+        for ii in x:
+            number_values = heatdata.loc[(heatdata["xT"] == round(ii,ndigits = 4)) &
+                                         (heatdata["yT"] == round(i,ndigits = 4)),
+                                         ["counts"]]
+            if number_values.empty:
+                number_values = pd.DataFrame(data = [0], columns = ["counts"])
+            number_values = number_values.reset_index(drop=1)
+            # print(round(i,ndigits = 4),round(ii,ndigits = 4),number_values["counts"][0])
+            heat_all.append(number_values["counts"][0])
+            
+        
+    # make rectangular and convert list to numpy
+    heat_all = np.array(heat_all)
+    heat_all = np.reshape(heat_all, [len(y),len(x)])
+    heat_all = heat_all.transpose()
+    test = np.flip(heat_all, axis = 1)
+    
+    
+    # convert to df with right colnames and indices
+    xcols = np.array(x.round(decimals=3),dtype = str)
+    ycols = np.flip(np.array(y.round(decimals=3),dtype = str))
+    df = pd.DataFrame(test, columns = ycols, index = xcols)
+    
+    # make heatmap
+    ax = sns.heatmap(df,
+                     yticklabels = round(1/xstep),
+                     xticklabels = round(1/ystep*2),
+                     robust = True
+                     )
+
+    return ax
+
+#%% usage
+# ------------------------- individual cow heatmaps ---------------------------
+# unique cows
+cows = data.loc[:,["cowid"]].drop_duplicates() 
+cows = cows.sort_values(by = ["cowid"]).reset_index(drop=1)
+
+
+
+# settings heatmap
+xstep = 0.1
+ystep = 0.1
+
+#barn limits
+barn = {"60":[64.8,75.6],
+        "61":[54,64.8],
+        "62":[32.4,54],
+        "70":[21.6,32.4],
+        "71":[10.8,21.6],
+        "72":[0,10.8],
+        "73":[-10.8,0],
+        }
+
+# plots per cow-day
+for cow in cows["cowid"]:
+    
+    # unique days
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates() 
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+    
+    # select data and create heatmaps
+    for dd in days["date"]:
+        print(cow, dd)
+        # select data
+        data_x = data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"X"]
+        data_y = data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"y"]
+        
+        # set limits (based on information barn)
+        barnnr = int(data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"barn"].mean())
+        x_lim = barn[str(barnnr)] #[data_x.min(),data_x.max()]
+        y_lim = [-18,0] #[data_y.min(),data_y.max()]
+        
+        # plot heatmap
+        plt.rcdefaults() 
+        fig, ax = plt.pyplot.subplots() 
+        ax = heatmap_barn(data_x, data_y, xstep, ystep, x_lim, y_lim)
+        ax.set_xlabel('y coordinate')
+        ax.set_ylabel('x coordinate')
+        ax.set_title("barn = " +str(barnnr) + ', cow = ' + str(cow) + ", date = " + datetime.strftime(dd,"%Y-%m-%d").replace("-",""))
+        ax.xaxis.tick_top()
+        plt.pyplot.savefig(path_out + "//heatmap_" + str(cow) + "_" + datetime.strftime(dd,"%Y-%m-%d").replace("-",""))
+        plt.pyplot.close()
+
+