calculate bouts

4d1faaf2 · Adriaens, Ines · 01225e30 · 4d1faaf2 · 4d1faaf2 · 4d1faaf2
Commit 4d1faaf2 authored 2 years ago by Adriaens, Ines
--- a/behaviours.py
+++ b/behaviours.py
@@ -12,13 +12,13 @@ os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriae

 #%% import modules

-
 from datetime import date, datetime
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib
 import seaborn as sns
+from uwbfunctions import behaviour_bouts, heatmap_barn
 #%matplotlib qt


@@ -38,10 +38,10 @@ del fna, path_zones


 # settings
-settings = {'barn' : [60,61], # [60,61,62,70,71,72,73],
-            'startdate' : date(2022,10,1),
-            'enddate' :  date(2022,10,10),
-            'cows' : [2658,2155], # or specific cow number
+settings = {'barn' : [60], # [60,61,62,70,71,72,73],
+            'startdate' : date(2022,10,7),
+            'enddate' :  date(2022,10,7),
+            'cows' : [907], # or specific cow number
            }

 # files that comply with settings
@@ -81,33 +81,8 @@ for f in fn:
 data = data.sort_values(by = ["cowid","date","t"])
 data = data.reset_index(drop=1)

-# correct the areas
-cows = data["cowid"].drop_duplicates().reset_index(drop=1)
-for cow in cows:
-    print(cow)
-    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates() 
-    days = days.sort_values(by = ["date"]).reset_index(drop=1)
-    for dd in days["date"]:
-        print(dd)
-        barnnr = int(data.loc[(data["cowid"] == cow) &
-                          (data["date"] == dd),"barn"].mean()) 
-        myarea = area_zones.loc[(area_zones["barn"] == barnnr) | \
-                            (area_zones["barn"].isna() == True) \
-                            ,:].reset_index(drop=1)
-        for j in range(0,len(myarea)):
-            x1,y1,x2,y2,x3,y3,x4,y4 = myarea[["x1","y1","x2","y2","x3","y3","x4","y4"]].values[j]
-            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
-                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
-                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
-                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
-                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"area"] = myarea["area"][j]
-            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
-                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
-                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
-                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
-                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"zone"] = myarea["zone"][j]
-        del x1,y1,x2,y2,x3,y3,x4,y4
-del cows, dd, days
+
+
 data.loc[data["zone"].isna(),"zone"] = 8
 data.loc[data["zone"] == 8 ,"area"] = "unknown"

@@ -117,8 +92,60 @@ data["date"] = data["date"].dt.date
 print(data[["cowid","barn"]].drop_duplicates())


-#%% RESTING BEHAVIOUR
+
+#%% calculate and summarize behaviours
+

 cows = data["cowid"].drop_duplicates().reset_index(drop=1)
+bouts = pd.DataFrame([])
+bsums = pd.DataFrame([])
+
+# one row per behaviour: (behaviour, interval, min_length)
+#   behaviour  : pattern looked up in the "area" column
+#   interval   : min number of seconds between separate bouts
+#   min_length : shortest bout that is counted; meant as seconds
+#                NOTE(review): verify that behaviour_bouts applies it in seconds
+bout_settings = [
+    ("feed",        5*60,  30),   # feeding behaviour
+    ("drink",       5*60,  5),    # drinking behaviour
+    ("cubicle",     2*60,  30),   # resting behaviour
+    ("concentrate", 5*60,  0),    # concentrate feeder, always count
+    ("wait",        20*60, 0),    # waiting area, always count
+    ("unknown",     5*60,  10),   # unknown area
+]
 for cow in cows:
-    subset = 
+    print(cow)
+    # select the records of this cow once and reuse them for every behaviour
+    cowdata = data.loc[data["cowid"] == cow]
+    for behaviour, interval, min_length in bout_settings:
+        df,summary = behaviour_bouts(cowdata, interval, behaviour, min_length)
+        bouts = pd.concat([bouts,df],axis = 0).reset_index(drop=1)
+        bsums = pd.concat([bsums,summary],axis = 0).reset_index(drop=1)
+
--- a/bin.py
+++ b/bin.py
+"""
+# correct the areas
+cows = data["cowid"].drop_duplicates().reset_index(drop=1)
+for cow in cows:
+    print(cow)
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates() 
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+    for dd in days["date"]:
+        print(dd)
+        barnnr = int(data.loc[(data["cowid"] == cow) &
+                          (data["date"] == dd),"barn"].mean()) 
+        myarea = area_zones.loc[(area_zones["barn"] == barnnr) | \
+                            (area_zones["barn"].isna() == True) \
+                            ,:].reset_index(drop=1)
+        for j in range(0,len(myarea)):
+            x1,y1,x2,y2,x3,y3,x4,y4 = myarea[["x1","y1","x2","y2","x3","y3","x4","y4"]].values[j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"area"] = myarea["area"][j]
+            data.loc[((data["cowid"] == cow) & (data["date"] == dd) & \
+                      (data["xnew"] >= x1) & (data["xnew"] <= x2) & \
+                      (data["xnew"] >= x3) & (data["xnew"] <= x4) & \
+                      (data["ynew"] >= y1) & (data["ynew"] >= y2) & \
+                      (data["ynew"] <= y3) & (data["ynew"] <= y4)),"zone"] = myarea["zone"][j]
+        del x1,y1,x2,y2,x3,y3,x4,y4
+del cows, dd, days
+"""
\ No newline at end of file
--- a/heatmap_visualisation.py
+++ b/heatmap_visualisation.py
@@ -123,6 +123,107 @@ def heatmap_barn(data_x,data_y,xstep,ystep,x_lim,y_lim):
    return ax


+#%% bout calculations
+def behaviour_bouts(data, interval, behaviour,min_length):
+    """
+    Group the records of one behaviour into bouts and summarise them.
+
+    A bout is a run of records whose "area" matches `behaviour` and that are
+    at most `interval` seconds apart. Because "t" restarts every day, the
+    first matching record of each (cowid, date) always opens a new bout, so a
+    bout never spans two days or two cows.
+
+    Parameters
+    ----------
+    data : Pandas DataFrame
+        records sorted by cowid, date and t, with at least:
+            - cowid, barn : copied to the output
+            - date        : day of the record
+            - t           : time stamp in seconds since the start of `date`
+            - area        : area (walking, drink_trough, cubicle_A-B-C, feed_rack)
+    interval : numeric
+        time in seconds that the behaviour has to be discontinued for the
+        next record to belong to another bout
+    behaviour : str
+        pattern looked up in "area" with str.contains (so it is interpreted
+        as a regular expression); records without an area never match
+    min_length : numeric
+        minimum duration (end - start) of a bout in seconds; shorter bouts
+        are dropped, 0 keeps every bout
+
+    Returns
+    -------
+    df : Pandas DataFrame
+        one row per bout with cowid, barn, behaviour, date, start and end
+        (seconds since the start of the day), btime and etime (timestamps),
+        gap (minutes since the end of the previous kept bout of that cow and
+        day), len1 (end - start in minutes) and len2 (number of records of
+        this behaviour or of "unknown" within the bout, divided by 60; this
+        equals minutes if there is one record per second - TODO confirm the
+        sampling rate)
+    summary : Pandas DataFrame
+        one row per cowid, barn, date and behaviour with the average bout
+        length (len1_av, len2_av), number of bouts (no_bouts), median gap
+        (gap_med) and total length (total1, total2)
+
+    """
+    bout_cols = ["cowid","barn","behaviour","date","start",
+                 "end","btime","etime",
+                 "gap","len1","len2"]
+    summary_cols = ["cowid","barn","date","behaviour",
+                    "len1_av","len2_av","no_bouts",
+                    "gap_med","total1","total2"]
+    day_keys = ["cowid","date"]
+
+    # select data of behaviour of interest; na=False so that records without
+    # an area label are skipped instead of breaking the boolean selection
+    is_behaviour = data["area"].str.contains(behaviour, na=False)
+    subset = data.loc[is_behaviour,:].copy().reset_index(drop=True)
+    if len(subset) == 0:
+        return (pd.DataFrame([],columns = bout_cols),
+                pd.DataFrame([],columns = summary_cols))
+
+    # discontinuities within each cow and day: the gap is NaN for the first
+    # record of a day, which therefore always starts a bout
+    gap = subset.groupby(day_keys)["t"].diff()
+    starts = (gap.isna() | (gap > interval)).to_numpy()
+    # a record ends a bout when the next record starts one (or is the last)
+    ends = np.append(starts[1:], True)
+
+    df = subset.loc[starts,:].reset_index(drop=True)
+    df["end_time"] = subset.loc[ends,"t"].to_numpy()
+
+    # len1: end - begin in minutes, incl the other behaviours in between
+    duration = df["end_time"] - df["t"]          # in seconds
+    df["len1"] = (duration/60).round(1)
+
+    # len2: number of records within the bout in which this behaviour is
+    #    noted => if "unknown", also count it as the behaviour of interest
+    counted = data.loc[is_behaviour | data["area"].str.contains("unknown", na=False),
+                       ["cowid","date","t"]]
+    df["len2"] = [round(((counted["cowid"] == cow) &
+                         (counted["date"] == day) &
+                         (counted["t"].between(start, end))).sum()/60, 1)
+                  for cow, day, start, end
+                  in zip(df["cowid"], df["date"], df["t"], df["end_time"])]
+
+    # only keep bouts of at least min_length seconds (compare in seconds,
+    # len1 is in minutes)
+    df = df.loc[duration >= min_length,:].reset_index(drop=True)
+    if len(df) == 0:
+        return (pd.DataFrame([],columns = bout_cols),
+                pd.DataFrame([],columns = summary_cols))
+
+    # gap in minutes between successive bouts of the same cow and day
+    df["gap"] = ((df["t"] - df.groupby(day_keys)["end_time"].shift())/60).round()
+
+    # add behaviour and select data
+    df["behaviour"] = behaviour
+    df["btime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["t"],unit = 's')
+    df["etime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["end_time"],unit = 's')
+    df = df[["cowid","barn","behaviour","date","t","end_time","btime","etime",
+             "gap","len1","len2"]]
+    df = df.rename(columns = {"end_time" : "end","t" : "start"})
+
+    # calculate summary per cow, barn, day and behaviour
+    summary = df.groupby(by = ["cowid","barn","date","behaviour"]).agg(
+        len1_av = ("len1","mean"),
+        len2_av = ("len2","mean"),
+        no_bouts = ("len1","size"),
+        gap_med = ("gap","median"),
+        total1 = ("len1","sum"),
+        total2 = ("len2","sum")).reset_index()
+    summary[["len1_av","len2_av"]] = summary[["len1_av","len2_av"]].round(1)
+
+    return df,summary
+

 """
 #%% usage