add behaviours per cow for calculations

30807ab4 · Adriaens, Ines · c1f18212 · 30807ab4 · 30807ab4 · 30807ab4
Commit 30807ab4 authored 2 years ago by Adriaens, Ines
--- a/behaviours.py
+++ b/behaviours.py
@@ -158,7 +158,7 @@ for cow in cows:
        # ------------------------ concentrate feeder -------------------------
        interval = 5*60      # min number of seconds between separate bouts
        behaviour = "concentrate"   # concentrate feeding behaviour
-        min_length = 0       # always count
+        min_length = 2       # do not count if shorter than 2 seconds
        df,summary = behaviour_bouts(dset, interval, behaviour, min_length)
        bouts = pd.concat([bouts,df],axis = 0).reset_index(drop=1)
        bsums = pd.concat([bsums,summary],axis = 0).reset_index(drop=1)

--- a/behaviours_percow.py
+++ b/behaviours_percow.py
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Feb 14 12:08:41 2023
+
+@author: adria036
+"""
+
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb  8 16:47:19 2023
+
+@author: adria036
+"""
+
+
+import os
+os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\scripts\uwb") 
+
+
+#%% import modules
+
+from datetime import date, datetime
+import pandas as pd
+import numpy as np
+# import matplotlib.pyplot as plt
+# import matplotlib
+# import seaborn as sns
+from uwbfunctions import behaviour_bouts
+#%matplotlib qt
+
+
+#%% import data, set paths and constants
+
+
+# output folder for the per-cow activity, bout and summary files
+svpath = r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\results\behaviours"
+
+# path to the "uwb_processed" data folders (one sub-folder per barn) and to
+# the workbook with the per barn areas
+# (r"\ASG" is a raw string: "\A" is not a valid escape sequence and a plain
+#  literal triggers a SyntaxWarning on recent Python versions; the resulting
+#  path is unchanged)
+path = os.path.join("W:",r"\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","uwb_processed")
+path_zones = os.path.join("W:",r"\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","raw","dc_sewio")
+fna = "\\Barn_areas.xlsx"
+area_zones = pd.read_excel(path_zones+fna, sheet_name = "areas")
+# barn_edges = pd.read_excel(path_zones+fna, sheet_name = "edges")
+del fna, path_zones
+
+
+# settings
+settings = {'barn' : [60,61,62,70,71,72,73],    # barn numbers to include
+            'startdate' : date(2021,11,1),      # first day to include
+            'enddate' :  date(2022,2,15),       # last day to include
+            'cows' : 0, # 0 = all cows, or specific cow number
+            }
+
+# files that comply with settings
+# The file names are sliced at fixed positions: characters 5:13 hold the date
+# (yyyymmdd) and characters 26 up to the 4-character extension hold the cow id.
+selected_cows = settings["cows"]
+if isinstance(selected_cows, (int, np.integer)) and selected_cows != 0:
+    # a single cow number is accepted as well as a list of cow numbers
+    selected_cows = [selected_cows]
+
+fn = []
+for barn_no in settings["barn"]:
+    print("barn = " + str(barn_no))
+    barn_path = os.path.join(path, "barn" + str(barn_no))
+    for f in os.listdir(barn_path):
+        # skip sub-folders
+        if not os.path.isfile(os.path.join(barn_path, f)):
+            continue
+        # cow selection; 0 means "all cows"
+        if selected_cows != 0 and int(f[26:-4]) not in selected_cows:
+            continue
+        # date selection (both limits included)
+        fdate = datetime.strptime(f[5:13], '%Y%m%d').date()
+        if settings["startdate"] <= fdate <= settings["enddate"]:
+            fn.append(f)
+fn.sort()
+
+# find unique cows (sorted, so the processing order is reproducible)
+cows = sorted({int(f[26:-4]) for f in fn})
+
+# behaviours for which bouts are calculated, in the order they are stored:
+#   (area name, min number of seconds between separate bouts,
+#    min bout length in seconds - shorter bouts are not counted)
+# NOTE(review): behaviours.py uses min_length = 2 for the concentrate feeder,
+# here it is 0 - confirm which value is intended.
+behaviour_settings = [
+    ("feed",        5*60,  30),    # feeding behaviour
+    ("drink",       5*60,   5),    # drinking behaviour
+    ("cubicle",     2*60,  30),    # resting behaviour
+    ("cubicle_A",   2*60,  30),    # resting behaviour, cubicle_A
+    ("cubicle_B",   2*60,  30),    # resting behaviour, cubicle_B
+    ("cubicle_C",   2*60,  30),    # resting behaviour, cubicle_C
+    ("concentrate", 5*60,   0),    # concentrate feeder, always count
+    ("wait",       20*60,   0),    # waiting area, always count
+    ("unknown",     5*60,  10),    # unknown area
+    ("slatted",     5*60,  10),    # slatted area (barn 62)
+    ("resting",     5*60,  10),    # resting area (barn 62)
+    ]
+
+# read data and calculate bouts and behaviours
+for cow in cows:
+    print("read data of cow " + str(cow))
+
+    # read data: only the files whose cow id (taken from the file name, as for
+    # the list of unique cows) equals this cow; a substring test would also
+    # match dates, barn numbers and longer cow ids
+    subsets = []
+    for f in fn:
+        if int(f[26:-4]) == cow:
+            print(f)
+            barn = f[19:21]
+            sub = pd.read_csv(path + "/barn" + barn + "/" + f,
+                      usecols = ["cowid","barn","date","t","xnew","ynew","area","zone","X","y"],
+                      dtype = {"cowid" : "int64","barn" : "int64","date" : "object",
+                               "t" : "int64", "xnew":"float64","ynew":"float64",
+                               "area":"object","zone":"float64","X":"float64","y" : "float64"})
+            sub["date"] = pd.to_datetime(sub["date"], format = "%Y-%m-%d")
+            subsets.append(sub)
+
+    # combine and sort once instead of after every file
+    data = pd.concat(subsets)
+    data = data.sort_values(by = ["cowid","date","t"]).reset_index(drop=1)
+
+    # measurements without a zone are assigned to zone 8 = "unknown" area
+    data.loc[data["zone"].isna(),"zone"] = 8
+    data.loc[data["zone"] == 8 ,"area"] = "unknown"
+
+    data["date"] = data["date"].dt.date
+
+    # per-day results, combined after the loop over the days
+    bouts_days = []
+    bsums_days = []
+    act_days = []
+
+    # summarize days
+    days = data.loc[(data["cowid"]==cow),["date"]].drop_duplicates()
+    days = days.sort_values(by = ["date"]).reset_index(drop=1)
+
+    # calculate bouts and active behaviours
+    for dd in days["date"]:
+        print("cow = " + str(cow) + ", date = " + str(dd))
+        dset = data.loc[(data["cowid"] == cow) & (data["date"] == dd)].copy()
+
+        # ----------------------- bouts per behaviour -------------------------
+        for behaviour, interval, min_length in behaviour_settings:
+            df,summary = behaviour_bouts(dset, interval, behaviour, min_length)
+            bouts_days.append(df)
+            bsums_days.append(summary)
+
+        # ------------------------- distance travelled ------------------------
+        # distance between successive positions; first measurement = 0
+        dx = np.diff(dset["xnew"].values)
+        dy = np.diff(dset["ynew"].values)
+        dist = np.zeros(len(dset))
+        dist[1:] = np.sqrt(dx**2 + dy**2)
+        dset["dist"] = dist
+        # steps below 0.30 or above 5 are not counted as movement
+        dset.loc[(dset["dist"]<0.30) | (dset["dist"]>5),"dist"] = 0
+
+        # number of measurements: all, with a known area, moving, lying areas
+        n_all = len(dset)
+        n_known = int((dset["area"] != "unknown").sum())
+        n_active = int((dset["dist"] > 0).sum())
+        n_lying = int((dset["area"].str.contains("cubicle", na = False) | \
+                       dset["area"].str.contains("resting", na = False)).sum())
+
+        # distance travelled and % active; the "_nan" columns are relative to
+        # the measurements with a known area and are NaN if there are none
+        act = dset[["cowid","date","barn","dist"]].groupby(by = ["cowid","date","barn"]).sum().reset_index()
+        act["dist"] = round(act["dist"],2)
+        act["perc_act"] = round(n_active/n_all*100,2)
+        act["perc_act_nan"] = round(n_active/n_known*100,2) if n_known > 0 else np.nan
+
+        # % of the day not in the cubicles
+        act["perc_no_lying"] = 100 - round(n_lying/n_all*100,2)
+        act["perc_no_lying_nan"] = (100 - round(n_lying/n_known*100,2)) if n_known > 0 else np.nan
+
+        act_days.append(act)
+
+    # combine the days
+    bouts = pd.concat(bouts_days, axis = 0).reset_index(drop=1)
+    bsums = pd.concat(bsums_days, axis = 0).reset_index(drop=1)
+    act_dist = pd.concat(act_days).reset_index(drop=1)
+
+    # save bouts, summaries and behaviours per cow id
+    act_dist["dist"] = round(act_dist["dist"],2)
+    act_dist["perc_no_lying_nan"] = round(act_dist["perc_no_lying_nan"],2)
+    act_dist["perc_no_lying"] = round(act_dist["perc_no_lying"],2)
+    act_dist.to_csv(svpath + "/activity_cow_" + str(cow) + ".txt")
+    bouts.to_csv(svpath + "/bouts_cow_" + str(cow) + ".txt")
+    bsums["total2"] = round(bsums["total2"],2)
+    bsums["total1"] = round(bsums["total1"],2)
+    bsums.to_csv(svpath + "/summary_cow_" + str(cow) + ".txt")
--- a/uwbfunctions.py
+++ b/uwbfunctions.py
@@ -193,29 +193,37 @@ def behaviour_bouts(data, interval, behaviour,min_length):
        # only keep bouts > min_length
        df = df.loc[df["len1"]*60 > min_length,:].reset_index(drop=1)
        
-        # calculate gap between successive bouts
-        df["gap"] = np.nan
-        df.iloc[1:,df.columns == "gap"] = np.round((df["t"][1:].values - df["end_time"][0:-1].values)/60)
+        if len(df) > 0:
+            # calculate gap between successive bouts
+            df["gap"] = np.nan
+            df.iloc[1:,df.columns == "gap"] = np.round((df["t"][1:].values - df["end_time"][0:-1].values)/60)
        
-        # add behaviour and select data
-        df["behaviour"] = behaviour
-        df["btime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["t"],unit = 's')
-        df["etime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["end_time"],unit = 's')
-        df = df[["cowid","barn","behaviour","date","t","end_time","btime","etime",
-                 "gap","len1","len2"]]
-        df = df.rename(columns = {"end_time" : "end","t" : "start"})
+            # add behaviour and select data
+            df["behaviour"] = behaviour
+            df["btime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["t"],unit = 's')
+            df["etime"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["end_time"],unit = 's')
+            df = df[["cowid","barn","behaviour","date","t","end_time","btime","etime",
+                     "gap","len1","len2"]]
+            df = df.rename(columns = {"end_time" : "end","t" : "start"})
                
-        # calculate summary 
-        summary = df[["cowid","barn","date","behaviour","len1","len2"]].groupby(by = ["cowid","barn","date","behaviour"]).mean().round(1).reset_index()
-        summary = summary.rename(columns = {"len1" : "len1_av", "len2" : "len2_av"})
-        summary["no_bouts"] = len(df)
-        summary["gap_med"] = df["gap"].median()
-        summary["total1"] = df["len1"].sum()
-        summary["total2"] = df["len2"].sum()
-        summary["nmeas"] = len(data.loc[data["area"].str.contains(behaviour)])
-        summary["perc"] = round(len(data.loc[data["area"].str.contains(behaviour)]) / \
-                          len(data) * 100,2)
-        #print(summary)
+            # calculate summary 
+            summary = df[["cowid","barn","date","behaviour","len1","len2"]].groupby(by = ["cowid","barn","date","behaviour"]).mean().round(1).reset_index()
+            summary = summary.rename(columns = {"len1" : "len1_av", "len2" : "len2_av"})
+            summary["no_bouts"] = len(df)
+            summary["gap_med"] = df["gap"].median()
+            summary["total1"] = df["len1"].sum()
+            summary["total2"] = df["len2"].sum()
+            summary["nmeas"] = len(data.loc[data["area"].str.contains(behaviour)])
+            summary["perc"] = round(len(data.loc[data["area"].str.contains(behaviour)]) / \
+                              len(data) * 100,2)
+            #print(summary)
+        else:
+            df = pd.DataFrame([],columns = ["cowid","barn","behaviour","date","start",
+                                            "end","btime","etime",
+                                            "gap","len1","len2"])
+            summary = pd.DataFrame([],columns = ["cowid","barn","date","behaviour",
+                                                 "len1_av","len2_av","no_bouts",
+                                                 "gap_med","total1","total2"])
        
    else:
        df = pd.DataFrame([],columns = ["cowid","barn","behaviour","date","start",