From a6be5c167f0dd02abd8885b1c40e5c90b35f4c7d Mon Sep 17 00:00:00 2001 From: Adriaens <ines.adriaens@wur.nl> Date: Wed, 7 Dec 2022 14:02:03 +0100 Subject: [PATCH] finish dataset preparation HD sheep --- .../harmen/preprocess_sheepdata.py | 62 ++++++++++++++++--- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/datapreparation/harmen/preprocess_sheepdata.py b/datapreparation/harmen/preprocess_sheepdata.py index 9ec4541..c114153 100644 --- a/datapreparation/harmen/preprocess_sheepdata.py +++ b/datapreparation/harmen/preprocess_sheepdata.py @@ -35,31 +35,75 @@ for f in os.listdir(path): new = new.drop(new.index.values[-1],axis=0) new["t"] = new.index.values new["at"] = pd.to_datetime(new["datetime"],format = "%d-%m-%Y %H:%M:%S.%f") + """ + 28/10/2022 : start recording 16:30 + 04/11/2022 : remove data between 13:00 and 14:00 - keep only data from 1 accelerometer + 11/11/2022 : remove data between 14:00 and 15:00 - keep only data from 1 accelerometer + """ + if "wk1_20221028" in f: + new = new.loc[new["at"].dt.hour >= 17,:].reset_index(drop=1) # NOTE(review): the note above says recording started 16:30 but this keeps 17:00 onward; confirm + elif "wk1_20221104" in f: + new = new.loc[new["at"].dt.hour < 13,:].reset_index(drop=1) + # fill gap with nan rows: 60*25*60 index values after the last row, timestamps 40 ms apart starting at refdate + idx = np.linspace(new.index.values[-1]+1,new.index.values[-1]+60*25*60,new.index.values[-1]+60*25*60-new.index.values[-1]).astype(int) + refdate = pd.to_datetime("2022/11/04 13:00:00.000",format = "%Y/%m/%d %H:%M:%S.%f") + df = pd.DataFrame([],index=idx,columns=new.columns) + df["at"] = refdate + np.linspace(0,len(idx)-1,len(idx))* pd.Timedelta(40,"milli") + df["t"] = idx + df["datetime"] = df["at"].dt.strftime("%d-%m-%Y %H:%M:%S.%f") + # add "gap" to new + new = pd.concat([new,df],axis = 0) + elif "wk2_20221104" in f: + new = new.loc[new["at"].dt.hour >= 14,:].reset_index(drop=1) + elif "wk2_20221111" in f: + new = new.loc[new["at"].dt.hour < 14,:].reset_index(drop=1) + # fill gap with nan rows: 60*25*60*2 index values, timestamps 40 ms apart starting at refdate; NOTE(review): that spans 2 h while the note above says 14:00 to 15:00, confirm + idx = 
np.linspace(new.index.values[-1]+1,new.index.values[-1]+60*25*60*2,new.index.values[-1]+60*25*60*2-new.index.values[-1]).astype(int) + refdate = pd.to_datetime("2022/11/11 14:00:00.000",format = "%Y/%m/%d %H:%M:%S.%f") + df = pd.DataFrame([],index=idx,columns=new.columns) + df["at"] = refdate + np.linspace(0,len(idx)-1,len(idx))* pd.Timedelta(40,"milli") + df["t"] = idx + df["datetime"] = df["at"].dt.strftime("%d-%m-%Y %H:%M:%S.%f") + # add "gap" to new + new = pd.concat([new,df],axis = 0) + elif "wk3_20221111" in f: + new = new.loc[new["at"].dt.hour >= 16,:].reset_index(drop=1) + elif "wk3_20221124" in f: + new = new.loc[new["at"].dt.hour < 10,:].reset_index(drop=1) + # preprocess: win numbers consecutive groups of 25 values of t new["win"] = np.floor(new["t"]/25) - test2 = new["win"].drop_duplicates() - test3 = new.iloc[test2.index.values,:] - test3.index = test3.win - accum = new[["win","acc_x","acc_y","acc_z"]].groupby(by="win").mean().reset_index() - test = accum.join(test3[["at","win"]], on= "win", rsuffix = "_m") - test = test.drop(columns = "win_m") + # prepare frame with datetimes aggregated to one second (25 samples per win) + test2 = new["win"].drop_duplicates() # keep first occurrence of each win value + test3 = new.iloc[test2.index.values,:] # select the rows of those first occurrences (index labels used as positions) + test3.index = test3.win # change index to win + accum = new[["win","acc_x","acc_y","acc_z"]].groupby(by="win").mean().reset_index() # mean acc_x, acc_y, acc_z per win + test = accum.join(test3[["at","win"]], on= "win", rsuffix = "_m") # join the means with the first "at" of each win + test = test.drop(columns = "win_m") data = pd.concat([data,test]) - +# sort data based on date and add id data = data.sort_values(by = "at").reset_index(drop=1) data["id"] = 1 + +# smooth with rolling median over 60 rows data["acc_xm"] = data["acc_x"].rolling(60).median() data["acc_ym"] = data["acc_y"].rolling(60).median() data["acc_zm"] = data["acc_z"].rolling(60).median() - +# plot and save one figure per distinct day-of-month value data["day"] = data["at"].dt.day +data["month"] = data["at"].dt.month days = data["day"].drop_duplicates() for day in days: print(day) - fn = "sheep1" + "_day" 
+ str(day) + ".png" + month = data.loc[data["day"]==day,"at"].dt.month.drop_duplicates().reset_index(drop=1) # months present for this day; month[0] feeds the file name and title, neither is zero-padded (NOTE(review): confirm that is intended) + fn = "sheep1" + "_2022" + str(month[0]) + str(day) + ".png" fig,ax = plt.subplots(nrows=1,ncols=1,figsize = (20,10)) ax.plot(data.loc[data["day"]==day,"at"],data.loc[data["day"]==day,["acc_x","acc_y","acc_z"]]) ax.plot(data.loc[data["day"]==day,"at"],data.loc[data["day"]==day,["acc_xm","acc_ym","acc_zm"]], color = "k", linewidth = 0.5) + ax.set_title("sheep 1 - " + str(day) + "/" + str(month[0]) + '/2022') + ax.set_xlabel("time") + ax.set_ylabel("acceleration in m/s²") plt.savefig(svpath + "\\" + fn) plt.close() -- GitLab