diff --git a/tool/segmentation.py b/tool/segmentation.py index be6aff6cb68acd5c0af4663528ad80bc9b696e0e..c92b016826b106b8ff69f2e7e3f5d4c011d4ce2b 100644 --- a/tool/segmentation.py +++ b/tool/segmentation.py @@ -45,6 +45,12 @@ General (mathematical) remarks on changepoint analysis need to be known (prior knowledge) and the model is either a change in mean or a change in mean and scale (not with real heteroscedastic data: mean and std multiplied with the same factor?) +- the computational cost is proportional to the number of data points in the ts + this means that it is wise to cut you dataset in smaller meaningful windows + before calculating the breakpoints, which can afterwards be combined again for + the classification algorithms +- outliers influence the estimation of cost tremendously. Where possible, try + to reduce the outliers before estimating the changepoints. ------------------------------------------------------------------------------- RUPTURES - COST FUNCTIONS @@ -55,6 +61,13 @@ RUPTURES - BASE FUNCTIONS * error(start,end) -- returns the cost on segment (start,end) * fit(*args,**kwargs) -- set parameters of the +RUPTURES - SEARCH METHODS +* PELT: Pruned Linear Exact Cost + - + - + - + - + """ @@ -62,12 +75,15 @@ RUPTURES - BASE FUNCTIONS import os os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\iAdriaens\bait\scripts\bait\datapreparation\ines") +%matplotlib qt + #%% filepaths, constants and load data import pandas as pd import numpy as np import matplotlib.pyplot as plt import ruptures as rpt +from scipy.signal import medfilt # path path = os.path.join("C:","/Users","adria036", @@ -77,20 +93,79 @@ path = os.path.join("C:","/Users","adria036", # read dataset data = pd.read_csv(path+"\\DCdata.csv", index_col=0) data["at"] = pd.to_datetime(data["at"],format = "%Y-%m-%d %H:%M:%S.%f%z") +#data = data[["cowid","at","t","gap","activity"]] del path #%% segmentation tests +# savepath +path = os.path.join("C:","/Users","adria036", + "OneDrive - Wageningen University & Research","iAdriaens_doc", + "Projects","iAdriaens","bait","results","segmentation") # define parameters for segmentation -model = "l1" -min_size = 5 # minimum distance between changepoints - - -data = - - -# - - +model = "normal" +min_size = 2*60 # minimum distance between changepoints +dim = 3 +variable = ["acc_x","acc_y","acc_z"] +# set minimum time instead of minimum size and set signal + +# select data +cows = data["cowid"].drop_duplicates() +for cow in cows: + days = data["at"].dt.day.drop_duplicates() + for day in days: + # select data + signal = data.loc[(data["cowid"]==cow) & \ + (data["at"].dt.day == day) & \ + (~data["t"].isna()),variable] + signal = signal.dropna().to_numpy() + # filter data to remove noise / errors + for d in range(0,signal.shape[1]): + mfilt = medfilt(signal[:,d],21) + signal[:,d] = mfilt + + # set penalty values // rule of thumb = log(n)*dim*std(signal) + #pen = np.log(len(signal)) * dim * np.std(signal, axis=0).mean()**2 + pen = np.log(len(signal)) * dim * np.std(signal)**2 + if pen < 100: + pen = 950 + # if segment std ~= signal std- this might not work + + + # fit and define changepoint model + # algo = rpt.Pelt(model=model,min_size=min_size).fit(signal) + + c = rpt.costs.CostNormal().fit(signal) + algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal) + #algo = rpt.Pelt(model="rbf",min_size=min_size).fit(signal) + cpts = algo.predict(pen = pen) + + # plot + fig, axes = rpt.display(signal, cpts,figsize = (18,9)) + axes[0].set_title("acceleration in x direction") + axes[1].set_title("acceleration in y direction") + axes[2].set_title("acceleration in z direction") + axes[0].set_ylabel("acceleration [m/s²]") + axes[1].set_ylabel("acceleration [m/s²]") + axes[2].set_ylabel("acceleration [m/s²]") + axes[2].set_xlabel("point") + plt.tight_layout() + plt.savefig(path+"\\add_segm_cow"+str(round(cow))+"_day"+str(round(day))+".tif") + plt.close() + + + + + +# # fit and define changepoint model +# # algo = rpt.Pelt(model=model,min_size=min_size).fit(signal) +# c = rpt.costs.CostNormal().fit(signal) +# algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal) +# cpts = algo.predict(pen = pen) + +# # plot +# rpt.display(signal, cpts) +# ax[0].set_xlabel("test") +# plt.show()