Skip to content
Snippets Groups Projects
Commit 23bb616d authored by Adriaens, Ines's avatar Adriaens, Ines :v_tone2:
Browse files

part 1 : segmentation tool

parent 61a02b7c
No related branches found
No related tags found
No related merge requests found
......@@ -45,6 +45,12 @@ General (mathematical) remarks on changepoint analysis
need to be known (prior knowledge) and the model is either a change in mean or
a change in mean and scale (not with real heteroscedastic data: mean and std
multiplied with the same factor?)
- the computational cost is proportional to the number of data points in the ts
this means that it is wise to cut you dataset in smaller meaningful windows
before calculating the breakpoints, which can afterwards be combined again for
the classification algorithms
- outliers influence the estimation of cost tremendously. Where possible, try
to reduce the outliers before estimating the changepoints.
-------------------------------------------------------------------------------
RUPTURES - COST FUNCTIONS
......@@ -55,6 +61,13 @@ RUPTURES - BASE FUNCTIONS
* error(start,end) -- returns the cost on segment (start,end)
* fit(*args,**kwargs) -- set parameters of the
RUPTURES - SEARCH METHODS
* PELT: Pruned Linear Exact Cost
-
-
-
-
"""
......@@ -62,12 +75,15 @@ RUPTURES - BASE FUNCTIONS
import os
os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\iAdriaens\bait\scripts\bait\datapreparation\ines")
%matplotlib qt
#%% filepaths, constants and load data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ruptures as rpt
from scipy.signal import medfilt
# path
path = os.path.join("C:","/Users","adria036",
......@@ -77,20 +93,79 @@ path = os.path.join("C:","/Users","adria036",
# read dataset
data = pd.read_csv(path+"\\DCdata.csv", index_col=0)
data["at"] = pd.to_datetime(data["at"],format = "%Y-%m-%d %H:%M:%S.%f%z")
#data = data[["cowid","at","t","gap","activity"]]
del path
#%% segmentation tests
# savepath
path = os.path.join("C:","/Users","adria036",
"OneDrive - Wageningen University & Research","iAdriaens_doc",
"Projects","iAdriaens","bait","results","segmentation")
# define parameters for segmentation
model = "l1"
min_size = 5 # minimum distance between changepoints
data =
#
model = "normal"
min_size = 2*60 # minimum distance between changepoints
dim = 3
variable = ["acc_x","acc_y","acc_z"]
# set minimum time instead of minimum size and set signal
# select data
cows = data["cowid"].drop_duplicates()
for cow in cows:
days = data["at"].dt.day.drop_duplicates()
for day in days:
# select data
signal = data.loc[(data["cowid"]==cow) & \
(data["at"].dt.day == day) & \
(~data["t"].isna()),variable]
signal = signal.dropna().to_numpy()
# filter data to remove noise / errors
for d in range(0,signal.shape[1]):
mfilt = medfilt(signal[:,d],21)
signal[:,d] = mfilt
# set penalty values // rule of thumb = log(n)*dim*std(signal)
#pen = np.log(len(signal)) * dim * np.std(signal, axis=0).mean()**2
pen = np.log(len(signal)) * dim * np.std(signal)**2
if pen < 100:
pen = 950
# if segment std ~= signal std- this might not work
# fit and define changepoint model
# algo = rpt.Pelt(model=model,min_size=min_size).fit(signal)
c = rpt.costs.CostNormal().fit(signal)
algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal)
#algo = rpt.Pelt(model="rbf",min_size=min_size).fit(signal)
cpts = algo.predict(pen = pen)
# plot
fig, axes = rpt.display(signal, cpts,figsize = (18,9))
axes[0].set_title("acceleration in x direction")
axes[1].set_title("acceleration in y direction")
axes[2].set_title("acceleration in z direction")
axes[0].set_ylabel("acceleration [m/s²]")
axes[1].set_ylabel("acceleration [m/s²]")
axes[2].set_ylabel("acceleration [m/s²]")
axes[2].set_xlabel("point")
plt.tight_layout()
plt.savefig(path+"\\add_segm_cow"+str(round(cow))+"_day"+str(round(day))+".tif")
plt.close()
# # fit and define changepoint model
# # algo = rpt.Pelt(model=model,min_size=min_size).fit(signal)
# c = rpt.costs.CostNormal().fit(signal)
# algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal)
# cpts = algo.predict(pen = pen)
# # plot
# rpt.display(signal, cpts)
# ax[0].set_xlabel("test")
# plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment