part 1 : segmentation tool

23bb616d · Adriaens, Ines · 61a02b7c · 23bb616d
Commit 23bb616d authored 2 years ago by Adriaens, Ines
--- a/tool/segmentation.py
+++ b/tool/segmentation.py
@@ -45,6 +45,12 @@ General (mathematical) remarks on changepoint analysis
  need to be known (prior knowledge) and the model is either a change in mean or
  a change in mean and scale (not with real heteroscedastic data: mean and std
  multiplied with the same factor?)
+- the computational cost is proportional to the number of data points in the ts
+  this means that it is wise to cut you dataset in smaller meaningful windows 
+  before calculating the breakpoints, which can afterwards be combined again for
+  the classification algorithms
+- outliers influence the estimation of cost tremendously. Where possible, try 
+  to reduce the outliers before estimating the changepoints. 

 -------------------------------------------------------------------------------
 RUPTURES - COST FUNCTIONS
@@ -55,6 +61,13 @@ RUPTURES - BASE FUNCTIONS
 * error(start,end) -- returns the cost on segment (start,end)
 * fit(*args,**kwargs)  -- set parameters of the 

+RUPTURES - SEARCH METHODS
+* PELT: Pruned Linear Exact Cost
+    -
+    -
+    -
+    -
+

 """

@@ -62,12 +75,15 @@ RUPTURES - BASE FUNCTIONS
 import os
 os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\iAdriaens\bait\scripts\bait\datapreparation\ines") 

+%matplotlib qt
+
 #%% filepaths, constants and load data

 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import ruptures as rpt
+from scipy.signal import medfilt

 # path
 path = os.path.join("C:","/Users","adria036",
@@ -77,20 +93,79 @@ path = os.path.join("C:","/Users","adria036",
 # read dataset 
 data = pd.read_csv(path+"\\DCdata.csv", index_col=0)
 data["at"] = pd.to_datetime(data["at"],format = "%Y-%m-%d %H:%M:%S.%f%z")
+#data = data[["cowid","at","t","gap","activity"]]

 del path

 #%% segmentation tests
+# savepath
+path = os.path.join("C:","/Users","adria036",
+                    "OneDrive - Wageningen University & Research","iAdriaens_doc",
+                    "Projects","iAdriaens","bait","results","segmentation")

 # define parameters for segmentation
-model = "l1"
-min_size = 5  # minimum distance between changepoints
-
-
-data = 
-
-
-#
-
-
+model = "normal"
+min_size = 2*60  # minimum distance between changepoints
+dim = 3
+variable = ["acc_x","acc_y","acc_z"]
+# set minimum time instead of minimum size and set signal
+
+# select data
+cows = data["cowid"].drop_duplicates()
+for cow in cows:
+    days = data["at"].dt.day.drop_duplicates()
+    for day in days:
+        # select data
+        signal = data.loc[(data["cowid"]==cow) & \
+                  (data["at"].dt.day == day) & \
+                  (~data["t"].isna()),variable]
+        signal = signal.dropna().to_numpy()
+        # filter data to remove noise / errors
+        for d in range(0,signal.shape[1]):
+            mfilt = medfilt(signal[:,d],21)
+            signal[:,d] = mfilt
+        
+        # set penalty values // rule of thumb = log(n)*dim*std(signal)
+        #pen = np.log(len(signal)) * dim * np.std(signal, axis=0).mean()**2
+        pen = np.log(len(signal)) * dim * np.std(signal)**2
+        if pen < 100:
+            pen = 950
+        # if segment std ~= signal std-  this might not work
+        
+    
+        # fit and define changepoint model
+        # algo = rpt.Pelt(model=model,min_size=min_size).fit(signal)
+        
+        c = rpt.costs.CostNormal().fit(signal)
+        algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal)
+        #algo = rpt.Pelt(model="rbf",min_size=min_size).fit(signal)
+        cpts = algo.predict(pen = pen)
+
+        # plot
+        fig, axes = rpt.display(signal, cpts,figsize = (18,9))
+        axes[0].set_title("acceleration in x direction")
+        axes[1].set_title("acceleration in y direction")
+        axes[2].set_title("acceleration in z direction")
+        axes[0].set_ylabel("acceleration [m/s²]")
+        axes[1].set_ylabel("acceleration [m/s²]")
+        axes[2].set_ylabel("acceleration [m/s²]")
+        axes[2].set_xlabel("point")
+        plt.tight_layout()
+        plt.savefig(path+"\\add_segm_cow"+str(round(cow))+"_day"+str(round(day))+".tif")
+        plt.close()
+
+
+
+
+
+# # fit and define changepoint model
+# # algo = rpt.Pelt(model=model,min_size=min_size).fit(signal)
+# c = rpt.costs.CostNormal().fit(signal)
+# algo = rpt.Pelt(custom_cost=rpt.costs.CostNormal(),min_size=min_size).fit(signal)
+# cpts = algo.predict(pen = pen)
+
+# # plot
+# rpt.display(signal, cpts)
+# ax[0].set_xlabel("test")
+# plt.show()