diff --git a/S1_splitvideo.py b/S1_splitvideo.py
index 82cd8d858d4b3efeab628fdeba8e9bb6a9fed75f..24193a3c055648df512055d9ae49bf00a1a74361 100644
--- a/S1_splitvideo.py
+++ b/S1_splitvideo.py
@@ -158,8 +158,9 @@ if k != 0:                                 # wait for ESC key to exit
     
 # Euclidian distance between columns
 for i in range(0,len(frames)):
-    
-
+    for j in range(0,len(frames)):
+        distGrey = np.linalg.norm(A - B)
+        distColor = np.
 # sampling > take 2 most distant frames > take most distant frames etx
     
     
@@ -195,7 +196,7 @@ imgproperties = pd.DataFrame(data=None, index=None, dtype=None, copy=False)
 
 b = frames[i]
 
-cv2.imshow('blueimage', c)           # show the result of meanFrame
+cv2.imshow('blueimage', b)           # show the result of meanFrame
 k = cv2.waitKey(0) & 0xFF                   # avoid crashing of kernel
 if k == 27:                                 # wait for ESC key to exit
     cv2.destroyAllWindows()
diff --git a/S3_sampleframes.py b/S2_sampleframes.py
similarity index 100%
rename from S3_sampleframes.py
rename to S2_sampleframes.py
diff --git a/S4_sampleframes_time.py b/S3_sampleframes_time.py
similarity index 72%
rename from S4_sampleframes_time.py
rename to S3_sampleframes_time.py
index 381937498d6e49fc1a82a8270e17c48e2ac3d3e4..c963e9342328f52bce8b986a394619d68009e08d 100644
--- a/S4_sampleframes_time.py
+++ b/S3_sampleframes_time.py
@@ -31,7 +31,7 @@ src = r'W:\ASG\WLR_Dataopslag\Genomica\Sustainable_breeding//' \
         '44 0000 2700 KB DDHT AI 2020\\6. data/annotation_videos/'
 
 # file with information of the vids
-vidfile = 'videos_voor_annotatie.xlsx'
+vidfile = '20210801_videos_voor_annotatie.xlsx'
 
 # destination directory
 dst = src 
@@ -43,7 +43,7 @@ if os.path.exists(dst) == False:
     os.mkdir(dst)
     
 # read excel with meta data in pandas dataframe
-vid_list = pd.read_excel(src+vidfile, skiprows=1, header=2)
+vid_list = pd.read_excel(src+vidfile, skiprows=1, header=2,usecols = 'B:J')
 vid_list.head()
 
 # set number of frames to be sampled per video
@@ -65,45 +65,61 @@ no_frames = []                              # prepare inventory no frames
 vid_length = []                             # prepare video length
 frame_rate = []                             # prepare frame rate
 frame_sel = []                              # prepare selected sample
-for i  in range(0, len(vid_list)):
+vid_drop = []                               # prepare to drop
+for i in range(0, len(vid_list)):
     file = vid_list.directory[i] + '\\' + vid_list.video[i]  # video path
-    cap = cv2.VideoCapture(file)                      # capture video
-    
-    # video properties
-    data = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # no of frames
-    no_frames.append(data)                  # make list of no of frames
-    data = int(cap.get(cv2.CAP_PROP_FPS))   # frames per second (framerate)
-    frame_rate.append(data)                 # make list of fps
-    data = no_frames[i]/frame_rate[i]       # calculate video length in sec
-    vid_length.append(data)                 # make list of vid length
+    print("i = " + str(i)+ " of " + str(len(vid_list)-1))
+    if os.path.exists(file):
+        cap = cv2.VideoCapture(file)                      # capture video
     
-    if pd.notnull(vid_list["interest"][i]):
-        # calculate frame to be sampled (excel => variable "interest" )
-        dt = vid_list.interest[i]               # time object
-        nseconds = dt.hour*3600 + \
-                   dt.minute*60 + \
-                   dt.second                    # no seconds in video
-        data = nseconds * frame_rate[i]         # frame to sample
-        frame_sel.append(data)                  # list
+        # video properties
+        data = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # no of frames
+        no_frames.append(data)                  # make list of no of frames
+        data = int(cap.get(cv2.CAP_PROP_FPS))   # frames per second (framerate)
+        frame_rate.append(data)                 # make list of fps
+        data = no_frames[i]/frame_rate[i]       # calculate video length in sec
+        vid_length.append(data)                 # make list of vid length
+        
+        if pd.notnull(vid_list["interest"][i]):
+            # calculate frame to be sampled (excel => variable "interest" )
+            dt = vid_list.interest[i]               # time object
+            nseconds = dt.hour*3600 + \
+                       dt.minute*60 + \
+                       dt.second                    # no seconds in video
+            data = nseconds * frame_rate[i]         # frame to sample
+            frame_sel.append(data)                  # list
+        else:
+            data = np.nan
+            frame_sel.append(data)
+            
+        # release videocapture
+        cap.release()                           # release video capture
     else:
-        data = np.nan
+        vid_drop.append(i)                  # add to list to drop
+        data = np.nan                       # add nan
+        no_frames.append(data)                  # make list of no of frames
+        frame_rate.append(data)                 # make list of fps
         frame_sel.append(data)
+        vid_length.append(data) 
         
-    # release videocapture
-    cap.release()                           # release video capture
-
 # add lists as columns to dataframe
 vid_list["no_frames"] = no_frames
 vid_list["vid_length"] = vid_length
 vid_list["frame_rate"] = frame_rate
 vid_list["frame_sel"] = frame_sel
     
-# show vid_list first 10 lines
+# show vid_list first 10 and last 30 lines
 vid_list.head(10)
-vid_list.tail(10)
+vid_list.tail(30)
+
+# drop rows whose video file was not found (row indices collected in vid_drop)
+vid_list = vid_list.drop(labels=vid_drop)
+vid_list = vid_list.reset_index()
+vid_list = vid_list.drop(columns="index")
 
 # del variables
-del data, file, frame_rate, frame_sel, no_frames, nseconds, i, vid_length
+del data, file, frame_rate, frame_sel, no_frames, nseconds, i
+del vid_drop, vid_length
 
 
 #%% sample frames and save with specified name, save xlsx with name and sample
@@ -127,8 +143,11 @@ for i in range(0,len(vid_list)):
         # release videocapture
         cap.release()
         
+        # clear workspace
+        del samplename, ret, img, file
+        
 # clear workspace
-del samplename, ret, img, i, file
+del i
 
 #%% Sample frames based on random numbers using file_list
 
@@ -164,6 +183,7 @@ del arr, edges, i, sampledframes, j, sample, frame
 
 for i in range(0,len(vid_list)):
     print(vid_list["video"][i])            # print filename read
+    print("i = " + str(i)+ " of " + str(len(vid_list)-1))
     os.path.exists(vid_list["directory"][i] + "\\" + \
                            vid_list["video"][i])
     
diff --git a/S2_augment_imgdata.py b/S4_augment_imgdata.py
similarity index 100%
rename from S2_augment_imgdata.py
rename to S4_augment_imgdata.py
diff --git a/S5_partition_frames_ann.py b/S5_partition_frames_ann.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c83e8381bf14bcf60f601ebcfbc87469dbde3f7
--- /dev/null
+++ b/S5_partition_frames_ann.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug  3 13:28:12 2021
+
+@author: adria036
+
+This script selects frames for annotation and partitions them into separate
+folders, one per annotator. The frame selection is uniform across the
+videos (resulting from S3_sampleframes_time), which cover the necessary
+variability.
+
+----
+Extension (when time permits): load the Excel file to keep track of which
+videos are already annotated. To this end, we need to read
+FrameSelection.xlsx in the source folder, and use the frame_% and vid_% names
+to indicate which frames are selected in 'fn'.
+----
+
+"""
+
+
+
+#%% Load packages
+
+from datetime import date
+import os
+import pandas as pd
+from math import ceil
+from shutil import copyfile
+
+
+
+#%% define constants and filepaths
+
+# directory with frames (source)
+src = r'W:\ASG\WLR_Dataopslag\Genomica\Sustainable_breeding//' \
+        '44 0000 2700 KB DDHT AI 2020\\6. data/annotation_videos/'
+
+#----------------------------------------------------------------------------#        
+# list of video frames selected with certain criteria
+### fn = [f for f in os.listdir(src) if os.path.isfile(src+f) \
+###        and "frame_5.jpg" in f]
+
+#### OR  ####
+
+# list uniformly sampled with a prespecified number of frames
+noframes = 150                        # specify approx number of frames
+fn = [f for f in os.listdir(src) if os.path.isfile(src+f) \
+        and ".jpg" in f]              # all .jpg files
+fn.sort()                             # sort fn list
+fn = fn[0:len(fn):round(len(fn)/noframes)]
+#----------------------------------------------------------------------------#
+
+# annotators
+annotators = ["inesadriaens","inahulsegge"]
+
+# prepare saving -- today's date for record
+today = date.today()
+today = today.strftime("%Y%m%d")
+
+# directory to write selected frames to
+framedir = []                       # var to append to
+for i in range(0,len(annotators)):
+    framedir.append(src + today + "_" + annotators[i] + '/')
+    # create destination folder when it doesn't exist
+    if os.path.exists(framedir[i]) == False:
+        os.mkdir(framedir[i])
+        
+# excel file with meta data of the videos selected for annotation
+vidfile = '20210801_videos_voor_annotatie.xlsx'
+selsheet = '20210802'                           # sheet to load
+
+# read meta data in pandas data frame
+vid_list = pd.read_excel(src+vidfile,
+                         sheet_name = selsheet,
+                         usecols = 'B:Q')                       
+vid_list.head()
+
+
+#%% put annotation frames in folders
+        
+# repeat the annotator directories so the list is at least as long as fn
+framedir_list = framedir*(ceil(len(fn)/len(annotators)))
+
+# copy selected frames in directories
+for f in range(0,len(fn)):
+    copyfile(src+fn[f], framedir_list[f]+fn[f])
+    
+
+#%% save in Excel file which frames are already annotated
+
+# 
+
+for i in range(0,len(fn)):
+    idx = fn[i].find('_',4)                 # location of second '_'
+    vid = fn[i][0:idx]                      # vid
+    frame = int(fn[i][idx+7:-4])            # frame number
+    
+    vid_list["samplename"] 
+