# -*- coding: utf-8 -*-
"""
Created on Thu Feb 2 13:51:49 2023

@author: adria036

Select the per-barn files under ``uwb_processed`` that match the barn, date
and cow settings below, and load them into one DataFrame (``data``) sorted by
cow, date and time.
"""
import os
os.chdir(r"C:\Users\adria036\OneDrive - Wageningen University & Research\iAdriaens_doc\Projects\cKamphuis\nlas\scripts\uwb")


#%% import modules


from datetime import date, timedelta, datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


#%% set paths and constants and load data

# path to per barn directories
# (raw string for "\ASG": "\A" is not a valid escape sequence)
path = os.path.join("W:", r"\ASG", "WLR_Dataopslag", "DairyCampus",
                    "3406_Nlas", "uwb_processed")

# settings
settings = {'barn': [60, 61, 62, 70, 71, 72, 73],
            'startdate': date(2022, 10, 5),
            'enddate': date(2022, 12, 30),
            'cows': [1790],  # 0 = no cow filter, or a list of cow numbers
            }

# columns read from each file and the dtype each is parsed with
read_dtypes = {"cowid": "int64", "barn": "int64", "date": "object",
               "t": "int64", "xnew": "float64", "ynew": "float64",
               "area": "object", "zone": "float64"}
read_cols = ["cowid", "barn", "date", "t", "xnew", "ynew", "area", "zone"]


def _file_date(fname):
    """Return the date parsed from characters 5-12 (YYYYMMDD) of *fname*."""
    return datetime.strptime(fname[5:13], '%Y%m%d').date()


def _file_cow(fname):
    """Return the cow number parsed from *fname*.

    The number is read from character 26 up to the last four characters
    (presumably the ".csv" extension - confirm against the file naming).
    """
    return int(fname[26:-4])


# files that comply with settings
all_cows = settings["cows"] == 0
fn = []
for barn_id in settings["barn"]:
    print("barn = " + str(barn_id))
    barn_dir = os.path.join(path, "barn" + str(barn_id))
    for fname in os.listdir(barn_dir):
        if not os.path.isfile(os.path.join(barn_dir, fname)):
            continue
        if not all_cows and _file_cow(fname) not in settings["cows"]:
            continue
        if settings["startdate"] <= _file_date(fname) <= settings["enddate"]:
            fn.append(fname)
# one sort after all barns are collected gives the same final order as
# re-sorting after every barn
fn.sort()

# find unique cows
cows = list({_file_cow(fname) for fname in fn})

# read data
frames = []
for fname in fn:
    # the barn number is taken from the file name to rebuild the directory
    barn = fname[19:21]
    sub = pd.read_csv(os.path.join(path, "barn" + barn, fname),
                      usecols=read_cols,
                      dtype=read_dtypes)
    sub["date"] = pd.to_datetime(sub["date"], format="%Y-%m-%d")
    frames.append(sub)

if frames:
    # concatenate once instead of growing the frame file by file
    data = pd.concat(frames)
else:
    # nothing matched: keep the expected columns so the sort below and any
    # later column access still work
    print("no files comply with the settings")
    data = pd.DataFrame(columns=read_cols)
data = data.sort_values(by=["cowid", "date", "t"])




#%% data exploration and visualisation

# TODO
#   - explore the number of cows with data on each day
#   - explore when no areas are assigned and so, wrong barn is entered
#   - explore gaps and gapsize