# Unified git diff for exploration.py (three hunks). The line breaks of the
# patch have been collapsed into single spaces below, so it has to be
# re-expanded to one diff line per row before `git apply` / `patch` can use it.
#
# Hunk 1 (@@ -16,7 +16,7 @@): replaces the blank line after the imports with
#   the commented-out IPython magic `#%matplotlib qt`.
# Hunk 2 (@@ -24,10 +24,10 @@): narrows the `settings` dict:
#   'barn' from [60,61,62,70,71,72,73] to [71] (old list kept as a trailing
#   comment), 'enddate' from date(2022,12,30) to date(2022,10,30), and
#   'cows' from [1790] to [2433]. 'startdate' is unchanged.
# Hunk 3 (@@ -58,14 +58,23 @@): adds the columns "X" and "y" (both float64)
#   to the `usecols` / `dtype` arguments of pd.read_csv, then appends:
#     - `zones`: row counts of "t" grouped by cowid/date/area, sorted by
#       cowid and area;
#     - a describe() call on xnew, X, ynew and y;
#     - `test`: the rows with -19 < ynew < -17, followed by two seaborn
#       scatter plots of "ynew" and "y" against the row index.
#
# NOTE(review): the describe() result is neither assigned nor printed, so it
#   is presumably only inspected when the cell is run interactively - confirm.
# NOTE(review): the unchanged context line builds the path with "\ASG"; "\A"
#   is not a recognised Python string escape, so a raw string or "\\ASG" may
#   be intended - confirm against the target Python version.
diff --git a/exploration.py b/exploration.py index a872141a2cec4ff4c2ae7c61cbdb1669755ac81b..4a36b68bb37b65ed5d3cd84189b7a48328a3634e 100644 --- a/exploration.py +++ b/exploration.py @@ -16,7 +16,7 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns - +#%matplotlib qt #%% set paths and constants and load data @@ -24,10 +24,10 @@ import seaborn as sns path = os.path.join("W:","\ASG","WLR_Dataopslag","DairyCampus","3406_Nlas","uwb_processed") # settings -settings = {'barn' : [60,61,62,70,71,72,73], +settings = {'barn' : [71], # [60,61,62,70,71,72,73], 'startdate' : date(2022,10,5), - 'enddate' : date(2022,12,30), - 'cows' : [1790], # or specific cow number + 'enddate' : date(2022,10,30), + 'cows' : [2433], # or specific cow number } # files that comply with settings @@ -58,14 +58,23 @@ data = pd.DataFrame([]) for f in fn: barn = f[19:21] sub = pd.read_csv(path + "/barn" + barn + "/" + f, - usecols = ["cowid","barn","date","t","xnew","ynew","area","zone"], + usecols = ["cowid","barn","date","t","xnew","ynew","area","zone","X","y"], dtype = {"cowid" : "int64","barn" : "int64","date" : "object", "t" : "int64", "xnew":"float64","ynew":"float64", - "area":"object","zone":"float64"}) + "area":"object","zone":"float64","X":"float64","y" : "float64"}) sub["date"] = pd.to_datetime(sub["date"], format = "%Y-%m-%d") data = pd.concat([data,sub]) data = data.sort_values(by = ["cowid","date","t"]) +# summarize data per zone +zones = data[["cowid","date","area","t"]].groupby(by = ["cowid","date","area"]).count().reset_index() +zones = zones.sort_values(by = ["cowid","area"]) + +# summarize variables +data[["xnew","X","ynew","y"]].describe() +test = data.loc[(data["ynew"] < -17) & (data["ynew"] > -19),:] +sns.scatterplot(data=test, x = test.index.values, y = "ynew") +sns.scatterplot(data=test, x = test.index.values, y = "y")