From 6f62148bf18d7abb973854ec426f50eccd1221dd Mon Sep 17 00:00:00 2001
From: "Bolding, Bob" <bob.bolding@wur.nl>
Date: Tue, 18 Jul 2023 18:17:22 +0000
Subject: [PATCH] Upload New File

---
 util.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 util.py

diff --git a/util.py b/util.py
new file mode 100644
index 0000000..501f265
--- /dev/null
+++ b/util.py
@@ -0,0 +1,69 @@
+import os
+import warnings
+
+from PIL import Image
+import pandas as pd
+import torchvision.transforms as T
+
+from PIL import ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True  # Pillow module-level flag: allow truncated image files to be loaded
+
+METADATA_PATH = 'data/2023-05-05-SF-defect-images-dataset.csv'  # CSV read by load_agrinorm_metadata (relative path)
+DAMAGES_OF_INTEREST = ["white_shoulders", "mildew", "mechanical_damage", "ripeness", "green_tips"]  # matched as question_id_<name> columns
+ADDITIONAL_DAMAGES = ['brown_calyx', 'cracked_heads', 'decay_mould', 'dry_bruising', 'misshaped', 'firmness', 'insects_damage', 'size', 'start_decay_mould', 'wet_bruising']  # aggregated the same way as DAMAGES_OF_INTEREST
+DATA_DIR =  # NOTE(review): value is missing in this patch (syntax error as-is); used as the directory prefix for image_path
+
+UNLOADABLE_IMAGES =  # NOTE(review): value is missing in this patch; passed to Series.isin, so presumably a list of image_path values - confirm
+
+def load_agrinorm_metadata():
+    """Build one row per strawberry image with a boolean column per known defect.
+
+    Reads ``METADATA_PATH``, keeps rows whose ``product`` is "Strawberries" and whose
+    image exists under ``DATA_DIR`` and is not listed in ``UNLOADABLE_IMAGES``.
+    Returns a frame keyed by ``image_path`` with the first ``inspection_day`` and
+    ``question_id_<damage>`` flags that are True if any row of the image had that defect.
+    """
+    records = pd.read_csv(METADATA_PATH).drop(columns=["Unnamed: 0"])
+    records = records[records["product"] == "Strawberries"]
+
+    on_disk = records["image_path"].apply(lambda rel: os.path.exists(f"{DATA_DIR}/{rel}"))
+    blocked = records["image_path"].isin(UNLOADABLE_IMAGES)
+    records = records[on_disk.astype(bool) & ~blocked]
+
+    # One indicator column per question_id value; only the known damages are aggregated.
+    records = pd.get_dummies(records, columns=["question_id"])
+    wanted = (f"question_id_{damage}" for damage in DAMAGES_OF_INTEREST + ADDITIONAL_DAMAGES)
+    how = {"inspection_day": "first", **{col: "any" for col in wanted if col in records.columns}}
+    # groupby collapses the per-defect rows of an image into a single row
+    return records.groupby("image_path").agg(how).reset_index()
+
+def load_and_preprocess_image(path: str, img_size: int = 224, precrop_size: int = 256,
+                              random_crop: bool = False, normalize: bool = True, augment: bool = False):
+    """Load ``path`` as an RGB image and return an ``img_size``-square ``T.ToTensor`` tensor.
+
+    Steps: center-crop to a square on the shorter side, resize to ``precrop_size``, crop to
+    ``img_size`` (random if ``random_crop``, else centered), optional flip/colour jitter
+    (``augment``) and optional hard-coded per-channel mean/std normalization (``normalize``).
+    A ``precrop_size`` below ``img_size`` warns and falls back to ``int(1.15 * img_size)``.
+    """
+    if precrop_size < img_size:
+        warnings.warn("precrop_size should be larger than img_size")
+        precrop_size = int(1.15 * img_size)
+    # Decode inside ``with`` so the file is closed promptly; forcing RGB keeps the
+    # 3-value mean/std below valid for grayscale, palette and RGBA files.
+    with Image.open(path) as source:
+        img = source.convert("RGB")
+    img = T.CenterCrop(min(img.size))(img)  # square crop on the shorter side
+    img = T.Resize(precrop_size)(img)  # scale to precrop_size, not yet img_size
+    img = (T.RandomCrop(img_size) if random_crop else T.CenterCrop(img_size))(img)
+    if augment:
+        img = T.RandomHorizontalFlip()(img)
+        img = T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)(img)
+    img = T.ToTensor()(img)
+    if normalize:
+        img = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)
+    return img
+
+if __name__ == '__main__':  # manual smoke run: executes only when this file is run as a script
+    df = load_agrinorm_metadata()
+    print()  # NOTE(review): prints only an empty line; `df` is never displayed - confirm intent
\ No newline at end of file
-- 
GitLab