# Contents of util.py as created by commit
# 6f62148bf18d7abb973854ec426f50eccd1221dd ("[PATCH] Upload New File"),
# From: "Bolding, Bob" <bob.bolding@wur.nl>, Date: Tue, 18 Jul 2023 18:17:22 +0000.
# The code below is that file with the diff markers removed.
"""Utilities for loading the defect-image metadata table and preprocessing images."""

import os
import warnings

import pandas as pd
import torchvision.transforms as T
from PIL import Image, ImageFile

# Let PIL decode image files whose data is cut short instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

METADATA_PATH = 'data/2023-05-05-SF-defect-images-dataset.csv'
DAMAGES_OF_INTEREST = [
    "white_shoulders",
    "mildew",
    "mechanical_damage",
    "ripeness",
    "green_tips",
]
ADDITIONAL_DAMAGES = [
    'brown_calyx',
    'cracked_heads',
    'decay_mould',
    'dry_bruising',
    'misshaped',
    'firmness',
    'insects_damage',
    'size',
    'start_decay_mould',
    'wet_bruising',
]

# NOTE(review): both assignments below had an empty right-hand side in the
# original (a SyntaxError). The values are placeholders so the module can be
# imported -- replace them with the real image root and the real list of
# image paths that fail to load.
DATA_DIR = "data"  # TODO confirm: directory that `image_path` values are relative to
UNLOADABLE_IMAGES = []  # TODO confirm: `image_path` values to exclude


def load_agrinorm_metadata() -> pd.DataFrame:
    """Return one row per strawberry image with a boolean column per defect.

    Reads the CSV at ``METADATA_PATH`` and then:

    1. keeps rows whose ``product`` is ``"Strawberries"``;
    2. drops rows whose ``image_path`` does not exist under ``DATA_DIR`` or
       is listed in ``UNLOADABLE_IMAGES``;
    3. one-hot encodes ``question_id`` into ``question_id_<value>`` columns;
    4. groups by ``image_path``, keeping the first ``inspection_day`` and
       OR-ing each defect column across the rows of that image.

    Only defects named in ``DAMAGES_OF_INTEREST`` or ``ADDITIONAL_DAMAGES``
    are kept, and only if they actually occur in the filtered data.

    Returns:
        A DataFrame with ``image_path``, ``inspection_day`` and the
        ``question_id_<defect>`` columns described above.
    """
    # The unnamed column is the index written by a previous `to_csv`; tolerate
    # files that were exported without it.
    df = pd.read_csv(METADATA_PATH).drop(columns=['Unnamed: 0'], errors="ignore")
    df = df[df["product"] == "Strawberries"]

    # `.astype(bool)` keeps the mask boolean even when the frame is empty.
    on_disk = (
        df["image_path"]
        .apply(lambda rel: os.path.exists(f"{DATA_DIR}/{rel}"))
        .astype(bool)
    )
    loadable = ~df["image_path"].isin(UNLOADABLE_IMAGES)
    df = df[on_disk & loadable]

    df = pd.get_dummies(df, columns=['question_id'])

    aggregation = {"inspection_day": "first"}
    for damage in DAMAGES_OF_INTEREST + ADDITIONAL_DAMAGES:
        column = f"question_id_{damage}"
        if column in df.columns:
            aggregation[column] = "any"

    return df.groupby("image_path").agg(aggregation).reset_index()


def load_and_preprocess_image(path: str, img_size: int = 224, precrop_size: int = 256,
                              random_crop: bool = False, normalize: bool = True,
                              augment: bool = False):
    """Load an image file and turn it into a square tensor.

    Pipeline: centre-crop to a square on the shorter side, resize to
    ``precrop_size``, crop to ``img_size`` (random or centred), optionally
    flip/colour-jitter, convert with ``ToTensor`` and optionally normalise.

    Args:
        path: Location of the image file.
        img_size: Side length of the returned image.
        precrop_size: Side length after the resize and before the final crop.
            If smaller than ``img_size`` a warning is issued and
            ``int(1.15 * img_size)`` is used instead.
        random_crop: Take the final crop at a random position instead of the
            centre.
        normalize: Apply per-channel normalisation with mean
            ``[0.485, 0.456, 0.406]`` and std ``[0.229, 0.224, 0.225]``.
        augment: Apply a random horizontal flip and colour jitter.

    Returns:
        The tensor produced by ``torchvision.transforms.ToTensor`` (after
        ``Normalize`` when ``normalize`` is true).
    """
    if precrop_size < img_size:
        warnings.warn("precrop_size should be larger than img_size")
        precrop_size = int(1.15 * img_size)

    # Close the file handle promptly, and force three channels so the
    # 3-channel Normalize below also works for grayscale/RGBA/palette files.
    with Image.open(path) as raw:
        img = raw.convert("RGB")

    shorter_side = min(img.size)
    img = T.CenterCrop(shorter_side)(img)  # square crop on the shorter side
    img = T.Resize(precrop_size)(img)  # resize to precrop_size

    final_crop = T.RandomCrop(img_size) if random_crop else T.CenterCrop(img_size)
    img = final_crop(img)

    if augment:
        img = T.RandomHorizontalFlip()(img)
        img = T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)(img)

    img = T.ToTensor()(img)
    if normalize:
        img = T.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])(img)

    return img


if __name__ == '__main__':
    # Smoke test: build the table and show its first rows.
    df = load_agrinorm_metadata()
    print(df.head())