Skip to content
Snippets Groups Projects
Commit 6f62148b authored by Bolding, Bob's avatar Bolding, Bob
Browse files

Upload New File

parent 3f86d351
Branches master
No related tags found
No related merge requests found
util.py 0 → 100644
import os
import warnings
from PIL import Image
import pandas as pd
import torchvision.transforms as T
from PIL import ImageFile
# Ask Pillow to decode image files whose data is truncated instead of raising
# an error when the pixel data is loaded.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# CSV file read by load_agrinorm_metadata(). The path is relative, so it is
# resolved against the current working directory.
METADATA_PATH = 'data/2023-05-05-SF-defect-images-dataset.csv'
# Defect names; each one is matched against a "question_id_<name>" dummy
# column in load_agrinorm_metadata().
DAMAGES_OF_INTEREST = ["white_shoulders", "mildew", "mechanical_damage", "ripeness", "green_tips"]
# Further defect names, aggregated in load_agrinorm_metadata() exactly like
# DAMAGES_OF_INTEREST.
ADDITIONAL_DAMAGES = ['brown_calyx', 'cracked_heads', 'decay_mould', 'dry_bruising', 'misshaped', 'firmness', 'insects_damage', 'size', 'start_decay_mould', 'wet_bruising']
# Prefix joined with each "image_path" value (as f"{DATA_DIR}/{image_path}")
# when checking that an image file exists.
# NOTE(review): the right-hand side is missing in this copy of the file --
# restore the real value before running.
DATA_DIR =
# "image_path" values to exclude from the metadata table (used with
# Series.isin, so presumably a list/set of path strings -- TODO confirm).
# NOTE(review): the right-hand side is missing in this copy of the file --
# restore the real value before running.
UNLOADABLE_IMAGES =
def load_agrinorm_metadata():
    """Load the defect metadata and return one row per strawberry image.

    Reads the CSV at ``METADATA_PATH``, keeps only rows whose ``product`` is
    ``"Strawberries"``, removes rows whose image file does not exist under
    ``DATA_DIR`` or whose ``image_path`` is listed in ``UNLOADABLE_IMAGES``,
    one-hot encodes ``question_id`` and finally collapses the rows per
    ``image_path``.

    Returns:
        pandas.DataFrame: one row per ``image_path`` with the columns
        ``image_path``, ``inspection_day`` (first value seen for that image)
        and one boolean ``question_id_<damage>`` column for every damage in
        ``DAMAGES_OF_INTEREST + ADDITIONAL_DAMAGES`` that occurs in the
        filtered data (True when any row of the image carries that defect).
        Damages that never occur in the data get no column.
    """
    # "Unnamed: 0" is the name read_csv gives to an unlabeled leading column
    # (typically a saved index). Dropping it with errors="ignore" keeps the
    # loader working for CSV exports that were written without that column.
    df = pd.read_csv(METADATA_PATH).drop(columns=["Unnamed: 0"], errors="ignore")
    df = df[df["product"] == "Strawberries"]

    # Build explicit boolean masks. astype(bool) keeps the mask boolean even
    # when the filtered frame is empty (an empty map() result is object-typed).
    file_exists = (
        df["image_path"]
        .map(lambda rel_path: os.path.exists(f"{DATA_DIR}/{rel_path}"))
        .astype(bool)
    )
    loadable = ~df["image_path"].isin(UNLOADABLE_IMAGES)
    df = df[file_exists & loadable]

    # One dummy column per distinct question_id value ("question_id_<value>").
    df = pd.get_dummies(df, columns=["question_id"])

    aggregation_dict = {"inspection_day": "first"}
    for damage in DAMAGES_OF_INTEREST + ADDITIONAL_DAMAGES:
        column = f"question_id_{damage}"
        # Only aggregate defects that actually appear in the data; asking
        # agg() for a missing column would raise.
        if column in df.columns:
            aggregation_dict[column] = "any"

    # An image can appear on several rows (one per reported defect); merge
    # them so each image ends up with a single row of defect flags.
    return df.groupby("image_path").agg(aggregation_dict).reset_index()
def load_and_preprocess_image(path: str, img_size: int = 224, precrop_size: int = 256,
                              random_crop: bool = False, normalize: bool = True, augment: bool = False):
    """Load an image file and turn it into a square, model-ready tensor.

    Pipeline: open -> convert to RGB -> center crop to a square on the shorter
    side -> resize to ``precrop_size`` -> crop to ``img_size`` -> optional
    augmentation -> ``ToTensor`` -> optional normalization.

    Args:
        path: Location of the image file.
        img_size: Side length of the final square crop, in pixels.
        precrop_size: Side length the squared image is resized to before the
            final crop. If it is smaller than ``img_size`` a warning is issued
            and ``int(1.15 * img_size)`` is used instead.
        random_crop: Take the final crop at a random position instead of the
            center.
        normalize: Apply per-channel mean/std normalization after conversion
            to a tensor.
        augment: Apply a random horizontal flip and color jitter before the
            conversion to a tensor.

    Returns:
        The tensor produced by ``torchvision.transforms.ToTensor`` (channels
        first, 3 x ``img_size`` x ``img_size``), normalized when ``normalize``
        is true.
    """
    if precrop_size < img_size:
        warnings.warn("precrop_size should be larger than img_size")
        precrop_size = int(1.15 * img_size)

    # Image.open is lazy and keeps the file handle open. Convert inside the
    # context manager so the pixel data is fully read and the file is closed
    # deterministically. Converting to RGB also guarantees the three channels
    # that the 3-value mean/std below require (grayscale, palette, RGBA or
    # CMYK files would otherwise fail or be normalized incorrectly).
    with Image.open(path) as source:
        img = source.convert("RGB")

    img = T.CenterCrop(min(img.size))(img)  # square crop on the shorter side
    img = T.Resize(precrop_size)(img)  # resize the square to precrop_size

    final_crop = T.RandomCrop(img_size) if random_crop else T.CenterCrop(img_size)
    img = final_crop(img)

    if augment:
        img = T.RandomHorizontalFlip()(img)
        img = T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2)(img)

    img = T.ToTensor()(img)  # PIL image -> float tensor

    if normalize:
        # Widely used ImageNet channel statistics.
        img = T.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])(img)
    return img
if __name__ == "__main__":
    # Manual smoke run: build the metadata table (this exercises the CSV read
    # and the file-existence filtering), then emit an empty line.
    metadata = load_agrinorm_metadata()
    print()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment