"""Create an augmented copy of an image dataset.

The script copies the dataset given with ``-d`` into a sibling directory named
``augmented_image_<dataset name>``, removes the ``.wav`` files from the copy and
then writes ``AUGMENTATIONS_PER_IMAGE`` augmented variants of every image next
to the copied originals.
"""
# Standard library
import argparse
import os
import subprocess

# Third-party libraries
import cv2
import albumentations as A

# Number of augmented variants generated for each source image
AUGMENTATIONS_PER_IMAGE = 25

# Dataset layout walked by the script: <dataset>/<category>/<person id>/
CATEGORIES = ('train', 'dev')
PERSON_IDS = tuple(str(number) for number in range(1, 32))


def parse_arguments():
    """Parse the command line and return the dataset path passed with ``-d``.

    Returns:
        The value of the required ``-d`` option as a string.
    """
    parser = argparse.ArgumentParser(prog='image_augmentation.py', description='Image augmentation framework.')
    parser.add_argument(
        "-d",
        metavar="DATASET_PATH",
        required=True,
        type=str,
        default=None,
        action="store",
        help="Specify the path to the dataset (the directory containing the 'dev' and 'train' folders).",
    )
    args = parser.parse_args()
    return args.d


def save_augmentation(person, augmented_images, output_dir=None):
    """Write the augmented images of one person to disk.

    Args:
        person: Key of the form ``"<category>_<person id>"`` (split on the
            first underscore), e.g. ``"train_7"``.
        augmented_images: Iterable with one entry per source image; each entry
            is an iterable of ``(file_name, rgb_image)`` tuples as returned by
            ``apply_augmentation``.
        output_dir: Root directory of the augmented dataset. When omitted, the
            module-level ``augmented_dir`` set by the script entry point is
            used, which keeps the original two-argument call working.

    Failures are reported on stdout and do not stop the remaining images from
    being written.
    """
    if output_dir is None:
        output_dir = augmented_dir

    category, person_id = person.split('_', 1)
    person_dir = os.path.join(output_dir, category, person_id)

    for augmented_image in augmented_images:
        for image_name, image_data in augmented_image:
            image_path = os.path.join(person_dir, image_name)
            try:
                # Images are kept in RGB in memory; OpenCV writes BGR.
                image_data = cv2.cvtColor(image_data, cv2.COLOR_RGB2BGR)
                written = cv2.imwrite(image_path, image_data)
            except Exception as e:
                print(f"Saving image error (path: \"{image_path}\"): {e}")
                continue
            # cv2.imwrite signals most failures through its return value
            # instead of raising, so the result has to be checked explicitly.
            if not written:
                print(f"Saving image error (path: \"{image_path}\"): cv2.imwrite returned False")


def _build_augmentation_pipeline():
    """Return the albumentations pipeline applied to every image.

    Each transform fires according to its own probability. The two ``OneOf``
    groups (blur / noise / grayscale and the colour adjustments) apply at most
    one of their members per call.
    """
    return A.Compose([
        A.Rotate(limit=15, p=0.25),
        A.HorizontalFlip(p=0.5),
        A.Affine(scale=(0.8, 1.2), translate_percent=(-0.15, 0.15), shear=(-8, 8), p=0.2),
        A.OneOf([
            A.GaussianBlur(blur_limit=2),
            A.GaussNoise(std_range=(0.03, 0.07), noise_scale_factor=0.9),
            A.ToGray()
        ], p=0.125),
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=(-0.15, 0.15), contrast_limit=(-0.15, 0.15), p=0.2),
            A.RandomGamma(gamma_limit=(80, 120), p=0.15),
            A.HueSaturationValue(hue_shift_limit=(-10, 10), sat_shift_limit=(-15, 15), val_shift_limit=(-20, 20), p=0.2)
        ]),
        A.RandomCrop(height=80, width=80, p=0.3)
    ])


def apply_augmentation(image, count):
    """Generate ``count`` augmented variants of a single image.

    Args:
        image: ``(file_name, image_data)`` tuple; ``image_data`` is the pixel
            array handed to the albumentations pipeline.
        count: Number of augmented variants to generate.

    Returns:
        A list of ``(new_file_name, augmented_image)`` tuples where the new
        name is ``<original name without a trailing .png>_a<index>.png``.
    """
    augmentation = _build_augmentation_pipeline()

    image_name, image_data = image
    # Strip only a trailing ".png"; a ".png" elsewhere in the name is kept.
    if image_name.endswith(".png"):
        image_name = image_name[:-len(".png")]

    return [
        (f"{image_name}_a{index}.png", augmentation(image=image_data)['image'])
        for index in range(count)
    ]


def _load_person_images(person_dir):
    """Load every readable image in ``person_dir`` as ``(file_name, rgb_image)``.

    Files that OpenCV cannot decode are reported on stdout and skipped.
    """
    images = []
    for file_name in sorted(os.listdir(person_dir)):
        image_path = os.path.join(person_dir, file_name)
        try:
            image = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files instead of raising.
            if image is None:
                print(f"Loading image error (path: \"{image_path}\"): file could not be read as an image")
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(f"Loading image error (path: \"{image_path}\"): {e}")
            continue
        images.append((file_name, image))
    return images


if __name__ == "__main__":
    # Normalise the path first: with a trailing slash, basename() would be
    # empty and the output directory would end up inside the dataset itself.
    dataset_dir = os.path.abspath(parse_arguments())

    # The augmented dataset is created next to the original one.
    dir_name = os.path.dirname(dataset_dir)
    last_part = os.path.basename(dataset_dir)
    augmented_dir = os.path.join(dir_name, f"augmented_image_{last_part}")

    # Copy the dataset; everything below works on the copy, so a failed copy
    # must stop the script.
    subprocess.run(["cp", "-a", os.path.join(dataset_dir, "."), augmented_dir], check=True)

    # Remove the audio (.wav) files from the copy. Not checked: any file that
    # is not a readable image is skipped when the images are loaded.
    subprocess.run(["find", augmented_dir, "-type", "f", "-name", "*.wav", "-exec", "rm", "-f", "{}", ";"])

    # Process one person at a time so only that person's images are in memory.
    for category in CATEGORIES:
        for person_id in PERSON_IDS:
            person_dir = os.path.join(augmented_dir, category, person_id)
            if not os.path.isdir(person_dir):
                print(f"Skipping missing directory (path: \"{person_dir}\")")
                continue

            person_images = _load_person_images(person_dir)
            augmented_images = [
                apply_augmentation(person_image, AUGMENTATIONS_PER_IMAGE)
                for person_image in person_images
            ]
            save_augmentation(f"{category}_{person_id}", augmented_images, augmented_dir)