Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cats&Dogs model improvement #254

Merged
merged 10 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 114 additions & 143 deletions datasets/cats_vs_dogs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
###################################################################################################
#
# Copyright (C) 2022 Maxim Integrated Products, Inc. All Rights Reserved.
# Copyright (C) 2023 Analog Devices, Inc. All Rights Reserved.
# This software is proprietary to Analog Devices, Inc. and its licensors.
#
###################################################################################################
#
# Copyright (C) 2022 Maxim Integrated Products, Inc. (now owned by Analog Devices Inc.)
# All Rights Reserved.
#
# Maxim Integrated Products, Inc. Default Copyright Notice:
# https://www.maximintegrated.com/en/aboutus/legal/copyrights.html
Expand All @@ -9,59 +15,72 @@
"""
Cats and Dogs Datasets
"""
import errno
import os
import shutil
import sys

import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

from PIL import Image
import albumentations as album
import cv2

import ai8x

torch.manual_seed(0)


def augment_affine_jitter_blur(orig_img):
class CatsvsDogs(Dataset):
"""
Augment with multiple transformations
`Cats vs Dogs dataset <https://www.kaggle.com/datasets/salader/dogs-vs-cats>` Dataset.

Args:
root_dir (string): Root directory of dataset where ``KWS/processed/dataset.pt``
exist.
d_type(string): Option for the created dataset. ``train`` or ``test``.
transform (callable, optional): A function/transform that takes in an PIL image
and returns a transformed version.
resize_size(int, int): Width and height of the images to be resized for the dataset.
augment_data(bool): Flag to augment the data or not. If d_type is `test`, augmentation is
disabled.
"""
train_transform = transforms.Compose([
transforms.Resize((256, 256)),
transforms.RandomAffine(degrees=10, translate=(0.05, 0.05), shear=5),
transforms.RandomPerspective(distortion_scale=0.3, p=0.2),
transforms.CenterCrop((180, 180)),
transforms.ColorJitter(brightness=.7),
transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 5)),
transforms.RandomHorizontalFlip(),
])
return train_transform(orig_img)


def augment_blur(orig_img):
"""
Augment with center crop and bluring
"""
train_transform = transforms.Compose([
transforms.Resize((256, 256)),
transforms.CenterCrop((220, 220)),
transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 5))
])
return train_transform(orig_img)


def catsdogs_get_datasets(data, load_train=True, load_test=True, aug=2):
"""
Load Cats & Dogs dataset
"""
(data_dir, args) = data
path = data_dir
dataset_path = os.path.join(path, "cats_vs_dogs")
is_dir = os.path.isdir(dataset_path)
if not is_dir:
labels = ['cat', 'dog']
label_to_id_map = {k: v for v, k in enumerate(labels)}
label_to_folder_map = {'cat': 'cats', 'dog': 'dogs'}

def __init__(self, root_dir, d_type, transform=None,
resize_size=(128, 128), augment_data=False):
self.root_dir = root_dir
self.data_dir = os.path.join(root_dir, 'cats_vs_dogs', d_type)

if not self.__check_catsvsdogs_data_exist():
self.__print_download_manual()
sys.exit("Dataset not found!")

self.__get_image_paths()

self.album_transform = None
if d_type == 'train' and augment_data:
self.album_transform = album.Compose([
album.GaussNoise(var_limit=(1.0, 20.0), p=0.25),
album.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
album.ColorJitter(p=0.5),
album.SmallestMaxSize(max_size=int(1.2*min(resize_size))),
album.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
album.RandomCrop(height=resize_size[0], width=resize_size[1]),
album.HorizontalFlip(p=0.5),
album.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One question: Doesn't this normalize all images & all channels independently? It is different than dividing the entire dataset by 255, which we employ in the other vision examples. Whichever correct way is needed to be applied to all examples.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both approaches are very similar to each other. The previous approach divides the dataset by 255, while this one divides by the maximum value of the sample. Both should provide similar results, and extensive experiments would be required to determine whether one is more correct than the other.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. I assumed that it divides each channel with its own maximum value. Doublechecked the documentation and it divides with max possible, which is 255.

if not augment_data or d_type == 'test':
self.album_transform = album.Compose([
album.SmallestMaxSize(max_size=int(1.2*min(resize_size))),
album.CenterCrop(height=resize_size[0], width=resize_size[1]),
album.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))])

self.transform = transform

def __check_catsvsdogs_data_exist(self):
return os.path.isdir(self.data_dir)

def __print_download_manual(self):
print("******************************************")
print("Please follow the instructions below:")
print("Download the dataset to the \'data\' folder by visiting this link: "
Expand All @@ -78,112 +97,64 @@ def catsdogs_get_datasets(data, load_train=True, load_test=True, aug=2):
print("with all the original and augmented images. Remove this folder if you want "
"to change the augmentation and to recreate the dataset.")
print("******************************************")
sys.exit("Dataset not found!")
else:
processed_dataset_path = os.path.join(dataset_path, "augmented")

if os.path.isdir(processed_dataset_path):
print("augmented folder exits. Remove if you want to regenerate")

train_path = os.path.join(dataset_path, "train")
test_path = os.path.join(dataset_path, "test")
processed_train_path = os.path.join(processed_dataset_path, "train")
processed_test_path = os.path.join(processed_dataset_path, "test")
if not os.path.isdir(processed_dataset_path):
os.makedirs(processed_dataset_path, exist_ok=True)
os.makedirs(processed_test_path, exist_ok=True)
os.makedirs(processed_train_path, exist_ok=True)

# create label folders
for d in os.listdir(test_path):
mk = os.path.join(processed_test_path, d)
try:
os.mkdir(mk)
except OSError as e:
if e.errno == errno.EEXIST:
print(f'{mk} already exists!')
else:
raise
for d in os.listdir(train_path):
mk = os.path.join(processed_train_path, d)
try:
os.mkdir(mk)
except OSError as e:
if e.errno == errno.EEXIST:
print(f'{mk} already exists!')
else:
raise

# copy test folder files
test_cnt = 0
for (dirpath, _, filenames) in os.walk(test_path):
print(f'copying {dirpath} -> {processed_test_path}')
for filename in filenames:
if filename.endswith('.jpg'):
relsourcepath = os.path.relpath(dirpath, test_path)
destpath = os.path.join(processed_test_path, relsourcepath)

destfile = os.path.join(destpath, filename)
shutil.copyfile(os.path.join(dirpath, filename), destfile)
test_cnt += 1

# copy and augment train folder files
train_cnt = 0
for (dirpath, _, filenames) in os.walk(train_path):
print(f'copying and augmenting {dirpath} -> {processed_train_path}')
for filename in filenames:
if filename.endswith('.jpg'):
relsourcepath = os.path.relpath(dirpath, train_path)
destpath = os.path.join(processed_train_path, relsourcepath)
srcfile = os.path.join(dirpath, filename)
destfile = os.path.join(destpath, filename)

# original file
shutil.copyfile(srcfile, destfile)
train_cnt += 1

orig_img = Image.open(srcfile)

# crop center & blur only
aug_img = augment_blur(orig_img)
augfile = destfile[:-4] + '_ab' + str(0) + '.jpg'
aug_img.save(augfile)
train_cnt += 1

# random jitter, affine, brightness & blur
for i in range(aug):
aug_img = augment_affine_jitter_blur(orig_img)
augfile = destfile[:-4] + '_aj' + str(i) + '.jpg'
aug_img.save(augfile)
train_cnt += 1
print(f'Augmented dataset: {test_cnt} test, {train_cnt} train samples')

# Loading and normalizing train dataset

def __get_image_paths(self):
self.data_list = []

for label in self.labels:
image_dir = os.path.join(self.data_dir, self.label_to_folder_map[label])
for file_name in sorted(os.listdir(image_dir)):
file_path = os.path.join(image_dir, file_name)
if os.path.isfile(file_path):
self.data_list.append((file_path, self.label_to_id_map[label]))

def __len__(self):
return len(self.data_list)

def __getitem__(self, index):
    """Load the sample at *index* and return an ``(image, label)`` pair.

    The image is decoded with OpenCV (BGR order), converted to RGB, then
    run through the albumentations pipeline and the torchvision transform
    when each is set. The label is returned as an int64 tensor.
    """
    image_path, class_id = self.data_list[index]
    label = torch.tensor(class_id, dtype=torch.int64)

    # cv2 decodes to BGR; convert so downstream transforms see RGB.
    bgr = cv2.imread(image_path)
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    if self.album_transform:
        image = self.album_transform(image=image)["image"]

    if self.transform:
        image = self.transform(image)

    return image, label


def get_catsvsdogs_dataset(data, load_train, load_test):
"""
Load the Cats vs Dogs dataset.
Returns each datasample in 128x128 size.

Data Augmentation: Train samples are augmented randomly with
- Additive Gaussian Noise
- RGB Shift
- Color Jitter
- Shift&Scale&Rotate
- Random Crop
- Horizontal Flip
"""
(data_dir, args) = data

transform = transforms.Compose([
transforms.ToTensor(),
ai8x.normalize(args=args),
])

if load_train:
train_transform = transforms.Compose([
transforms.Resize((128, 128)),
transforms.ToTensor(),
ai8x.normalize(args=args)
])

train_dataset = torchvision.datasets.ImageFolder(root=processed_train_path,
transform=train_transform)
train_dataset = CatsvsDogs(root_dir=data_dir, d_type='train',
transform=transform, augment_data=True)
else:
train_dataset = None

# Loading and normalizing test dataset
if load_test:
test_transform = transforms.Compose([
transforms.Resize((128, 128)),
transforms.ToTensor(),
ai8x.normalize(args=args)
])

test_dataset = torchvision.datasets.ImageFolder(root=processed_test_path,
transform=test_transform)

if args.truncate_testset:
test_dataset.data = test_dataset.data[:1]
test_dataset = CatsvsDogs(root_dir=data_dir, d_type='test', transform=transform)
else:
test_dataset = None

Expand All @@ -195,6 +166,6 @@ def catsdogs_get_datasets(data, load_train=True, load_test=True, aug=2):
'name': 'cats_vs_dogs',
'input': (3, 128, 128),
'output': ('cat', 'dog'),
'loader': catsdogs_get_datasets,
'loader': get_catsvsdogs_dataset,
},
]
4 changes: 4 additions & 0 deletions policies/qat_policy_cd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
start_epoch: 30
weight_bits: 8
shift_quantile: 1.0
4 changes: 2 additions & 2 deletions policies/schedule-catsdogs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
lr_schedulers:
training_lr:
class: MultiStepLR
milestones: [20, 50, 80, 110, 140, 170]
gamma: 0.6
milestones: [20, 50, 150]
gamma: 0.5

policies:
- lr_scheduler:
Expand Down
2 changes: 1 addition & 1 deletion scripts/train_catsdogs.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/sh
python train.py --epochs 250 --optimizer Adam --lr 0.001 --wd 0 --deterministic --compress policies/schedule-catsdogs.yaml --model ai85cdnet --dataset cats_vs_dogs --confusion --param-hist --embedding --device MAX78000 "$@"
python train.py --epochs 200 --optimizer Adam --lr 0.001 --wd 0 --deterministic --compress policies/schedule-catsdogs.yaml --qat-policy policies/qat_policy_late_cd.yaml --model ai85cdnet --dataset cats_vs_dogs --confusion --param-hist --embedding --device MAX78000 "$@"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that "qat_policy_late_cd.yaml" should be replaced with "qat_policy_cd.yaml".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct and fixed...