-
Notifications
You must be signed in to change notification settings - Fork 0
/
mvtec.py
152 lines (125 loc) · 5.26 KB
/
mvtec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# This is a modified version of the original torchvision CIFAR dataset loader:
# https://github.com/pytorch/vision/blob/master/torchvision/datasets/cifar.py
# This file and the mvtec data directory must be in the same directory, such that:
# /.../this_directory/mvtec.py
# /.../this_directory/mvtec/bottle/...
# /.../this_directory/mvtec/cable/...
# and so on
import os
import numpy as np
from PIL import Image
import matplotlib.image as mpimg
from typing import Any, Callable, Optional, Tuple
from torchvision import transforms
from torchvision.datasets.vision import VisionDataset
class MVTEC(VisionDataset):
    """`MVTEC <https://www.mvtec.com/company/research/datasets/mvtec-ad/>`_ Dataset.

    All images of the selected category/split are read into memory when the
    dataset is constructed. Files are visited in sorted path order so that
    sample indices are reproducible between runs.

    Args:
        root (string): Root directory of dataset where directories
            ``bottle``, ``cable``, etc., exists.
        train (bool, optional): If True, creates dataset from training set
            (``<category>/train/good``), otherwise creates from test set
            (every sub-directory of ``<category>/test``).
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        resize (int, optional): Size handed to ``transforms.Resize``. With an int,
            torchvision matches the smaller image edge to this value, so square
            images come out with H=W=resize. No resizing is done when this is
            None (or 0).
        interpolation (int, optional): Interpolation method handed to
            ``transforms.Resize``; only used when ``resize`` is set.
            See https://pytorch.org/vision/main/_modules/torchvision/transforms/functional.html
        category (string, optional): bottle, cable, capsule, etc.

    Raises:
        OSError: If the expected train/test directory does not exist or
            cannot be read.
    """

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        resize: Optional[int] = None,
        interpolation: int = 2,
        category: str = 'carpet',
    ) -> None:
        super().__init__(root,
                         transform=transform,
                         target_transform=target_transform)
        self.root = os.path.expanduser(root)
        self.train = train
        self.transform = transform
        self.target_transform = target_transform
        self.resize = resize
        self.interpolation = interpolation
        self.category = category
        self.data = []
        self.targets = []

        # Directories are addressed by path instead of os.chdir(): changing
        # the working directory is process-wide and would be left modified
        # if reading an image raised part-way through.
        if self.train:
            # training split only contains defect-free images; label 1 = 'good'
            train_dir = os.path.join(self.root, self.category, 'train', 'good')
            self._load_folder(train_dir, 1)
        else:
            test_dir = os.path.join(self.root, self.category, 'test')
            with os.scandir(test_dir) as entries:
                subfolders = sorted(
                    entry.name for entry in entries if entry.is_dir()
                )
            # every sub-directory except 'good' holds one defect type
            for subfolder in subfolders:
                # label 1 = 'good' image, label 0 = 'defective' image
                label = 1 if subfolder == 'good' else 0
                self._load_folder(os.path.join(test_dir, subfolder), label)

        # data (images) is a numpy array,
        # targets (labels) is a list
        self.data = np.array(self.data)
        # print original data shape to screen
        print('original data shape: (N, H, W, C)', self.data.shape)

    @staticmethod
    def _read_image(path: str) -> np.ndarray:
        """Read one image file and return it as a ``uint8`` array."""
        img = mpimg.imread(path)
        if np.issubdtype(img.dtype, np.floating):
            # matplotlib returns PNGs as floats in [0, 1]; scale back to
            # [0, 255] and round so float round-off cannot truncate a value
            # to one below the intended integer.
            img = np.rint(img * 255)
        # Integer arrays (non-PNG formats) are already in pixel units and
        # must not be multiplied by 255, which would overflow.
        return img.astype(np.uint8)

    def _load_folder(self, folder: str, label: int) -> None:
        """Append every regular file in ``folder`` to data/targets with ``label``."""
        with os.scandir(folder) as entries:
            # sorted: os.scandir yields entries in arbitrary order
            paths = sorted(entry.path for entry in entries if entry.is_file())
        for path in paths:
            self.data.append(self._read_image(path))
            self.targets.append(label)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is 0 for 'defective' images
            and 1 for 'good' images
        """
        img, target = self.data[index], self.targets[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img)

        # Optional resize happens before the user transform.
        # See: https://pytorch.org/vision/main/generated/torchvision.transforms.Resize.html
        if self.resize:
            img = transforms.Resize(self.resize, self.interpolation)(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        """
        Returns:
            int: number of images loaded for the selected split
        """
        return len(self.data)