# preprocess_data.py
# (forked from fregu856/segmentation)
import cv2
import cPickle
import os
import numpy as np
import tensorflow as tf
from collections import namedtuple
import random
# Root of the project; every resized image, label image and pickle file
# produced by this script is written below project_dir + "data/".
project_dir = "/root/segmentation/"
# Root of the raw datasets; the Cityscapes data is read from
# data_dir + "cityscapes/".
data_dir = "/root/data/"
# Record type describing one Cityscapes label.
# (NOTE! this is taken from the official Cityscapes scripts.)
Label = namedtuple(
    "Label",
    [
        # Unique, human-readable identifier of the label, e.g. 'car' or
        # 'person'.
        "name",

        # Integer ID used to represent the label in the ground truth images.
        # An ID of -1 means the label has no ID and is therefore ignored when
        # ground truth images are created (e.g. license plate). These IDs are
        # expected by the evaluation server, so do not modify them.
        "id",

        # ID used for training. These may be changed freely to suit the
        # method (results must still be validated/submitted with the regular
        # IDs above). Several labels may share one trainId, in which case they
        # are mapped to the same class in the ground truth images; the inverse
        # mapping then uses the label defined first in the list. Mapping all
        # void-type classes to one ID is an example. Max value is 255!
        "trainId",

        # Name of the category the label belongs to.
        "category",

        # ID of that category; used to create ground truth images on category
        # level.
        "categoryId",

        # Whether the label distinguishes between single instances or not.
        "hasInstances",

        # Whether pixels with this ground truth label are ignored during
        # evaluation or not.
        "ignoreInEval",

        # Color of the label.
        "color",
    ],
)
# Table of all Cityscapes labels, one Label record per row.
# (NOTE! this is taken from the official Cityscapes scripts.)
# In this table every label flagged ignoreInEval shares trainId 19, except
# 'license plate' which has id/trainId -1.
labels = [
    #     name                     id  trainId  category       catId  hasInstances  ignoreInEval  color
    Label("unlabeled",              0,  19,     "void",         0,    False,        True,         (0, 0, 0)),
    Label("ego vehicle",            1,  19,     "void",         0,    False,        True,         (0, 0, 0)),
    Label("rectification border",   2,  19,     "void",         0,    False,        True,         (0, 0, 0)),
    Label("out of roi",             3,  19,     "void",         0,    False,        True,         (0, 0, 0)),
    Label("static",                 4,  19,     "void",         0,    False,        True,         (0, 0, 0)),
    Label("dynamic",                5,  19,     "void",         0,    False,        True,         (111, 74, 0)),
    Label("ground",                 6,  19,     "void",         0,    False,        True,         (81, 0, 81)),
    Label("road",                   7,   0,     "flat",         1,    False,        False,        (128, 64, 128)),
    Label("sidewalk",               8,   1,     "flat",         1,    False,        False,        (244, 35, 232)),
    Label("parking",                9,  19,     "flat",         1,    False,        True,         (250, 170, 160)),
    Label("rail track",            10,  19,     "flat",         1,    False,        True,         (230, 150, 140)),
    Label("building",              11,   2,     "construction", 2,    False,        False,        (70, 70, 70)),
    Label("wall",                  12,   3,     "construction", 2,    False,        False,        (102, 102, 156)),
    Label("fence",                 13,   4,     "construction", 2,    False,        False,        (190, 153, 153)),
    Label("guard rail",            14,  19,     "construction", 2,    False,        True,         (180, 165, 180)),
    Label("bridge",                15,  19,     "construction", 2,    False,        True,         (150, 100, 100)),
    Label("tunnel",                16,  19,     "construction", 2,    False,        True,         (150, 120, 90)),
    Label("pole",                  17,   5,     "object",       3,    False,        False,        (153, 153, 153)),
    Label("polegroup",             18,  19,     "object",       3,    False,        True,         (153, 153, 153)),
    Label("traffic light",         19,   6,     "object",       3,    False,        False,        (250, 170, 30)),
    Label("traffic sign",          20,   7,     "object",       3,    False,        False,        (220, 220, 0)),
    Label("vegetation",            21,   8,     "nature",       4,    False,        False,        (107, 142, 35)),
    Label("terrain",               22,   9,     "nature",       4,    False,        False,        (152, 251, 152)),
    Label("sky",                   23,  10,     "sky",          5,    False,        False,        (70, 130, 180)),
    Label("person",                24,  11,     "human",        6,    True,         False,        (220, 20, 60)),
    Label("rider",                 25,  12,     "human",        6,    True,         False,        (255, 0, 0)),
    Label("car",                   26,  13,     "vehicle",      7,    True,         False,        (0, 0, 142)),
    Label("truck",                 27,  14,     "vehicle",      7,    True,         False,        (0, 0, 70)),
    Label("bus",                   28,  15,     "vehicle",      7,    True,         False,        (0, 60, 100)),
    Label("caravan",               29,  19,     "vehicle",      7,    True,         True,         (0, 0, 90)),
    Label("trailer",               30,  19,     "vehicle",      7,    True,         True,         (0, 0, 110)),
    Label("train",                 31,  16,     "vehicle",      7,    True,         False,        (0, 80, 100)),
    Label("motorcycle",            32,  17,     "vehicle",      7,    True,         False,        (0, 0, 230)),
    Label("bicycle",               33,  18,     "vehicle",      7,    True,         False,        (119, 11, 32)),
    Label("license plate",         -1,  -1,     "vehicle",      7,    False,        True,         (0, 0, 142)),
]
# Lookup from Cityscapes label id to trainId, together with an element-wise
# version of that lookup that can be applied to a whole label image at once:
id_to_trainId = dict((label.id, label.trainId) for label in labels)
id_to_trainId_map_func = np.vectorize(id_to_trainId.get)

# Number of classes: trainIds 0-18 are the evaluated object classes (road,
# sidewalk, car etc.) and trainId 19 collects all the ignored labels:
no_of_classes = 20

# Size that all images fed to the model are resized to:
new_img_width = 1024
new_img_height = 512

# Locations of the raw Cityscapes images and of their fine annotations:
cityscapes_dir = data_dir + "cityscapes/"
train_imgs_dir = cityscapes_dir + "leftImg8bit/train/"
val_imgs_dir = cityscapes_dir + "leftImg8bit/val/"
train_gt_dir = cityscapes_dir + "gtFine/train/"
val_gt_dir = cityscapes_dir + "gtFine/val/"

# Per-city sub-directories that make up the train split:
train_dirs = [
    "jena/",
    "zurich/",
    "weimar/",
    "ulm/",
    "tubingen/",
    "stuttgart/",
    "strasbourg/",
    "monchengladbach/",
    "krefeld/",
    "hanover/",
    "hamburg/",
    "erfurt/",
    "dusseldorf/",
    "darmstadt/",
    "cologne/",
    "bremen/",
    "bochum/",
    "aachen/",
]

# Per-city sub-directories that make up the validation split:
val_dirs = [
    "frankfurt/",
    "munster/",
    "lindau/",
]
# Resize every training image and its label image, save both to
# project_dir/data and collect the paths of the saved files:
train_img_paths = []
train_trainId_label_paths = []

# make sure the output directory exists before anything is written into it:
output_dir = project_dir + "data/"
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# (the loop variable is named city_dir so that the builtin dir() is not
# shadowed)
for dir_step, city_dir in enumerate(train_dirs):
    img_dir = train_imgs_dir + city_dir
    # os.listdir returns the entries in arbitrary order; sort them so that
    # repeated runs process (and list) the files in the same order:
    file_names = sorted(os.listdir(img_dir))
    for step, file_name in enumerate(file_names):
        if step % 10 == 0:
            print("train dir %d/%d, step %d/%d" % (dir_step, len(train_dirs)-1,
                        step, len(file_names)-1))

        # everything before "_left" in the file name identifies the image:
        img_id = file_name.split("_left")[0]

        # read the image (cv2.imread returns None instead of raising when
        # the file cannot be read, so fail with the offending path here):
        img_path = img_dir + file_name
        img = cv2.imread(img_path, -1)
        if img is None:
            raise IOError("could not read image: %s" % img_path)

        # resize the image without interpolation (want the image to still
        # match the corresponding label image which we resize below) and
        # save to project_dir/data:
        img_small = cv2.resize(img, (new_img_width, new_img_height),
                    interpolation=cv2.INTER_NEAREST)
        img_small_path = output_dir + img_id + ".png"
        cv2.imwrite(img_small_path, img_small)
        train_img_paths.append(img_small_path)

        # read and resize the corresponding label image without
        # interpolation (want the resulting image to still only contain
        # pixel values corresponding to an object class):
        gt_img_path = train_gt_dir + city_dir + img_id + "_gtFine_labelIds.png"
        gt_img = cv2.imread(gt_img_path, -1)
        if gt_img is None:
            raise IOError("could not read label image: %s" % gt_img_path)
        gt_img_small = cv2.resize(gt_img, (new_img_width, new_img_height),
                    interpolation=cv2.INTER_NEAREST)

        # convert the label image from id to trainId pixel values. The
        # result is cast to uint8 explicitly so that an 8-bit image is handed
        # to cv2.imwrite (assumes the label PNG only holds ids 0-33, whose
        # trainIds are 0-19 - TODO confirm):
        trainId_label = id_to_trainId_map_func(gt_img_small).astype(np.uint8)

        # save the label image to project_dir/data:
        trainId_label_path = output_dir + img_id + "_trainId_label.png"
        cv2.imwrite(trainId_label_path, trainId_label)
        train_trainId_label_paths.append(trainId_label_path)
# compute the mean color channels of the train imgs (the per-image channel
# means, averaged over all resized train images):
print("computing mean color channels of the train imgs")
no_of_train_imgs = len(train_img_paths)
mean_channels = np.zeros((3, ))
for step, img_path in enumerate(train_img_paths):
    if step % 100 == 0:
        print(step)

    img = cv2.imread(img_path, -1)

    # average over the first image axis and then over the second one, which
    # leaves one mean value per channel:
    # NOTE(review): OpenCV conventionally loads color images in BGR order, so
    # the three values are presumably (B, G, R) - confirm against the consumer
    # of mean_channels.pkl.
    img_mean_channels = np.mean(img, axis=0)
    img_mean_channels = np.mean(img_mean_channels, axis=0)

    mean_channels += img_mean_channels

mean_channels = mean_channels/float(no_of_train_imgs)

# # save to disk (binary mode, and the file is closed as soon as the dump is
# # done instead of being left open):
with open(project_dir + "data/mean_channels.pkl", "wb") as pkl_file:
    cPickle.dump(mean_channels, pkl_file)
# compute the class weights:
print("computing class weights")

# # get the total number of pixels in all train labels that are of each
# # object class. np.bincount tallies all pixel values of an image in a single
# # pass; the slice drops any value >= no_of_classes, which are not counted:
pixel_counts = np.zeros(no_of_classes, dtype=np.int64)
for step, trainId_label_path in enumerate(train_trainId_label_paths):
    if step % 100 == 0:
        print(step)

    # read the label image:
    trainId_label = cv2.imread(trainId_label_path, -1)

    # add this image's per-class pixel counts to the totals:
    label_counts = np.bincount(trainId_label.ravel(), minlength=no_of_classes)
    pixel_counts += label_counts[:no_of_classes]

trainId_to_count = dict((trainId, int(pixel_counts[trainId]))
            for trainId in range(no_of_classes))

# # compute the class weights according to the paper, i.e.
# # weight = 1/log(1.02 + class probability). The classes are visited in
# # explicit trainId order (not in dict iteration order, which is not
# # guaranteed) so that class_weights[i] always belongs to trainId i:
class_weights = []
total_count = sum(trainId_to_count.values())
for trainId in range(no_of_classes):
    trainId_prob = float(trainId_to_count[trainId])/float(total_count)
    trainId_weight = 1/np.log(1.02 + trainId_prob)
    class_weights.append(trainId_weight)

# # save to disk (binary mode, and the file is closed as soon as the dump is
# # done instead of being left open):
with open(project_dir + "data/class_weights.pkl", "wb") as pkl_file:
    cPickle.dump(class_weights, pkl_file)
# Resize every validation image and its label image, save both to
# project_dir/data and collect the paths of the saved files:
val_img_paths = []
val_trainId_label_paths = []

# make sure the output directory exists before anything is written into it:
output_dir = project_dir + "data/"
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# (the loop variable is named city_dir so that the builtin dir() is not
# shadowed)
for dir_step, city_dir in enumerate(val_dirs):
    img_dir = val_imgs_dir + city_dir
    # os.listdir returns the entries in arbitrary order; sort them so that
    # the saved path lists are the same on every run:
    file_names = sorted(os.listdir(img_dir))
    for step, file_name in enumerate(file_names):
        if step % 10 == 0:
            print("val dir %d/%d, step %d/%d" % (dir_step, len(val_dirs)-1,
                        step, len(file_names)-1))

        # everything before "_left" in the file name identifies the image:
        img_id = file_name.split("_left")[0]

        # read the image (cv2.imread returns None instead of raising when
        # the file cannot be read, so fail with the offending path here):
        img_path = img_dir + file_name
        img = cv2.imread(img_path, -1)
        if img is None:
            raise IOError("could not read image: %s" % img_path)

        # resize the image without interpolation (want the image to still
        # match the corresponding label image which we resize below) and
        # save to project_dir/data:
        img_small = cv2.resize(img, (new_img_width, new_img_height),
                    interpolation=cv2.INTER_NEAREST)
        img_small_path = output_dir + img_id + ".png"
        cv2.imwrite(img_small_path, img_small)
        val_img_paths.append(img_small_path)

        # read and resize the corresponding label image without
        # interpolation (want the resulting image to still only contain
        # pixel values corresponding to an object class):
        gt_img_path = val_gt_dir + city_dir + img_id + "_gtFine_labelIds.png"
        gt_img = cv2.imread(gt_img_path, -1)
        if gt_img is None:
            raise IOError("could not read label image: %s" % gt_img_path)
        gt_img_small = cv2.resize(gt_img, (new_img_width, new_img_height),
                    interpolation=cv2.INTER_NEAREST)

        # convert the label image from id to trainId pixel values. The
        # result is cast to uint8 explicitly so that an 8-bit image is handed
        # to cv2.imwrite (assumes the label PNG only holds ids 0-33, whose
        # trainIds are 0-19 - TODO confirm):
        trainId_label = id_to_trainId_map_func(gt_img_small).astype(np.uint8)

        # save the label image to project_dir/data:
        trainId_label_path = output_dir + img_id + "_trainId_label.png"
        cv2.imwrite(trainId_label_path, trainId_label)
        val_trainId_label_paths.append(trainId_label_path)

# # save the validation data to disk (binary mode, and each file is closed as
# # soon as its dump is done instead of being left open):
with open(project_dir + "data/val_trainId_label_paths.pkl", "wb") as pkl_file:
    cPickle.dump(val_trainId_label_paths, pkl_file)
with open(project_dir + "data/val_img_paths.pkl", "wb") as pkl_file:
    cPickle.dump(val_img_paths, pkl_file)
# (to reload the saved lists later:)
# val_trainId_label_paths = cPickle.load(open(project_dir + "data/val_trainId_label_paths.pkl"))
# val_img_paths = cPickle.load(open(project_dir + "data/val_img_paths.pkl"))
# augment the train data by flipping all train imgs: every train image and
# its label image are kept, and a flipped copy of each (cv2.flip with flip
# code 1) is saved next to the original with a "_flipped" suffix:
no_of_train_imgs = len(train_img_paths)
print("number of train imgs before augmentation: %d " % no_of_train_imgs)

augmented_train_img_paths = []
augmented_train_trainId_label_paths = []
for step, (img_path, label_path) in enumerate(zip(train_img_paths, train_trainId_label_paths)):
    if step % 100 == 0:
        print(step)

    # keep the original image/label pair:
    augmented_train_img_paths.append(img_path)
    augmented_train_trainId_label_paths.append(label_path)

    # read the image (cv2.imread returns None instead of raising when the
    # file cannot be read, so fail with the offending path here):
    img = cv2.imread(img_path, -1)
    if img is None:
        raise IOError("could not read image: %s" % img_path)

    # flip the image and save to project_dir/data:
    img_flipped = cv2.flip(img, 1)
    img_flipped_path = img_path.split(".png")[0] + "_flipped.png"
    cv2.imwrite(img_flipped_path, img_flipped)
    augmented_train_img_paths.append(img_flipped_path)

    # read the corresponding label image:
    label_img = cv2.imread(label_path, -1)
    if label_img is None:
        raise IOError("could not read label image: %s" % label_path)

    # flip the label image the same way, so that it still matches the
    # flipped image, and save to project_dir/data:
    label_img_flipped = cv2.flip(label_img, 1)
    label_img_flipped_path = label_path.split(".png")[0] + "_flipped.png"
    cv2.imwrite(label_img_flipped_path, label_img_flipped)
    augmented_train_trainId_label_paths.append(label_img_flipped_path)

# # randomly shuffle the augmented train data. The (image, label) pairs are
# # shuffled together so they stay matched; random.shuffle works in place and
# # therefore needs a real list. One shuffle is enough, repeating it does not
# # make the order any more random:
augmented_train_data = list(zip(augmented_train_img_paths,
            augmented_train_trainId_label_paths))
random.shuffle(augmented_train_data)

# # save the augmented train data to disk (binary mode, and each file is
# # closed as soon as its dump is done instead of being left open):
train_data = augmented_train_data
train_img_paths, train_trainId_label_paths = zip(*train_data)
with open(project_dir + "data/train_img_paths.pkl", "wb") as pkl_file:
    cPickle.dump(train_img_paths, pkl_file)
with open(project_dir + "data/train_trainId_label_paths.pkl", "wb") as pkl_file:
    cPickle.dump(train_trainId_label_paths, pkl_file)
# (to reload the saved lists later:)
# train_img_paths = cPickle.load(open(project_dir + "data/train_img_paths.pkl"))
# train_trainId_label_paths = cPickle.load(open(project_dir + "data/train_trainId_label_paths.pkl"))

no_of_train_imgs = len(train_img_paths)
print("number of train imgs after augmentation: %d " % no_of_train_imgs)