# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepare the data used for FEELVOS training/evaluation."""
import tensorflow as tf
from deeplab.core import feature_extractor
from deeplab.core import preprocess_utils
# The probability of flipping the images and labels
# left-right during training
_PROB_OF_FLIP = 0.5
get_random_scale = preprocess_utils.get_random_scale
randomly_scale_image_and_label = (
preprocess_utils.randomly_scale_image_and_label)
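
# Note on the scale parameters used below (a sketch of the semantics of
# deeplab's preprocess_utils.get_random_scale, worth double-checking there):
# a single scale factor is drawn per call. For example, with
#   min_scale_factor=0.5, max_scale_factor=2.0, scale_factor_step_size=0.25
# the scale is sampled uniformly from the discrete set {0.5, 0.75, ..., 2.0};
# with scale_factor_step_size=0 it is sampled from the continuous interval
# [0.5, 2.0], and min_scale_factor == max_scale_factor fixes the scale.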

def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: If the ground truth label is not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')

  # Keep reference to original image.
  original_image = image

  processed_image = tf.cast(image, tf.float32)

  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad the image with the mean pixel value so that, after the downstream
    # mean subtraction, the padded region is all zeros.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
      label = preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
      processed_image, label = preprocess_utils.random_crop(
          [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
      label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label. flip_dim also returns the
    # boolean flip decision, which is discarded here.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
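
# Illustrative usage (a sketch, not part of the original file; the crop size,
# scale range and model variant are assumptions in the style of common
# DeepLab training setups):
#
#   original_image, processed_image, label = preprocess_image_and_label(
#       image,                       # uint8 tensor, [height, width, 3]
#       label,                       # integer mask, [height, width, 1]
#       crop_height=465,
#       crop_width=465,
#       min_scale_factor=0.5,
#       max_scale_factor=2.0,
#       scale_factor_step_size=0.25,
#       ignore_label=255,
#       is_training=True,
#       model_variant='xception_65')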

def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
  """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images
  and a list of labels, uses the same crop coordinates for all of them,
  and either flips all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the images and labels.
    crop_width: The width value used to crop the images and labels.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: If the ground truth labels are not provided during training.
  """
  if is_training and labels is None:
    raise ValueError('During training, labels must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')
  if labels is not None:
    assert len(images) == len(labels)
  num_imgs = len(images)

  # Keep reference to original images.
  original_images = images

  processed_images = [tf.cast(image, tf.float32) for image in images]

  if labels is not None:
    labels = [tf.cast(label, tf.int32) for label in labels]

  # Resize images and labels to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    processed_images, labels = zip(*[
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True) for processed_image, label
        in zip(processed_images, labels)])
    # The `original_images` become the resized images.
    original_images = [tf.identity(processed_image)
                       for processed_image in processed_images]

  # Data augmentation by randomly scaling the inputs. The same scale is
  # applied to all images and labels.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_images, labels = zip(
      *[randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)])

  for processed_image in processed_images:
    processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad images and labels to have dimensions >= [crop_height, crop_width].
    # All images are assumed to share the same spatial shape, so the shape of
    # the first one is used.
    image_shape = tf.shape(processed_images[0])
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad the images with the mean pixel value so that, after the downstream
    # mean subtraction, the padded region is all zeros.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_images = [preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)
                        for processed_image in processed_images]

    if labels is not None:
      labels = [preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)
                for label in labels]

    # Randomly crop the images and labels. Passing all tensors to a single
    # random_crop call makes them share the same crop window.
    if is_training and labels is not None:
      cropped = preprocess_utils.random_crop(
          processed_images + labels, crop_height, crop_width)
      assert len(cropped) == 2 * num_imgs
      processed_images = cropped[:num_imgs]
      labels = cropped[num_imgs:]

    for processed_image in processed_images:
      processed_image.set_shape([crop_height, crop_width, 3])

    if labels is not None:
      for label in labels:
        label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip all images and labels together. The last
    # element returned by flip_dim is the boolean flip decision, which is
    # discarded here.
    res = preprocess_utils.flip_dim(
        list(processed_images + labels), _PROB_OF_FLIP, dim=1)
    maybe_flipped = res[:-1]
    assert len(maybe_flipped) == 2 * num_imgs
    processed_images = maybe_flipped[:num_imgs]
    labels = maybe_flipped[num_imgs:]

  return original_images, processed_images, labels
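
# Illustrative usage for a clip of frames (a sketch, not part of the original
# file; the three-frame setup and parameter values are assumptions in the
# spirit of FEELVOS, which preprocesses several frames of one video together):
#
#   original_images, processed_images, labels = (
#       preprocess_images_and_labels_consistently(
#           [frame_0, frame_1, frame_2],   # each uint8, [height, width, 3]
#           [label_0, label_1, label_2],   # each integer mask, [h, w, 1]
#           crop_height=465,
#           crop_width=465,
#           min_scale_factor=0.5,
#           max_scale_factor=2.0,
#           scale_factor_step_size=0.25,
#           is_training=True,
#           model_variant='xception_65'))
#
# Every frame receives the same random scale, the same crop window, and the
# same flip decision, so per-pixel correspondences across frames are preserved.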