forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
layer_model_helper.py
534 lines (458 loc) · 19.2 KB
/
layer_model_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
## @package layer_model_helper
# Module caffe2.python.layer_model_helper
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, model_helper, schema, scope
from caffe2.python.modeling.parameter_info import (
ParameterInfo,
)
from caffe2.python.modeling.parameter_sharing import (
parameter_sharing_context,
)
from caffe2.python.optimizer import get_param_device
from caffe2.python.regularizer import Regularizer
from caffe2.python.layers import layers
from caffe2.proto import caffe2_pb2
from future.utils import viewitems, viewvalues
import logging
import numpy as np
import six
import copy
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class LayerModelHelper(model_helper.ModelHelper):
    """
    Model helper for building models on top of layers abstractions.

    Each layer is the abstraction that is higher level than Operator. Layer
    is responsible for ownership of its own parameters and can easily be
    instantiated in multiple nets, possibly with different sets of ops.
    As an example: one can easily instantiate predict and train nets from
    the same set of layers, where predict net will have subset of the
    operators from train net.
    """

    def __init__(self, name, input_feature_schema, trainer_extra_schema,
                 keep_blobs=False):
        """
        Create an empty helper and bind the two input schemas.

        Args:
            name: forwarded to ModelHelper.__init__ as ``name``.
            input_feature_schema: schema stored as ``input_feature_schema``.
            trainer_extra_schema: schema stored as ``trainer_extra_schema``.
            keep_blobs: when False (default) each schema is turned into a
                new record on ``self.net`` via ``schema.NewRecord``; when
                True the given schemas are cloned instead.
        """
        super(LayerModelHelper, self).__init__(name=name)
        self._layer_names = set()
        self._layers = []
        self._param_to_shape = {}

        # seed default
        self._seed = None
        self._sequence_seed = True

        # optimizer bookkeeping
        self.param_to_optim = {}
        self.param_to_reg = {}

        self._default_optimizer = None
        self._loss = None
        self._output_schema = None

        # breakdown map; breakdown features are categorical (like dense) but
        # not necessarily used to represent data for training
        self._breakdown_map = None

        # Connect Schema to self.net. That particular instance of schema
        # will be used for generation of the Layers across the network and
        # would be used for connection with Readers.
        if keep_blobs:
            self._input_feature_schema = input_feature_schema.clone()
            self._trainer_extra_schema = trainer_extra_schema.clone()
        else:
            self._input_feature_schema = schema.NewRecord(
                self.net,
                input_feature_schema
            )
            self._trainer_extra_schema = schema.NewRecord(
                self.net,
                trainer_extra_schema
            )
        self._metrics_schema = schema.Struct()

        # Global constants must exist before the init net is created because
        # create_init_net copies their initializer ops into the new net.
        self._init_global_constants()
        self.param_init_net = self.create_init_net('param_init_net')
        self._initialize_params = True

    def clear_output_schema(self):
        """Reset the output schema so that it can be assigned again."""
        self._output_schema = None

    def set_initialize_params(self, initialize_params):
        """
        Choose whether create_param builds an initializer operator.

        When set to a falsy value, parameters created afterwards get
        ``initializer=None``.
        """
        self._initialize_params = initialize_params

    def add_metric_field(self, name, value):
        """
        Append field ``name`` with ``value`` to the metrics schema.

        Asserts that ``name`` is not already a field of the metrics schema.
        """
        assert name not in self._metrics_schema.fields, (
            "Try to add metric field twice: {}".format(name))
        self._metrics_schema = self._metrics_schema + schema.Struct(
            (name, value)
        )

    @staticmethod
    def _get_global_constant_initializer_op(
        blob_name, array=None, dtype=None, initializer=None
    ):
        """
        Build the operator that fills ``blob_name`` with a constant.

        Exactly one of ``array`` and ``initializer`` must be provided.

        Args:
            blob_name: output blob of the created operator.
            array: value convertible by ``np.array``; the fill operator is
                selected from the dtype of the resulting array.
            dtype: optional dtype passed to ``np.array`` together with
                ``array``.
            initializer: callable taking ``blob_name`` and returning the
                operator; used only when ``array`` is None.

        Returns:
            The operator produced for ``blob_name``.
        """
        # to add a global constant to model, one first need to get the
        # initializer
        if array is not None:
            assert initializer is None, \
                "Only one from array and initializer should be specified"
            if dtype is None:
                array = np.array(array)
            else:
                array = np.array(array, dtype=dtype)

            # TODO: make GivenTensor generic
            if array.dtype == np.int32:
                op_name = 'GivenTensorIntFill'
            elif array.dtype == np.int64:
                op_name = 'GivenTensorInt64Fill'
            elif array.dtype.kind in ('S', 'U'):
                # Any bytes/unicode string dtype. The removed ``np.str``
                # alias is not available on current NumPy, and a kind check
                # matches string dtypes regardless of item size.
                op_name = 'GivenTensorStringFill'
            elif array.dtype == np.bool_:
                # ``np.bool`` alias was removed from NumPy; np.bool_ is the
                # actual boolean scalar type.
                op_name = 'GivenTensorBoolFill'
            else:
                op_name = 'GivenTensorFill'

            def initializer(blob_name):
                return core.CreateOperator(
                    op_name, [],
                    blob_name,
                    shape=array.shape,
                    values=array.flatten().tolist()
                )
        else:
            assert initializer is not None

        initializer_op = initializer(blob_name)
        return initializer_op

    def add_global_constant(
        self, name, array=None, dtype=None, initializer=None
    ):
        """
        Register a new global constant and return its blob.

        Args:
            name: string key of the constant; must not be registered yet.
            array, dtype, initializer: forwarded to
                ``_get_global_constant_initializer_op``.

        Returns:
            The blob obtained from ``self.net.NextBlob(name)``.
        """
        assert isinstance(name, six.string_types), (
            'name should be a string as we are using it as map key')
        # This is global namescope for constants. They will be created in all
        # init_nets and there should be very few of them.
        assert name not in self.global_constants, \
            "%s already added in global_constants" % name

        blob_name = self.net.NextBlob(name)
        initializer_op = LayerModelHelper._get_global_constant_initializer_op(
            blob_name, array, dtype, initializer
        )
        assert blob_name not in self.global_constant_initializers, \
            "there is already a initializer op associated with blob %s" % \
            blob_name

        # Register only after the initializer op was built successfully so a
        # failure above does not leave a constant without an initializer.
        self.global_constants[name] = blob_name
        self.global_constant_initializers[blob_name] = initializer_op
        return blob_name

    def maybe_add_global_constant(self, name, *args, **kwargs):
        """
        Add a global constant unless ``name`` is already registered.

        If ``name`` is already registered, the initializer that would be
        created from ``*args``/``**kwargs`` is compared with the stored one
        (ignoring ``debug_info``); an assertion fails when they differ.

        Returns:
            The blob of the existing or newly added constant.
        """
        def op_equal(operator1, operator2):
            o1 = copy.deepcopy(operator1)
            o2 = copy.deepcopy(operator2)
            # debug_info is supposed to be different, and we don't need to
            # compare debug_info
            if hasattr(o1, 'debug_info'):
                o1.debug_info = ''
            if hasattr(o2, 'debug_info'):
                o2.debug_info = ''
            return o1 == o2

        if name in self.global_constants:
            blob_name = self.global_constants[name]
            initializer_op = \
                LayerModelHelper._get_global_constant_initializer_op(
                    blob_name, *args, **kwargs
                )
            previous_op = self.global_constant_initializers[blob_name]
            # check if the original initializer is the same as the one
            # intended now
            assert op_equal(initializer_op, previous_op), \
                "conflict initializers for global constant %s, " \
                "previous %s, now %s" % (
                    blob_name, str(previous_op), str(initializer_op))
            return blob_name
        return self.add_global_constant(name, *args, **kwargs)

    def _init_global_constants(self):
        """Reset the constant registries and add ONE, ZERO and ZERO_RANGE."""
        self.global_constants = {}
        self.global_constant_initializers = {}
        self.add_global_constant('ONE', 1.0)
        self.add_global_constant('ZERO', 0.0)
        self.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')

    def _add_global_constants(self, init_net):
        """Append every global constant initializer op to ``init_net``."""
        for initializer_op in viewvalues(self.global_constant_initializers):
            init_net._net.op.extend([initializer_op])

    def create_init_net(self, name):
        """Return a new ``core.Net`` pre-populated with global constants."""
        init_net = core.Net(name)
        self._add_global_constants(init_net)
        return init_net

    def _validate_param_shape(self, param_name, shape):
        """
        Check ``shape`` against the shape recorded for ``param_name``.

        Does nothing when no shape was recorded yet.

        Raises:
            ValueError: if a different shape was recorded earlier.
        """
        if param_name not in self._param_to_shape:
            return

        ref_shape = self._param_to_shape[param_name]

        if shape != ref_shape:
            raise ValueError(
                "Got inconsistent shapes between shared parameters "
                "when trying to map a blob in scope {0} to {1}. ref_shape : "
                " {2}, shape : {3}".format(
                    scope.CurrentNameScope(), param_name, ref_shape, shape)
            )

    def create_param(self, param_name, shape, initializer, optimizer=None,
                     ps_param=None, regularizer=None):
        """
        Create a ``layers.LayerParameter`` for ``param_name``.

        Args:
            param_name: a ``core.BlobReference`` (used as is) or a string
                (resolved through ``parameter_sharing_context``).
            shape: parameter shape; when not None it is passed to the
                initializer operator as the ``shape`` argument.
            initializer: sequence of length 1 or 2: the operator type and,
                optionally, a dict of operator arguments.
            optimizer, ps_param, regularizer: stored on the returned
                parameter.

        Returns:
            The created ``layers.LayerParameter``.

        Raises:
            TypeError: if ``param_name`` is neither a BlobReference nor a
                string.
            ValueError: if ``shape`` conflicts with the shape recorded
                earlier for the same resolved parameter name.
        """
        if isinstance(param_name, core.BlobReference):
            param_name = str(param_name)
        elif isinstance(param_name, six.string_types):
            # Parameter name will be equal to current Namescope that got
            # resolved with the respect of parameter sharing of the scopes.
            param_name = parameter_sharing_context.get_parameter_name(
                param_name)
        else:
            # Raising a plain string is itself a TypeError; raise a real
            # TypeError so the message is actually visible.
            raise TypeError("Unsupported type for param_name")

        param_blob = core.BlobReference(param_name)

        if len(initializer) == 1:
            init_op_args = {}
        else:
            assert len(initializer) == 2
            # Copy so that adding 'shape' below does not mutate the caller's
            # argument dict.
            init_op_args = copy.deepcopy(initializer[1])
        if shape is not None:
            assert 'shape' not in init_op_args
            init_op_args.update({'shape': shape})

        initializer_op = None
        if self._initialize_params:
            initializer_op = core.CreateOperator(
                initializer[0],
                [],
                param_blob,
                **init_op_args
            )

        param = layers.LayerParameter(
            parameter=param_blob,
            initializer=initializer_op,
            optimizer=optimizer,
            ps_param=ps_param,
            regularizer=regularizer
        )

        self._validate_param_shape(param_name, shape)
        self._param_to_shape[param_name] = shape

        return param

    def next_layer_name(self, prefix):
        """
        Reserve and return a layer name derived from ``prefix``.

        The scoped prefix is used when it is free; otherwise
        ``<scoped prefix>_auto_<index>`` with the first free index.
        """
        base_name = core.ScopedName(prefix)
        name = base_name
        index = 0
        while name in self._layer_names:
            name = base_name + '_auto_' + str(index)
            index += 1

        self._layer_names.add(name)
        return name

    def add_layer(self, layer):
        """
        Register ``layer``, record its parameters and add its operators.

        Returns:
            ``layer.output_schema``.

        Raises:
            ValueError: if a parameter is neither a ``LayerParameter`` nor a
                ``ParameterInfo``.
        """
        self._layers.append(layer)
        for param in layer.get_parameters():
            assert isinstance(param.parameter, core.BlobReference)

            self.param_to_optim[str(param.parameter)] = \
                param.optimizer or self.default_optimizer

            self.params.append(param.parameter)
            if isinstance(param, layers.LayerParameter):
                # NOTE(review): keyed by the BlobReference itself, whereas
                # param_to_optim above is keyed by its string form.
                self.param_to_reg[param.parameter] = param.regularizer
            elif isinstance(param, ParameterInfo):
                # TODO:
                # Currently, LSTM and RNNcells, which use ModelHelper instead
                # of LayerModelHelper as super class, are called in
                # pooling_methods. In ModelHelper, regularization is not
                # supported in create_param. We will unify the way of
                # create_param of ModelHelper and LayerModelHelper in the
                # future.
                logger.info(
                    'regularization is unsupported for ParameterInfo object')
            else:
                raise ValueError(
                    'unknown object type besides ParameterInfo and '
                    'LayerParameter: {}'
                    .format(param)
                )

        # The primary value of adding everything to self.net - generation of
        # the operators right away, i.e. if error happens it'll be detected
        # immediately. Other than this - create_x_net should be called.
        layer.add_operators(self.net, self.param_init_net)
        return layer.output_schema

    def get_parameter_blobs(self):
        """Return the parameter blobs of all registered layers, in order."""
        param_blobs = []
        for layer in self._layers:
            for param in layer.get_parameters():
                param_blobs.append(param.parameter)

        return param_blobs

    @property
    def seed(self):
        """Seed stored by ``store_seed`` (None until one is stored)."""
        return self._seed

    def store_seed(self, seed, sequence_seed=True):
        """
        Remember the seed configuration used later by ``apply_seed``.

        Args:
            seed: seed value.
            sequence_seed: if True, the i-th op has rand_seed=`seed + i`.
        """
        # Store seed config that will be applied to each op in the net.
        self._seed = seed
        # If sequence_seed is True, the i-th op has rand_seed=`seed + i`
        self._sequence_seed = sequence_seed

    def apply_seed(self, net):
        """
        Apply the stored seed configuration to ``net``.

        NOTE(review): the check is a truthiness test, so a stored seed of 0
        is skipped just like None. Kept as is to preserve behavior.
        """
        if self._seed:
            net.set_rand_seed(self._seed, self._sequence_seed)

    @property
    def default_optimizer(self):
        """Optimizer used for parameters that do not specify their own."""
        return self._default_optimizer

    @default_optimizer.setter
    def default_optimizer(self, optimizer):
        self._default_optimizer = optimizer

    @property
    def input_feature_schema(self):
        """Input feature schema bound in ``__init__``."""
        return self._input_feature_schema

    @property
    def trainer_extra_schema(self):
        """Trainer extra schema bound in ``__init__`` (possibly extended)."""
        return self._trainer_extra_schema

    @property
    def metrics_schema(self):
        """
        Returns the schema that represents model output that should be used
        for metric reporting.

        During the training/evaluation this schema will be appended to the
        schema that represents model output.
        """
        return self._metrics_schema

    @property
    def output_schema(self):
        """Output schema; asserts that it has been set."""
        assert self._output_schema is not None
        return self._output_schema

    @output_schema.setter
    def output_schema(self, schema):
        # May only be assigned once (until clear_output_schema is called).
        assert self._output_schema is None
        self._output_schema = schema

    @property
    def loss(self):
        """Loss; asserts that it has been set."""
        assert self._loss is not None
        return self._loss

    @loss.setter
    def loss(self, loss):
        # Direct assignment is only allowed while no loss is set.
        assert self._loss is None
        self._loss = loss

    def has_loss(self):
        """Return True when a loss has been set or added."""
        return self._loss is not None

    def add_loss(self, loss, name='unnamed'):
        """
        Add ``loss`` to the model loss struct under ``name``.

        The first loss creates the struct. Later losses are appended; when
        ``name`` is already present, ``<name>_auto_<index>`` with the first
        free index is used instead.

        Args:
            loss: a ``schema.Scalar`` or ``schema.Struct``.
            name: field name for the loss.
        """
        assert loss is not None, "Added loss should not be None"
        assert isinstance(loss, schema.Scalar) or isinstance(
            loss, schema.Struct
        ), "Added loss should be a scalar or a struct"
        if self._loss is None:
            self._loss = schema.Struct((name, loss))
        else:
            prefix_base = name + '_auto_'
            index = 0
            prefix = name
            while prefix in self._loss:
                prefix = prefix_base + str(index)
                index += 1
            loss_struct = schema.Struct((prefix, loss))
            self._loss = self._loss + loss_struct

    def add_trainer_extra_schema(self, trainer_extra_schema):
        """Create a record on ``self.net`` and add it to the extra schema."""
        trainer_extra_record = schema.NewRecord(self.net, trainer_extra_schema)
        self._trainer_extra_schema += trainer_extra_record

    def __getattr__(self, layer):
        """
        Resolve unknown attributes to layer-creating callables.

        A registered layer name yields a wrapper that creates the layer and
        adds it to the model. An operator name yields a wrapper that creates
        a 'Functional' layer applying that operator. Both wrappers return
        the result of ``add_layer`` and honor the ``output_to_metrics`` and
        ``params_to_metrics`` keyword arguments.

        Raises:
            AttributeError: for names starting with ``__``.
            ValueError: for any other name that is neither a registered
                layer nor an operator.
        """
        if layer.startswith('__'):
            raise AttributeError(layer)

        # TODO(amalevich): Add add support for ifbpy inline documentation
        if layers.layer_exists(layer):
            def wrapper(*args, **kwargs):
                new_layer = layers.create_layer(layer, self, *args, **kwargs)
                if kwargs.get("output_to_metrics", False):
                    new_layer.export_output_for_metrics()
                if kwargs.get("params_to_metrics", False):
                    new_layer.export_params_for_metrics()
                return self.add_layer(new_layer)
            return wrapper
        elif core.IsOperator(layer):
            def wrapper(*args, **kwargs):
                def apply_operator(net, in_record, out_record, **kwargs):
                    # TODO(amalevich): Switch to net.operator as soon as it
                    # gets landed
                    net.__getattr__(layer)(in_record.field_blobs(),
                                           out_record.field_blobs(),
                                           **kwargs)

                if 'name' not in kwargs:
                    kwargs['name'] = layer

                new_layer = layers.create_layer(
                    'Functional',
                    self, *args, function=apply_operator,
                    **kwargs
                )

                if kwargs.get("output_to_metrics", False):
                    new_layer.export_output_for_metrics()
                if kwargs.get("params_to_metrics", False):
                    new_layer.export_params_for_metrics()

                return self.add_layer(new_layer)
            return wrapper
        else:
            # NOTE(review): a ValueError (not AttributeError) means that
            # hasattr()/getattr(obj, name, default) on an unknown name
            # propagate this error on Python 3. Kept for callers that catch
            # ValueError.
            raise ValueError(
                "Trying to create non-registered layer: {}".format(layer))

    @property
    def layers(self):
        """List of layers added so far."""
        return self._layers

    def apply_regularizers_on_loss(
        self,
        train_net,
        train_init_net,
        blob_to_device=None,
    ):
        """
        Run the regularizers that are not marked ``apply_after_optimizer``.

        Each regularizer is called with ``(train_net, train_init_net,
        param)`` and the blob it returns is added as a loss named after the
        blob. ``blob_to_device`` is accepted but not used here.
        """
        for param, regularizer in viewitems(self.param_to_reg):
            if regularizer is None or regularizer.apply_after_optimizer:
                continue
            assert isinstance(regularizer, Regularizer)
            added_loss_blob = regularizer(train_net, train_init_net, param)
            self.add_loss(
                schema.Scalar(blob=added_loss_blob),
                str(added_loss_blob)
            )

    def apply_regularizers_after_optimizer(
        self,
        train_net,
        train_init_net,
        grad_map,
        blob_to_device=None,
    ):
        """
        Run the regularizers that are marked ``apply_after_optimizer``.

        Each regularizer is called with ``(train_net, train_init_net, param,
        grad)`` where ``grad`` is looked up in ``grad_map`` by ``str(param)``
        and is None when absent. ``blob_to_device`` is accepted but not used
        here.
        """
        for param, regularizer in viewitems(self.param_to_reg):
            if regularizer is None or not regularizer.apply_after_optimizer:
                continue
            assert isinstance(regularizer, Regularizer)
            regularizer(
                train_net, train_init_net, param, grad_map.get(str(param)))

    def apply_optimizers(
        self,
        train_net,
        train_init_net,
        grad_map,
        blob_to_device=None,
    ):
        """
        Call every parameter's optimizer inside its device scope.

        Args:
            train_net, train_init_net: nets passed to each optimizer.
            grad_map: mapping looked up by ``str(param)``; parameters
                without an entry get a gradient of None.
            blob_to_device: optional map from blob to device_option used
                when resolving the parameter device; CPU is the default
                device.
        """
        CPU = core.DeviceOption(caffe2_pb2.CPU)
        # if given, blob_to_device is a map from blob to device_option
        blob_to_device = blob_to_device or {}
        for param, optimizer in viewitems(self.param_to_optim):
            assert optimizer is not None, \
                "default optimizer must have been set in add_layer"
            # note that not all params has gradient and thus we sent None if
            # gradient does not exists
            device = get_param_device(
                param,
                grad_map.get(str(param)),
                param_to_device=blob_to_device,
                default_device=CPU,
            )
            with core.DeviceScope(device):
                optimizer(
                    train_net, train_init_net, param,
                    grad_map.get(str(param)))

    def _GetOne(self):
        """Return the blob of the global constant 'ONE'."""
        return self.global_constants['ONE']

    # An optimizer which allows us to do NO optimization
    def NoOptim(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        pass

    @property
    def breakdown_map(self):
        """Breakdown map set through the setter (None by default)."""
        return self._breakdown_map

    @breakdown_map.setter
    def breakdown_map(self, breakdown_map):
        # TODO(xlwang): provide more rich feature information in
        # breakdown_map; and change the assertion accordingly
        assert isinstance(breakdown_map, dict)
        assert all(isinstance(k, six.string_types) for k in breakdown_map)
        # Values must be exactly 0..len-1. Compare against a list because a
        # list never equals a range object on Python 3.
        assert sorted(breakdown_map.values()) == \
            list(range(len(breakdown_map)))
        self._breakdown_map = breakdown_map