object_detection_pruning.patch

diff --git a/research/object_detection/hooks/__init__.py b/research/object_detection/hooks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/research/object_detection/hooks/train_hooks.py b/research/object_detection/hooks/train_hooks.py
new file mode 100644
index 00000000..7c9ee4d2
--- /dev/null
+++ b/research/object_detection/hooks/train_hooks.py
@@ -0,0 +1,89 @@
+"""Hooks used during training."""
+
+import tensorflow as tf
+
+model_pruning = tf.contrib.model_pruning
+
+
+class ModelPruningHook(tf.train.SessionRunHook):
+  """Hook that creates a pruning mask update op and runs it after each run()."""
+
+  def __init__(self, target_sparsity, start_step, end_step):
+    """Builds the pruning hyperparameters used by this hook.
+
+    Nothing is added to the graph here; `begin()` creates the mask update op
+    and `after_run()` executes it.
+
+    Args:
+      target_sparsity: float between 0 and 1, the desired sparsity.
+      start_step: int, stored as both pruning and sparsity-function begin step.
+      end_step: int, stored as both pruning and sparsity-function end step.
+    """
+    tf.logging.info("Create ModelPruningHook.")
+    self.pruning_hparams = self._get_pruning_hparams(
+      target_sparsity=target_sparsity,
+      start_step=start_step,
+      end_step=end_step
+    )
+
+  def begin(self):
+    """Captures the global step and adds the mask update op to the graph.
+
+    The tensor from `tf.train.get_global_step()` is passed to
+    `model_pruning.Pruning` by `_get_mask_update_op()`; ops are created here
+    and only executed in `after_run()`. NOTE(review): presumably the global
+    step already exists when this runs -- confirm against the caller.
+    """
+    self.global_step_tensor = tf.train.get_global_step()
+    self.mask_update_op = self._get_mask_update_op()
+
+  def after_run(self, run_context, run_values):
+    """Executes the mask update op after each `run()` call.
+
+    The op is run through an additional `run_context.session.run()` call
+    instead of being requested via `before_run()`, so every hooked `run()`
+    is followed by one extra session call.
+
+    Args:
+      run_context: A `SessionRunContext` object; its `session` runs the op.
+      run_values: A `SessionRunValues` object (unused).
+    """
+    run_context.session.run(self.mask_update_op)
+
+  def _get_mask_update_op(self):
+    """Creates `Pruning`, registers its summaries, returns the mask update op."""
+    graph = tf.get_default_graph()
+    with graph.as_default():
+      pruning = model_pruning.Pruning(
+        self.pruning_hparams,
+        global_step=self.global_step_tensor
+      )
+      mask_update_op = pruning.conditional_mask_update_op()
+      pruning.add_pruning_summaries()
+      return mask_update_op
+
+  def _get_pruning_hparams(self,
+                           target_sparsity=0.5,
+                           start_step=0,
+                           end_step=-1):
+    """Returns default pruning hparams with sparsity and step range overridden.
+
+    Args:
+      target_sparsity: float between 0 and 1 with desired sparsity
+      start_step: int written to both begin-step hyperparameters
+      end_step: int written to both end-step hyperparameters (default -1)
+    """
+    pruning_hparams = model_pruning.get_pruning_hparams()
+
+    # Desired sparsity, exactly as passed by the caller (not validated here).
+    pruning_hparams.target_sparsity = target_sparsity
+
+    # Pruning and the sparsity function both begin on the same step.
+    pruning_hparams.begin_pruning_step = start_step
+    pruning_hparams.sparsity_function_begin_step = start_step
+
+    # Same end step for both; NOTE(review): confirm -1 is valid for both fields.
+    pruning_hparams.end_pruning_step = end_step
+    pruning_hparams.sparsity_function_end_step = end_step
+
+    return pruning_hparams
diff --git a/research/object_detection/model_hparams.py b/research/object_detection/model_hparams.py
index 12b043e9..54c1de42 100644
--- a/research/object_detection/model_hparams.py
+++ b/research/object_detection/model_hparams.py
@@ -30,7 +30,7 @@ except ImportError:
 # pylint: enable=g-import-not-at-top


-def create_hparams(hparams_overrides=None):
+def create_hparams(load_pretrained=True, hparams_overrides=None):
   """Returns hyperparameters, including any flag value overrides.

   Args:
diff --git a/research/object_detection/model_lib.py b/research/object_detection/model_lib.py
index ba0f0b4b..469921f4 100644
--- a/research/object_detection/model_lib.py
+++ b/research/object_detection/model_lib.py
@@ -40,37 +40,30 @@ from object_detection.utils import visualization_utils as vis_utils

 # pylint: disable=g-import-not-at-top
 try:
-  from tensorflow.contrib import framework as contrib_framework
-  from tensorflow.contrib import layers as contrib_layers
-  from tensorflow.contrib import learn as contrib_learn
-  from tensorflow.contrib import tpu as contrib_tpu
-  from tensorflow.contrib import training as contrib_training
+    from tensorflow.contrib import framework as contrib_framework
+    from tensorflow.contrib import layers as contrib_layers
+    from tensorflow.contrib import learn as contrib_learn
+    from tensorflow.contrib import tpu as contrib_tpu
+    from tensorflow.contrib import training as contrib_training
 except ImportError:
-  # TF 2.0 doesn't ship with contrib.
-  pass
+    # TF 2.0 doesn't ship with contrib.
+    pass
 # pylint: enable=g-import-not-at-top

 # A map of names to methods that help build the model.
 MODEL_BUILD_UTIL_MAP = {
-    'get_configs_from_pipeline_file':
-        config_util.get_configs_from_pipeline_file,
-    'create_pipeline_proto_from_configs':
-        config_util.create_pipeline_proto_from_configs,
-    'merge_external_params_with_configs':
-        config_util.merge_external_params_with_configs,
-    'create_train_input_fn':
-        inputs.create_train_input_fn,
-    'create_eval_input_fn':
-        inputs.create_eval_input_fn,
-    'create_predict_input_fn':
-        inputs.create_predict_input_fn,
-    'detection_model_fn_base': model_builder.build,
+    "get_configs_from_pipeline_file": config_util.get_configs_from_pipeline_file,
+    "create_pipeline_proto_from_configs": config_util.create_pipeline_proto_from_configs,
+    "merge_external_params_with_configs": config_util.merge_external_params_with_configs,
+    "create_train_input_fn": inputs.create_train_input_fn,
+    "create_eval_input_fn": inputs.create_eval_input_fn,
+    "create_predict_input_fn": inputs.create_predict_input_fn,
+    "detection_model_fn_base": model_builder.build,
 }


-def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
-                                  max_number_of_boxes):
-  """Extracts groundtruth data from detection_model and prepares it for eval.
+def _prepare_groundtruth_for_eval(detection_model, class_agnostic, max_number_of_boxes):
+    """Extracts groundtruth data from detection_model and prepares it for eval.

   Args:
     detection_model: A `DetectionModel` object.
@@ -97,54 +90,66 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
         tensor of keypoints (if provided in groundtruth).
     class_agnostic: Boolean indicating whether detections are class agnostic.
   """
-  input_data_fields = fields.InputDataFields()
-  groundtruth_boxes = tf.stack(
-      detection_model.groundtruth_lists(fields.BoxListFields.boxes))
-  groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
-  # For class-agnostic models, groundtruth one-hot encodings collapse to all
-  # ones.
-  if class_agnostic:
-    groundtruth_classes_one_hot = tf.ones(
-        [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1])
-  else:
-    groundtruth_classes_one_hot = tf.stack(
-        detection_model.groundtruth_lists(fields.BoxListFields.classes))
-  label_id_offset = 1  # Applying label id offset (b/63711816)
-  groundtruth_classes = (
-      tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset)
-  groundtruth = {
-      input_data_fields.groundtruth_boxes: groundtruth_boxes,
-      input_data_fields.groundtruth_classes: groundtruth_classes
-  }
-  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
-    groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
-        detection_model.groundtruth_lists(fields.BoxListFields.masks))
-
-  if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
-    groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
-        detection_model.groundtruth_lists(fields.BoxListFields.is_crowd))
-
-  if detection_model.groundtruth_has_field(input_data_fields.groundtruth_area):
-    groundtruth[input_data_fields.groundtruth_area] = tf.stack(
-        detection_model.groundtruth_lists(input_data_fields.groundtruth_area))
-
-  if detection_model.groundtruth_has_field(fields.BoxListFields.keypoints):
-    groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack(
-        detection_model.groundtruth_lists(fields.BoxListFields.keypoints))
-
-  if detection_model.groundtruth_has_field(
-      fields.BoxListFields.keypoint_visibilities):
-    groundtruth[input_data_fields.groundtruth_keypoint_visibilities] = tf.stack(
-        detection_model.groundtruth_lists(
-            fields.BoxListFields.keypoint_visibilities))
-
-  groundtruth[input_data_fields.num_groundtruth_boxes] = (
-      tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
-  return groundtruth
+    input_data_fields = fields.InputDataFields()
+    groundtruth_boxes = tf.stack(
+        detection_model.groundtruth_lists(fields.BoxListFields.boxes)
+    )
+    groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
+    # For class-agnostic models, groundtruth one-hot encodings collapse to all
+    # ones.
+    if class_agnostic:
+        groundtruth_classes_one_hot = tf.ones(
+            [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1]
+        )
+    else:
+        groundtruth_classes_one_hot = tf.stack(
+            detection_model.groundtruth_lists(fields.BoxListFields.classes)
+        )
+    label_id_offset = 1  # Applying label id offset (b/63711816)
+    groundtruth_classes = (
+        tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset
+    )
+    groundtruth = {
+        input_data_fields.groundtruth_boxes: groundtruth_boxes,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+    }
+    if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
+        groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
+            detection_model.groundtruth_lists(fields.BoxListFields.masks)
+        )
+
+    if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
+        groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
+            detection_model.groundtruth_lists(fields.BoxListFields.is_crowd)
+        )
+
+    if detection_model.groundtruth_has_field(input_data_fields.groundtruth_area):
+        groundtruth[input_data_fields.groundtruth_area] = tf.stack(
+            detection_model.groundtruth_lists(input_data_fields.groundtruth_area)
+        )
+
+    if detection_model.groundtruth_has_field(fields.BoxListFields.keypoints):
+        groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack(
+            detection_model.groundtruth_lists(fields.BoxListFields.keypoints)
+        )
+
+    if detection_model.groundtruth_has_field(
+        fields.BoxListFields.keypoint_visibilities
+    ):
+        groundtruth[input_data_fields.groundtruth_keypoint_visibilities] = tf.stack(
+            detection_model.groundtruth_lists(
+                fields.BoxListFields.keypoint_visibilities
+            )
+        )
+
+    groundtruth[input_data_fields.num_groundtruth_boxes] = tf.tile(
+        [max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]
+    )
+    return groundtruth


 def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
-  """Unstacks all tensors in `tensor_dict` along 0th dimension.
+    """Unstacks all tensors in `tensor_dict` along 0th dimension.

   Unstacks tensor from the tensor dict along 0th dimension and returns a
   tensor_dict containing values that are lists of unstacked, unpadded tensors.
@@ -174,52 +179,57 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
     ValueError: If unpad_tensors is True and `tensor_dict` does not contain
       `num_groundtruth_boxes` tensor.
   """
-  unbatched_tensor_dict = {
-      key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
-  }
-  if unpad_groundtruth_tensors:
-    if (fields.InputDataFields.num_groundtruth_boxes not in
-        unbatched_tensor_dict):
-      raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
-                       'Keys available: {}'.format(
-                           unbatched_tensor_dict.keys()))
-    unbatched_unpadded_tensor_dict = {}
-    unpad_keys = set([
-        # List of input data fields that are padded along the num_boxes
-        # dimension. This list has to be kept in sync with InputDataFields in
-        # standard_fields.py.
-        fields.InputDataFields.groundtruth_instance_masks,
-        fields.InputDataFields.groundtruth_classes,
-        fields.InputDataFields.groundtruth_boxes,
-        fields.InputDataFields.groundtruth_keypoints,
-        fields.InputDataFields.groundtruth_keypoint_visibilities,
-        fields.InputDataFields.groundtruth_group_of,
-        fields.InputDataFields.groundtruth_difficult,
-        fields.InputDataFields.groundtruth_is_crowd,
-        fields.InputDataFields.groundtruth_area,
-        fields.InputDataFields.groundtruth_weights
-    ]).intersection(set(unbatched_tensor_dict.keys()))
-
-    for key in unpad_keys:
-      unpadded_tensor_list = []
-      for num_gt, padded_tensor in zip(
-          unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
-          unbatched_tensor_dict[key]):
-        tensor_shape = shape_utils.combined_static_and_dynamic_shape(
-            padded_tensor)
-        slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
-        slice_size = tf.stack(
-            [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]])
-        unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size)
-        unpadded_tensor_list.append(unpadded_tensor)
-      unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
-    unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)
-
-  return unbatched_tensor_dict
+    unbatched_tensor_dict = {
+        key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
+    }
+    if unpad_groundtruth_tensors:
+        if fields.InputDataFields.num_groundtruth_boxes not in unbatched_tensor_dict:
+            raise ValueError(
+                "`num_groundtruth_boxes` not found in tensor_dict. "
+                "Keys available: {}".format(unbatched_tensor_dict.keys())
+            )
+        unbatched_unpadded_tensor_dict = {}
+        unpad_keys = set(
+            [
+                # List of input data fields that are padded along the num_boxes
+                # dimension. This list has to be kept in sync with InputDataFields in
+                # standard_fields.py.
+                fields.InputDataFields.groundtruth_instance_masks,
+                fields.InputDataFields.groundtruth_classes,
+                fields.InputDataFields.groundtruth_boxes,
+                fields.InputDataFields.groundtruth_keypoints,
+                fields.InputDataFields.groundtruth_keypoint_visibilities,
+                fields.InputDataFields.groundtruth_group_of,
+                fields.InputDataFields.groundtruth_difficult,
+                fields.InputDataFields.groundtruth_is_crowd,
+                fields.InputDataFields.groundtruth_area,
+                fields.InputDataFields.groundtruth_weights,
+            ]
+        ).intersection(set(unbatched_tensor_dict.keys()))
+
+        for key in unpad_keys:
+            unpadded_tensor_list = []
+            for num_gt, padded_tensor in zip(
+                unbatched_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
+                unbatched_tensor_dict[key],
+            ):
+                tensor_shape = shape_utils.combined_static_and_dynamic_shape(
+                    padded_tensor
+                )
+                slice_begin = tf.zeros([len(tensor_shape)], dtype=tf.int32)
+                slice_size = tf.stack(
+                    [num_gt] + [-1 if dim is None else dim for dim in tensor_shape[1:]]
+                )
+                unpadded_tensor = tf.slice(padded_tensor, slice_begin, slice_size)
+                unpadded_tensor_list.append(unpadded_tensor)
+            unbatched_unpadded_tensor_dict[key] = unpadded_tensor_list
+        unbatched_tensor_dict.update(unbatched_unpadded_tensor_dict)
+
+    return unbatched_tensor_dict


 def provide_groundtruth(model, labels):
-  """Provides the labels to a model as groundtruth.
+    """Provides the labels to a model as groundtruth.

   This helper function extracts the corresponding boxes, classes,
   keypoints, weights, masks, etc. from the labels, and provides it
@@ -229,52 +239,52 @@ def provide_groundtruth(model, labels):
     model: The detection model to provide groundtruth to.
     labels: The labels for the training or evaluation inputs.
   """
-  gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
-  gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
-  gt_masks_list = None
-  if fields.InputDataFields.groundtruth_instance_masks in labels:
-    gt_masks_list = labels[
-        fields.InputDataFields.groundtruth_instance_masks]
-  gt_keypoints_list = None
-  if fields.InputDataFields.groundtruth_keypoints in labels:
-    gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
-  gt_keypoint_visibilities_list = None
-  if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
-    gt_keypoint_visibilities_list = labels[
-        fields.InputDataFields.groundtruth_keypoint_visibilities]
-  gt_weights_list = None
-  if fields.InputDataFields.groundtruth_weights in labels:
-    gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
-  gt_confidences_list = None
-  if fields.InputDataFields.groundtruth_confidences in labels:
-    gt_confidences_list = labels[
-        fields.InputDataFields.groundtruth_confidences]
-  gt_is_crowd_list = None
-  if fields.InputDataFields.groundtruth_is_crowd in labels:
-    gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
-  gt_area_list = None
-  if fields.InputDataFields.groundtruth_area in labels:
-    gt_area_list = labels[fields.InputDataFields.groundtruth_area]
-  gt_labeled_classes = None
-  if fields.InputDataFields.groundtruth_labeled_classes in labels:
-    gt_labeled_classes = labels[
-        fields.InputDataFields.groundtruth_labeled_classes]
-  model.provide_groundtruth(
-      groundtruth_boxes_list=gt_boxes_list,
-      groundtruth_classes_list=gt_classes_list,
-      groundtruth_confidences_list=gt_confidences_list,
-      groundtruth_labeled_classes=gt_labeled_classes,
-      groundtruth_masks_list=gt_masks_list,
-      groundtruth_keypoints_list=gt_keypoints_list,
-      groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
-      groundtruth_weights_list=gt_weights_list,
-      groundtruth_is_crowd_list=gt_is_crowd_list,
-      groundtruth_area_list=gt_area_list)
-
-
-def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
-                    postprocess_on_cpu=False):
-  """Creates a model function for `Estimator`.
+    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
+    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
+    gt_masks_list = None
+    if fields.InputDataFields.groundtruth_instance_masks in labels:
+        gt_masks_list = labels[fields.InputDataFields.groundtruth_instance_masks]
+    gt_keypoints_list = None
+    if fields.InputDataFields.groundtruth_keypoints in labels:
+        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
+    gt_keypoint_visibilities_list = None
+    if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
+        gt_keypoint_visibilities_list = labels[
+            fields.InputDataFields.groundtruth_keypoint_visibilities
+        ]
+    gt_weights_list = None
+    if fields.InputDataFields.groundtruth_weights in labels:
+        gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
+    gt_confidences_list = None
+    if fields.InputDataFields.groundtruth_confidences in labels:
+        gt_confidences_list = labels[fields.InputDataFields.groundtruth_confidences]
+    gt_is_crowd_list = None
+    if fields.InputDataFields.groundtruth_is_crowd in labels:
+        gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
+    gt_area_list = None
+    if fields.InputDataFields.groundtruth_area in labels:
+        gt_area_list = labels[fields.InputDataFields.groundtruth_area]
+    gt_labeled_classes = None
+    if fields.InputDataFields.groundtruth_labeled_classes in labels:
+        gt_labeled_classes = labels[fields.InputDataFields.groundtruth_labeled_classes]
+    model.provide_groundtruth(
+        groundtruth_boxes_list=gt_boxes_list,
+        groundtruth_classes_list=gt_classes_list,
+        groundtruth_confidences_list=gt_confidences_list,
+        groundtruth_labeled_classes=gt_labeled_classes,
+        groundtruth_masks_list=gt_masks_list,
+        groundtruth_keypoints_list=gt_keypoints_list,
+        groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
+        groundtruth_weights_list=gt_weights_list,
+        groundtruth_is_crowd_list=gt_is_crowd_list,
+        groundtruth_area_list=gt_area_list,
+    )
+
+
+def create_model_fn(
+    detection_model_fn, configs, hparams, use_tpu=False, postprocess_on_cpu=False
+):
+    """Creates a model function for `Estimator`.

   Args:
     detection_model_fn: Function that returns a `DetectionModel` instance.
@@ -288,12 +298,12 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
   Returns:
     `model_fn` for `Estimator`.
   """
-  train_config = configs['train_config']
-  eval_input_config = configs['eval_input_config']
-  eval_config = configs['eval_config']
+    train_config = configs["train_config"]
+    eval_input_config = configs["eval_input_config"]
+    eval_config = configs["eval_config"]

-  def model_fn(features, labels, mode, params=None):
-    """Constructs the object detection model.
+    def model_fn(features, labels, mode, params=None):
+        """Constructs the object detection model.

     Args:
       features: Dictionary of feature tensors, returned from `input_fn`.
@@ -306,305 +316,350 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
       An `EstimatorSpec` that encapsulates the model and its serving
         configurations.
     """
-    params = params or {}
-    total_loss, train_op, detections, export_outputs = None, None, None, None
-    is_training = mode == tf.estimator.ModeKeys.TRAIN
-
-    # Make sure to set the Keras learning phase. True during training,
-    # False for inference.
-    tf.keras.backend.set_learning_phase(is_training)
-    # Set policy for mixed-precision training with Keras-based models.
-    if use_tpu and train_config.use_bfloat16:
-      from tensorflow.python.keras.engine import base_layer_utils  # pylint: disable=g-import-not-at-top
-      # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
-      base_layer_utils.enable_v2_dtype_behavior()
-      tf.compat.v2.keras.mixed_precision.experimental.set_policy(
-          'mixed_bfloat16')
-    detection_model = detection_model_fn(
-        is_training=is_training, add_summaries=(not use_tpu))
-    scaffold_fn = None
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-      labels = unstack_batch(
-          labels,
-          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
-    elif mode == tf.estimator.ModeKeys.EVAL:
-      # For evaling on train data, it is necessary to check whether groundtruth
-      # must be unpadded.
-      boxes_shape = (
-          labels[fields.InputDataFields.groundtruth_boxes].get_shape()
-          .as_list())
-      unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
-      labels = unstack_batch(
-          labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
-
-    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
-      provide_groundtruth(detection_model, labels)
-
-    preprocessed_images = features[fields.InputDataFields.image]
-
-    side_inputs = detection_model.get_side_inputs(features)
-
-    if use_tpu and train_config.use_bfloat16:
-      with contrib_tpu.bfloat16_scope():
-        prediction_dict = detection_model.predict(
-            preprocessed_images,
-            features[fields.InputDataFields.true_image_shape], **side_inputs)
-        prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
-    else:
-      prediction_dict = detection_model.predict(
-          preprocessed_images,
-          features[fields.InputDataFields.true_image_shape], **side_inputs)
-
-    def postprocess_wrapper(args):
-      return detection_model.postprocess(args[0], args[1])
-
-    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
-      if use_tpu and postprocess_on_cpu:
-        detections = contrib_tpu.outside_compilation(
-            postprocess_wrapper,
-            (prediction_dict,
-             features[fields.InputDataFields.true_image_shape]))
-      else:
-        detections = postprocess_wrapper((
-            prediction_dict,
-            features[fields.InputDataFields.true_image_shape]))
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-      load_pretrained = hparams.load_pretrained if hparams else False
-      if train_config.fine_tune_checkpoint and load_pretrained:
-        if not train_config.fine_tune_checkpoint_type:
-          # train_config.from_detection_checkpoint field is deprecated. For
-          # backward compatibility, set train_config.fine_tune_checkpoint_type
-          # based on train_config.from_detection_checkpoint.
-          if train_config.from_detection_checkpoint:
-            train_config.fine_tune_checkpoint_type = 'detection'
-          else:
-            train_config.fine_tune_checkpoint_type = 'classification'
-        asg_map = detection_model.restore_map(
-            fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
-            load_all_detection_checkpoint_vars=(
-                train_config.load_all_detection_checkpoint_vars))
-        available_var_map = (
-            variables_helper.get_variables_available_in_checkpoint(
-                asg_map,
-                train_config.fine_tune_checkpoint,
-                include_global_step=False))
-        if use_tpu:
-
-          def tpu_scaffold():
-            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
-                                          available_var_map)
-            return tf.train.Scaffold()
-
-          scaffold_fn = tpu_scaffold
+        params = params or {}
+        total_loss, train_op, detections, export_outputs = None, None, None, None
+        is_training = mode == tf.estimator.ModeKeys.TRAIN
+
+        # Make sure to set the Keras learning phase. True during training,
+        # False for inference.
+        tf.keras.backend.set_learning_phase(is_training)
+        # Set policy for mixed-precision training with Keras-based models.
+        if use_tpu and train_config.use_bfloat16:
+            from tensorflow.python.keras.engine import (
+                base_layer_utils,
+            )  # pylint: disable=g-import-not-at-top
+
+            # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
+            base_layer_utils.enable_v2_dtype_behavior()
+            tf.compat.v2.keras.mixed_precision.experimental.set_policy("mixed_bfloat16")
+        detection_model = detection_model_fn(
+            is_training=is_training, add_summaries=(not use_tpu)
+        )
+        scaffold_fn = None
+
+        if mode == tf.estimator.ModeKeys.TRAIN:
+            labels = unstack_batch(
+                labels, unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors
+            )
+        elif mode == tf.estimator.ModeKeys.EVAL:
+            # For evaling on train data, it is necessary to check whether groundtruth
+            # must be unpadded.
+            boxes_shape = (
+                labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list()
+            )
+            unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
+            labels = unstack_batch(
+                labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors
+            )
+
+        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
+            provide_groundtruth(detection_model, labels)
+
+        preprocessed_images = features[fields.InputDataFields.image]
+
+        side_inputs = detection_model.get_side_inputs(features)
+
+        if use_tpu and train_config.use_bfloat16:
+            with contrib_tpu.bfloat16_scope():
+                prediction_dict = detection_model.predict(
+                    preprocessed_images,
+                    features[fields.InputDataFields.true_image_shape],
+                    **side_inputs
+                )
+                prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
         else:
-          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
-                                        available_var_map)
-
-    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
-      if (mode == tf.estimator.ModeKeys.EVAL and
-          eval_config.use_dummy_loss_in_eval):
-        total_loss = tf.constant(1.0)
-        losses_dict = {'Loss/total_loss': total_loss}
-      else:
-        losses_dict = detection_model.loss(
-            prediction_dict, features[fields.InputDataFields.true_image_shape])
-        losses = [loss_tensor for loss_tensor in losses_dict.values()]
-        if train_config.add_regularization_loss:
-          regularization_losses = detection_model.regularization_losses()
-          if use_tpu and train_config.use_bfloat16:
-            regularization_losses = ops.bfloat16_to_float32_nested(
-                regularization_losses)
-          if regularization_losses:
-            regularization_loss = tf.add_n(
-                regularization_losses, name='regularization_loss')
-            losses.append(regularization_loss)
-            losses_dict['Loss/regularization_loss'] = regularization_loss
-        total_loss = tf.add_n(losses, name='total_loss')
-        losses_dict['Loss/total_loss'] = total_loss
-
-      if 'graph_rewriter_config' in configs:
-        graph_rewriter_fn = graph_rewriter_builder.build(
-            configs['graph_rewriter_config'], is_training=is_training)
-        graph_rewriter_fn()
-
-      # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
-      # can write learning rate summaries on TPU without host calls.
-      global_step = tf.train.get_or_create_global_step()
-      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
-          train_config.optimizer)
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-      if use_tpu:
-        training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
-
-      # Optionally freeze some layers by setting their gradients to be zero.
-      trainable_variables = None
-      include_variables = (
-          train_config.update_trainable_variables
-          if train_config.update_trainable_variables else None)
-      exclude_variables = (
-          train_config.freeze_variables
-          if train_config.freeze_variables else None)
-      trainable_variables = contrib_framework.filter_variables(
-          tf.trainable_variables(),
-          include_patterns=include_variables,
-          exclude_patterns=exclude_variables)
-
-      clip_gradients_value = None
-      if train_config.gradient_clipping_by_norm > 0:
-        clip_gradients_value = train_config.gradient_clipping_by_norm
-
-      if not use_tpu:
-        for var in optimizer_summary_vars:
-          tf.summary.scalar(var.op.name, var)
-      summaries = [] if use_tpu else None
-      if train_config.summarize_gradients:
-        summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
-      train_op = contrib_layers.optimize_loss(
-          loss=total_loss,
-          global_step=global_step,
-          learning_rate=None,
-          clip_gradients=clip_gradients_value,
-          optimizer=training_optimizer,
-          update_ops=detection_model.updates(),
-          variables=trainable_variables,
-          summaries=summaries,
-          name='')  # Preventing scope prefix on all variables.
-
-    if mode == tf.estimator.ModeKeys.PREDICT:
-      exported_output = exporter_lib.add_output_tensor_nodes(detections)
-      export_outputs = {
-          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
-              tf.estimator.export.PredictOutput(exported_output)
-      }
-
-    eval_metric_ops = None
-    scaffold = None
-    if mode == tf.estimator.ModeKeys.EVAL:
-      class_agnostic = (
-          fields.DetectionResultFields.detection_classes not in detections)
-      groundtruth = _prepare_groundtruth_for_eval(
-          detection_model, class_agnostic,
-          eval_input_config.max_number_of_boxes)
-      use_original_images = fields.InputDataFields.original_image in features
-      if use_original_images:
-        eval_images = features[fields.InputDataFields.original_image]
-        true_image_shapes = tf.slice(
-            features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
-        original_image_spatial_shapes = features[fields.InputDataFields
-                                                 .original_image_spatial_shape]
-      else:
-        eval_images = features[fields.InputDataFields.image]
-        true_image_shapes = None
-        original_image_spatial_shapes = None
-
-      eval_dict = eval_util.result_dict_for_batched_example(
-          eval_images,
-          features[inputs.HASH_KEY],
-          detections,
-          groundtruth,
-          class_agnostic=class_agnostic,
-          scale_to_absolute=True,
-          original_image_spatial_shapes=original_image_spatial_shapes,
-          true_image_shapes=true_image_shapes)
-
-      if fields.InputDataFields.image_additional_channels in features:
-        eval_dict[fields.InputDataFields.image_additional_channels] = features[
-            fields.InputDataFields.image_additional_channels]
-
-      if class_agnostic:
-        category_index = label_map_util.create_class_agnostic_category_index()
-      else:
-        category_index = label_map_util.create_category_index_from_labelmap(
-            eval_input_config.label_map_path)
-      vis_metric_ops = None
-      if not use_tpu and use_original_images:
-        keypoint_edges = [
-            (kp.start, kp.end) for kp in eval_config.keypoint_edge]
-
-        eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
-            category_index,
-            max_examples_to_draw=eval_config.num_visualizations,
-            max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
-            min_score_thresh=eval_config.min_score_threshold,
-            use_normalized_coordinates=False,
-            keypoint_edges=keypoint_edges or None)
-        vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
-            eval_dict)
-
-      # Eval metrics on a single example.
-      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
-          eval_config, list(category_index.values()), eval_dict)
-      for loss_key, loss_tensor in iter(losses_dict.items()):
-        eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
-      for var in optimizer_summary_vars:
-        eval_metric_ops[var.op.name] = (var, tf.no_op())
-      if vis_metric_ops is not None:
-        eval_metric_ops.update(vis_metric_ops)
-      eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}
-
-      if eval_config.use_moving_averages:
-        variable_averages = tf.train.ExponentialMovingAverage(0.0)
-        variables_to_restore = variable_averages.variables_to_restore()
-        keep_checkpoint_every_n_hours = (
-            train_config.keep_checkpoint_every_n_hours)
-        saver = tf.train.Saver(
-            variables_to_restore,
-            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
-        scaffold = tf.train.Scaffold(saver=saver)
-
-    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
-    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
-      return contrib_tpu.TPUEstimatorSpec(
-          mode=mode,
-          scaffold_fn=scaffold_fn,
-          predictions=detections,
-          loss=total_loss,
-          train_op=train_op,
-          eval_metrics=eval_metric_ops,
-          export_outputs=export_outputs)
-    else:
-      if scaffold is None:
-        keep_checkpoint_every_n_hours = (
-            train_config.keep_checkpoint_every_n_hours)
-        saver = tf.train.Saver(
-            sharded=True,
-            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
-            save_relative_paths=True)
-        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
-        scaffold = tf.train.Scaffold(saver=saver)
-      return tf.estimator.EstimatorSpec(
-          mode=mode,
-          predictions=detections,
-          loss=total_loss,
-          train_op=train_op,
-          eval_metric_ops=eval_metric_ops,
-          export_outputs=export_outputs,
-          scaffold=scaffold)
-
-  return model_fn
-
-
-def create_estimator_and_inputs(run_config,
-                                hparams,
-                                pipeline_config_path,
-                                config_override=None,
-                                train_steps=None,
-                                sample_1_of_n_eval_examples=1,
-                                sample_1_of_n_eval_on_train_examples=1,
-                                model_fn_creator=create_model_fn,
-                                use_tpu_estimator=False,
-                                use_tpu=False,
-                                num_shards=1,
-                                params=None,
-                                override_eval_num_epochs=True,
-                                save_final_config=False,
-                                postprocess_on_cpu=False,
-                                export_to_tpu=None,
-                                **kwargs):
-  """Creates `Estimator`, input functions, and steps.
+            prediction_dict = detection_model.predict(
+                preprocessed_images,
+                features[fields.InputDataFields.true_image_shape],
+                **side_inputs
+            )
+
+        def postprocess_wrapper(args):
+            return detection_model.postprocess(args[0], args[1])
+
+        if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
+            if use_tpu and postprocess_on_cpu:
+                detections = contrib_tpu.outside_compilation(
+                    postprocess_wrapper,
+                    (
+                        prediction_dict,
+                        features[fields.InputDataFields.true_image_shape],
+                    ),
+                )
+            else:
+                detections = postprocess_wrapper(
+                    (prediction_dict, features[fields.InputDataFields.true_image_shape])
+                )
+
+        if mode == tf.estimator.ModeKeys.TRAIN:
+            load_pretrained = hparams.load_pretrained if hparams else False
+            if train_config.fine_tune_checkpoint and load_pretrained:
+                if not train_config.fine_tune_checkpoint_type:
+                    # train_config.from_detection_checkpoint field is deprecated. For
+                    # backward compatibility, set train_config.fine_tune_checkpoint_type
+                    # based on train_config.from_detection_checkpoint.
+                    if train_config.from_detection_checkpoint:
+                        train_config.fine_tune_checkpoint_type = "detection"
+                    else:
+                        train_config.fine_tune_checkpoint_type = "classification"
+                asg_map = detection_model.restore_map(
+                    fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
+                    load_all_detection_checkpoint_vars=(
+                        train_config.load_all_detection_checkpoint_vars
+                    ),
+                )
+                available_var_map = variables_helper.get_variables_available_in_checkpoint(
+                    asg_map,
+                    train_config.fine_tune_checkpoint,
+                    include_global_step=False,
+                )
+                if use_tpu:
+
+                    def tpu_scaffold():
+                        tf.train.init_from_checkpoint(
+                            train_config.fine_tune_checkpoint, available_var_map
+                        )
+                        return tf.train.Scaffold()
+
+                    scaffold_fn = tpu_scaffold
+                else:
+                    tf.train.init_from_checkpoint(
+                        train_config.fine_tune_checkpoint, available_var_map
+                    )
+
+        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
+            if (
+                mode == tf.estimator.ModeKeys.EVAL
+                and eval_config.use_dummy_loss_in_eval
+            ):
+                total_loss = tf.constant(1.0)
+                losses_dict = {"Loss/total_loss": total_loss}
+            else:
+                losses_dict = detection_model.loss(
+                    prediction_dict, features[fields.InputDataFields.true_image_shape]
+                )
+                losses = [loss_tensor for loss_tensor in losses_dict.values()]
+                if train_config.add_regularization_loss:
+                    regularization_losses = detection_model.regularization_losses()
+                    if use_tpu and train_config.use_bfloat16:
+                        regularization_losses = ops.bfloat16_to_float32_nested(
+                            regularization_losses
+                        )
+                    if regularization_losses:
+                        regularization_loss = tf.add_n(
+                            regularization_losses, name="regularization_loss"
+                        )
+                        losses.append(regularization_loss)
+                        losses_dict["Loss/regularization_loss"] = regularization_loss
+                total_loss = tf.add_n(losses, name="total_loss")
+                losses_dict["Loss/total_loss"] = total_loss
+
+            if "graph_rewriter_config" in configs:
+                graph_rewriter_fn = graph_rewriter_builder.build(
+                    configs["graph_rewriter_config"], is_training=is_training
+                )
+                graph_rewriter_fn()
+
+            # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
+            # can write learning rate summaries on TPU without host calls.
+            global_step = tf.train.get_or_create_global_step()
+            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
+                train_config.optimizer
+            )
+
+        if mode == tf.estimator.ModeKeys.TRAIN:
+            if use_tpu:
+                training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
+
+            # Optionally freeze some layers by excluding their variables from training.
+            trainable_variables = None
+            include_variables = (
+                train_config.update_trainable_variables
+                if train_config.update_trainable_variables
+                else None
+            )
+            exclude_variables = (
+                train_config.freeze_variables if train_config.freeze_variables else None
+            )
+            trainable_variables = contrib_framework.filter_variables(
+                tf.trainable_variables(),
+                include_patterns=include_variables,
+                exclude_patterns=exclude_variables,
+            )
+
+            clip_gradients_value = None
+            if train_config.gradient_clipping_by_norm > 0:
+                clip_gradients_value = train_config.gradient_clipping_by_norm
+
+            if not use_tpu:
+                for var in optimizer_summary_vars:
+                    tf.summary.scalar(var.op.name, var)
+            summaries = [] if use_tpu else None
+            if train_config.summarize_gradients:
+                summaries = ["gradients", "gradient_norm", "global_gradient_norm"]
+            train_op = contrib_layers.optimize_loss(
+                loss=total_loss,
+                global_step=global_step,
+                learning_rate=None,
+                clip_gradients=clip_gradients_value,
+                optimizer=training_optimizer,
+                update_ops=detection_model.updates(),
+                variables=trainable_variables,
+                summaries=summaries,
+                name="",
+            )  # Preventing scope prefix on all variables.
+
+        if mode == tf.estimator.ModeKeys.PREDICT:
+            exported_output = exporter_lib.add_output_tensor_nodes(detections)
+            export_outputs = {
+                tf.saved_model.signature_constants.PREDICT_METHOD_NAME: tf.estimator.export.PredictOutput(
+                    exported_output
+                )
+            }
+
+        eval_metric_ops = None
+        scaffold = None
+        if mode == tf.estimator.ModeKeys.EVAL:
+            class_agnostic = (
+                fields.DetectionResultFields.detection_classes not in detections
+            )
+            groundtruth = _prepare_groundtruth_for_eval(
+                detection_model, class_agnostic, eval_input_config.max_number_of_boxes
+            )
+            use_original_images = fields.InputDataFields.original_image in features
+            if use_original_images:
+                eval_images = features[fields.InputDataFields.original_image]
+                true_image_shapes = tf.slice(
+                    features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]
+                )
+                original_image_spatial_shapes = features[
+                    fields.InputDataFields.original_image_spatial_shape
+                ]
+            else:
+                eval_images = features[fields.InputDataFields.image]
+                true_image_shapes = None
+                original_image_spatial_shapes = None
+
+            eval_dict = eval_util.result_dict_for_batched_example(
+                eval_images,
+                features[inputs.HASH_KEY],
+                detections,
+                groundtruth,
+                class_agnostic=class_agnostic,
+                scale_to_absolute=True,
+                original_image_spatial_shapes=original_image_spatial_shapes,
+                true_image_shapes=true_image_shapes,
+            )
+
+            if fields.InputDataFields.image_additional_channels in features:
+                eval_dict[fields.InputDataFields.image_additional_channels] = features[
+                    fields.InputDataFields.image_additional_channels
+                ]
+
+            if class_agnostic:
+                category_index = label_map_util.create_class_agnostic_category_index()
+            else:
+                category_index = label_map_util.create_category_index_from_labelmap(
+                    eval_input_config.label_map_path
+                )
+            vis_metric_ops = None
+            if not use_tpu and use_original_images:
+                keypoint_edges = [
+                    (kp.start, kp.end) for kp in eval_config.keypoint_edge
+                ]
+
+                eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
+                    category_index,
+                    max_examples_to_draw=eval_config.num_visualizations,
+                    max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
+                    min_score_thresh=eval_config.min_score_threshold,
+                    use_normalized_coordinates=False,
+                    keypoint_edges=keypoint_edges or None,
+                )
+                vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
+                    eval_dict
+                )
+
+            # Eval metrics on a single example.
+            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
+                eval_config, list(category_index.values()), eval_dict
+            )
+            for loss_key, loss_tensor in iter(losses_dict.items()):
+                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
+            for var in optimizer_summary_vars:
+                eval_metric_ops[var.op.name] = (var, tf.no_op())
+            if vis_metric_ops is not None:
+                eval_metric_ops.update(vis_metric_ops)
+            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}
+
+            if eval_config.use_moving_averages:
+                variable_averages = tf.train.ExponentialMovingAverage(0.0)
+                variables_to_restore = variable_averages.variables_to_restore()
+                keep_checkpoint_every_n_hours = (
+                    train_config.keep_checkpoint_every_n_hours
+                )
+                saver = tf.train.Saver(
+                    variables_to_restore,
+                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
+                )
+                scaffold = tf.train.Scaffold(saver=saver)
+
+        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
+        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
+            return contrib_tpu.TPUEstimatorSpec(
+                mode=mode,
+                scaffold_fn=scaffold_fn,
+                predictions=detections,
+                loss=total_loss,
+                train_op=train_op,
+                eval_metrics=eval_metric_ops,
+                export_outputs=export_outputs,
+            )
+        else:
+            if scaffold is None:
+                keep_checkpoint_every_n_hours = (
+                    train_config.keep_checkpoint_every_n_hours
+                )
+                saver = tf.train.Saver(
+                    sharded=True,
+                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
+                    save_relative_paths=True,
+                )
+                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
+                scaffold = tf.train.Scaffold(saver=saver)
+            return tf.estimator.EstimatorSpec(
+                mode=mode,
+                predictions=detections,
+                loss=total_loss,
+                train_op=train_op,
+                eval_metric_ops=eval_metric_ops,
+                export_outputs=export_outputs,
+                scaffold=scaffold,
+            )
+
+    return model_fn
+
+
+def create_estimator_and_inputs(
+    run_config,
+    hparams,
+    pipeline_config_path,
+    config_override=None,
+    train_steps=None,
+    sample_1_of_n_eval_examples=1,
+    sample_1_of_n_eval_on_train_examples=1,
+    model_fn_creator=create_model_fn,
+    use_tpu_estimator=False,
+    use_tpu=False,
+    num_shards=1,
+    params=None,
+    override_eval_num_epochs=True,
+    save_final_config=False,
+    postprocess_on_cpu=False,
+    export_to_tpu=None,
+    **kwargs
+):
+    """Creates `Estimator`, input functions, and steps.

   Args:
     run_config: A `RunConfig`.
@@ -659,122 +714,144 @@ def create_estimator_and_inputs(run_config,
     'train_steps': Number of training steps. Either directly from input or from
       configuration.
   """
-  get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
-      'get_configs_from_pipeline_file']
-  merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
-      'merge_external_params_with_configs']
-  create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
-      'create_pipeline_proto_from_configs']
-  create_train_input_fn = MODEL_BUILD_UTIL_MAP['create_train_input_fn']
-  create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
-  create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']
-  detection_model_fn_base = MODEL_BUILD_UTIL_MAP['detection_model_fn_base']
-
-  configs = get_configs_from_pipeline_file(
-      pipeline_config_path, config_override=config_override)
-  kwargs.update({
-      'train_steps': train_steps,
-      'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
-  })
-  if sample_1_of_n_eval_examples >= 1:
-    kwargs.update({
-        'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
-    })
-  if override_eval_num_epochs:
-    kwargs.update({'eval_num_epochs': 1})
-    tf.logging.warning(
-        'Forced number of epochs for all eval validations to be 1.')
-  configs = merge_external_params_with_configs(
-      configs, hparams, kwargs_dict=kwargs)
-  model_config = configs['model']
-  train_config = configs['train_config']
-  train_input_config = configs['train_input_config']
-  eval_config = configs['eval_config']
-  eval_input_configs = configs['eval_input_configs']
-  eval_on_train_input_config = copy.deepcopy(train_input_config)
-  eval_on_train_input_config.sample_1_of_n_examples = (
-      sample_1_of_n_eval_on_train_examples)
-  if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
-    tf.logging.warning('Expected number of evaluation epochs is 1, but '
-                       'instead encountered `eval_on_train_input_config'
-                       '.num_epochs` = '
-                       '{}. Overwriting `num_epochs` to 1.'.format(
-                           eval_on_train_input_config.num_epochs))
-    eval_on_train_input_config.num_epochs = 1
-
-  # update train_steps from config but only when non-zero value is provided
-  if train_steps is None and train_config.num_steps != 0:
-    train_steps = train_config.num_steps
-
-  detection_model_fn = functools.partial(
-      detection_model_fn_base, model_config=model_config)
-
-  # Create the input functions for TRAIN/EVAL/PREDICT.
-  train_input_fn = create_train_input_fn(
-      train_config=train_config,
-      train_input_config=train_input_config,
-      model_config=model_config)
-  eval_input_fns = [
-      create_eval_input_fn(
-          eval_config=eval_config,
-          eval_input_config=eval_input_config,
-          model_config=model_config) for eval_input_config in eval_input_configs
-  ]
-  eval_input_names = [
-      eval_input_config.name for eval_input_config in eval_input_configs
-  ]
-  eval_on_train_input_fn = create_eval_input_fn(
-      eval_config=eval_config,
-      eval_input_config=eval_on_train_input_config,
-      model_config=model_config)
-  predict_input_fn = create_predict_input_fn(
-      model_config=model_config, predict_input_config=eval_input_configs[0])
-
-  # Read export_to_tpu from hparams if not passed.
-  if export_to_tpu is None:
-    export_to_tpu = hparams.get('export_to_tpu', False)
-  tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
-                  use_tpu, export_to_tpu)
-  model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
-                              postprocess_on_cpu)
-  if use_tpu_estimator:
-    estimator = contrib_tpu.TPUEstimator(
-        model_fn=model_fn,
-        train_batch_size=train_config.batch_size,
-        # For each core, only batch size 1 is supported for eval.
-        eval_batch_size=num_shards * 1 if use_tpu else 1,
-        use_tpu=use_tpu,
-        config=run_config,
-        export_to_tpu=export_to_tpu,
-        eval_on_tpu=False,  # Eval runs on CPU, so disable eval on TPU
-        params=params if params else {})
-  else:
-    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
-
-  # Write the as-run pipeline config to disk.
-  if run_config.is_chief and save_final_config:
-    pipeline_config_final = create_pipeline_proto_from_configs(configs)
-    config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
-
-  return dict(
-      estimator=estimator,
-      train_input_fn=train_input_fn,
-      eval_input_fns=eval_input_fns,
-      eval_input_names=eval_input_names,
-      eval_on_train_input_fn=eval_on_train_input_fn,
-      predict_input_fn=predict_input_fn,
-      train_steps=train_steps)
-
-
-def create_train_and_eval_specs(train_input_fn,
-                                eval_input_fns,
-                                eval_on_train_input_fn,
-                                predict_input_fn,
-                                train_steps,
-                                eval_on_train_data=False,
-                                final_exporter_name='Servo',
-                                eval_spec_names=None):
-  """Creates a `TrainSpec` and `EvalSpec`s.
+    get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
+        "get_configs_from_pipeline_file"
+    ]
+    merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
+        "merge_external_params_with_configs"
+    ]
+    create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
+        "create_pipeline_proto_from_configs"
+    ]
+    create_train_input_fn = MODEL_BUILD_UTIL_MAP["create_train_input_fn"]
+    create_eval_input_fn = MODEL_BUILD_UTIL_MAP["create_eval_input_fn"]
+    create_predict_input_fn = MODEL_BUILD_UTIL_MAP["create_predict_input_fn"]
+    detection_model_fn_base = MODEL_BUILD_UTIL_MAP["detection_model_fn_base"]
+
+    configs = get_configs_from_pipeline_file(
+        pipeline_config_path, config_override=config_override
+    )
+    kwargs.update(
+        {
+            "train_steps": train_steps,
+            "use_bfloat16": configs["train_config"].use_bfloat16 and use_tpu,
+        }
+    )
+    if sample_1_of_n_eval_examples >= 1:
+        kwargs.update({"sample_1_of_n_eval_examples": sample_1_of_n_eval_examples})
+    if override_eval_num_epochs:
+        kwargs.update({"eval_num_epochs": 1})
+        tf.logging.warning("Forced number of epochs for all eval validations to be 1.")
+    configs = merge_external_params_with_configs(configs, hparams, kwargs_dict=kwargs)
+    model_config = configs["model"]
+    train_config = configs["train_config"]
+    train_input_config = configs["train_input_config"]
+    eval_config = configs["eval_config"]
+    eval_input_configs = configs["eval_input_configs"]
+    eval_on_train_input_config = copy.deepcopy(train_input_config)
+    eval_on_train_input_config.sample_1_of_n_examples = (
+        sample_1_of_n_eval_on_train_examples
+    )
+    if override_eval_num_epochs and eval_on_train_input_config.num_epochs != 1:
+        tf.logging.warning(
+            "Expected number of evaluation epochs is 1, but "
+            "instead encountered `eval_on_train_input_config"
+            ".num_epochs` = "
+            "{}. Overwriting `num_epochs` to 1.".format(
+                eval_on_train_input_config.num_epochs
+            )
+        )
+        eval_on_train_input_config.num_epochs = 1
+
+    # Fall back to train_config.num_steps, but only when it is non-zero.
+    if train_steps is None and train_config.num_steps != 0:
+        train_steps = train_config.num_steps
+
+    detection_model_fn = functools.partial(
+        detection_model_fn_base, model_config=model_config
+    )
+
+    # Create the input functions for TRAIN/EVAL/PREDICT.
+    train_input_fn = create_train_input_fn(
+        train_config=train_config,
+        train_input_config=train_input_config,
+        model_config=model_config,
+    )
+    eval_input_fns = [
+        create_eval_input_fn(
+            eval_config=eval_config,
+            eval_input_config=eval_input_config,
+            model_config=model_config,
+        )
+        for eval_input_config in eval_input_configs
+    ]
+    eval_input_names = [
+        eval_input_config.name for eval_input_config in eval_input_configs
+    ]
+    eval_on_train_input_fn = create_eval_input_fn(
+        eval_config=eval_config,
+        eval_input_config=eval_on_train_input_config,
+        model_config=model_config,
+    )
+    predict_input_fn = create_predict_input_fn(
+        model_config=model_config, predict_input_config=eval_input_configs[0]
+    )
+
+    # Read export_to_tpu from hparams if not passed; tolerate hparams being None.
+    if export_to_tpu is None:
+        export_to_tpu = hparams.get("export_to_tpu", False) if hparams else False
+    tf.logging.info(
+        "create_estimator_and_inputs: use_tpu %s, export_to_tpu %s",
+        use_tpu,
+        export_to_tpu,
+    )
+    model_fn = model_fn_creator(
+        detection_model_fn, configs, hparams, use_tpu, postprocess_on_cpu
+    )
+    if use_tpu_estimator:
+        estimator = contrib_tpu.TPUEstimator(
+            model_fn=model_fn,
+            train_batch_size=train_config.batch_size,
+            # For each core, only batch size 1 is supported for eval.
+            eval_batch_size=num_shards * 1 if use_tpu else 1,
+            use_tpu=use_tpu,
+            config=run_config,
+            export_to_tpu=export_to_tpu,
+            eval_on_tpu=False,  # Eval runs on CPU, so disable eval on TPU
+            params=params if params else {},
+        )
+    else:
+        estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
+
+    # Write the as-run pipeline config to disk.
+    if run_config.is_chief and save_final_config:
+        pipeline_config_final = create_pipeline_proto_from_configs(configs)
+        config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
+
+    return dict(
+        estimator=estimator,
+        train_input_fn=train_input_fn,
+        eval_input_fns=eval_input_fns,
+        eval_input_names=eval_input_names,
+        eval_on_train_input_fn=eval_on_train_input_fn,
+        predict_input_fn=predict_input_fn,
+        train_steps=train_steps,
+    )
+
+
+def create_train_and_eval_specs(
+    train_input_fn,
+    eval_input_fns,
+    eval_on_train_input_fn,
+    predict_input_fn,
+    train_steps,
+    eval_on_train_data=False,
+    final_exporter_name="Servo",
+    throttle_secs=900,
+    hooks=None,
+    eval_spec_names=None,
+):
+    """Creates a `TrainSpec` and `EvalSpec`s.

   Args:
     train_input_fn: Function that produces features and labels on train data.
@@ -787,6 +864,8 @@ def create_train_and_eval_specs(train_input_fn,
     eval_on_train_data: Whether to evaluate model on training data. Default is
       False.
     final_exporter_name: String name given to `FinalExporter`.
+    throttle_secs: Number of seconds to throttle training.
+    hooks: Iterable of tf.train.SessionRunHook objects to run on all workers.
     eval_spec_names: A list of string names for each `EvalSpec`.

   Returns:
@@ -794,40 +873,48 @@ def create_train_and_eval_specs(train_input_fn,
     True, the last `EvalSpec` in the list will correspond to training data. The
     rest EvalSpecs in the list are evaluation datas.
   """
-  train_spec = tf.estimator.TrainSpec(
-      input_fn=train_input_fn, max_steps=train_steps)
-
-  if eval_spec_names is None:
-    eval_spec_names = [str(i) for i in range(len(eval_input_fns))]
-
-  eval_specs = []
-  for index, (eval_spec_name, eval_input_fn) in enumerate(
-      zip(eval_spec_names, eval_input_fns)):
-    # Uses final_exporter_name as exporter_name for the first eval spec for
-    # backward compatibility.
-    if index == 0:
-      exporter_name = final_exporter_name
-    else:
-      exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
-    exporter = tf.estimator.FinalExporter(
-        name=exporter_name, serving_input_receiver_fn=predict_input_fn)
-    eval_specs.append(
-        tf.estimator.EvalSpec(
-            name=eval_spec_name,
-            input_fn=eval_input_fn,
-            steps=None,
-            exporters=exporter))
-
-  if eval_on_train_data:
-    eval_specs.append(
-        tf.estimator.EvalSpec(
-            name='eval_on_train', input_fn=eval_on_train_input_fn, steps=None))
-
-  return train_spec, eval_specs
+    train_spec = tf.estimator.TrainSpec(
+        input_fn=train_input_fn, max_steps=train_steps, hooks=hooks
+    )
+
+    if eval_spec_names is None:
+        eval_spec_names = [str(i) for i in range(len(eval_input_fns))]
+
+    eval_specs = []
+    for index, (eval_spec_name, eval_input_fn) in enumerate(
+        zip(eval_spec_names, eval_input_fns)
+    ):
+        # Uses final_exporter_name as exporter_name for the first eval spec for
+        # backward compatibility.
+        if index == 0:
+            exporter_name = final_exporter_name
+        else:
+            exporter_name = "{}_{}".format(final_exporter_name, eval_spec_name)
+        exporter = tf.estimator.FinalExporter(
+            name=exporter_name, serving_input_receiver_fn=predict_input_fn
+        )
+        eval_specs.append(
+            tf.estimator.EvalSpec(
+                name=eval_spec_name,
+                input_fn=eval_input_fn,
+                steps=None,
+                exporters=exporter,
+                throttle_secs=throttle_secs,
+            )
+        )
+
+    if eval_on_train_data:
+        eval_specs.append(
+            tf.estimator.EvalSpec(
+                name="eval_on_train", input_fn=eval_on_train_input_fn, steps=None
+            )
+        )
+
+    return train_spec, eval_specs


 def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
-  """Perform continuous evaluation on checkpoints written to a model directory.
+    """Perform continuous evaluation on checkpoints written to a model directory.

   Args:
     estimator: Estimator object to use for evaluation.
@@ -838,40 +925,45 @@ def continuous_eval(estimator, model_dir, input_fn, train_steps, name):
     name: Namescope for eval summary.
   """

-  def terminate_eval():
-    tf.logging.info('Terminating eval after 180 seconds of no checkpoints')
-    return True
-
-  for ckpt in contrib_training.checkpoints_iterator(
-      model_dir, min_interval_secs=180, timeout=None,
-      timeout_fn=terminate_eval):
-
-    tf.logging.info('Starting Evaluation.')
-    try:
-      eval_results = estimator.evaluate(
-          input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name)
-      tf.logging.info('Eval results: %s' % eval_results)
-
-      # Terminate eval job when final checkpoint is reached
-      current_step = int(os.path.basename(ckpt).split('-')[1])
-      if current_step >= train_steps:
-        tf.logging.info(
-            'Evaluation finished after training step %d' % current_step)
-        break
-
-    except tf.errors.NotFoundError:
-      tf.logging.info(
-          'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)
-
-
-def populate_experiment(run_config,
-                        hparams,
-                        pipeline_config_path,
-                        train_steps=None,
-                        eval_steps=None,
-                        model_fn_creator=create_model_fn,
-                        **kwargs):
-  """Populates an `Experiment` object.
+    def terminate_eval():
+        tf.logging.info("Terminating eval after 180 seconds of no checkpoints")
+        return True
+
+    for ckpt in contrib_training.checkpoints_iterator(
+        model_dir, min_interval_secs=180, timeout=None, timeout_fn=terminate_eval
+    ):
+
+        tf.logging.info("Starting Evaluation.")
+        try:
+            eval_results = estimator.evaluate(
+                input_fn=input_fn, steps=None, checkpoint_path=ckpt, name=name
+            )
+            tf.logging.info("Eval results: %s" % eval_results)
+
+            # Terminate eval job when final checkpoint is reached
+            current_step = int(os.path.basename(ckpt).split("-")[1])
+            if current_step >= train_steps:
+                tf.logging.info(
+                    "Evaluation finished after training step %d" % current_step
+                )
+                break
+
+        except tf.errors.NotFoundError:
+            tf.logging.info(
+                "Checkpoint %s no longer exists, skipping checkpoint" % ckpt
+            )
+
+
+def populate_experiment(
+    run_config,
+    hparams,
+    pipeline_config_path,
+    train_steps=None,
+    eval_steps=None,
+    model_fn_creator=create_model_fn,
+    **kwargs
+):
+    """Populates an `Experiment` object.

   EXPERIMENT CLASS IS DEPRECATED. Please switch to
   tf.estimator.train_and_evaluate. As an example, see model_main.py.
@@ -900,35 +992,39 @@ def populate_experiment(run_config,
     An `Experiment` that defines all aspects of training, evaluation, and
     export.
   """
-  tf.logging.warning('Experiment is being deprecated. Please use '
-                     'tf.estimator.train_and_evaluate(). See model_main.py for '
-                     'an example.')
-  train_and_eval_dict = create_estimator_and_inputs(
-      run_config,
-      hparams,
-      pipeline_config_path,
-      train_steps=train_steps,
-      eval_steps=eval_steps,
-      model_fn_creator=model_fn_creator,
-      save_final_config=True,
-      **kwargs)
-  estimator = train_and_eval_dict['estimator']
-  train_input_fn = train_and_eval_dict['train_input_fn']
-  eval_input_fns = train_and_eval_dict['eval_input_fns']
-  predict_input_fn = train_and_eval_dict['predict_input_fn']
-  train_steps = train_and_eval_dict['train_steps']
-
-  export_strategies = [
-      contrib_learn.utils.saved_model_export_utils.make_export_strategy(
-          serving_input_fn=predict_input_fn)
-  ]
-
-  return contrib_learn.Experiment(
-      estimator=estimator,
-      train_input_fn=train_input_fn,
-      eval_input_fn=eval_input_fns[0],
-      train_steps=train_steps,
-      eval_steps=None,
-      export_strategies=export_strategies,
-      eval_delay_secs=120,
-  )
+    tf.logging.warning(
+        "Experiment is being deprecated. Please use "
+        "tf.estimator.train_and_evaluate(). See model_main.py for "
+        "an example."
+    )
+    train_and_eval_dict = create_estimator_and_inputs(
+        run_config,
+        hparams,
+        pipeline_config_path,
+        train_steps=train_steps,
+        eval_steps=eval_steps,
+        model_fn_creator=model_fn_creator,
+        save_final_config=True,
+        **kwargs
+    )
+    estimator = train_and_eval_dict["estimator"]
+    train_input_fn = train_and_eval_dict["train_input_fn"]
+    eval_input_fns = train_and_eval_dict["eval_input_fns"]
+    predict_input_fn = train_and_eval_dict["predict_input_fn"]
+    train_steps = train_and_eval_dict["train_steps"]
+
+    export_strategies = [
+        contrib_learn.utils.saved_model_export_utils.make_export_strategy(
+            serving_input_fn=predict_input_fn
+        )
+    ]
+
+    return contrib_learn.Experiment(
+        estimator=estimator,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fns[0],
+        train_steps=train_steps,
+        eval_steps=None,
+        export_strategies=export_strategies,
+        eval_delay_secs=120,
+    )
diff --git a/research/object_detection/model_main.py b/research/object_detection/model_main.py
index 5e8db1e5..1cab113e 100644
--- a/research/object_detection/model_main.py
+++ b/research/object_detection/model_main.py
@@ -24,6 +24,7 @@ import tensorflow as tf

 from object_detection import model_hparams
 from object_detection import model_lib
+from object_detection.hooks import train_hooks

 flags.DEFINE_string(
     'model_dir', None, 'Path to output model directory '
@@ -41,6 +42,11 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
                      'one of every n train input examples for evaluation, '
                      'where n is provided. This is only used if '
                      '`eval_training_data` is True.')
+flags.DEFINE_integer(
+    "throttle_secs", 900, "Do not re-evaluate unless the last "
+    "evaluation was started at least this many seconds ago. "
+    "Of course, evaluation does not occur if no new "
+    "checkpoints are available, hence, this is the minimum.")
 flags.DEFINE_string(
     'hparams_overrides', None, 'Hyperparameter overrides, '
     'represented as a string containing comma-separated '
@@ -53,6 +59,20 @@ flags.DEFINE_boolean(
     'run_once', False, 'If running in eval-only mode, whether to run just '
     'one round of eval vs running continuously (default).'
 )
+flags.DEFINE_boolean(
+    "load_pretrained", True, "If loading pretrained model, otherwise "
+    "initialize weights randomly"
+)
+flags.DEFINE_float(
+    "sparsity", None, "Desired sparsity to achieve during training. If sparsity"
+    " is None then model pruning will not take place"
+)
+flags.DEFINE_integer(
+    "pruning_start_step", None, "Step at which pruning will start"
+)
+flags.DEFINE_integer(
+    "pruning_end_step", None, "Step at which pruning will stop"
+)
 FLAGS = flags.FLAGS


@@ -63,7 +83,8 @@ def main(unused_argv):

   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
-      hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
+      hparams=model_hparams.create_hparams(
+        FLAGS.load_pretrained, FLAGS.hparams_overrides),
       pipeline_config_path=FLAGS.pipeline_config_path,
       train_steps=FLAGS.num_train_steps,
       sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
@@ -93,13 +114,24 @@ def main(unused_argv):
       model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                 train_steps, name)
   else:
+    if FLAGS.sparsity:
+        model_pruning_hook = train_hooks.ModelPruningHook(
+            target_sparsity=FLAGS.sparsity,
+            start_step=FLAGS.pruning_start_step,
+            end_step=FLAGS.pruning_end_step
+        )
+        hooks = [model_pruning_hook]
+    else:
+        hooks = None
     train_spec, eval_specs = model_lib.create_train_and_eval_specs(
         train_input_fn,
         eval_input_fns,
         eval_on_train_input_fn,
         predict_input_fn,
         train_steps,
-        eval_on_train_data=False)
+        eval_on_train_data=False,
+        hooks=hooks,
+        throttle_secs=FLAGS.throttle_secs)

     # Currently only a single Eval Spec is allowed.
     tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
diff --git a/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py b/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py
index 3152c7af..d04aa3df 100644
--- a/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py
+++ b/research/object_detection/models/faster_rcnn_inception_v2_feature_extractor.py
@@ -19,12 +19,13 @@ See "Rethinking the Inception Architecture for Computer Vision"
 https://arxiv.org/abs/1512.00567
 """
 import tensorflow as tf
-from tensorflow.contrib import slim as contrib_slim

 from object_detection.meta_architectures import faster_rcnn_meta_arch
 from nets import inception_v2
+from nets import masked_inception_v2

-slim = contrib_slim
+slim = tf.contrib.slim
+model_pruning = tf.contrib.model_pruning


 def _batch_norm_arg_scope(list_ops,
@@ -253,3 +254,212 @@ class FasterRCNNInceptionV2FeatureExtractor(
                 [branch_0, branch_1, branch_2, branch_3], concat_dim)

     return proposal_classifier_features
+
+
+class FasterRCNNMaskedInceptionV2FeatureExtractor(
+    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
+  """Faster R-CNN Masked Inception V2 feature extractor implementation.
+
+  This variant uses a masked version of InceptionV2 which contains both
+  auxiliary mask and threshold variables at each layer which will be used for
+  model sparsification during training.
+  """
+
+  def __init__(self,
+               is_training,
+               first_stage_features_stride,
+               batch_norm_trainable=False,
+               reuse_weights=None,
+               weight_decay=0.0,
+               depth_multiplier=1.0,
+               min_depth=16):
+    """Constructor.
+
+    Args:
+      is_training: See base class.
+      first_stage_features_stride: See base class.
+      batch_norm_trainable: See base class.
+      reuse_weights: See base class.
+      weight_decay: See base class.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+
+    Raises:
+      ValueError: If `first_stage_features_stride` is not 8 or 16.
+    """
+    if first_stage_features_stride != 8 and first_stage_features_stride != 16:
+      raise ValueError('`first_stage_features_stride` must be 8 or 16.')
+    self._depth_multiplier = depth_multiplier
+    self._min_depth = min_depth
+    super(FasterRCNNMaskedInceptionV2FeatureExtractor, self).__init__(
+        is_training, first_stage_features_stride, batch_norm_trainable,
+        reuse_weights, weight_decay)
+
+  def preprocess(self, resized_inputs):
+    """Faster R-CNN Inception V2 preprocessing.
+
+    Maps pixel values to the range [-1, 1].
+
+    Args:
+      resized_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+
+    Returns:
+      preprocessed_inputs: a [batch, height, width, channels] float tensor
+        representing a batch of images.
+    """
+    return (2.0 / 255.0) * resized_inputs - 1.0
+
+  def _extract_proposal_features(self, preprocessed_inputs, scope):
+    """Extracts first stage RPN features.
+
+    Args:
+      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
+        representing a batch of images.
+      scope: A scope name.
+
+    Returns:
+      rpn_feature_map: A tensor with shape [batch, height, width, depth]
+      activations: A dictionary mapping feature extractor tensor names to
+        tensors
+
+    Raises:
+      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
+        (height or width) is less than 33.
+      ValueError: If the created network is missing the required activation.
+    """
+
+    preprocessed_inputs.get_shape().assert_has_rank(4)
+    shape_assert = tf.Assert(
+        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
+                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
+        ['image size must at least be 33 in both height and width.'])
+
+    with tf.control_dependencies([shape_assert]):
+      with tf.variable_scope('InceptionV2',
+                             reuse=self._reuse_weights) as scope:
+        with _batch_norm_arg_scope(
+          [model_pruning.masked_conv2d, slim.separable_conv2d],
+          batch_norm_scale=True, train_batch_norm=self._train_batch_norm):
+          _, activations = masked_inception_v2.masked_inception_v2_base(
+              preprocessed_inputs,
+              final_endpoint='Mixed_4e',
+              min_depth=self._min_depth,
+              depth_multiplier=self._depth_multiplier,
+              scope=scope)
+
+    return activations['Mixed_4e'], activations
+
+  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
+    """Extracts second stage box classifier features.
+
+    Args:
+      proposal_feature_maps: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
+        representing the feature map cropped to each proposal.
+      scope: A scope name (unused).
+
+    Returns:
+      proposal_classifier_features: A 4-D float tensor with shape
+        [batch_size * self.max_num_proposals, height, width, depth]
+        representing box classifier features for each proposal.
+    """
+    net = proposal_feature_maps
+
+    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
+    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
+
+    data_format = 'NHWC'
+    concat_dim = 3 if data_format == 'NHWC' else 1
+
+    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
+      with slim.arg_scope(
+          [model_pruning.masked_conv2d, slim.max_pool2d, slim.avg_pool2d],
+          stride=1,
+          padding='SAME',
+          data_format=data_format):
+        with _batch_norm_arg_scope(
+          [model_pruning.masked_conv2d, slim.separable_conv2d],
+          batch_norm_scale=True, train_batch_norm=self._train_batch_norm):
+
+          with tf.variable_scope('Mixed_5a'):
+            with tf.variable_scope('Branch_0'):
+              branch_0 = model_pruning.masked_conv2d(
+                  net, depth(128), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_0 = model_pruning.masked_conv2d(
+                branch_0, depth(192), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+            with tf.variable_scope('Branch_1'):
+              branch_1 = model_pruning.masked_conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_1 = model_pruning.masked_conv2d(
+                branch_1, depth(256), [3, 3], scope='Conv2d_0b_3x3')
+              branch_1 = model_pruning.masked_conv2d(
+                branch_1, depth(256), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+            with tf.variable_scope('Branch_2'):
+              branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
+                                         scope='MaxPool_1a_3x3')
+            net = tf.concat([branch_0, branch_1, branch_2], concat_dim)
+
+          with tf.variable_scope('Mixed_5b'):
+            with tf.variable_scope('Branch_0'):
+              branch_0 = model_pruning.masked_conv2d(
+                net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
+            with tf.variable_scope('Branch_1'):
+              branch_1 = model_pruning.masked_conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_1 = model_pruning.masked_conv2d(
+                branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
+            with tf.variable_scope('Branch_2'):
+              branch_2 = model_pruning.masked_conv2d(
+                  net, depth(160), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_2 = model_pruning.masked_conv2d(
+                branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
+              branch_2 = model_pruning.masked_conv2d(
+                branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
+            with tf.variable_scope('Branch_3'):
+              branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+              branch_3 = model_pruning.masked_conv2d(
+                  branch_3, depth(128), [1, 1],
+                  weights_initializer=trunc_normal(0.1),
+                  scope='Conv2d_0b_1x1')
+            net = tf.concat([branch_0, branch_1, branch_2, branch_3],
+                            concat_dim)
+
+          with tf.variable_scope('Mixed_5c'):
+            with tf.variable_scope('Branch_0'):
+              branch_0 = model_pruning.masked_conv2d(
+                net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
+            with tf.variable_scope('Branch_1'):
+              branch_1 = model_pruning.masked_conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_1 = model_pruning.masked_conv2d(
+                branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
+            with tf.variable_scope('Branch_2'):
+              branch_2 = model_pruning.masked_conv2d(
+                  net, depth(192), [1, 1],
+                  weights_initializer=trunc_normal(0.09),
+                  scope='Conv2d_0a_1x1')
+              branch_2 = model_pruning.masked_conv2d(
+                branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
+              branch_2 = model_pruning.masked_conv2d(
+                branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
+            with tf.variable_scope('Branch_3'):
+              branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
+              branch_3 = model_pruning.masked_conv2d(
+                  branch_3, depth(128), [1, 1],
+                  weights_initializer=trunc_normal(0.1),
+                  scope='Conv2d_0b_1x1')
+            proposal_classifier_features = tf.concat(
+                [branch_0, branch_1, branch_2, branch_3], concat_dim)
+
+    return proposal_classifier_features
diff --git a/research/slim/nets/inception.py b/research/slim/nets/inception.py
index b69cd2aa..2a9eb369 100644
--- a/research/slim/nets/inception.py
+++ b/research/slim/nets/inception.py
@@ -34,4 +34,10 @@ from nets.inception_v3 import inception_v3_base
 from nets.inception_v4 import inception_v4
 from nets.inception_v4 import inception_v4_arg_scope
 from nets.inception_v4 import inception_v4_base
+
+# Masked inception models
+from nets.masked_inception_v2 import masked_inception_v2
+from nets.masked_inception_v2 import masked_inception_v2_base
+from nets.masked_inception_v2 import masked_inception_v2_arg_scope
+
 # pylint: enable=unused-import
diff --git a/research/slim/nets/inception_utils.py b/research/slim/nets/inception_utils.py
index 493a684c..7f19c64c 100644
--- a/research/slim/nets/inception_utils.py
+++ b/research/slim/nets/inception_utils.py
@@ -25,19 +25,18 @@ from __future__ import division
 from __future__ import print_function

 import tensorflow as tf
-from tensorflow.contrib import slim as contrib_slim

-slim = contrib_slim
+slim = tf.contrib.slim
+model_pruning = tf.contrib.model_pruning


-def inception_arg_scope(
-    weight_decay=0.00004,
-    use_batch_norm=True,
-    batch_norm_decay=0.9997,
-    batch_norm_epsilon=0.001,
-    activation_fn=tf.nn.relu,
-    batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS,
-    batch_norm_scale=False):
+def inception_arg_scope(weight_decay=0.00004,
+                        use_batch_norm=True,
+                        batch_norm_decay=0.9997,
+                        batch_norm_epsilon=0.001,
+                        activation_fn=tf.nn.relu,
+                        batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
+                        batch_norm_scale=False):
   """Defines the default arg scope for inception models.

   Args:
@@ -82,3 +81,57 @@ def inception_arg_scope(
         normalizer_fn=normalizer_fn,
         normalizer_params=normalizer_params) as sc:
       return sc
+
+
+def masked_inception_arg_scope(weight_decay=0.00004,
+                               use_batch_norm=True,
+                               batch_norm_decay=0.9997,
+                               batch_norm_epsilon=0.001,
+                               activation_fn=tf.nn.relu,
+                               batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
+                               batch_norm_scale=False):
+  """Defines the default arg scope for masked inception models.
+
+  Args:
+    weight_decay: The weight decay to use for regularizing the model.
+    use_batch_norm: If `True`, batch_norm is applied after each convolution.
+    batch_norm_decay: Decay for batch norm moving average.
+    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
+      in batch norm.
+    activation_fn: Activation function for conv2d.
+    batch_norm_updates_collections: Collection for the update ops for
+      batch norm.
+    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
+      activations in the batch normalization layer.
+
+  Returns:
+    An `arg_scope` to use for the inception models.
+  """
+  batch_norm_params = {
+      # Decay for the moving averages.
+      'decay': batch_norm_decay,
+      # epsilon to prevent 0s in variance.
+      'epsilon': batch_norm_epsilon,
+      # collection containing update_ops.
+      'updates_collections': batch_norm_updates_collections,
+      # use fused batch norm if possible.
+      'fused': None,
+      'scale': batch_norm_scale,
+  }
+  if use_batch_norm:
+    normalizer_fn = slim.batch_norm
+    normalizer_params = batch_norm_params
+  else:
+    normalizer_fn = None
+    normalizer_params = {}
+  # Set weight_decay for weights in Conv and FC layers.
+  with slim.arg_scope(
+    [model_pruning.masked_conv2d, model_pruning.masked_fully_connected],
+    weights_regularizer=slim.l2_regularizer(weight_decay)):
+    with slim.arg_scope(
+        [model_pruning.masked_conv2d],
+        weights_initializer=slim.variance_scaling_initializer(),
+        activation_fn=activation_fn,
+        normalizer_fn=normalizer_fn,
+        normalizer_params=normalizer_params) as sc:
+      return sc
diff --git a/research/slim/nets/masked_inception_v2.py b/research/slim/nets/masked_inception_v2.py
new file mode 100644
index 00000000..ae2e7cdf
--- /dev/null
+++ b/research/slim/nets/masked_inception_v2.py
@@ -0,0 +1,596 @@
+"""Contains the definition for inception v2 with masked layers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from nets import inception_utils
+
+slim = tf.contrib.slim
+model_pruning = tf.contrib.model_pruning
+trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
+
+
+def masked_inception_v2_base(inputs,
+                             final_endpoint='Mixed_5c',
+                             min_depth=16,
+                             depth_multiplier=1.0,
+                             use_separable_conv=True,
+                             data_format='NHWC',
+                             include_root_block=True,
+                             scope=None):
+  """Masked Inception v2 (6a2).
+
+  Constructs an Inception v2 network from inputs to the given final endpoint.
+  This method can construct the network up to the layer inception(5b) as
+  described in http://arxiv.org/abs/1502.03167.
+
+  However, in this implementation instead of using tf.layers.conv2d we use
+  the tf.contrib.model_pruning variant layers.masked_conv2d which includes
+  auxiliary mask and threshold variables used in model sparsification. These
+  additional variables are to be removed after training is completed. The
+  separable Conv2d_1a_7x7 variant uses slim.separable_conv2d, not a masked op.
+
+  Args:
+    inputs: a tensor of shape [batch_size, height, width, channels].
+    final_endpoint: specifies the endpoint to construct the network up to. It
+      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
+      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
+      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
+      'Mixed_5c']. If include_root_block is False, ['Conv2d_1a_7x7',
+      'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will
+      not be available.
+    min_depth: Minimum depth value (number of channels) for all convolution ops.
+      Enforced when depth_multiplier < 1, and not an active constraint when
+      depth_multiplier >= 1.
+    depth_multiplier: Float multiplier for the depth (number of channels)
+      for all convolution ops. The value must be greater than zero. Typical
+      usage will be to set this value in (0, 1) to reduce the number of
+      parameters or computation cost of the model.
+    use_separable_conv: Use a separable convolution for the first layer
+      Conv2d_1a_7x7. If this is False, use a normal convolution instead.
+    data_format: Data format of the activations ('NHWC' or 'NCHW').
+    include_root_block: If True, include the convolution and max-pooling layers
+      before the inception modules. If False, excludes those layers.
+    scope: Optional variable_scope.
+
+  Returns:
+    tensor_out: output tensor corresponding to the final_endpoint.
+    end_points: a dict mapping endpoint names to the activations built so far,
+                for external use, for example summaries or losses.
+
+  Raises:
+    ValueError: if final_endpoint is unknown, depth_multiplier <= 0, data_format
+                is not 'NHWC'/'NCHW', or 'NCHW' is used with use_separable_conv.
+  """
+
+  # end_points will collect relevant activations for external use, for example
+  # summaries or losses.
+  end_points = {}
+
+  # Used to find thinned depths for each layer.
+  if depth_multiplier <= 0:
+    raise ValueError('depth_multiplier is not greater than zero.')
+  depth = lambda d: max(int(d * depth_multiplier), min_depth)
+
+  if data_format != 'NHWC' and data_format != 'NCHW':
+    raise ValueError('data_format must be either NHWC or NCHW.')
+  if data_format == 'NCHW' and use_separable_conv:
+    raise ValueError(
+        'separable convolution only supports NHWC layout. NCHW data format can'
+        ' only be used when use_separable_conv is False.'
+    )
+
+  concat_dim = 3 if data_format == 'NHWC' else 1
+  with tf.variable_scope(scope, 'InceptionV2', [inputs]):
+    with slim.arg_scope(
+        [model_pruning.masked_conv2d, slim.max_pool2d, slim.avg_pool2d],
+        stride=1,
+        padding='SAME',
+        data_format=data_format):
+
+      net = inputs
+      if include_root_block:
+        # Note that sizes in the comments below assume an input spatial size of
+        # 224x224, however, the inputs can be of any size greater than 32x32.
+
+        # 224 x 224 x 3
+        end_point = 'Conv2d_1a_7x7'
+
+        if use_separable_conv:
+          # depthwise_multiplier here is different from depth_multiplier.
+          # depthwise_multiplier determines the output channels of the initial
+          # depthwise conv (see docs for tf.nn.separable_conv2d), while
+          # depth_multiplier controls the # channels of the subsequent 1x1
+          # convolution. Must have
+          #   in_channels * depthwise_multiplier <= out_channels
+          # so that the separable convolution is not overparameterized.
+          depthwise_multiplier = min(int(depth(64) / 3), 8)
+          net = slim.separable_conv2d(
+              inputs,
+              depth(64), [7, 7],
+              depth_multiplier=depthwise_multiplier,
+              stride=2,
+              padding='SAME',
+              weights_initializer=trunc_normal(1.0),
+              scope=end_point)
+        else:
+          # Use a normal convolution instead of a separable convolution.
+          net = model_pruning.masked_conv2d(
+              inputs,
+              depth(64), [7, 7],
+              stride=2,
+              weights_initializer=trunc_normal(1.0),
+              scope=end_point)
+        end_points[end_point] = net
+        if end_point == final_endpoint:
+          return net, end_points
+        # 112 x 112 x 64
+        end_point = 'MaxPool_2a_3x3'
+        net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
+        end_points[end_point] = net
+        if end_point == final_endpoint:
+          return net, end_points
+        # 56 x 56 x 64
+        end_point = 'Conv2d_2b_1x1'
+        net = model_pruning.masked_conv2d(
+            net,
+            depth(64), [1, 1],
+            scope=end_point,
+            weights_initializer=trunc_normal(0.1))
+        end_points[end_point] = net
+        if end_point == final_endpoint:
+          return net, end_points
+        # 56 x 56 x 64
+        end_point = 'Conv2d_2c_3x3'
+        net = model_pruning.masked_conv2d(
+          net, depth(192), [3, 3], scope=end_point)
+        end_points[end_point] = net
+        if end_point == final_endpoint:
+          return net, end_points
+        # 56 x 56 x 192
+        end_point = 'MaxPool_3a_3x3'
+        net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
+        end_points[end_point] = net
+        if end_point == final_endpoint:
+          return net, end_points
+
+      # 28 x 28 x 192
+      # Inception module.
+      end_point = 'Mixed_3b'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(64), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(32), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 28 x 28 x 256
+      end_point = 'Mixed_3c'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 28 x 28 x 320
+      end_point = 'Mixed_4a'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+              net, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_0 = model_pruning.masked_conv2d(
+            branch_0, depth(160), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
+          branch_1 = model_pruning.masked_conv2d(
+              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = slim.max_pool2d(
+              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
+        net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 14 x 14 x 576
+      end_point = 'Mixed_4b'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(64), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(96), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 14 x 14 x 576
+      end_point = 'Mixed_4c'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(96), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(128), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(96), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 14 x 14 x 576
+      end_point = 'Mixed_4d'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(160), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(160), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(160), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(96), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 14 x 14 x 576
+      end_point = 'Mixed_4e'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(192), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(160), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(192), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(192), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(96), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 14 x 14 x 576
+      end_point = 'Mixed_5a'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+              net, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_0 = model_pruning.masked_conv2d(
+            branch_0, depth(192), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(192), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(256), [3, 3], scope='Conv2d_0b_3x3')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(256), [3, 3], stride=2, scope='Conv2d_1a_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
+                                     scope='MaxPool_1a_3x3')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 7 x 7 x 1024
+      end_point = 'Mixed_5b'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(192), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(160), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+      # 7 x 7 x 1024
+      end_point = 'Mixed_5c'
+      with tf.variable_scope(end_point):
+        with tf.variable_scope('Branch_0'):
+          branch_0 = model_pruning.masked_conv2d(
+            net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
+        with tf.variable_scope('Branch_1'):
+          branch_1 = model_pruning.masked_conv2d(
+              net, depth(192), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_1 = model_pruning.masked_conv2d(
+            branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3')
+        with tf.variable_scope('Branch_2'):
+          branch_2 = model_pruning.masked_conv2d(
+              net, depth(192), [1, 1],
+              weights_initializer=trunc_normal(0.09),
+              scope='Conv2d_0a_1x1')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3')
+          branch_2 = model_pruning.masked_conv2d(
+            branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3')
+        with tf.variable_scope('Branch_3'):
+          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
+          branch_3 = model_pruning.masked_conv2d(
+              branch_3, depth(128), [1, 1],
+              weights_initializer=trunc_normal(0.1),
+              scope='Conv2d_0b_1x1')
+        net = tf.concat(
+            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
+        end_points[end_point] = net
+        if end_point == final_endpoint: return net, end_points
+    raise ValueError('Unknown final endpoint %s' % final_endpoint)
+
+
+def masked_inception_v2(inputs,
+                        num_classes=1000,
+                        is_training=True,
+                        dropout_keep_prob=0.8,
+                        min_depth=16,
+                        depth_multiplier=1.0,
+                        prediction_fn=slim.softmax,
+                        spatial_squeeze=True,
+                        reuse=None,
+                        scope='InceptionV2',
+                        global_pool=False):
+  """Masked Inception v2 model for classification.
+
+  Constructs an Inception v2 network for classification as described in
+  http://arxiv.org/abs/1502.03167.
+
+  The default image size used to train this network is 224x224.
+
+  However, in this implementation instead of using tf.layers.conv2d we use
+  the tf.contrib.model_pruning variant layers.masked_conv2d which includes
+  auxiliary mask and threshold variables used in model sparsification. These
+  additional variables are to be removed after training is completed.
+
+  Args:
+    inputs: a tensor of shape [batch_size, height, width, channels].
+    num_classes: number of predicted classes. If 0 or None, the logits layer
+      is omitted and the input features to the logits layer (before dropout)
+      are returned instead.
+    is_training: training-mode flag applied to slim.batch_norm and slim.dropout.
+    dropout_keep_prob: the percentage of activation values that are retained.
+    min_depth: Minimum depth value (number of channels) for all convolution ops.
+      Enforced when depth_multiplier < 1, and not an active constraint when
+      depth_multiplier >= 1.
+    depth_multiplier: Float multiplier for the depth (number of channels)
+      for all convolution ops. The value must be greater than zero. Typical
+      usage will be to set this value in (0, 1) to reduce the number of
+      parameters or computation cost of the model.
+    prediction_fn: a function to get predictions out of logits.
+    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
+        shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+    reuse: whether or not the network and its variables should be reused. To be
+      able to reuse 'scope' must be given.
+    scope: Optional variable_scope.
+    global_pool: Optional boolean flag to control the avgpooling before the
+      logits layer. If false or unset, pooling is done with a fixed window
+      that reduces default-sized inputs to 1x1, while larger inputs lead to
+      larger outputs. If true, any input size is pooled down to 1x1.
+
+  Returns:
+    net: a Tensor with the logits (pre-softmax activations) if num_classes
+      is a non-zero integer, or the non-dropped-out input to the logits layer
+      if num_classes is 0 or None.
+    end_points: a dictionary from components of the network to the corresponding
+      activation.
+
+  Raises:
+    ValueError: if depth_multiplier <= 0. The base network is always built up
+                to its default final_endpoint; none can be passed in here.
+  """
+  if depth_multiplier <= 0:
+    raise ValueError('depth_multiplier is not greater than zero.')
+
+  # Base network, then final pooling and prediction.
+  with tf.variable_scope(scope, 'InceptionV2', [inputs], reuse=reuse) as scope:
+    with slim.arg_scope([slim.batch_norm, slim.dropout],
+                        is_training=is_training):
+      net, end_points = masked_inception_v2_base(
+          inputs, scope=scope, min_depth=min_depth,
+          depth_multiplier=depth_multiplier)
+      with tf.variable_scope('Logits'):
+        if global_pool:
+          # Global average pooling.
+          net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
+          end_points['global_pool'] = net
+        else:
+          # Pooling with a fixed kernel size.
+          kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
+          net = slim.avg_pool2d(net, kernel_size, padding='VALID',
+                                scope='AvgPool_1a_{}x{}'.format(*kernel_size))
+          end_points['AvgPool_1a'] = net
+        if not num_classes:
+          return net, end_points
+        # 1 x 1 x 1024
+        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
+        logits = model_pruning.masked_conv2d(
+          net, num_classes, [1, 1], activation_fn=None,
+          normalizer_fn=None, scope='Conv2d_1c_1x1')
+        if spatial_squeeze:
+          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
+      end_points['Logits'] = logits
+      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
+  return logits, end_points
+masked_inception_v2.default_image_size = 224
+
+
+def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
+  """Define kernel size which is automatically reduced for small input.
+
+  If the shape of the input images is unknown at graph construction time this
+  function assumes that the input images are large enough.
+
+  Args:
+    input_tensor: input tensor of size [batch_size, height, width, channels].
+    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]
+
+  Returns:
+    `kernel_size` unchanged when the static height or width is unknown;
+    otherwise the list [min(height, kernel_height), min(width, kernel_width)].
+
+  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
+  can be done with the code below. Problems are two-fold: (1) If the shape was
+  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
+  handle tensors that define the kernel size.
+      shape = tf.shape(input_tensor)
+      return tf.stack([tf.minimum(shape[1], kernel_size[0]),
+                       tf.minimum(shape[2], kernel_size[1])])
+  """
+  shape = input_tensor.get_shape().as_list()
+  if shape[1] is None or shape[2] is None:
+    kernel_size_out = kernel_size
+  else:
+    kernel_size_out = [min(shape[1], kernel_size[0]),
+                       min(shape[2], kernel_size[1])]
+  return kernel_size_out
+
+
+masked_inception_v2_arg_scope = inception_utils.masked_inception_arg_scope
diff --git a/research/slim/nets/masked_inception_v2_test.py b/research/slim/nets/masked_inception_v2_test.py
new file mode 100644
index 00000000..00e274ed
--- /dev/null
+++ b/research/slim/nets/masked_inception_v2_test.py
@@ -0,0 +1,396 @@
+"""Tests for nets.masked_inception_v2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+
+from nets import inception
+
+slim = tf.contrib.slim
+model_pruning = tf.contrib.model_pruning
+
+
+class MaskedInceptionV2Test(tf.test.TestCase):
+
+  def testBuildClassificationNetwork(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = 1000
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    logits, end_points = inception.masked_inception_v2(inputs, num_classes)
+    self.assertTrue(logits.op.name.startswith(
+        'InceptionV2/Logits/SpatialSqueeze'))
+    self.assertListEqual(logits.get_shape().as_list(),
+                         [batch_size, num_classes])
+    self.assertIn('Predictions', end_points)
+    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
+                         [batch_size, num_classes])
+
+  def testBuildPreLogitsNetwork(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = None
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    net, end_points = inception.masked_inception_v2(inputs, num_classes)
+    self.assertTrue(net.op.name.startswith('InceptionV2/Logits/AvgPool'))
+    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
+    self.assertNotIn('Logits', end_points)
+    self.assertNotIn('Predictions', end_points)
+
+  def testBuildBaseNetwork(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    mixed_5c, end_points = inception.masked_inception_v2_base(inputs)
+    self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))
+    self.assertListEqual(mixed_5c.get_shape().as_list(),
+                         [batch_size, 7, 7, 1024])
+    expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
+                          'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
+                          'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',
+                          'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
+                          'MaxPool_3a_3x3']
+    self.assertItemsEqual(end_points.keys(), expected_endpoints)
+
+  def testBuildOnlyUptoFinalEndpoint(self):
+    batch_size = 5
+    height, width = 224, 224
+    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
+                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
+                 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
+                 'Mixed_5a', 'Mixed_5b', 'Mixed_5c']
+    for index, endpoint in enumerate(endpoints):
+      with tf.Graph().as_default():
+        inputs = tf.random_uniform((batch_size, height, width, 3))
+        out_tensor, end_points = inception.masked_inception_v2_base(
+            inputs, final_endpoint=endpoint)
+        self.assertTrue(out_tensor.op.name.startswith(
+            'InceptionV2/' + endpoint))
+        self.assertItemsEqual(endpoints[:index+1], end_points.keys())
+
+  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.masked_inception_v2_base(
+        inputs, final_endpoint='Mixed_5c')
+    endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],
+                        'Mixed_3c': [batch_size, 28, 28, 320],
+                        'Mixed_4a': [batch_size, 14, 14, 576],
+                        'Mixed_4b': [batch_size, 14, 14, 576],
+                        'Mixed_4c': [batch_size, 14, 14, 576],
+                        'Mixed_4d': [batch_size, 14, 14, 576],
+                        'Mixed_4e': [batch_size, 14, 14, 576],
+                        'Mixed_5a': [batch_size, 7, 7, 1024],
+                        'Mixed_5b': [batch_size, 7, 7, 1024],
+                        'Mixed_5c': [batch_size, 7, 7, 1024],
+                        'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
+                        'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
+                        'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
+                        'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
+                        'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}
+    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
+    for endpoint_name in endpoints_shapes:
+      expected_shape = endpoints_shapes[endpoint_name]
+      self.assertIn(endpoint_name, end_points)
+      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
+                           expected_shape)
+
+  def testModelHasExpectedNumberOfParameters(self):
+    batch_size = 5
+    height, width = 224, 224
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    with slim.arg_scope(inception.masked_inception_v2_arg_scope()):
+      inception.masked_inception_v2_base(inputs)
+    total_params, _ = slim.model_analyzer.analyze_vars(
+        slim.get_model_variables())
+    # NOTE: since we're adding mask and threshold variables equivalent to all
+    # convolutional kernels, we're roughly doubling the size of the original
+    # inception_v2 network.
+    self.assertAlmostEqual(20313852, total_params)
+
+  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = 1000
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.masked_inception_v2(inputs, num_classes)
+
+    endpoint_keys = [key for key in end_points.keys()
+                     if key.startswith('Mixed') or key.startswith('Conv')]
+
+    _, end_points_with_multiplier = inception.masked_inception_v2(
+        inputs, num_classes, scope='depth_multiplied_net',
+        depth_multiplier=0.5)
+
+    for key in endpoint_keys:
+      original_depth = end_points[key].get_shape().as_list()[3]
+      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
+      self.assertEqual(0.5 * original_depth, new_depth)
+
+  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = 1000
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.masked_inception_v2(inputs, num_classes)
+
+    endpoint_keys = [key for key in end_points.keys()
+                     if key.startswith('Mixed') or key.startswith('Conv')]
+
+    _, end_points_with_multiplier = inception.masked_inception_v2(
+        inputs, num_classes, scope='depth_multiplied_net',
+        depth_multiplier=2.0)
+
+    for key in endpoint_keys:
+      original_depth = end_points[key].get_shape().as_list()[3]
+      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
+      self.assertEqual(2.0 * original_depth, new_depth)
+
+  def testRaiseValueErrorWithInvalidDepthMultiplier(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = 1000
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    with self.assertRaises(ValueError):
+      inception.masked_inception_v2(inputs, num_classes, depth_multiplier=-0.1)
+    with self.assertRaises(ValueError):
+      inception.masked_inception_v2(inputs, num_classes, depth_multiplier=0.0)
+
+  def testBuildEndPointsWithUseSeparableConvolutionFalse(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.masked_inception_v2_base(inputs)
+
+    endpoint_keys = [
+        key for key in end_points.keys()
+        if key.startswith('Mixed') or key.startswith('Conv')
+    ]
+
+    _, end_points_with_replacement = inception.masked_inception_v2_base(
+        inputs, use_separable_conv=False)
+
+    # The endpoint shapes must be equal to the original shape even when the
+    # separable convolution is replaced with a normal convolution.
+    for key in endpoint_keys:
+      original_shape = end_points[key].get_shape().as_list()
+      self.assertIn(key, end_points_with_replacement)
+      new_shape = end_points_with_replacement[key].get_shape().as_list()
+      self.assertListEqual(original_shape, new_shape)
+
+  def testBuildEndPointsNCHWDataFormat(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    _, end_points = inception.masked_inception_v2_base(inputs)
+
+    endpoint_keys = [
+        key for key in end_points.keys()
+        if key.startswith('Mixed') or key.startswith('Conv')
+    ]
+
+    inputs_in_nchw = tf.random_uniform((batch_size, 3, height, width))
+    _, end_points_with_replacement = inception.masked_inception_v2_base(
+        inputs_in_nchw, use_separable_conv=False, data_format='NCHW')
+
+    # With the 'NCHW' data format, all endpoint activations have a transposed
+    # shape from the original shape with the 'NHWC' layout.
+    for key in endpoint_keys:
+      transposed_original_shape = tf.transpose(
+          end_points[key], [0, 3, 1, 2]).get_shape().as_list()
+      self.assertIn(key, end_points_with_replacement)
+      new_shape = end_points_with_replacement[key].get_shape().as_list()
+      self.assertListEqual(transposed_original_shape, new_shape)
+
+  def testBuildErrorsForDataFormats(self):
+    batch_size = 5
+    height, width = 224, 224
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+
+    # 'NCWH' data format is not supported.
+    with self.assertRaises(ValueError):
+      inception.masked_inception_v2_base(inputs, data_format='NCWH')
+
+    # 'NCHW' data format is not supported for separable convolution.
+    with self.assertRaises(ValueError):
+      inception.masked_inception_v2_base(inputs, data_format='NCHW')
+
+  def testHalfSizeImages(self):
+    batch_size = 5
+    height, width = 112, 112
+    num_classes = 1000
+
+    inputs = tf.random_uniform((batch_size, height, width, 3))
+    logits, end_points = inception.masked_inception_v2(inputs, num_classes)
+    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
+    self.assertListEqual(logits.get_shape().as_list(),
+                         [batch_size, num_classes])
+    pre_pool = end_points['Mixed_5c']
+    self.assertListEqual(pre_pool.get_shape().as_list(),
+                         [batch_size, 4, 4, 1024])
+
+  def testBuildBaseNetworkWithoutRootBlock(self):
+    batch_size = 5
+    height, width = 28, 28
+    channels = 192
+
+    inputs = tf.random_uniform((batch_size, height, width, channels))
+    _, end_points = inception.masked_inception_v2_base(
+        inputs, include_root_block=False)
+    endpoints_shapes = {
+        'Mixed_3b': [batch_size, 28, 28, 256],
+        'Mixed_3c': [batch_size, 28, 28, 320],
+        'Mixed_4a': [batch_size, 14, 14, 576],
+        'Mixed_4b': [batch_size, 14, 14, 576],
+        'Mixed_4c': [batch_size, 14, 14, 576],
+        'Mixed_4d': [batch_size, 14, 14, 576],
+        'Mixed_4e': [batch_size, 14, 14, 576],
+        'Mixed_5a': [batch_size, 7, 7, 1024],
+        'Mixed_5b': [batch_size, 7, 7, 1024],
+        'Mixed_5c': [batch_size, 7, 7, 1024]
+    }
+    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
+    for endpoint_name in endpoints_shapes:
+      expected_shape = endpoints_shapes[endpoint_name]
+      self.assertIn(endpoint_name, end_points)
+      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
+                           expected_shape)
+
+  def testUnknownImageShape(self):
+    tf.reset_default_graph()
+    batch_size = 2
+    height, width = 224, 224
+    num_classes = 1000
+    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
+    with self.test_session() as sess:
+      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
+      logits, end_points = inception.masked_inception_v2(inputs, num_classes)
+      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
+      self.assertListEqual(logits.get_shape().as_list(),
+                           [batch_size, num_classes])
+      pre_pool = end_points['Mixed_5c']
+      feed_dict = {inputs: input_np}
+      tf.global_variables_initializer().run()
+      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
+      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
+
+  def testGlobalPoolUnknownImageShape(self):
+    tf.reset_default_graph()
+    batch_size = 1
+    height, width = 250, 300
+    num_classes = 1000
+    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
+    with self.test_session() as sess:
+      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
+      logits, end_points = inception.masked_inception_v2(
+          inputs, num_classes, global_pool=True)
+      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
+      self.assertListEqual(logits.get_shape().as_list(),
+                           [batch_size, num_classes])
+      pre_pool = end_points['Mixed_5c']
+      feed_dict = {inputs: input_np}
+      tf.global_variables_initializer().run()
+      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
+      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 10, 1024])
+
+  def testUnknownBatchSize(self):
+    batch_size = 1
+    height, width = 224, 224
+    num_classes = 1000
+
+    inputs = tf.placeholder(tf.float32, (None, height, width, 3))
+    logits, _ = inception.masked_inception_v2(inputs, num_classes)
+    # Same default-scope op-name prefix that the other tests here assert on.
+    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
+    self.assertListEqual(logits.get_shape().as_list(), [None, num_classes])
+    images = tf.random_uniform((batch_size, height, width, 3))
+
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      output = sess.run(logits, {inputs: images.eval()})
+      self.assertEqual(output.shape, (batch_size, num_classes))
+
+  def testEvaluation(self):
+    batch_size = 2
+    height, width = 224, 224
+    num_classes = 1000
+
+    eval_inputs = tf.random_uniform((batch_size, height, width, 3))
+    logits, _ = inception.masked_inception_v2(
+        eval_inputs, num_classes, is_training=False)
+    predictions = tf.argmax(logits, 1)
+
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      output = sess.run(predictions)
+      self.assertEqual(output.shape, (batch_size,))
+
+  def testTrainEvalWithReuse(self):
+    train_batch_size = 5
+    eval_batch_size = 2
+    height, width = 150, 150
+    num_classes = 1000
+
+    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
+    inception.masked_inception_v2(train_inputs, num_classes)
+    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
+    logits, _ = inception.masked_inception_v2(eval_inputs, num_classes, reuse=True)
+    predictions = tf.argmax(logits, 1)
+
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      output = sess.run(predictions)
+      self.assertEqual(output.shape, (eval_batch_size,))
+
+  def testLogitsNotSqueezed(self):
+    num_classes = 25
+    images = tf.random_uniform([1, 224, 224, 3])
+    logits, _ = inception.masked_inception_v2(
+        images, num_classes=num_classes,
+        spatial_squeeze=False)
+
+    with self.test_session() as sess:
+      tf.global_variables_initializer().run()
+      logits_out = sess.run(logits)
+      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
+
+  def testNoBatchNormScaleByDefault(self):
+    height, width = 224, 224
+    num_classes = 1000
+    inputs = tf.placeholder(tf.float32, (1, height, width, 3))
+    with slim.arg_scope(inception.masked_inception_v2_arg_scope()):
+      inception.masked_inception_v2(inputs, num_classes, is_training=False)
+
+    self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
+
+  def testBatchNormScale(self):
+    height, width = 224, 224
+    num_classes = 1000
+    inputs = tf.placeholder(tf.float32, (1, height, width, 3))
+    with slim.arg_scope(
+        inception.masked_inception_v2_arg_scope(batch_norm_scale=True)):
+      inception.masked_inception_v2(inputs, num_classes, is_training=False)
+
+    gamma_names = set(
+        v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$'))
+    self.assertGreater(len(gamma_names), 0)
+    for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
+      self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
+
+
+if __name__ == '__main__':
+  tf.test.main()