From f6a81cc2f524bf3d2e9cb1923d101f1308e4711b Mon Sep 17 00:00:00 2001
From: "Hongyu, Chiu" <20734616+james77777778@users.noreply.github.com>
Date: Wed, 24 Jul 2024 12:44:07 +0800
Subject: [PATCH] Bring back loss information for multiple outputs (#20023)

* Bring back loss info for multiple outputs

* Fix CI

* Update torch eval mode

* Add SymbolicScope

* Minor updates and add tests

* Address comment

* Fix TorchWrapper of training args
---
 keras/__init__.py                             |  1 +
 keras/api/__init__.py                         |  1 +
 keras/api/_tf_keras/keras/__init__.py         |  1 +
 keras/src/backend/__init__.py                 |  2 +
 keras/src/backend/common/symbolic_scope.py    | 23 +++++++
 .../src/backend/common/symbolic_scope_test.py | 26 +++++++
 keras/src/backend/jax/core.py                 |  3 +-
 keras/src/backend/numpy/core.py               |  3 +-
 keras/src/backend/numpy/trainer.py            | 14 +++-
 keras/src/backend/tensorflow/core.py          |  3 +-
 keras/src/backend/torch/core.py               |  3 +-
 keras/src/layers/layer.py                     |  6 +-
 keras/src/models/model_test.py                | 47 ++++++-------
 keras/src/trainers/compile_utils.py           | 53 +++++++++++++-
 keras/src/trainers/trainer.py                 | 19 ++++-
 keras/src/trainers/trainer_test.py            | 69 ++++++++++++++++++-
 keras/src/utils/torch_utils.py                |  6 +-
 keras/src/utils/torch_utils_test.py           | 50 +++++++++++++-
 18 files changed, 289 insertions(+), 41 deletions(-)
 create mode 100644 keras/src/backend/common/symbolic_scope.py
 create mode 100644 keras/src/backend/common/symbolic_scope_test.py

diff --git a/keras/__init__.py b/keras/__init__.py
index 701568f2a01..5a429d3a5d8 100644
--- a/keras/__init__.py
+++ b/keras/__init__.py
@@ -18,6 +18,7 @@
 from keras.api import Regularizer
 from keras.api import Sequential
 from keras.api import StatelessScope
+from keras.api import SymbolicScope
 from keras.api import Variable
 from keras.api import __version__
 from keras.api import activations
diff --git a/keras/api/__init__.py b/keras/api/__init__.py
index 1750a42e869..9d082ae9b89 100644
--- a/keras/api/__init__.py
+++ b/keras/api/__init__.py
@@ -33,6 +33,7 @@
 from keras.api import utils
 from keras.src.backend.common.keras_tensor import KerasTensor
 from keras.src.backend.common.stateless_scope import StatelessScope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 from keras.src.backend.exports import Variable
 from keras.src.backend.exports import device
 from keras.src.backend.exports import name_scope
diff --git a/keras/api/_tf_keras/keras/__init__.py b/keras/api/_tf_keras/keras/__init__.py
index 5e0a7229473..39a7e9cdb18 100644
--- a/keras/api/_tf_keras/keras/__init__.py
+++ b/keras/api/_tf_keras/keras/__init__.py
@@ -31,6 +31,7 @@
 from keras.api._tf_keras.keras import preprocessing
 from keras.src.backend.common.keras_tensor import KerasTensor
 from keras.src.backend.common.stateless_scope import StatelessScope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 from keras.src.backend.exports import Variable
 from keras.src.backend.exports import device
 from keras.src.backend.exports import name_scope
diff --git a/keras/src/backend/__init__.py b/keras/src/backend/__init__.py
index 5c7fa223520..794fe3ca364 100644
--- a/keras/src/backend/__init__.py
+++ b/keras/src/backend/__init__.py
@@ -14,6 +14,8 @@
 from keras.src.backend.common.stateless_scope import StatelessScope
 from keras.src.backend.common.stateless_scope import get_stateless_scope
 from keras.src.backend.common.stateless_scope import in_stateless_scope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
+from keras.src.backend.common.symbolic_scope import in_symbolic_scope
 from keras.src.backend.common.variables import AutocastScope
 from keras.src.backend.common.variables import get_autocast_scope
 from keras.src.backend.common.variables import is_float_dtype
diff --git a/keras/src/backend/common/symbolic_scope.py b/keras/src/backend/common/symbolic_scope.py
new file mode 100644
index 00000000000..15cd7a5ee05
--- /dev/null
+++ b/keras/src/backend/common/symbolic_scope.py
@@ -0,0 +1,23 @@
+from keras.src.api_export import keras_export
+from keras.src.backend.common import global_state
+
+
+@keras_export("keras.SymbolicScope")
+class SymbolicScope:
+    """Scope to indicate the symbolic stage."""
+
+    def __enter__(self):
+        self.original_scope = get_symbolic_scope()
+        global_state.set_global_attribute("symbolic_scope", self)
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        global_state.set_global_attribute("symbolic_scope", self.original_scope)
+
+
+def in_symbolic_scope():
+    return global_state.get_global_attribute("symbolic_scope") is not None
+
+
+def get_symbolic_scope():
+    return global_state.get_global_attribute("symbolic_scope")
diff --git a/keras/src/backend/common/symbolic_scope_test.py b/keras/src/backend/common/symbolic_scope_test.py
new file mode 100644
index 00000000000..092dcfe0748
--- /dev/null
+++ b/keras/src/backend/common/symbolic_scope_test.py
@@ -0,0 +1,26 @@
+import numpy as np
+
+from keras.src import ops
+from keras.src import testing
+from keras.src.backend.common.symbolic_scope import SymbolicScope
+from keras.src.backend.common.symbolic_scope import in_symbolic_scope
+
+
+class TestSymbolicScope(testing.TestCase):
+    def test_basic_flow(self):
+
+        # Define a function that behaves differently according to
+        # `in_symbolic_scope`.
+        def compute_loss(y, y_pred):
+            if in_symbolic_scope():
+                return ops.zeros_like(y)
+            return ops.add(y, y_pred)
+
+        y = ops.ones(shape=(2,))
+        y_pred = ops.ones(shape=(2,))
+        with SymbolicScope():
+            loss = compute_loss(y, y_pred)
+        self.assertAllClose(loss, np.zeros((2,)))
+
+        loss = compute_loss(y, y_pred)
+        self.assertAllClose(loss, 2 * np.ones((2,)))
diff --git a/keras/src/backend/jax/core.py b/keras/src/backend/jax/core.py
index 3ccaf06a980..c36dfee6a04 100644
--- a/keras/src/backend/jax/core.py
+++ b/keras/src/backend/jax/core.py
@@ -10,6 +10,7 @@
 from keras.src.backend.common import standardize_dtype
 from keras.src.backend.common.keras_tensor import KerasTensor
 from keras.src.backend.common.stateless_scope import StatelessScope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 from keras.src.backend.jax import distribution_lib
 
 SUPPORTS_SPARSE_TENSORS = True
@@ -101,7 +102,7 @@ def cast(x, dtype):
 
 # Shape / dtype / sparseness inference util
 def compute_output_spec(fn, *args, **kwargs):
-    with StatelessScope():
+    with StatelessScope(), SymbolicScope():
         built_in_types = (type(None), int, float, str, bool, complex, bytes)
 
         # First, separate symbolic args from other args
diff --git a/keras/src/backend/numpy/core.py b/keras/src/backend/numpy/core.py
index 2d34fcd7c6c..97be123f9e8 100644
--- a/keras/src/backend/numpy/core.py
+++ b/keras/src/backend/numpy/core.py
@@ -12,6 +12,7 @@
 from keras.src.backend.common.dtypes import result_type
 from keras.src.backend.common.keras_tensor import KerasTensor
 from keras.src.backend.common.stateless_scope import StatelessScope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 
 SUPPORTS_SPARSE_TENSORS = False
 
@@ -88,7 +89,7 @@ def vectorized_map(function, elements):
 
 # Shape / dtype inference util
 def compute_output_spec(fn, *args, **kwargs):
-    with StatelessScope():
+    with StatelessScope(), SymbolicScope():
 
         def has_none_shape(x):
             if isinstance(x, KerasTensor):
diff --git a/keras/src/backend/numpy/trainer.py b/keras/src/backend/numpy/trainer.py
index 6d40982be43..12c3aad56b6 100644
--- a/keras/src/backend/numpy/trainer.py
+++ b/keras/src/backend/numpy/trainer.py
@@ -97,7 +97,10 @@ def _symbolic_build(self, data_batch):
             self._compile_metrics is not None
             and not self._compile_metrics.built
         )
-        if model_unbuilt or compile_metrics_unbuilt:
+        compile_loss_unbuilt = (
+            self._compile_loss is not None and not self._compile_loss.built
+        )
+        if model_unbuilt or compile_metrics_unbuilt or compile_loss_unbuilt:
             # Create symbolic tensors matching an input batch.
 
             def to_symbolic_input(v):
@@ -133,6 +136,15 @@ def to_symbolic_input(v):
                     y_pred,
                     sample_weight=sample_weight,
                 )
+            if compile_loss_unbuilt:
+                # Build `CompileLoss` state with `backend.compute_output_spec`.
+                backend.compute_output_spec(
+                    self._compute_loss,
+                    x,
+                    y,
+                    y_pred,
+                    sample_weight=sample_weight,
+                )
         self._post_build()
 
     def fit(
diff --git a/keras/src/backend/tensorflow/core.py b/keras/src/backend/tensorflow/core.py
index db33ce227d0..09d65e827cc 100644
--- a/keras/src/backend/tensorflow/core.py
+++ b/keras/src/backend/tensorflow/core.py
@@ -14,6 +14,7 @@
 from keras.src.backend.common.name_scope import name_scope as base_name_scope
 from keras.src.backend.common.stateless_scope import StatelessScope
 from keras.src.backend.common.stateless_scope import in_stateless_scope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 from keras.src.backend.tensorflow.sparse import sparse_to_dense
 from keras.src.utils.naming import auto_name
 
@@ -182,7 +183,7 @@ def cast(x, dtype):
 
 
 def compute_output_spec(fn, *args, **kwargs):
-    with StatelessScope():
+    with StatelessScope(), SymbolicScope():
         graph_name = auto_name("scratch_graph")
         with tf.__internal__.FuncGraph(graph_name).as_default():
 
diff --git a/keras/src/backend/torch/core.py b/keras/src/backend/torch/core.py
index 3a941fc46a4..5f01d57d5b7 100644
--- a/keras/src/backend/torch/core.py
+++ b/keras/src/backend/torch/core.py
@@ -17,6 +17,7 @@
 from keras.src.backend.common.stateless_scope import StatelessScope
 from keras.src.backend.common.stateless_scope import get_stateless_scope
 from keras.src.backend.common.stateless_scope import in_stateless_scope
+from keras.src.backend.common.symbolic_scope import SymbolicScope
 from keras.src.backend.config import floatx
 
 SUPPORTS_SPARSE_TENSORS = False
@@ -335,7 +336,7 @@ def symbolic_call(fn, args, kwargs, fill_value):
         )
         return fn(*eager_args, **eager_kwargs)
 
-    with StatelessScope(), torch.no_grad():
+    with StatelessScope(), SymbolicScope(), torch.no_grad():
         outputs = symbolic_call(fn, args, kwargs, fill_value=83)
 
     none_in_shape = any(
diff --git a/keras/src/layers/layer.py b/keras/src/layers/layer.py
index a7383c97441..d09c1358268 100644
--- a/keras/src/layers/layer.py
+++ b/keras/src/layers/layer.py
@@ -32,6 +32,7 @@
 from keras.src.backend import KerasTensor
 from keras.src.backend.common import global_state
 from keras.src.backend.common.name_scope import current_path
+from keras.src.backend.common.symbolic_scope import in_symbolic_scope
 from keras.src.distribution import distribution_lib
 from keras.src.dtype_policies import DTypePolicyMap
 from keras.src.layers import input_spec
@@ -1148,7 +1149,10 @@ def _get_regularization_losses(self):
         for variable in self.trainable_weights:
             if variable.regularizer is None:
                 continue
-            if backend.in_stateless_scope():
+            if backend.in_stateless_scope() and not in_symbolic_scope():
+                # If in symbolic scope, we might get `None` from
+                # `get_current_value` in `backend.compute_output_spec`. So we
+                # assign `variable` instead.
                 v = backend.get_stateless_scope().get_current_value(variable)
             else:
                 v = variable
diff --git a/keras/src/models/model_test.py b/keras/src/models/model_test.py
index dc2d1816c4f..7ece156dcea 100644
--- a/keras/src/models/model_test.py
+++ b/keras/src/models/model_test.py
@@ -239,14 +239,13 @@ def test_functional_list_outputs_list_losses(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_mean_squared_error",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
             ]
         )
@@ -270,16 +269,15 @@ def test_functional_list_outputs_list_losses_abbr(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_bce",
                 "output_a_mae",
                 "output_a_mse",
                 "output_b_acc",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mse",
             ]
         )
@@ -303,14 +301,13 @@ def test_functional_list_outputs_nested_list_losses(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_mean_squared_error",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
             ]
         )
@@ -351,15 +348,14 @@ def test_functional_dict_outputs_dict_losses(self):
             verbose=0,
         )
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_mean_squared_error",
                 "output_a_weighted_mean_squared_error",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
                 "output_b_weighted_accuracy",
                 "output_b_weighted_mean_squared_error",
@@ -396,15 +392,14 @@ def test_functional_list_outputs_dict_losses_metrics(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_mean_squared_error",
                 "output_a_weighted_mean_squared_error",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
                 "output_b_weighted_accuracy",
                 "output_b_weighted_mean_squared_error",
@@ -436,18 +431,17 @@ def test_functional_list_outputs_dict_losses_metrics_uniq_weighted(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         # `output_b_accuracy` doesn't have `weighted_` in metric name.
         # When a metric is only in weighted metrics, it skips `weighted_`
         # prefix. This behavior matches `tf.keras`.
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_a_mean_squared_error",
                 "output_a_weighted_mean_squared_error",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
             ]
         )
@@ -472,13 +466,12 @@ def test_functional_list_outputs_dict_losses_partial_metrics(self):
         # Fit the model to make sure compile_metrics are built
         hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
         hist_keys = sorted(hist.history.keys())
-        # TODO `tf.keras` also outputs individual losses for outputs
         ref_keys = sorted(
             [
                 "loss",
-                # "output_a_loss",
+                "output_a_loss",
                 "output_b_accuracy",
-                # "output_b_loss",
+                "output_b_loss",
                 "output_b_mean_squared_error",
             ]
         )
@@ -500,7 +493,10 @@ def test_functional_dict_outputs_with_single_tensor(self):
                 "output_b": "binary_crossentropy",
             },
         )
-        model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
+        hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
+        hist_keys = sorted(hist.history.keys())
+        ref_keys = sorted(["loss", "output_a_loss", "output_b_loss"])
+        self.assertListEqual(hist_keys, ref_keys)
 
     def test_functional_list_outputs_with_custom_compute_loss(self):
         model = _get_model_with_custom_compute_loss()
@@ -514,7 +510,12 @@ def test_functional_list_outputs_with_custom_compute_loss(self):
         model.compile(
             optimizer="sgd", loss=["mean_squared_error", "binary_crossentropy"]
         )
-        model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
+        hist = model.fit(x, (y1, y2), batch_size=2, epochs=1, verbose=0)
+        hist_keys = sorted(hist.history.keys())
+        ref_keys = sorted(
+            ["binary_crossentropy_loss", "loss", "mean_squared_error_loss"]
+        )
+        self.assertListEqual(hist_keys, ref_keys)
 
     def test_functional_list_outputs_dict_losses_invalid_keys(self):
         model = _get_model_multi_outputs_list()
diff --git a/keras/src/trainers/compile_utils.py b/keras/src/trainers/compile_utils.py
index 9e21da2cf75..114925e669d 100644
--- a/keras/src/trainers/compile_utils.py
+++ b/keras/src/trainers/compile_utils.py
@@ -3,6 +3,7 @@
 from keras.src import ops
 from keras.src import tree
 from keras.src.utils.naming import get_object_name
+from keras.src.utils.tracking import Tracker
 
 
 class MetricsList(metrics_module.Metric):
@@ -431,6 +432,28 @@ def __init__(
         # Inferred by `y_pred` and `output_names`
         self.inferred_output_names = None
 
+        # Use `Tracker` to track metrics for individual losses.
+        self._metrics = []
+        self._tracker = Tracker(
+            {
+                "metrics": (
+                    lambda x: isinstance(x, metrics_module.Metric),
+                    self._metrics,
+                )
+            }
+        )
+
+    @property
+    def metrics(self):
+        return self._metrics
+
+    @property
+    def variables(self):
+        vars = []
+        for m in self.metrics:
+            vars.extend(m.variables)
+        return vars
+
     def build(self, y_true, y_pred):
         loss = self._user_loss
         loss_weights = self._user_loss_weights
@@ -527,6 +550,21 @@ def build(self, y_true, y_pred):
             for identifier, _y_true, _y_pred in zip(flat_losses, y_true, y_pred)
         ]
 
+        # Add `Mean` metric to the tracker for each loss.
+        if len(flat_losses) > 1:
+            for i, _loss in enumerate(flat_losses):
+                if _loss is not None:
+                    if inferred_output_names is not None and len(
+                        inferred_output_names
+                    ) == len(flat_losses):
+                        name = inferred_output_names[i]
+                    else:
+                        name = _loss.name
+                    name += "_loss"
+                    self._tracker.add_to_store(
+                        "metrics", metrics_module.Mean(name=name)
+                    )
+
         self.flat_losses = flat_losses
         self.flat_loss_weights = flat_loss_weights
         self.filtered_y_true_keys = filtered_y_true_keys
@@ -596,22 +634,31 @@ def call(self, y_true, y_pred, sample_weight=None):
         else:
             sample_weight = [None for _ in y_true]
 
+        # We need to add a dummy `None` if the model has only a single output.
+        metrics = [None] if len(self.metrics) == 0 else self.metrics
+
         # Iterate all losses in flat form.
         loss_values = []
-        for loss, y_t, y_p, loss_weight, sample_weight in zip(
+        for loss_fn, y_t, y_p, loss_weight, sample_weight, metric in zip(
             self.flat_losses,
             y_true,
             y_pred,
             self.flat_loss_weights,
             sample_weight,
+            metrics,
         ):
-            if loss:
+            if loss_fn:
                 value = ops.cast(
-                    loss(y_t, y_p, sample_weight), dtype=self.dtype
+                    loss_fn(y_t, y_p, sample_weight), dtype=self.dtype
                 )
                 if loss_weight is not None:
                     value = ops.multiply(value, loss_weight)
                 loss_values.append(value)
+                # Record individual losses.
+                if metric:
+                    metric.update_state(
+                        value, sample_weight=tree.flatten(y_p)[0].shape[0]
+                    )
         if loss_values:
             total_loss = sum(loss_values)
             return total_loss
diff --git a/keras/src/trainers/trainer.py b/keras/src/trainers/trainer.py
index 397c49ce391..9b027da9c4c 100644
--- a/keras/src/trainers/trainer.py
+++ b/keras/src/trainers/trainer.py
@@ -250,6 +250,8 @@ def metrics(self):
             metrics.extend(super().metrics)
         if self.compiled and self._compile_metrics is not None:
             metrics += [self._compile_metrics]
+        if self.compiled and self._compile_loss is not None:
+            metrics.extend(self._compile_loss.metrics)
         return metrics
 
     @property
@@ -1004,10 +1006,13 @@ def _symbolic_build(self, iterator=None, data_batch=None):
             self._compile_metrics is not None
             and not self._compile_metrics.built
         )
+        compile_loss_unbuilt = (
+            self._compile_loss is not None and not self._compile_loss.built
+        )
         optimizer_unbuilt = (
             self.optimizer is not None and not self.optimizer.built
         )
-        if model_unbuilt or compile_metrics_unbuilt:
+        if model_unbuilt or compile_metrics_unbuilt or compile_loss_unbuilt:
             # Create symbolic tensors matching an input batch.
 
             def to_symbolic_input(v):
@@ -1030,7 +1035,7 @@ def to_symbolic_input(v):
 
             # Build all model state with `backend.compute_output_spec`.
             try:
-                y_pred = backend.compute_output_spec(self, x)
+                y_pred = backend.compute_output_spec(self, x, training=False)
             except Exception as e:
                 raise RuntimeError(
                     "Unable to automatically build the model. "
@@ -1052,6 +1057,16 @@ def to_symbolic_input(v):
                     y_pred,
                     sample_weight=sample_weight,
                 )
+            if compile_loss_unbuilt:
+                # Build `CompileLoss` state with `backend.compute_output_spec`.
+                backend.compute_output_spec(
+                    self._compute_loss,
+                    x,
+                    y,
+                    y_pred,
+                    sample_weight=sample_weight,
+                    training=False,
+                )
         if optimizer_unbuilt:
             # Build optimizer
             self.optimizer.build(self.trainable_variables)
diff --git a/keras/src/trainers/trainer_test.py b/keras/src/trainers/trainer_test.py
index a4b21e5f505..064df23adc2 100644
--- a/keras/src/trainers/trainer_test.py
+++ b/keras/src/trainers/trainer_test.py
@@ -14,6 +14,7 @@
 from keras.src import ops
 from keras.src import optimizers
 from keras.src import testing
+from keras.src.backend.common.symbolic_scope import in_symbolic_scope
 from keras.src.callbacks.callback import Callback
 from keras.src.optimizers.rmsprop import RMSprop
 from keras.src.testing.test_utils import named_product
@@ -1406,7 +1407,8 @@ def compute_loss(
                 sample_weight=None,
                 training=True,
             ):
-                test_self.assertTrue(training)
+                if not in_symbolic_scope():
+                    test_self.assertTrue(training)
                 loss = super().compute_loss(
                     x, y, y_pred, sample_weight, training
                 )
@@ -1443,7 +1445,8 @@ def compute_loss(
                 sample_weight=None,
                 training=True,
             ):
-                test_self.assertTrue(training)
+                if not in_symbolic_scope():
+                    test_self.assertTrue(training)
                 loss = super().compute_loss(
                     x, y, y_pred, sample_weight, training
                 )
@@ -1478,7 +1481,8 @@ def compute_loss(
                 sample_weight=None,
                 training=True,
             ):
-                test_self.assertFalse(training)
+                if not in_symbolic_scope():
+                    test_self.assertFalse(training)
                 loss = super().compute_loss(
                     x, y, y_pred, sample_weight, training
                 )
@@ -1613,6 +1617,65 @@ def test_loss_weights(self):
             atol=1e-3,
         )
 
+    def test_symbolic_build(self):
+        class ExampleModelWithTrainingArgs(Trainer, layers.Layer):
+            def __init__(self, units):
+                layers.Layer.__init__(self)
+                Trainer.__init__(self)
+                self.dense = layers.Dense(units)
+                self.bn = layers.BatchNormalization(axis=-1)
+
+            def build(self, input_shape):
+                self.dense.build(input_shape)
+                input_shape = self.dense.compute_output_shape(input_shape)
+                self.bn.build(input_shape)
+
+            def call(self, x, training=None):
+                outputs = self.bn(self.dense(x), training=training)
+                return [outputs, outputs]
+
+        model = ExampleModelWithTrainingArgs(units=3)
+        model.compile(
+            optimizer=optimizers.SGD(),
+            loss=[losses.MeanSquaredError(), losses.MeanSquaredError()],
+            metrics=[metrics.MeanSquaredError(), metrics.MeanSquaredError()],
+        )
+        x = np.ones((4, 4))
+        y = np.zeros((4, 3))
+        model(x)  # Eager call to build model weights
+        ref_weights = model.get_weights()
+
+        # Before `_symbolic_build`
+        self.assertTrue(model.built)
+        self.assertFalse(model._compile_metrics.built)
+        self.assertFalse(model._compile_loss.built)
+        self.assertLen(model._compile_loss.metrics, 0)
+        self.assertLen(model.metrics, 2)
+
+        model._symbolic_build(data_batch=(x, (y, y)))
+        weights = model.get_weights()
+
+        # Ensure weights are intact
+        self.assertEqual(len(weights), len(ref_weights))
+        for w, ref_w in zip(weights, ref_weights):
+            self.assertAllClose(w, ref_w)
+
+        # Ensure `built`
+        self.assertTrue(model.built)
+        self.assertTrue(model._compile_metrics.built)
+        self.assertTrue(model._compile_loss.built)
+
+        # Ensure the number of metrics (original metrics + loss trackers)
+        self.assertLen(model._compile_metrics.metrics, 2)
+        self.assertLen(model._compile_loss.metrics, 2)
+        self.assertLen(model.metrics, 4)
+
+        # Ensure no values in metrics
+        for v in model._compile_metrics.variables:
+            self.assertAllClose(v, 0.0)
+        for v in model._compile_loss.variables:
+            self.assertAllClose(v, 0.0)
+
 
 class TrainerDistributeTest(testing.TestCase):
     @pytest.mark.skipif(
diff --git a/keras/src/utils/torch_utils.py b/keras/src/utils/torch_utils.py
index f20669e4955..e81018e0da7 100644
--- a/keras/src/utils/torch_utils.py
+++ b/keras/src/utils/torch_utils.py
@@ -112,7 +112,11 @@ def _track_module_parameters(self):
             self._track_variable(variable)
         self.built = True
 
-    def call(self, *args, **kwargs):
+    def call(self, *args, training=None, **kwargs):
+        if training is False:
+            self.eval()
+        else:
+            self.train()
         return self.module(*args, **kwargs)
 
     def save_own_variables(self, store):
diff --git a/keras/src/utils/torch_utils_test.py b/keras/src/utils/torch_utils_test.py
index 7e972f5b1b5..55003240710 100644
--- a/keras/src/utils/torch_utils_test.py
+++ b/keras/src/utils/torch_utils_test.py
@@ -29,9 +29,9 @@ def __init__(
             self.torch_wrappers.append(TorchModuleWrapper(torch_model))
         self.fc = layers.Dense(1)
 
-    def call(self, x):
+    def call(self, x, training=None):
         for wrapper in self.torch_wrappers:
-            x = wrapper(x)
+            x = wrapper(x, training=training)
         return self.fc(x)
 
     def get_config(self):
@@ -49,7 +49,7 @@ def __init__(self, *args, **kwargs):
         self.fc2 = torch.nn.Linear(4, 4)
         self.fc3 = layers.Dense(2)
 
-    def call(self, x):
+    def call(self, x, training=None):
         return self.fc3(self.fc2(self.bn1(self.fc1(x))))
 
 
@@ -82,6 +82,50 @@ def test_basic_usage(self, use_batch_norm, num_torch_layers):
         model.compile(optimizer="sgd", loss="mse")
         model.fit(np.random.random((3, 2)), np.random.random((3, 1)))
 
+    @parameterized.named_parameters(
+        (
+            "explicit_torch_wrapper",
+            Classifier,
+            {"use_batch_norm": True, "num_torch_layers": 1},
+        ),
+        ("implicit_torch_wrapper", ClassifierWithNoSpecialCasing, {}),
+    )
+    def test_training_args(self, cls, kwargs):
+        model = cls(**kwargs)
+        model(np.random.random((3, 2)), training=False)  # Eager call to build
+        ref_weights = model.get_weights()
+        ref_running_mean = backend.convert_to_numpy(
+            model.torch_wrappers[0].module[-1].running_mean
+            if cls is Classifier
+            else model.bn1.module.running_mean
+        )
+
+        # Test training=False doesn't affect model weights
+        model(np.random.random((3, 2)), training=False)
+        weights = model.get_weights()
+        for w, ref_w in zip(weights, ref_weights):
+            self.assertAllClose(w, ref_w)
+
+        # Test training=None affects BN's stats
+        model.set_weights(ref_weights)  # Restore previous weights
+        model(np.random.random((3, 2)))
+        running_mean = backend.convert_to_numpy(
+            model.torch_wrappers[0].module[-1].running_mean
+            if cls is Classifier
+            else model.bn1.module.running_mean
+        )
+        self.assertNotAllClose(running_mean, ref_running_mean)
+
+        # Test training=True affects BN's stats
+        model.set_weights(ref_weights)  # Restore previous weights
+        model(np.random.random((3, 2)), training=True)
+        running_mean = backend.convert_to_numpy(
+            model.torch_wrappers[0].module[-1].running_mean
+            if cls is Classifier
+            else model.bn1.module.running_mean
+        )
+        self.assertNotAllClose(running_mean, ref_running_mean)
+
     def test_module_autowrapping(self):
         model = ClassifierWithNoSpecialCasing()
         self.assertIsInstance(model.fc1, TorchModuleWrapper)
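
Notes:

With this patch, fitting a multi-output model once again reports one loss
entry per output in the `model.fit` history and progress logs, backed by the
new per-loss `Mean` trackers in `CompileLoss`. A minimal sketch of the
restored behavior (the layer names `output_a` / `output_b` are illustrative,
not taken from the diff):

    import numpy as np
    from keras import layers, models

    inputs = layers.Input(shape=(3,))
    output_a = layers.Dense(1, name="output_a")(inputs)
    output_b = layers.Dense(1, activation="sigmoid", name="output_b")(inputs)
    model = models.Model(inputs, [output_a, output_b])
    model.compile(
        optimizer="sgd", loss=["mean_squared_error", "binary_crossentropy"]
    )

    x = np.random.random((8, 3))
    y1 = np.random.random((8, 1))
    y2 = np.random.randint(0, 2, size=(8, 1))
    hist = model.fit(x, [y1, y2], batch_size=2, epochs=1, verbose=0)

    # Per-output loss keys are reported again, matching `tf.keras`:
    print(sorted(hist.history.keys()))
    # ['loss', 'output_a_loss', 'output_b_loss']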
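`backend.compute_output_spec` now runs under `SymbolicScope` on every backend,
so code that gets traced symbolically (for example a custom `compute_loss`)
can detect tracing and skip work that needs real values. A sketch using the
`keras.src.backend` helpers imported by the diff (`loss_like` is a made-up
function name):

    import keras
    from keras import ops
    from keras.src import backend

    def loss_like(y):
        if backend.in_symbolic_scope():
            # No real values are available during tracing; return a
            # result of the right shape instead of computing anything.
            return ops.zeros_like(y)
        return ops.add(y, y)

    spec = backend.compute_output_spec(loss_like, keras.KerasTensor((2,)))
    print(spec.shape)  # (2,)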
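The `TorchModuleWrapper.call` change means the Keras `training` flag now
toggles the wrapped module between train and eval mode, which matters for
modules with mode-dependent behavior such as batch norm or dropout. A sketch
(assumes KERAS_BACKEND=torch; mirrors the `test_training_args` logic above):

    import numpy as np
    import torch
    from keras.src.utils.torch_utils import TorchModuleWrapper

    wrapper = TorchModuleWrapper(torch.nn.BatchNorm1d(4))
    x = np.random.random((8, 4)).astype("float32")

    wrapper(x, training=False)  # `training is False` -> self.eval()
    assert not wrapper.module.training

    wrapper(x)  # `training=None` falls through to self.train()
    assert wrapper.module.training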