Fix Jacobian and Hessian calculation when the model returns a dict of outputs

henrysky · Sep 1, 2024 · f990a98 · f990a98
1 parent 7a91f58
commit f990a98
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 37 deletions.
diff --git a/src/astroNN/models/base_master_nn.py b/src/astroNN/models/base_master_nn.py
@@ -491,21 +491,21 @@ def hessian(self, x=None, mean_output=False, mc_num=1, denormalize=False):
         if len(input_shape_expectation) == 1:
             input_shape_expectation = input_shape_expectation[0]
 
-        # just in case only 1 data point is provided and mess up the shape issue
-        if len(input_shape_expectation) == 3:
-            x_data = np.atleast_3d(x_data)
-        elif len(input_shape_expectation) == 4:
-            if len(x_data.shape) < 4:
-                x_data = x_data[:, :, :, np.newaxis]
-        else:
-            raise ValueError("Input data shape do not match neural network expectation")
+        # # just in case only 1 data point is provided and mess up the shape issue
+        # if len(input_shape_expectation) == 3:
+        #     x_data = np.atleast_3d(x_data)
+        # elif len(input_shape_expectation) == 4:
+        #     if len(x_data.shape) < 4:
+        #         x_data = x_data[:, :, :, np.newaxis]
+        # else:
+        #     raise ValueError("Input data shape do not match neural network expectation")
 
         total_num = x_data.shape[0]
 
-        input_dim = len(np.squeeze(np.ones(input_shape_expectation[1:])).shape)
-        output_dim = len(np.squeeze(np.ones(output_shape_expectation[1:])).shape)
-        if input_dim > 3 or output_dim > 3:
-            raise ValueError("Unsupported data dimension")
+        # input_dim = len(np.squeeze(np.ones(input_shape_expectation[1:])).shape)
+        # output_dim = len(np.squeeze(np.ones(output_shape_expectation[1:])).shape)
+        # if input_dim > 3 or output_dim > 3:
+        #     raise ValueError("Unsupported data dimension")
 
         start_time = time.time()
         if keras.backend.backend() == "tensorflow":
@@ -520,17 +520,20 @@ def hessian(self, x=None, mean_output=False, mc_num=1, denormalize=False):
                     temp = _model(xtensor)
                 jacobian = tf.squeeze(dtape.batch_jacobian(temp, xtensor))
 
-            hessian = tf.squeeze(tape.batch_jacobian(jacobian, xtensor))
+            hessian = tape.batch_jacobian(jacobian, xtensor)
         elif keras.backend.backend() == "torch":
             import torch
 
             # add new axis for vmap
             xtensor = torch.tensor(x_data, requires_grad=True)[:, None, ...]
             hessian = torch.vmap(torch.func.hessian(_model), randomness="different")(xtensor)
-            hessian = torch.squeeze(hessian)
         else:
             raise ValueError("Only Tensorflow and PyTorch backend is supported")
 
+        if isinstance(hessian, dict):
+            hessian = hessian["output"]
+        hessian = keras.ops.squeeze(hessian)
+
         if np.all(
             keras.ops.convert_to_numpy(keras.ops.equal(hessian, 0.0))
         ):  # warn user about not so linear activation like ReLU will get all zeros
@@ -600,10 +603,11 @@ def jacobian(self, x=None, mean_output=False, mc_num=1, denormalize=False):
         try:
             input_shape_expectation = self.keras_model_predict.get_layer(
                 "input"
-            ).input.shape
+            ).output.shape
             output_shape_expectation = self.keras_model_predict.get_layer(
                 "output"
             ).output.shape
+
             _model = self.keras_model_predict
         except AttributeError:
             input_shape_expectation = self.keras_model.input_shape
@@ -619,20 +623,20 @@ def jacobian(self, x=None, mean_output=False, mc_num=1, denormalize=False):
             input_shape_expectation = input_shape_expectation[0]
 
         # just in case only 1 data point is provided and mess up the shape issue
-        if len(input_shape_expectation) == 3:
-            x_data = np.atleast_3d(x_data)
-        elif len(input_shape_expectation) == 4:
-            if len(x_data.shape) < 4:
-                x_data = x_data[:, :, :, np.newaxis]
-        else:
-            raise ValueError("Input data shape do not match neural network expectation")
+        # if len(input_shape_expectation) == 2:
+        #     x_data = np.atleast_3d(x_data)
+        # elif len(input_shape_expectation) == 4:
+        #     if len(x_data.shape) < 4:
+        #         x_data = x_data[:, :, :, np.newaxis]
+        # else:
+        #     raise ValueError(f"Input data shape {x_data.shape} do not match neural network expectation {len(input_shape_expectation)}-d")
 
         total_num = x_data.shape[0]
 
-        input_dim = len(np.squeeze(np.ones(input_shape_expectation[1:])).shape)
-        output_dim = len(np.squeeze(np.ones(output_shape_expectation[1:])).shape)
-        if input_dim > 3 or output_dim > 3:
-            raise ValueError("Unsupported data dimension")
+        # input_dim = len(np.squeeze(np.ones(input_shape_expectation[1:])).shape)
+        # output_dim = len(np.squeeze(np.ones(output_shape_expectation[1:])).shape)
+        # if input_dim > 3 or output_dim > 3:
+        #     raise ValueError("Unsupported data dimension")
 
         start_time = time.time()
 
@@ -645,16 +649,19 @@ def jacobian(self, x=None, mean_output=False, mc_num=1, denormalize=False):
                 tape.watch(xtensor)
                 temp = _model(xtensor)
 
-            jacobian = tf.squeeze(tape.batch_jacobian(temp, xtensor))
+            jacobian = tape.batch_jacobian(temp, xtensor)
         elif keras.backend.backend() == "torch":
             import torch
 
             # add new axis for vmap
             xtensor = torch.tensor(x_data, requires_grad=True)[:, None, ...]
             jacobian = torch.vmap(torch.func.jacrev(_model), randomness="different")(xtensor)
-            jacobian = torch.squeeze(jacobian)
         else:
             raise ValueError("Only Tensorflow and PyTorch backend is supported")
+
+        if isinstance(jacobian, dict):
+            jacobian = jacobian["output"]
+        jacobian = keras.ops.squeeze(jacobian)
 
         if mean_output is True:
             jacobian_master = keras.ops.convert_to_numpy(

diff --git a/tests/test_apogee_model.py b/tests/test_apogee_model.py
@@ -47,14 +47,23 @@ def test_apogee_cnn(spectra_ci_data):
 
     prediction = neuralnet.predict(xdata)
     # assert most of them have less than 15% error
-    assert 0.15 > np.nanmedian(np.abs((ydata[neuralnet.val_idx] - prediction[neuralnet.val_idx]) / ydata[neuralnet.val_idx]))
+    assert 0.15 > np.nanmedian(
+        np.abs(
+            (ydata[neuralnet.val_idx] - prediction[neuralnet.val_idx])
+            / ydata[neuralnet.val_idx]
+        )
+    )
     jacobian = neuralnet.jacobian(xdata[:5])
     # assert shape correct as expected
     npt.assert_array_equal(prediction.shape, ydata.shape)
-    assert jacobian.shape == (xdata[:5].shape[0], ydata.shape[1], xdata.shape[1]), f"Jacobian shape is {jacobian.shape}, expected {(xdata[:5].shape[0], ydata.shape[1], xdata.shape[1])}"
+    assert (
+        jacobian.shape == (xdata[:5].shape[0], ydata.shape[1], xdata.shape[1])
+    ), f"Jacobian shape is {jacobian.shape}, expected {(xdata[:5].shape[0], ydata.shape[1], xdata.shape[1])}"
 
     hessian = neuralnet.hessian(xdata[:5], mean_output=True)
-    assert hessian.shape == (ydata.shape[1], xdata.shape[1], xdata.shape[1]), f"Hessian shape is {hessian.shape}, expected {(ydata.shape[1], xdata.shape[1], xdata.shape[1])}"
+    assert (
+        hessian.shape == (ydata.shape[1], xdata.shape[1], xdata.shape[1])
+    ), f"Hessian shape is {hessian.shape}, expected {(ydata.shape[1], xdata.shape[1], xdata.shape[1])}"
 
     # make sure raised if data dimension not as expected
     with pytest.raises(ValueError):
@@ -109,11 +118,16 @@ def test_apogee_bcnn(spectra_ci_data):
     bneuralnet.mc_num = 2
     prediction, prediction_err = bneuralnet.predict(xdata)
     # assert most of them have less than 15% error
-    assert 0.15 > np.median(np.abs((ydata[bneuralnet.val_idx] - prediction[bneuralnet.val_idx])/ydata[bneuralnet.val_idx]))
+    assert 0.15 > np.median(keras.ops.convert_to_numpy(
+        mape(
+            keras.ops.array(ydata[bneuralnet.val_idx]),
+            keras.ops.array(prediction[bneuralnet.val_idx]),
+        ) / 100.
+    ))
     assert np.all(0.25 > np.median(prediction_err["total"], axis=0))  # assert entropy
     # assert all of them not equal becaues of MC Dropout
     npt.assert_equal(
-        np.all(bneuralnet.evaluate(xdata, ydata) != bneuralnet.evaluate(xdata, ydata)),
+        np.all(bneuralnet.predict(xdata)[0] != bneuralnet.predict(xdata)[0]),
         True,
     )
     jacobian = bneuralnet.jacobian(xdata[:2], mean_output=True)
@@ -306,8 +320,12 @@ def test_apogee_identical_transfer(spectra_ci_data):
     # transfer weight
     neuralnet2.transfer_weights(neuralnet)
     pred2 = neuralnet2.predict(xdata[:5000, :1500][neuralnet.val_idx])
-    mad_1 = keras.ops.convert_to_numpy(mad(ydata[neuralnet.val_idx][:, 0], pred[:, 0], axis=None))
-    mad_2 = keras.ops.convert_to_numpy(mad(ydata[neuralnet.val_idx][:, 0], pred2[:, 0], axis=None))
+    mad_1 = keras.ops.convert_to_numpy(
+        mad(ydata[neuralnet.val_idx][:, 0], pred[:, 0], axis=None)
+    )
+    mad_2 = keras.ops.convert_to_numpy(
+        mad(ydata[neuralnet.val_idx][:, 0], pred2[:, 0], axis=None)
+    )
 
     # accurancy sould be very similar as they are the same model
     npt.assert_almost_equal(mad_1, mad_2)
@@ -338,8 +356,12 @@ def test_apogee_transferlearning(spectra_ci_data):
     bneuralnet2.max_epochs = 10
     bneuralnet2.fit(xdata[5000:, 1500:], ydata[5000:])
     pred2 = bneuralnet2.predict(xdata[5000:, 1500:][bneuralnet2.val_idx])
-    keras.ops.convert_to_numpy(mad(ydata[bneuralnet.val_idx][:, 0], pred[0][:, 0], axis=None))
-    keras.ops.convert_to_numpy(mad(ydata[5000:, 0][bneuralnet2.val_idx], pred2[0][:, 0], axis=None))
+    keras.ops.convert_to_numpy(
+        mad(ydata[bneuralnet.val_idx][:, 0], pred[0][:, 0], axis=None)
+    )
+    keras.ops.convert_to_numpy(
+        mad(ydata[5000:, 0][bneuralnet2.val_idx], pred2[0][:, 0], axis=None)
+    )
 
     # transferred weights should be untrainable thus stay the same
     npt.assert_array_equal(