aai-institute · schroedk · Jun 11, 2024 · Jun 10, 2024 · Jun 10, 2024 · Jun 10, 2024
diff --git a/src/pydvl/influence/base_influence_function_model.py b/src/pydvl/influence/base_influence_function_model.py
@@ -505,7 +505,10 @@ def influences_from_factors(
             self._create_batch(x, y),
             mode,
         )
-        return cast(TensorType, sum(tensors))
+        result: TensorType = next(tensors)
+        for tensor in tensors:
+            result = result + tensor
+        return result
 
     @staticmethod
     @abstractmethod

diff --git a/src/pydvl/influence/torch/base.py b/src/pydvl/influence/torch/base.py
@@ -17,7 +17,6 @@
     GradientProvider,
     Operator,
     OperatorGradientComposition,
-    TensorType,
 )
 from .util import (
     BlockMode,
@@ -286,10 +285,6 @@ def inner_prod(
 
     def _inner_product(self, left: torch.Tensor, right: torch.Tensor) -> torch.Tensor:
         left_result = self.operator.apply(left)
-
-        if left_result.ndim == right.ndim and left.shape[-1] == right.shape[-1]:
-            return left_result @ right.T
-
         return torch.einsum("ia,j...a->ij...", left_result, right)
 
 
@@ -546,7 +541,10 @@ def as_bilinear_form(self) -> DictBilinearForm:
 
 class TorchOperatorGradientComposition(
     OperatorGradientComposition[
-        torch.Tensor, TorchBatch, TorchOperatorType, TorchGradientProvider
+        torch.Tensor,
+        TorchBatch,
+        TorchOperatorType,
+        TorchGradientProvider,
     ]
 ):
     """
@@ -567,6 +565,11 @@ def to(self, device: torch.device):
         self.op = self.op.to(device)
         return self
 
+    def _tensor_inner_product(
+        self, left: torch.Tensor, right: torch.Tensor
+    ) -> torch.Tensor:
+        return torch.einsum("ia,j...a->ij...", left, right)  # sum over trailing axis a
+
 
 class TorchBlockMapper(
     BlockMapper[

diff --git a/src/pydvl/influence/types.py b/src/pydvl/influence/types.py
@@ -215,7 +215,7 @@ def inner_prod(self, left: TensorType, right: Optional[TensorType]) -> TensorTyp
         In this case, the trailing dimension of the `left` and `right` tensors are
         considered for the computation of the inner product. For example,
         if `left` is a tensor of shape $(N, D)$ and, `right` is of shape $(M,..., D)$,
-        then the result is of shape $(N,..., M)$
+        then the result is of shape $(N, M, ...)$.
 
         Args:
             left: The first tensor in the inner product computation.
@@ -367,7 +367,12 @@ def as_bilinear_form(self) -> BilinearFormType:
 
 
 class OperatorGradientComposition(
-    Generic[TensorType, BatchType, OperatorType, GradientProviderType]
+    Generic[
+        TensorType,
+        BatchType,
+        OperatorType,
+        GradientProviderType,
+    ]
 ):
     """
     Generic base class representing a composable block that integrates an operator and
@@ -385,6 +390,11 @@ def __init__(self, op: OperatorType, gp: GradientProviderType):
         self.gp = gp
         self.op = op
 
+    @abstractmethod
+    def _tensor_inner_product(self, left: TensorType, right: TensorType) -> TensorType:
+        """Subclasses implement the contraction of `left` and `right` over their
+        trailing dimension, i.e. the Einstein summation ia,j...a -> ij..."""
+
     def interactions(
         self,
         left_batch: BatchType,
@@ -477,7 +487,7 @@ def interactions_from_transformed_grads(
             right_grads = self.gp.flat_grads(right_batch)
         else:
             right_grads = self.gp.flat_mixed_grads(right_batch)
-        return self.op.as_bilinear_form().inner_prod(left_factors, right_grads)
+        return self._tensor_inner_product(left_factors, right_grads)
 
 
 OperatorGradientCompositionType = TypeVar(

diff --git a/tests/influence/torch/test_influence_model.py b/tests/influence/torch/test_influence_model.py
@@ -451,7 +451,9 @@ def test_influence_linear_model(
     x_train, y_train = tuple(map(torch.from_numpy, train_data))
     x_test, y_test = tuple(map(torch.from_numpy, test_data))
     influence_values = (
-        influence.influences(x_test, y_test, x_train, y_train, mode=mode).cpu().numpy()
+        (influence.influences(x_test, y_test, x_train, y_train, mode=mode))
+        .cpu()
+        .numpy()
     )
     sym_influence_values = (
         influence.influences(x_train, y_train, x_train, y_train, mode=mode)
@@ -516,14 +518,21 @@ def test_influences_lissa(
     influence_model = influence_factory(
         model.to(device), loss, train_dataloader, test_case.hessian_reg
     )
-    approx_influences = (
-        influence_model.influences(
-            x_test, y_test, x_train, y_train, mode=test_case.mode
-        )
-        .cpu()
-        .numpy()
+    approx_influences = influence_model.influences(
+        x_test, y_test, x_train, y_train, mode=test_case.mode
+    )
+
+    influence_factors = influence_model.influence_factors(x_test, y_test)
+    influences_from_factors = influence_model.influences_from_factors(
+        influence_factors, x_train, y_train, mode=test_case.mode
+    )
+
+    assert torch.allclose(
+        influences_from_factors, approx_influences, atol=1e-5, rtol=1e-4
     )
 
+    approx_influences = approx_influences.cpu().numpy()
+
     assert not np.any(np.isnan(approx_influences))
 
     assert np.allclose(approx_influences, direct_influences, rtol=1e-1)
@@ -783,14 +792,19 @@ def test_influences_cg(
     )
     influence_model = influence_model.fit(train_dataloader)
 
-    approx_influences = (
-        influence_model.influences(
-            x_test, y_test, x_train, y_train, mode=test_case.mode
-        )
-        .cpu()
-        .numpy()
+    approx_influences = influence_model.influences(
+        x_test, y_test, x_train, y_train, mode=test_case.mode
+    )
+
+    influence_factors = influence_model.influence_factors(x_test, y_test)
+    influences_from_factors = influence_model.influences_from_factors(
+        influence_factors, x_train, y_train, mode=test_case.mode
     )
 
+    assert torch.allclose(influences_from_factors, approx_influences, rtol=1e-4)
+
+    approx_influences = approx_influences.cpu().numpy()
+
     assert not np.any(np.isnan(approx_influences))
 
     assert np.allclose(approx_influences, direct_influences, atol=1e-6, rtol=1e-4)