Remove reproducibility tests from test_semivalues.py and deactivate the affected test cases in test_montecarlo.py
Markus Semmler committed Sep 1, 2023
1 parent 26d59bf commit 4e889f7
Showing 6 changed files with 28 additions and 124 deletions.
45 changes: 14 additions & 31 deletions src/pydvl/utils/functional.py
@@ -4,53 +4,36 @@
from functools import partial
from typing import Callable, Set, Tuple

__all__ = ["fn_accepts_param_name"]
__all__ = ["unroll_partial_fn_args"]


def fn_accepts_param_name(fn: Callable, param_name: str) -> bool:
def unroll_partial_fn_args(fun: Callable) -> Set[str]:
"""
Checks if a function accepts a given parameter, even if it is set by partial.
Unroll a function that was set by functools.partial.
Args:
fn: The function to check.
param_name: The name of the parameter to check.
fun: Either a partial or a plain function to unroll.
Returns:
True if the function accepts the parameter, False otherwise.
"""

wrapped_fn, args_set_by_partial = _unroll_partial_fn(fn)

sig = inspect.signature(wrapped_fn)
params = sig.parameters

if param_name in args_set_by_partial:
return False

if param_name in params:
return True

return False


def _unroll_partial_fn(fn: Callable) -> Tuple[Callable, Set[str]]:
"""
Unroll a function that was set by functools.partial.
:param fn: Either or a function to unroll.
:return: A tuple of the unrolled function and a set of the parameters that were set
by functools.partial.
A set of the parameter names of the unrolled function, including those set by
functools.partial.
"""
args_set_by_partial: Set[str] = set()

def _rec_unroll_partial_function(g: Callable):
"""
Store arguments and recursively call itself if the function is a partial. In the
end, return the original function.
"""
nonlocal args_set_by_partial

if isinstance(g, partial):
args_set_by_partial.update(g.keywords.keys())
args_set_by_partial.update(g.args)
return _rec_unroll_partial_function(g.func)
return _rec_unroll_partial_function(g.keywords["fun"])
else:
return g

return _rec_unroll_partial_function(fn), args_set_by_partial
wrapped_fn = _rec_unroll_partial_function(fun)
sig = inspect.signature(wrapped_fn)
return args_set_by_partial | set(sig.parameters.keys())
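
As a quick illustration of the new helper, the sketch below calls unroll_partial_fn_args on a function wrapped with functools.partial. It is a minimal, hypothetical example: score and ignore_extra_arg are made-up stand-ins, not pyDVL code; only the import path follows the file above. Note that the recursion looks up the inner callable under the "fun" keyword, so it only unrolls partials built that way (as maybe_add_argument does below).

# Hypothetical sketch: score and ignore_extra_arg are illustrative stand-ins.
import functools

from pydvl.utils.functional import unroll_partial_fn_args


def score(model, data, seed=None):
    ...


def ignore_extra_arg(*args, fun, arg, **kwargs):
    # Stand-in for a wrapper that drops one keyword argument before delegating.
    kwargs.pop(arg, None)
    return fun(*args, **kwargs)


wrapped = functools.partial(ignore_extra_arg, fun=score, arg="job_id")

# Reports the partial's keywords ("fun", "arg") plus the parameters of the
# unrolled callable ("model", "data", "seed").
print(unroll_partial_fn_args(wrapped))
# -> {'arg', 'data', 'fun', 'model', 'seed'} (a set, order may vary)
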
1 change: 1 addition & 0 deletions src/pydvl/utils/parallel/map_reduce.py
@@ -147,6 +147,7 @@ def __call__(
zip(chunks, seed_seq.spawn(len(chunks)))
)
)

reduce_results: R = self._reduce_func(map_results, **self.reduce_kwargs)
return reduce_results

4 changes: 2 additions & 2 deletions src/pydvl/utils/types.py
@@ -11,7 +11,7 @@
from numpy.random import Generator, SeedSequence
from numpy.typing import NDArray

from pydvl.utils.functional import fn_accepts_param_name
from pydvl.utils.functional import unroll_partial_fn_args

__all__ = ["SupervisedModel", "MapFunction", "ReduceFunction", "NoPublicConstructor"]

@@ -83,7 +83,7 @@ def maybe_add_argument(fun: Callable, new_arg: str) -> Callable:
Returns:
A new function accepting one more keyword argument.
"""
if fn_accepts_param_name(fun, new_arg):
if new_arg in unroll_partial_fn_args(fun):
return fun

return functools.partial(call_fun_remove_arg, fun=fun, arg=new_arg)
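
For context, here is a hedged usage sketch of maybe_add_argument after this change. The metric functions are illustrative, and the behaviour of the wrapped call assumes that call_fun_remove_arg simply drops the named keyword before delegating, as its name suggests.

from pydvl.utils.types import maybe_add_argument


def metric_with_seed(x, seed=None):
    return x


def metric_without_seed(x):
    return x


# Already accepts "seed", so unroll_partial_fn_args finds it and the function
# is returned unchanged.
assert maybe_add_argument(metric_with_seed, "seed") is metric_with_seed

# Does not accept "seed": a partial over call_fun_remove_arg is returned,
# which (assumed here) drops the extra keyword before calling the function.
wrapped = maybe_add_argument(metric_without_seed, "seed")
assert wrapped(3, seed=42) == 3
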
2 changes: 1 addition & 1 deletion tests/utils/test_parallel.py
@@ -147,7 +147,7 @@ def reduce_func(x, y):
assert result == 150


def test_map_reduce_reproducible(parallel_config, seed, seed_alt):
def test_map_reduce_reproducible(parallel_config, seed):
"""
Test that the same result is obtained when using the same seed, and that
different results are obtained when using different seeds.
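
The kept test still asserts reproducibility under a fixed seed. The sketch below shows the general shape of such a check; it is not the test body. The MapReduceJob constructor arguments and the seed parameter of __call__ are assumptions inferred from the map_reduce.py hunk above, and the map/reduce functions are made up.

import numpy as np

from pydvl.utils import ParallelConfig
from pydvl.utils.parallel import MapReduceJob


def map_func(chunk, seed=None):
    # Each chunk receives its own spawned seed (see seed_seq.spawn above).
    rng = np.random.default_rng(seed)
    return rng.random(len(chunk))


def reduce_func(results):
    return np.concatenate(results)


job = MapReduceJob(
    np.arange(10),
    map_func=map_func,
    reduce_func=reduce_func,
    config=ParallelConfig(backend="joblib"),
    n_jobs=2,
)

# Same seed twice: identical results expected.
assert np.array_equal(job(seed=42), job(seed=42))
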
15 changes: 8 additions & 7 deletions tests/value/shapley/test_montecarlo.py
@@ -73,13 +73,14 @@ def test_analytic_montecarlo_shapley(


test_cases_montecarlo_shapley_reproducible_stochastic = [
(12, ShapleyMode.PermutationMontecarlo, {"done": MaxChecks(1)}),
# FIXME! it should be enough with 2**(len(data)-1) samples
(
8,
ShapleyMode.CombinatorialMontecarlo,
{"done": MaxChecks(1)},
),
# TODO Add once issue #416 is closed.
# (12, ShapleyMode.PermutationMontecarlo, {"done": MaxChecks(1)}),
# # FIXME! it should be enough with 2**(len(data)-1) samples
# (
# 8,
# ShapleyMode.CombinatorialMontecarlo,
# {"done": MaxChecks(1)},
# ),
(12, ShapleyMode.Owen, dict(n_samples=4, max_q=200)),
(12, ShapleyMode.OwenAntithetic, dict(n_samples=4, max_q=200)),
(
85 changes: 2 additions & 83 deletions tests/value/test_semivalues.py
@@ -3,11 +3,8 @@

import numpy as np
import pytest
from sklearn.linear_model import LinearRegression

from pydvl.utils import Dataset, ParallelConfig, Utility
from pydvl.utils.types import Seed, call_fn_multiple_seeds
from pydvl.value import ValuationResult
from pydvl.utils import ParallelConfig
from pydvl.value.sampler import (
AntitheticSampler,
DeterministicPermutationSampler,
@@ -23,7 +20,7 @@
compute_generic_semivalues,
shapley_coefficient,
)
from pydvl.value.stopping import AbsoluteStandardError, MaxUpdates, StoppingCriterion
from pydvl.value.stopping import AbsoluteStandardError, MaxUpdates

from . import check_values

@@ -61,84 +58,6 @@ def test_shapley(
check_values(values, exact_values, rtol=0.2)


def semivalues_seed_wrapper(
sampler_t: Type[PowersetSampler], u: Utility, *args, seed: Seed, **kwargs
) -> ValuationResult:
"""
Wrapper for semivalues that takes a seed as an argument to be used with
call_fn_multiple_seeds.
"""
sampler = sampler_t(u.data.indices, seed=seed)
return semivalues(sampler, u, *args, **kwargs)


@pytest.mark.parametrize("num_samples", [5])
@pytest.mark.parametrize(
"sampler_t",
[
UniformSampler,
PermutationSampler,
AntitheticSampler,
],
)
@pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)])
@pytest.mark.parametrize("num_points, num_features", [(12, 3)])
def test_semivalues_shapley_reproducible(
num_samples: int,
housing_dataset: Dataset,
sampler_t: Type[PowersetSampler],
coefficient: SVCoefficient,
n_jobs: int,
parallel_config: ParallelConfig,
seed: Seed,
):
values_1, values_2 = call_fn_multiple_seeds(
semivalues_seed_wrapper,
sampler_t,
Utility(LinearRegression(), data=housing_dataset, scorer="r2"),
coefficient,
AbsoluteStandardError(0.02, 1.0) | MaxUpdates(2 ** (num_samples * 2)),
n_jobs=n_jobs,
config=parallel_config,
seeds=(seed, seed),
)
assert np.all(values_1.values == values_2.values)


@pytest.mark.parametrize("num_samples", [5])
@pytest.mark.parametrize(
"sampler_t",
[
UniformSampler,
PermutationSampler,
AntitheticSampler,
],
)
@pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)])
@pytest.mark.parametrize("num_points, num_features", [(12, 3)])
def test_semivalues_shapley_stochastic(
num_samples: int,
housing_dataset: Dataset,
sampler_t: Type[PowersetSampler],
coefficient: SVCoefficient,
n_jobs: int,
parallel_config: ParallelConfig,
seed: Seed,
seed_alt: Seed,
):
values_1, values_2 = call_fn_multiple_seeds(
semivalues_seed_wrapper,
sampler_t,
Utility(LinearRegression(), data=housing_dataset, scorer="r2"),
coefficient,
AbsoluteStandardError(0.02, 1.0) | MaxUpdates(2 ** (num_samples * 2)),
n_jobs=n_jobs,
config=parallel_config,
seeds=(seed, seed_alt),
)
assert np.any(values_1.values != values_2.values)


@pytest.mark.parametrize("num_samples", [5])
@pytest.mark.parametrize(
"sampler",
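
With the seed-reproducibility tests removed, the remaining tests in this file exercise seeded samplers directly. A hedged sketch of that pattern follows; the dataset is synthetic, and the positional order of compute_generic_semivalues (sampler, utility, coefficient, stopping criterion) is inferred from the deleted wrapper above rather than taken from the library documentation.

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

from pydvl.utils import Dataset, Utility
from pydvl.value.sampler import PermutationSampler
from pydvl.value.semivalues import compute_generic_semivalues, shapley_coefficient
from pydvl.value.stopping import MaxUpdates

X, y = make_regression(n_samples=20, n_features=3, random_state=0)
data = Dataset.from_arrays(X, y, train_size=0.5)
u = Utility(LinearRegression(), data=data, scorer="r2")

# A seeded sampler makes a single run reproducible.
values = compute_generic_semivalues(
    PermutationSampler(u.data.indices, seed=42),
    u,
    shapley_coefficient,
    MaxUpdates(10),
)
print(values)
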
