From 6f1d58cef552f7a678eba506fdd7a8f649412f22 Mon Sep 17 00:00:00 2001
From: Markus Semmler <markus.semmler@unternehmertum.de>
Date: Tue, 12 Sep 2023 18:55:40 +0200
Subject: [PATCH] Rework test cases.

---
 src/pydvl/value/shapley/classwise.py  |   2 +-
 tests/value/shapley/test_classwise.py | 846 +++++---------------------
 2 files changed, 147 insertions(+), 701 deletions(-)

diff --git a/src/pydvl/value/shapley/classwise.py b/src/pydvl/value/shapley/classwise.py
index 93868db15..547308253 100644
--- a/src/pydvl/value/shapley/classwise.py
+++ b/src/pydvl/value/shapley/classwise.py
@@ -348,7 +348,7 @@ def compute_classwise_shapley_values(
                     done_sample_complements=done_sample_complements,
                     use_default_scorer_value=use_default_scorer_value,
                     min_elements_per_label=min_elements_per_label,
-                    algorithm=algorithm,
+                    algorithm_name=algorithm,
                     seed=seeds[i],
                 )
                 pending.add(future)
diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py
index 41c022607..bd4f55a5d 100644
--- a/tests/value/shapley/test_classwise.py
+++ b/tests/value/shapley/test_classwise.py
@@ -1,6 +1,3 @@
-"""
-Test cases for the class wise shapley value.
-"""
 from typing import Dict, Tuple, cast
 
 import numpy as np
@@ -19,216 +16,9 @@
 
 
 @pytest.fixture(scope="function")
-def linear_classifier_cs_scorer_args_exact_solution_use_default_score() -> Tuple[
-    Dict, ValuationResult, Dict
-]:
-    r"""
-    Returns the exact solution for the class wise shapley value of the training and
-    validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture.
-
-    ===========================
-    CS-Shapley Manual Derivation
-    ===========================
-
-    :Author: Markus Semmler
-    :Date:   August 2023
-
-    Dataset description
-    ===================
-
-    We have a training and a test dataset. We want to model a simple XOR dataset. The
-    development set :math:`D` is given by
-
-    .. math::
-        \begin{aligned}
-            \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\
-            \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\
-            \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\
-            \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\
-        \end{aligned}
-
-    and the training set :math:`T` is given by
-
-    .. math::
-        \begin{aligned}
-            x_0 &= 1 \quad &y_0 = 0 \\
-            x_1 &= 2 \quad &y_1 = 0 \\
-            x_2 &= 3 \quad &y_2 = 1 \\
-            x_3 &= 4 \quad &y_3 = 1 \\
-        \end{aligned}
-
-    Note that the training set and the development set contain the same
-    inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2`
-
-    Model
-    =====
-
-    We use an adapted version of linear regression
-
-    .. math:: y = \max(0, \min(1, \text{round}(\beta^T x)))
-
-    for classification, with the closed form solution
-
-    .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)}
-
-    Fitted model
-    ============
-
-    The hyperparameters for all combinations are
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}`
-         & :math:`\frac{7}{25}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` &
-         :math:`\frac{4}{17}` & :math:`\frac{7}{26}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` &
-         :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` &
-         :math:`\frac{4}{21}` & :math:`\frac{7}{30}`
-
-    Accuracy tables on development set :math:`D`
-    ============================================
-
-    (*) Note that the algorithm described in the paper overwrites these
-    values with 0.
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}`
-       | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{2}`
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-
-    CS-Shapley
-    ==========
-
-    The formulas of the algorithm are given by
-
-    .. math::
-
-        \begin{aligned}
-            \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i})
-                - v_{y_i}(\pi_{:i} | S_{-y_i}) \\
-            \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!}
-                \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\
-            \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i}
-                \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ]
-        \end{aligned}
-
-    Valuation of :math:`x_0`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \{ x_2 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{2} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_0 | \{ x_2 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\
-            \left [ \phi_0 | \{ x_2, x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{1}{6} e^\frac{1}{4} \approx 0.214
-
-    Valuation of :math:`x_1`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_1 | \{ x_2 \} \right] &= \frac{3}{8} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{3}{8} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{1}{3} e^\frac{1}{4} \approx 0.428
-
-    Valuation of :math:`x_2`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 2)
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_2 | \{ x_0 \} \right]
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757
-
-    Valuation of :math:`x_3`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061
+def classwise_shapley_exact_solution() -> Tuple[Dict, ValuationResult, Dict]:
+    """
+    See [classwise.py][pydvl.value.shapley.classwise] for details of the derivation.
     """
     return (
         {
@@ -249,17 +39,14 @@ def linear_classifier_cs_scorer_args_exact_solution_use_default_score() -> Tuple
 
 
 @pytest.fixture(scope="function")
-def linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm(
-    linear_classifier_cs_scorer_args_exact_solution_use_default_score: Tuple[
-        Dict, ValuationResult, Dict
-    ]
+def classwise_shapley_exact_solution_normalized(
+    classwise_shapley_exact_solution,
 ) -> Tuple[Dict, ValuationResult, Dict]:
     """
-    Same as :func:`linear_classifier_cs_scorer_args_exact_solution_use_default_score`
-    but with normalization. The values of label c are normalized by the in-class score
-    of label c divided by the sum of values of that specific label.
+    Same exact solution, but with values normalized via `normalize_values`. See
+    [classwise.py][pydvl.value.shapley.classwise] for details of the derivation.
     """
-    values = linear_classifier_cs_scorer_args_exact_solution_use_default_score[1].values
+    values = classwise_shapley_exact_solution[1].values
     label_zero_coefficient = 1 / np.exp(1 / 4)
     label_one_coefficient = 1 / (1 / 3 * np.exp(1 / 4) + 2 / 3 * np.exp(1 / 2))
 
@@ -282,213 +69,11 @@ def linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm(
 
 
 @pytest.fixture(scope="function")
-def linear_classifier_cs_scorer_args_exact_solution_use_add_idx() -> Tuple[
-    Dict, ValuationResult, Dict
-]:
-    r"""
-    Returns the exact solution for the class wise shapley value of the training and
-    validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture.
-
-    ===========================
-    CS-Shapley Manual Derivation
-    ===========================
-
-    :Author: Markus Semmler
-    :Date:   August 2023
-
-    Dataset description
-    ===================
-
-    We have a training and a test dataset. We want to model a simple XOR dataset. The
-    development set :math:`D` is given by
-
-    .. math::
-        \begin{aligned}
-            \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\
-            \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\
-            \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\
-            \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\
-        \end{aligned}
-
-    and the training set :math:`T` is given by
-
-    .. math::
-        \begin{aligned}
-            x_0 &= 1 \quad &y_0 = 0 \\
-            x_1 &= 2 \quad &y_1 = 0 \\
-            x_2 &= 3 \quad &y_2 = 1 \\
-            x_3 &= 4 \quad &y_3 = 1 \\
-        \end{aligned}
-
-    Note that the training set and the development set contain the same
-    inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2`
-
-    Model
-    =====
-
-    We use an adapted version of linear regression
-
-    .. math:: y = \max(0, \min(1, \text{round}(\beta^T x)))
-
-    for classification, with the closed form solution
-
-    .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)}
-
-    Fitted model
-    ============
-
-    The hyperparameters for all combinations are
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}`
-         & :math:`\frac{7}{25}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` &
-         :math:`\frac{4}{17}` & :math:`\frac{7}{26}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` &
-         :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` &
-         :math:`\frac{4}{21}` & :math:`\frac{7}{30}`
-
-    Accuracy tables on development set :math:`D`
-    ============================================
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}`
-       | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{2}`
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-
-    CS-Shapley
-    ==========
-
-    The formulas of the algorithm are given by
-
-    .. math::
-
-        \begin{aligned}
-            \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i})
-                - v_{y_i}(\pi_{:i} | S_{-y_i}) \\
-            \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!}
-                \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\
-            \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i}
-                \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ]
-        \end{aligned}
-
-    Valuation of :math:`x_0`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\
-            \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{1}{24} e^\frac{1}{4} \approx 0.0535
-
-    Valuation of :math:`x_1`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{5}{24} e^\frac{1}{4} \approx 0.2675
-
-    Valuation of :math:`x_2`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 2)
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_2 | \{ x_0 \} \right]
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757
-
-    Valuation of :math:`x_3`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061
+def classwise_shapley_exact_solution_no_default() -> Tuple[Dict, ValuationResult, Dict]:
+    """
+    Note that this special case doesn't set the utility to 0 if the permutation is
+    empty. See [classwise.py][pydvl.value.shapley.classwise] for details of the
+    derivation.
     """
     return (
         {
@@ -510,227 +95,13 @@ def linear_classifier_cs_scorer_args_exact_solution_use_add_idx() -> Tuple[
 
 
 @pytest.fixture(scope="function")
-def linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set() -> Tuple[
-    Dict, ValuationResult, Dict
-]:
+def classwise_shapley_exact_solution_no_default_allow_empty_set() -> (
+    Tuple[Dict, ValuationResult, Dict]
+):
     r"""
-    Returns the exact solution for the class wise shapley value of the training and
-    validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture.
-
-    ===========================
-    CS-Shapley Manual Derivation
-    ===========================
-
-    :Author: Markus Semmler
-    :Date:   August 2023
-
-    Dataset description
-    ===================
-
-    We have a training and a test dataset. We want to model a simple XOR dataset. The
-    development set :math:`D` is given by
-
-    .. math::
-        \begin{aligned}
-            \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\
-            \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\
-            \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\
-            \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\
-        \end{aligned}
-
-    and the training set :math:`T` is given by
-
-    .. math::
-        \begin{aligned}
-            x_0 &= 1 \quad &y_0 = 0 \\
-            x_1 &= 2 \quad &y_1 = 0 \\
-            x_2 &= 3 \quad &y_2 = 1 \\
-            x_3 &= 4 \quad &y_3 = 1 \\
-        \end{aligned}
-
-    Note that the training set and the development set contain the same
-    inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2`
-
-    Model
-    =====
-
-    We use an adapted version of linear regression
-
-    .. math:: y = \max(0, \min(1, \text{round}(\beta^T x)))
-
-    for classification, with the closed form solution
-
-    .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)}
-
-    Fitted model
-    ============
-
-    The hyperparameters for all combinations are
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}`
-         & :math:`\frac{7}{25}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` &
-         :math:`\frac{4}{17}` & :math:`\frac{7}{26}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` &
-         :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` &
-         :math:`\frac{4}{21}` & :math:`\frac{7}{30}`
-
-    Accuracy tables on development set :math:`D`
-    ============================================
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}`
-       | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` &
-         :math:`\frac{1}{2}` & :math:`\frac{1}{2}`
-
-    .. container:: tabular
-
-       | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` &
-         :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` &
-         :math:`\{x_2, x_3\}`
-       | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-       | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}`
-       | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` &
-         :math:`\frac{1}{4}` & :math:`\frac{1}{4}`
-
-    CS-Shapley
-    ==========
-
-    The formulas of the algorithm are given by
-
-    .. math::
-
-        \begin{aligned}
-            \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i})
-                - v_{y_i}(\pi_{:i} | S_{-y_i}) \\
-            \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!}
-                \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\
-            \phi_i &= \frac{1}{2^{|T_{-y_i}|}} \left [\sum_{S_{-y_i}
-                \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ]
-        \end{aligned}
-
-    Valuation of :math:`x_0`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \emptyset, 0) &= \frac{3}{4} &\quad
-                \delta((x_1, x_0), \emptyset, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_0 | \emptyset \right] &= \frac{3}{8} \\
-            \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\
-            \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{3}{32} + \frac{1}{32} e^\frac{1}{4} \approx 0.1339
-
-    Valuation of :math:`x_1`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_0, x_1), \emptyset, 1) &= 0 &\quad
-                \delta((x_1, x_0), \emptyset, 1) &= \frac{3}{4} \\
-            \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad
-                \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\
-            \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_1 | \emptyset \right] &= \frac{3}{8} \\
-            \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4}
-        \end{aligned}
-
-    .. math:: \phi_0 = \frac{3}{32} + \frac{5}{32} e^\frac{1}{4} \approx 0.2944
-
-    Valuation of :math:`x_2`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \emptyset, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_3, x_2), \emptyset, 2) &= 0 \\
-            \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 2)
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_2 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_2 | \{ x_0 \} \right]
-                &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math::
-        \phi_2 = \frac{5}{32} e^\frac{1}{4} + \frac{1}{32} e^\frac{1}{2} \approx 0.2522
-
-    Valuation of :math:`x_3`
-    ========================
-
-    .. math::
-        \begin{aligned}
-            \delta((x_2, x_3), \emptyset, 3) &= 0 &\quad
-                \delta((x_3, x_2), \emptyset, 3) &= \frac{1}{4} e^\frac{1}{4} \\
-            \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\
-            \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad
-                \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math::
-        \begin{aligned}
-            \left [ \phi_3 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\
-            \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\
-            \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2}
-        \end{aligned}
-
-    .. math::
-        \phi_3 = \frac{1}{32} e^\frac{1}{4} + \frac{3}{32} e^\frac{1}{2} \approx 0.1947
+    Note that this special case doesn't set the utility to 0 if the permutation is
+    empty and additionally allows $S^{(k)} = \emptyset$. See
+    [classwise.py][pydvl.value.shapley.classwise] for details of the derivation.
     """
     return (
         {
@@ -759,26 +130,24 @@ def linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set() -> T
     ids=lambda x: "n_resample_complement_sets={}".format(x),
 )
 @pytest.mark.parametrize(
-    "linear_classifier_cs_scorer_args_exact_solution",
+    "exact_solution",
     [
-        "linear_classifier_cs_scorer_args_exact_solution_use_default_score",
-        "linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm",
-        "linear_classifier_cs_scorer_args_exact_solution_use_add_idx",
-        "linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set",
+        "classwise_shapley_exact_solution",
+        "classwise_shapley_exact_solution_normalized",
+        "classwise_shapley_exact_solution_no_default",
+        "classwise_shapley_exact_solution_no_default_allow_empty_set",
     ],
 )
 def test_classwise_shapley(
-    linear_classifier_cs_scorer: Utility,
-    linear_classifier_cs_scorer_args_exact_solution: Tuple[Dict, ValuationResult],
+    classwise_shapley_utility: Utility,
+    exact_solution: Tuple[Dict, ValuationResult, Dict],
     n_samples: int,
     n_resample_complement_sets: int,
     request,
 ):
-    args, exact_solution, check_args = request.getfixturevalue(
-        linear_classifier_cs_scorer_args_exact_solution
-    )
+    args, exact_solution, check_args = request.getfixturevalue(exact_solution)
     values = compute_classwise_shapley_values(
-        linear_classifier_cs_scorer,
+        classwise_shapley_utility,
         done=MaxChecks(n_samples),
         truncation=NoTruncation(),
         done_sample_complements=MaxChecks(n_resample_complement_sets),
@@ -789,17 +158,24 @@ def test_classwise_shapley(
     assert np.all(values.counts == n_samples * n_resample_complement_sets)
 
 
-@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)])
-def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple):
+def test_classwise_scorer_representation():
     """
-    Tests the class wise scorer.
+    Tests the string representations (`str` and `repr`) of the ClasswiseScorer.
     """
 
     scorer = ClasswiseScorer("accuracy", initial_label=0)
     assert str(scorer) == "classwise accuracy"
     assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))"
 
-    x, y, info = dataset_alt_seq_simple
+
+@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)])
+def test_classwise_scorer_utility(dataset_left_right_margins):
+    """
+    Tests whether the ClasswiseScorer returns the expected utility value.
+    See [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
+    scorer = ClasswiseScorer("accuracy", initial_label=0)
+    x, y, info = dataset_left_right_margins
     n_element = len(x)
     target_in_cls_acc_0 = (info["left_margin"] * 100 + 1) / n_element
     target_out_of_cls_acc_0 = (info["right_margin"] * 100 + 1) / n_element
@@ -811,6 +187,30 @@ def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple):
     assert np.isclose(in_cls_acc_0, target_in_cls_acc_0)
     assert np.isclose(out_of_cls_acc_0, target_out_of_cls_acc_0)
 
+    value = scorer(model, x, y)
+    assert np.isclose(value, in_cls_acc_0 * np.exp(out_of_cls_acc_0))
+
+    scorer.label = 1
+    value = scorer(model, x, y)
+    assert np.isclose(value, out_of_cls_acc_0 * np.exp(in_cls_acc_0))
+
+
+@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)])
+def test_classwise_scorer_is_symmetric(
+    dataset_left_right_margins,
+):
+    """
+    Tests whether the ClasswiseScorer is symmetric. For a two-class classification
+    problem, the in-class accuracy for the first label needs to match the
+    out-of-class accuracy for the second label (and vice versa). See
+    [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
+    scorer = ClasswiseScorer("accuracy", initial_label=0)
+    x, y, info = dataset_left_right_margins
+    model = ThresholdClassifier()
+    in_cls_acc_0, out_of_cls_acc_0 = scorer.estimate_in_class_and_out_of_class_score(
+        model, x, y
+    )
     scorer.label = 1
     in_cls_acc_1, out_of_cls_acc_1 = scorer.estimate_in_class_and_out_of_class_score(
         model, x, y
@@ -818,32 +218,19 @@ def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple):
     assert in_cls_acc_1 == out_of_cls_acc_0
     assert in_cls_acc_0 == out_of_cls_acc_1
 
-    scorer.label = 0
-    value = scorer(model, x, y)
-    assert np.isclose(value, in_cls_acc_0 * np.exp(out_of_cls_acc_0))
-
-    scorer.label = 1
-    value = scorer(model, x, y)
-    assert np.isclose(value, in_cls_acc_1 * np.exp(out_of_cls_acc_1))
-
 
-def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score(
-    linear_classifier_cs_scorer: Utility,
+def test_classwise_scorer_accuracies_manual_derivation(
+    classwise_shapley_utility: Utility,
 ):
+    """
+    Tests whether the model of the scorer is fitted correctly and returns the expected
+    in-class and out-of-class accuracies. See
+    [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
     subsets_zero = list(powerset(np.array((0, 1))))
     subsets_one = list(powerset(np.array((2, 3))))
     subsets_zero = [tuple(s) for s in subsets_zero]
     subsets_one = [tuple(s) for s in subsets_one]
-    target_betas = pd.DataFrame(
-        [
-            [np.nan, 1 / 3, 1 / 4, 7 / 25],
-            [0, 3 / 10, 4 / 17, 7 / 26],
-            [0, 3 / 13, 1 / 5, 7 / 29],
-            [0, 3 / 14, 4 / 21, 7 / 30],
-        ],
-        index=subsets_zero,
-        columns=subsets_one,
-    )
     target_accuracies_zero = pd.DataFrame(
         [
             [0, 1 / 4, 1 / 4, 1 / 4],
@@ -864,8 +251,8 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score(
         index=subsets_zero,
         columns=subsets_one,
     )
-    model = linear_classifier_cs_scorer.model
-    scorer = cast(ClasswiseScorer, linear_classifier_cs_scorer.scorer)
+    model = classwise_shapley_utility.model
+    scorer = cast(ClasswiseScorer, classwise_shapley_utility.scorer)
     scorer.label = 0
 
     for set_zero_idx in range(len(subsets_zero)):
@@ -874,20 +261,13 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score(
             (
                 x_train,
                 y_train,
-            ) = linear_classifier_cs_scorer.data.get_training_data(indices)
-            linear_classifier_cs_scorer.model.fit(x_train, y_train)
-            fitted_beta = linear_classifier_cs_scorer.model._beta  # noqa
-            target_beta = target_betas.iloc[set_zero_idx, set_one_idx]
-            assert (
-                np.isnan(fitted_beta)
-                if np.isnan(target_beta)
-                else fitted_beta == target_beta
-            )
+            ) = classwise_shapley_utility.data.get_training_data(indices)
+            classwise_shapley_utility.model.fit(x_train, y_train)
 
             (
                 x_test,
                 y_test,
-            ) = linear_classifier_cs_scorer.data.get_test_data()
+            ) = classwise_shapley_utility.data.get_test_data()
             (
                 in_cls_acc_0,
                 in_cls_acc_1,
@@ -898,6 +278,69 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score(
             assert in_cls_acc_1 == target_accuracies_one.iloc[set_zero_idx, set_one_idx]
 
 
+@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)])
+def test_classwise_scorer_accuracies_left_right_margins(dataset_left_right_margins):
+    """
+    Tests whether the scorer returns the expected in-class and out-of-class
+    accuracies for the mock ThresholdClassifier on the margins dataset. See
+    [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
+    scorer = ClasswiseScorer("accuracy", initial_label=0)
+    x, y, info = dataset_left_right_margins
+    n_element = len(x)
+
+    target_in_cls_acc_0 = (info["left_margin"] * 100 + 1) / n_element
+    target_out_of_cls_acc_0 = (info["right_margin"] * 100 + 1) / n_element
+
+    model = ThresholdClassifier()
+    in_cls_acc_0, out_of_cls_acc_0 = scorer.estimate_in_class_and_out_of_class_score(
+        model, x, y
+    )
+    assert np.isclose(in_cls_acc_0, target_in_cls_acc_0)
+    assert np.isclose(out_of_cls_acc_0, target_out_of_cls_acc_0)
+
+
+def test_closed_form_linear_classifier(
+    classwise_shapley_utility: Utility,
+):
+    """
+    Tests whether the model is fitted correctly and contains the right $\\beta$
+    parameter. See [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
+    subsets_zero = list(powerset(np.array((0, 1))))
+    subsets_one = list(powerset(np.array((2, 3))))
+    subsets_zero = [tuple(s) for s in subsets_zero]
+    subsets_one = [tuple(s) for s in subsets_one]
+    target_betas = pd.DataFrame(
+        [
+            [np.nan, 1 / 3, 1 / 4, 7 / 25],
+            [0, 3 / 10, 4 / 17, 7 / 26],
+            [0, 3 / 13, 1 / 5, 7 / 29],
+            [0, 3 / 14, 4 / 21, 7 / 30],
+        ],
+        index=subsets_zero,
+        columns=subsets_one,
+    )
+    scorer = cast(ClasswiseScorer, classwise_shapley_utility.scorer)
+    scorer.label = 0
+
+    for set_zero_idx in range(len(subsets_zero)):
+        for set_one_idx in range(len(subsets_one)):
+            indices = list(subsets_zero[set_zero_idx] + subsets_one[set_one_idx])
+            (
+                x_train,
+                y_train,
+            ) = classwise_shapley_utility.data.get_training_data(indices)
+            classwise_shapley_utility.model.fit(x_train, y_train)
+            fitted_beta = classwise_shapley_utility.model._beta  # noqa
+            target_beta = target_betas.iloc[set_zero_idx, set_one_idx]
+            assert (
+                np.isnan(fitted_beta)
+                if np.isnan(target_beta)
+                else fitted_beta == target_beta
+            )
+
+
 class ThresholdClassifier:
     def fit(self, x: NDArray, y: NDArray) -> float:
         raise NotImplementedError("Mock model")
@@ -933,19 +376,22 @@ def score(self, x: NDArray, y: NDArray) -> float:
 
 
 @pytest.fixture(scope="function")
-def linear_classifier_cs_scorer(
-    dataset_alt_seq_full: Dataset,
+def classwise_shapley_utility(
+    dataset_manual_derivation: Dataset,
 ) -> Utility:
     return Utility(
         ClosedFormLinearClassifier(),
-        dataset_alt_seq_full,
+        dataset_manual_derivation,
         ClasswiseScorer("accuracy"),
         catch_errors=False,
     )
 
 
 @pytest.fixture(scope="function")
-def dataset_alt_seq_full() -> Dataset:
+def dataset_manual_derivation() -> Dataset:
+    """
+    See [classwise.py][pydvl.value.shapley.classwise] for more details.
+    """
     x_train = np.arange(1, 5).reshape([-1, 1])
     y_train = np.array([0, 0, 1, 1])
     x_test = x_train
@@ -954,7 +400,7 @@ def dataset_alt_seq_full() -> Dataset:
 
 
 @pytest.fixture(scope="function")
-def dataset_alt_seq_simple(
+def dataset_left_right_margins(
     n_element: int, left_margin: float, right_margin: float
 ) -> Tuple[NDArray[np.float_], NDArray[np.int_], Dict[str, float]]:
     """