From 6f1d58cef552f7a678eba506fdd7a8f649412f22 Mon Sep 17 00:00:00 2001 From: Markus Semmler Date: Tue, 12 Sep 2023 18:55:40 +0200 Subject: [PATCH] Rework test cases. --- src/pydvl/value/shapley/classwise.py | 2 +- tests/value/shapley/test_classwise.py | 846 +++++--------------------- 2 files changed, 147 insertions(+), 701 deletions(-) diff --git a/src/pydvl/value/shapley/classwise.py b/src/pydvl/value/shapley/classwise.py index 93868db15..547308253 100644 --- a/src/pydvl/value/shapley/classwise.py +++ b/src/pydvl/value/shapley/classwise.py @@ -348,7 +348,7 @@ def compute_classwise_shapley_values( done_sample_complements=done_sample_complements, use_default_scorer_value=use_default_scorer_value, min_elements_per_label=min_elements_per_label, - algorithm=algorithm, + algorithm_name=algorithm, seed=seeds[i], ) pending.add(future) diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py index 41c022607..bd4f55a5d 100644 --- a/tests/value/shapley/test_classwise.py +++ b/tests/value/shapley/test_classwise.py @@ -1,6 +1,3 @@ -""" -Test cases for the class wise shapley value. -""" from typing import Dict, Tuple, cast import numpy as np @@ -19,216 +16,9 @@ @pytest.fixture(scope="function") -def linear_classifier_cs_scorer_args_exact_solution_use_default_score() -> Tuple[ - Dict, ValuationResult, Dict -]: - r""" - Returns the exact solution for the class wise shapley value of the training and - validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. - - =========================== - CS-Shapley Manual Derivation - =========================== - - :Author: Markus Semmler - :Date: August 2023 - - Dataset description - =================== - - We have a training and a test dataset. We want to model a simple XOR dataset. The - development set :math:`D` is given by - - .. math:: - \begin{aligned} - \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ - \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ - \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ - \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ - \end{aligned} - - and the training set :math:`T` is given by - - .. math:: - \begin{aligned} - x_0 &= 1 \quad &y_0 = 0 \\ - x_1 &= 2 \quad &y_1 = 0 \\ - x_2 &= 3 \quad &y_2 = 1 \\ - x_3 &= 4 \quad &y_3 = 1 \\ - \end{aligned} - - Note that the training set and the development set contain the same - inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` - - Model - ===== - - We use an adapted version of linear regression - - .. math:: y = \max(0, \min(1, \text{round}(\beta^T x))) - - for classification, with the closed form solution - - .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} - - Fitted model - ============ - - The hyperparameters for all combinations are - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` - & :math:`\frac{7}{25}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & - :math:`\frac{4}{17}` & :math:`\frac{7}{26}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & - :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & - :math:`\frac{4}{21}` & :math:`\frac{7}{30}` - - Accuracy tables on development set :math:`D` - ============================================ - - (*) Note that the algorithm described in the paper overwrites these - values with 0. - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` - | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{2}` - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - - CS-Shapley - ========== - - The formulas of the algorithm are given by - - .. math:: - - \begin{aligned} - \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) - - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ - \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} - \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ - \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i} - \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] - \end{aligned} - - Valuation of :math:`x_0` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \{ x_2 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{2} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_0 | \{ x_2 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ - \left [ \phi_0 | \{ x_2, x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} - \end{aligned} - - .. math:: \phi_0 = \frac{1}{6} e^\frac{1}{4} \approx 0.214 - - Valuation of :math:`x_1` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad - \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{2} e^\frac{1}{4} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_1 | \{ x_2 \} \right] &= \frac{3}{8} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{3}{8} e^\frac{1}{4} - \end{aligned} - - .. math:: \phi_0 = \frac{1}{3} e^\frac{1}{4} \approx 0.428 - - Valuation of :math:`x_2` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_3, x_2), \{ x_0 \}, 2) - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_2 | \{ x_0 \} \right] - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757 - - Valuation of :math:`x_3` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061 +def classwise_shapley_exact_solution() -> Tuple[Dict, ValuationResult, Dict]: + """ + See [classwise.py][pydvl.value.shapley.classwise] for details of the derivation. """ return ( { @@ -249,17 +39,14 @@ def linear_classifier_cs_scorer_args_exact_solution_use_default_score() -> Tuple @pytest.fixture(scope="function") -def linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm( - linear_classifier_cs_scorer_args_exact_solution_use_default_score: Tuple[ - Dict, ValuationResult, Dict - ] +def classwise_shapley_exact_solution_normalized( + classwise_shapley_exact_solution, ) -> Tuple[Dict, ValuationResult, Dict]: """ - Same as :func:`linear_classifier_cs_scorer_args_exact_solution_use_default_score` - but with normalization. The values of label c are normalized by the in-class score - of label c divided by the sum of values of that specific label. + It additionally normalizes the values using the argument `normalize_values`. See + [classwise.py][pydvl.value.shapley.classwise] for details of the derivation. """ - values = linear_classifier_cs_scorer_args_exact_solution_use_default_score[1].values + values = classwise_shapley_exact_solution[1].values label_zero_coefficient = 1 / np.exp(1 / 4) label_one_coefficient = 1 / (1 / 3 * np.exp(1 / 4) + 2 / 3 * np.exp(1 / 2)) @@ -282,213 +69,11 @@ def linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm( @pytest.fixture(scope="function") -def linear_classifier_cs_scorer_args_exact_solution_use_add_idx() -> Tuple[ - Dict, ValuationResult, Dict -]: - r""" - Returns the exact solution for the class wise shapley value of the training and - validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. - - =========================== - CS-Shapley Manual Derivation - =========================== - - :Author: Markus Semmler - :Date: August 2023 - - Dataset description - =================== - - We have a training and a test dataset. We want to model a simple XOR dataset. The - development set :math:`D` is given by - - .. math:: - \begin{aligned} - \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ - \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ - \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ - \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ - \end{aligned} - - and the training set :math:`T` is given by - - .. math:: - \begin{aligned} - x_0 &= 1 \quad &y_0 = 0 \\ - x_1 &= 2 \quad &y_1 = 0 \\ - x_2 &= 3 \quad &y_2 = 1 \\ - x_3 &= 4 \quad &y_3 = 1 \\ - \end{aligned} - - Note that the training set and the development set contain the same - inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` - - Model - ===== - - We use an adapted version of linear regression - - .. math:: y = \max(0, \min(1, \text{round}(\beta^T x))) - - for classification, with the closed form solution - - .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} - - Fitted model - ============ - - The hyperparameters for all combinations are - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` - & :math:`\frac{7}{25}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & - :math:`\frac{4}{17}` & :math:`\frac{7}{26}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & - :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & - :math:`\frac{4}{21}` & :math:`\frac{7}{30}` - - Accuracy tables on development set :math:`D` - ============================================ - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` - | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{2}` - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - - CS-Shapley - ========== - - The formulas of the algorithm are given by - - .. math:: - - \begin{aligned} - \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) - - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ - \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} - \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ - \phi_i &= \frac{1}{2^{|T_{-y_i}|}-1} \left [\sum_{\emptyset \subset S_{-y_i} - \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] - \end{aligned} - - Valuation of :math:`x_0` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad - \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\ - \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0 - \end{aligned} - - .. math:: \phi_0 = \frac{1}{24} e^\frac{1}{4} \approx 0.0535 - - Valuation of :math:`x_1` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad - \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} - \end{aligned} - - .. math:: \phi_0 = \frac{5}{24} e^\frac{1}{4} \approx 0.2675 - - Valuation of :math:`x_2` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_3, x_2), \{ x_0 \}, 2) - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_2 | \{ x_0 \} \right] - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: \phi_2 = \frac{1}{12} e^\frac{1}{4} + \frac{1}{24} e^\frac{1}{2} \approx 0.1757 - - Valuation of :math:`x_3` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: \phi_3 = \frac{1}{8} e^\frac{1}{2} \approx 0.2061 +def classwise_shapley_exact_solution_no_default() -> Tuple[Dict, ValuationResult, Dict]: + """ + Note that this special case doesn't set the utility to 0 if the permutation is + empty. See [classwise.py][pydvl.value.shapley.classwise] for details of the + derivation. """ return ( { @@ -510,227 +95,13 @@ def linear_classifier_cs_scorer_args_exact_solution_use_add_idx() -> Tuple[ @pytest.fixture(scope="function") -def linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set() -> Tuple[ - Dict, ValuationResult, Dict -]: +def classwise_shapley_exact_solution_no_default_allow_empty_set() -> ( + Tuple[Dict, ValuationResult, Dict] +): r""" - Returns the exact solution for the class wise shapley value of the training and - validation set of the `utility_alt_seq_cf_linear_classifier_cs_scorer` fixture. - - =========================== - CS-Shapley Manual Derivation - =========================== - - :Author: Markus Semmler - :Date: August 2023 - - Dataset description - =================== - - We have a training and a test dataset. We want to model a simple XOR dataset. The - development set :math:`D` is given by - - .. math:: - \begin{aligned} - \hat{x}_0 &= 1 \quad &\hat{y}_0 = 0 \\ - \hat{x}_1 &= 2 \quad &\hat{y}_1 = 0 \\ - \hat{x}_2 &= 3 \quad &\hat{y}_2 = 0 \\ - \hat{x}_3 &= 4 \quad &\hat{y}_3 = 1 \\ - \end{aligned} - - and the training set :math:`T` is given by - - .. math:: - \begin{aligned} - x_0 &= 1 \quad &y_0 = 0 \\ - x_1 &= 2 \quad &y_1 = 0 \\ - x_2 &= 3 \quad &y_2 = 1 \\ - x_3 &= 4 \quad &y_3 = 1 \\ - \end{aligned} - - Note that the training set and the development set contain the same - inputs x, but differ in the label :math:`\hat{y}_2 \neq y_2` - - Model - ===== - - We use an adapted version of linear regression - - .. math:: y = \max(0, \min(1, \text{round}(\beta^T x))) - - for classification, with the closed form solution - - .. math:: \beta = \frac{\text{dot}(x, y)}{\text{dot}(x, x)} - - Fitted model - ============ - - The hyperparameters for all combinations are - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & nan & :math:`\frac{1}{3}` & :math:`\frac{1}{4}` - & :math:`\frac{7}{25}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{3}{10}` & - :math:`\frac{4}{17}` & :math:`\frac{7}{26}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{3}{13}` & - :math:`\frac{1}{5}` &\ :math:`\frac{7}{29}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{3}{14}` & - :math:`\frac{4}{21}` & :math:`\frac{7}{30}` - - Accuracy tables on development set :math:`D` - ============================================ - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{4}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` &\ :math:`\frac{1}{2}` - | :math:`\{x_0, x_1 \}` & :math:`\frac{3}{4}` & :math:`\frac{1}{2}` & - :math:`\frac{1}{2}` & :math:`\frac{1}{2}` - - .. container:: tabular - - | \|c||Sc \| Sc \| Sc \| Sc \| :math:`S_1 \cup S_2` & - :math:`\emptyset` & :math:`\{x_2\}` & :math:`\{x_3\}` & - :math:`\{x_2, x_3\}` - | :math:`\emptyset` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_0\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - | :math:`\{x_1\}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` &\ :math:`\frac{1}{4}` - | :math:`\{x_0, x_1 \}` & :math:`0` & :math:`\frac{1}{4}` & - :math:`\frac{1}{4}` & :math:`\frac{1}{4}` - - CS-Shapley - ========== - - The formulas of the algorithm are given by - - .. math:: - - \begin{aligned} - \delta(\pi, S_{-y_i}, i) &= v_{y_i}(\pi_{:i} \cup \{ i \} | S_{-y_i}) - - v_{y_i}(\pi_{:i} | S_{-y_i}) \\ - \left [ \phi_i | S_{-y_i} \right ] &= \frac{1}{|T_{y_i}|!} - \sum_{\pi \in \Pi(T_{y_i})} \delta(\pi, S_{-y_i}, i) \\ - \phi_i &= \frac{1}{2^{|T_{-y_i}|}} \left [\sum_{S_{-y_i} - \subseteq T_{-y_i}} \left [ \phi_i | S_{-y_i} \right ] \right ] - \end{aligned} - - Valuation of :math:`x_0` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \emptyset, 0) &= \frac{3}{4} &\quad - \delta((x_1, x_0), \emptyset, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_2 \}, 0) &= 0 &\quad - \delta((x_1, x_0), \{ x_2 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_3 \}, 0) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_3 \}, 0) &= 0 \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 0) &= 0 &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 0) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_0 | \emptyset \right] &= \frac{3}{8} \\ - \left [ \phi_0 | \{ x_2 \} \right] &= 0 \\ - \left [ \phi_0 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_0 | \{ x_2, x_3 \} \right] &= 0 - \end{aligned} - - .. math:: \phi_0 = \frac{3}{32} + \frac{1}{32} e^\frac{1}{4} \approx 0.1339 - - Valuation of :math:`x_1` - ======================== - - .. math:: - \begin{aligned} - \delta((x_0, x_1), \emptyset, 1) &= 0 &\quad - \delta((x_1, x_0), \emptyset, 1) &= \frac{3}{4} \\ - \delta((x_0, x_1), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_3 \}, 1) &= 0 &\quad - \delta((x_1, x_0), \{ x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} \\ - \delta((x_0, x_1), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_1, x_0), \{ x_2, x_3 \}, 1) &= \frac{1}{4} e^\frac{1}{4} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_1 | \emptyset \right] &= \frac{3}{8} \\ - \left [ \phi_1 | \{ x_2 \} \right] &= \frac{1}{4} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_3 \} \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_1 | \{ x_2, x_3 \} \right] &= \frac{1}{4} e^\frac{1}{4} - \end{aligned} - - .. math:: \phi_0 = \frac{3}{32} + \frac{5}{32} e^\frac{1}{4} \approx 0.2944 - - Valuation of :math:`x_2` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \emptyset, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_3, x_2), \emptyset, 2) &= 0 \\ - \delta((x_2, x_3), \{ x_0 \}, 2) &= \frac{1}{4} e^\frac{1}{4} &\quad - \delta((x_3, x_2), \{ x_0 \}, 2) - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_1 \}, 2) &= 0 \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 2) &= \frac{1}{4} e^\frac{1}{2} &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 2) &= 0 - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_2 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_2 | \{ x_0 \} \right] - &= \frac{1}{4} e^\frac{1}{4} - \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_2 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: - \phi_2 = \frac{5}{32} e^\frac{1}{4} + \frac{1}{32} e^\frac{1}{2} \approx 0.2522 - - Valuation of :math:`x_3` - ======================== - - .. math:: - \begin{aligned} - \delta((x_2, x_3), \emptyset, 3) &= 0 &\quad - \delta((x_3, x_2), \emptyset, 3) &= \frac{1}{4} e^\frac{1}{4} \\ - \delta((x_2, x_3), \{ x_0 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} \\ - \delta((x_2, x_3), \{ x_0, x_1 \}, 3) &= 0 &\quad - \delta((x_3, x_2), \{ x_0, x_1 \}, 3) &= \frac{1}{4} e^\frac{1}{2} - \end{aligned} - - .. math:: - \begin{aligned} - \left [ \phi_3 | \emptyset \right] &= \frac{1}{8} e^\frac{1}{4} \\ - \left [ \phi_3 | \{ x_0 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} \\ - \left [ \phi_3 | \{ x_0, x_1 \} \right] &= \frac{1}{8} e^\frac{1}{2} - \end{aligned} - - .. math:: - \phi_3 = \frac{1}{32} e^\frac{1}{4} + \frac{3}{32} e^\frac{1}{2} \approx 0.1947 + Note that this special case doesn't set the utility to 0 if the permutation is + empty and additionally allows $S^{(k)} = \emptyset$. See + [classwise.py][pydvl.value.shapley.classwise] for details of the derivation. """ return ( { @@ -759,26 +130,24 @@ def linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set() -> T ids=lambda x: "n_resample_complement_sets={}".format(x), ) @pytest.mark.parametrize( - "linear_classifier_cs_scorer_args_exact_solution", + "exact_solution", [ - "linear_classifier_cs_scorer_args_exact_solution_use_default_score", - "linear_classifier_cs_scorer_args_exact_solution_use_default_score_norm", - "linear_classifier_cs_scorer_args_exact_solution_use_add_idx", - "linear_classifier_cs_scorer_args_exact_solution_use_add_idx_empty_set", + "classwise_shapley_exact_solution", + "classwise_shapley_exact_solution_normalized", + "classwise_shapley_exact_solution_no_default", + "classwise_shapley_exact_solution_no_default_allow_empty_set", ], ) def test_classwise_shapley( - linear_classifier_cs_scorer: Utility, - linear_classifier_cs_scorer_args_exact_solution: Tuple[Dict, ValuationResult], + classwise_shapley_utility: Utility, + exact_solution: Tuple[Dict, ValuationResult, Dict], n_samples: int, n_resample_complement_sets: int, request, ): - args, exact_solution, check_args = request.getfixturevalue( - linear_classifier_cs_scorer_args_exact_solution - ) + args, exact_solution, check_args = request.getfixturevalue(exact_solution) values = compute_classwise_shapley_values( - linear_classifier_cs_scorer, + classwise_shapley_utility, done=MaxChecks(n_samples), truncation=NoTruncation(), done_sample_complements=MaxChecks(n_resample_complement_sets), @@ -789,17 +158,24 @@ def test_classwise_shapley( assert np.all(values.counts == n_samples * n_resample_complement_sets) -@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)]) -def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple): +def test_classwise_scorer_representation(): """ - Tests the class wise scorer. + Tests the (string) representation of the ClassWiseScorer. """ scorer = ClasswiseScorer("accuracy", initial_label=0) assert str(scorer) == "classwise accuracy" assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))" - x, y, info = dataset_alt_seq_simple + +@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)]) +def test_classwise_scorer_utility(dataset_left_right_margins): + """ + Tests whether the ClassWiseScorer returns the expected utility value. + See [classwise.py][pydvl.value.shapley.classwise] for more details. + """ + scorer = ClasswiseScorer("accuracy", initial_label=0) + x, y, info = dataset_left_right_margins n_element = len(x) target_in_cls_acc_0 = (info["left_margin"] * 100 + 1) / n_element target_out_of_cls_acc_0 = (info["right_margin"] * 100 + 1) / n_element @@ -811,6 +187,30 @@ def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple): assert np.isclose(in_cls_acc_0, target_in_cls_acc_0) assert np.isclose(out_of_cls_acc_0, target_out_of_cls_acc_0) + value = scorer(model, x, y) + assert np.isclose(value, in_cls_acc_0 * np.exp(out_of_cls_acc_0)) + + scorer.label = 1 + value = scorer(model, x, y) + assert np.isclose(value, out_of_cls_acc_0 * np.exp(in_cls_acc_0)) + + +@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)]) +def test_classwise_scorer_is_symmetric( + dataset_left_right_margins, +): + """ + Tests whether the ClassWiseScorer is symmetric. For a two-class classification the + in-class accuracy for the first label needs to match the out-of-class accuracy for + the second label. See [classwise.py][pydvl.value.shapley.classwise] for more + details. + """ + scorer = ClasswiseScorer("accuracy", initial_label=0) + x, y, info = dataset_left_right_margins + model = ThresholdClassifier() + in_cls_acc_0, out_of_cls_acc_0 = scorer.estimate_in_class_and_out_of_class_score( + model, x, y + ) scorer.label = 1 in_cls_acc_1, out_of_cls_acc_1 = scorer.estimate_in_class_and_out_of_class_score( model, x, y @@ -818,32 +218,19 @@ def test_cs_scorer_on_dataset_alt_seq_simple(dataset_alt_seq_simple): assert in_cls_acc_1 == out_of_cls_acc_0 assert in_cls_acc_0 == out_of_cls_acc_1 - scorer.label = 0 - value = scorer(model, x, y) - assert np.isclose(value, in_cls_acc_0 * np.exp(out_of_cls_acc_0)) - - scorer.label = 1 - value = scorer(model, x, y) - assert np.isclose(value, in_cls_acc_1 * np.exp(out_of_cls_acc_1)) - -def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score( - linear_classifier_cs_scorer: Utility, +def test_classwise_scorer_accuracies_manual_derivation( + classwise_shapley_utility: Utility, ): + """ + Tests whether the model of the scorer is fitted correctly and returns the expected + in-class and out-of-class accuracies. See + [classwise.py][pydvl.value.shapley.classwise] for more details. + """ subsets_zero = list(powerset(np.array((0, 1)))) subsets_one = list(powerset(np.array((2, 3)))) subsets_zero = [tuple(s) for s in subsets_zero] subsets_one = [tuple(s) for s in subsets_one] - target_betas = pd.DataFrame( - [ - [np.nan, 1 / 3, 1 / 4, 7 / 25], - [0, 3 / 10, 4 / 17, 7 / 26], - [0, 3 / 13, 1 / 5, 7 / 29], - [0, 3 / 14, 4 / 21, 7 / 30], - ], - index=subsets_zero, - columns=subsets_one, - ) target_accuracies_zero = pd.DataFrame( [ [0, 1 / 4, 1 / 4, 1 / 4], @@ -864,8 +251,8 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score( index=subsets_zero, columns=subsets_one, ) - model = linear_classifier_cs_scorer.model - scorer = cast(ClasswiseScorer, linear_classifier_cs_scorer.scorer) + model = classwise_shapley_utility.model + scorer = cast(ClasswiseScorer, classwise_shapley_utility.scorer) scorer.label = 0 for set_zero_idx in range(len(subsets_zero)): @@ -874,20 +261,13 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score( ( x_train, y_train, - ) = linear_classifier_cs_scorer.data.get_training_data(indices) - linear_classifier_cs_scorer.model.fit(x_train, y_train) - fitted_beta = linear_classifier_cs_scorer.model._beta # noqa - target_beta = target_betas.iloc[set_zero_idx, set_one_idx] - assert ( - np.isnan(fitted_beta) - if np.isnan(target_beta) - else fitted_beta == target_beta - ) + ) = classwise_shapley_utility.data.get_training_data(indices) + classwise_shapley_utility.model.fit(x_train, y_train) ( x_test, y_test, - ) = linear_classifier_cs_scorer.data.get_test_data() + ) = classwise_shapley_utility.data.get_test_data() ( in_cls_acc_0, in_cls_acc_1, @@ -898,6 +278,69 @@ def test_cs_scorer_on_alt_seq_cf_linear_classifier_cs_score( assert in_cls_acc_1 == target_accuracies_one.iloc[set_zero_idx, set_one_idx] +@pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)]) +def test_classwise_scorer_accuracies_left_right_margins(dataset_left_right_margins): + """ + Tests whether the model of the scorer is fitted correctly and returns the expected + in-class and out-of-class accuracies. See + [classwise.py][pydvl.value.shapley.classwise] for more details. + """ + scorer = ClasswiseScorer("accuracy", initial_label=0) + x, y, info = dataset_left_right_margins + n_element = len(x) + + target_in_cls_acc_0 = (info["left_margin"] * 100 + 1) / n_element + target_out_of_cls_acc_0 = (info["right_margin"] * 100 + 1) / n_element + + model = ThresholdClassifier() + in_cls_acc_0, out_of_cls_acc_0 = scorer.estimate_in_class_and_out_of_class_score( + model, x, y + ) + assert np.isclose(in_cls_acc_0, target_in_cls_acc_0) + assert np.isclose(out_of_cls_acc_0, target_out_of_cls_acc_0) + + +def test_closed_form_linear_classifier( + classwise_shapley_utility: Utility, +): + """ + Tests whether the model is fitted correctly and contains the right $\beta$ + parameter. See [classwise.py][pydvl.value.shapley.classwise] for more details. + """ + subsets_zero = list(powerset(np.array((0, 1)))) + subsets_one = list(powerset(np.array((2, 3)))) + subsets_zero = [tuple(s) for s in subsets_zero] + subsets_one = [tuple(s) for s in subsets_one] + target_betas = pd.DataFrame( + [ + [np.nan, 1 / 3, 1 / 4, 7 / 25], + [0, 3 / 10, 4 / 17, 7 / 26], + [0, 3 / 13, 1 / 5, 7 / 29], + [0, 3 / 14, 4 / 21, 7 / 30], + ], + index=subsets_zero, + columns=subsets_one, + ) + scorer = cast(ClasswiseScorer, classwise_shapley_utility.scorer) + scorer.label = 0 + + for set_zero_idx in range(len(subsets_zero)): + for set_one_idx in range(len(subsets_one)): + indices = list(subsets_zero[set_zero_idx] + subsets_one[set_one_idx]) + ( + x_train, + y_train, + ) = classwise_shapley_utility.data.get_training_data(indices) + classwise_shapley_utility.model.fit(x_train, y_train) + fitted_beta = classwise_shapley_utility.model._beta # noqa + target_beta = target_betas.iloc[set_zero_idx, set_one_idx] + assert ( + np.isnan(fitted_beta) + if np.isnan(target_beta) + else fitted_beta == target_beta + ) + + class ThresholdClassifier: def fit(self, x: NDArray, y: NDArray) -> float: raise NotImplementedError("Mock model") @@ -933,19 +376,22 @@ def score(self, x: NDArray, y: NDArray) -> float: @pytest.fixture(scope="function") -def linear_classifier_cs_scorer( - dataset_alt_seq_full: Dataset, +def classwise_shapley_utility( + dataset_manual_derivation: Dataset, ) -> Utility: return Utility( ClosedFormLinearClassifier(), - dataset_alt_seq_full, + dataset_manual_derivation, ClasswiseScorer("accuracy"), catch_errors=False, ) @pytest.fixture(scope="function") -def dataset_alt_seq_full() -> Dataset: +def dataset_manual_derivation() -> Dataset: + """ + See [classwise.py][pydvl.value.shapley.classwise] for more details. + """ x_train = np.arange(1, 5).reshape([-1, 1]) y_train = np.array([0, 0, 1, 1]) x_test = x_train @@ -954,7 +400,7 @@ def dataset_alt_seq_full() -> Dataset: @pytest.fixture(scope="function") -def dataset_alt_seq_simple( +def dataset_left_right_margins( n_element: int, left_margin: float, right_margin: float ) -> Tuple[NDArray[np.float_], NDArray[np.int_], Dict[str, float]]: """