From eb02f9f2771d309cd0d96f0ccc4b4bc599cc0a99 Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Sat, 11 May 2024 01:25:43 +0300 Subject: [PATCH 1/6] bugfix: drop_above arg used hard-coded names, removed Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- aif360/sklearn/detectors/facts/misc.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/aif360/sklearn/detectors/facts/misc.py b/aif360/sklearn/detectors/facts/misc.py index 7e88859a..2c0e6e66 100644 --- a/aif360/sklearn/detectors/facts/misc.py +++ b/aif360/sklearn/detectors/facts/misc.py @@ -6,7 +6,7 @@ from pandas import DataFrame from .parameters import * -from .predicate import Predicate, recIsValid, featureChangePred, drop_two_above +from .predicate import Predicate, recIsValid, featureChangePred from .frequent_itemsets import run_fpgrowth, preprocessDataset, fpgrowth_out_to_predicate_list from .metrics import ( incorrectRecoursesIfThen, @@ -182,7 +182,6 @@ def valid_ifthens( freqitem_minsupp: float = 0.01, missing_subgroup_val: str = "N/A", drop_infeasible: bool = True, - drop_above: bool = True, feats_not_allowed_to_change: List[str] = [], verbose: bool = True, ) -> List[Tuple[Predicate, Predicate, Dict[str, float], Dict[str, float]]]: @@ -196,7 +195,6 @@ def valid_ifthens( freqitem_minsupp (float): Minimum support threshold for frequent itemset mining. missing_subgroup_val (str): Value indicating missing or unknown subgroup. drop_infeasible (bool): Whether to drop infeasible if-then rules. - drop_above (bool): Whether to drop if-then rules with feature changes above a certain threshold. feats_not_allowed_to_change (list[str]): optionally, the user can provide some features which are not allowed to change at all (e.g. sex). verbose (bool): whether to print intermediate messages and progress bar. Defaults to True. @@ -281,16 +279,6 @@ def valid_ifthens( ) ) - # keep ifs that have change on features of max value 2 - if drop_above == True: - age = [val.left for val in X.age.unique()] - age.sort() - ifthens = [ - (ifs, then, cov) - for ifs, then, cov in ifthens - if drop_two_above(ifs, then, age) - ] - # Calculate correctness percentages if verbose: print("Computing percentages of individuals flipped by each action independently.", flush=True) From 357a61052da667ff0279b3a8692cb91a66e02696 Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Sat, 11 May 2024 01:26:45 +0300 Subject: [PATCH 2/6] FACTS hotfix: drop_infeasible set to False this part of the code is problematic due to the use of hardcoded feature names. The required functionality should be achieved in some other way. Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- aif360/sklearn/detectors/facts/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aif360/sklearn/detectors/facts/__init__.py b/aif360/sklearn/detectors/facts/__init__.py index e01a7cd5..fef2ee1e 100644 --- a/aif360/sklearn/detectors/facts/__init__.py +++ b/aif360/sklearn/detectors/facts/__init__.py @@ -342,6 +342,7 @@ def fit(self, X: DataFrame, verbose: bool = True): model=self.clf, sensitive_attribute=self.prot_attr, freqitem_minsupp=self.freq_itemset_min_supp, + drop_infeasible=False, feats_not_allowed_to_change=list(feats_not_allowed_to_change), verbose=verbose, ) From f8f3985132609e2cc5eae0228034f0c2bbec4295 Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Thu, 16 May 2024 10:46:44 +0300 Subject: [PATCH 3/6] FACTS bugfix: feat weights were not passed properly Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- aif360/sklearn/detectors/facts/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aif360/sklearn/detectors/facts/__init__.py b/aif360/sklearn/detectors/facts/__init__.py index fef2ee1e..4537fc62 100644 --- a/aif360/sklearn/detectors/facts/__init__.py +++ b/aif360/sklearn/detectors/facts/__init__.py @@ -359,7 +359,7 @@ def fit(self, X: DataFrame, verbose: bool = True): params=params, verbose=verbose, ) - self.rules_by_if = calc_costs(rules_by_if) + self.rules_by_if = calc_costs(rules_by_if, params=params) self.dataset = X.copy(deep=True) From 04f81e33ef6a64722c6ba2a8c4cef59dedfa02db Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Mon, 27 May 2024 00:07:17 +0300 Subject: [PATCH 4/6] removed obsolete use of drop_above argument Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- tests/sklearn/facts/test_misc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sklearn/facts/test_misc.py b/tests/sklearn/facts/test_misc.py index 57857df8..fb0023f4 100644 --- a/tests/sklearn/facts/test_misc.py +++ b/tests/sklearn/facts/test_misc.py @@ -54,7 +54,6 @@ def test_rule_generation() -> None: sensitive_attribute="sex", freqitem_minsupp=0.5, drop_infeasible=False, - drop_above=True ) ifthens = rules2rulesbyif(ifthens) From 8710d16d5d62a165fb334f692fcfa0fb7b2d5256 Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:29:26 +0300 Subject: [PATCH 5/6] FACTS: added test for user interface API Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- tests/sklearn/facts/test_init.py | 172 +++++++++++++++++++++++++++++++ tests/sklearn/facts/test_misc.py | 2 - 2 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 tests/sklearn/facts/test_init.py diff --git a/tests/sklearn/facts/test_init.py b/tests/sklearn/facts/test_init.py new file mode 100644 index 00000000..6a6637e7 --- /dev/null +++ b/tests/sklearn/facts/test_init.py @@ -0,0 +1,172 @@ +import numpy as np +import pandas as pd + +import pytest + +from aif360.sklearn.detectors import FACTS, FACTS_bias_scan + +from aif360.sklearn.detectors.facts.predicate import Predicate +from aif360.sklearn.detectors.facts.parameters import ParameterProxy, feature_change_builder + +def test_FACTS(): + class MockModel: + def predict(self, X: pd.DataFrame) -> np.ndarray: + ret = [] + for i, r in X.iterrows(): + if r["a"] > 20: + ret.append(1) + elif r["c"] < 15: + ret.append(1) + else: + ret.append(0) + return np.array(ret) + + X = pd.DataFrame( + [ + [21, 2, 3, 4, "Female", pd.Interval(60, 70)], + [21, 13, 3, 19, "Male", pd.Interval(60, 70)], + [25, 2, 7, 4, "Female", pd.Interval(60, 70)], + [21, 2, 3, 4, "Male", pd.Interval(60, 70)], + [1, 2, 3, 4, "Male", pd.Interval(20, 30)], + [1, 20, 30, 40, "Male", pd.Interval(40, 50)], + [19, 2, 30, 43, "Male", pd.Interval(30, 40)], + [19, 13, 30, 4, "Male", pd.Interval(10, 20)], + [1, 2, 30, 4, "Female", pd.Interval(20, 30)], + [19, 20, 30, 40, "Female", pd.Interval(40, 50)], + [19, 2, 30, 4, "Female", pd.Interval(30, 40)], + ], + columns=["a", "b", "c", "d", "sex", "age"] + ) + model = MockModel() + + detector = FACTS( + clf=model, + prot_attr="sex", + categorical_features=["sex", "age"], + freq_itemset_min_supp=0.5, + feature_weights={f: 10 for f in X.columns}, + feats_not_allowed_to_change=[], + ) + detector.fit(X, verbose=False) + + expected_ifthens = { + Predicate.from_dict({"a": 19}): { + "Male": (2/3, [ + (Predicate.from_dict({"a": 21}), 1., 20.) + ]), + "Female": (2/3, [ + (Predicate.from_dict({"a": 21}), 1., 20.) + ]) + }, + Predicate.from_dict({"c": 30}): { + "Male": (1., [ + (Predicate.from_dict({"c": 3}), 1., 270.) + ]), + "Female": (1., [ + (Predicate.from_dict({"c": 3}), 1., 270.) + ]) + }, + Predicate.from_dict({"a": 19, "c": 30}): { + "Male": (2/3, [ + (Predicate.from_dict({"a": 21, "c": 3}), 1., 290.) + ]), + "Female": (2/3, [ + (Predicate.from_dict({"a": 21, "c": 3}), 1., 290.) + ]) + }, + } + + assert set(expected_ifthens.keys()) == set(detector.rules_by_if) + for ifclause, all_thens in expected_ifthens.items(): + assert detector.rules_by_if[ifclause] == all_thens + +def test_FACTS_bias_scan(): + class MockModel: + def predict(self, X: pd.DataFrame) -> np.ndarray: + ret = [] + for i, r in X.iterrows(): + if r["sex"] == "Female" and r["d"] < 15: + ret.append(0) + elif r["a"] > 20: + ret.append(1) + elif r["c"] < 15: + ret.append(1) + else: + ret.append(0) + return np.array(ret) + + X = pd.DataFrame( + [ + [21, 2, 3, 20, "Female", pd.Interval(60, 70)], + [21, 13, 3, 19, "Male", pd.Interval(60, 70)], + [25, 2, 7, 21, "Female", pd.Interval(60, 70)], + [21, 2, 3, 4, "Male", pd.Interval(60, 70)], + [1, 2, 3, 4, "Male", pd.Interval(20, 30)], + [1, 20, 30, 40, "Male", pd.Interval(40, 50)], + [19, 2, 30, 43, "Male", pd.Interval(30, 40)], + [19, 13, 30, 4, "Male", pd.Interval(10, 20)], + [1, 2, 30, 4, "Female", pd.Interval(20, 30)], + [19, 20, 30, 7, "Female", pd.Interval(40, 50)], + [19, 2, 30, 4, "Female", pd.Interval(30, 40)], + ], + columns=["a", "b", "c", "d", "sex", "age"] + ) + model = MockModel() + + most_biased_subgroups = FACTS_bias_scan( + X=X, + clf=model, + prot_attr="sex", + metric="equal-cost-of-effectiveness", + categorical_features=["sex", "age"], + freq_itemset_min_supp=0.5, + feature_weights={f: 10 for f in X.columns}, + feats_not_allowed_to_change=[], + viewpoint="macro", + sort_strategy="max-cost-diff-decr", + top_count=3, + phi=0.5, + verbose=False, + print_recourse_report=False, + ) + + # just so we can see them here + expected_ifthens = { + Predicate.from_dict({"a": 19}): { + "Male": (2/3, [ + (Predicate.from_dict({"a": 21}), 1., 20.) + ]), + "Female": (2/3, [ + (Predicate.from_dict({"a": 21}), 0., 20.) + ]) + }, + Predicate.from_dict({"c": 30}): { + "Male": (1., [ + (Predicate.from_dict({"c": 7}), 1., 230.), + (Predicate.from_dict({"c": 3}), 1., 270.), + ]), + "Female": (1., [ + (Predicate.from_dict({"c": 7}), 0., 230.), + (Predicate.from_dict({"c": 3}), 1., 270.), + ]) + }, + Predicate.from_dict({"a": 19, "c": 30}): { + "Male": (2/3, [ + (Predicate.from_dict({"a": 21, "c": 3}), 1., 290.) + ]), + "Female": (2/3, [ + (Predicate.from_dict({"a": 21, "c": 3}), 0., 290.) + ]) + }, + } + expected_most_biased_subgroups = [ + ({"a": 19}, float("inf")), + ({"c": 30}, float("inf")), + ({"a": 19, "c": 30}, float("inf")), + ] + + assert len(most_biased_subgroups) == len(expected_most_biased_subgroups) + for g in expected_most_biased_subgroups: + assert g in most_biased_subgroups + for g in most_biased_subgroups: + assert g in expected_most_biased_subgroups diff --git a/tests/sklearn/facts/test_misc.py b/tests/sklearn/facts/test_misc.py index fb0023f4..a10e1a01 100644 --- a/tests/sklearn/facts/test_misc.py +++ b/tests/sklearn/facts/test_misc.py @@ -1,5 +1,3 @@ -from pprint import pprint - import numpy as np import pandas as pd From 34c0ffd74cefe7738e3b5065dac6e055ab9aea45 Mon Sep 17 00:00:00 2001 From: phantom-duck <37215464+phantom-duck@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:44:29 +0300 Subject: [PATCH 6/6] FACTS_bias_scan: test improvement Previously, the test case only had inf costs. Consequently, the exact values of the feature weights were not actually tested properly. Signed-off-by: phantom-duck <37215464+phantom-duck@users.noreply.github.com> --- tests/sklearn/facts/test_init.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/sklearn/facts/test_init.py b/tests/sklearn/facts/test_init.py index 6a6637e7..38a986ec 100644 --- a/tests/sklearn/facts/test_init.py +++ b/tests/sklearn/facts/test_init.py @@ -86,7 +86,10 @@ def predict(self, X: pd.DataFrame) -> np.ndarray: ret = [] for i, r in X.iterrows(): if r["sex"] == "Female" and r["d"] < 15: - ret.append(0) + if r["c"] < 5: + ret.append(1) + else: + ret.append(0) elif r["a"] > 20: ret.append(1) elif r["c"] < 15: @@ -101,7 +104,8 @@ def predict(self, X: pd.DataFrame) -> np.ndarray: [21, 13, 3, 19, "Male", pd.Interval(60, 70)], [25, 2, 7, 21, "Female", pd.Interval(60, 70)], [21, 2, 3, 4, "Male", pd.Interval(60, 70)], - [1, 2, 3, 4, "Male", pd.Interval(20, 30)], + [1, 2, 7, 4, "Male", pd.Interval(20, 30)], + [1, 2, 7, 40, "Female", pd.Interval(20, 30)], [1, 20, 30, 40, "Male", pd.Interval(40, 50)], [19, 2, 30, 43, "Male", pd.Interval(30, 40)], [19, 13, 30, 4, "Male", pd.Interval(10, 20)], @@ -155,14 +159,14 @@ def predict(self, X: pd.DataFrame) -> np.ndarray: (Predicate.from_dict({"a": 21, "c": 3}), 1., 290.) ]), "Female": (2/3, [ - (Predicate.from_dict({"a": 21, "c": 3}), 0., 290.) + (Predicate.from_dict({"a": 21, "c": 3}), 1., 290.) ]) }, } expected_most_biased_subgroups = [ ({"a": 19}, float("inf")), - ({"c": 30}, float("inf")), - ({"a": 19, "c": 30}, float("inf")), + ({"c": 30}, 40.), + ({"a": 19, "c": 30}, 0.), ] assert len(most_biased_subgroups) == len(expected_most_biased_subgroups)