
Commit

update
skadio committed Sep 6, 2023
1 parent f5e91cd commit 10b1c9e
Showing 16 changed files with 296 additions and 201 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.txt
@@ -3,10 +3,11 @@ CHANGELOG
=========

-------------------------------------------------------------------------------
Feb 09, 2022 2.0.0
Sep 09, 2022 2.0.0
-------------------------------------------------------------------------------

- Probabalistic fairness metrics are added based on membership likelihoods and surrogates.
- Probabilistic fairness metrics are added based on membership likelihoods and surrogates -- thanks to @mthielbar
- Algorithm based on Surrogate Membership for Inferred Metrics in Fairness Evaluation (LION 2023)

-------------------------------------------------------------------------------
April 21, 2023 1.3.3
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -1,2 +1,2 @@
# These owners will be the default owners for everything in the repo.
* @bkleyn @dorukkilitcioglu @filip_michalsky @mthielbar @skadio
* @bkleyn @skadio
50 changes: 44 additions & 6 deletions README.md
@@ -3,9 +3,12 @@

# Jurity: Fairness & Evaluation Library

Jurity is a research library that provides fairness metrics, recommender system evaluations, classification metrics and bias mitigation techniques. The library adheres to PEP-8 standards and is tested heavily.
Jurity ([LION'23](), [ICMLA'21](https://ieeexplore.ieee.org/document/9680169)) is a research library
that provides fairness metrics, recommender system evaluations, classification metrics and bias mitigation techniques.
The library adheres to PEP-8 standards and is tested heavily.

Jurity is developed by the Artificial Intelligence Center of Excellence at Fidelity Investments. Documentation is available at [fidelity.github.io/jurity](https://fidelity.github.io/jurity).
Jurity is developed by the Artificial Intelligence Center of Excellence at Fidelity Investments.
Documentation is available at [fidelity.github.io/jurity](https://fidelity.github.io/jurity).

## Fairness Metrics
* [Average Odds](https://fidelity.github.io/jurity/about_fairness.html#average-odds)
@@ -50,7 +53,7 @@ from jurity.fairness import BinaryFairnessMetrics, MultiClassFairnessMetrics
binary_predictions = [1, 1, 0, 1, 0, 0]
multi_class_predictions = ["a", "b", "c", "b", "a", "a"]
multi_class_multi_label_predictions = [["a", "b"], ["b", "c"], ["b"], ["a", "b"], ["c", "a"], ["c"]]
is_member = [0, 0, 0, 1, 1, 1]
memberships = [0, 0, 0, 1, 1, 1]
classes = ["a", "b", "c"]

# Metrics (see also other available metrics)
@@ -62,11 +65,46 @@ print("Metric:", metric.description)
print("Lower Bound: ", metric.lower_bound)
print("Upper Bound: ", metric.upper_bound)
print("Ideal Value: ", metric.ideal_value)
print("Binary Fairness score: ", metric.get_score(binary_predictions, is_member))
print("Multi-class Fairness scores: ", multi_metric.get_scores(multi_class_predictions, is_member))
print("Multi-class multi-label Fairness scores: ", multi_metric.get_scores(multi_class_multi_label_predictions, is_member))
print("Binary Fairness score: ", metric.get_score(binary_predictions, memberships))
print("Multi-class Fairness scores: ", multi_metric.get_scores(multi_class_predictions, memberships))
print("Multi-class multi-label Fairness scores: ", multi_metric.get_scores(multi_class_multi_label_predictions, memberships))
```

## Quick Start: Probabilistic Fairness Evaluation

What if we do not know the protected membership attribute of each sample?
This is the case for _probabilistic_ fairness evaluation that we studied in
[Surrogate Membership for Inferred Metrics in Fairness Evaluation (LION 2023)]().
Instead of deterministic membership at the individual level,
we assume access to its surrogate at the group level.
This surrogate information provides the probability of membership to each protected group.
We can then _infer_ the fairness metrics using a bootstrapping technique as follows:

```python
# Import binary and multi-class fairness metrics
from jurity.fairness import BinaryFairnessMetrics

# Data
binary_predictions = [1, 1, 0, 1]
# We do not have access to "deterministic" 0/1 membership of each sample/individual, as before.
# Instead, we have access to surrogate membership of each sample at the group level.
# Within each surrogate group, we know the "probability" of membership to each protected class
# Then, we have probabilistic membership for each sample and can calculate fairness metrics
surrogates = [0, 2, 0, 1]
memberships = [[0.2, 0.8], [0.4, 0.6], [0.2, 0.8], [0.9, 0.1]]

# Metrics (see also other available metrics)
metric = BinaryFairnessMetrics.StatisticalParity()

# Scores
print("Metric:", metric.description)
print("Lower Bound: ", metric.lower_bound)
print("Upper Bound: ", metric.upper_bound)
print("Ideal Value: ", metric.ideal_value)
print("Binary Fairness score: ", metric.get_score(binary_predictions, memberships))
```


## Quick Start: Bias Mitigation

```python
44 changes: 23 additions & 21 deletions jurity/fairness/__init__.py
@@ -11,8 +11,8 @@

from jurity.fairness.base import _BaseBinaryFairness
from jurity.fairness.base import _BaseMultiClassMetric
from jurity.utils import check_inputs, check_inputs_argmax,is_deterministic, check_inputs_proba
from jurity.utils import Constants
from jurity.utils import check_inputs, check_inputs_argmax, is_deterministic, check_inputs_proba
from jurity.utils_proba import get_bootstrap_results
from .average_odds import AverageOdds
from .disparate_impact import BinaryDisparateImpact, MultiDisparateImpact
@@ -42,31 +42,35 @@ class BinaryFairnessMetrics(NamedTuple):
def get_all_scores(labels: Union[List, np.ndarray, pd.Series],
predictions: Union[List, np.ndarray, pd.Series],
memberships: Union[List, np.ndarray, pd.Series],
surrogates: Union[List, np.ndarray, pd.Series]=None,
surrogates: Union[List, np.ndarray, pd.Series] = None,
membership_labels: Union[str, float, int, List, np.array] = 1) -> pd.DataFrame:
"""
Calculates and tabulates all of the fairness metric scores.
Calculates and tabulates all fairness metric scores.
Parameters
----------
labels: Union[List, np.ndarray, pd.Series]
Binary ground truth labels for each sample.
predictions: Union[List, np.ndarray, pd.Series]
Binary predictions from some black-box classifier (0/1).
Binary prediction for each sample from a binary (0/1) black-box classifier.
memberships: Union[List, np.ndarray, pd.Series, List[List], pd.DataFrame],
Binary prediction for each sample from a black-box classifier binary (0/1).
memberships: Union[List, np.ndarray, pd.Series, List[List], pd.DataFrame]
Membership attribute for each sample.
If deterministic, it is a binary label for each sample [0, 1, 0, .., 1]
If probabilistic, it is the likelihoods array of membership labels for each sample. [[0.6, 0.2, 0.2], .., [..]]
If deterministic, it is the binary label for each sample [0, 1, 0, ..., 1]
If probabilistic, it is the likelihoods array of membership labels
for each sample, i.e., a two-dim array [[0.6, 0.2, 0.2], ..., [..]]
surrogates: Union[List, np.ndarray, pd.Series]
Surrogate class attribute for each sample.
If the membership is deterministic, surrogates are not needed.
If the membership is probabilistic,
- if surrogates are given, inferred metrics are used to calculate the fairness metric as proposed in [1]_.
- when surrogates are not given, the arg max likelihood is considered as the membership for each sample.
- if surrogates are given, inferred metrics are used
to calculate the fairness metric as proposed in [1]_.
- when surrogates are not given, the arg max likelihood is used as the membership for each sample.
Default is None.
membership_labels: Union[int, float, str, List[int],np.array[int]]
Labels indicating group membership.
If the membership is deterministic, a single str/int is expected, e.g., 1. Default is 1.
If the membership is deterministic, a single str/int is expected, e.g., 1.
If the membership is probabilistic, a list or np.array of int is expected,
with the positions of the protected groups in the memberships vectors (e.g, [1, 2, 3]
with the index of the protected groups in the memberships array,
e.g., [1, 2, 3], if indexes 1-2-3 are protected.
Default value is 1.
Returns
----------
@@ -85,10 +89,9 @@ def get_all_scores(labels: Union[List, np.ndarray, pd.Series],
df = pd.DataFrame(columns=["Metric", "Value", "Ideal Value", "Lower Bound", "Upper Bound"])

if not is_deterministic(memberships) and surrogates is not None:
bootstrap_results=get_bootstrap_results(predictions,memberships,surrogates,
membership_labels, labels)
bootstrap_results = get_bootstrap_results(predictions, memberships, surrogates, membership_labels, labels)
else:
bootstrap_results=None
bootstrap_results = None

for fairness_func in fairness_funcs:

@@ -97,12 +100,12 @@ def get_all_scores(labels: Union[List, np.ndarray, pd.Series],
instance = class_() # dynamically instantiate such class

if bootstrap_results is not None and name in Constants.bootstrap_implemented:
if name in ["PredictiveEquality","AverageOdds","FNRDifference"]:
score=instance.get_score(labels,predictions,memberships,membership_labels,bootstrap_results)
elif name=="StatisticalParity":
if name in ["PredictiveEquality", "AverageOdds","FNRDifference"]:
score = instance.get_score(labels,predictions,memberships,membership_labels,bootstrap_results)
elif name == "StatisticalParity":
score = instance.get_score(predictions, memberships, membership_labels, bootstrap_results)
else:
score=None
score = None
elif name in ["DisparateImpact", "StatisticalParity"]:
score = instance.get_score(predictions, memberships, membership_labels)
elif name in ["GeneralizedEntropyIndex", "TheilIndex"]:
@@ -113,8 +116,7 @@ def get_all_scores(labels: Union[List, np.ndarray, pd.Series],
if score is None:
score = np.nan
score = np.round(score, 3)
df = pd.concat([df, pd.DataFrame(
[[instance.name, score, instance.ideal_value, instance.lower_bound, instance.upper_bound]],
df = pd.concat([df, pd.DataFrame([[instance.name, score, instance.ideal_value, instance.lower_bound, instance.upper_bound]],
columns=df.columns)], axis=0, ignore_index=True)

df = df.set_index("Metric")
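For orientation before the per-metric files below, here is a minimal, hedged sketch of calling `get_all_scores` with probabilistic memberships and surrogates, based on the signature shown in the hunk above. The toy data are illustrative assumptions only; a real bootstrap needs many more samples per surrogate group.

```python
# Hedged usage sketch with assumed toy data; mirrors the signature shown above.
from jurity.fairness import BinaryFairnessMetrics

labels = [1, 0, 1, 0, 1, 0]        # binary ground truth per sample
predictions = [1, 1, 0, 1, 0, 0]   # binary black-box predictions per sample
surrogates = [0, 1, 0, 2, 1, 2]    # surrogate group of each sample

# Likelihood of membership in each protected class, shared within a surrogate group.
memberships = [[0.2, 0.8], [0.6, 0.4], [0.2, 0.8],
               [0.9, 0.1], [0.6, 0.4], [0.9, 0.1]]

# membership_labels=[1] marks index 1 of each likelihood vector as the protected group.
scores = BinaryFairnessMetrics.get_all_scores(labels, predictions, memberships,
                                              surrogates=surrogates,
                                              membership_labels=[1])
print(scores)  # DataFrame with Value, Ideal Value, Lower/Upper Bound per metric
```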
17 changes: 10 additions & 7 deletions jurity/fairness/average_odds.py
@@ -9,11 +9,11 @@
import pandas as pd

from jurity.fairness.base import _BaseBinaryFairness
from jurity.utils import check_and_convert_list_types
from jurity.utils import check_and_convert_list_types, Constants
from jurity.utils import check_inputs, is_deterministic
from jurity.utils import performance_measures, calc_is_member
from jurity.utils_proba import get_bootstrap_results, unpack_bootstrap
from jurity.utils import split_array_based_on_membership_label
from jurity.utils_proba import get_bootstrap_results, unpack_bootstrap


class AverageOdds(_BaseBinaryFairness):
@@ -64,10 +64,10 @@ def get_score(labels: Union[List, np.ndarray, pd.Series],
Default is None.
membership_labels: Union[int, float, str, List[int] np.array[int]]
Labels indicating group membership.
If the membership is deterministic, a single str/int is expected, e.g., 1. Default is 1.
If the membership is deterministic, a single str/int is expected, e.g., 1.
If the membership is probabilistic, a list of int or np.array of int is expected,
with the positions of the protected groups in the memberships vectors (e.g, [1, 2, 3])
Default value is 1.
with the index of the protected groups in the memberships vectors (e.g., [1, 2, 3])
Default value is 1 for deterministic case or [1] for probabilistic case.
bootstrap_results: Optional[pd.DataFrame]
A Pandas dataframe with inferred scores based on surrogate class memberships.
Default value is None.
@@ -104,9 +104,12 @@ def get_score(labels: Union[List, np.ndarray, pd.Series],
tpr_group_1 = results_group_1["TPR"]
tpr_group_2 = results_group_2["TPR"]
else:
if membership_labels == 1:
membership_labels = [1]

if bootstrap_results is None:
bootstrap_results = get_bootstrap_results(predictions, memberships, surrogates, membership_labels, labels)
tpr_group_1, tpr_group_2 = unpack_bootstrap(bootstrap_results, "TPR", membership_labels)
fpr_group_1, fpr_group_2 = unpack_bootstrap(bootstrap_results, "FPR", membership_labels)
tpr_group_1, tpr_group_2 = unpack_bootstrap(bootstrap_results, Constants.TPR, membership_labels)
fpr_group_1, fpr_group_2 = unpack_bootstrap(bootstrap_results, Constants.FPR, membership_labels)

return 0.5 * (fpr_group_1 - fpr_group_2) + 0.5 * (tpr_group_1 - tpr_group_2)
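The return statement above is the average-odds difference, 0.5 * (FPR_1 - FPR_2) + 0.5 * (TPR_1 - TPR_2), between the protected group (group 1) and the rest (group 2). The following NumPy sketch reproduces the same arithmetic on toy data; it is illustrative only, not the library's implementation.

```python
# Illustrative recomputation of the average-odds difference on toy data;
# not the library's implementation.
import numpy as np

labels = np.array([1, 0, 1, 0, 1, 0, 1, 0])
predictions = np.array([1, 1, 0, 0, 1, 0, 1, 1])
is_member = np.array([0, 0, 0, 0, 1, 1, 1, 1])  # deterministic protected-group flag


def rates(y_true, y_pred):
    tpr = y_pred[y_true == 1].mean()  # TP / (TP + FN)
    fpr = y_pred[y_true == 0].mean()  # FP / (FP + TN)
    return tpr, fpr


tpr_1, fpr_1 = rates(labels[is_member == 1], predictions[is_member == 1])
tpr_2, fpr_2 = rates(labels[is_member == 0], predictions[is_member == 0])

print(0.5 * (fpr_1 - fpr_2) + 0.5 * (tpr_1 - tpr_2))  # 0.25 for this toy data
```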
35 changes: 19 additions & 16 deletions jurity/fairness/equal_opportunity.py
@@ -9,11 +9,11 @@
import pandas as pd

from jurity.fairness.base import _BaseBinaryFairness
from jurity.utils import check_and_convert_list_types,calc_is_member
from jurity.utils import check_inputs,is_deterministic
from jurity.utils import check_and_convert_list_types, calc_is_member, Constants
from jurity.utils import check_inputs, is_deterministic
from jurity.utils import performance_measures
from jurity.utils import split_array_based_on_membership_label
from jurity.utils_proba import get_bootstrap_results,unpack_bootstrap
from jurity.utils_proba import get_bootstrap_results, unpack_bootstrap


class EqualOpportunity(_BaseBinaryFairness):
@@ -56,10 +56,10 @@ def get_score(labels: Union[List, np.ndarray, pd.Series],
Default is None.
membership_labels: Union[int, float, str, List[int] np.array[int]]
Labels indicating group membership.
If the membership is deterministic, a single str/int is expected, e.g., 1. Default is 1.
If the membership is deterministic, a single str/int is expected, e.g., 1.
If the membership is probabilistic, a list of int or np.array of int is expected,
with the positions of the protected groups in the memberships vectors (e.g, [1, 2, 3])
Default value is 1.
with the index of the protected groups in the memberships vectors (e.g., [1, 2, 3])
Default value is 1 for deterministic case or [1] for probabilistic case.
bootstrap_results: Optional[pd.DataFrame]
A Pandas dataframe with inferred scores based on surrogate class memberships.
Default value is None.
@@ -70,7 +70,7 @@ def get_score(labels: Union[List, np.ndarray, pd.Series],
"""

# Logic to check input types.
if is_deterministic(memberships) or (surrogates is None and bootstrap_results is None):
if is_deterministic(memberships) or (surrogates is None and bootstrap_results is None):
check_inputs(predictions, memberships, membership_labels, must_have_labels=True, labels=labels)
# Convert to numpy arrays
is_member = calc_is_member(memberships, membership_labels, predictions)
@@ -82,18 +82,21 @@ def get_score(labels: Union[List, np.ndarray, pd.Series],
split_array_based_on_membership_label(labels, is_member, membership_labels)

if np.unique(labels[group_1_group_idx]).shape[0] == 1 or np.unique(labels[group_2_group_idx]).shape[0] == 1:
warnings.warn("Encountered homogeneous unary ground truth either in group 2/group 1 group. \
Equal Opportunity will be calculated but numpy will raise division by zero.")
elif np.unique(labels[group_1_group_idx]).shape[0] == 1 and \
np.unique(labels[group_2_group_idx]).shape[0] == 1:
warnings.warn("Encountered homogeneous unary ground truth in both group 1/group 2. \
Equal Opportunity cannot be calculated.")
warnings.warn("Encountered homogeneous unary ground truth either in group 2/group 1 group. "
"Equal Opportunity will be calculated but numpy will raise division by zero.")
elif (np.unique(labels[group_1_group_idx]).shape[0] == 1 and
np.unique(labels[group_2_group_idx]).shape[0] == 1):
warnings.warn("Encountered homogeneous unary ground truth in both group 1/group 2. "
"Equal Opportunity cannot be calculated.")

tpr_group_1 = performance_measures(labels, predictions, group_1_group_idx, group_membership=True)["TPR"]
tpr_group_2 = performance_measures(labels, predictions, group_2_group_idx, group_membership=True)["TPR"]
else:
if membership_labels == 1:
membership_labels = [1]

if bootstrap_results is None:
bootstrap_results=get_bootstrap_results(predictions, memberships, surrogates, membership_labels, labels)
tpr=bootstrap_results["TPR"]
tpr_group_1,tpr_group_2=unpack_bootstrap(bootstrap_results,"TPR",membership_labels)
bootstrap_results = get_bootstrap_results(predictions, memberships, surrogates, membership_labels, labels)
tpr_group_1, tpr_group_2=unpack_bootstrap(bootstrap_results, Constants.TPR, membership_labels)

return tpr_group_1 - tpr_group_2
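Equal Opportunity, as returned above, is the TPR gap between the two groups. A hedged usage sketch for the deterministic case, assuming the (labels, predictions, memberships) argument order shown in this hunk; the data are toy values.

```python
# Hedged usage sketch for the deterministic case; toy data, assumed argument order.
from jurity.fairness import BinaryFairnessMetrics

labels = [1, 0, 1, 0, 1, 0]
predictions = [1, 1, 0, 1, 1, 0]
memberships = [0, 0, 0, 1, 1, 1]  # deterministic 0/1 protected-group membership

metric = BinaryFairnessMetrics.EqualOpportunity()
print("Equal Opportunity score:", metric.get_score(labels, predictions, memberships))
```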