Improvements and Bug Fixes for Probabilistic Fairness (#27)
* Add test for get_all_scores

* Bug fix. EqualOpportunity should be included in get_all_scores.

* Small updates to test_utils_proba.py

* Rearrange simulation into its own class.

* Simulator is its own class. Simulator unit tests running clean.

* Small edits to test_utils_proba.py

* Fix small bug that occurs in summarizer when membership_df has a surrogate column not named 'surrogate'. Update unit tests to fix error.

* Add tests for summarizer.

* Cleanup code after merging changes to fix summarizer bug.

* run_bootstrap was using an incorrect class label function call.

* Clean up print statements in is_one_dimensional.

* Clean up deprecation warning caused by cvx.Variable returning a one-dimensional numpy array in EqualizedOdds bias mitigation. Convert to float when necessary.

* Turn off user warnings where possible in test_utils_proba.py. Warnings will still print during unit tests because the higher-level API does not expose an option to turn warnings off. This keeps the API cleaner.

* Update to utils_proba.py

* Edit comments in simulator.

* Update minimum weight to 5 rows, based on results from the simulation experiment on minimum counts per surrogate.

* Make the simulation dataframe large enough that values are stable and unit tests do not fail.

* Add simulation scripts and readme.md for probabilistic fairness.

* Update comments and readme.md

* Add descriptions and citations to readme

* Add input data for simulations and supporting notebooks to create output charts from results.

* update

* update

* update

* update

---------

Co-authored-by: mfthielb <[email protected]>
Co-authored-by: skadio <[email protected]>
3 people authored Jan 25, 2024
1 parent fa968bc commit 87026e1
Showing 5 changed files with 286 additions and 188 deletions.
17 changes: 15 additions & 2 deletions jurity/mitigation/equalized_odds.py
@@ -97,12 +97,25 @@ def fit(self,
        # Solve
        prob.solve()

-        # Save fairness probabilities
+        # Save fairness probabilities (cvxpy value is a numpy array or None)
        self.p2p_prob_0 = variables_0["p2p"].value
        self.n2p_prob_0 = variables_0["n2p"].value
        self.p2p_prob_1 = variables_1["p2p"].value
        self.n2p_prob_1 = variables_1["n2p"].value

+        # Get the scalar/primitive value unless it is None
+        if isinstance(self.p2p_prob_0, np.ndarray):
+            self.p2p_prob_0 = self.p2p_prob_0[0]
+
+        if isinstance(self.n2p_prob_0, np.ndarray):
+            self.n2p_prob_0 = self.n2p_prob_0[0]
+
+        if isinstance(self.p2p_prob_1, np.ndarray):
+            self.p2p_prob_1 = self.p2p_prob_1[0]
+
+        if isinstance(self.n2p_prob_1, np.ndarray):
+            self.n2p_prob_1 = self.n2p_prob_1[0]
+
    def fit_transform(self,
                      labels: Union[List, np.ndarray, pd.Series],
                      predictions: Union[List, np.ndarray, pd.Series],
@@ -227,7 +240,7 @@ def _get_variables(self, labels, likelihoods, predictions, group):
        p2p = cvx.Variable(1)
        n2p = cvx.Variable(1)
        n2n = cvx.Variable(1)  # trivially equals to 1 - n2p
-        p2n = cvx.Variable(1) # trivially equals to 1 - p2p
+        p2n = cvx.Variable(1)  # trivially equals to 1 - p2p

        # Baseline label-wise FNR, FPR, TPR, TNR for the group
        tpr, fpr, tnr, fnr = self._get_label_wise_rates(labels, predictions)
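The guard added above exists because cvxpy's `Variable(1).value` comes back as a one-element numpy array (or `None` before the problem is solved), and treating that array as a scalar triggers a numpy deprecation warning. A minimal standalone sketch of the pattern, assuming cvxpy and numpy are installed (the toy objective is illustrative only):

import cvxpy as cvx
import numpy as np

# A one-dimensional cvxpy variable: after solving, .value is an ndarray of shape (1,)
p2p = cvx.Variable(1)
prob = cvx.Problem(cvx.Minimize(cvx.square(p2p - 0.5)), [p2p >= 0, p2p <= 1])
prob.solve()

value = p2p.value  # array([0.5]) here; would be None if the problem were never solved
if isinstance(value, np.ndarray):
    value = value[0]  # extract the scalar explicitly instead of relying on implicit conversion
print(value)  # 0.5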
6 changes: 3 additions & 3 deletions jurity/utils.py
@@ -218,9 +218,9 @@ def is_one_dimensional(array):
    if isinstance(array, pd.Series) and array.dtype != 'object':
        return True
    elif type(array) == list:
-        print(array[0])
-        print(type(array[0]))
-        print(isinstance(array[0], np.ndarray))
+        # print(array[0])
+        # print(type(array[0]))
+        # print(isinstance(array[0], np.ndarray))
        if type(array[0]) != list and (not isinstance(array[0], np.ndarray)):
            return True
        else:
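For context, the branch shown above treats a list as one-dimensional only when its first element is neither a list nor a numpy array. A small sketch of that condition on illustrative inputs:

import numpy as np

flat = [0, 1, 1, 0]
nested = [np.array([0.2, 0.8]), np.array([0.6, 0.4])]

# The condition from the elif branch above
print(type(flat[0]) != list and not isinstance(flat[0], np.ndarray))      # True
print(type(nested[0]) != list and not isinstance(nested[0], np.ndarray))  # False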
38 changes: 20 additions & 18 deletions jurity/utils_proba.py
@@ -58,9 +58,9 @@ def check_memberships_proba_df(memberships_df: pd.DataFrame, unique_surrogate_li
    if membership_names is None:
        membership_names = memberships_df.columns
    sum_to_one = pd.Series(memberships_df.sum(axis=1)).apply(lambda x: math.isclose(x, 1.0))
-    check_true(len(unique_surrogate_list) == memberships_df.shape[0],
-               InputShapeError("", "Memberships dataframe must have one row per surrogate class."))
-    check_true(set(memberships_df.index.values) == unique_surrogate_list,
+    check_true(len(unique_surrogate_list) <= memberships_df.shape[0],
+               InputShapeError("", "Number of unique surrogates cannot exceed the number of surrogate memberships."))
+    check_true(unique_surrogate_list.issubset(memberships_df.index.values),
               InputShapeError("", "Memberships dataframe must have an index with surrogate values"))
    check_true(memberships_df.shape[1] == len(membership_names),
               InputShapeError("", "Memberships dataframe must have one column per protected class name."))
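The relaxed checks allow the memberships dataframe to carry more surrogate rows than actually appear in the data, as long as every observed surrogate is covered and each row's likelihoods sum to one. A minimal sketch of input that now passes, with illustrative surrogate values and column names:

import pandas as pd

# One row per surrogate class; the index holds the surrogate values.
memberships_df = pd.DataFrame({"group_a": [0.7, 0.2, 0.5],
                               "group_b": [0.3, 0.8, 0.5]},
                              index=[101, 102, 103])

# Surrogates observed in the data: a subset of the index is now sufficient.
unique_surrogate_list = {101, 102}

assert len(unique_surrogate_list) <= memberships_df.shape[0]
assert unique_surrogate_list.issubset(memberships_df.index.values)
assert memberships_df.sum(axis=1).apply(lambda x: abs(x - 1.0) < 1e-9).all()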
@@ -464,7 +464,7 @@ def run_bootstrap(self, bootstrap_trials: int) -> pd.DataFrame:
        if binary_metrics is not None:
            all_model_results.append(pd.concat([binary_metrics, preds], axis=1))
        else:
-            preds['class'] = self.class_labels()
+            preds['class'] = self.all_class_labels()
            all_model_results.append(preds)
        out_data = pd.concat(all_model_results, axis=0).reset_index().drop(["index"], axis=1)
        return out_data
@@ -681,24 +681,24 @@ def get_W_array(self, df: pd.DataFrame) -> np.ndarray:
            raise ValueError("weight name: {0} is not in dataframe.".format(self._weight_name))
        return df[self._weight_name].to_numpy(dtype='f')

-    def get_bias_calculator(self, df: pd.DataFrame, min_weight: int = 30, weight_warnings: bool = True):
+    def get_bias_calculator(self, df: pd.DataFrame, min_weight: int = 5, weight_warnings: bool = True):
        """
        Make bias calculator.
        Arguments:
            df: pd.DataFrame, summarized by surrogate class, with columns for confusion matrix and/or prediction percentages
            min_weight: surrogate classes that are smaller than this value will be dropped.
            weight_warnings: whether to print warnings when too many rows are dropped from the surrogate class matrix
        """
-        if min_weight < 10:
+        if min_weight < 5:
            if weight_warnings:
-                warnings.warn("Recommended minimum count for surrogate class is 30. "
-                              "Minimum weights of less than 10 will give unstable results.")
+                warnings.warn("Recommended minimum count for surrogate class is 5. "
+                              "Minimum weights of less than 5 will give unstable results.")

        if self.weight_name() in df.columns:
            subset = df[df[self._weight_name] >= min_weight]
            if weight_warnings:
-                print("{0} rows removed from dataframe for insufficient weight values"
-                      .format(df.shape[0] - subset.shape[0]))
+                warnings.warn("{0} rows removed from dataframe for insufficient weight values".format(
+                    df.shape[0] - subset.shape[0]))
            if subset.shape[0] < len(self.class_names()):
                raise WeightTooLarge("Input dataframe does not have enough rows to estimate surrogate classes; "
                                     "reduce minimum weight.")
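A sketch of the filtering this method performs, using a hypothetical summary dataframe whose weight column is named "count" (in the real code the column name comes from self.weight_name()):

import warnings
import pandas as pd

min_weight = 5
df = pd.DataFrame({"surrogate": [101, 102, 103], "count": [2, 8, 12]})

# Drop surrogate classes with fewer than min_weight rows, then warn about the removals.
subset = df[df["count"] >= min_weight]
removed = df.shape[0] - subset.shape[0]
if removed > 0:
    warnings.warn("{0} rows removed from dataframe for insufficient weight values".format(removed))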
@@ -735,7 +735,8 @@ def summarize(cls,
                  memberships: Union[List, np.ndarray, pd.Series, pd.DataFrame],
                  surrogates: Union[List, np.ndarray, pd.Series],
                  labels: Union[List, np.ndarray, pd.Series] = None,
-                  membership_names: List[str] = None) -> pd.DataFrame:
+                  membership_names: List[str] = None,
+                  warnings: bool = False) -> pd.DataFrame:
        """
        Return a summary dataframe suitable for bootstrap calculations.
        Arguments:
@@ -768,9 +769,8 @@ def summarize(cls,
        # 2. A dataframe that has a row for each surrogate class value and
        #    a column for each likelihood value. The dataframe must have surrogate class as an index.
        if isinstance(memberships, pd.DataFrame):
-            membership_surrogates = pd.Series(memberships.index.values)
-            membership_surrogates.name = 'surrogates'
-            likes_df = pd.concat([membership_surrogates, memberships], axis=1)
+            name = memberships.index.name
+            likes_df = memberships.reset_index().rename(columns={name: 'surrogates'})
        else:
            if len(memberships) != df.shape[0]:
                len_predictions = len(predictions)
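The rewritten branch flattens the surrogate index into an ordinary column in one step, which also works when the surrogate column has a custom name. A minimal sketch of the pattern with illustrative data (an index named "zip" standing in for the surrogate):

import pandas as pd

memberships = pd.DataFrame({"group_a": [0.7, 0.2], "group_b": [0.3, 0.8]},
                           index=pd.Index([101, 102], name="zip"))

# Move the index into a column and standardize its name to 'surrogates'.
name = memberships.index.name
likes_df = memberships.reset_index().rename(columns={name: "surrogates"})
print(likes_df.columns.tolist())  # ['surrogates', 'group_a', 'group_b']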
@@ -791,7 +791,7 @@
        likes_df.columns = membership_names
        likes_df = likes_df.reset_index()
        summarizer = cls("surrogates", "surrogates", "predictions", true_name=label_name, test_names=test_names)
-        return summarizer.make_summary_data(perf_df=df, surrogate_df=likes_df)
+        return summarizer.make_summary_data(perf_df=df, surrogate_df=likes_df, warnings=warnings)

    def __init__(self, surrogate_surrogate_col_name: str,
                 surrogate_perf_col_name: str,
@@ -896,7 +896,7 @@ def check_read_data(df: pd.DataFrame, needed_names: List[str], df_name: str, id_
    n_unique_ids = df[id_col_name].nunique()
    if not n_rows == n_unique_ids:
        raise Warning(f"Number of unique ids in {df_name} is: {n_unique_ids} but number of rows is {n_rows}")
-    print(f"There are {n_rows} in {df_name}.")
+    # print(f"There are {n_rows} in {df_name}.")
    names = df.columns
    if not set(needed_names).issubset(set(names)):
        raise ValueError("Some necessary columns not in {0} data: {1} are missing.".format(df_name, list(
@@ -981,7 +981,7 @@ def check_surrogate_confusion_matrix(self, confusion_df, merged_df):
        #     return False
        return True

-    def make_summary_data(self, perf_df: pd.DataFrame, surrogate_df: pd.DataFrame = None):
+    def make_summary_data(self, perf_df: pd.DataFrame, surrogate_df: pd.DataFrame = None, warnings=True):
        """
        Function that merges two dfs to make a surrogate-based summary file that includes confusion matrix ratios.
        Arguments:
@@ -992,12 +992,13 @@ def make_summary_data(self, perf_df: pd.DataFrame, surrogate_df: pd.DataFrame =
        self.check_surrogate_data(surrogate_df)
        merged_data = perf_df.merge(surrogate_df, left_on=self.surrogate_perf_col_name(),
                                    right_on=self.surrogate_surrogate_col_name())
-        self.check_merged_data(merged_data, perf_df)
+        self.check_merged_data(merged_data, perf_df, warnings)

        # Create accuracy columns that measure true positive, true negative, etc.
        accuracy_df = pd.concat([merged_data[self.surrogate_surrogate_col_name()],
                                 self.confusion_matrix_actual(merged_data, self.pred_name(), self.true_name())], axis=1)
        # Use calc_accuracy_metrics to create surrogate-level summary
+        # TODO: Accommodate cases where we don't have a binary classifier
        confusion_matrix_surrogate_summary = self.calc_accuracy_metrics(accuracy_df)
        self.check_surrogate_confusion_matrix(confusion_matrix_surrogate_summary, merged_data)
        return confusion_matrix_surrogate_summary.join(
@@ -1068,3 +1069,4 @@ def calc_accuracy_metrics(self, test_df):
                    Constants.false_negative_ratio, Constants.false_positive_ratio]
        # Return a dataframe that has the stats by group. Use these to compare to expected values
        return check_accuracy[out_cols]
+    # TODO: Needs string method
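For context, the surrogate-level summary built here boils down to per-surrogate means of one-hot confusion-matrix columns. A simplified sketch of that idea, not the summarizer's actual implementation (column names are illustrative):

import pandas as pd

acc = pd.DataFrame({"surrogates":     [101, 101, 102, 102],
                    "true_positive":  [1, 0, 0, 0],
                    "true_negative":  [0, 1, 0, 0],
                    "false_positive": [0, 0, 1, 0],
                    "false_negative": [0, 0, 0, 1]})

# Per-surrogate ratios: the mean of each one-hot column within a surrogate class.
ratios = acc.groupby("surrogates").mean()
print(ratios.loc[101, "true_positive"])  # 0.5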
38 changes: 8 additions & 30 deletions tests/test_mitigation_binary.py
@@ -150,21 +150,10 @@ def test_numerical_stability_mixing_rate_small(self):

mitigation.fit(labels, predictions, likelihoods, is_member)

p2p_prob_0 = mitigation.p2p_prob_0
n2p_prob_0 = mitigation.n2p_prob_0
p2p_prob_1 = mitigation.p2p_prob_1
n2p_prob_1 = mitigation.n2p_prob_1

# Convert types
p2p_prob_0 = p2p_prob_0.item()
n2p_prob_0 = n2p_prob_0.item()
p2p_prob_1 = p2p_prob_1.item()
n2p_prob_1 = n2p_prob_1.item()

self.assertAlmostEqual(p2p_prob_0, 0.8429378)
self.assertAlmostEqual(n2p_prob_0, 1.)
self.assertAlmostEqual(p2p_prob_1, 1.)
self.assertAlmostEqual(n2p_prob_1, 0.8893096)
self.assertAlmostEqual(mitigation.p2p_prob_0, 0.8429378)
self.assertAlmostEqual(mitigation.n2p_prob_0, 1.)
self.assertAlmostEqual(mitigation.p2p_prob_1, 1.)
self.assertAlmostEqual(mitigation.n2p_prob_1, 0.8893096)

def test_numerical_stability_mixing_rate_large(self):

@@ -183,21 +172,10 @@ def test_numerical_stability_mixing_rate_large(self):

mitigation.fit(labels, predictions, likelihoods, is_member)

p2p_prob_0 = mitigation.p2p_prob_0
n2p_prob_0 = mitigation.n2p_prob_0
p2p_prob_1 = mitigation.p2p_prob_1
n2p_prob_1 = mitigation.n2p_prob_1

# Convert types
p2p_prob_0 = p2p_prob_0.item()
n2p_prob_0 = n2p_prob_0.item()
p2p_prob_1 = p2p_prob_1.item()
n2p_prob_1 = n2p_prob_1.item()

self.assertAlmostEqual(p2p_prob_0, 0.819513)
self.assertAlmostEqual(n2p_prob_0, 1.)
self.assertAlmostEqual(p2p_prob_1, 0.644566)
self.assertAlmostEqual(n2p_prob_1, 1.)
self.assertAlmostEqual(mitigation.p2p_prob_0, 0.819513)
self.assertAlmostEqual(mitigation.n2p_prob_0, 1.)
self.assertAlmostEqual(mitigation.p2p_prob_1, 0.644566)
self.assertAlmostEqual(mitigation.n2p_prob_1, 1.)

def test_numerical_stability_bias_mitigation(self):

(diff for the fifth changed file not loaded)
