
Commit 377756a
Add input data for simulations and supporting notebooks to create output charts from results.
mthielbar committed Dec 27, 2023
1 parent 83f1782 commit 377756a
Showing 8 changed files with 10,719 additions and 11 deletions.
3,589 changes: 3,589 additions & 0 deletions examples/probabilistic_fairness/input_data/sampled_surrogate_inputs.csv

Large diffs are not rendered by default.

1,909 changes: 1,909 additions & 0 deletions examples/probabilistic_fairness/notebooks/analyze_prob_vs_model.ipynb

Large diffs are not rendered by default.

5,209 changes: 5,209 additions & 0 deletions examples/probabilistic_fairness/notebooks/analyze_sample_size_sim.ipynb

Large diffs are not rendered by default.

(diff for one of the changed files; filename not rendered)
@@ -6,8 +6,10 @@
sys.path.append('../../jurity/tests')
sys.path.append('../../jurity/jurity')
from jurity.fairness import BinaryFairnessMetrics as bfm

from test_utils_proba import UtilsProbaSimulator

+output_path='~/Documents/data/jurity_tests/simulations/'

testing_simulation=False
n_runs=30
avg_counts=[30,50]
@@ -22,11 +24,11 @@
"moderately_unfair":moderately_unfair_sim,
"very_unfair":very_unfair_sim,
"extremely_unfair":extremely_unfair_sim}
-surrogates=pd.read_csv('./supporting_data/surrogate_inputs.csv')
+surrogates=pd.read_csv('../input_data/surrogate_inputs.csv')
if testing_simulation:
-    output_string = '~/Documents/data/jurity_tests/simulations/{0}_simulation_count_{1}_surrogates_{2}_test.csv'
+    output_string = output_path+'{0}_simulation_count_{1}_surrogates_{2}_test.csv'
else:
-    output_string = '~/Documents/data/jurity_tests/simulations/{0}_simulation_count_{1}_surrogates_{2}.csv'
+    output_string = output_path+'{0}_simulation_count_{1}_surrogates_{2}.csv'

def run_one_sim(simulator, membership_df,count_mean,rng=np.random.default_rng()):
    membership_df["count"]=pd.Series(rng.poisson(lam=count_mean,size=membership_df.shape[0]))
@@ -36,7 +38,6 @@ def run_one_sim(simulator, membership_df,count_mean,rng=np.random.default_rng())
    prob_metrics=bfm.get_all_scores(test_data["label"],test_data["prediction"],
                                    membership_df.set_index("ZIP")[["not_protected","protected"]],
                                    test_data["ZIP"],[1]).rename(columns={"Value":"probabilistic_estimate"})
-    #TODO: Add simulation for argmax
    predicted_class=test_data[["not_protected","protected"]].values.tolist()
    argmax_metrics=bfm.get_all_scores(test_data["label"].values,test_data["prediction"].values,
                                      predicted_class).rename(columns={"Value":"argmax_estimate"})
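
To make the two call styles above easier to try outside the simulation harness, here is a minimal, self-contained sketch. It is illustrative only: the labels, predictions, and ZIP-level membership probabilities are made up, and it assumes the same positional get_all_scores arguments used in this script.

# Illustrative sketch only: toy values, not part of the commit. Assumes
# jurity's BinaryFairnessMetrics.get_all_scores accepts the positional
# arguments used in the script above.
import pandas as pd
from jurity.fairness import BinaryFairnessMetrics as bfm

labels = pd.Series([1, 0, 1, 0, 1, 1])       # ground-truth binary labels
predictions = pd.Series([1, 0, 0, 0, 1, 1])  # model predictions
zips = pd.Series([10001, 10001, 10002, 10002, 10003, 10003])  # surrogate class per row

# P(membership | surrogate): one row per surrogate (ZIP) value.
memberships = pd.DataFrame(
    {"not_protected": [0.8, 0.4, 0.5], "protected": [0.2, 0.6, 0.5]},
    index=[10001, 10002, 10003])

# Probabilistic estimate: surrogate-level membership table, per-row
# surrogates, and [1] as the protected membership label.
prob_scores = bfm.get_all_scores(labels, predictions, memberships, zips, [1])

# Argmax-style estimate: per-row membership likelihoods passed directly.
row_likelihoods = memberships.loc[zips].values.tolist()
argmax_scores = bfm.get_all_scores(labels.values, predictions.values, row_likelihoods)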
(diff for another changed file; filename not rendered)
@@ -6,8 +6,8 @@
sys.path.append('../../jurity/tests')
sys.path.append('../../jurity/jurity')
from jurity.fairness import BinaryFairnessMetrics as bfm

from test_utils_proba import UtilsProbaSimulator
+output_path='~/Documents/data/jurity_tests/simulations/sample_size/min_weight_0/'
testing_simulation=False
n_runs=30
avg_counts=[5,10,20,30,40]
@@ -26,9 +26,9 @@
surrogates=pd.read_csv('./supporting_data/surrogate_inputs.csv')
surrogates["ZIP"]=surrogates["ZIP"].astype(int)
if testing_simulation:
-    output_string = '~/Documents/data/jurity_tests/simulations/sample_size/min_weight_0/{0}_simulation_count_{1}_test_surrogates_{2}.csv'
+    output_string = output_path+'{0}_simulation_count_{1}_test_surrogates_{2}.csv'
else:
-    output_string = '~/Documents/data/jurity_tests/simulations/sample_size/min_weight_0/{0}_simulation_count_{1}_surrogates_{2}.csv'
+    output_string = output_path+'{0}_simulation_count_{1}_surrogates_{2}.csv'

def run_one_sim(test_data,membership_df):
    #Sometimes the sub-sampling leads to data errors.
(diff for another changed file; filename not rendered)
@@ -3,9 +3,9 @@ This folder contains the files necessary to reproduce the results from
academic papers demonstrating the accuracy of probabilistic
fairness under different sample sizes. Probabilistic fairness is a technique
unique to jurity that allows users to calculate fairness metrics when protected status is
-unknown but a <i>surrogate class</i> is available. A surrogate class is
-an alternative feature that divides the population into groups and where the probability
-of protected class membership is known given the surrogate class.
+unknown but a <i>surrogate class</i> feature is available. A surrogate class divides
+the population into groups where the probability of protected class membership
+is known given surrogate class membership.

Probabilistic fairness, its accuracy, and the simulation method used in
these demonstrations are detailed in
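
As a concrete illustration of the surrogate-class idea described in this README: in the simulation scripts above, ZIP code is the surrogate, and the probability of protected membership within each ZIP is assumed known. A hypothetical sketch of building such a membership table follows; the counts are invented, and real analyses would use census or similar reference data.

# Hypothetical sketch: derive P(protected | ZIP) from invented counts.
import pandas as pd

counts = pd.DataFrame(
    {"protected": [120, 300, 250], "not_protected": [880, 700, 750]},
    index=pd.Index([10001, 10002, 10003], name="ZIP"))

# Normalize each row so protected + not_protected sums to 1.
membership_probs = counts.div(counts.sum(axis=1), axis=0)
print(membership_probs)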
