-
Notifications
You must be signed in to change notification settings - Fork 8
/
dataGenerator.py
90 lines (72 loc) · 4.35 KB
/
dataGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numbers
import random

import numpy as np
# Algorithms for generating rankings with varying degrees of fairness.
# Tests for this script can be found in testDataGenerator.py.
def generateUnfairRanking(_ranking, _protected_group, _fairness_probability):
    """
    Generate a ranking with a chosen degree of unfairness towards a group.

    The input ranking is split into its protected and unprotected members,
    each sub-ranking keeping the relative order of the input. The two
    sub-rankings are then merged one position at a time: with probability
    ``_fairness_probability`` the next position is filled from the protected
    members, otherwise from the unprotected members. As soon as one group is
    exhausted, the rest of the other group is appended in order.

    :param _ranking: A ranking (sequence of unique, hashable items)
    :param _protected_group: The protected group, a subset of ``_ranking``
    :param _fairness_probability: A real number in [0, 1] giving the chance
                                  that each position is filled by a protected
                                  member. 0 ranks every unprotected member
                                  first; 1 ranks every protected member first
                                  (``random.random()`` is always below 1);
                                  values in between interleave the groups.
    :return: a new list containing every item of ``_ranking`` exactly once
    :raises TypeError: if ``_fairness_probability`` is not a real number, or
                       if the ranking / protected group fail validation
    :raises ValueError: if ``_fairness_probability`` is outside [0, 1] (NaN
                        included), or if the ranking / protected group fail
                        validation
    """
    # error handling for ranking and protected group
    completeCheckRankingProperties(_ranking, _protected_group)

    # numbers.Real covers int, float and Python 2's long without naming
    # ``long`` (undefined on Python 3). Complex values are rejected here with
    # a clear message instead of failing later in the range comparison.
    if not isinstance(_fairness_probability, numbers.Real):
        raise TypeError("Input fairness probability must be a number")
    # The chained comparison is False for NaN, so NaN is rejected as well.
    if not 0 <= _fairness_probability <= 1:
        raise ValueError("Input fairness probability must be a number in [0,1]")

    # A set gives O(1) exact-match membership tests; testing against a string
    # group directly would do substring matching instead.
    protected_members = set(_protected_group)
    protected = [x for x in _ranking if x in protected_members]
    unprotected = [x for x in _ranking if x not in protected_members]

    unfair_ranking = []
    next_protected = 0
    next_unprotected = 0
    while next_protected < len(protected) and next_unprotected < len(unprotected):
        if random.random() < _fairness_probability:
            unfair_ranking.append(protected[next_protected])
            next_protected += 1
        else:
            unfair_ranking.append(unprotected[next_unprotected])
            next_unprotected += 1

    # At most one of the two groups still has members left; append them in
    # their original relative order.
    unfair_ranking.extend(protected[next_protected:])
    unfair_ranking.extend(unprotected[next_unprotected:])
    return unfair_ranking
# Function for error handling
def completeCheckRankingProperties(_ranking, _protected_group):
    """
    Check whether the input ranking and protected group are valid.

    A valid ranking is a non-empty list, tuple, numpy array or string with no
    repeated items. A valid protected group is a non-empty container of one
    of the same types, without repeated items, that is strictly smaller than
    the ranking and whose members all appear in the ranking.

    :param _ranking: A ranking
    :param _protected_group: The protected group
    :return: None; the function only raises when a check fails
    :raises TypeError: if either argument is not a list, tuple, numpy array
                       or string
    :raises ValueError: if either argument is empty, contains repeated items,
                        if the protected group is not smaller than the
                        ranking, or if it is not a subset of the ranking
    """
    # ``basestring`` only exists on Python 2. (str, type(u"")) names the same
    # text types there and collapses to plain ``str`` on Python 3.
    accepted_types = (list, tuple, np.ndarray, str, type(u""))

    # error handling for input type
    if not isinstance(_ranking, accepted_types):
        raise TypeError("Input ranking must be a list-wise structure defined by '[]' symbol")
    if not isinstance(_protected_group, accepted_types):
        raise TypeError("Input protected group must be a list-wise structure defined by '[]' symbol")

    user_N = len(_ranking)
    pro_N = len(_protected_group)

    # error handling for input value
    if user_N <= 0:  # check size of input ranking
        raise ValueError("Please input a valid ranking")
    if pro_N <= 0:  # check size of protected group
        raise ValueError("Please input a valid protected group whose length is larger than 0")
    if pro_N >= user_N:  # protected group must be strictly smaller than ranking
        raise ValueError("Please input a protected group with size less than total user")

    unique_users = set(_ranking)
    if len(unique_users) != user_N:  # check for repetition in input ranking
        raise ValueError("Please input a valid complete ranking")

    unique_protected = set(_protected_group)
    if len(unique_protected) != pro_N:  # check repetition of protected group
        raise ValueError("Please input a valid protected group that have no repetitive members")

    # The protected group is non-empty and duplicate-free at this point, so a
    # single subset test covers both "no overlap" and "partial overlap".
    if not unique_protected.issubset(unique_users):
        raise ValueError("Please input a valid protected group that is a subset of total user")