-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjaccard.py
134 lines (118 loc) · 4.32 KB
/
jaccard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from base import BaseComparisons
class Jaccard(BaseComparisons):
    """Jaccard (Ja) index built on the counters provided by ``BaseComparisons``.

    n=2 formula:
        (3 * a)/(3 * a + b + c)

    Four variants of the index are computed when the object is created and
    stored as attributes (see ``Ja_*`` below).

    Attributes
    ----------
    fingerprints : np.ndarray
        Numpy array with the fingerprints that will be compared.
        The fingerprints must be also given as Numpy arrays.
    c_threshold : {None, 'dissimilar', int}
        Coincidence threshold.
    Ja_sim_wdis
        Index with sim-counters and weighted denominator.
    Ja_1sim_wdis
        Index with 1-sim-counters and weighted denominator.
    Ja_sim_dis
        Index with sim-counters and unweighted denominator.
    Ja_1sim_dis
        Index with 1-sim-counters and unweighted denominator.

    Properties
    ----------
    n_fingerprints : int
        Number of fingerprints that will be compared.

    Methods
    -------
    __init__(self, fingerprints, c_threshold=None, w_factor="fraction")
        Initialize the object.
    assign_fingerprints(fingerprints)
        Assign fingerprints.
    assign_c_threshold(c_threshold)
        Assign coincidence threshold.
    matches()
        Calculate the matches between the fingerprints.
    set_d_vector()
        Calculate the d vector.
    set_w_factor(w_factor)
        Calculate weight factors.
    set_weighted_matches()
        Calculate weighted matches.
    set_a()
        Calculate the (unweighted) 1-similarity counter.
    set_d()
        Calculate the (unweighted) 0-similarity counter.
    set_weighted_a()
        Calculate the (weighted) 1-similarity counter.
    set_weighted_d()
        Calculate the (weighted) 0-similarity counter.
    set_dis_counters()
        Calculate the (unweighted) dissimilarity counters.
    set_weighted_dis_counters()
        Calculate the (weighted) dissimilarity counters.
    set_total_sim_counter()
        Calculate the total number of (unweighted) similarity counters.
    set_total_weighted_sim_counter()
        Calculate the total number of (weighted) similarity counters.
    total_dis_counters()
        Calculate total number of (unweighted) dissimilarity counters.
    total_weighted_dis_counters()
        Calculate total number of (weighted) dissimilarity counters.
    set_p()
        Calculate p.
    set_weighted_p()
        Calculate weighted p.
    ja_sim_wdis()
        Calculate the index with sim-counters and with weighted denominator.
    ja_1sim_wdis()
        Calculate the index with 1-sim-counters and with weighted denominator.
    ja_sim_dis()
        Calculate the index with sim-counters and with unweighted denominator.
    ja_1sim_dis()
        Calculate the index with 1-sim-counters and with unweighted denominator.
    """

    def __init__(self, fingerprints, c_threshold=None, w_factor="fraction"):
        """Set up the base comparison data and compute every index variant.

        Parameters
        ----------
        fingerprints : np.ndarray
            Numpy array with the fingerprints that will be compared.
            The fingerprints must be also given as Numpy arrays.
        c_threshold : {None, 'dissimilar', int}
            Coincidence threshold.
        w_factor : {"fraction", "power_n"}
            Type of weight function that will be used.
        """
        super().__init__(fingerprints, c_threshold, w_factor)
        # The index methods read counters (total_w_sim, w_a, total_dis, ...)
        # that are expected to exist once the base initializer has run.
        index_builders = (
            self.ja_sim_wdis,
            self.ja_1sim_wdis,
            self.ja_sim_dis,
            self.ja_1sim_dis,
        )
        for build_index in index_builders:
            build_index()

    def ja_sim_wdis(self):
        """Store the sim-counter index with weighted denominator in ``Ja_sim_wdis``.

        Note
        ----
        (3 * (w_a + w_d))/(3 * (w_a + w_d) + w_b + w_c)

        No guard is applied against a zero denominator.
        """
        # The same scaled term appears in the numerator and the denominator.
        scaled_w_sim = 3 * self.total_w_sim
        self.Ja_sim_wdis = scaled_w_sim / (scaled_w_sim + self.total_w_dis)

    def ja_1sim_wdis(self):
        """Store the 1-sim-counter index with weighted denominator in ``Ja_1sim_wdis``.

        Note
        ----
        (3 * w_a)/(3 * w_a + w_b + w_c)

        No guard is applied against a zero denominator.
        """
        # The same scaled term appears in the numerator and the denominator.
        scaled_w_a = 3 * self.w_a
        self.Ja_1sim_wdis = scaled_w_a / (scaled_w_a + self.total_w_dis)

    def ja_sim_dis(self):
        """Store the sim-counter index with unweighted denominator in ``Ja_sim_dis``.

        Note
        ----
        (3 * (w_a + w_d))/(3 * (a + d) + b + c)

        No guard is applied against a zero denominator.
        """
        weighted_top = 3 * self.total_w_sim
        unweighted_bottom = 3 * self.total_sim + self.total_dis
        self.Ja_sim_dis = weighted_top / unweighted_bottom

    def ja_1sim_dis(self):
        """Store the 1-sim-counter index with unweighted denominator in ``Ja_1sim_dis``.

        Note
        ----
        (3 * w_a)/(3 * a + b + c)

        No guard is applied against a zero denominator.
        """
        weighted_top = 3 * self.w_a
        unweighted_bottom = 3 * self.a + self.total_dis
        self.Ja_1sim_dis = weighted_top / unweighted_bottom