Commit

Add audmetric.linkability() (#40)
* Add audmetric.linkability()

* Add doc entry

* Extend docstring

* Update docstring

* Add tests

* Make sure no DOI is link checked

* Use generator function with random_state

* Add reference scores

* Fix empty line in refs.bib

* Fix missing omega

* Remove custom estimation of bins

* Remove test for 50 samples (no ground truth)

* Fix tests

* Update docstring

* Remove comment

* Use tmp file and store final results

* Mention results

* Add comment describing tests

* Add commas

* Update import
hagenw authored May 2, 2023
1 parent 23fdfc5 commit 70e5362
Showing 11 changed files with 609 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -9,3 +9,4 @@ venv/
__init__.pyc
coverage.xml
docs/api/
tests/assests/linkability/anonymization_metrics/
1 change: 1 addition & 0 deletions audmetric/__init__.py
@@ -8,6 +8,7 @@
equal_error_rate,
event_error_rate,
fscore_per_class,
linkability,
mean_absolute_error,
mean_squared_error,
pearson_cc,
215 changes: 194 additions & 21 deletions audmetric/core/api.py
@@ -1,4 +1,4 @@
from collections import namedtuple
import collections
import operator
import typing
import warnings
@@ -246,24 +246,11 @@ def detection_error_tradeoff(
(array([1., 0.]), array([0., 0.]), array([0.1, 0.9]))
""" # noqa: E501
truth = np.array(truth)

allowed_truth_values = set([1, 0, True, False])
if not set(truth).issubset(allowed_truth_values):
raise ValueError(
"'truth' is only allowed to contain "
"[1, 0, True, False], "
'yours contains:\n'
f"[{', '.join([str(t) for t in set(truth)])}]"
)

truth = truth.astype(bool)
prediction = np.array(prediction).astype(np.float64)

# Genuine matching scores
gscores = prediction[truth]
# Impostor matching scores
iscores = prediction[~truth]
# Get mated scores
# (genuine matching scores)
# and non-mated scores
# (impostor matching scores)
gscores, iscores = _matching_scores(truth, prediction)

gscores_number = len(gscores)
iscores_number = len(iscores)
@@ -357,7 +344,7 @@ def equal_error_rate(
typing.Union[bool, int, float],
typing.Sequence[typing.Union[bool, int, float]]
],
) -> typing.Tuple[float, namedtuple]:
) -> typing.Tuple[float, collections.namedtuple]:
r"""Equal error rate for verification tasks.
The equal error rate (EER) is the point
@@ -422,7 +409,7 @@ def equal_error_rate(
0.5
"""
Stats = namedtuple(
Stats = collections.namedtuple(
'stats',
[
'fmr', # False match rates (FMR)
@@ -575,6 +562,116 @@ def fscore_per_class(
return fscore


def linkability(
truth: typing.Union[
typing.Union[bool, int],
typing.Sequence[typing.Union[bool, int]]
],
prediction: typing.Union[
typing.Union[bool, int, float],
typing.Sequence[typing.Union[bool, int, float]]
],
omega: float = 1.0,
nbins: int = None,
) -> float:
r"""Linkability for verification tasks.
Let :math:`s` be the provided prediction score
for the similarity of the tested sample.
The clipped local linkability metric is then defined as:
.. math::
\text{max}(0, p(\text{mated} | s) - p(\text{non-mated} | s))
The higher the value,
the more likely it is
that an attacker can link two mated samples.
The global linkability metric :math:`D_\text{sys}`
is the average of the local values
over the mated score distribution,\ :footcite:`GomezBarrero2017`
and lies in the range :math:`0` to :math:`1`.
Implementation is based on
`code from M. Maouche`_,
which is licensed under LGPL.
.. footbibliography::
.. _code from M. Maouche: https://gitlab.inria.fr/magnet/anonymization_metrics
Args:
truth: ground truth classes
prediction: predicted classes or similarity scores
omega: prior ratio
:math:`\frac{p(\text{mated})}{p(\text{non-mated})}`
nbins: number of bins
of the histograms
that estimate the distributions
of mated and non-mated scores.
If ``None``, it is set to
:math:`\min(\frac{\text{len}(\text{mated})}{10}, 100)`
Returns:
global linkability :math:`D_\text{sys}`
Raises:
ValueError: if ``truth`` contains values
different from ``1``, ``0``, ``True``, ``False``
Examples:
>>> np.random.seed(1)
>>> samples = 10000
>>> truth = [1, 0] * int(samples / 2)
>>> prediction = []
>>> for _ in range(int(samples / 2)):
... prediction.extend(
... [np.random.uniform(0, 0.2), np.random.uniform(0.8, 1.0)]
... )
>>> linkability(truth, prediction)
0.9747999999999999
>>> truth = [1, 0, 0, 0] * int(samples / 4)
>>> prediction = [np.random.uniform(0, 1) for _ in range(samples)]
>>> linkability(truth, prediction, omega=1/3)
0.0
""" # noqa: E501
mated_scores, non_mated_scores = _matching_scores(truth, prediction)

# Limiting the number of bins
# (100 maximum or lower if few scores available)
if nbins is None:
nbins = min(int(len(mated_scores) / 10), 100)

# Define range of scores to compute D
bin_edges = np.linspace(
min([min(mated_scores), min(non_mated_scores)]),
max([max(mated_scores), max(non_mated_scores)]),
num=nbins + 1,
endpoint=True,
)
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2

# Compute score distributions using normalized histograms
y1 = np.histogram(mated_scores, bins=bin_edges, density=True)[0]
y2 = np.histogram(non_mated_scores, bins=bin_edges, density=True)[0]
# LR = P[s|mated] / P[s|non-mated]
LR = np.divide(y1, y2, out=np.ones_like(y1), where=y2 != 0)
D = 2 * (omega * LR / (1 + omega * LR)) - 1
# By definition, D is clipped to 0 where omega * LR <= 1
D[omega * LR <= 1] = 0
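# Note: with prior ratio omega = p(mated) / p(non-mated)
# and LR = p(s|mated) / p(s|non-mated), Bayes' rule gives
#   p(mated | s) = omega * LR / (1 + omega * LR)
#   p(non-mated | s) = 1 / (1 + omega * LR),
# so p(mated | s) - p(non-mated | s) = 2 * omega * LR / (1 + omega * LR) - 1,
# which is the D computed above,
# and clipping at 0 corresponds to omega * LR <= 1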
# Taking care of inf/NaN
mask = (y2 == 0) & (y1 != 0)
D[mask] = 1
# Global measure using trapz numerical integration
Dsys = np.trapz(x=bin_centers, y=D * y1)

return Dsys


def mean_absolute_error(
truth: typing.Sequence[float],
prediction: typing.Sequence[float],
@@ -1142,3 +1239,79 @@ def word_error_rate(

num_samples = len(truth) if len(truth) > 1 else 1
return wer / num_samples


def _matching_scores(
truth: typing.Union[
typing.Union[bool, int],
typing.Sequence[typing.Union[bool, int]]
],
prediction: typing.Union[
typing.Union[bool, int, float],
typing.Sequence[typing.Union[bool, int, float]]
],
) -> typing.Tuple[np.ndarray, np.ndarray]:
r"""Mated and non-mated scores for verification tasks.
For a verification task,
the predictions are usually separated
into the predictions belonging
to the matching examples,
and all other predictions.
The former are called mated scores
or genuine matching scores,
the latter non-mated scores
or impostor matching scores.
For example,
in a speaker verification task
the mated scores are all similarity values
that belong to the matching speaker.
Args:
truth: ground truth classes
prediction: predicted classes or similarity scores
Returns:
* mated scores
* non-mated scores
Raises:
ValueError: if ``truth`` contains values
different from ``1, 0, True, False``
Examples:
>>> truth = [1, 0]
>>> prediction = [0.9, 0.1]
>>> _matching_scores(truth, prediction)
(array([0.9]), array([0.1]))
"""
truth = np.array(truth)

allowed_truth_values = set([1, 0, True, False])
if not set(truth).issubset(allowed_truth_values):
raise ValueError(
"'truth' is only allowed to contain "
"[1, 0, True, False], "
'yours contains:\n'
f"[{', '.join([str(t) for t in set(truth)])}]"
)

truth = truth.astype(bool)
prediction = np.array(prediction).astype(np.float64)

# Predictions for all matching examples
# (truth is 1 or True)
# In literature these are called
# "genuine matching scores"
# or "mated scores"
mated_scores = prediction[truth]
# Predictions for all non-matching examples
# (truth is 0 or False)
# In literature these are called
# "impostor matching scores"
# or "non-mated scores"
non_mated_scores = prediction[~truth]

return mated_scores, non_mated_scores
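
For orientation, a minimal usage sketch of the new public function, based only on the signature and docstring shown in the diff above; the scores and the RNG seed below are made up for illustration:

```python
import numpy as np

import audmetric

# Made-up verification scores: mated trials score high,
# non-mated trials score low
rng = np.random.default_rng(0)
truth = [1] * 500 + [0] * 500
prediction = np.concatenate(
    [
        rng.uniform(0.6, 1.0, 500),  # mated (genuine) scores
        rng.uniform(0.0, 0.4, 500),  # non-mated (impostor) scores
    ]
)

# Well separated score distributions -> global linkability close to 1
print(audmetric.linkability(truth, prediction))

# The prior ratio p(mated) / p(non-mated) and the histogram resolution
# can be set explicitly via the arguments added in this commit
print(audmetric.linkability(truth, prediction, omega=0.5, nbins=50))
```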
1 change: 1 addition & 0 deletions docs/api-src/audmetric.rst
@@ -14,6 +14,7 @@ audmetric
edit_distance
equal_error_rate
event_error_rate
linkability
mean_absolute_error
mean_squared_error
pearson_cc
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -45,9 +45,9 @@
autodoc_inherit_docstrings = False # disable docstring inheritance
bibtex_bibfiles = ['refs.bib']
# Don't check DOI links as they should always work,
# but the link check for them tends to fail
linkcheck_ignore = [
'https://doi.org/10.2307/2532051',
'https://doi.org/10.1109/34.990140',
'https://doi.org/',
]

# Disable auto-generation of TOC entries in the API
10 changes: 10 additions & 0 deletions docs/refs.bib
@@ -1,3 +1,13 @@
@article{GomezBarrero2017,
author={Gomez-Barrero, M. and Galbally, J. and Rathgeb, C. and Busch, C.},
title={General framework to evaluate unlinkability in biometric template protection systems},
journal={IEEE Transactions on Information Forensics and Security},
volume={13},
issue={6},
pages={1406--1420},
doi={10.1109/TIFS.2017.2788000},
year={2017}
}
@article{Lin1989,
author={Lin, Lawrence I-Kuei},
title={A concordance correlation coefficient to evaluate reproducibility},
1 change: 1 addition & 0 deletions setup.cfg
@@ -41,6 +41,7 @@ addopts =
--cov-fail-under=100
--cov-report xml
--cov-report term-missing
--ignore tests/assests/linkability/
xfail_strict = true

[flake8]
14 changes: 14 additions & 0 deletions tests/assests/linkability/README.md
@@ -0,0 +1,14 @@
To calculate the reference values, run:

```bash
$ git clone https://gitlab.inria.fr/magnet/anonymization_metrics.git
$ cd anonymization_metrics
$ virtualenv --python=python3.8 env
$ source env/bin/activate
$ pip install -r requirements.txt
$ cd ../../../../
$ pip install -r requirements.txt
$ cd -
$ cd ..
$ python linkability_reference.py
```
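
A rough sketch of how such stored reference values could be consumed by the test suite; the file name `linkability.json`, its keys, and the tolerance are assumptions for illustration, not taken from this commit:

```python
import json

import numpy as np
import pytest

import audmetric


def test_linkability_matches_reference():
    # Hypothetical results file written by linkability_reference.py;
    # check that script for the actual file name and structure
    with open('linkability.json') as file:
        reference = json.load(file)

    # Random scores with no link between mated and non-mated trials,
    # so both implementations should report low linkability
    np.random.seed(1)
    samples = 10000
    truth = [1, 0] * (samples // 2)
    prediction = list(np.random.uniform(0, 1, samples))
    dsys = audmetric.linkability(truth, prediction)
    assert dsys == pytest.approx(reference['random'], abs=1e-4)
```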
