Skip to content

Commit

Permalink
BREAKING: drop support for all but RTTM files
Browse files Browse the repository at this point in the history
- setup: remove pyannote.parser requirement
- setup: switch to pyannote.database 1.6
- doc: update documentation

Deprecates pyannote/DEPRECATED-pyannote-parser#3
  • Loading branch information
hbredin committed Mar 12, 2019
1 parent 4ea95df commit 7c6126f
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 40 deletions.
5 changes: 2 additions & 3 deletions docs/source/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Here is an example use of the command line interface that is provided to solve t

.. code-block:: bash
$ pyannote.metrics.py diarization --subset=development Etape.SpeakerDiarization.TV hypothesis.mdtm
$ pyannote.metrics.py diarization --subset=development Etape.SpeakerDiarization.TV hypothesis.rttm
Diarization (collar = 0 ms) error purity coverage total correct % fa. % miss. % conf. %
-------------------------------------- ------- -------- ---------- -------- --------- ----- ------ ----- ------- ---- ------- -----
Expand Down Expand Up @@ -53,8 +53,7 @@ Results are both reported for each file in the selected subset, and aggregated i

As of March 2017, ``pyannote.database`` packages exist for the ETAPE corpus, the REPERE corpus, and the AMI corpus. As more people contribute new ``pyannote.database`` packages, they will be added to the `pyannote` ecosystem.


File formats
------------

While the MDTM file format is used in this example, several other file formats are available (and can be contributed) thanks to the internal use of the ``pyannote.parser`` package.
Hypothesis files must use the `Rich Transcription Time Marked <https://web.archive.org/web/20170119114252/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf>`_ (RTTM) format.
97 changes: 62 additions & 35 deletions scripts/pyannote-metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# The MIT License (MIT)

# Copyright (c) 2017-2018 CNRS
# Copyright (c) 2017-2019 CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -30,10 +30,10 @@
Evaluation
Usage:
pyannote-metrics.py detection [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.mdtm>
pyannote-metrics.py segmentation [--subset=<subset> --tolerance=<seconds>] <database.task.protocol> <hypothesis.mdtm>
pyannote-metrics.py diarization [--subset=<subset> --greedy --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.mdtm>
pyannote-metrics.py identification [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.mdtm>
pyannote-metrics.py detection [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
pyannote-metrics.py segmentation [--subset=<subset> --tolerance=<seconds>] <database.task.protocol> <hypothesis.rttm>
pyannote-metrics.py diarization [--subset=<subset> --greedy --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
pyannote-metrics.py identification [--subset=<subset> --collar=<seconds> --skip-overlap] <database.task.protocol> <hypothesis.rttm>
pyannote-metrics.py spotting [--subset=<subset> --latency=<seconds>... --filter=<expression>...] <database.task.protocol> <hypothesis.json>
pyannote-metrics.py -h | --help
pyannote-metrics.py --version
Expand All @@ -53,16 +53,14 @@
-h --help Show this screen.
--version Show version.
All modes but "spotting" expect hypothesis using the MDTM file format.
MDTM files contain one line per speech turn, using the following convention:
All modes but "spotting" expect hypothesis using the RTTM file format.
RTTM files contain one line per speech turn, using the following convention:
<uri> 1 <start_time> <duration> speaker <confidence> <gender> <speaker_id>
SPEAKER {uri} 1 {start_time} {duration} <NA> <NA> {speaker_id} <NA> <NA>
* uri: file identifier (as given by pyannote.database protocols)
* start_time: speech turn start time in seconds
* duration: speech turn duration in seconds
* confidence: confidence score (can be anything, not used for now)
* gender: speaker gender (can be anything, not used for now)
* speaker_id: speaker identifier
"spotting" mode expects hypothesis using the following JSON file format.
Expand Down Expand Up @@ -104,8 +102,8 @@
from tabulate import tabulate
# import multiprocessing as mp

# use for parsing hypothesis file
from pyannote.parser import MagicParser
from pyannote.core import Annotation
from pyannote.database.util import load_rttm

# evaluation protocols
from pyannote.database import get_protocol
Expand Down Expand Up @@ -140,31 +138,48 @@ def showwarning(message, category, *args, **kwargs):

warnings.showwarning = showwarning

def get_hypothesis(hypotheses, current_file):
    """Get hypothesis for given file

    The hypothesis is looked up by exact `uri` first. If there is no exact
    match, keys of `hypotheses` that are substrings of the file `uri` are
    considered instead.

    Parameters
    ----------
    hypotheses : `dict`
        Speaker diarization hypothesis provided by `load_rttm`.
    current_file : `dict`
        File description as given by pyannote.database protocols.

    Returns
    -------
    hypothesis : `pyannote.core.Annotation`
        Hypothesis corresponding to `current_file`. An empty annotation is
        returned (with a warning) when no hypothesis matches.

    Raises
    ------
    ValueError
        If more than one key of `hypotheses` is a substring of the file uri.
    """

    uri = current_file['uri']

    if uri in hypotheses:
        return hypotheses[uri]

    # if the exact 'uri' is not available in hypothesis,
    # look for matching substring
    tmp_uri = [u for u in hypotheses if u in uri]

    # no matching speech turns. return empty annotation
    if not tmp_uri:
        msg = 'Could not find hypothesis for file "{uri}"; assuming empty file.'
        # the template must be formatted, otherwise the user sees a literal
        # "{uri}" placeholder instead of the offending file name
        warnings.warn(msg.format(uri=uri))
        return Annotation(uri=uri, modality='speaker')

    # exactly one matching file. return it
    if len(tmp_uri) == 1:
        hypothesis = hypotheses[tmp_uri[0]]
        # relabel the hypothesis so that it carries the protocol's uri
        hypothesis.uri = uri
        return hypothesis

    # more than one matching file. error.
    msg = 'Found too many hypotheses matching file "{uri}" ({uris}).'
    raise ValueError(msg.format(uri=uri, uris=tmp_uri))

def process_one(item, hypotheses=None, metrics=None):
reference = item['annotation']
Expand Down Expand Up @@ -552,9 +567,21 @@ def spotting(protocol, subset, latencies, hypotheses, output_prefix,

sys.exit(0)

# hypothesis
hypothesis_mdtm = arguments['<hypothesis.mdtm>']
hypotheses = MagicParser().read(hypothesis_mdtm, modality='speaker')
hypothesis_rttm = arguments['<hypothesis.rttm>']

try:
hypotheses = load_rttm(hypothesis_rttm)

except FileNotFoundError:
msg = f'Could not find file {hypothesis_rttm}.'
sys.exit(msg)

except:
msg = (
f'Failed to load {hypothesis_rttm}, please check its format '
f'(only RTTM files are supported).'
)
sys.exit(msg)

if arguments['detection']:
detection(protocol, subset, hypotheses,
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@
'scripts/pyannote-metrics.py',
],
install_requires=[
'pyannote.database >= 1.5.1',
'pyannote.parser >= 0.7.1',
'pyannote.core >= 2.1',
'pyannote.database >= 1.6',
'pandas >= 0.19',
'scipy >= 0.10.0',
'scikit-learn >= 0.17.1',
Expand Down

0 comments on commit 7c6126f

Please sign in to comment.