Skip to content

Commit

Permalink
feat: add option to skip RTTM lines based on "type" field
Browse files Browse the repository at this point in the history
Related: #86
  • Loading branch information
hbredin committed Sep 30, 2022
1 parent 11d8dcb commit 48c7e98
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions pyannote/database/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,16 @@ def get_label_identifier(label, current_file):
return database + "|" + label


-def load_rttm(file_rttm):
+def load_rttm(file_rttm, keep_type="SPEAKER"):
"""Load RTTM file
Parameter
---------
file_rttm : `str`
Path to RTTM file.
+keep_type : str, optional
+Only keep lines with this type (field #1 in RTTM specs).
+Defaults to "SPEAKER".
Returns
-------
Expand All @@ -291,7 +294,7 @@ def load_rttm(file_rttm):
"""

names = [
-"NA1",
+"type",
"uri",
"NA2",
"start",
Expand All @@ -308,13 +311,15 @@ def load_rttm(file_rttm):
names=names,
dtype=dtype,
delim_whitespace=True,
-keep_default_na=False,
+keep_default_na=True,
)

annotations = dict()
for uri, turns in data.groupby("uri"):
annotation = Annotation(uri=uri)
for i, turn in turns.iterrows():
+if turn.type != keep_type:
+continue
segment = Segment(turn.start, turn.start + turn.duration)
annotation[segment, i] = turn.speaker
annotations[uri] = annotation
Expand Down

0 comments on commit 48c7e98

Please sign in to comment.