Skip to content

Commit

Permalink
🐛 Fix DONT_COMBINE_RES not doing anything
Browse files Browse the repository at this point in the history
  • Loading branch information
pajowu committed Nov 27, 2023
1 parent 1e78b2d commit 6182b6a
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
{
"input": [
{
"type": "paragraph",
"speaker": null,
"lang": "de",
"children": [
{
"text": "*",
"start": 0.0,
"end": 0.82,
"conf": 0.4493102431297302,
"conf_ts": 0.0
},
{
"text": "Klirren",
"start": 0.82,
"end": 1.0,
"conf": 0.9744400978088379,
"conf_ts": 0.005903157405555248
},
{
"text": "*",
"start": 1.0,
"end": 1.07,
"conf": 0.9744400978088379,
"conf_ts": 0.005903157405555248
}
]
},
{
"type": "paragraph",
"speaker": null,
"lang": "de",
"children": [
{
"text": "Ich ",
"start": 1.07,
"end": 1.65,
"conf": 0.9838394522666931,
"conf_ts": 0.01149927917867899
},
{
"text": "will ",
"start": 1.65,
"end": 2.0,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
},
{
"text": "Infos, ",
"start": 2.0,
"end": 2.07,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
}
]
},
{
"type": "paragraph",
"speaker": null,
"lang": "de",
"children": [
{
"text": "Ich ",
"start": 2.07,
"end": 2.65,
"conf": 0.9838394522666931,
"conf_ts": 0.01149927917867899
},
{
"text": "will ",
"start": 2.65,
"end": 3.06,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
},
{
"text": "Fakten, ",
"start": 3.06,
"end": 3.09,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
}
]
}
],
"expected": [
{
"type": "paragraph",
"speaker": null,
"lang": "de",
"children": [
{
"text": "*",
"start": 0.0,
"end": 0.82,
"conf": 0.4493102431297302,
"conf_ts": 0.0
},
{
"text": "Klirren",
"start": 0.82,
"end": 1.0,
"conf": 0.9744400978088379,
"conf_ts": 0.005903157405555248
},
{
"text": "*",
"start": 1.0,
"end": 1.07,
"conf": 0.9744400978088379,
"conf_ts": 0.005903157405555248
}
]
},
{
"type": "paragraph",
"speaker": null,
"lang": "de",
"children": [
{
"text": "Ich ",
"start": 1.07,
"end": 1.65,
"conf": 0.9838394522666931,
"conf_ts": 0.01149927917867899
},
{
"text": "will ",
"start": 1.65,
"end": 2.0,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
},
{
"text": "Infos, ",
"start": 2.0,
"end": 2.07,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
},
{
"text": "Ich ",
"start": 2.07,
"end": 2.65,
"conf": 0.9838394522666931,
"conf_ts": 0.01149927917867899
},
{
"text": "will ",
"start": 2.65,
"end": 3.06,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
},
{
"text": "Fakten, ",
"start": 3.06,
"end": 3.09,
"conf": 0.9566531777381897,
"conf_ts": 0.0096774036064744
}
]
}
]
}
5 changes: 3 additions & 2 deletions worker/tests/test_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def test_strict_sentence_paragraphs(data_file):
async_doc_chain_func_to_list(strict_sentence_paragraphs)(test_data.input)
)
)
+ assert [x.text() for x in output] == [x.text() for x in test_data.expected]
assert output == test_data.expected


Expand Down Expand Up @@ -103,6 +104,6 @@ def test_space_and_sentences(data_file):
)(test_data.input)
)
)
- for p in output:
- print(p.json())
+ # for p in output:
+ # print(p.json())
assert output == test_data.expected
2 changes: 1 addition & 1 deletion worker/transcribee_worker/whisper_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ async def strict_sentence_paragraphs(
)
acc_used_paras = []

- elif any(regex.search(paragraph.text()) for regex in DONT_COMBINE_RES):
+ if any(regex.search(paragraph.text()) for regex in DONT_COMBINE_RES):
if acc_paragraph.children:
yield acc_paragraph
acc_paragraph = None
Expand Down

0 comments on commit 6182b6a

Please sign in to comment.