Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ignore first pac command if there are two consecutive pac commands #314

Merged
merged 9 commits into from
Feb 20, 2024
3 changes: 3 additions & 0 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ def _handle_double_command(self, word):
if word == self.last_command:
self.last_command = ''
return True
elif _is_pac_command(word) and _is_pac_command(self.last_command):
Copy link
Contributor

@ana-nichifor ana-nichifor Nov 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic was used to ignore the second duplicate command, not the first as it seems to be your intention (I saw you wrote a test called test_skip_first_pac_command).

You go with the first command, it doesn’t find a previous double (last_command=‘’) so it returns False (as in the command is NOT a double) and it gets processed. This command is saved as last_command to be used as comparison for the next one.
Then the second command comes, it is compared to the first one (last_command=‘previousword’), it is identified as a double, (last command is being reset to avoid ignoring multiple commands) return True (the command IS a double) and doesn’t get processed.

I realise now that a better name for this would have been _is_double_command or _skip_command for it to be easier to understand

self.last_command = ''
return True
# Fix for the <position> <tab offset> <position> <tab offset>
# repetition
elif _is_pac_command(word) and word in self.last_command:
Expand Down
7 changes: 4 additions & 3 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,9 @@ def interpret_command(self, command):

:type command: str
"""
self._update_positioning(command)

if command not in ["9120", "91ae", "912f", "91a1"]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This list could be stored in a constant to be more suggestive of what it represents, you might not remember its purpose when coming back here after a while

self._update_positioning(command)
text = COMMANDS.get(command, '')

if 'italic' in text:
if 'end' not in text:
self._collection.append(
Expand All @@ -365,11 +364,13 @@ def _update_positioning(self, command):

:type command: str
"""

if command in PAC_TAB_OFFSET_COMMANDS:
tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
prev_positioning = self._position_tracer.default
positioning = (prev_positioning[0],
prev_positioning[1] + tab_offset)

else:
first, second = command[:2], command[2:]

Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@
sample_scc_with_ampersand_character, sample_scc_multiple_formats,
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
sample_scc_tab_offset, sample_scc_with_unknown_commands,
sample_scc_special_and_extended_characters
sample_scc_special_and_extended_characters,
sample_scc_with_consecutive_pac_commands
)
from tests.fixtures.srt import ( # noqa: F401
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
Expand Down
27 changes: 27 additions & 0 deletions tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,33 @@ def sample_scc_created_dfxp_with_wrongly_closing_spans():
"""


@pytest.fixture(scope="session")
def sample_scc_with_consecutive_pac_commands():
return """\
Scenarist_SCC V1.0

00:00:00;15 942c

00:11:45;10 9420 94d0 94ce 5b20 cec1 5252 c154 4f52 205d 9470 946e cd4f cecb 45d9 d320 4c4f d645 2054 c849 cec7 d320 54c8 c154 2046 4cd9 ae80 942c 8080 8080 942f

00:11:47;28 9420 9454 9723 d9c1 d9a1 94f4 9723 5b20 c84f 4f54 49ce c720 5d80 942c 8080 8080 942f

00:11:50;08 9420 94d0 94ce 45d3 d045 4349 c14c 4cd9 2049 4620 54c8 45d9 a752 4520 54c8 4520 4fce 45d3 9470 946e 57c8 4f20 c745 5420 544f 2046 4cd9 2054 c845 cdae 942c 8080 8080 942f

00:11:54;06 942c

00:23:00;13 9420 1370 136e 5b20 43c8 494c c420 5d80 94d0 94ce c745 4f52 c745 20cd c1c4 4520 c120 cdc1 43c8 49ce 4580 9470 946e 464f 5220 c84f 5749 4520 544f 942c 8080 8080 942f

00:23:02;04 9420 91d0 91ce 544f 2046 49ce c420 43d5 5249 4fd5 d320 c745 4f52 c745 9170 916e c1ce c420 c849 d320 4652 4945 cec4 d380 92d0 92ce 45d6 4552 d920 c4c1 d920 4fce 4c49 ce45 2c80 942c 8080 8080 942f

00:23:05;00 9420 9152 91ae d357 49ce c720 c2d9 20d0 c2d3 cb49 c4d3 ae4f 52c7 9170 916e 544f 20d0 4cc1 d920 46d5 ce20 c7c1 cd45 d320 c1ce c420 57c1 5443 c880 92d0 9723 91ae d94f d552 2046 c1d6 4f52 4954 4520 d649 c445 4fd3 ae80 942c 8080 8080 942f


00:23:05;00 9420 9152 91ae d357 49ce c720 c2d9 20d0 c2d3 cb49 c4d3 ae4f 52c7 9170 916e 544f 20d0 4cc1 d920 46d5 ce20 c7c1 cd45 d320 c1ce c420 57c1 5443 c880 92d0 9723 91ae d94f d552 2046 c1d6 4f52 4954 4520 d649 c445 4fd3 ae80 942c 8080 8080 942f

"""


@pytest.fixture(scope="session")
def scc_that_generates_webvtt_with_proper_newlines():
return """\
Expand Down
43 changes: 38 additions & 5 deletions tests/test_scc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
from pytest_lazyfixture import lazy_fixture

from pycaption import SCCReader, CaptionReadNoCaptions, CaptionNode
from pycaption.exceptions import CaptionReadTimingError
Expand All @@ -23,11 +24,11 @@ def test_positive_answer_for_detection(self, sample_scc_pop_on):
super().assert_positive_answer_for_detection(sample_scc_pop_on)

@pytest.mark.parametrize('different_sample', [
pytest.lazy_fixture('sample_dfxp'),
pytest.lazy_fixture('sample_microdvd'),
pytest.lazy_fixture('sample_sami'),
pytest.lazy_fixture('sample_srt'),
pytest.lazy_fixture('sample_webvtt')
lazy_fixture('sample_dfxp'),
lazy_fixture('sample_microdvd'),
lazy_fixture('sample_sami'),
lazy_fixture('sample_srt'),
lazy_fixture('sample_webvtt')
])
def test_negative_answer_for_detection(self, different_sample):
super().assert_negative_answer_for_detection(different_sample)
Expand Down Expand Up @@ -237,6 +238,38 @@ def test_flashing_cue(self, sample_scc_flashing_cue):
assert exc_info.value.args[0].startswith(
"Unsupported cue duration around 00:00:20.433")

def test_skip_first_pac_command(self, sample_scc_with_consecutive_pac_commands):
caption_set = SCCReader().read(sample_scc_with_consecutive_pac_commands)
caption = caption_set.get_captions('en-US')
actual_lines = [
node.content
for cap_ in caption
for node in cap_.nodes
if node.type_ == CaptionNode.TEXT
]
expected_lines = [
'[ NARRATOR ]',
'MONKEYS LOVE THINGS THAT FLY.',
'YAY!',
'[ HOOTING ]',
"ESPECIALLY IF THEY'RE THE ONES",
'WHO GET TO FLY THEM.',
'[ CHILD ]',
'GEORGE MADE A MACHINE',
'FOR HOWIE TO',
'TO FIND CURIOUS GEORGE',
'AND HIS FRIENDS',
'EVERY DAY ONLINE,',
'SWING BY PBSKIDS.ORG',
'TO PLAY FUN GAMES AND WATCH',
'YOUR FAVORITE VIDEOS.',
'SWING BY PBSKIDS.ORG',
'TO PLAY FUN GAMES AND WATCH',
'YOUR FAVORITE VIDEOS.'
]
# is not breaking the lines
assert expected_lines == actual_lines


class TestCoverageOnly:
"""In order to refactor safely, we need coverage of 95% or more.
Expand Down
Loading