From be49cb2dba9a916f2035ecba8e3aff1cf09a3363 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Thu, 16 Nov 2023 17:18:39 +0200 Subject: [PATCH 1/7] ignore first pac command if there are two consecutive pac commands --- pycaption/scc/__init__.py | 3 +++ pycaption/scc/specialized_collections.py | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 39326f45..48c3e1b0 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -327,6 +327,9 @@ def _handle_double_command(self, word): if word == self.last_command: self.last_command = '' return True + elif _is_pac_command(word) and _is_pac_command(self.last_command): + self.last_command = '' + return True # Fix for the # repetition elif _is_pac_command(word) and word in self.last_command: diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index c67d36d0..184a650b 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -342,10 +342,9 @@ def interpret_command(self, command): :type command: str """ - self._update_positioning(command) - + if command not in ["9120", "91ae", "912f", "91a1"]: + self._update_positioning(command) text = COMMANDS.get(command, '') - if 'italic' in text: if 'end' not in text: self._collection.append( @@ -353,6 +352,8 @@ def interpret_command(self, command): self._position_tracer.get_current_position()) ) else: + if command == "91ae": + print(command) self._collection.append( _InstructionNode.create_italics_style( self._position_tracer.get_current_position(), @@ -365,11 +366,13 @@ def _update_positioning(self, command): :type command: str """ + if command in PAC_TAB_OFFSET_COMMANDS: tab_offset = PAC_TAB_OFFSET_COMMANDS[command] prev_positioning = self._position_tracer.default positioning = (prev_positioning[0], prev_positioning[1] + tab_offset) + else: first, second = command[:2], command[2:] From 
8a5661ce5c9fda1d7daab8642411243580be8399 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 28 Nov 2023 15:56:14 +0200 Subject: [PATCH 2/7] add a test --- pycaption/scc/specialized_collections.py | 2 -- tests/conftest.py | 3 +- tests/fixtures/scc.py | 27 +++++++++++++++ tests/test_scc.py | 43 +++++++++++++++++++++--- 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index 184a650b..f3faecb1 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -352,8 +352,6 @@ def interpret_command(self, command): self._position_tracer.get_current_position()) ) else: - if command == "91ae": - print(command) self._collection.append( _InstructionNode.create_italics_style( self._position_tracer.get_current_position(), diff --git a/tests/conftest.py b/tests/conftest.py index 55b785fb..df15611e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,7 +60,8 @@ sample_scc_with_ampersand_character, sample_scc_multiple_formats, sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters, sample_scc_tab_offset, sample_scc_with_unknown_commands, - sample_scc_special_and_extended_characters + sample_scc_special_and_extended_characters, + sample_scc_with_consecutive_pac_commands ) from tests.fixtures.srt import ( # noqa: F401 sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty, diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index 34091f3e..329cddb5 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -24,6 +24,33 @@ def sample_scc_created_dfxp_with_wrongly_closing_spans(): """ +@pytest.fixture(scope="session") +def sample_scc_with_consecutive_pac_commands(): + return """\ +Scenarist_SCC V1.0 + +00:00:00;15 942c + +00:11:45;10 9420 94d0 94ce 5b20 cec1 5252 c154 4f52 205d 9470 946e cd4f cecb 45d9 d320 4c4f d645 2054 c849 cec7 d320 54c8 c154 2046 4cd9 ae80 942c 8080 8080 942f + +00:11:47;28 
9420 9454 9723 d9c1 d9a1 94f4 9723 5b20 c84f 4f54 49ce c720 5d80 942c 8080 8080 942f + +00:11:50;08 9420 94d0 94ce 45d3 d045 4349 c14c 4cd9 2049 4620 54c8 45d9 a752 4520 54c8 4520 4fce 45d3 9470 946e 57c8 4f20 c745 5420 544f 2046 4cd9 2054 c845 cdae 942c 8080 8080 942f + +00:11:54;06 942c + +00:23:00;13 9420 1370 136e 5b20 43c8 494c c420 5d80 94d0 94ce c745 4f52 c745 20cd c1c4 4520 c120 cdc1 43c8 49ce 4580 9470 946e 464f 5220 c84f 5749 4520 544f 942c 8080 8080 942f + +00:23:02;04 9420 91d0 91ce 544f 2046 49ce c420 43d5 5249 4fd5 d320 c745 4f52 c745 9170 916e c1ce c420 c849 d320 4652 4945 cec4 d380 92d0 92ce 45d6 4552 d920 c4c1 d920 4fce 4c49 ce45 2c80 942c 8080 8080 942f + +00:23:05;00 9420 9152 91ae d357 49ce c720 c2d9 20d0 c2d3 cb49 c4d3 ae4f 52c7 9170 916e 544f 20d0 4cc1 d920 46d5 ce20 c7c1 cd45 d320 c1ce c420 57c1 5443 c880 92d0 9723 91ae d94f d552 2046 c1d6 4f52 4954 4520 d649 c445 4fd3 ae80 942c 8080 8080 942f + + +00:23:05;00 9420 9152 91ae d357 49ce c720 c2d9 20d0 c2d3 cb49 c4d3 ae4f 52c7 9170 916e 544f 20d0 4cc1 d920 46d5 ce20 c7c1 cd45 d320 c1ce c420 57c1 5443 c880 92d0 9723 91ae d94f d552 2046 c1d6 4f52 4954 4520 d649 c445 4fd3 ae80 942c 8080 8080 942f + +""" + + @pytest.fixture(scope="session") def scc_that_generates_webvtt_with_proper_newlines(): return """\ diff --git a/tests/test_scc.py b/tests/test_scc.py index df785142..057c3037 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -1,4 +1,5 @@ import pytest +from pytest_lazyfixture import lazy_fixture from pycaption import SCCReader, CaptionReadNoCaptions, CaptionNode from pycaption.exceptions import CaptionReadTimingError @@ -23,11 +24,11 @@ def test_positive_answer_for_detection(self, sample_scc_pop_on): super().assert_positive_answer_for_detection(sample_scc_pop_on) @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_srt'), - 
pytest.lazy_fixture('sample_webvtt') + lazy_fixture('sample_dfxp'), + lazy_fixture('sample_microdvd'), + lazy_fixture('sample_sami'), + lazy_fixture('sample_srt'), + lazy_fixture('sample_webvtt') ]) def test_negative_answer_for_detection(self, different_sample): super().assert_negative_answer_for_detection(different_sample) @@ -237,6 +238,38 @@ def test_flashing_cue(self, sample_scc_flashing_cue): assert exc_info.value.args[0].startswith( "Unsupported cue duration around 00:00:20.433") + def test_skip_first_pac_command(self, sample_scc_with_consecutive_pac_commands): + caption_set = SCCReader().read(sample_scc_with_consecutive_pac_commands) + caption = caption_set.get_captions('en-US') + actual_lines = [ + node.content + for cap_ in caption + for node in cap_.nodes + if node.type_ == CaptionNode.TEXT + ] + expected_lines = [ + '[ NARRATOR ]', + 'MONKEYS LOVE THINGS THAT FLY.', + 'YAY!', + '[ HOOTING ]', + "ESPECIALLY IF THEY'RE THE ONES", + 'WHO GET TO FLY THEM.', + '[ CHILD ]', + 'GEORGE MADE A MACHINE', + 'FOR HOWIE TO', + 'TO FIND CURIOUS GEORGE', + 'AND HIS FRIENDS', + 'EVERY DAY ONLINE,', + 'SWING BY PBSKIDS.ORG', + 'TO PLAY FUN GAMES AND WATCH', + 'YOUR FAVORITE VIDEOS.', + 'SWING BY PBSKIDS.ORG', + 'TO PLAY FUN GAMES AND WATCH', + 'YOUR FAVORITE VIDEOS.' + ] + # is not breaking the lines + assert expected_lines == actual_lines + class TestCoverageOnly: """In order to refactor safely, we need coverage of 95% or more. 
From 7d758a22121c47077993011f5cd22d0ac5858102 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 5 Dec 2023 15:26:52 +0200 Subject: [PATCH 3/7] change method name and extract unhandled commands into constant --- pycaption/scc/__init__.py | 4 ++-- pycaption/scc/constants.py | 2 ++ pycaption/scc/specialized_collections.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 48c3e1b0..71cf9fb1 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -292,7 +292,7 @@ def _translate_line(self, line): self._translate_word(word) def _translate_word(self, word): - if self._handle_double_command(word): + if self._skip_double_command(word): # count frames for timing self.time_translator.increment_frames() return @@ -316,7 +316,7 @@ def _translate_word(self, word): # count frames for timing only after processing a command self.time_translator.increment_frames() - def _handle_double_command(self, word): + def _skip_double_command(self, word): # If the caption is to be broadcast, each of the commands are doubled # up for redundancy in case the signal is garbled in transmission. 
# The decoder is programmed to ignore a second command when it is the diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py index a8e16f23..eb123e0f 100644 --- a/pycaption/scc/constants.py +++ b/pycaption/scc/constants.py @@ -985,3 +985,5 @@ def _restructure_bytes_to_position_map(byte_to_pos_map): HEADER = 'Scenarist_SCC V1.0' + +UNHANDLED_COMMANDS = ["9120", "91ae", "912f", "91a1"] diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index f3faecb1..677e6661 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -8,7 +8,7 @@ ) from .constants import ( PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS, - MICROSECONDS_PER_CODEWORD, + MICROSECONDS_PER_CODEWORD, UNHANDLED_COMMANDS ) PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end") @@ -342,7 +342,7 @@ def interpret_command(self, command): :type command: str """ - if command not in ["9120", "91ae", "912f", "91a1"]: + if command not in UNHANDLED_COMMANDS: self._update_positioning(command) text = COMMANDS.get(command, '') if 'italic' in text: From 32bf95b3aeee3351bf6bb1a73b6863b2396d706f Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 16 Jan 2024 16:23:03 +0200 Subject: [PATCH 4/7] change the skip logic for offset commands --- pycaption/scc/__init__.py | 21 ++++++++++-------- tests/fixtures/scc.py | 2 +- tests/test_scc.py | 45 +++++++++++++++++++++++++++++++++------ 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 5793fa29..18ab2890 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -236,7 +236,7 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0): for caption in self.caption_stash._collection: caption_text = "".join(caption.to_real_caption().get_text_nodes()) lines.extend(caption_text.split("\n")) - lines_too_long = [line for line in lines if len(line) >= 
32] + lines_too_long = [line for line in lines if len(line) > 32] if bool(lines_too_long): msg = "" @@ -347,28 +347,31 @@ def _skip_double_command(self, word, words, idx): # with only one member of each pair being displayed. next_command = self.get_command(words, idx + 1) second_next = self.get_command(words, idx + 2) + prev_command = self.get_command(words, idx - 1) + if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS: - # skip duplicates, execute the last occurrence - if word == next_command: + # skip duplicates, execute the last occurrence if not a positioning command + if word == self.last_command and not _is_pac_command(word): self.last_command = '' return True - # Fix for the to execute only the last one + # skip consecutive positioning commands, execute the last one elif _is_pac_command(word) and _is_pac_command(next_command): self.last_command = '' return True - # Fix for the - # repetition + # Fix for the repetition + # execute the last positioning command elif _is_pac_command(word) and next_command in PAC_TAB_OFFSET_COMMANDS and _is_pac_command(second_next): self.last_command = '' return True - # execute offset commands only if previous command is PAC and next is not pack + # execute offset commands only if previous command is PAC and next is not PAC elif word in PAC_TAB_OFFSET_COMMANDS: if _is_pac_command(self.last_command) and not _is_pac_command(next_command): + self.last_command = word return False else: return True - self.last_command = word - return False + self.last_command = word + return False def _translate_special_char(self, word): self.buffer.add_chars(SPECIAL_CHARS[word]) diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index fa797619..efc90c5e 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -462,7 +462,7 @@ def sample_scc_with_line_too_long(): 00:00:00;03 942c -00:00:01;45 9420 91f4 cb45 4c4c d920 4ac1 cd45 d3ba 20c8 eff7 9254 f468 e520 7368 eff7 2073 f461 f2f4 e564 942c 8080 8080 942f 
+00:00:01;45 9420 91f4 cb45 4c4c d920 4ac1 cd45 d3ba 20c8 eff7 9254 f468 e520 7368 eff7d3ba 20c8 eff7 9254 f468 e520 7368 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 eff7 2073 f461 f2f4 e564 942c 8080 8080 942f 00:00:02;55 9420 91e0 9723 f761 7320 4361 ec20 c4e5 6ee9 73ef 6e2c 2061 20e6 f2e9 e56e 6480 9240 9723 efe6 20ef 75f2 732c 20f7 6173 2064 efe9 6e67 206d 7920 43c4 73ae 942c 8080 8080 942f diff --git a/tests/test_scc.py b/tests/test_scc.py index 92cfe202..0fd9e705 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -73,17 +73,15 @@ def test_positioning(self, sample_scc_multiple_positioning): ((40.0, UnitEnum.PERCENT), (53.0, UnitEnum.PERCENT)), ((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)), ((20.0, UnitEnum.PERCENT), (35.0, UnitEnum.PERCENT)), - ((20.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)), + ((25.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)), ((70.0, UnitEnum.PERCENT), (11.0, UnitEnum.PERCENT)), ((40.0, UnitEnum.PERCENT), (41.0, UnitEnum.PERCENT)), - ((20.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT)) + ((25.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT)) ] - actual_positioning = [ caption_.layout_info.origin.serialized() for caption_ in captions.get_captions('en-US') ] - assert expected_positioning == actual_positioning def test_tab_offset(self, sample_scc_tab_offset): @@ -275,10 +273,10 @@ def test_skip_first_pac_command(self, sample_scc_with_consecutive_pac_commands): def test_line_too_long(self, sample_scc_with_line_too_long): with pytest.raises(CaptionLineLengthError) as exc_info: SCCReader().read(sample_scc_with_line_too_long) - assert exc_info.value.args[0].startswith( "32 character limit for caption cue in scc file.") - assert "And he said, I can do a TV show. 
- Length 32" in exc_info.value.args[0].split("\n") + assert ("the showowowowowowowowowowowowowowowowowowowowowowowowowowow started - Length 68" + in exc_info.value.args[0].split("\n")) class TestCoverageOnly: @@ -371,6 +369,24 @@ def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): (44266666.666666664, 44866666.666666664), ] + l = [(766666.6666666667, 2800000.0), + (2800000.0, 4600000.0), + (4600000.0, 6166666.666666667), + (6166666.666666667, 9733333.333333332), + (9733333.333333332, 11266666.666666668), + (11266666.666666668, 12266666.666666668), + (12266666.666666668, 13266666.666666668), + (13266666.666666668, 14266666.666666668), + (14266666.666666668, 17066666.666666668), + (17066666.666666668, 18666666.666666668), + (18666666.666666668, 20233333.333333336), + (20233333.333333336, 21833333.333333332), + (21833333.333333332, 34900000.0), + (34900000.0, 36400000.0), + (36400000.0, 44266666.666666664), + (44266666.666666664, 44866666.666666664) +] + actual_timings = [(c_.start, c_.end) for c_ in captions] assert expected_timings == actual_timings @@ -389,6 +405,21 @@ def test_freeze_colon_spec_time(self, sample_scc_pop_on): (32132100.000000004, 36169466.666666664), ] + l = [(9776433.333333332, 12312300.0), + (14781433.33333333, 16950266.666666664), + (16950266.666666664, 18685333.333333332), + (18685333.333333332, 20820800.0), + (20820800.0, 26693333.333333332), + (26693333.333333332, 32165466.66666666), + (32165466.66666666, 36202833.33333332)] + l1 = [(9776433.333333332, 12312300.0), + (14781433.33333333, 16950266.666666664), + (16950266.666666664, 18685333.333333332), + (18685333.333333332, 20820800.0), + (20820800.0, 26693333.333333332), + (26693333.333333332, 32165466.66666666), + (32165466.66666666, 36202833.33333332)] + actual_timings = [ (c_.start, c_.end) for c_ in scc1.get_captions('en-US')] @@ -610,4 +641,4 @@ def test_eoc_first_command(self, sample_scc_eoc_first_command): # just one caption, first EOC disappears num_captions = 
len(caption_set.get_captions('en-US')) - assert num_captions == 2 + assert num_captions == 1 From 94461d191c39b0f7b2e038f4a9068c46a26c9dcb Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 16 Jan 2024 16:32:30 +0200 Subject: [PATCH 5/7] remove leftovers --- pycaption/scc/__init__.py | 1 - tests/test_scc.py | 33 --------------------------------- 2 files changed, 34 deletions(-) diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 18ab2890..cb534bab 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -347,7 +347,6 @@ def _skip_double_command(self, word, words, idx): # with only one member of each pair being displayed. next_command = self.get_command(words, idx + 1) second_next = self.get_command(words, idx + 2) - prev_command = self.get_command(words, idx - 1) if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS: # skip duplicates, execute the last occurrence if not a positioning command diff --git a/tests/test_scc.py b/tests/test_scc.py index 0fd9e705..5af5d4b8 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -369,24 +369,6 @@ def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): (44266666.666666664, 44866666.666666664), ] - l = [(766666.6666666667, 2800000.0), - (2800000.0, 4600000.0), - (4600000.0, 6166666.666666667), - (6166666.666666667, 9733333.333333332), - (9733333.333333332, 11266666.666666668), - (11266666.666666668, 12266666.666666668), - (12266666.666666668, 13266666.666666668), - (13266666.666666668, 14266666.666666668), - (14266666.666666668, 17066666.666666668), - (17066666.666666668, 18666666.666666668), - (18666666.666666668, 20233333.333333336), - (20233333.333333336, 21833333.333333332), - (21833333.333333332, 34900000.0), - (34900000.0, 36400000.0), - (36400000.0, 44266666.666666664), - (44266666.666666664, 44866666.666666664) -] - actual_timings = [(c_.start, c_.end) for c_ in captions] assert expected_timings == actual_timings @@ -405,21 +387,6 @@ def 
test_freeze_colon_spec_time(self, sample_scc_pop_on): (32132100.000000004, 36169466.666666664), ] - l = [(9776433.333333332, 12312300.0), - (14781433.33333333, 16950266.666666664), - (16950266.666666664, 18685333.333333332), - (18685333.333333332, 20820800.0), - (20820800.0, 26693333.333333332), - (26693333.333333332, 32165466.66666666), - (32165466.66666666, 36202833.33333332)] - l1 = [(9776433.333333332, 12312300.0), - (14781433.33333333, 16950266.666666664), - (16950266.666666664, 18685333.333333332), - (18685333.333333332, 20820800.0), - (20820800.0, 26693333.333333332), - (26693333.333333332, 32165466.66666666), - (32165466.66666666, 36202833.33333332)] - actual_timings = [ (c_.start, c_.end) for c_ in scc1.get_captions('en-US')] From f2cd7c07857d3ce4b5535d39b1f513895074c0e2 Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Tue, 16 Jan 2024 16:41:41 +0200 Subject: [PATCH 6/7] fix converter test --- tests/fixtures/dfxp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py index 714e5d52..901e9f12 100644 --- a/tests/fixtures/dfxp.py +++ b/tests/fixtures/dfxp.py @@ -920,10 +920,10 @@ def sample_dfxp_from_scc_output(): - + - + From 1b8f6be969180b223c9992d6141e8d7c2ba240ba Mon Sep 17 00:00:00 2001 From: OlteanuRares Date: Wed, 7 Feb 2024 10:05:59 +0200 Subject: [PATCH 7/7] bump version to 2.2.5.dev --- docs/conf.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 3d8b4b2a..6e7c1281 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,9 +53,9 @@ # built documents. # # The short X.Y version. -version = '2.2.4' +version = '2.2.5' # The full version, including alpha/beta/rc tags. -release = '2.2.4' +release = '2.2.5' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/setup.py b/setup.py index 7c1c3858..7a20a670 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.4', + version='2.2.5.dev', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton',