From a3f4172ca97f5119cb5bf6eb44521d1db0dad76b Mon Sep 17 00:00:00 2001
From: OlteanuRares <rares.olteanu@3pillarglobal.com>
Date: Mon, 20 May 2024 20:16:48 +0300
Subject: [PATCH] OCTO-10987-honor-backspaces-in-scc-captions

---
 pycaption/scc/__init__.py                | 50 +++++++++++--------
 pycaption/scc/specialized_collections.py | 34 ++++++++-----
 tests/fixtures/dfxp.py                   |  4 +-
 tests/fixtures/scc.py                    |  6 ++-
 tests/test_scc.py                        | 61 +++++++++---------------
 5 files changed, 80 insertions(+), 75 deletions(-)
diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index 3bd810fa..b2d5af42 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -308,15 +308,16 @@ def _translate_line(self, line):
 
         self.time_translator.start_at(parts[0][0])
 
-        # loop through each word
-        for word in parts[0][2].split(' '):
+        word_list = parts[0][2].split(' ')
+        pacs_are_doubled = len(word_list) > 1 and word_list[0] == word_list[1]
+        for word in word_list:
             # ignore empty results or invalid commands
             word = word.strip()
             if len(word) == 4:
-                self._translate_word(word)
+                self._translate_word(word, pacs_are_doubled)
 
-    def _translate_word(self, word):
-        if self._handle_double_command(word):
+    def _translate_word(self, word, pacs_are_doubled):
+        if self._handle_double_command(word, pacs_are_doubled):
             # count frames for timing
             self.time_translator.increment_frames()
             return
@@ -340,33 +341,41 @@ def _translate_word(self, word):
         # count frames for timing only after processing a command
         self.time_translator.increment_frames()
 
-    def _handle_double_command(self, word):
+    def _handle_double_command(self, word, pacs_are_doubled):
         # If the caption is to be broadcast, each of the commands are doubled
         # up for redundancy in case the signal is garbled in transmission.
         # The decoder is programmed to ignore a second command when it is the
         # same as the first.
-        # Also like codes, Special Characters are always doubled up,
+        # If the commands are doubled, we also skip doubled
+        # special characters and doubled extended characters,
         # with only one member of each pair being displayed.
-        if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS:
-            if word == self.last_command:
-                self.last_command = ''
-                return True
+        actionable_commands = {
+            key: COMMANDS[key] for key in COMMANDS.keys() if key != "94a1"
+        }
+        doubled_types = word in actionable_commands or _is_pac_command(word)
+        if pacs_are_doubled:
+            doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS
+
+        if doubled_types and word == self.last_command:
+            return True
             # Fix for the <position> <tab offset> <position> <tab offset>
             # repetition
-            elif _is_pac_command(word) and word in self.last_command:
-                self.last_command = ''
+        elif _is_pac_command(word) and word in self.last_command:
+            self.last_command = ''
+            return True
+        elif word in PAC_TAB_OFFSET_COMMANDS:
+            if _is_pac_command(self.last_command):
+                self.last_command += f" {word}"
+                return False
+            else:
                 return True
-            elif word in PAC_TAB_OFFSET_COMMANDS:
-                if _is_pac_command(self.last_command):
-                    self.last_command += f" {word}"
-                    return False
-                else:
-                    return True
 
         self.last_command = word
         return False
 
     def _translate_special_char(self, word):
+        self.buffer.handle_backspace(word)
+        # add to buffer
         self.buffer.add_chars(SPECIAL_CHARS[word])
 
     def _translate_extended_char(self, word):
@@ -377,8 +386,7 @@ def _translate_extended_char(self, word):
         character on a row), erasing any character which may be in that location,
         then displays the Extended Character.
         """
-        self.buffer.handle_backspace()
-
+        self.buffer.handle_backspace(word)
         # add to buffer
         self.buffer.add_chars(EXTENDED_CHARS[word])
 
diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
index 1166d4e5..90624a84 100644
--- a/pycaption/scc/specialized_collections.py
+++ b/pycaption/scc/specialized_collections.py
@@ -9,7 +9,7 @@
 from .constants import (
     PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
     MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES,
-    MID_ROW_CODES
+    MID_ROW_CODES, EXTENDED_CHARS, SPECIAL_CHARS
 )
 
 PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
@@ -348,20 +348,24 @@ def interpret_command(self, command, mode=None):
 
         text = COMMANDS.get(command, '')
 
-        if command == "94a1" and mode in ["roll", "paint"]:
-            self.handle_backspace()
+        if command == "94a1":
+            self.handle_backspace("94a1")
 
         if command in BACKGROUND_COLOR_CODES:
             # Since these codes are optional, they must be preceded
             # with the space character (20h),
             # which will be deleted when the code is applied.
-            if self._collection[-1].text[-1].isspace():
+            # ex: 2080 97ad 94a1
+            if (
+                    self._collection[-1].is_text_node() and
+                    self._collection[-1].text[-1].isspace()
+            ):
                 self._collection[-1].text = self._collection[-1].text[:-1]
 
-        # mid row code that is not first code of the line
+        # mid row code that is not first code on the line
         # (previous node is not a break node)
         # fixes OCTO-11022
-        if command in MID_ROW_CODES:
+        if command.lower() in MID_ROW_CODES:
             not_after_break = (
                 len(self._collection) > 1 and self._collection[-2].is_explicit_break()
             )
@@ -437,18 +441,26 @@ def from_list(cls, stash_list, position_tracker):
 
         return instance
 
-    def handle_backspace(self):
+    def handle_backspace(self, word):
         """
         Move cursor back one position and delete that character
         """
         node = self.get_previous_text_node()
         # in case of no previous text nodes or
-        # if the backspace is required after the first char in text
+        # if a backspace is requested when there is no character to delete
         # do nothing
-        if node is None or len(node.text) == 1:
+        if node is None:
             return
-        # otherwise do backspace by deleting the last char
-        node.text = node.text[:-1]
+        last_char = node.text[-1]
+        delete_previous_condition = (
+            (word in SPECIAL_CHARS and last_char not in SPECIAL_CHARS.values()) or
+            (word in EXTENDED_CHARS and last_char not in EXTENDED_CHARS.values()) or
+            word == "94a1"
+        )
+        # in case of special / extended char, perform backspace
+        # only if the previous character is not also special / extended
+        if delete_previous_condition:
+            node.text = node.text[:-1]
 
     def get_previous_text_node(self):
         for node in self._collection[::-1]:
diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py
index 714e5d52..901e9f12 100644
--- a/tests/fixtures/dfxp.py
+++ b/tests/fixtures/dfxp.py
@@ -920,10 +920,10 @@ def sample_dfxp_from_scc_output():
    <region tts:displayAlign="before" tts:origin="40% 53%" tts:textAlign="left" xml:id="r5"/>
    <region tts:displayAlign="before" tts:origin="70% 17%" tts:textAlign="left" xml:id="r6"/>
    <region tts:displayAlign="before" tts:origin="20% 35%" tts:textAlign="left" xml:id="r7"/>
-   <region tts:displayAlign="before" tts:origin="20% 83%" tts:textAlign="left" xml:id="r8"/>
+   <region tts:displayAlign="before" tts:origin="25% 83%" tts:textAlign="left" xml:id="r8"/>
    <region tts:displayAlign="before" tts:origin="70% 11%" tts:textAlign="left" xml:id="r9"/>
    <region tts:displayAlign="before" tts:origin="40% 41%" tts:textAlign="left" xml:id="r10"/>
-   <region tts:displayAlign="before" tts:origin="20% 71%" tts:textAlign="left" xml:id="r11"/>
+   <region tts:displayAlign="before" tts:origin="25% 71%" tts:textAlign="left" xml:id="r11"/>
   </layout>
  </head>
  <body>
diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py
index 23661d36..425e1d24 100644
--- a/tests/fixtures/scc.py
+++ b/tests/fixtures/scc.py
@@ -367,9 +367,11 @@ def sample_scc_duplicate_special_characters():
     return """\
 Scenarist_SCC V1.0
 
-00:23:28;01 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 913d 913e 913e 91bf 91bf 942f
+00:23:28;01 9420 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 913d 913e 913e 91bf 91bf 942f
 
-00:53:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f
+00:33:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f
+
+00:53:28;01 9420 91b0 9131 c1c1 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc c1c1 913d 913e 91bf 942f
 
 """
 
diff --git a/tests/test_scc.py b/tests/test_scc.py
index 78bac4c2..c6b17931 100644
--- a/tests/test_scc.py
+++ b/tests/test_scc.py
@@ -77,17 +77,15 @@ def test_positioning(self, sample_scc_multiple_positioning):
             ((40.0, UnitEnum.PERCENT), (53.0, UnitEnum.PERCENT)),
             ((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)),
             ((20.0, UnitEnum.PERCENT), (35.0, UnitEnum.PERCENT)),
-            ((20.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+            ((25.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
             ((70.0, UnitEnum.PERCENT), (11.0, UnitEnum.PERCENT)),
             ((40.0, UnitEnum.PERCENT), (41.0, UnitEnum.PERCENT)),
-            ((20.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT))
+            ((25.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT))
         ]
-
         actual_positioning = [
             caption_.layout_info.origin.serialized()
             for caption_ in captions.get_captions('en-US')
         ]
-
         assert expected_positioning == actual_positioning
 
     def test_tab_offset(self, sample_scc_tab_offset):
@@ -224,7 +222,11 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset):
 
     def test_skip_duplicate_special_characters(
             self, sample_scc_duplicate_special_characters):
-        expected_lines = ['®°½¿™¢£♪à èâêîôû', '®°½¿™¢£♪à èâêîôû']
+        expected_lines = [
+            '®°½¿™¢£♪à èâêîôû',   # double commands so we skip one
+            '®°½¿™¢£♪à èâêîôû',   # no double command, nothing skipped equal with above
+            '®°A½¿™¢£♪à èâêAîôû'  # no skips but a couple of normal chars "c1c1" = AA
+        ]
 
         caption_set = SCCReader().read(sample_scc_duplicate_special_characters)
         actual_lines = [
@@ -233,7 +235,6 @@ def test_skip_duplicate_special_characters(
             for node in cap_.nodes
             if node.type_ == CaptionNode.TEXT
         ]
-
         assert expected_lines == actual_lines
 
     def test_flashing_cue(self, sample_scc_flashing_cue):
@@ -270,41 +271,23 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2):
         scc1 = SCCReader().read(sample_scc_roll_up_ru2)
         captions = scc1.get_captions('en-US')
         actual_texts = [cap_.nodes[0].content for cap_ in captions]
-        # expected_texts = [
-        #     '>>> HI.',
-        #     "I'M KEVIN CUNNING AND AT",
-        #     "INVESTOR'S BANK WE BELIEVE IN",
-        #     'HELPING THE LOCAL NEIGHBORHOODS',
-        #     'AND IMPROVING THE LIVES OF ALL',
-        #     'WE SERVE.',
-        #     '®°½',
-        #     'ABû',
-        #     'ÁÉÓ¡',
-        #     "WHERE YOU'RE STANDING NOW,",
-        #     "LOOKING OUT THERE, THAT'S AL",
-        #     'THE CROWD.',
-        #     '>> IT WAS GOOD TO BE IN TH',
-        #     "And restore Iowa's land, water",
-        #     'And wildlife.',
-        #     '>> Bike Iowa, your source for',
-        # ]
         expected_texts = [
             '>>> HI.',
-            "I'M KEVIN CUNNING AND AT",
-            "INVESTOR'S BANK WE BELIEVE IN",
-            'HELPING THE LOCAL NEIGHBORHOODS',
-            'AND IMPROVING THE LIVES OF ALL',
-            'WE SERVE.',
-            '®°½',
-            'ABû',
-            'Á¡',
-            "WHERE YOU'RE STANDING NOW,",
-            "LOOKING OUT THERE, THAT'S AL",
-            'THE CROWD.',
-            '>> IT WAS GOOD TO BE IN TH',
-            "And restore Iowa's land, water",
-            'And wildlife.',
-            '>> Bike Iowa, your source for'
+             "I'M KEVIN CUNNING AND AT",
+             "INVESTOR'S BANK WE BELIEVE IN",
+             'HELPING THE LOCAL NEIGHBORHOODS',
+             'AND IMPROVING THE LIVES OF ALL',
+             'WE SERVE.',
+             '®°½',
+             'Aû',
+             'ÁÉÓ¡',
+             "WHERE YOU'RE STANDING NOW,",
+             "LOOKING OUT THERE, THAT'S AL",
+             'THE CROWD.',
+             '>> IT WAS GOOD TO BE IN TH',
+             "And restore Iowa's land, water",
+             'And wildlife.',
+             '>> Bike Iowa, your source for'
         ]
         assert expected_texts == actual_texts