Skip to content

Commit

Permalink
OCTO-10987-honor-backspaces-in-scc-captions
Browse files Browse the repository at this point in the history
  • Loading branch information
OlteanuRares committed May 20, 2024
1 parent 38694ce commit a3f4172
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 75 deletions.
50 changes: 29 additions & 21 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,15 +308,16 @@ def _translate_line(self, line):

self.time_translator.start_at(parts[0][0])

# loop through each word
for word in parts[0][2].split(' '):
word_list = parts[0][2].split(' ')
pacs_are_doubled = len(word_list) > 1 and word_list[0] == word_list[1]
for word in word_list:
# ignore empty results or invalid commands
word = word.strip()
if len(word) == 4:
self._translate_word(word)
self._translate_word(word, pacs_are_doubled)

def _translate_word(self, word):
if self._handle_double_command(word):
def _translate_word(self, word, pacs_are_doubled):
if self._handle_double_command(word, pacs_are_doubled):
# count frames for timing
self.time_translator.increment_frames()
return
Expand All @@ -340,33 +341,41 @@ def _translate_word(self, word):
# count frames for timing only after processing a command
self.time_translator.increment_frames()

def _handle_double_command(self, word):
def _handle_double_command(self, word, pacs_are_doubled):
# If the caption is to be broadcast, each of the commands is doubled
# up for redundancy in case the signal is garbled in transmission.
# The decoder is programmed to ignore a second command when it is the
# same as the first.
# Also like codes, Special Characters are always doubled up,
# If we have doubled commands we're skipping also
# doubled special characters and doubled extended characters
# with only one member of each pair being displayed.
if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS:
if word == self.last_command:
self.last_command = ''
return True
actionable_commands = {
key: COMMANDS[key] for key in COMMANDS.keys() if key != "94a1"
}
doubled_types = word in actionable_commands or _is_pac_command(word)
if pacs_are_doubled:
doubled_types = doubled_types or word in SPECIAL_CHARS or word in EXTENDED_CHARS

if doubled_types and word == self.last_command:
return True
# Fix for the <position> <tab offset> <position> <tab offset>
# repetition
elif _is_pac_command(word) and word in self.last_command:
self.last_command = ''
elif _is_pac_command(word) and word in self.last_command:
self.last_command = ''
return True
elif word in PAC_TAB_OFFSET_COMMANDS:
if _is_pac_command(self.last_command):
self.last_command += f" {word}"
return False
else:
return True
elif word in PAC_TAB_OFFSET_COMMANDS:
if _is_pac_command(self.last_command):
self.last_command += f" {word}"
return False
else:
return True

self.last_command = word
return False

def _translate_special_char(self, word):
self.buffer.handle_backspace(word)
# add to buffer
self.buffer.add_chars(SPECIAL_CHARS[word])

def _translate_extended_char(self, word):
Expand All @@ -377,8 +386,7 @@ def _translate_extended_char(self, word):
character on a row), erasing any character which may be in that location,
then displays the Extended Character.
"""
self.buffer.handle_backspace()

self.buffer.handle_backspace(word)
# add to buffer
self.buffer.add_chars(EXTENDED_CHARS[word])

Expand Down
34 changes: 23 additions & 11 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .constants import (
PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES,
MID_ROW_CODES
MID_ROW_CODES, EXTENDED_CHARS, SPECIAL_CHARS
)

PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
Expand Down Expand Up @@ -348,20 +348,24 @@ def interpret_command(self, command, mode=None):

text = COMMANDS.get(command, '')

if command == "94a1" and mode in ["roll", "paint"]:
self.handle_backspace()
if command == "94a1":
self.handle_backspace("94a1")

if command in BACKGROUND_COLOR_CODES:
# Since these codes are optional, they must be preceded
# with the space character (20h),
# which will be deleted when the code is applied.
if self._collection[-1].text[-1].isspace():
# ex: 2080 97ad 94a1
if (
self._collection[-1].is_text_node() and
self._collection[-1].text[-1].isspace()
):
self._collection[-1].text = self._collection[-1].text[:-1]

# mid row code that is not first code of the line
# mid row code that is not first code on the line
# (previous node is not a break node)
# fixes OCTO-11022
if command in MID_ROW_CODES:
if command.lower() in MID_ROW_CODES:
not_after_break = (
len(self._collection) > 1 and self._collection[-2].is_explicit_break()
)
Expand Down Expand Up @@ -437,18 +441,26 @@ def from_list(cls, stash_list, position_tracker):

return instance

def handle_backspace(self):
def handle_backspace(self, word):
"""
Move cursor back one position and delete that character
"""
node = self.get_previous_text_node()
# in case of no previous text nodes or
# if the backspace is required after the first char in text
# if a backspace is requested while there is no character to delete
# do nothing
if node is None or len(node.text) == 1:
if node is None:
return
# otherwise do backspace by deleting the last char
node.text = node.text[:-1]
last_char = node.text[-1]
delete_previous_condition = (
(word in SPECIAL_CHARS and last_char not in SPECIAL_CHARS.values()) or
(word in EXTENDED_CHARS and last_char not in EXTENDED_CHARS.values()) or
word == "94a1"
)
# in case of special / extended char, perform backspace
# only if the previous character is not also special / extended
if delete_previous_condition:
node.text = node.text[:-1]

def get_previous_text_node(self):
for node in self._collection[::-1]:
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/dfxp.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,10 +920,10 @@ def sample_dfxp_from_scc_output():
<region tts:displayAlign="before" tts:origin="40% 53%" tts:textAlign="left" xml:id="r5"/>
<region tts:displayAlign="before" tts:origin="70% 17%" tts:textAlign="left" xml:id="r6"/>
<region tts:displayAlign="before" tts:origin="20% 35%" tts:textAlign="left" xml:id="r7"/>
<region tts:displayAlign="before" tts:origin="20% 83%" tts:textAlign="left" xml:id="r8"/>
<region tts:displayAlign="before" tts:origin="25% 83%" tts:textAlign="left" xml:id="r8"/>
<region tts:displayAlign="before" tts:origin="70% 11%" tts:textAlign="left" xml:id="r9"/>
<region tts:displayAlign="before" tts:origin="40% 41%" tts:textAlign="left" xml:id="r10"/>
<region tts:displayAlign="before" tts:origin="20% 71%" tts:textAlign="left" xml:id="r11"/>
<region tts:displayAlign="before" tts:origin="25% 71%" tts:textAlign="left" xml:id="r11"/>
</layout>
</head>
<body>
Expand Down
6 changes: 4 additions & 2 deletions tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,11 @@ def sample_scc_duplicate_special_characters():
return """\
Scenarist_SCC V1.0
00:23:28;01 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 913d 913e 913e 91bf 91bf 942f
00:23:28;01 9420 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 913d 913e 913e 91bf 91bf 942f
00:53:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f
00:33:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f
00:53:28;01 9420 91b0 9131 c1c1 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc c1c1 913d 913e 91bf 942f
"""

Expand Down
61 changes: 22 additions & 39 deletions tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,15 @@ def test_positioning(self, sample_scc_multiple_positioning):
((40.0, UnitEnum.PERCENT), (53.0, UnitEnum.PERCENT)),
((70.0, UnitEnum.PERCENT), (17.0, UnitEnum.PERCENT)),
((20.0, UnitEnum.PERCENT), (35.0, UnitEnum.PERCENT)),
((20.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
((25.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
((70.0, UnitEnum.PERCENT), (11.0, UnitEnum.PERCENT)),
((40.0, UnitEnum.PERCENT), (41.0, UnitEnum.PERCENT)),
((20.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT))
((25.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT))
]

actual_positioning = [
caption_.layout_info.origin.serialized()
for caption_ in captions.get_captions('en-US')
]

assert expected_positioning == actual_positioning

def test_tab_offset(self, sample_scc_tab_offset):
Expand Down Expand Up @@ -224,7 +222,11 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset):

def test_skip_duplicate_special_characters(
self, sample_scc_duplicate_special_characters):
expected_lines = ['®°½¿™¢£♪à èâêîôû', '®°½¿™¢£♪à èâêîôû']
expected_lines = [
'®°½¿™¢£♪à èâêîôû', # double commands so we skip one
'®°½¿™¢£♪à èâêîôû', # no double command, nothing skipped equal with above
'®°A½¿™¢£♪à èâêAîôû' # no skips but a couple of normal chars "c1c1" = AA
]

caption_set = SCCReader().read(sample_scc_duplicate_special_characters)
actual_lines = [
Expand All @@ -233,7 +235,6 @@ def test_skip_duplicate_special_characters(
for node in cap_.nodes
if node.type_ == CaptionNode.TEXT
]

assert expected_lines == actual_lines

def test_flashing_cue(self, sample_scc_flashing_cue):
Expand Down Expand Up @@ -270,41 +271,23 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2):
scc1 = SCCReader().read(sample_scc_roll_up_ru2)
captions = scc1.get_captions('en-US')
actual_texts = [cap_.nodes[0].content for cap_ in captions]
# expected_texts = [
# '>>> HI.',
# "I'M KEVIN CUNNING AND AT",
# "INVESTOR'S BANK WE BELIEVE IN",
# 'HELPING THE LOCAL NEIGHBORHOODS',
# 'AND IMPROVING THE LIVES OF ALL',
# 'WE SERVE.',
# '®°½',
# 'ABû',
# 'ÁÉÓ¡',
# "WHERE YOU'RE STANDING NOW,",
# "LOOKING OUT THERE, THAT'S AL",
# 'THE CROWD.',
# '>> IT WAS GOOD TO BE IN TH',
# "And restore Iowa's land, water",
# 'And wildlife.',
# '>> Bike Iowa, your source for',
# ]
expected_texts = [
'>>> HI.',
"I'M KEVIN CUNNING AND AT",
"INVESTOR'S BANK WE BELIEVE IN",
'HELPING THE LOCAL NEIGHBORHOODS',
'AND IMPROVING THE LIVES OF ALL',
'WE SERVE.',
'®°½',
'ABû',
'ÁÉÓ¡',
"WHERE YOU'RE STANDING NOW,",
"LOOKING OUT THERE, THAT'S AL",
'THE CROWD.',
'>> IT WAS GOOD TO BE IN TH',
"And restore Iowa's land, water",
'And wildlife.',
'>> Bike Iowa, your source for'
"I'M KEVIN CUNNING AND AT",
"INVESTOR'S BANK WE BELIEVE IN",
'HELPING THE LOCAL NEIGHBORHOODS',
'AND IMPROVING THE LIVES OF ALL',
'WE SERVE.',
'®°½',
'Aû',
'ÁÉÓ¡',
"WHERE YOU'RE STANDING NOW,",
"LOOKING OUT THERE, THAT'S AL",
'THE CROWD.',
'>> IT WAS GOOD TO BE IN TH',
"And restore Iowa's land, water",
'And wildlife.',
'>> Bike Iowa, your source for'
]
assert expected_texts == actual_texts

Expand Down

0 comments on commit a3f4172

Please sign in to comment.