Skip to content

Commit

Permalink
fix italics breaking the lines too early, preventing webvtt writer fro…
Browse files Browse the repository at this point in the history
…m creating a new cue in case of line break, add back special characters on code skipping condition
  • Loading branch information
OlteanuRares committed May 29, 2024
1 parent 5c12b60 commit 6e5d9c7
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 16 deletions.
20 changes: 14 additions & 6 deletions pycaption/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,16 @@ class CaptionNode:
STYLE = 2
BREAK = 3

def __init__(self, type_, layout_info=None, content=None, start=None):
def __init__(
self, type_, layout_info=None, content=None, start=None, position=None
):
"""
:type type_: int
:type layout_info: Layout
"""
self.type_ = type_
self.content = content
self.position = position

# Boolean. Marks the beginning/ end of a Style node.
self.start = start
Expand All @@ -139,19 +142,24 @@ def __repr__(self):
raise RuntimeError(f'Unknown node type: {t}')

@staticmethod
def create_text(text, layout_info=None):
def create_text(text, layout_info=None, position=None):
return CaptionNode(
CaptionNode.TEXT, layout_info=layout_info, content=text)
type_=CaptionNode.TEXT, layout_info=layout_info,
position=position, content=text
)

@staticmethod
def create_style(start, content, layout_info=None):
return CaptionNode(
CaptionNode.STYLE, layout_info=layout_info, content=content,
type_=CaptionNode.STYLE, layout_info=layout_info, content=content,
start=start)

@staticmethod
def create_break(layout_info=None):
return CaptionNode(CaptionNode.BREAK, layout_info=layout_info)
def create_break(layout_info=None, content=None):
return CaptionNode(
type_=CaptionNode.BREAK, layout_info=layout_info,
content=content
)


class Caption:
Expand Down
3 changes: 2 additions & 1 deletion pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,10 @@ def _handle_double_command(self, word):
# If we have doubled commands we're skipping also
# doubled special characters and doubled extended characters
# with only one member of each pair being displayed.

doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word)
if self.double_starter:
doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1"
doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" or word in SPECIAL_CHARS

if word in CUE_STARTING_COMMAND and word != self.last_command:
self.double_starter = False
Expand Down
9 changes: 8 additions & 1 deletion pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,10 @@ def create_and_store(self, node_buffer, start, end=0):
layout_info = _get_layout_from_tuple(instruction.position)
caption.nodes.append(
CaptionNode.create_text(
instruction.text, layout_info=layout_info),
text=instruction.text,
layout_info=layout_info,
position=instruction.position
)
)
caption.layout_info = layout_info

Expand Down Expand Up @@ -365,6 +368,10 @@ def interpret_command(self, command, previous_is_pac_or_tab=False):
self._collection[-1].text = self._collection[-1].text[:-1]

if 'italic' in text:
if self._position_tracer.is_linebreak_required():
self._collection.append(_InstructionNode.create_break(
position=self._position_tracer.get_current_position()))
self._position_tracer.acknowledge_linebreak_consumed()
if 'end' not in text:
self._collection.append(
_InstructionNode.create_italics_style(
Expand Down
12 changes: 10 additions & 2 deletions pycaption/webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ def _group_cues_by_layout(self, nodes, caption_set):
return []

current_layout = None
current_node = None

# A list with layout groups. Since WebVTT only support positioning
# for different cues, each layout group has to be represented in a
Expand All @@ -402,17 +403,24 @@ def _group_cues_by_layout(self, nodes, caption_set):
# A properly encoded WebVTT string (plain unicode must be properly
# escaped before being appended to this string)
s = ''
row, column, prev_row, prev_column = 0, 0, 0, 0
for i, node in enumerate(nodes):
if node.type_ == CaptionNode.TEXT:
if s and current_layout and node.layout_info != current_layout:
# If the positioning changes from one text node to
# another, a new WebVTT cue has to be created.
layout_groups.append((s, current_layout))
s = ''
row, column = node.position if node.position else (0, 0)
prev_row, prev_column = current_node.position if current_node.position else (0, 0)
if row == prev_row + 1:
s += '\n'
else:
layout_groups.append((s, current_layout))
s = ''
# ATTENTION: This is where the plain unicode node content is
# finally encoded as WebVTT.
s += self._encode_illegal_characters(node.content) or ' '
current_layout = node.layout_info
current_node = node
elif node.type_ == CaptionNode.STYLE:
resulting_style = self._calculate_resulting_style(
node.content, caption_set
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@
scc_that_generates_webvtt_with_proper_newlines,
sample_scc_produces_captions_with_start_and_end_time_the_same,
sample_scc_pop_on, sample_scc_multiple_positioning, sample_scc_with_italics,
sample_scc_empty, sample_scc_roll_up_ru2, sample_no_positioning_at_all_scc,
sample_scc_empty, sample_scc_roll_up_ru2, sample_scc_roll_up_ru3,
sample_no_positioning_at_all_scc, sample_scc_with_line_too_long,
sample_scc_no_explicit_end_to_last_caption, sample_scc_flashing_cue,
sample_scc_eoc_first_command, sample_scc_with_extended_characters,
sample_scc_with_ampersand_character, sample_scc_multiple_formats,
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
sample_scc_tab_offset, sample_scc_with_unknown_commands,
sample_scc_special_and_extended_characters,
sample_scc_with_line_too_long
)
from tests.fixtures.srt import ( # noqa: F401
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
Expand Down
40 changes: 40 additions & 0 deletions tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,46 @@ def sample_scc_roll_up_ru2():
00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
00:00:12;30 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf
00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7
00:00:17;01 9426 9426 94ad 94ad 9470 9470 57c8 4552 4520 d94f d5a7 5245 20d3 54c1 cec4 49ce c720 ce4f 572c
00:00:18;19 9426 9426 94ad 94ad 9470 9470 4c4f 4fcb 49ce c720 4fd5 5420 54c8 4552 452c 2054 c8c1 54a7 d320 c14c 4c
00:00:20;06 9426 9426 94ad 94ad 9470 9470 54c8 4520 4352 4f57 c4ae
00:00:21;24 9426 9426 94ad 94ad 9470 9470 3e3e 2049 5420 57c1 d320 c74f 4fc4 2054 4f20 c245 2049 ce20 54c8 45
00:00:34;27 94a7 94ad 9470 c16e 6420 f2e5 73f4 eff2 e520 49ef f761 a773 20ec 616e 642c 20f7 61f4 e5f2
00:00:36;12 94a7 94ad 9470 c16e 6420 f7e9 ec64 ece9 e6e5 ae80
00:00:44;08 94a7 94ad 9470 3e3e 20c2 e96b e520 49ef f761 2c20 79ef 75f2 2073 ef75 f2e3 e520 e6ef f280
"""


@pytest.fixture(scope="session")
def sample_scc_roll_up_ru3():
return """\
Scenarist_SCC V1.0
00:00:00;22 9425 9425 94ad 94ad 9470 9470 3e3e 3e20 c849 ae80
00:00:02;23 9425 9425 94ad 94ad 9470 9470 49a7 cd20 cb45 d649 ce20 43d5 cece 49ce c720 c1ce c420 c154
00:00:04;17 9425 9425 94ad 94ad 9470 9470 49ce d645 d354 4f52 a7d3 20c2 c1ce cb20 5745 20c2 454c 4945 d645 2049 ce80
00:00:06;04 9425 9425 94ad 94ad 9470 9470 c845 4cd0 49ce c720 54c8 4520 4c4f 43c1 4c20 ce45 49c7 c8c2 4f52 c84f 4fc4 d380
00:00:09;21 9425 9425 94ad 94ad 9470 9470 c1ce c420 49cd d052 4fd6 49ce c720 54c8 4520 4c49 d645 d320 4f46 20c1 4c4c
00:00:11;07 9425 9425 94ad 94ad 9470 9470 5745 20d3 4552 d645 ae80
00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132
00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf
00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7
Expand Down
8 changes: 4 additions & 4 deletions tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset):
def test_skip_duplicate_special_characters(
self, sample_scc_duplicate_special_characters):
expected_lines = [
®°°½½¿¿™™¢¢££♪♪àà èèââêêîîôôûû',
°½¿™¢£♪à èâêîôû',
'®°½¿™¢£♪à èâêîôû',
'®°AA½¿™¢£♪à èâêAAîôû'
]
Expand Down Expand Up @@ -277,6 +277,7 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2):
'HELPING THE LOCAL NEIGHBORHOODS',
'AND IMPROVING THE LIVES OF ALL',
'WE SERVE.',
'®°½',
'®°½½',
'ABû',
'ÁÉÓ¡',
Expand Down Expand Up @@ -322,8 +323,8 @@ def test_multiple_formats(self, sample_scc_multiple_formats):

assert expected_text_lines == text_lines

def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2):
scc1 = SCCReader().read(sample_scc_roll_up_ru2)
def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru3):
scc1 = SCCReader().read(sample_scc_roll_up_ru3)
captions = scc1.get_captions('en-US')
expected_timings = [
(733333.3333333333, 2766666.6666666665),
Expand All @@ -345,7 +346,6 @@ def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2):
]

actual_timings = [(c_.start, c_.end) for c_ in captions]

assert expected_timings == actual_timings

def test_freeze_colon_spec_time(self, sample_scc_pop_on):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_scc_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def test_dfxp_is_valid_xml_when_scc_source_has_weird_italic_commands(

dfxp = DFXPWriter().write(caption_set)

print("================")
print("================")
print("================")
print(dfxp)

assert dfxp == sample_dfxp_with_properly_closing_spans_output

def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character(
Expand Down

0 comments on commit 6e5d9c7

Please sign in to comment.