Skip to content

Commit

Permalink
Merge pull request #342 from pbs/OCTO-11049-spacing-rules-for-mid-row…
Browse files Browse the repository at this point in the history
…-codes

OCTO-11049-spacing-rules-for-mid-row-codes
  • Loading branch information
OlteanuRares authored Sep 16, 2024
2 parents 143952c + 1279716 commit 50bb5d5
Show file tree
Hide file tree
Showing 17 changed files with 1,819 additions and 1,135 deletions.
14 changes: 13 additions & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
Changelog
---------
2.2.13
^^^^^^
- Mid-row codes add spaces only if there isn't one before.
- Mid-row codes add spaces only if they affect the text in the same row (no space is added if the code follows a break or PACs).
- Remove spaces at the end of lines.
- Close italics on receiving another style setting command.
- Throw a CaptionReadNoCaptions error if an empty input file is provided.
- Ignore repositioning commands which are not followed by any text before breaks.
- Mid-row codes will not add the space if it is in front of punctuation.
- Fix a bug with background codes when the InstructionNodeCreator collection is empty.
- Fix a bug with the WebVTT writer adding double line breaks.

2.2.12
^^^^^^
- Pinned nltk version to 3.8.0
- Pinned nltk to 3.8.0

2.2.11
^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
# built documents.
#
# The short X.Y version.
version = '2.2.11'
version = '2.2.12.dev2'
# The full version, including alpha/beta/rc tags.
release = '2.2.11'
release = '2.2.12.dev2'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
3 changes: 3 additions & 0 deletions pycaption/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def detect_format(caps):
:returns: the reader class for the detected format.
"""
if not len(caps):
raise CaptionReadNoCaptions("Empty caption file")

for reader in SUPPORTED_READERS:
if reader().detect(caps):
return reader
Expand Down
86 changes: 45 additions & 41 deletions pycaption/base.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import os
from collections import defaultdict
from datetime import timedelta
from numbers import Number

from .exceptions import CaptionReadError, CaptionReadTimingError

# `und` a special identifier for an undetermined language according to ISO 639-2
DEFAULT_LANGUAGE_CODE = os.getenv('PYCAPTION_DEFAULT_LANG', 'und')
DEFAULT_LANGUAGE_CODE = os.getenv("PYCAPTION_DEFAULT_LANG", "und")


def force_byte_string(content):
try:
return content.encode('UTF-8')
return content.encode("UTF-8")
except UnicodeEncodeError:
raise RuntimeError('Invalid content encoding')
raise RuntimeError("Invalid content encoding")
except UnicodeDecodeError:
return content

Expand Down Expand Up @@ -50,8 +51,9 @@ def read(self, content):


class BaseWriter:
def __init__(self, relativize=True, video_width=None, video_height=None,
fit_to_screen=True):
def __init__(
self, relativize=True, video_width=None, video_height=None, fit_to_screen=True
):
"""
Initialize writer with the given parameters.
Expand Down Expand Up @@ -81,7 +83,8 @@ def _relativize_and_fit_to_screen(self, layout_info):
if self.relativize:
# Transform absolute values (e.g. px) into percentages
layout_info = layout_info.as_percentage_of(
self.video_width, self.video_height)
self.video_width, self.video_height
)
if self.fit_to_screen:
# Make sure origin + extent <= 100%
layout_info = layout_info.fit_to_screen()
Expand Down Expand Up @@ -115,7 +118,7 @@ class CaptionNode:
BREAK = 3

def __init__(
self, type_, layout_info=None, content=None, start=None, position=None
self, type_, layout_info=None, content=None, start=None, position=None
):
"""
:type type_: int
Expand All @@ -135,30 +138,34 @@ def __repr__(self):
if t == CaptionNode.TEXT:
return repr(self.content)
elif t == CaptionNode.BREAK:
return repr('BREAK')
return repr("BREAK")
elif t == CaptionNode.STYLE:
return repr(f'STYLE: {self.start} {self.content}')
return repr(f"STYLE: {self.start} {self.content}")
else:
raise RuntimeError(f'Unknown node type: {t}')
raise RuntimeError(f"Unknown node type: {t}")

@staticmethod
def create_text(text, layout_info=None, position=None):
return CaptionNode(
type_=CaptionNode.TEXT, layout_info=layout_info,
position=position, content=text
type_=CaptionNode.TEXT,
layout_info=layout_info,
position=position,
content=text,
)

@staticmethod
def create_style(start, content, layout_info=None):
return CaptionNode(
type_=CaptionNode.STYLE, layout_info=layout_info, content=content,
start=start)
type_=CaptionNode.STYLE,
layout_info=layout_info,
content=content,
start=start,
)

@staticmethod
def create_break(layout_info=None, content=None):
return CaptionNode(
type_=CaptionNode.BREAK, layout_info=layout_info,
content=content
type_=CaptionNode.BREAK, layout_info=layout_info, content=content
)


Expand All @@ -184,11 +191,13 @@ def __init__(self, start, end, nodes, style={}, layout_info=None):
:type layout_info: Layout
"""
if not isinstance(start, Number):
raise CaptionReadTimingError("Captions must be initialized with a"
" valid start time")
raise CaptionReadTimingError(
"Captions must be initialized with a" " valid start time"
)
if not isinstance(end, Number):
raise CaptionReadTimingError("Captions must be initialized with a"
" valid end time")
raise CaptionReadTimingError(
"Captions must be initialized with a" " valid end time"
)
if not nodes:
raise CaptionReadError("Node list cannot be empty")
self.start = start
Expand Down Expand Up @@ -216,9 +225,7 @@ def format_end(self, msec_separator=None):
return self._format_timestamp(self.end, msec_separator)

def __repr__(self):
return repr(
f'{self.format_start()} --> {self.format_end()}\n{self.get_text()}'
)
return repr(f"{self.format_start()} --> {self.format_end()}\n{self.get_text()}")

def get_text_nodes(self):
"""
Expand All @@ -229,22 +236,24 @@ def get_text_for_node(node):
if node.type_ == CaptionNode.TEXT:
return node.content
if node.type_ == CaptionNode.BREAK:
return '\n'
return ''
return "\n"
return ""

return [get_text_for_node(node) for node in self.nodes]

def get_text(self):
text_nodes = self.get_text_nodes()
return ''.join(text_nodes).strip()
return "".join(text_nodes).strip()

def _format_timestamp(self, microseconds, msec_separator=None):
duration = timedelta(microseconds=microseconds)
hours, rem = divmod(duration.seconds, 3600)
minutes, seconds = divmod(rem, 60)
milliseconds = f"{duration.microseconds // 1000:03d}"
timestamp = (f"{hours:02d}:{minutes:02d}:{seconds:02d}"
f"{msec_separator or '.'}{milliseconds:.3s}")
timestamp = (
f"{hours:02d}:{minutes:02d}:{seconds:02d}"
f"{msec_separator or '.'}{milliseconds:.3s}"
)
return timestamp


Expand All @@ -261,8 +270,7 @@ def __init__(self, iterable=None, layout_info=None):
super().__init__(*args)

def __getslice__(self, i, j):
return CaptionList(
list.__getslice__(self, i, j), layout_info=self.layout_info)
return CaptionList(list.__getslice__(self, i, j), layout_info=self.layout_info)

def __getitem__(self, y):
item = list.__getitem__(self, y)
Expand All @@ -272,20 +280,19 @@ def __getitem__(self, y):

def __add__(self, other):
add_is_safe = (
not hasattr(other, 'layout_info')
not hasattr(other, "layout_info")
or not other.layout_info
or self.layout_info == other.layout_info
)
if add_is_safe:
return CaptionList(
list.__add__(self, other), layout_info=self.layout_info)
return CaptionList(list.__add__(self, other), layout_info=self.layout_info)
else:
raise ValueError(
"Cannot add CaptionList objects with different layout_info")
"Cannot add CaptionList objects with different layout_info"
)

def __mul__(self, other):
return CaptionList(
list.__mul__(self, other), layout_info=self.layout_info)
return CaptionList(list.__mul__(self, other), layout_info=self.layout_info)

__rmul__ = __mul__

Expand Down Expand Up @@ -341,9 +348,7 @@ def set_styles(self, styles):
self._styles = styles

def is_empty(self):
return all(
[len(captions) == 0 for captions in list(self._captions.values())]
)
return all([len(captions) == 0 for captions in list(self._captions.values())])

def set_layout_info(self, lang, layout_info):
self._captions[lang].layout_info = layout_info
Expand Down Expand Up @@ -412,6 +417,5 @@ def merge(captions):
new_nodes.append(CaptionNode.create_break())
for node in caption.nodes:
new_nodes.append(node)
caption = Caption(
captions[0].start, captions[0].end, new_nodes, captions[0].style)
caption = Caption(captions[0].start, captions[0].end, new_nodes, captions[0].style)
return caption
Loading

0 comments on commit 50bb5d5

Please sign in to comment.