Skip to content

Commit

Permalink
Merge pull request #320 from pbs/OCTO-10912-Add-new-substitute-charac…
Browse files Browse the repository at this point in the history
…ter-to-ignore-before-extended-character-in-SCC

Add-new-substitute-character-to-ignore-before-extended-character-in-SCC
  • Loading branch information
OlteanuRares authored Jan 30, 2024
2 parents 19755dc + 6144e28 commit ec21f9d
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 65 deletions.
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Changelog
---------
2.2.3.dev
^^^^^^^^^
- Add new substitute character to ignore before extended character
  in SCC input files

2.2.2
^^^^^
- Remove support for Python 3.6 & 3.7
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
# built documents.
#
# The short X.Y version.
version = '2.2.0'
version = '2.2.3.dev'
# The full version, including alpha/beta/rc tags.
release = '2.2.0'
release = '2.2.3.dev'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
112 changes: 56 additions & 56 deletions pycaption/scc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,60 +990,60 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
# taken from
# http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/CC_CHARS.HTML
INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION = {
'¡': "!", # inverted exclamation mark
'¤': "C", # currency
'¥': "Y", # yen
'¦': "-", # broken bar
'©': "c", # copyright sign
'«': '"', # left pointing double angle quotation mark
'»': '"', # right pointing double angle quotation mark
'À': "A",
'Á': "A",
'Â': "A",
'Ã': "A",
'Ä': "A",
'Å': "A",
'Ç': "C",
'È': "E",
'É': "E",
'Ê': "E",
'Ë': "E",
'Ì': "I",
'Í': "I",
'Î': "I",
'Ï': "I",
'Ò': "O",
'Ó': "O",
'Ô': ")",
'Õ': "O",
'Ö': "O",
'Ø': "O",
'Ù': "U",
'Ú': "U",
'Û': "U",
'Ü': "U",
'ß': "s",
'ã': "a",
'ä': "a",
'å': "a",
'ë': "e",
'ì': "i",
'ï': "i",
'ò': "o",
'õ': "o",
'ö': "o",
'ø': "o",
'ù': "u",
'ü': "u",
'—': "-", # em dash
'‘': "'",
'’': "'",
'“': '"',
'”': '"',
'•': ".",
'℠': "s",
'┌': "+",
'┐': "+",
'└': "+",
'┘': "+"
'¡': ["!", "i"], # inverted exclamation mark
'¤': ["C"], # currency
'¥': ["Y"], # yen
'¦': ["-"], # broken bar
'©': ["c"], # copyright sign
'«': ['"'], # left pointing double angle quotation mark
'»': ['"'], # right pointing double angle quotation mark
'À': ["A"],
'Á': ["A"],
'Â': ["A"],
'Ã': ["A"],
'Ä': ["A"],
'Å': ["A"],
'Ç': ["C"],
'È': ["E"],
'É': ["E"],
'Ê': ["E"],
'Ë': ["E"],
'Ì': ["I"],
'Í': ["I"],
'Î': ["I"],
'Ï': ["I"],
'Ò': ["O"],
'Ó': ["O"],
'Ô': [")"],
'Õ': ["O"],
'Ö': ["O"],
'Ø': ["O"],
'Ù': ["U"],
'Ú': ["U"],
'Û': ["U"],
'Ü': ["U"],
'ß': ["s"],
'ã': ["a"],
'ä': ["a"],
'å': ["a"],
'ë': ["e"],
'ì': ["i"],
'ï': ["i"],
'ò': ["o"],
'õ': ["o"],
'ö': ["o"],
'ø': ["o"],
'ù': ["u"],
'ü': ["u"],
'—': ["-"], # em dash
'‘': ["'"],
'’': ["'"],
'“': ['"'],
'”': ['"'],
'•': ["."],
'℠': ["s"],
'┌': ["+"],
'┐': ["+"],
'└': ["+"],
'┘': ["+"]
}
11 changes: 5 additions & 6 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,18 +430,17 @@ def remove_ascii_duplicate(self, accented_character):
)
if is_text_node:
try:
ascii_char = unicodedata.normalize('NFD', accented_character) \
ascii_char = [
unicodedata.normalize('NFD', accented_character)
.encode('ascii', 'strict').decode("utf-8")
except (UnicodeEncodeError, UnicodeDecodeError):
ascii_char = INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION[
accented_character
]
except (UnicodeEncodeError, UnicodeDecodeError):
ascii_char = INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION.get(accented_character)

if ascii_char and self._collection[-1].text[-1] == ascii_char:
if ascii_char and self._collection[-1].text[-1] in ascii_char:
self._collection[-1].text = self._collection[-1].text[:-1]



def _get_layout_from_tuple(position_tuple):
"""Create a Layout object from the positioning information given
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='pycaption',
version='2.2.2',
version='2.2.3.dev',
description='Closed caption converter',
long_description=open(README_PATH).read(),
author='Joe Norton',
Expand Down

0 comments on commit ec21f9d

Please sign in to comment.