Skip to content

Commit

Permalink
merged main and change the skipping logic
Browse files Browse the repository at this point in the history
  • Loading branch information
OlteanuRares committed Jan 11, 2024
2 parents 7d758a2 + f4cc953 commit 5b990fc
Show file tree
Hide file tree
Showing 14 changed files with 202 additions and 1,318 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["py37", "py38", "py39", "py310", "py311"]
python-version: ["py38", "py39", "py310", "py311", "py312"]

steps:
- uses: actions/checkout@v2
Expand Down
20 changes: 10 additions & 10 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
version: '3.8'

services:
test_py37:
image: python:3.7-slim-bullseye
test_py38:
image: python:3.8-slim-bullseye
command: sh -c "
cd pycaption;
pip install --upgrade pip;
Expand All @@ -13,8 +13,8 @@ services:
volumes:
- .:/pycaption
test_py38:
image: python:3.8-slim-bullseye
test_py39:
image: python:3.9-slim-bullseye
command: sh -c "
cd pycaption;
pip install --upgrade pip;
Expand All @@ -25,8 +25,8 @@ services:
volumes:
- .:/pycaption
test_py39:
image: python:3.9-slim-bullseye
test_py310:
image: python:3.10-slim-bullseye
command: sh -c "
cd pycaption;
pip install --upgrade pip;
Expand All @@ -37,8 +37,8 @@ services:
volumes:
- .:/pycaption
test_py310:
image: python:3.10-slim-bullseye
test_py311:
image: python:3.11-slim-bullseye
command: sh -c "
cd pycaption;
pip install --upgrade pip;
Expand All @@ -49,8 +49,8 @@ services:
volumes:
- .:/pycaption
test_py311:
image: python:3.11-slim-bullseye
test_py312:
image: python:3.12-slim-bullseye
command: sh -c "
cd pycaption;
pip install --upgrade pip;
Expand Down
9 changes: 9 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
Changelog
---------
2.2.2
^^^^^
- Remove support for Python 3.6 & 3.7
- Restrict SCC source files to at most 31 characters per line (a line of 32 or more characters raises CaptionLineLengthError)

2.2.1
^^^^^
- Ignore the substitute character that comes before the extended character in SCC files.

2.2.0
^^^^^
- Added support for Python 3.11
Expand Down
1,264 changes: 5 additions & 1,259 deletions examples/example.scc

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions pycaption/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def __repr__(self):
f'{self.format_start()} --> {self.format_end()}\n{self.get_text()}'
)

def get_text(self):
def get_text_nodes(self):
"""
Get the text of the caption.
"""
Expand All @@ -224,7 +224,10 @@ def get_text_for_node(node):
return '\n'
return ''

text_nodes = [get_text_for_node(node) for node in self.nodes]
return [get_text_for_node(node) for node in self.nodes]

def get_text(self):
text_nodes = self.get_text_nodes()
return ''.join(text_nodes).strip()

def _format_timestamp(self, microseconds, msec_separator=None):
Expand Down
6 changes: 6 additions & 0 deletions pycaption/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ class RelativizationError(Exception):

class InvalidInputError(RuntimeError):
"""Error raised when the input is invalid (i.e. a unicode string)"""


class CaptionLineLengthError(CaptionReadError):
"""
Error raised when a Caption has a line that is 32 characters or longer.
"""
67 changes: 43 additions & 24 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
BaseReader, BaseWriter, CaptionSet, CaptionNode,
)
from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \
CaptionReadTimingError
CaptionReadTimingError, CaptionLineLengthError
from .constants import (
HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS,
MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
Expand Down Expand Up @@ -232,6 +232,22 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
captions = CaptionSet({lang: self.caption_stash.get_all()})

# check captions for incorrect lengths
lines = []
for caption in self.caption_stash._collection:
caption_text = "".join(caption.to_real_caption().get_text_nodes())
lines.extend(caption_text.split("\n"))
lines_too_long = [line for line in lines if len(line) >= 32]

if bool(lines_too_long):
msg = ""
for line in lines_too_long:
msg += line + f" - Length { len(line)}" + "\n"
raise CaptionLineLengthError(
f"32 character limit for caption cue in scc file.\n"
f"Lines longer than 32:\n"
f"{msg}"
)

for cap in captions.get_captions(lang):
# if there's an end time on a caption and the difference is
# less than .05s kill it (this is likely caused by a standalone
Expand Down Expand Up @@ -285,21 +301,27 @@ def _translate_line(self, line):
self.time_translator.start_at(parts[0][0])

# loop through each word
for word in parts[0][2].split(' '):
# ignore empty results or invalid commands
word = word.strip()
if len(word) == 4:
self._translate_word(word)

def _translate_word(self, word):
if self._skip_double_command(word):
words = [word.strip() for word in parts[0][2].split(' ') if len(word) == 4]

for idx, word in enumerate(words):
self._translate_word(word, words, idx)

@staticmethod
def get_command(commands, idx):
try:
return commands[idx]
except IndexError:
return None

def _translate_word(self, word, words, idx):
if self._skip_double_command(word, words, idx):
# count frames for timing
self.time_translator.increment_frames()
return
# first check if word is a command
# TODO - check that all the positioning commands are here, or use
# some other strategy to determine if the word is a command.
if word in COMMANDS or _is_pac_command(word):
if word in COMMANDS or _is_pac_command(word) or word in PAC_TAB_OFFSET_COMMANDS:
self._translate_command(word)

# second, check if word is a special character
Expand All @@ -316,32 +338,35 @@ def _translate_word(self, word):
# count frames for timing only after processing a command
self.time_translator.increment_frames()

def _skip_double_command(self, word):
def _skip_double_command(self, word, words, idx):
# If the caption is to be broadcast, each of the commands are doubled
# up for redundancy in case the signal is garbled in transmission.
# The decoder is programmed to ignore a second command when it is the
# same as the first.
# Also like codes, Special Characters are always doubled up,
# with only one member of each pair being displayed.
next_command = self.get_command(words, idx + 1)
second_next = self.get_command(words, idx + 2)
if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS:
if word == self.last_command:
# skip duplicates, execute the last occurrence
if word == next_command:
self.last_command = ''
return True
elif _is_pac_command(word) and _is_pac_command(self.last_command):
# Fix for the <position> <position> to execute only the last one
elif _is_pac_command(word) and _is_pac_command(next_command):
self.last_command = ''
return True
# Fix for the <position> <tab offset> <position> <tab offset>
# repetition
elif _is_pac_command(word) and word in self.last_command:
elif _is_pac_command(word) and next_command in PAC_TAB_OFFSET_COMMANDS and _is_pac_command(second_next):
self.last_command = ''
return True
# execute offset commands only if the previous command is a PAC and the next one is not a PAC
elif word in PAC_TAB_OFFSET_COMMANDS:
if _is_pac_command(self.last_command):
self.last_command += f" {word}"
if _is_pac_command(self.last_command) and not _is_pac_command(next_command):

This comment has been minimized.

Copy link
@ana-nichifor

ana-nichifor Jan 11, 2024

Contributor

do you still need to check the last command? it looks like you can remove this variable entirely from the new logic

return False
else:
return True

self.last_command = word
return False

Expand Down Expand Up @@ -529,13 +554,7 @@ def write(self, caption_set):
# Wrap lines at 32 chars
@staticmethod
def _layout_line(caption):
def caption_node_to_text(caption_node):
if caption_node.type_ == CaptionNode.TEXT:
return caption_node.content
elif caption_node.type_ == CaptionNode.BREAK:
return '\n'
caption_text = ''.join(
[caption_node_to_text(node) for node in caption.nodes])
caption_text = "".join(caption.get_text_nodes())
inner_lines = caption_text.split('\n')
inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
return '\n'.join(inner_lines_laid_out)
Expand Down
62 changes: 62 additions & 0 deletions pycaption/scc/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from itertools import product
from collections import defaultdict

COMMANDS = {
'9420': '',
Expand Down Expand Up @@ -987,3 +988,64 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
HEADER = 'Scenarist_SCC V1.0'

UNHANDLED_COMMANDS = ["9120", "91ae", "912f", "91a1"]

# Maps an extended character to the single ASCII character associated
# with it. The table is consulted as a fallback when an extended character
# cannot be reduced to ASCII by NFD normalization followed by a strict ASCII
# encode, so that the ASCII substitute preceding the extended character can
# still be recognised and dropped.
# Associations taken from
# http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/CC_CHARS.HTML
INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION = {
    '¡': "!",  # inverted exclamation mark
    '¤': "C",  # currency
    '¥': "Y",  # yen
    '¦': "-",  # broken bar
    '©': "c",  # copyright sign
    '«': '"',  # left pointing double angle quotation mark
    '»': '"',  # right pointing double angle quotation mark
    'À': "A",
    'Á': "A",
    'Â': "A",
    'Ã': "A",
    'Ä': "A",
    'Å': "A",
    'Ç': "C",
    'È': "E",
    'É': "E",
    'Ê': "E",
    'Ë': "E",
    'Ì': "I",
    'Í': "I",
    'Î': "I",
    'Ï': "I",
    'Ò': "O",
    'Ó': "O",
    'Ô': "O",  # was ")" (typo); every other O-variant maps to "O"
    'Õ': "O",
    'Ö': "O",
    'Ø': "O",
    'Ù': "U",
    'Ú': "U",
    'Û': "U",
    'Ü': "U",
    'ß': "s",
    'ã': "a",
    'ä': "a",
    'å': "a",
    'ë': "e",
    'ì': "i",
    'ï': "i",
    'ò': "o",
    'õ': "o",
    'ö': "o",
    'ø': "o",
    'ù': "u",
    'ü': "u",
    '—': "-",  # em dash
    '‘': "'",
    '’': "'",
    '“': '"',
    '”': '"',
    '•': ".",
    '℠': "s",
    '┌': "+",
    '┐': "+",
    '└': "+",
    '┘': "+",
}
23 changes: 16 additions & 7 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
)
from .constants import (
PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
MICROSECONDS_PER_CODEWORD, UNHANDLED_COMMANDS
MICROSECONDS_PER_CODEWORD, UNHANDLED_COMMANDS,
INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION
)

PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
Expand Down Expand Up @@ -364,13 +365,11 @@ def _update_positioning(self, command):
:type command: str
"""

if command in PAC_TAB_OFFSET_COMMANDS:
tab_offset = PAC_TAB_OFFSET_COMMANDS[command]
prev_positioning = self._position_tracer.default
positioning = (prev_positioning[0],
prev_positioning[1] + tab_offset)

else:
first, second = command[:2], command[2:]

Expand Down Expand Up @@ -424,10 +423,20 @@ def remove_ascii_duplicate(self, accented_character):
:type accented_character: str
"""
if self._collection and self._collection[-1].is_text_node() and \
self._collection[-1].text:
ascii_char = unicodedata.normalize('NFD', accented_character)\
.encode('ascii', 'ignore').decode("utf-8")
is_text_node = (
self._collection and
self._collection[-1].is_text_node() and
self._collection[-1].text
)
if is_text_node:
try:
ascii_char = unicodedata.normalize('NFD', accented_character) \
.encode('ascii', 'strict').decode("utf-8")
except (UnicodeEncodeError, UnicodeDecodeError):
ascii_char = INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION[
accented_character
]

if ascii_char and self._collection[-1].text[-1] == ascii_char:
self._collection[-1].text = self._collection[-1].text[:-1]

Expand Down
6 changes: 3 additions & 3 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

DOCKER_CMD="docker-compose -p pycaption"

SERVICE="test_py311"
SERVICE="test_py312"

if [ "$@" ]; then
if [ "$1" == "test_py37" ] || [ "$1" == "test_py38" ] || \
[ "$1" == "test_py39" ] || [ "$1" == "test_py310" ] || [ "$1" == "test_py311" ]; then
if [ "$1" == "test_py38" ] || [ "$1" == "test_py39" ] ||
[ "$1" == "test_py310" ] || [ "$1" == "test_py311" ] || [ "$1" == "test_py312" ]; then
SERVICE="$1"
fi
fi
Expand Down
Loading

0 comments on commit 5b990fc

Please sign in to comment.