Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCTO-10955-fix-position-updates #330

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
Changelog
---------
2.2.6
^^^^^
- Fix positioning to avoid linebreaks when we have both a break and a repositioning commands.
- Add logic to reset close italics tags if a new instruction that sets text to plain follow.
- Fix to honor spaces before inline italic commands.
- Fix for italics commands with repositioning to reposition before creating tag.

2.2.5
^^^^^
- Yank.
2.2.4
^^^^^
- Skip duplicated extended characters.
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
# built documents.
#
# The short X.Y version.
version = '2.2.4'
version = '2.2.6'
# The full version, including alpha/beta/rc tags.
release = '2.2.4'
release = '2.2.6'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
from copy import deepcopy

from pycaption.base import (
BaseReader, BaseWriter, CaptionSet, CaptionNode,
BaseReader, BaseWriter, CaptionSet,
)
from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \
CaptionReadTimingError, CaptionLineLengthError
Expand Down
1 change: 0 additions & 1 deletion pycaption/scc/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from itertools import product
from collections import defaultdict

COMMANDS = {
'9420': '',
Expand Down
70 changes: 62 additions & 8 deletions pycaption/scc/specialized_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
MICROSECONDS_PER_CODEWORD, INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION
)
from .translator import COMMAND_LABELS

PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")

Expand Down Expand Up @@ -254,7 +255,7 @@ def create_and_store(self, node_buffer, start, end=0):
layout_info = _get_layout_from_tuple(instruction.position)
caption.nodes.append(
CaptionNode.create_text(
instruction.get_text(), layout_info=layout_info),
instruction.text, layout_info=layout_info),
)
caption.layout_info = layout_info

Expand Down Expand Up @@ -312,17 +313,41 @@ def add_chars(self, *chars):
node = _InstructionNode(position=current_position)
self._collection.append(node)

break_and_repositioning = (
self._position_tracer._repositioning_required and
self._position_tracer._break_required
)
only_break = (
self._position_tracer._break_required and not
self._position_tracer._repositioning_required
)

only_repositioning = (
self._position_tracer._repositioning_required and not
self._position_tracer._break_required
)
# handle break followed by repositioning
if break_and_repositioning:
self._collection.append(_InstructionNode.create_break(
position=current_position))
self._collection.append(
_InstructionNode.create_repositioning_command(
current_position
)
)
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
self._position_tracer.acknowledge_linebreak_consumed()
self._position_tracer.acknowledge_position_changed()
# handle a simple line break
if self._position_tracer.is_linebreak_required():
# must insert a line break here
elif only_break:
self._collection.append(_InstructionNode.create_break(
position=current_position))
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
self._position_tracer.acknowledge_linebreak_consumed()

# handle completely new positioning
elif self._position_tracer.is_repositioning_required():
elif only_repositioning:
self._collection.append(
_InstructionNode.create_repositioning_command(
current_position
Expand All @@ -343,13 +368,42 @@ def interpret_command(self, command):
:type command: str
"""
self._update_positioning(command)

text = COMMANDS.get(command, '')
# if a command which sets text to plain have an open
# italics tag open, it should close it to reset the text style
plain_text_commands = {
key: value for key, value in COMMAND_LABELS.items()
if "plain" in value
}
has_open_italics_tag = False
for node in self._collection[::-1]:
if node.is_italics_node():
if node.sets_italics_on():
has_open_italics_tag = True
break
if command in plain_text_commands and has_open_italics_tag:
self._collection.append(
_InstructionNode.create_italics_style(
self._position_tracer.get_current_position(),
turn_on=False
)
)

if 'italic' in text:
if 'end' not in text:
self._collection.append(
_InstructionNode.create_italics_style(
# if the command is PAC and creates italics,
# first do break / repositioning before italic tag
if self._position_tracer._break_required:
self._collection.append(_InstructionNode.create_break(
position=self._position_tracer.get_current_position()))
self._position_tracer.acknowledge_linebreak_consumed()
if self._position_tracer._repositioning_required:
self._collection.append(
_InstructionNode.create_repositioning_command(
self._position_tracer.get_current_position()))
self._position_tracer.acknowledge_position_changed()

self._collection.append(_InstructionNode.create_italics_style(
self._position_tracer.get_current_position())
)
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

setup(
name='pycaption',
version='2.2.4',
version='2.2.6.dev1',
description='Closed caption converter',
long_description=open(README_PATH).read(),
author='Joe Norton',
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
sample_scc_tab_offset, sample_scc_with_unknown_commands,
sample_scc_special_and_extended_characters,
sample_scc_with_line_too_long
sample_scc_with_line_too_long,
sample_scc_with_spaces,
sample_scc_with_break_and_repositioning
)
from tests.fixtures.srt import ( # noqa: F401
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
Expand Down
8 changes: 4 additions & 4 deletions tests/fixtures/dfxp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,15 +1004,15 @@ def sample_dfxp_with_properly_closing_spans_output():
bbbb
</p>
<p begin="00:01:35.633" end="00:01:40.833" region="r2" style="default">
<span tts:fontStyle="italic" region="r2">cccc<br/>
bbaa</span>
<span tts:fontStyle="italic" region="r2">cccc</span><br/>
bbaa
</p>
<p begin="00:01:55.766" end="00:01:59.500" region="r0" style="default">
aa
</p>
<p begin="00:01:55.766" end="00:01:59.500" region="r3" style="default">
<span tts:fontStyle="italic" region="r3">bb<br/>
cc</span>
<span tts:fontStyle="italic" region="r3">bb</span><br/>
cc
</p>
<p begin="00:01:59.500" end="00:02:03.500" region="r3" style="default">
abcd
Expand Down
38 changes: 38 additions & 0 deletions tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

@pytest.fixture(scope="session")
def sample_scc_created_dfxp_with_wrongly_closing_spans():
# lines 3 and 6 have 9479 (plain text) after italics (91ae)
# so we need to close italic tag
return """\
Scenarist_SCC V1.0

Expand Down Expand Up @@ -443,3 +445,39 @@ def sample_scc_with_line_too_long():

00:00:08;58 9420 9452 4920 ea75 73f4 20f7 616e f4e5 6420 ef6e e520 7368 eff7 2c80 94f2 ea75 73f4 20f4 ef20 6861 76e5 2061 7320 6120 ece9 f4f4 ece5 942c 8080 8080 942f
"""


@pytest.fixture(scope="session")
def sample_scc_with_break_and_repositioning():
return """\
Scenarist_SCC V1.0

00:21:59;22 942c

00:22:38;22 9420 94ae 94d0 94ce c1ec ece9 e520 ece5 61f2 6ee5 6420 f4ef 2070 f261 e3f4 e9e3 e580 9470 946e 616e 6420 67ef f420 68e5 f220 6475 e36b 7320 e96e 2061 20f2 eff7 2c80 942f

00:22:42;17 9420 94ae 9470 946e 616e 6420 c7e5 eff2 67e5 2067 eff4 2061 2062 e561 f2ae 942f

00:23:01;10 9420 94ae 1370 136e 5b20 67e5 f420 e56e ef75 6768 206d ef6e 6be5 79bf 942f

00:23:02;15 9420 94ae 91d0 91ce 54ef 20e6 e96e 6420 4375 f2e9 ef75 7320 c7e5 eff2 67e5 9170 916e 616e 6420 68e9 7320 e6f2 e9e5 6e64 7380 92d0 92ce e576 e5f2 7920 6461 7920 ef6e ece9 6ee5 2c80 942f

00:23:05;11 9420 94ae 9152 73f7 e96e 6720 6279 91ae 2070 6273 6be9 6473 aeef f267 9170 916e f4ef 2070 ec61 7920 e675 6e20 6761 6de5 7320 616e 6420 f761 f4e3 6880 92d0 9723 91ae 79ef 75f2 20e6 6176 eff2 e9f4 e520 76e9 64e5 ef73 ae80 942f

00:23:09;05 9420 94ae 91d0 91ce d9ef 7520 e361 6e20 61ec 73ef 20f2 e561 6420 6def f2e5 9170 916e 4375 f2e9 ef75 7320 c7e5 eff2 67e5 2061 6476 e56e f475 f2e5 7380 92d0 92ce 6279 2076 e973 e9f4 e96e 6720 79ef 75f2 20ec efe3 61ec 20ec e962 f261 f279 ae80 942f

00:23:15;11 942c
"""


@pytest.fixture(scope="session")
def sample_scc_with_spaces():
return """\
Scenarist_SCC V1.0

00:00:55;25 9420 942c 942f 9420 9470 97a1 57e5 ece3 ef6d e520 f4ef 9723 2080 2080 2080 91ae 5468 e520 4675 f475 f2e5 20ef e620 57ef f26b ae80

00:00:58;08 942c

00:00:59;10 9420 942c 942f
"""
54 changes: 51 additions & 3 deletions tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,49 @@ def test_line_too_long(self, sample_scc_with_line_too_long):
assert ("was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l Denison, a friend - Length 81"
in exc_info.value.args[0].split("\n"))

def test_line_not_breaking_on_break_and_repositioning(
self, sample_scc_with_break_and_repositioning):
expected_lines = [
'Allie learned to practice',
'and got her ducks in a row,',
'and George got a bear.',
'[ get enough monkey?',
'To find Curious George',
'and his friends',
'every day online,',
'swing by',
' pbskids.org',
'to play fun games and watch',
'your favorite videos.',
'You can also read more',
'Curious George adventures',
'by visiting your local library.'
]

caption_set = SCCReader().read(sample_scc_with_break_and_repositioning)
actual_lines = [
node.content
for cap_ in caption_set.get_captions('en-US')
for node in cap_.nodes
if node.type_ == CaptionNode.TEXT
]

assert expected_lines == actual_lines

def test_spaces_are_respected(self, sample_scc_with_spaces):
expected_lines = ['Welcome to ', 'The Future of Work.']

caption_set = SCCReader().read(sample_scc_with_spaces)
actual_lines = [
node.content
for cap_ in caption_set.get_captions('en-US')
for node in cap_.nodes
if node.type_ == CaptionNode.TEXT
]

assert expected_lines == actual_lines
assert actual_lines[0][-3:] == " " * 3


class TestCoverageOnly:
"""In order to refactor safely, we need coverage of 95% or more.
Expand Down Expand Up @@ -382,6 +425,10 @@ def test_italics_commands_are_formatted_properly(self):
# 4. to get new opening italic nodes after changing position, if 3
# happened
# 5. to get a final italic closing node, if one is needed
# 6. close italics if there are open italics when
# a plain text command is issued
# 7, if an italics command also demands repositioning,
# first reposition then open italics tag
node_creator.interpret_command('9470') # row 15, col 0
node_creator.interpret_command('9120') # italics off
node_creator.interpret_command('9120') # italics off
Expand All @@ -407,13 +454,13 @@ def test_italics_commands_are_formatted_properly(self):
node_creator.add_chars('c')
node_creator.interpret_command('91ae') # italics ON

node_creator.interpret_command('9270') # row 4 col 0
node_creator.interpret_command('926e') # row 4 col 0
node_creator.add_chars('d')

node_creator.interpret_command('15d0') # row 5 col 0 - creates BR
node_creator.interpret_command('154f') # row 5 col 0 - creates BR
node_creator.add_chars('e')

node_creator.interpret_command('1570') # row 6 col 0 - creates BR
node_creator.interpret_command('156e') # row 6 col 0 - creates BR
node_creator.add_chars('f')

result = list(node_creator)
Expand All @@ -434,6 +481,7 @@ def test_italics_commands_are_formatted_properly(self):
assert result[8].is_text_node()

assert result[9].requires_repositioning()

assert result[10].is_italics_node()
assert result[10].sets_italics_on()

Expand Down
Loading