diff --git a/docs/changelog.rst b/docs/changelog.rst index 0e30cf30..8aa9de5d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,15 @@ Changelog --------- +2.2.6 +^^^^^ +- Fix positioning to avoid linebreaks when we have both a break and a repositioning commands. +- Add logic to reset close italics tags if a new instruction that sets text to plain follow. +- Fix to honor spaces before inline italic commands. +- Fix for italics commands with repositioning to reposition before creating tag. + +2.2.5 +^^^^^ +- Yank. 2.2.4 ^^^^^ - Skip duplicated extended characters. diff --git a/docs/conf.py b/docs/conf.py index 3d8b4b2a..a452cc1d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,9 +53,9 @@ # built documents. # # The short X.Y version. -version = '2.2.4' +version = '2.2.6' # The full version, including alpha/beta/rc tags. -release = '2.2.4' +release = '2.2.6' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 222c3bc6..34fcc84f 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -85,7 +85,7 @@ from copy import deepcopy from pycaption.base import ( - BaseReader, BaseWriter, CaptionSet, CaptionNode, + BaseReader, BaseWriter, CaptionSet, ) from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \ CaptionReadTimingError, CaptionLineLengthError diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py index 6aa25c45..481d08f5 100644 --- a/pycaption/scc/constants.py +++ b/pycaption/scc/constants.py @@ -1,5 +1,4 @@ from itertools import product -from collections import defaultdict COMMANDS = { '9420': '', diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index 5680869e..6b100141 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -10,6 +10,7 @@ PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS, MICROSECONDS_PER_CODEWORD, INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION ) +from .translator import COMMAND_LABELS PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end") @@ -254,7 +255,7 @@ def create_and_store(self, node_buffer, start, end=0): layout_info = _get_layout_from_tuple(instruction.position) caption.nodes.append( CaptionNode.create_text( - instruction.get_text(), layout_info=layout_info), + instruction.text, layout_info=layout_info), ) caption.layout_info = layout_info @@ -312,17 +313,41 @@ def add_chars(self, *chars): node = _InstructionNode(position=current_position) self._collection.append(node) + break_and_repositioning = ( + self._position_tracer._repositioning_required and + self._position_tracer._break_required + ) + only_break = ( + self._position_tracer._break_required and not + self._position_tracer._repositioning_required + ) + + only_repositioning = ( + self._position_tracer._repositioning_required and not + self._position_tracer._break_required + ) + # handle break followed by repositioning + if break_and_repositioning: + self._collection.append(_InstructionNode.create_break( + position=current_position)) + self._collection.append( + _InstructionNode.create_repositioning_command( + current_position + ) + ) + node = _InstructionNode.create_text(current_position) + self._collection.append(node) + self._position_tracer.acknowledge_linebreak_consumed() + self._position_tracer.acknowledge_position_changed() # handle a simple line break - if self._position_tracer.is_linebreak_required(): - # must insert a line break here + elif only_break: self._collection.append(_InstructionNode.create_break( position=current_position)) node = _InstructionNode.create_text(current_position) self._collection.append(node) self._position_tracer.acknowledge_linebreak_consumed() - # handle completely new positioning - elif self._position_tracer.is_repositioning_required(): + elif only_repositioning: self._collection.append( _InstructionNode.create_repositioning_command( current_position @@ -343,13 +368,42 @@ def interpret_command(self, command): :type command: str """ self._update_positioning(command) - text = COMMANDS.get(command, '') + # if a command which sets text to plain have an open + # italics tag open, it should close it to reset the text style + plain_text_commands = { + key: value for key, value in COMMAND_LABELS.items() + if "plain" in value + } + has_open_italics_tag = False + for node in self._collection[::-1]: + if node.is_italics_node(): + if node.sets_italics_on(): + has_open_italics_tag = True + break + if command in plain_text_commands and has_open_italics_tag: + self._collection.append( + _InstructionNode.create_italics_style( + self._position_tracer.get_current_position(), + turn_on=False + ) + ) if 'italic' in text: if 'end' not in text: - self._collection.append( - _InstructionNode.create_italics_style( + # if the command is PAC and creates italics, + # first do break / repositioning before italic tag + if self._position_tracer._break_required: + self._collection.append(_InstructionNode.create_break( + position=self._position_tracer.get_current_position())) + self._position_tracer.acknowledge_linebreak_consumed() + if self._position_tracer._repositioning_required: + self._collection.append( + _InstructionNode.create_repositioning_command( + self._position_tracer.get_current_position())) + self._position_tracer.acknowledge_position_changed() + + self._collection.append(_InstructionNode.create_italics_style( self._position_tracer.get_current_position()) ) else: diff --git a/setup.py b/setup.py index 7c1c3858..0af43617 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.4', + version='2.2.6.dev1', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', diff --git a/tests/conftest.py b/tests/conftest.py index 2e361fb8..aee20b93 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -61,7 +61,9 @@ sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters, sample_scc_tab_offset, sample_scc_with_unknown_commands, sample_scc_special_and_extended_characters, - sample_scc_with_line_too_long + sample_scc_with_line_too_long, + sample_scc_with_spaces, + sample_scc_with_break_and_repositioning ) from tests.fixtures.srt import ( # noqa: F401 sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty, diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py index 714e5d52..035f0242 100644 --- a/tests/fixtures/dfxp.py +++ b/tests/fixtures/dfxp.py @@ -1004,15 +1004,15 @@ def sample_dfxp_with_properly_closing_spans_output(): bbbb
- cccc
- bbaa
+ cccc
+ bbaa
aa
- bb
- cc
+ bb
+ cc
abcd diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index 23661d36..07e5a36c 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -3,6 +3,8 @@ @pytest.fixture(scope="session") def sample_scc_created_dfxp_with_wrongly_closing_spans(): +# lines 3 and 6 have 9479 (plain text) after italics (91ae) +# so we need to close italic tag return """\ Scenarist_SCC V1.0 @@ -443,3 +445,39 @@ def sample_scc_with_line_too_long(): 00:00:08;58 9420 9452 4920 ea75 73f4 20f7 616e f4e5 6420 ef6e e520 7368 eff7 2c80 94f2 ea75 73f4 20f4 ef20 6861 76e5 2061 7320 6120 ece9 f4f4 ece5 942c 8080 8080 942f """ + + +@pytest.fixture(scope="session") +def sample_scc_with_break_and_repositioning(): + return """\ +Scenarist_SCC V1.0 + +00:21:59;22 942c + +00:22:38;22 9420 94ae 94d0 94ce c1ec ece9 e520 ece5 61f2 6ee5 6420 f4ef 2070 f261 e3f4 e9e3 e580 9470 946e 616e 6420 67ef f420 68e5 f220 6475 e36b 7320 e96e 2061 20f2 eff7 2c80 942f + +00:22:42;17 9420 94ae 9470 946e 616e 6420 c7e5 eff2 67e5 2067 eff4 2061 2062 e561 f2ae 942f + +00:23:01;10 9420 94ae 1370 136e 5b20 67e5 f420 e56e ef75 6768 206d ef6e 6be5 79bf 942f + +00:23:02;15 9420 94ae 91d0 91ce 54ef 20e6 e96e 6420 4375 f2e9 ef75 7320 c7e5 eff2 67e5 9170 916e 616e 6420 68e9 7320 e6f2 e9e5 6e64 7380 92d0 92ce e576 e5f2 7920 6461 7920 ef6e ece9 6ee5 2c80 942f + +00:23:05;11 9420 94ae 9152 73f7 e96e 6720 6279 91ae 2070 6273 6be9 6473 aeef f267 9170 916e f4ef 2070 ec61 7920 e675 6e20 6761 6de5 7320 616e 6420 f761 f4e3 6880 92d0 9723 91ae 79ef 75f2 20e6 6176 eff2 e9f4 e520 76e9 64e5 ef73 ae80 942f + +00:23:09;05 9420 94ae 91d0 91ce d9ef 7520 e361 6e20 61ec 73ef 20f2 e561 6420 6def f2e5 9170 916e 4375 f2e9 ef75 7320 c7e5 eff2 67e5 2061 6476 e56e f475 f2e5 7380 92d0 92ce 6279 2076 e973 e9f4 e96e 6720 79ef 75f2 20ec efe3 61ec 20ec e962 f261 f279 ae80 942f + +00:23:15;11 942c +""" + + +@pytest.fixture(scope="session") +def sample_scc_with_spaces(): + return """\ +Scenarist_SCC V1.0 + +00:00:55;25 9420 942c 942f 9420 9470 97a1 57e5 ece3 ef6d e520 f4ef 9723 2080 2080 2080 91ae 5468 e520 4675 f475 f2e5 20ef e620 57ef f26b ae80 + +00:00:58;08 942c + +00:00:59;10 9420 942c 942f +""" diff --git a/tests/test_scc.py b/tests/test_scc.py index f5d6698b..b15a529f 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -253,6 +253,49 @@ def test_line_too_long(self, sample_scc_with_line_too_long): assert ("was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l Denison, a friend - Length 81" in exc_info.value.args[0].split("\n")) + def test_line_not_breaking_on_break_and_repositioning( + self, sample_scc_with_break_and_repositioning): + expected_lines = [ + 'Allie learned to practice', + 'and got her ducks in a row,', + 'and George got a bear.', + '[ get enough monkey?', + 'To find Curious George', + 'and his friends', + 'every day online,', + 'swing by', + ' pbskids.org', + 'to play fun games and watch', + 'your favorite videos.', + 'You can also read more', + 'Curious George adventures', + 'by visiting your local library.' + ] + + caption_set = SCCReader().read(sample_scc_with_break_and_repositioning) + actual_lines = [ + node.content + for cap_ in caption_set.get_captions('en-US') + for node in cap_.nodes + if node.type_ == CaptionNode.TEXT + ] + + assert expected_lines == actual_lines + + def test_spaces_are_respected(self, sample_scc_with_spaces): + expected_lines = ['Welcome to ', 'The Future of Work.'] + + caption_set = SCCReader().read(sample_scc_with_spaces) + actual_lines = [ + node.content + for cap_ in caption_set.get_captions('en-US') + for node in cap_.nodes + if node.type_ == CaptionNode.TEXT + ] + + assert expected_lines == actual_lines + assert actual_lines[0][-3:] == " " * 3 + class TestCoverageOnly: """In order to refactor safely, we need coverage of 95% or more. @@ -382,6 +425,10 @@ def test_italics_commands_are_formatted_properly(self): # 4. to get new opening italic nodes after changing position, if 3 # happened # 5. to get a final italic closing node, if one is needed + # 6. close italics if there are open italics when + # a plain text command is issued + # 7, if an italics command also demands repositioning, + # first reposition then open italics tag node_creator.interpret_command('9470') # row 15, col 0 node_creator.interpret_command('9120') # italics off node_creator.interpret_command('9120') # italics off @@ -407,13 +454,13 @@ def test_italics_commands_are_formatted_properly(self): node_creator.add_chars('c') node_creator.interpret_command('91ae') # italics ON - node_creator.interpret_command('9270') # row 4 col 0 + node_creator.interpret_command('926e') # row 4 col 0 node_creator.add_chars('d') - node_creator.interpret_command('15d0') # row 5 col 0 - creates BR + node_creator.interpret_command('154f') # row 5 col 0 - creates BR node_creator.add_chars('e') - node_creator.interpret_command('1570') # row 6 col 0 - creates BR + node_creator.interpret_command('156e') # row 6 col 0 - creates BR node_creator.add_chars('f') result = list(node_creator) @@ -434,6 +481,7 @@ def test_italics_commands_are_formatted_properly(self): assert result[8].is_text_node() assert result[9].requires_repositioning() + assert result[10].is_italics_node() assert result[10].sets_italics_on()