From 0b823608bc2961d10b1aeb9635c75d5e8fc314b3 Mon Sep 17 00:00:00 2001 From: Jakob Probst Date: Thu, 15 Oct 2020 01:30:40 +0200 Subject: [PATCH 1/2] CommentHandling: add support for reading #| - write obsolete entries entirely - add support for reading "previous" (#| ) comments --- babel/messages/catalog.py | 76 +++++----- babel/messages/frontend.py | 2 +- babel/messages/pofile.py | 252 ++++++++++++++++++++------------- tests/messages/test_catalog.py | 36 ++--- tests/messages/test_pofile.py | 26 ++-- 5 files changed, 221 insertions(+), 171 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index af1b6573c..1a43fa9f2 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -11,24 +11,22 @@ import re import time - from cgi import parse_header from collections import OrderedDict +from copy import copy from datetime import datetime, time as time_ from difflib import get_close_matches from email import message_from_string -from copy import copy from babel import __version__ as VERSION +from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text from babel.core import Locale, UnknownLocaleError from babel.dates import format_datetime from babel.messages.plurals import get_plural from babel.util import distinct, LOCALTZ, FixedOffsetTimezone -from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text __all__ = ['Message', 'Catalog', 'TranslationError'] - PYTHON_FORMAT = re.compile(r''' \% (?:\(([\w]*)\))? 
@@ -76,8 +74,8 @@ def _parse_datetime_header(value): class Message(object): """Representation of a single message in a catalog.""" - def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None, context=None): + def __init__(self, id, string=u'', locations=(), flags=(), extracted_comments=(), + translator_comments=(), previous_id=(), previous_context=None, lineno=None, context=None): """Create the message object. :param id: the message ID, or a ``(singular, plural)`` tuple for @@ -86,10 +84,11 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filename, lineno)`` tuples :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments for the message - :param user_comments: a sequence of user comments for the message + :param extracted_comments: a sequence of extracted comments for the message + :param translator_comments: a sequence of translator comments for the message :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages + :param previous_context: the previous message context :param lineno: the line number on which the msgid line was found in the PO file, if any :param context: the message context @@ -104,12 +103,10 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), self.flags.add('python-format') else: self.flags.discard('python-format') - self.auto_comments = list(distinct(auto_comments)) - self.user_comments = list(distinct(user_comments)) - if isinstance(previous_id, string_types): - self.previous_id = [previous_id] - else: - self.previous_id = list(previous_id) + self.extracted_comments = list(distinct(extracted_comments)) + self.translator_comments = list(distinct(translator_comments)) + self.previous_id = previous_id + self.previous_context = previous_context 
self.lineno = lineno self.context = context @@ -119,10 +116,12 @@ def __repr__(self): def __cmp__(self, other): """Compare Messages, taking into account plural ids""" + def values_to_compare(obj): if isinstance(obj, Message) and obj.pluralizable: return obj.id[0], obj.context or '' return obj.id, obj.context or '' + return cmp(values_to_compare(self), values_to_compare(other)) def __gt__(self, other): @@ -145,9 +144,10 @@ def __ne__(self, other): def clone(self): return Message(*map(copy, (self.id, self.string, self.locations, - self.flags, self.auto_comments, - self.user_comments, self.previous_id, - self.lineno, self.context))) + self.flags, self.extracted_comments, + self.translator_comments, self.previous_id, + self.previous_context, self.lineno, + self.context))) def check(self, catalog=None): """Run various validation checks on the message. Some validations @@ -223,7 +223,6 @@ class TranslationError(Exception): # FIRST AUTHOR , YEAR. #""" - if PY2: def _parse_header(header_string): # message_from_string only works for str, not for unicode @@ -336,9 +335,9 @@ def _get_header_comment(self): if hasattr(self.revision_date, 'strftime'): year = self.revision_date.strftime('%Y') comment = comment.replace('PROJECT', self.project) \ - .replace('VERSION', self.version) \ - .replace('YEAR', year) \ - .replace('ORGANIZATION', self.copyright_holder) + .replace('VERSION', self.version) \ + .replace('YEAR', year) \ + .replace('ORGANIZATION', self.copyright_holder) locale_name = (self.locale.english_name if self.locale else self.locale_identifier) if locale_name: comment = comment.replace('Translations template', '%s translations' % locale_name) @@ -617,17 +616,17 @@ def __setitem__(self, id, message): current.string = message.string current.locations = list(distinct(current.locations + message.locations)) - current.auto_comments = list(distinct(current.auto_comments + - message.auto_comments)) - current.user_comments = list(distinct(current.user_comments + - 
message.user_comments)) + current.extracted_comments = list(distinct(current.extracted_comments + + message.extracted_comments)) + current.translator_comments = list(distinct(current.translator_comments + + message.translator_comments)) current.flags |= message.flags message = current elif id == '': # special treatment for the header message self.mime_headers = _parse_header(message.string).items() self.header_comment = '\n'.join([('# %s' % c).rstrip() for c - in message.user_comments]) + in message.translator_comments]) self.fuzzy = message.fuzzy else: if isinstance(id, (list, tuple)): @@ -635,8 +634,8 @@ def __setitem__(self, id, message): 'Expected sequence but got %s' % type(message.string) self._messages[key] = message - def add(self, id, string=None, locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=(), lineno=None, context=None): + def add(self, id, string=None, locations=(), flags=(), extracted_comments=(), + translator_comments=(), previous_id=(), previous_context=None, lineno=None, context=None): """Add or update the message with the specified ID. 
>>> catalog = Catalog() @@ -654,17 +653,17 @@ def add(self, id, string=None, locations=(), flags=(), auto_comments=(), ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filename, lineno)`` tuples :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments - :param user_comments: a sequence of user comments + :param extracted_comments: a sequence of extracted comments + :param translator_comments: a sequence of translator comments :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages + :param previous_context: the previous message context :param lineno: the line number on which the msgid line was found in the PO file, if any :param context: the message context """ - message = Message(id, string, list(locations), flags, auto_comments, - user_comments, previous_id, lineno=lineno, - context=context) + message = Message(id, string, list(locations), flags, extracted_comments, + translator_comments, previous_id, previous_context, lineno, context) self[id] = message return message @@ -700,7 +699,7 @@ def delete(self, id, context=None): if key in self._messages: del self._messages[key] - def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True): + def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_translator_comments=True): """Update the catalog based on the given template catalog. 
>>> from babel.messages import Catalog @@ -773,16 +772,13 @@ def _merge(message, oldkey, newkey): fuzzy = True fuzzy_matches.add(oldkey) oldmsg = messages.get(oldkey) - if isinstance(oldmsg.id, string_types): - message.previous_id = [oldmsg.id] - else: - message.previous_id = list(oldmsg.id) + message.previous_id = oldmsg.id else: oldmsg = remaining.pop(oldkey, None) message.string = oldmsg.string - if keep_user_comments: - message.user_comments = list(distinct(oldmsg.user_comments)) + if keep_translator_comments: + message.translator_comments = list(distinct(oldmsg.translator_comments)) if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)): diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 0b65a7c6b..0f6db9a3b 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -485,7 +485,7 @@ def callback(filename, method, options): filepath = os.path.normpath(os.path.join(path, filename)) catalog.add(message, None, [(filepath, lineno)], - auto_comments=comments, context=context) + extracted_comments=comments, context=context) self.log.info('writing PO template file to %s', self.output_file) write_po(outfile, catalog, width=self.width, diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index b8cb46976..943b410ff 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -11,12 +11,13 @@ """ from __future__ import print_function + import os import re +from babel._compat import text_type, cmp from babel.messages.catalog import Catalog, Message from babel.util import wraptext -from babel._compat import text_type, cmp def unescape(string): @@ -29,6 +30,7 @@ def unescape(string): :param string: the string to unescape """ + def replace_escapes(match): m = match.group(1) if m == 'n': @@ -39,6 +41,7 @@ def replace_escapes(match): return '\r' # m is \ or " return m + return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1]) @@ -75,6 +78,7 @@ def 
denormalize(string): class PoFileError(Exception): """Exception thrown by PoParser when an invalid po file is encountered.""" + def __init__(self, message, catalog, line, lineno): super(PoFileError, self).__init__('{message} on {lineno}'.format(message=message, lineno=lineno)) self.catalog = catalog @@ -128,7 +132,6 @@ def __ne__(self, other): return self.__cmp__(other) != 0 - class PoFileParser(object): """Support class to read messages from a ``gettext`` PO (portable object) file and add them to a `Catalog` @@ -153,13 +156,16 @@ def __init__(self, catalog, ignore_obsolete=False, abort_invalid=False): def _reset_message_state(self): self.messages = [] + self.previous_messages = [] self.translations = [] self.locations = [] self.flags = [] - self.user_comments = [] - self.auto_comments = [] + self.translator_comments = [] + self.extracted_comments = [] self.context = None + self.previous_context = None self.obsolete = False + self.previous = False self.in_msgid = False self.in_msgstr = False self.in_msgctxt = False @@ -188,9 +194,20 @@ def _add_message(self): msgctxt = self.context.denormalize() else: msgctxt = None + + if len(self.previous_messages) > 1: + previous_msgid = tuple([m.denormalize() for m in self.previous_messages]) + elif len(self.previous_messages) == 1: + previous_msgid = self.previous_messages[0].denormalize() + else: + previous_msgid = () + if self.previous_context: + previous_msgctxt = self.previous_context.denormalize() + else: + previous_msgctxt = None message = Message(msgid, string, list(self.locations), set(self.flags), - self.auto_comments, self.user_comments, lineno=self.offset + 1, - context=msgctxt) + self.extracted_comments, self.translator_comments, previous_msgid, + previous_msgctxt, self.offset + 1, msgctxt) if self.obsolete: if not self.ignore_obsolete: self.catalog.obsolete[msgid] = message @@ -203,17 +220,19 @@ def _finish_current_message(self): if self.messages: self._add_message() - def _process_message_line(self, lineno, line, 
obsolete=False): + def _process_message_line(self, lineno, line, obsolete=False, previous=False): if line.startswith('"'): - self._process_string_continuation_line(line, lineno) + self._process_string_continuation_line(line, lineno, previous) else: - self._process_keyword_line(lineno, line, obsolete) + self._process_keyword_line(lineno, line, obsolete, previous) - def _process_keyword_line(self, lineno, line, obsolete=False): + def _process_keyword_line(self, lineno, line, obsolete=False, previous=False): for keyword in self._keywords: try: if line.startswith(keyword) and line[len(keyword)] in [' ', '[']: + if previous and line.startswith('msgstr'): + continue arg = line[len(keyword):] break except IndexError: @@ -225,47 +244,72 @@ def _process_keyword_line(self, lineno, line, obsolete=False): if keyword in ['msgid', 'msgctxt']: self._finish_current_message() - self.obsolete = obsolete + if not previous: + self.obsolete = obsolete + self.previous = previous # The line that has the msgid is stored as the offset of the msg # should this be the msgctxt if it has one? 
if keyword == 'msgid': self.offset = lineno - if keyword in ['msgid', 'msgid_plural']: - self.in_msgctxt = False - self.in_msgid = True - self.messages.append(_NormalizedString(arg)) + self.in_msgid = keyword in {'msgid', 'msgid_plural'} + self.in_msgstr = keyword == 'msgstr' and not previous + self.in_msgctxt = keyword == 'msgctxt' + + if self.in_msgid: + if previous: + self.previous_messages.append(_NormalizedString(arg)) + else: + self.messages.append(_NormalizedString(arg)) - elif keyword == 'msgstr': - self.in_msgid = False - self.in_msgstr = True + elif self.in_msgstr: if arg.startswith('['): idx, msg = arg[1:].split(']', 1) self.translations.append([int(idx), _NormalizedString(msg)]) else: self.translations.append([0, _NormalizedString(arg)]) - elif keyword == 'msgctxt': - self.in_msgctxt = True - self.context = _NormalizedString(arg) + elif self.in_msgctxt: + if previous: + self.previous_context = _NormalizedString(arg) + else: + self.context = _NormalizedString(arg) + + def _process_string_continuation_line(self, line, lineno, previous=False): + if self.previous != previous: + self._invalid_pofile(line, lineno, + "Got line starting with #| \" but not in previous msgid or previous msgctxt") + return - def _process_string_continuation_line(self, line, lineno): if self.in_msgid: - s = self.messages[-1] + if previous: + s = self.previous_messages[-1] + else: + s = self.messages[-1] elif self.in_msgstr: + if previous: + self._invalid_pofile(line, lineno, "Got line starting with \" in previous msgstr") + return s = self.translations[-1][1] elif self.in_msgctxt: - s = self.context + if previous: + s = self.previous_context + else: + s = self.context else: self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt") return s.append(line) - def _process_comment(self, line): + def _process_comment(self, line, lineno): self._finish_current_message() + original_line = line + if line[1:].startswith('~'): + line = '#' + line[2:] + 
if line[1:].startswith(':'): for location in line[2:].lstrip().split(): pos = location.rfind(':') @@ -278,16 +322,18 @@ def _process_comment(self, line): else: self.locations.append((location, None)) elif line[1:].startswith(','): - for flag in line[2:].lstrip().split(','): + for flag in line[2:].split(','): self.flags.append(flag.strip()) elif line[1:].startswith('.'): - # These are called auto-comments - comment = line[2:].strip() - if comment: # Just check that we're not adding empty comments - self.auto_comments.append(comment) + # These are called extracted-comments + self.extracted_comments.append(line[2:].strip()) + elif line[1:].startswith(' ') or len(line) == 1: + # These are called translator-comments + self.translator_comments.append(line[2:].strip()) + elif line[1:].startswith('|'): + self._process_message_line(lineno, line[2:].strip(), self.obsolete, True) else: - # These are called user comments - self.user_comments.append(line[1:].strip()) + self._invalid_pofile(original_line, lineno, "Unknown comment type") def parse(self, fileobj): """ @@ -296,16 +342,21 @@ def parse(self, fileobj): """ for lineno, line in enumerate(fileobj): - line = line.strip() if not isinstance(line, text_type): line = line.decode(self.catalog.charset) + line = line.strip() if not line: continue if line.startswith('#'): if line[1:].startswith('~'): - self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) + if line[2:].lstrip().startswith(tuple(self._keywords)) or ( + line[2:].lstrip().startswith('"') and (self.in_msgid or self.in_msgstr or self.in_msgctxt) + ): + self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) + else: + self._process_comment(line, lineno) else: - self._process_comment(line) + self._process_comment(line, lineno) else: self._process_message_line(lineno, line) @@ -313,7 +364,7 @@ def parse(self, fileobj): # No actual messages found, but there was some info in comments, from which # we'll construct an empty header message - if not 
self.counter and (self.flags or self.user_comments or self.auto_comments): + if not self.counter and (self.flags or self.translator_comments or self.extracted_comments): self.messages.append(_NormalizedString(u'""')) self.translations.append([0, _NormalizedString(u'""')]) self._add_message() @@ -356,7 +407,7 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No ... if message.id: ... print((message.id, message.string)) ... print(' ', (message.locations, sorted(list(message.flags)))) - ... print(' ', (message.user_comments, message.auto_comments)) + ... print(' ', (message.translator_comments, message.extracted_comments)) (u'foo %(name)s', u'quux %(name)s') ([(u'main.py', 1)], [u'fuzzy', u'python-format']) ([], []) @@ -383,9 +434,9 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=No WORD_SEP = re.compile('(' - r'\s+|' # any whitespace + r'\s+|' # any whitespace r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash ')') @@ -401,10 +452,10 @@ def escape(string): :param string: the string to escape """ return '"%s"' % string.replace('\\', '\\\\') \ - .replace('\t', '\\t') \ - .replace('\r', '\\r') \ - .replace('\n', '\\n') \ - .replace('\"', '\\"') + .replace('\t', '\\t') \ + .replace('\r', '\\r') \ + .replace('\n', '\\n') \ + .replace('\"', '\\"') def normalize(string, prefix='', width=76): @@ -514,6 +565,7 @@ def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, updating the catalog :param include_lineno: include line number in the location comment """ + def _normalize(key, prefix=''): return normalize(key, prefix=prefix, width=width) @@ -532,55 +584,41 @@ def _write_comment(comment, prefix=''): for line in wraptext(comment, _width): _write('#%s %s\n' % (prefix, line.strip())) - def _write_message(message, prefix=''): - if isinstance(message.id, (list, tuple)): - if 
message.context: + def _write_message_and_context(message_context, message_id, message_string, prefix='', previous=False): + if isinstance(message_id, (list, tuple)): + if message_context is not None: _write('%smsgctxt %s\n' % (prefix, - _normalize(message.context, prefix))) - _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix))) - _write('%smsgid_plural %s\n' % ( - prefix, _normalize(message.id[1], prefix) - )) - - for idx in range(catalog.num_plurals): - try: - string = message.string[idx] - except IndexError: - string = '' - _write('%smsgstr[%d] %s\n' % ( - prefix, idx, _normalize(string, prefix) + _normalize(message_context, prefix))) + if not previous or message_id: + _write('%smsgid %s\n' % (prefix, _normalize(message_id[0], prefix))) + _write('%smsgid_plural %s\n' % ( + prefix, _normalize(message_id[1], prefix) )) + + if not previous: + for idx in range(catalog.num_plurals): + try: + string = message_string[idx] or '' + except IndexError: + string = '' + _write('%smsgstr[%d] %s\n' % ( + prefix, idx, _normalize(string, prefix) + )) else: - if message.context: + if message_context is not None: _write('%smsgctxt %s\n' % (prefix, - _normalize(message.context, prefix))) - _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix))) - _write('%smsgstr %s\n' % ( - prefix, _normalize(message.string or '', prefix) - )) - - sort_by = None - if sort_output: - sort_by = "message" - elif sort_by_file: - sort_by = "location" + _normalize(message_context, prefix))) + _write('%smsgid %s\n' % (prefix, _normalize(message_id, prefix))) + if not previous: + _write('%smsgstr %s\n' % ( + prefix, _normalize(message_string or '', prefix) + )) - for message in _sort_messages(catalog, sort_by=sort_by): - if not message.id: # This is the header "message" - if omit_header: - continue - comment_header = catalog.header_comment - if width and width > 0: - lines = [] - for line in comment_header.splitlines(): - lines += wraptext(line, width=width, - subsequent_indent='# 
') - comment_header = u'\n'.join(lines) - _write(comment_header + u'\n') + def _write_entry(message, obsolete=False): - for comment in message.user_comments: + for comment in message.translator_comments: _write_comment(comment) - for comment in message.auto_comments: + for comment in message.extracted_comments: _write_comment(comment, prefix='.') if not no_location: @@ -591,7 +629,7 @@ def _write_message(message, prefix=''): # if no sorting possible, leave unsorted. # (see issue #606) try: - locations = sorted(message.locations, + locations = sorted(message.locations, key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1)) except TypeError: # e.g. "TypeError: unorderable types: NoneType() < int()" locations = message.locations @@ -603,27 +641,41 @@ def _write_message(message, prefix=''): locs.append(u'%s' % filename.replace(os.sep, '/')) _write_comment(' '.join(locs), prefix=':') if message.flags: - _write('#%s\n' % ', '.join([''] + sorted(message.flags))) + _write_comment(', '.join(sorted(message.flags)), prefix=',') - if message.previous_id and include_previous: - _write_comment('msgid %s' % _normalize(message.previous_id[0]), - prefix='|') - if len(message.previous_id) > 1: - _write_comment('msgid_plural %s' % _normalize( - message.previous_id[1] - ), prefix='|') + if include_previous: + _write_message_and_context(message.previous_context, message.previous_id, None, prefix='#| ', previous=True) + + _write_message_and_context(message.context, message.id, message.string, prefix='#~ ' if obsolete else '') + + sort_by = None + if sort_output: + sort_by = "message" + elif sort_by_file: + sort_by = "location" + + for message in _sort_messages(catalog, sort_by=sort_by): + if not message.id: # This is the header "message" + if omit_header: + continue + comment_header = catalog.header_comment + if width and width > 0: + lines = [] + for line in comment_header.splitlines(): + lines += wraptext(line, width=width, + subsequent_indent='# ') + comment_header = 
u'\n'.join(lines) + _write(comment_header + u'\n') - _write_message(message) + _write_entry(message) _write('\n') if not ignore_obsolete: for message in _sort_messages( - catalog.obsolete.values(), - sort_by=sort_by + catalog.obsolete.values(), + sort_by=sort_by ): - for comment in message.user_comments: - _write_comment(comment) - _write_message(message, prefix='#~ ') + _write_entry(message, obsolete=True) _write('\n') diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 2d9e31d00..b29b258de 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -41,12 +41,12 @@ def test_python_format(self): assert catalog.PYTHON_FORMAT.search('foo %()s') def test_translator_comments(self): - mess = catalog.Message('foo', user_comments=['Comment About `foo`']) - self.assertEqual(mess.user_comments, ['Comment About `foo`']) + mess = catalog.Message('foo', translator_comments=['Comment About `foo`']) + self.assertEqual(mess.translator_comments, ['Comment About `foo`']) mess = catalog.Message('foo', - auto_comments=['Comment 1 About `foo`', + extracted_comments=['Comment 1 About `foo`', 'Comment 2 About `foo`']) - self.assertEqual(mess.auto_comments, ['Comment 1 About `foo`', + self.assertEqual(mess.extracted_comments, ['Comment 1 About `foo`', 'Comment 2 About `foo`']) def test_clone_message_object(self): @@ -73,17 +73,17 @@ def test_two_messages_with_same_singular(self): def test_duplicate_auto_comment(self): cat = catalog.Catalog() - cat.add('foo', auto_comments=['A comment']) - cat.add('foo', auto_comments=['A comment', 'Another comment']) + cat.add('foo', extracted_comments=['A comment']) + cat.add('foo', extracted_comments=['A comment', 'Another comment']) self.assertEqual(['A comment', 'Another comment'], - cat['foo'].auto_comments) + cat['foo'].extracted_comments) def test_duplicate_user_comment(self): cat = catalog.Catalog() - cat.add('foo', user_comments=['A comment']) - cat.add('foo', user_comments=['A comment', 
'Another comment']) + cat.add('foo', translator_comments=['A comment']) + cat.add('foo', translator_comments=['A comment', 'Another comment']) self.assertEqual(['A comment', 'Another comment'], - cat['foo'].user_comments) + cat['foo'].translator_comments) def test_duplicate_location(self): cat = catalog.Catalog() @@ -112,16 +112,16 @@ def test_update_message_changed_to_simple(self): def test_update_message_updates_comments(self): cat = catalog.Catalog() cat[u'foo'] = catalog.Message('foo', locations=[('main.py', 5)]) - self.assertEqual(cat[u'foo'].auto_comments, []) - self.assertEqual(cat[u'foo'].user_comments, []) + self.assertEqual(cat[u'foo'].extracted_comments, []) + self.assertEqual(cat[u'foo'].translator_comments, []) # Update cat[u'foo'] with a new location and a comment cat[u'foo'] = catalog.Message('foo', locations=[('main.py', 7)], - user_comments=['Foo Bar comment 1']) - self.assertEqual(cat[u'foo'].user_comments, ['Foo Bar comment 1']) + translator_comments=['Foo Bar comment 1']) + self.assertEqual(cat[u'foo'].translator_comments, ['Foo Bar comment 1']) # now add yet another location with another comment cat[u'foo'] = catalog.Message('foo', locations=[('main.py', 9)], - auto_comments=['Foo Bar comment 2']) - self.assertEqual(cat[u'foo'].auto_comments, ['Foo Bar comment 2']) + extracted_comments=['Foo Bar comment 2']) + self.assertEqual(cat[u'foo'].extracted_comments, ['Foo Bar comment 2']) def test_update_fuzzy_matching_with_case_change(self): cat = catalog.Catalog() @@ -490,7 +490,7 @@ def test_update_catalog_comments(): msgstr "foo %(name)s" ''')) - assert all(message.user_comments and message.auto_comments for message in catalog if message.id) + assert all(message.translator_comments and message.extracted_comments for message in catalog if message.id) # NOTE: in the POT file, there are no comments template = pofile.read_po(StringIO(''' @@ -503,4 +503,4 @@ def test_update_catalog_comments(): catalog.update(template) # Auto comments will be obliterated 
here - assert all(message.user_comments for message in catalog if message.id) + assert all(message.translator_comments for message in catalog if message.id) diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index 2db7f6715..38bd311bd 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -163,7 +163,7 @@ def test_obsolete_message(self): message = catalog.obsolete[u'foo'] self.assertEqual(u'foo', message.id) self.assertEqual(u'Voh', message.string) - self.assertEqual(['This is an obsolete message'], message.user_comments) + self.assertEqual(['This is an obsolete message'], message.translator_comments) def test_obsolete_message_ignored(self): buf = StringIO(r'''# This is an obsolete message @@ -198,7 +198,7 @@ def test_multi_line_obsolete_message(self): message = catalog.obsolete[u'foofoo'] self.assertEqual(u'foofoo', message.id) self.assertEqual(u'VohVooooh', message.string) - self.assertEqual(['This is an obsolete message'], message.user_comments) + self.assertEqual(['This is an obsolete message'], message.translator_comments) def test_unit_following_multi_line_obsolete_message(self): buf = StringIO(r'''# This is an obsolete message @@ -219,7 +219,7 @@ def test_unit_following_multi_line_obsolete_message(self): message = catalog[u'bar'] self.assertEqual(u'bar', message.id) self.assertEqual(u'Bahr', message.string) - self.assertEqual(['This message is not obsolete'], message.user_comments) + self.assertEqual(['This message is not obsolete'], message.translator_comments) def test_unit_before_obsolete_is_not_obsoleted(self): buf = StringIO(r''' @@ -241,7 +241,7 @@ def test_unit_before_obsolete_is_not_obsoleted(self): message = catalog[u'bar'] self.assertEqual(u'bar', message.id) self.assertEqual(u'Bahr', message.string) - self.assertEqual(['This message is not obsolete'], message.user_comments) + self.assertEqual(['This message is not obsolete'], message.translator_comments) def test_with_context(self): buf = 
BytesIO(b'''# Some string in the menu @@ -509,8 +509,8 @@ def test_write_po_file_with_specified_charset(self): def test_duplicate_comments(self): catalog = Catalog() - catalog.add(u'foo', auto_comments=['A comment']) - catalog.add(u'foo', auto_comments=['A comment']) + catalog.add(u'foo', extracted_comments=['A comment']) + catalog.add(u'foo', extracted_comments=['A comment']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b'''#. A comment @@ -620,9 +620,9 @@ def test_no_wrap_and_width_behaviour_on_comments(self): def test_pot_with_translator_comments(self): catalog = Catalog() catalog.add(u'foo', locations=[('main.py', 1)], - auto_comments=['Comment About `foo`']) + extracted_comments=['Comment About `foo`']) catalog.add(u'bar', locations=[('utils.py', 3)], - user_comments=['Comment About `bar` with', + translator_comments=['Comment About `bar` with', 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) @@ -642,7 +642,7 @@ def test_po_with_obsolete_message(self): catalog.add(u'foo', u'Voh', locations=[('main.py', 1)]) catalog.obsolete['bar'] = Message(u'bar', u'Bahr', locations=[('utils.py', 3)], - user_comments=['User comment']) + translator_comments=['User comment']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b'''#: main.py:1 @@ -650,6 +650,7 @@ def test_po_with_obsolete_message(self): msgstr "Voh" # User comment +#: utils.py:3 #~ msgid "bar" #~ msgstr "Bahr"''', buf.getvalue().strip()) @@ -672,6 +673,7 @@ def test_po_with_multiline_obsolete_message(self): msgid "foo" msgstr "Voh" +#: utils.py:3 #~ msgid "" #~ "Here's a message that covers\\n" #~ "multiple lines, and should still be handled\\n" @@ -686,7 +688,7 @@ def test_po_with_obsolete_message_ignored(self): catalog.add(u'foo', u'Voh', locations=[('main.py', 1)]) catalog.obsolete['bar'] = Message(u'bar', u'Bahr', locations=[('utils.py', 3)], - user_comments=['User comment']) + translator_comments=['User 
comment']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, ignore_obsolete=True) self.assertEqual(b'''#: main.py:1 @@ -721,7 +723,7 @@ def test_po_with_previous_msgid_plural(self): def test_sorted_po(self): catalog = Catalog() catalog.add(u'bar', locations=[('utils.py', 3)], - user_comments=['Comment About `bar` with', + translator_comments=['Comment About `bar` with', 'multiple lines.']) catalog.add((u'foo', u'foos'), (u'Voh', u'Voeh'), locations=[('main.py', 1)]) @@ -789,7 +791,7 @@ def test_file_sorted_po(self): def test_file_with_no_lineno(self): catalog = Catalog() catalog.add(u'bar', locations=[('utils.py', None)], - user_comments=['Comment About `bar` with', + translator_comments=['Comment About `bar` with', 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, sort_output=True) From ab7dee136af7ed52eb140101db188e6b783356f3 Mon Sep 17 00:00:00 2001 From: Jakob Probst Date: Thu, 15 Oct 2020 16:41:27 +0200 Subject: [PATCH 2/2] CommentHandling: extend the tests - extend obsolete entry tests - add test for reading "previous" (#| ) comments --- tests/messages/test_pofile.py | 295 +++++++++++++++++++++++++++++++--- 1 file changed, 273 insertions(+), 22 deletions(-) diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py index 38bd311bd..3be28870b 100644 --- a/tests/messages/test_pofile.py +++ b/tests/messages/test_pofile.py @@ -11,14 +11,15 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. 
-from datetime import datetime import unittest +from datetime import datetime +from babel._compat import StringIO, BytesIO from babel.core import Locale -from babel.messages.catalog import Catalog, Message from babel.messages import pofile +from babel.messages.catalog import Catalog, Message from babel.util import FixedOffsetTimezone -from babel._compat import StringIO, BytesIO + class ReadPoTestCase(unittest.TestCase): @@ -149,9 +150,25 @@ def test_header_entry(self): def test_obsolete_message(self): buf = StringIO(r'''# This is an obsolete message +#. Developer comment +#: utils.py:3 +#, fuzzy +#| msgctxt "previous context" +#| msgid "fuu" +#~ msgctxt "context" #~ msgid "foo" #~ msgstr "Voh" +#~ This is another obsolete message +#~. Another developer comment +#~: utils.py:3 +#~, fuzzy +#~| msgctxt "previous context" +#~| msgid "fus" +#~ msgctxt "context" +#~ msgid "fos" +#~ msgstr "Vohs" + # This message is not obsolete #: main.py:1 msgid "bar" @@ -159,16 +176,40 @@ def test_obsolete_message(self): ''') catalog = pofile.read_po(buf) self.assertEqual(1, len(catalog)) - self.assertEqual(1, len(catalog.obsolete)) + self.assertEqual(2, len(catalog.obsolete)) + message = catalog.obsolete[u'foo'] self.assertEqual(u'foo', message.id) self.assertEqual(u'Voh', message.string) + self.assertEqual([('utils.py', 3)], message.locations) + self.assertEqual({'fuzzy'}, message.flags) + self.assertEqual(['Developer comment'], message.extracted_comments) self.assertEqual(['This is an obsolete message'], message.translator_comments) + self.assertEqual(u'fuu', message.previous_id) + self.assertEqual(u'previous context', message.previous_context) + self.assertEqual(u'context', message.context) + + message = catalog.obsolete[u'fos'] + self.assertEqual(u'fos', message.id) + self.assertEqual(u'Vohs', message.string) + self.assertEqual([('utils.py', 3)], message.locations) + self.assertEqual({'fuzzy'}, message.flags) + self.assertEqual(['Another developer comment'], 
message.extracted_comments) + self.assertEqual(['This is another obsolete message'], message.translator_comments) + self.assertEqual(u'fus', message.previous_id) + self.assertEqual(u'previous context', message.previous_context) + self.assertEqual(u'context', message.context) def test_obsolete_message_ignored(self): - buf = StringIO(r'''# This is an obsolete message -#~ msgid "foo" -#~ msgstr "Voh" + buf = StringIO(r'''# User comment +#. Developer Comment +#: utils.py:3 +#, fuzzy +#| msgctxt "previous context" +#| msgid "foo" +#~ msgctxt "context" +#~ msgid "bar" +#~ msgstr "Bahr" # This message is not obsolete #: main.py:1 @@ -180,13 +221,53 @@ def test_obsolete_message_ignored(self): self.assertEqual(0, len(catalog.obsolete)) def test_multi_line_obsolete_message(self): - buf = StringIO(r'''# This is an obsolete message + buf = StringIO(r'''# Here's a user comment that covers multiple lines, and should still be +# handled correctly. +#. Here's a developer comment that covers multiple lines, and should still be +#. handled correctly. +#: utils.py:3 +#, fuzzy +#| msgctxt "" +#| "Here's a previous context that covers\n" +#| "multiple lines, and should still be handled\n" +#| "correctly.\n" +#| msgid "previous\n" +#| "foo" +#~ msgctxt "" +#~ "Here's a context that covers\n" +#~ "multiple lines, and should still be handled\n" +#~ "correctly.\n" #~ msgid "" #~ "foo" #~ "foo" #~ msgstr "" -#~ "Voh" -#~ "Vooooh" +#~ "Here's a message that covers\n" +#~ "multiple lines, and should still be handled\n" +#~ "correctly.\n" + +#~ Here's a user comment that covers multiple lines, and should still be +#~ handled correctly. +#~. Here's a developer comment that covers multiple lines, and should still be +#~. handled correctly. 
+#~: utils.py:3 +#~, fuzzy +#~| msgctxt "" +#~| "Here's a previous context that covers\n" +#~| "multiple lines, and should still be handled\n" +#~| "correctly.\n" +#| msgid "previous\n" +#| "fos" +#~ msgctxt "" +#~ "Here's a context that covers\n" +#~ "multiple lines, and should still be handled\n" +#~ "correctly.\n" +#~ msgid "" +#~ "fos" +#~ "fos" +#~ msgstr "" +#~ "Here's a message that covers\n" +#~ "multiple lines, and should still be handled\n" +#~ "correctly.\n" # This message is not obsolete #: main.py:1 @@ -194,11 +275,47 @@ def test_multi_line_obsolete_message(self): msgstr "Bahr" ''') catalog = pofile.read_po(buf) - self.assertEqual(1, len(catalog.obsolete)) + self.assertEqual(2, len(catalog.obsolete)) + message = catalog.obsolete[u'foofoo'] self.assertEqual(u'foofoo', message.id) - self.assertEqual(u'VohVooooh', message.string) - self.assertEqual(['This is an obsolete message'], message.translator_comments) + self.assertEqual(r"""Here's a message that covers +multiple lines, and should still be handled +correctly. +""", message.string) + self.assertEqual([('utils.py', 3)], message.locations) + self.assertEqual({'fuzzy'}, message.flags) + self.assertEqual(["Here's a developer comment that covers multiple lines, and should still be", "handled correctly."], message.extracted_comments) + self.assertEqual(["Here's a user comment that covers multiple lines, and should still be", "handled correctly."], message.translator_comments) + self.assertEqual(u'previous\nfoo', message.previous_id) + self.assertEqual(r"""Here's a previous context that covers +multiple lines, and should still be handled +correctly. +""", message.previous_context) + self.assertEqual(r"""Here's a context that covers +multiple lines, and should still be handled +correctly. +""", message.context) + + message = catalog.obsolete[u'fosfos'] + self.assertEqual(u'fosfos', message.id) + self.assertEqual(r"""Here's a message that covers +multiple lines, and should still be handled +correctly. 
+""", message.string) + self.assertEqual([('utils.py', 3)], message.locations) + self.assertEqual({'fuzzy'}, message.flags) + self.assertEqual(["Here's a developer comment that covers multiple lines, and should still be", "handled correctly."], message.extracted_comments) + self.assertEqual(["Here's a user comment that covers multiple lines, and should still be", "handled correctly."], message.translator_comments) + self.assertEqual(u'previous\nfos', message.previous_id) + self.assertEqual(r"""Here's a previous context that covers +multiple lines, and should still be handled +correctly. +""", message.previous_context) + self.assertEqual(r"""Here's a context that covers +multiple lines, and should still be handled +correctly. +""", message.context) def test_unit_following_multi_line_obsolete_message(self): buf = StringIO(r'''# This is an obsolete message @@ -485,6 +602,46 @@ def test_invalid_pofile_with_abort_flag(self): with self.assertRaises(pofile.PoFileError) as e: parser._invalid_pofile(line, lineno, msg) + def test_with_previous(self): + buf = StringIO(r''' +#: main.py:1 +#| msgctxt "f" +#| msgid "fo" +msgid "foo" +msgstr "Voh" +''') + catalog = pofile.read_po(buf) + self.assertEqual(1, len(catalog)) + message = catalog["foo"] + self.assertEqual("foo", message.id) + self.assertEqual("Voh", message.string) + self.assertEqual([("main.py", 1)], message.locations) + self.assertEqual("f", message.previous_context) + self.assertEqual("fo", message.previous_id) + + def test_with_previous_plural(self): + buf = StringIO(r''' +#: main.py:1 +#| msgctxt "f" +#| msgid "fo" +#| msgid_plural "fos" +msgid "foo" +msgid_plural "foos" +msgstr[0] "Voh" +msgstr[1] "Voeh" +''') + catalog = pofile.read_po(buf) + self.assertEqual(1, len(catalog)) + message = catalog["foo"] + self.assertEqual("foo", message.id[0]) + self.assertEqual("foos", message.id[1]) + self.assertEqual("Voh", message.string[0]) + self.assertEqual("Voeh", message.string[1]) + self.assertEqual([("main.py", 1)], 
message.locations) + self.assertEqual("f", message.previous_context) + self.assertEqual("fo", message.previous_id[0]) + self.assertEqual("fos", message.previous_id[1]) + class WritePoTestCase(unittest.TestCase): @@ -623,7 +780,7 @@ def test_pot_with_translator_comments(self): extracted_comments=['Comment About `foo`']) catalog.add(u'bar', locations=[('utils.py', 3)], translator_comments=['Comment About `bar` with', - 'multiple lines.']) + 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b'''#. Comment About `foo` @@ -642,7 +799,12 @@ def test_po_with_obsolete_message(self): catalog.add(u'foo', u'Voh', locations=[('main.py', 1)]) catalog.obsolete['bar'] = Message(u'bar', u'Bahr', locations=[('utils.py', 3)], - translator_comments=['User comment']) + flags=['fuzzy'], + extracted_comments=['Developer Comment'], + translator_comments=['User comment'], + previous_id=u'foo', + previous_context='previous context', + context='context') buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b'''#: main.py:1 @@ -650,7 +812,26 @@ def test_po_with_obsolete_message(self): msgstr "Voh" # User comment +#. Developer Comment +#: utils.py:3 +#, fuzzy +#~ msgctxt "context" +#~ msgid "bar" +#~ msgstr "Bahr"''', buf.getvalue().strip()) + + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_previous=True) + self.assertEqual(b'''#: main.py:1 +msgid "foo" +msgstr "Voh" + +# User comment +#. Developer Comment #: utils.py:3 +#, fuzzy +#| msgctxt "previous context" +#| msgid "foo" +#~ msgctxt "context" #~ msgid "bar" #~ msgstr "Bahr"''', buf.getvalue().strip()) @@ -664,16 +845,84 @@ def test_po_with_multiline_obsolete_message(self): msgstr = r"""Here's a message that covers multiple lines, and should still be handled correctly. +""" + extracted_comment = r"""Here's a developer comment that covers +multiple lines, and should still be handled +correctly. 
+""" + translator_comment = r"""Here's a user comment that covers +multiple lines, and should still be handled +correctly. +""" + previous_id = r"""Here's a previous message that covers +multiple lines, and should still be handled +correctly. +""" + previous_context = r"""Here's a previous context that covers +multiple lines, and should still be handled +correctly. +""" + context = r"""Here's a context that covers +multiple lines, and should still be handled +correctly. """ catalog.obsolete[msgid] = Message(msgid, msgstr, - locations=[('utils.py', 3)]) + locations=[('utils.py', 3)], + flags=['fuzzy'], + extracted_comments=[extracted_comment], + translator_comments=[translator_comment], + previous_id=previous_id, + previous_context=previous_context, + context=context) buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True) self.assertEqual(b'''#: main.py:1 msgid "foo" msgstr "Voh" +# Here's a user comment that covers multiple lines, and should still be +# handled correctly. +#. Here's a developer comment that covers multiple lines, and should still be +#. handled correctly. +#: utils.py:3 +#, fuzzy +#~ msgctxt "" +#~ "Here's a context that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n" +#~ msgid "" +#~ "Here's a message that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n" +#~ msgstr "" +#~ "Here's a message that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n"''', buf.getvalue().strip()) + + buf = BytesIO() + pofile.write_po(buf, catalog, omit_header=True, include_previous=True) + self.assertEqual(b'''#: main.py:1 +msgid "foo" +msgstr "Voh" + +# Here's a user comment that covers multiple lines, and should still be +# handled correctly. +#. Here's a developer comment that covers multiple lines, and should still be +#. handled correctly. 
#: utils.py:3 +#, fuzzy +#| msgctxt "" +#| "Here's a previous context that covers\\n" +#| "multiple lines, and should still be handled\\n" +#| "correctly.\\n" +#| msgid "" +#| "Here's a previous message that covers\\n" +#| "multiple lines, and should still be handled\\n" +#| "correctly.\\n" +#~ msgctxt "" +#~ "Here's a context that covers\\n" +#~ "multiple lines, and should still be handled\\n" +#~ "correctly.\\n" #~ msgid "" #~ "Here's a message that covers\\n" #~ "multiple lines, and should still be handled\\n" @@ -695,24 +944,26 @@ def test_po_with_obsolete_message_ignored(self): msgid "foo" msgstr "Voh"''', buf.getvalue().strip()) - def test_po_with_previous_msgid(self): + def test_po_with_previous(self): catalog = Catalog() catalog.add(u'foo', u'Voh', locations=[('main.py', 1)], - previous_id=u'fo') + previous_id=u'fo', previous_context=u'f') buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, include_previous=True) self.assertEqual(b'''#: main.py:1 +#| msgctxt "f" #| msgid "fo" msgid "foo" msgstr "Voh"''', buf.getvalue().strip()) - def test_po_with_previous_msgid_plural(self): + def test_po_with_previous_plural(self): catalog = Catalog() catalog.add((u'foo', u'foos'), (u'Voh', u'Voeh'), - locations=[('main.py', 1)], previous_id=(u'fo', u'fos')) + locations=[('main.py', 1)], previous_id=(u'fo', u'fos'), previous_context=u'f') buf = BytesIO() pofile.write_po(buf, catalog, omit_header=True, include_previous=True) self.assertEqual(b'''#: main.py:1 +#| msgctxt "f" #| msgid "fo" #| msgid_plural "fos" msgid "foo" @@ -724,7 +975,7 @@ def test_sorted_po(self): catalog = Catalog() catalog.add(u'bar', locations=[('utils.py', 3)], translator_comments=['Comment About `bar` with', - 'multiple lines.']) + 'multiple lines.']) catalog.add((u'foo', u'foos'), (u'Voh', u'Voeh'), locations=[('main.py', 1)]) buf = BytesIO() @@ -792,7 +1043,7 @@ def test_file_with_no_lineno(self): catalog = Catalog() catalog.add(u'bar', locations=[('utils.py', None)], 
translator_comments=['Comment About `bar` with', - 'multiple lines.']) + 'multiple lines.']) buf = BytesIO() pofile.write_po(buf, catalog, sort_output=True) value = buf.getvalue().strip()