# HG changeset patch # User pjenvey # Date 1184718544 0 # Node ID 9d0a19b4518b7d16837bfa9d599cb9603f536cee # Parent 49b089453f81534fd39287df26408cdaa85db77d o extract_python fixes: - now returns None for non-string arguments - no longer extracts strings from nested function calls (refs #38) - use the correct starting line number in multi-line gettext function calls - avoids falsely identifying string keyword arg defaults from function definition names that match a keyword, e.g.: def gettext(foo='bar') - avoid capturing translator comments embedded within a gettext function call - default the file encoding to iso-8859-1 instead of ascii when missing a magic encoding comment, to emulate pre-Python 2.5 behavior. Python warns about 'non-ascii' chars when there is no magic encoding comment, but < 2.5 actually treats them as iso-8859-1 for backwards compat (PEP 263). >= 2.5 treats them as strict ascii o extract fixes: - filter out messages that don't contain strings where the keyword specification calls for them (fixes #39) - filter out empty string messages and emit a warning about them, like xgettext diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,13 @@ * The number formatting functions now also work with numbers represented by Python `Decimal` objects (ticket #53). * Added extensible infrastructure for validating translation catalogs. + * Fixed the extractor not filtering out messages that didn't validate against + the keyword's specification (ticket #39). + * Fixed the extractor raising an exception when encountering an empty string + msgid. It now emits a warning to stderr. + * Numerous Python message extractor fixes: it now handles nested function + calls within a gettext function call correctly, uses the correct line number + for multi-line function calls, and other small fixes (tickets #38 and #39). 
Version 0.8.1 diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -49,6 +49,10 @@ DEFAULT_MAPPING = [('**.py', 'python')] +empty_msgid_warning = ( +'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") ' +'returns the header entry with meta information, not the empty string.') + def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, options_map=None, keywords=DEFAULT_KEYWORDS, comment_tags=(), callback=None): @@ -218,13 +222,33 @@ results = func(fileobj, keywords.keys(), comment_tags, options=options or {}) for lineno, funcname, messages, comments in results: - if isinstance(messages, (list, tuple)): - msgs = [] - for index in keywords[funcname] or (1,): - msgs.append(messages[index - 1]) - messages = tuple(msgs) - if len(messages) == 1: - messages = messages[0] + spec = keywords[funcname] or (1,) + if not isinstance(messages, (list, tuple)): + messages = [messages] + + msgs = [] + # Validate the messages against the keyword's specification + invalid = False + for index in spec: + message = messages[index - 1] + if message is None: + invalid = True + break + msgs.append(message) + if invalid: + continue + + first_msg_index = spec[0] - 1 + if not messages[first_msg_index]: + # An empty string msgid isn't valid, emit a warning + where = '%s:%i' % (hasattr(fileobj, 'name') and \ + fileobj.name or '(unknown)', lineno) + print >> sys.stderr, empty_msgid_warning % where + continue + + messages = tuple(msgs) + if len(messages) == 1: + messages = messages[0] yield lineno, messages, comments return @@ -249,21 +273,29 @@ :return: an iterator over ``(lineno, funcname, message, comments)`` tuples :rtype: ``iterator`` """ - funcname = None - lineno = None + funcname = lineno = message_lineno = None + call_stack = -1 buf = [] messages = [] translator_comments = [] - in_args = False - in_translator_comments = False + in_def = in_translator_comments = False - encoding = 
parse_encoding(fileobj) or options.get('encoding', 'ascii') + encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1') tokens = generate_tokens(fileobj.readline) for tok, value, (lineno, _), _, _ in tokens: - if funcname and tok == OP and value == '(': - in_args = True - elif tok == COMMENT: + if call_stack == -1 and tok == NAME and value in ('def', 'class'): + in_def = True + elif tok == OP and value == '(': + if in_def: + # Avoid false positives for declarations such as: + # def gettext(arg='message'): + in_def = False + continue + if funcname: + message_lineno = lineno + call_stack += 1 + elif call_stack == -1 and tok == COMMENT: # Strip the comment token from the line value = value.decode(encoding)[1:].strip() if in_translator_comments and \ @@ -281,41 +313,51 @@ comment = value[len(comment_tag):].strip() translator_comments.append((lineno, comment)) break - elif funcname and in_args: + elif funcname and call_stack == 0: if tok == OP and value == ')': - in_args = in_translator_comments = False if buf: messages.append(''.join(buf)) del buf[:] - if filter(None, messages): - if len(messages) > 1: - messages = tuple(messages) - else: - messages = messages[0] - # Comments don't apply unless they immediately preceed the - # message - if translator_comments and \ - translator_comments[-1][0] < lineno - 1: - translator_comments = [] + else: + messages.append(None) - yield (lineno, funcname, messages, - [comment[1] for comment in translator_comments]) - funcname = lineno = None + if len(messages) > 1: + messages = tuple(messages) + else: + messages = messages[0] + # Comments don't apply unless they immediately preceed the + # message + if translator_comments and \ + translator_comments[-1][0] < message_lineno - 1: + translator_comments = [] + + yield (message_lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + + funcname = lineno = message_lineno = None + call_stack = -1 messages = [] translator_comments = [] + 
in_translator_comments = False elif tok == STRING: # Unwrap quotes in a safe manner, maintaining the string's # encoding - # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470 + # https://sourceforge.net/tracker/?func=detail&atid=355470& + # aid=617979&group_id=5470 value = eval('# coding=%s\n%s' % (encoding, value), {'__builtins__':{}}, {}) if isinstance(value, str): value = value.decode(encoding) buf.append(value) elif tok == OP and value == ',': - messages.append(''.join(buf)) - del buf[:] - elif funcname: + if buf: + messages.append(''.join(buf)) + del buf[:] + else: + messages.append(None) + elif call_stack > 0 and tok == OP and value == ')': + call_stack -= 1 + elif funcname and call_stack == -1: funcname = None elif tok == NAME and value in keywords: funcname = value diff --git a/babel/messages/tests/extract.py b/babel/messages/tests/extract.py --- a/babel/messages/tests/extract.py +++ b/babel/messages/tests/extract.py @@ -14,6 +14,7 @@ import codecs import doctest from StringIO import StringIO +import sys import unittest from babel.messages import extract @@ -21,6 +22,105 @@ class ExtractPythonTestCase(unittest.TestCase): + def test_nested_calls(self): + buf = StringIO("""\ +msg1 = _(i18n_arg.replace(r'\"', '"')) +msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2) +msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2) +msg4 = ungettext(i18n_arg.replace(r'\"', '"'), "Babels", 2) +msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2)) +msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2)) +msg7 = _(hello.there) +msg8 = gettext('Rabbit') +msg9 = dgettext('wiki', model.addPage()) +msg10 = dngettext(getDomain(), 'Page', 'Pages', 3) +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + self.assertEqual([ + (1, '_', None, []), + (2, 'ungettext', (None, None, None), []), + (3, 'ungettext', (u'Babel', None, None), []), + (4, 'ungettext', (None, 
u'Babels', None), []), + (5, 'ungettext', (u'bunny', u'bunnies', None), []), + (6, 'ungettext', (None, u'bunnies', None), []), + (7, '_', None, []), + (8, 'gettext', u'Rabbit', []), + (9, 'dgettext', (u'wiki', None), []), + (10, 'dngettext', (None, u'Page', u'Pages', None), [])], + messages) + + def test_nested_comments(self): + buf = StringIO("""\ +msg = ngettext('pylon', # TRANSLATORS: shouldn't be + 'pylons', # TRANSLATORS: seeing this + count) +""") + messages = list(extract.extract_python(buf, ('ngettext',), + ['TRANSLATORS:'], {})) + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])], + messages) + + def test_declarations(self): + buf = StringIO("""\ +class gettext(object): + pass +def render_body(context,x,y=_('Page arg 1'),z=_('Page arg 2'),**pageargs): + pass +def ngettext(y='arg 1',z='arg 2',**pageargs): + pass +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + self.assertEqual([(3, '_', u'Page arg 1', []), + (3, '_', u'Page arg 2', [])], + messages) + + def test_multiline(self): + buf = StringIO("""\ +msg1 = ngettext('pylon', + 'pylons', count) +msg2 = ngettext('elvis', + 'elvises', + count) +""") + messages = list(extract.extract_python(buf, ('ngettext',), [], {})) + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), []), + (3, 'ngettext', (u'elvis', u'elvises', None), [])], + messages) + + def test_triple_quoted_strings(self): + buf = StringIO("""\ +msg1 = _('''pylons''') +msg2 = ngettext(r'''elvis''', \"\"\"elvises\"\"\", count) +msg2 = ngettext(\"\"\"elvis\"\"\", 'elvises', count) +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + self.assertEqual([(1, '_', (u'pylons'), []), + (2, 'ngettext', (u'elvis', u'elvises', None), []), + (3, 'ngettext', (u'elvis', u'elvises', None), [])], + messages) + + def test_multiline_strings(self): + buf = StringIO("""\ +_('''This module provides internationalization and localization +support 
for your Python programs by providing an interface to the GNU +gettext message catalog library.''') +""") + messages = list(extract.extract_python(buf, + extract.DEFAULT_KEYWORDS.keys(), + [], {})) + self.assertEqual( + [(1, '_', + u'This module provides internationalization and localization\n' + 'support for your Python programs by providing an interface to ' + 'the GNU\ngettext message catalog library.', [])], + messages) + def test_unicode_string_arg(self): buf = StringIO("msg = _(u'Foo Bar')") messages = list(extract.extract_python(buf, ('_',), [], {})) @@ -45,7 +145,7 @@ self.assertEqual(u'Foo Bar', messages[0][2]) self.assertEqual([u'A translation comment', u'with a second line'], messages[0][3]) - + def test_translator_comments_with_previous_non_translator_comments(self): buf = StringIO(""" # This shouldn't be in the output @@ -97,7 +197,7 @@ messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) self.assertEqual([u'one', u'NOTE: two'], messages[0][3]) - + def test_invalid_translator_comments(self): buf = StringIO(""" # NOTE: this shouldn't apply to any messages @@ -174,10 +274,48 @@ self.assertEqual(u'Bonjour à tous', messages[0][2]) self.assertEqual(messages[0][2], messages[1][2]) +class ExtractTestCase(unittest.TestCase): + + def test_invalid_filter(self): + buf = StringIO("""\ +msg1 = _(i18n_arg.replace(r'\"', '"')) +msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2) +msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2) +msg4 = ungettext(i18n_arg.replace(r'\"', '"'), "Babels", 2) +msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2)) +msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2)) +msg7 = _(hello.there) +msg8 = gettext('Rabbit') +msg9 = dgettext('wiki', model.addPage()) +msg10 = dngettext(domain, 'Page', 'Pages', 3) +""") + messages = \ + list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], + {})) + self.assertEqual([(5, (u'bunny', 
u'bunnies'), []), + (8, u'Rabbit', []), + (10, (u'Page', u'Pages'), [])], messages) + + def test_empty_string_msgid(self): + buf = StringIO("""\ +msg = _('') +""") + stderr = sys.stderr + sys.stderr = StringIO() + try: + messages = \ + list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, + [], {})) + self.assertEqual([], messages) + assert 'warning: Empty msgid.' in sys.stderr.getvalue() + finally: + sys.stderr = stderr + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(extract)) suite.addTest(unittest.makeSuite(ExtractPythonTestCase)) + suite.addTest(unittest.makeSuite(ExtractTestCase)) return suite if __name__ == '__main__':