# HG changeset patch # User cmlenz # Date 1184605069 0 # Node ID bd8b1301b27eb8c12ac9c24999e09059e84b8ea3 # Parent 19eaa0f8fae5aa421e76a5b69264b670935736db Added infrastructure for adding catalog checkers, and implemented a checker that validates Python format parameters in translations, closing #19. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,7 @@ manner. * The number formatting functions now also work with numbers represented by Python `Decimal` objects (ticket #53). + * Added extensible infrastructure for validating translation catalogs. Version 0.8.1 diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -30,17 +30,17 @@ from babel.messages.plurals import PLURALS from babel.util import odict, LOCALTZ, UTC, FixedOffsetTimezone -__all__ = ['Message', 'Catalog'] +__all__ = ['Message', 'Catalog', 'TranslationError'] __docformat__ = 'restructuredtext en' -PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search +PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]') class Message(object): """Representation of a single message in a catalog.""" def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=()): + user_comments=(), previous_id=(), lineno=None): """Create the message object. 
:param id: the message ID, or a ``(singular, plural)`` tuple for @@ -53,6 +53,8 @@ :param user_comments: a sequence of user comments for the message :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages + :param lineno: the line number on which the msgid line was found in the + PO file, if any """ self.id = id #: The message ID if not string and self.pluralizable: @@ -70,6 +72,7 @@ self.previous_id = [previous_id] else: self.previous_id = list(previous_id) + self.lineno = lineno def __repr__(self): return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, @@ -108,7 +111,7 @@ ids = self.id if not isinstance(ids, (list, tuple)): ids = [ids] - return bool(filter(None, [PYTHON_FORMAT(id) for id in ids])) + return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids])) python_format = property(python_format, doc="""\ Whether the message contains Python-style parameters. @@ -121,6 +124,11 @@ """) +class TranslationError(Exception): + """Exception thrown by translation checkers when invalid message + translations are encountered.""" + + DEFAULT_HEADER = u"""\ # Translations template for PROJECT. # Copyright (C) YEAR ORGANIZATION @@ -480,7 +488,7 @@ self._messages[key] = message def add(self, id, string=None, locations=(), flags=(), auto_comments=(), - user_comments=(), previous_id=()): + user_comments=(), previous_id=(), lineno=None): """Add or update the message with the specified ID. >>> catalog = Catalog() @@ -501,9 +509,35 @@ :param user_comments: a sequence of user comments :param previous_id: the previous message ID, or a ``(singular, plural)`` tuple for pluralizable messages + :param lineno: the line number on which the msgid line was found in the + PO file, if any """ self[id] = Message(id, string, list(locations), flags, auto_comments, - user_comments, previous_id) + user_comments, previous_id, lineno=lineno) + + def check(self): + """Run various validation checks on the translations in the catalog. 
+ + For every message which fails validation, this method yields a + ``(message, errors)`` tuple, where ``message`` is the `Message` object + and ``errors`` is a sequence of `TranslationError` objects. + + :rtype: ``iterator`` + """ + checkers = [] + from pkg_resources import working_set + for entry_point in working_set.iter_entry_points('babel.checkers'): + checkers.append(entry_point.load()) + + for message in self._messages.values(): + errors = [] + for checker in checkers: + try: + checker(self, message) + except TranslationError, e: + errors.append(e) + if errors: + yield message, errors def update(self, template, no_fuzzy_matching=False): """Update the catalog based on the given template catalog. diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py new file mode 100644 --- /dev/null +++ b/babel/messages/checkers.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. 
+ +"""Various routines that help with validation of translations.""" + +from babel.messages.catalog import TranslationError, PYTHON_FORMAT + +def num_plurals(catalog, message): + """Verify the number of plurals in the translation.""" + if not message.pluralizable: + if not isinstance(message.string, basestring): + raise TranslationError("Found plural forms for non-pluralizable " + "message") + return + + msgstrs = message.string + if not isinstance(msgstrs, (list, tuple)): + msgstrs = (msgstrs,) + if len(msgstrs) != catalog.num_plurals: + raise TranslationError("Wrong number of plural forms (expected %d)" % + catalog.num_plurals) + +def python_format(catalog, message): + if 'python-format' in message.flags: + msgids = message.id + if not isinstance(msgids, (list, tuple)): + msgids = (msgids,) + msgstrs = message.string + if not isinstance(msgstrs, (list, tuple)): + msgstrs = (msgstrs,) + for idx, msgid in enumerate(msgids): + if not msgstrs[idx]: + continue # no translation + for match in PYTHON_FORMAT.finditer(msgid): + param = match.group(0) + if param not in msgstrs[idx]: + raise TranslationError("Python parameter %s not found in " + "translation" % param) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -26,6 +26,7 @@ from StringIO import StringIO import sys import tempfile +import textwrap from babel import __version__ as VERSION from babel import Locale, localedata @@ -153,6 +154,10 @@ print 'catalog %r is marked as fuzzy, skipping' % (po_file) continue + for message, errors in catalog.check(): + for error in errors: + print 'error: %s:%d: %s' % (po_file, message.lineno, error) + print 'compiling catalog %r to %r' % (po_file, mo_file) outfile = open(mo_file, 'w') @@ -720,6 +725,10 @@ print 'catalog %r is marked as fuzzy, skipping' % (po_file) continue + for message, errors in catalog.check(): + for error in errors: + print 'error: %s:%d: %s' % (po_file, message.lineno, 
error) + print 'compiling catalog %r to %r' % (po_file, mo_file) outfile = open(mo_file, 'w') diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -129,6 +129,7 @@ catalog = Catalog(locale=locale, domain=domain) counter = [0] + offset = [0] messages = [] translations = [] locations = [] @@ -150,7 +151,8 @@ else: string = denormalize(translations[0][1]) message = Message(msgid, string, list(locations), set(flags), - list(auto_comments), list(user_comments)) + list(auto_comments), list(user_comments), + lineno=offset[0] + 1) if obsolete[0]: if not ignore_obsolete: catalog.obsolete[msgid] = message @@ -161,13 +163,14 @@ obsolete[0] = False counter[0] += 1 - def _process_message_line(line): + def _process_message_line(lineno, line): if line.startswith('msgid_plural'): in_msgid[0] = True msg = line[12:].lstrip() messages.append(msg) elif line.startswith('msgid'): in_msgid[0] = True + offset[0] = lineno txt = line[5:].lstrip() if messages: _add_message() @@ -187,7 +190,7 @@ elif in_msgstr[0]: translations[-1][1] += u'\n' + line.rstrip() - for line in fileobj.readlines(): + for lineno, line in enumerate(fileobj.readlines()): line = line.strip().decode(catalog.charset) if line.startswith('#'): in_msgid[0] = in_msgstr[0] = False @@ -202,7 +205,7 @@ flags.append(flag.strip()) elif line[1:].startswith('~'): obsolete[0] = True - _process_message_line(line[2:].lstrip()) + _process_message_line(lineno, line[2:].lstrip()) elif line[1:].startswith('.'): # These are called auto-comments comment = line[2:].strip() @@ -212,7 +215,7 @@ # These are called user comments user_comments.append(line[1:].strip()) else: - _process_message_line(line) + _process_message_line(lineno, line) if messages: _add_message() diff --git a/babel/messages/tests/catalog.py b/babel/messages/tests/catalog.py --- a/babel/messages/tests/catalog.py +++ b/babel/messages/tests/catalog.py @@ -20,9 +20,9 @@ class 
MessageTestCase(unittest.TestCase): def test_python_format(self): - assert catalog.PYTHON_FORMAT('foo %d bar') - assert catalog.PYTHON_FORMAT('foo %s bar') - assert catalog.PYTHON_FORMAT('foo %r bar') + assert catalog.PYTHON_FORMAT.search('foo %d bar') + assert catalog.PYTHON_FORMAT.search('foo %s bar') + assert catalog.PYTHON_FORMAT.search('foo %r bar') def test_translator_comments(self): mess = catalog.Message('foo', user_comments=['Comment About `foo`']) diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -146,6 +146,10 @@ [distutils.setup_keywords] message_extractors = babel.messages.frontend:check_message_extractors + [babel.checkers] + num_plurals = babel.messages.checkers:num_plurals + python_format = babel.messages.checkers:python_format + [babel.extractors] ignore = babel.messages.extract:extract_nothing python = babel.messages.extract:extract_python