changeset 222:bd8b1301b27e

Added infrastructure for adding catalog checkers, and implemented a checker that validates Python format parameters in translations, closing #19.
author cmlenz
date Mon, 16 Jul 2007 16:57:49 +0000
parents 19eaa0f8fae5
children 49b089453f81
files ChangeLog babel/messages/catalog.py babel/messages/checkers.py babel/messages/frontend.py babel/messages/pofile.py babel/messages/tests/catalog.py setup.py
diffstat 7 files changed, 113 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,7 @@
    manner.
  * The number formatting functions now also work with numbers represented by
    Python `Decimal` objects (ticket #53).
+ * Added extensible infrastructure for validating translation catalogs.
 
 
 Version 0.8.1
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -30,17 +30,17 @@
 from babel.messages.plurals import PLURALS
 from babel.util import odict, LOCALTZ, UTC, FixedOffsetTimezone
 
-__all__ = ['Message', 'Catalog']
+__all__ = ['Message', 'Catalog', 'TranslationError']
 __docformat__ = 'restructuredtext en'
 
-PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
+PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]')
 
 
 class Message(object):
     """Representation of a single message in a catalog."""
 
     def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
-                 user_comments=(), previous_id=()):
+                 user_comments=(), previous_id=(), lineno=None):
         """Create the message object.
 
         :param id: the message ID, or a ``(singular, plural)`` tuple for
@@ -53,6 +53,8 @@
         :param user_comments: a sequence of user comments for the message
         :param previous_id: the previous message ID, or a ``(singular, plural)``
                             tuple for pluralizable messages
+        :param lineno: the line number on which the msgid line was found in the
+                       PO file, if any
         """
         self.id = id #: The message ID
         if not string and self.pluralizable:
@@ -70,6 +72,7 @@
             self.previous_id = [previous_id]
         else:
             self.previous_id = list(previous_id)
+        self.lineno = lineno
 
     def __repr__(self):
         return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
@@ -108,7 +111,7 @@
         ids = self.id
         if not isinstance(ids, (list, tuple)):
             ids = [ids]
-        return bool(filter(None, [PYTHON_FORMAT(id) for id in ids]))
+        return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids]))
     python_format = property(python_format, doc="""\
         Whether the message contains Python-style parameters.
 
@@ -121,6 +124,11 @@
         """)
 
 
+class TranslationError(Exception):
+    """Exception thrown by translation checkers when invalid message
+    translations are encountered."""
+
+
 DEFAULT_HEADER = u"""\
 # Translations template for PROJECT.
 # Copyright (C) YEAR ORGANIZATION
@@ -480,7 +488,7 @@
             self._messages[key] = message
 
     def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
-            user_comments=(), previous_id=()):
+            user_comments=(), previous_id=(), lineno=None):
         """Add or update the message with the specified ID.
 
         >>> catalog = Catalog()
@@ -501,9 +509,35 @@
         :param user_comments: a sequence of user comments
         :param previous_id: the previous message ID, or a ``(singular, plural)``
                             tuple for pluralizable messages
+        :param lineno: the line number on which the msgid line was found in the
+                       PO file, if any
         """
         self[id] = Message(id, string, list(locations), flags, auto_comments,
-                           user_comments, previous_id)
+                           user_comments, previous_id, lineno=lineno)
+
+    def check(self):
+        """Run various validation checks on the translations in the catalog.
+        
+        For every message which fails validation, this method yield a
+        ``(message, errors)`` tuple, where ``message`` is the `Message` object
+        and ``errors`` is a sequence of `TranslationError` objects.
+        
+        :rtype: ``iterator``
+        """
+        checkers = []
+        from pkg_resources import working_set
+        for entry_point in working_set.iter_entry_points('babel.checkers'):
+            checkers.append(entry_point.load())
+
+        for message in self._messages.values():
+            errors = []
+            for checker in checkers:
+                try:
+                    checker(self, message)
+                except TranslationError, e:
+                    errors.append(e)
+            if errors:
+                yield message, errors
 
     def update(self, template, no_fuzzy_matching=False):
         """Update the catalog based on the given template catalog.
new file mode 100644
--- /dev/null
+++ b/babel/messages/checkers.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Various routines that help with validation of translations."""
+
+from babel.messages.catalog import TranslationError, PYTHON_FORMAT
+
+def num_plurals(catalog, message):
+    """Verify the number of plurals in the translation."""
+    if not message.pluralizable:
+        if not isinstance(message.string, basestring):
+            raise TranslationError("Found plural forms for non-pluralizable "
+                                   "message")
+        return
+
+    msgstrs = message.string
+    if not isinstance(msgstrs, (list, tuple)):
+        msgstrs = (msgstrs,)
+    if len(msgstrs) != catalog.num_plurals:
+        raise TranslationError("Wrong number of plural forms (expected %d)" %
+                               catalog.num_plurals)
+
+def python_format(catalog, message):
+    if 'python-format' in message.flags:
+        msgids = message.id
+        if not isinstance(msgids, (list, tuple)):
+            msgids = (msgids,)
+        msgstrs = message.string
+        if not isinstance(msgstrs, (list, tuple)):
+            msgstrs = (msgstrs,)
+        for idx, msgid in enumerate(msgids):
+            if not msgstrs[idx]:
+                continue # no translation
+            for match in PYTHON_FORMAT.finditer(msgid):
+                param = match.group(0)
+                if param not in msgstrs[idx]:
+                    raise TranslationError("Python parameter %s not found in "
+                                           "translation" % param)
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -26,6 +26,7 @@
 from StringIO import StringIO
 import sys
 import tempfile
+import textwrap
 
 from babel import __version__ as VERSION
 from babel import Locale, localedata
@@ -153,6 +154,10 @@
                 print 'catalog %r is marked as fuzzy, skipping' % (po_file)
                 continue
 
+            for message, errors in catalog.check():
+                for error in errors:
+                    print 'error: %s:%d: %s' % (po_file, message.lineno, error)
+
             print 'compiling catalog %r to %r' % (po_file, mo_file)
 
             outfile = open(mo_file, 'w')
@@ -720,6 +725,10 @@
                 print 'catalog %r is marked as fuzzy, skipping' % (po_file)
                 continue
 
+            for message, errors in catalog.check():
+                for error in errors:
+                    print 'error: %s:%d: %s' % (po_file, message.lineno, error)
+
             print 'compiling catalog %r to %r' % (po_file, mo_file)
 
             outfile = open(mo_file, 'w')
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -129,6 +129,7 @@
     catalog = Catalog(locale=locale, domain=domain)
 
     counter = [0]
+    offset = [0]
     messages = []
     translations = []
     locations = []
@@ -150,7 +151,8 @@
         else:
             string = denormalize(translations[0][1])
         message = Message(msgid, string, list(locations), set(flags),
-                          list(auto_comments), list(user_comments))
+                          list(auto_comments), list(user_comments),
+                          lineno=offset[0] + 1)
         if obsolete[0]:
             if not ignore_obsolete:
                 catalog.obsolete[msgid] = message
@@ -161,13 +163,14 @@
         obsolete[0] = False
         counter[0] += 1
 
-    def _process_message_line(line):
+    def _process_message_line(lineno, line):
         if line.startswith('msgid_plural'):
             in_msgid[0] = True
             msg = line[12:].lstrip()
             messages.append(msg)
         elif line.startswith('msgid'):
             in_msgid[0] = True
+            offset[0] = lineno
             txt = line[5:].lstrip()
             if messages:
                 _add_message()
@@ -187,7 +190,7 @@
             elif in_msgstr[0]:
                 translations[-1][1] += u'\n' + line.rstrip()
 
-    for line in fileobj.readlines():
+    for lineno, line in enumerate(fileobj.readlines()):
         line = line.strip().decode(catalog.charset)
         if line.startswith('#'):
             in_msgid[0] = in_msgstr[0] = False
@@ -202,7 +205,7 @@
                     flags.append(flag.strip())
             elif line[1:].startswith('~'):
                 obsolete[0] = True
-                _process_message_line(line[2:].lstrip())
+                _process_message_line(lineno, line[2:].lstrip())
             elif line[1:].startswith('.'):
                 # These are called auto-comments
                 comment = line[2:].strip()
@@ -212,7 +215,7 @@
                 # These are called user comments
                 user_comments.append(line[1:].strip())
         else:
-            _process_message_line(line)
+            _process_message_line(lineno, line)
 
     if messages:
         _add_message()
--- a/babel/messages/tests/catalog.py
+++ b/babel/messages/tests/catalog.py
@@ -20,9 +20,9 @@
 class MessageTestCase(unittest.TestCase):
 
     def test_python_format(self):
-        assert catalog.PYTHON_FORMAT('foo %d bar')
-        assert catalog.PYTHON_FORMAT('foo %s bar')
-        assert catalog.PYTHON_FORMAT('foo %r bar')
+        assert catalog.PYTHON_FORMAT.search('foo %d bar')
+        assert catalog.PYTHON_FORMAT.search('foo %s bar')
+        assert catalog.PYTHON_FORMAT.search('foo %r bar')
 
     def test_translator_comments(self):
         mess = catalog.Message('foo', user_comments=['Comment About `foo`'])
--- a/setup.py
+++ b/setup.py
@@ -146,6 +146,10 @@
     [distutils.setup_keywords]
     message_extractors = babel.messages.frontend:check_message_extractors
     
+    [babel.checkers]
+    num_plurals = babel.messages.checkers:num_plurals
+    python_format = babel.messages.checkers:python_format
+    
     [babel.extractors]
     ignore = babel.messages.extract:extract_nothing
     python = babel.messages.extract:extract_python
Copyright (C) 2012-2017 Edgewall Software