changeset 56:27fba894d3ca

Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
author cmlenz
date Fri, 08 Jun 2007 11:08:03 +0000
parents c3291ad6b010
children a6183d300a6e
files babel/messages/__init__.py babel/messages/catalog.py babel/messages/extract.py babel/messages/frontend.py babel/messages/plurals.py babel/messages/pofile.py babel/messages/tests/__init__.py babel/messages/tests/catalog.py babel/messages/tests/pofile.py babel/util.py
diffstat 10 files changed, 260 insertions(+), 86 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/__init__.py
+++ b/babel/messages/__init__.py
@@ -16,6 +16,7 @@
 import gettext
 
 __all__ = ['Translations']
+__docformat__ = 'restructuredtext en'
 
 DEFAULT_DOMAIN = 'messages'
 
new file mode 100644
--- /dev/null
+++ b/babel/messages/catalog.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Data structures for message catalogs."""
+
+import re
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+from babel.util import odict
+
+__all__ = ['Message', 'Catalog']
+__docformat__ = 'restructuredtext en'
+
+# Search function for Python ``%``-style format placeholders: a ``%``, an
+# optional ``(name)`` mapping key, then a conversion character.  Flags, width
+# and precision (as in ``%5.2f``) are not part of the pattern.  Calling it
+# returns a match object, or ``None`` when no placeholder is found.
+PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
+
+
+class Message(object):
+    """Representation of a single message in a catalog.
+
+    A message consists of its ID, an optional translation, the source
+    locations it was found at, and a set of flags.  The ``python-format``
+    flag is managed by the constructor: it is added when the message ID
+    contains Python-style format parameters and removed otherwise.
+    """
+
+    def __init__(self, id, string=None, locations=(), flags=()):
+        """Create the message object.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple for
+                   pluralizable messages
+        :param string: the translated message string, or a
+                       ``(singular, plural)`` tuple for pluralizable messages
+        :param locations: a sequence of ``(filename, lineno)`` tuples
+        :param flags: a set or sequence of flags
+        """
+        self.id = id
+        self.string = string
+        # Always keep a private list: `Catalog.__setitem__` extends the
+        # locations of an existing message, which would fail on the default
+        # empty tuple and would otherwise mutate the caller's own list.
+        self.locations = list(locations)
+        self.flags = set(flags)
+        if self.python_format:
+            self.flags.add('python-format')
+        else:
+            self.flags.discard('python-format')
+
+    def __repr__(self):
+        return '<%s %r>' % (type(self).__name__, self.id)
+
+    def pluralizable(self):
+        return isinstance(self.id, (list, tuple))
+    pluralizable = property(pluralizable, doc="""\
+        Whether the message is pluralizable.
+
+        >>> Message('foo').pluralizable
+        False
+        >>> Message(('foo', 'bar')).pluralizable
+        True
+
+        :rtype:  `bool`
+        """)
+
+    def python_format(self):
+        ids = self.id
+        if not isinstance(ids, (list, tuple)):
+            ids = [ids]
+        # Explicit loop instead of ``bool(filter(...))``: the result of
+        # ``filter`` is only falsy when it is a list, so the old form would
+        # report every message as formatted wherever ``filter`` is lazy.
+        for msgid in ids:
+            if PYTHON_FORMAT(msgid):
+                return True
+        return False
+    python_format = property(python_format, doc="""\
+        Whether the message contains Python-style parameters.
+
+        >>> Message('foo %(name)s bar').python_format
+        True
+        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
+        True
+        >>> Message('foo bar').python_format
+        False
+
+        :rtype:  `bool`
+        """)
+
+
+class Catalog(object):
+    """Representation of a message catalog.
+
+    Messages are stored in insertion order, keyed by their message ID.  For
+    pluralizable messages, whose ID is a ``(singular, plural)`` sequence, the
+    singular form is used as the key.
+    """
+
+    def __init__(self, domain=None):
+        """Create an empty catalog.
+
+        :param domain: the message domain
+        """
+        self.domain = domain
+        self.messages = odict()
+
+    def __iter__(self):
+        """Iterate over the `Message` objects in insertion order."""
+        for key in self.messages:
+            yield self.messages[key]
+
+    def __repr__(self):
+        return '<%s %r>' % (type(self).__name__, self.domain)
+
+    def _key_for(self, id):
+        """Return the key a message with the given ID is stored under."""
+        if isinstance(id, (list, tuple)):
+            return id[0]
+        return id
+
+    def __delitem__(self, id):
+        """Remove the message with the given ID; unknown IDs are ignored."""
+        key = self._key_for(id)
+        if key in self.messages:
+            del self.messages[key]
+
+    def __getitem__(self, id):
+        """Return the message with the given ID, or `None` if not found."""
+        return self.messages.get(self._key_for(id))
+
+    def __setitem__(self, id, message):
+        """Add the message, or merge it into an existing one with that ID.
+
+        When a message is already stored under the ID, the locations and
+        flags of the new message are merged into the stored one; the
+        translations of both are expected to be equal.
+
+        :param id: the message ID, or a ``(singular, plural)`` sequence
+        :param message: the `Message` object
+        """
+        assert isinstance(message, Message), 'expected a Message object'
+        # Normalize before the lookup, so that a pluralizable message is
+        # found under the same key it was stored with.
+        key = self._key_for(id)
+        current = self.messages.get(key)
+        if current is not None:
+            assert current.string == message.string, 'translation mismatch'
+            # The locations may be any sequence (a tuple by default), so
+            # build a new list rather than extending in place.
+            merged = list(current.locations)
+            merged.extend(message.locations)
+            current.locations = merged
+            current.flags |= message.flags
+        else:
+            self.messages[key] = message
+
+    def add(self, id, string=None, locations=(), flags=()):
+        """Create a `Message` from the arguments and add it to the catalog.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple for
+                   pluralizable messages
+        :param string: the translated message string, or a
+                       ``(singular, plural)`` tuple for pluralizable messages
+        :param locations: a sequence of ``(filename, lineno)`` tuples
+        :param flags: a set or sequence of flags
+        """
+        self[id] = Message(id, string, locations, flags)
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -46,10 +46,7 @@
     'dngettext': (2, 3),
 }
 
-DEFAULT_MAPPING = {
-    '**.html': 'genshi',
-    '**.py': 'python'
-}
+DEFAULT_MAPPING = {'**.py': 'python'}
 
 def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
                      options_map=None, keywords=DEFAULT_KEYWORDS,
@@ -58,7 +55,7 @@
     
     This function generates tuples of the form:
     
-        ``(filename, lineno, funcname, message)``
+        ``(filename, lineno, message)``
     
     Which extraction method is used per file is determined by the `method_map`
     parameter, which maps extended glob patterns to extraction method names.
@@ -119,6 +116,7 @@
     """
     if options_map is None:
         options_map = {}
+
     absname = os.path.abspath(dirname)
     for root, dirnames, filenames in os.walk(absname):
         for subdir in dirnames:
@@ -138,10 +136,10 @@
                             options = odict
                     if callback:
                         callback(filename, options)
-                    for line, func, key in extract_from_file(method, filepath,
+                    for lineno, message in extract_from_file(method, filepath,
                                                              keywords=keywords,
                                                              options=options):
-                        yield filename, line, func, key
+                        yield filename, lineno, message
 
 def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
                       options=None):
@@ -173,7 +171,7 @@
     
     This function returns a list of tuples of the form:
     
-        ``(lineno, funcname, message)``
+        ``(lineno, message)``
     
     The implementation dispatches the actual extraction to plugins, based on the
     value of the ``method`` parameter.
@@ -186,7 +184,7 @@
     >>> from StringIO import StringIO
     >>> for message in extract('python', StringIO(source)):
     ...     print message
-    (3, '_', 'Hello, world!')
+    (3, 'Hello, world!')
     
     :param method: a string specifying the extraction method (.e.g. "python")
     :param fileobj: the file-like object the messages should be extracted from
@@ -213,7 +211,7 @@
                 messages = tuple(msgs)
                 if len(messages) == 1:
                     messages = messages[0]
-            yield lineno, funcname, messages
+            yield lineno, messages
         return
 
     raise ValueError('Unknown extraction method %r' % method)
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -26,6 +26,7 @@
 from babel import __version__ as VERSION
 from babel import Locale
 from babel.core import UnknownLocaleError
+from babel.messages.catalog import Catalog
 from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS, \
                                    DEFAULT_MAPPING
 from babel.messages.pofile import write_po, write_pot
@@ -142,17 +143,17 @@
                                                   in options.items()])
                 log.info('extracting messages from %s%s' % (filename, optstr))
 
-            messages = []
+            catalog = Catalog()
             extracted = extract_from_dir(method_map=method_map,
                                          options_map=options_map,
                                          keywords=self.keywords,
                                          callback=callback)
-            for filename, lineno, funcname, message in extracted:
+            for filename, lineno, message in extracted:
                 filepath = os.path.normpath(filename)
-                messages.append((filepath, lineno, funcname, message, None))
+                catalog.add(message, None, [(filepath, lineno)])
 
             log.info('writing PO template file to %s' % self.output_file)
-            write_pot(outfile, messages, project=self.distribution.get_name(),
+            write_pot(outfile, catalog, project=self.distribution.get_name(),
                      version=self.distribution.get_version(), width=self.width,
                      charset=self.charset, no_location=self.no_location,
                      omit_header=self.omit_header)
@@ -384,16 +385,17 @@
             options.width = 0
 
         try:
-            messages = []
+            catalog = Catalog()
             for dirname in args:
                 if not os.path.isdir(dirname):
                     parser.error('%r is not a directory' % dirname)
                 extracted = extract_from_dir(dirname, method_map, options_map,
                                              keywords)
-                for filename, lineno, funcname, message in extracted:
+                for filename, lineno, message in extracted:
                     filepath = os.path.normpath(os.path.join(dirname, filename))
-                    messages.append((filepath, lineno, funcname, message, None))
-            write_pot(outfile, messages, width=options.width,
+                    catalog.add(message, None, [(filepath, lineno)])
+
+            write_pot(outfile, catalog, width=options.width,
                       charset=options.charset, no_location=options.no_location,
                       omit_header=options.omit_header)
         finally:
--- a/babel/messages/plurals.py
+++ b/babel/messages/plurals.py
@@ -11,6 +11,8 @@
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://babel.edgewall.org/log/.
 
+"""Plural form definitions."""
+
 PLURALS = {
     # Afrikaans - From Pootle's PO's
     'af': (2, '(n != 1)'),
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -28,6 +28,7 @@
 import time
 
 from babel import __version__ as VERSION
+from babel.messages.catalog import Catalog
 
 __all__ = ['escape', 'normalize', 'read_po', 'write_po', 'write_pot']
 
@@ -153,8 +154,6 @@
 
 """ % VERSION
 
-PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
-
 WORD_SEP = re.compile('('
     r'\s+|'                                 # any whitespace
     r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
@@ -240,22 +239,18 @@
         lines[-1] += '\n'
     return u'""\n' + u'\n'.join([escape(l) for l in lines])
 
-def write_pot(fileobj, messages, project='PROJECT', version='VERSION', width=76,
+def write_pot(fileobj, catalog, project='PROJECT', version='VERSION', width=76,
              charset='utf-8', no_location=False, omit_header=False):
-    r"""Write a ``gettext`` PO (portable object) template file to the given
-    file-like object.
+    r"""Write a ``gettext`` PO (portable object) template file for a given
+    message catalog to the provided file-like object.
     
-    The `messages` parameter is expected to be an iterable object producing
-    tuples of the form:
-    
-        ``(filename, lineno, funcname, message, flags)``
-    
+    >>> catalog = Catalog()
+    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
+    ...             flags=('fuzzy',))
+    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
     >>> from StringIO import StringIO
     >>> buf = StringIO()
-    >>> write_pot(buf, [
-    ...     ('main.py', 1, None, u'foo %(name)s', ('fuzzy',)),
-    ...     ('main.py', 3, 'ngettext', (u'bar', u'baz'), None)
-    ... ], omit_header=True)
+    >>> write_pot(buf, catalog, omit_header=True)
     
     >>> print buf.getvalue()
     #: main.py:1
@@ -272,7 +267,7 @@
     <BLANKLINE>
     
     :param fileobj: the file-like object to write to
-    :param messages: an iterable over the messages
+    :param catalog: the `Catalog` instance
     :param project: the project name
     :param version: the project version
     :param width: the maximum line width for the generated output; use `None`,
@@ -299,48 +294,23 @@
             'charset': charset,
         })
 
-    locations = {}
-    msgflags = {}
-    msgids = []
-    plurals = {}
-
-    for filename, lineno, funcname, key, flags in messages:
-        flags = set(flags or [])
-        if isinstance(key, (list, tuple)):
-            assert len(key) == 2
-            plurals[key[0]] = key[1]
-            key = key[0]
-        if key in msgids:
-            locations[key].append((filename, lineno))
-            msgflags[key] |= flags
-        else:
-            if PYTHON_FORMAT(key):
-                flags.add('python-format')
-            else:
-                flags.discard('python-format')              
-        
-            locations[key] = [(filename, lineno)]
-            msgflags[key] = flags
-            msgids.append(key)
-
-    for msgid in msgids:
+    for message in catalog:
         if not no_location:
-            locs = u' '.join([u'%s:%d' % item for item in locations[msgid]])
+            locs = u' '.join([u'%s:%d' % item for item in message.locations])
             if width and width > 0:
                 locs = textwrap.wrap(locs, width, break_long_words=False)
             for line in locs:
                 _write('#: %s\n' % line.strip())
-        flags = msgflags[msgid]
-        if flags:
-            _write('#%s\n' % ', '.join([''] + list(flags)))
+        if message.flags:
+            _write('#%s\n' % ', '.join([''] + list(message.flags)))
 
-        if plurals.has_key(msgid):
-            _write('msgid %s\n' % _normalize(msgid))
-            _write('msgid_plural %s\n' % _normalize(plurals[msgid]))
+        if isinstance(message.id, (list, tuple)):
+            _write('msgid %s\n' % _normalize(message.id[0]))
+            _write('msgid_plural %s\n' % _normalize(message.id[1]))
             _write('msgstr[0] ""\n')
             _write('msgstr[1] ""\n')
         else:
-            _write('msgid %s\n' % _normalize(msgid))
+            _write('msgid %s\n' % _normalize(message.id))
             _write('msgstr ""\n')
         _write('\n')
 
--- a/babel/messages/tests/__init__.py
+++ b/babel/messages/tests/__init__.py
@@ -14,8 +14,9 @@
 import unittest
 
 def suite():
-    from babel.messages.tests import extract, frontend, pofile
+    from babel.messages.tests import catalog, extract, frontend, pofile
     suite = unittest.TestSuite()
+    suite.addTest(catalog.suite())
     suite.addTest(extract.suite())
     suite.addTest(frontend.suite())
     suite.addTest(pofile.suite())
new file mode 100644
--- /dev/null
+++ b/babel/messages/tests/catalog.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import doctest
+from StringIO import StringIO
+import unittest
+
+from babel.messages import catalog
+
+
+class MessageTestCase(unittest.TestCase):
+    """Tests for the message data structures."""
+
+    def test_python_format(self):
+        # Unnamed placeholders must be recognized as Python format parameters.
+        for sample in ('foo %d bar', 'foo %s bar', 'foo %r bar'):
+            assert catalog.PYTHON_FORMAT(sample)
+
+
+def suite():
+    """Build the test suite for the catalog module: doctests plus unit tests."""
+    tests = unittest.TestSuite()
+    for part in (doctest.DocTestSuite(catalog),
+                 unittest.makeSuite(MessageTestCase)):
+        tests.addTest(part)
+    return tests
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
--- a/babel/messages/tests/pofile.py
+++ b/babel/messages/tests/pofile.py
@@ -15,25 +15,18 @@
 from StringIO import StringIO
 import unittest
 
+from babel.messages.catalog import Catalog
 from babel.messages import pofile
 
 
-class PythonFormatFlagTestCase(unittest.TestCase):
-
-    def test_without_name(self):
-        assert pofile.PYTHON_FORMAT('foo %d bar')
-        assert pofile.PYTHON_FORMAT('foo %s bar')
-        assert pofile.PYTHON_FORMAT('foo %r bar')
-
-
 class WritePotTestCase(unittest.TestCase):
 
     def test_join_locations(self):
+        catalog = Catalog()
+        catalog.add(u'foo', locations=[('main.py', 1)])
+        catalog.add(u'foo', locations=[('utils.py', 3)])
         buf = StringIO()
-        pofile.write_pot(buf, [
-            ('main.py', 1, None, u'foo', None),
-            ('utils.py', 3, None, u'foo', None),
-        ], omit_header=True)
+        pofile.write_pot(buf, catalog, omit_header=True)
         self.assertEqual('''#: main.py:1 utils.py:3
 msgid "foo"
 msgstr ""''', buf.getvalue().strip())
@@ -45,10 +38,11 @@
 not be removed
 
 """
+        catalog = Catalog()
+        catalog.add(text, locations=[('main.py', 1)])
         buf = StringIO()
-        pofile.write_pot(buf, [
-            ('main.py', 1, None, text, None),
-        ], no_location=True, omit_header=True, width=42)
+        pofile.write_pot(buf, catalog, no_location=True, omit_header=True,
+                         width=42)
         self.assertEqual(r'''msgid ""
 "Here's some text where       \n"
 "white space and line breaks matter, and"
@@ -62,10 +56,11 @@
         text = """Here's some text that
 includesareallylongwordthatmightbutshouldnt throw us into an infinite loop
 """
+        catalog = Catalog()
+        catalog.add(text, locations=[('main.py', 1)])
         buf = StringIO()
-        pofile.write_pot(buf, [
-            ('main.py', 1, None, text, None),
-        ], no_location=True, omit_header=True, width=32)
+        pofile.write_pot(buf, catalog, no_location=True, omit_header=True,
+                         width=32)
         self.assertEqual(r'''msgid ""
 "Here's some text that\n"
 "includesareallylongwordthatmightbutshouldnt"
@@ -77,7 +72,6 @@
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(doctest.DocTestSuite(pofile))
-    suite.addTest(unittest.makeSuite(PythonFormatFlagTestCase))
     suite.addTest(unittest.makeSuite(WritePotTestCase))
     return suite
 
--- a/babel/util.py
+++ b/babel/util.py
@@ -67,6 +67,55 @@
     return re.match(''.join(buf) + '$', filename) is not None
 
 
+class odict(dict):
+    """Ordered dict implementation.
+
+    Keys are remembered in the order in which they were first inserted;
+    iteration, `keys()`, `values()` and `items()` all follow that order.
+    Assigning to an existing key keeps its original position.
+
+    >>> d = odict()
+    >>> d['z'] = 1
+    >>> d['a'] = 2
+    >>> d.keys()
+    ['z', 'a']
+
+    :see: `http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747`
+    """
+    def __init__(self, dict=None):
+        """Create the ordered dict.
+
+        :param dict: an optional mapping providing the initial items
+        """
+        # The parameter shadows the builtin ``dict`` within this method, so
+        # the base class is reached through ``super`` instead of by name.
+        super(odict, self).__init__()
+        self._keys = []
+        if dict is not None:
+            self.update(dict)
+
+    def __delitem__(self, key):
+        dict.__delitem__(self, key)
+        self._keys.remove(key)
+
+    def __setitem__(self, key, item):
+        # Membership is tested against the dict itself rather than the key
+        # list, which avoids a linear scan on every assignment.
+        if key not in self:
+            self._keys.append(key)
+        dict.__setitem__(self, key, item)
+
+    def __iter__(self):
+        return iter(self._keys)
+
+    def clear(self):
+        dict.clear(self)
+        self._keys = []
+
+    def copy(self):
+        d = odict()
+        d.update(self)
+        return d
+
+    def items(self):
+        return [(key, self[key]) for key in self._keys]
+
+    def keys(self):
+        return self._keys[:]
+
+    def pop(self, key, *args):
+        """Remove the key and return its value.
+
+        An optional default may be passed as with `dict.pop`; without it a
+        `KeyError` is raised for a missing key.
+        """
+        value = dict.pop(self, key, *args)
+        if key in self._keys:
+            self._keys.remove(key)
+        return value
+
+    def popitem(self):
+        """Remove and return the most recently inserted ``(key, value)``."""
+        if not self._keys:
+            raise KeyError('popitem(): dictionary is empty')
+        key = self._keys.pop()
+        return key, dict.pop(self, key)
+
+    def setdefault(self, key, failobj=None):
+        """Return the value for the key, inserting `failobj` if it is missing."""
+        # ``dict.setdefault`` does not go through ``__setitem__``, so the key
+        # order has to be maintained here.
+        if key not in self:
+            self._keys.append(key)
+        return dict.setdefault(self, key, failobj)
+
+    def update(self, dict):
+        for (key, val) in dict.items():
+            self[key] = val
+
+    def values(self):
+        return [self[key] for key in self._keys]
+
+
 class LazyProxy(object):
     """Class for proxy objects that delegate to a specified function to evaluate
     the actual object.
Copyright (C) 2012-2017 Edgewall Software