# HG changeset patch # User cmlenz # Date 1181300883 0 # Node ID 068952b4d4c0a99daca225defdb9263269bc984f # Parent e7080996fc464a62cb96c9dbaf51fd75f1640af6 Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends. diff --git a/babel/messages/__init__.py b/babel/messages/__init__.py --- a/babel/messages/__init__.py +++ b/babel/messages/__init__.py @@ -16,6 +16,7 @@ import gettext __all__ = ['Translations'] +__docformat__ = 'restructuredtext en' DEFAULT_DOMAIN = 'messages' diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py new file mode 100644 --- /dev/null +++ b/babel/messages/catalog.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Data structures for message catalogs.""" + +import re +try: + set +except NameError: + from sets import Set as set + +from babel.util import odict + +__all__ = ['Message', 'Catalog'] +__docformat__ = 'restructuredtext en' + +PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search + + +class Message(object): + """Representation of a single message in a catalog.""" + + def __init__(self, id, string=None, locations=(), flags=()): + """Create the message object. 
+ + :param id: the message ID, or a ``(singular, plural)`` tuple for + pluralizable messages + :param string: the translated message string, or a + ``(singular, plural)`` tuple for pluralizable messages + :param locations: a sequence of ``(filename, lineno)`` tuples + :param flags: a set or sequence of flags + """ + self.id = id + self.string = string + self.locations = list(locations) + self.flags = set(flags) + if self.python_format: + self.flags.add('python-format') + else: + self.flags.discard('python-format') + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self.id) + + def pluralizable(self): + return isinstance(self.id, (list, tuple)) + pluralizable = property(pluralizable, doc="""\ + Whether the message is pluralizable. + + >>> Message('foo').pluralizable + False + >>> Message(('foo', 'bar')).pluralizable + True + + :rtype: `bool` + """) + + def python_format(self): + ids = self.id + if not isinstance(ids, (list, tuple)): + ids = [ids] + return bool(filter(None, [PYTHON_FORMAT(id) for id in ids])) + python_format = property(python_format, doc="""\ + Whether the message contains Python-style parameters. 
+ + >>> Message('foo %(name)s bar').python_format + True + >>> Message(('foo %(name)s', 'foo %(name)s')).python_format + True + + :rtype: `bool` + """) + + +class Catalog(object): + """Representation of a message catalog.""" + + def __init__(self, domain=None): + self.domain = domain + self.messages = odict() + + def __iter__(self): + for id in self.messages: + yield self.messages[id] + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self.domain) + + def __delitem__(self, id): + if id in self.messages: + del self.messages[id] + + def __getitem__(self, id): + return self.messages.get(id) + + def __setitem__(self, id, message): + assert isinstance(message, Message), 'expected a Message object' + if isinstance(id, (list, tuple)): + # pluralizable messages are keyed by their singular form + id = id[0] + current = self.messages.get(id) + if current: + assert current.string == message.string, 'translation mismatch' + current.locations.extend(message.locations) + current.flags |= message.flags + message = current + else: + self.messages[id] = message + + def add(self, id, string=None, locations=(), flags=()): + self[id] = Message(id, string, locations, flags) diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -46,10 +46,7 @@ 'dngettext': (2, 3), } -DEFAULT_MAPPING = { - '**.html': 'genshi', - '**.py': 'python' -} +DEFAULT_MAPPING = {'**.py': 'python'} def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, options_map=None, keywords=DEFAULT_KEYWORDS, @@ -58,7 +55,7 @@ This function generates tuples of the form: - ``(filename, lineno, funcname, message)`` + ``(filename, lineno, message)`` Which extraction method is used per file is determined by the `method_map` parameter, which maps extended glob patterns to extraction method names. 
@@ -119,6 +116,7 @@ """ if options_map is None: options_map = {} + absname = os.path.abspath(dirname) for root, dirnames, filenames in os.walk(absname): for subdir in dirnames: @@ -138,10 +136,10 @@ options = odict if callback: callback(filename, options) - for line, func, key in extract_from_file(method, filepath, + for lineno, message in extract_from_file(method, filepath, keywords=keywords, options=options): - yield filename, line, func, key + yield filename, lineno, message def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, options=None): @@ -173,7 +171,7 @@ This function returns a list of tuples of the form: - ``(lineno, funcname, message)`` + ``(lineno, message)`` The implementation dispatches the actual extraction to plugins, based on the value of the ``method`` parameter. @@ -186,7 +184,7 @@ >>> from StringIO import StringIO >>> for message in extract('python', StringIO(source)): ... print message - (3, '_', 'Hello, world!') + (3, 'Hello, world!') :param method: a string specifying the extraction method (.e.g. 
"python") :param fileobj: the file-like object the messages should be extracted from @@ -213,7 +211,7 @@ messages = tuple(msgs) if len(messages) == 1: messages = messages[0] - yield lineno, funcname, messages + yield lineno, messages return raise ValueError('Unknown extraction method %r' % method) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -26,6 +26,7 @@ from babel import __version__ as VERSION from babel import Locale from babel.core import UnknownLocaleError +from babel.messages.catalog import Catalog from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS, \ DEFAULT_MAPPING from babel.messages.pofile import write_po, write_pot @@ -142,17 +143,17 @@ in options.items()]) log.info('extracting messages from %s%s' % (filename, optstr)) - messages = [] + catalog = Catalog() extracted = extract_from_dir(method_map=method_map, options_map=options_map, keywords=self.keywords, callback=callback) - for filename, lineno, funcname, message in extracted: + for filename, lineno, message in extracted: filepath = os.path.normpath(filename) - messages.append((filepath, lineno, funcname, message, None)) + catalog.add(message, None, [(filepath, lineno)]) log.info('writing PO template file to %s' % self.output_file) - write_pot(outfile, messages, project=self.distribution.get_name(), + write_pot(outfile, catalog, project=self.distribution.get_name(), version=self.distribution.get_version(), width=self.width, charset=self.charset, no_location=self.no_location, omit_header=self.omit_header) @@ -384,16 +385,17 @@ options.width = 0 try: - messages = [] + catalog = Catalog() for dirname in args: if not os.path.isdir(dirname): parser.error('%r is not a directory' % dirname) extracted = extract_from_dir(dirname, method_map, options_map, keywords) - for filename, lineno, funcname, message in extracted: + for filename, lineno, message in extracted: filepath = 
os.path.normpath(os.path.join(dirname, filename)) - messages.append((filepath, lineno, funcname, message, None)) - write_pot(outfile, messages, width=options.width, + catalog.add(message, None, [(filepath, lineno)]) + + write_pot(outfile, catalog, width=options.width, charset=options.charset, no_location=options.no_location, omit_header=options.omit_header) finally: diff --git a/babel/messages/plurals.py b/babel/messages/plurals.py --- a/babel/messages/plurals.py +++ b/babel/messages/plurals.py @@ -11,6 +11,8 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. +"""Plural form definitions.""" + PLURALS = { # Afrikaans - From Pootle's PO's 'af': (2, '(n != 1)'), diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -28,6 +28,7 @@ import time from babel import __version__ as VERSION +from babel.messages.catalog import Catalog __all__ = ['escape', 'normalize', 'read_po', 'write_po', 'write_pot'] @@ -153,8 +154,6 @@ """ % VERSION -PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search - WORD_SEP = re.compile('(' r'\s+|' # any whitespace r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words @@ -240,22 +239,18 @@ lines[-1] += '\n' return u'""\n' + u'\n'.join([escape(l) for l in lines]) -def write_pot(fileobj, messages, project='PROJECT', version='VERSION', width=76, +def write_pot(fileobj, catalog, project='PROJECT', version='VERSION', width=76, charset='utf-8', no_location=False, omit_header=False): - r"""Write a ``gettext`` PO (portable object) template file to the given - file-like object. + r"""Write a ``gettext`` PO (portable object) template file for a given + message catalog to the provided file-like object. 
- The `messages` parameter is expected to be an iterable object producing - tuples of the form: - - ``(filename, lineno, funcname, message, flags)`` - + >>> catalog = Catalog() + >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], + ... flags=('fuzzy',)) + >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)]) >>> from StringIO import StringIO >>> buf = StringIO() - >>> write_pot(buf, [ - ... ('main.py', 1, None, u'foo %(name)s', ('fuzzy',)), - ... ('main.py', 3, 'ngettext', (u'bar', u'baz'), None) - ... ], omit_header=True) + >>> write_pot(buf, catalog, omit_header=True) >>> print buf.getvalue() #: main.py:1 @@ -272,7 +267,7 @@ :param fileobj: the file-like object to write to - :param messages: an iterable over the messages + :param catalog: the `Catalog` instance :param project: the project name :param version: the project version :param width: the maximum line width for the generated output; use `None`, @@ -299,48 +294,23 @@ 'charset': charset, }) - locations = {} - msgflags = {} - msgids = [] - plurals = {} - - for filename, lineno, funcname, key, flags in messages: - flags = set(flags or []) - if isinstance(key, (list, tuple)): - assert len(key) == 2 - plurals[key[0]] = key[1] - key = key[0] - if key in msgids: - locations[key].append((filename, lineno)) - msgflags[key] |= flags - else: - if PYTHON_FORMAT(key): - flags.add('python-format') - else: - flags.discard('python-format') - - locations[key] = [(filename, lineno)] - msgflags[key] = flags - msgids.append(key) - - for msgid in msgids: + for message in catalog: if not no_location: - locs = u' '.join([u'%s:%d' % item for item in locations[msgid]]) + locs = u' '.join([u'%s:%d' % item for item in message.locations]) if width and width > 0: locs = textwrap.wrap(locs, width, break_long_words=False) for line in locs: _write('#: %s\n' % line.strip()) - flags = msgflags[msgid] - if flags: - _write('#%s\n' % ', '.join([''] + list(flags))) + if message.flags: + _write('#%s\n' % ', '.join([''] + 
list(message.flags))) - if plurals.has_key(msgid): - _write('msgid %s\n' % _normalize(msgid)) - _write('msgid_plural %s\n' % _normalize(plurals[msgid])) + if isinstance(message.id, (list, tuple)): + _write('msgid %s\n' % _normalize(message.id[0])) + _write('msgid_plural %s\n' % _normalize(message.id[1])) _write('msgstr[0] ""\n') _write('msgstr[1] ""\n') else: - _write('msgid %s\n' % _normalize(msgid)) + _write('msgid %s\n' % _normalize(message.id)) _write('msgstr ""\n') _write('\n') diff --git a/babel/messages/tests/__init__.py b/babel/messages/tests/__init__.py --- a/babel/messages/tests/__init__.py +++ b/babel/messages/tests/__init__.py @@ -14,8 +14,9 @@ import unittest def suite(): - from babel.messages.tests import extract, frontend, pofile + from babel.messages.tests import catalog, extract, frontend, pofile suite = unittest.TestSuite() + suite.addTest(catalog.suite()) suite.addTest(extract.suite()) suite.addTest(frontend.suite()) suite.addTest(pofile.suite()) diff --git a/babel/messages/tests/catalog.py b/babel/messages/tests/catalog.py new file mode 100644 --- /dev/null +++ b/babel/messages/tests/catalog.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. 
+ +import doctest +from StringIO import StringIO +import unittest + +from babel.messages import catalog + + +class MessageTestCase(unittest.TestCase): + + def test_python_format(self): + assert catalog.PYTHON_FORMAT('foo %d bar') + assert catalog.PYTHON_FORMAT('foo %s bar') + assert catalog.PYTHON_FORMAT('foo %r bar') + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(doctest.DocTestSuite(catalog)) + suite.addTest(unittest.makeSuite(MessageTestCase)) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/babel/messages/tests/pofile.py b/babel/messages/tests/pofile.py --- a/babel/messages/tests/pofile.py +++ b/babel/messages/tests/pofile.py @@ -15,25 +15,18 @@ from StringIO import StringIO import unittest +from babel.messages.catalog import Catalog from babel.messages import pofile -class PythonFormatFlagTestCase(unittest.TestCase): - - def test_without_name(self): - assert pofile.PYTHON_FORMAT('foo %d bar') - assert pofile.PYTHON_FORMAT('foo %s bar') - assert pofile.PYTHON_FORMAT('foo %r bar') - - class WritePotTestCase(unittest.TestCase): def test_join_locations(self): + catalog = Catalog() + catalog.add(u'foo', locations=[('main.py', 1)]) + catalog.add(u'foo', locations=[('utils.py', 3)]) buf = StringIO() - pofile.write_pot(buf, [ - ('main.py', 1, None, u'foo', None), - ('utils.py', 3, None, u'foo', None), - ], omit_header=True) + pofile.write_pot(buf, catalog, omit_header=True) self.assertEqual('''#: main.py:1 utils.py:3 msgid "foo" msgstr ""''', buf.getvalue().strip()) @@ -45,10 +38,11 @@ not be removed """ + catalog = Catalog() + catalog.add(text, locations=[('main.py', 1)]) buf = StringIO() - pofile.write_pot(buf, [ - ('main.py', 1, None, text, None), - ], no_location=True, omit_header=True, width=42) + pofile.write_pot(buf, catalog, no_location=True, omit_header=True, + width=42) self.assertEqual(r'''msgid "" "Here's some text where \n" "white space and line breaks matter, and" @@ -62,10 +56,11 @@ text = 
"""Here's some text that includesareallylongwordthatmightbutshouldnt throw us into an infinite loop """ + catalog = Catalog() + catalog.add(text, locations=[('main.py', 1)]) buf = StringIO() - pofile.write_pot(buf, [ - ('main.py', 1, None, text, None), - ], no_location=True, omit_header=True, width=32) + pofile.write_pot(buf, catalog, no_location=True, omit_header=True, + width=32) self.assertEqual(r'''msgid "" "Here's some text that\n" "includesareallylongwordthatmightbutshouldnt" @@ -77,7 +72,6 @@ def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(pofile)) - suite.addTest(unittest.makeSuite(PythonFormatFlagTestCase)) suite.addTest(unittest.makeSuite(WritePotTestCase)) return suite diff --git a/babel/util.py b/babel/util.py --- a/babel/util.py +++ b/babel/util.py @@ -67,6 +67,55 @@ return re.match(''.join(buf) + '$', filename) is not None +class odict(dict): + """Ordered dict implementation. + + :see: `http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747` + """ + def __init__(self, dict=None): + self._keys = [] + self.update(dict or {}) + + def __delitem__(self, key): + dict.__delitem__(self, key) + self._keys.remove(key) + + def __setitem__(self, key, item): + dict.__setitem__(self, key, item) + if key not in self._keys: + self._keys.append(key) + + def __iter__(self): + return iter(self._keys) + + def clear(self): + dict.clear(self) + self._keys = [] + + def copy(self): + d = odict() + d.update(self) + return d + + def items(self): + return zip(self._keys, self.values()) + + def keys(self): + return self._keys[:] + + def setdefault(self, key, failobj = None): + if key not in self._keys: + self._keys.append(key) + return dict.setdefault(self, key, failobj) + + def update(self, dict): + for (key, val) in dict.items(): + self[key] = val + + def values(self): + return map(self.get, self._keys) + + class LazyProxy(object): """Class for proxy objects that delegate to a specified function to evaluate the actual object.