# HG changeset patch # User cmlenz # Date 1214580132 0 # Node ID c2ae38340540e5aa85b7a1db6a8302cd6fe126fa # Parent 7647773d3831defb91f01a5577e5d90f389e4f02 Ported [388:405/trunk] to 0.9.x branch. diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -17,6 +17,7 @@ from datetime import datetime from difflib import get_close_matches from email import message_from_string +from copy import copy import re try: set @@ -33,8 +34,17 @@ __all__ = ['Message', 'Catalog', 'TranslationError'] __docformat__ = 'restructuredtext en' -PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?([-#0\ +])?(\*|[\d]+)?' - r'(\.(\*|[\d]+))?([hlL])?[diouxXeEfFgGcrs]') + +PYTHON_FORMAT = re.compile(r'''(?x) + \% + (?:\(([\w]*)\))? + ( + [-#0\ +]?(?:\*|[\d]+)? + (?:\.(?:\*|[\d]+))? + [hlL]? + ) + ([diouxXeEfFgGcrs%]) +''') class Message(object): @@ -93,9 +103,29 @@ return cmp(self.id, obj.id) def clone(self): - return Message(self.id, self.string, self.locations, self.flags, - self.auto_comments, self.user_comments, - self.previous_id, self.lineno) + return Message(*map(copy, (self.id, self.string, self.locations, + self.flags, self.auto_comments, + self.user_comments, self.previous_id, + self.lineno))) + + def check(self, catalog=None): + """Run various validation checks on the message. Some validations + are only performed if the catalog is provided. This method returns + a sequence of `TranslationError` objects. + + :rtype: ``iterator`` + :param catalog: A catalog instance that is passed to the checkers + :see: `Catalog.check` for a way to perform checks for all messages + in a catalog. 
+ """ + from babel.messages.checkers import checkers + errors = [] + for checker in checkers: + try: + checker(catalog, self) + except TranslationError, e: + errors.append(e) + return errors def fuzzy(self): return 'fuzzy' in self.flags @@ -568,28 +598,12 @@ ``(message, errors)`` tuple, where ``message`` is the `Message` object and ``errors`` is a sequence of `TranslationError` objects. - :note: this feature requires ``setuptools``/``pkg_resources`` to be - installed; if it is not, this method will simply return an empty - iterator :rtype: ``iterator`` """ - checkers = [] - try: - from pkg_resources import working_set - except ImportError: - return - else: - for entry_point in working_set.iter_entry_points('babel.checkers'): - checkers.append(entry_point.load()) - for message in self._messages.values(): - errors = [] - for checker in checkers: - try: - checker(self, message) - except TranslationError, e: - errors.append(e) - if errors: - yield message, errors + for message in self._messages.values(): + errors = message.check(catalog=self) + if errors: + yield message, errors def update(self, template, no_fuzzy_matching=False): """Update the catalog based on the given template catalog. diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py --- a/babel/messages/checkers.py +++ b/babel/messages/checkers.py @@ -16,8 +16,17 @@ :since: version 0.9 """ +from itertools import izip from babel.messages.catalog import TranslationError, PYTHON_FORMAT +#: list of format chars that are compatible to each other +_string_format_compatibilities = [ + set(['i', 'd', 'u']), + set(['x', 'X']), + set(['f', 'F', 'g', 'G']) +] + + def num_plurals(catalog, message): """Verify the number of plurals in the translation.""" if not message.pluralizable: @@ -26,6 +35,10 @@ "message") return + # skip further tests if no catalog is provided. 
+ elif catalog is None: + return + msgstrs = message.string if not isinstance(msgstrs, (list, tuple)): msgstrs = (msgstrs,) @@ -33,19 +46,134 @@ raise TranslationError("Wrong number of plural forms (expected %d)" % catalog.num_plurals) + def python_format(catalog, message): - if 'python-format' in message.flags: - msgids = message.id - if not isinstance(msgids, (list, tuple)): - msgids = (msgids,) - msgstrs = message.string - if not isinstance(msgstrs, (list, tuple)): - msgstrs = (msgstrs,) - for idx, msgid in enumerate(msgids): - if not msgstrs[idx]: - continue # no translation - for match in PYTHON_FORMAT.finditer(msgid): - param = match.group(0) - if param not in msgstrs[idx]: - raise TranslationError("Python parameter %s not found in " - "translation" % param) + """Verify the format string placeholders in the translation.""" + if 'python-format' not in message.flags: + return + msgids = message.id + if not isinstance(msgids, (list, tuple)): + msgids = (msgids,) + msgstrs = message.string + if not isinstance(msgstrs, (list, tuple)): + msgstrs = (msgstrs,) + + for msgid, msgstr in izip(msgids, msgstrs): + if msgstr: + _validate_format(msgid, msgstr) + + +def _validate_format(format, alternative): + """Test format string `alternative` against `format`. `format` can be the + msgid of a message and `alternative` one of the `msgstr`\s. The two + arguments are not interchangeable as `alternative` may contain fewer + placeholders if `format` uses named placeholders. + + If `format` does not use string formatting a `ValueError` is raised. + + If the string formatting of `alternative` is compatible with `format` the + function returns `None`, otherwise a `TranslationError` is raised. + + Examples for compatible format strings: + + >>> _validate_format('Hello %s!', 'Hallo %s!') + >>> _validate_format('Hello %i!', 'Hallo %d!') + + Example of incompatible format strings: + + >>> _validate_format('Hello %(name)s!', 'Hallo %s!') + Traceback (most recent call last): + ...
+ TranslationError: the format strings are of different kinds + + This function is used by the `python_format` checker. + + :param format: The original format string + :param alternative: The alternative format string that should be checked + against format + :return: None on success + :raises TranslationError: on formatting errors + """ + + def _parse(string): + result = [] + for match in PYTHON_FORMAT.finditer(string): + name, format, typechar = match.groups() + if typechar == '%' and name is None: + continue + result.append((name, str(typechar))) + return result + + def _compatible(a, b): + if a == b: + return True + for set in _string_format_compatibilities: + if a in set and b in set: + return True + return False + + def _check_positional(results): + positional = None + for name, char in results: + if positional is None: + positional = name is None + else: + if (name is None) != positional: + raise TranslationError('format string mixes positional ' + 'and named placeholders') + return bool(positional) + + a, b = map(_parse, (format, alternative)) + + # if a does not use string formattings, we are dealing with invalid + # input data. 
This function only works if the first string provided + # does contain string format chars + if not a: + raise ValueError('original string provided does not use string ' + 'formatting.') + + # now check if both strings are positional or named + a_positional, b_positional = map(_check_positional, (a, b)) + if a_positional and not b_positional and not b: + raise TranslationError('placeholders are incompatible') + elif a_positional != b_positional: + raise TranslationError('the format strings are of different kinds') + + # if we are operating on positional strings both must have the + # same number of format chars and those must be compatible + if a_positional: + if len(a) != len(b): + raise TranslationError('positional format placeholders are ' + 'unbalanced') + for idx, ((_, first), (_, second)) in enumerate(izip(a, b)): + if not _compatible(first, second): + raise TranslationError('incompatible format for placeholder ' + '%d: %r and %r are not compatible' % + (idx + 1, first, second)) + + # otherwise the second string must not have names the first one + # doesn't have and the types of those included must be compatible + else: + type_map = dict(a) + for name, typechar in b: + if name not in type_map: + raise TranslationError('unknown named placeholder %r' % name) + elif not _compatible(typechar, type_map[name]): + raise TranslationError('incompatible format for ' + 'placeholder %r: ' + '%r and %r are not compatible' % + (name, typechar, type_map[name])) + + +def _find_checkers(): + try: + from pkg_resources import working_set + except ImportError: + return [num_plurals, python_format] + checkers = [] + for entry_point in working_set.iter_entry_points('babel.checkers'): + checkers.append(entry_point.load()) + return checkers + + +checkers = _find_checkers() diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -271,6 +271,7 @@ results = func(fileobj, keywords.keys(), comment_tags, 
options=options or {}) + for lineno, funcname, messages, comments in results: if funcname: spec = keywords[funcname] or (1,) @@ -313,7 +314,6 @@ if strip_comment_tags: _strip_comment_tags(comments, comment_tags) - yield lineno, messages, comments @@ -421,6 +421,13 @@ del buf[:] else: messages.append(None) + if translator_comments: + # We have translator comments, and since we're on a + # comma(,) user is allowed to break into a new line + # Let's increase the last comment's lineno in order + # for the comment to still be a valid one + old_lineno, old_comment = translator_comments.pop() + translator_comments.append((old_lineno+1, old_comment)) elif call_stack > 0 and tok == OP and value == ')': call_stack -= 1 elif funcname and call_stack == -1: diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -196,8 +196,13 @@ _add_message() if line[1:].startswith(':'): for location in line[2:].lstrip().split(): - filename, lineno = location.split(':', 1) - locations.append((filename, int(lineno))) + pos = location.rfind(':') + if pos >= 0: + try: + lineno = int(location[pos + 1:]) + except ValueError: + continue + locations.append((location[:pos], lineno)) elif line[1:].startswith(','): for flag in line[2:].lstrip().split(','): flags.append(flag.strip()) diff --git a/babel/messages/tests/catalog.py b/babel/messages/tests/catalog.py --- a/babel/messages/tests/catalog.py +++ b/babel/messages/tests/catalog.py @@ -34,6 +34,7 @@ assert catalog.PYTHON_FORMAT.search('foo %(name).*f') assert catalog.PYTHON_FORMAT.search('foo %(name)3.*f') assert catalog.PYTHON_FORMAT.search('foo %(name)*.*f') + assert catalog.PYTHON_FORMAT.search('foo %()s') def test_translator_comments(self): mess = catalog.Message('foo', user_comments=['Comment About `foo`']) @@ -44,6 +45,14 @@ self.assertEqual(mess.auto_comments, ['Comment 1 About `foo`', 'Comment 2 About `foo`']) + def test_clone_message_object(self): + msg = 
catalog.Message('foo', locations=[('foo.py', 42)]) + clone = msg.clone() + clone.locations.append(('bar.py', 42)) + self.assertEqual(msg.locations, [('foo.py', 42)]) + msg.flags.add('fuzzy') + assert not clone.fuzzy and msg.fuzzy + class CatalogTestCase(unittest.TestCase): diff --git a/babel/messages/tests/extract.py b/babel/messages/tests/extract.py --- a/babel/messages/tests/extract.py +++ b/babel/messages/tests/extract.py @@ -62,6 +62,45 @@ self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])], messages) + def test_comments_with_calls_that_spawn_multiple_lines(self): + buf = StringIO("""\ +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, ngettext("Catalog deleted.", + "Catalogs deleted.", len(selected))) + +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, _("Locale deleted.")) + + +# NOTE: This Comment SHOULD Be Extracted +add_notice(req, ngettext("Foo deleted.", "Foos deleted.", len(selected))) + +# NOTE: This Comment SHOULD Be Extracted +# NOTE: And This One Too +add_notice(req, ngettext("Bar deleted.", + "Bars deleted.", len(selected))) +""") + messages = list(extract.extract_python(buf, ('ngettext','_'), ['NOTE:'], + + {'strip_comment_tags':False})) + self.assertEqual((6, '_', 'Locale deleted.', + [u'NOTE: This Comment SHOULD Be Extracted']), + messages[1]) + self.assertEqual((10, 'ngettext', (u'Foo deleted.', u'Foos deleted.', + None), + [u'NOTE: This Comment SHOULD Be Extracted']), + messages[2]) + self.assertEqual((3, 'ngettext', + (u'Catalog deleted.', + u'Catalogs deleted.', None), + [u'NOTE: This Comment SHOULD Be Extracted']), + messages[0]) + self.assertEqual((15, 'ngettext', (u'Bar deleted.', u'Bars deleted.', + None), + [u'NOTE: This Comment SHOULD Be Extracted', + u'NOTE: And This One Too']), + messages[3]) + def test_declarations(self): buf = StringIO("""\ class gettext(object): @@ -175,7 +214,7 @@ buf = StringIO(""" # This shouldn't be in the output # because it didn't start with a comment tag -# do NOTE: this 
will no be a translation comment +# do NOTE: this will not be a translation comment # NOTE: This one will be msg = _(u'Foo Bar') """) @@ -249,6 +288,17 @@ self.assertEqual(u'Hi there!', messages[0][2]) self.assertEqual([], messages[0][3]) + def test_comment_tag_with_leading_space(self): + buf = StringIO(""" + #: A translation comment + #: with leading spaces +msg = _(u'Foo Bar') +""") + messages = list(extract.extract_python(buf, ('_',), [':'], {})) + self.assertEqual(u'Foo Bar', messages[0][2]) + self.assertEqual([u': A translation comment', u': with leading spaces'], + messages[0][3]) + def test_different_signatures(self): buf = StringIO(""" foo = _('foo', 'bar') diff --git a/babel/messages/tests/pofile.py b/babel/messages/tests/pofile.py --- a/babel/messages/tests/pofile.py +++ b/babel/messages/tests/pofile.py @@ -360,6 +360,19 @@ msgstr[1] "Voeh"''' in value assert value.find('msgid ""') < value.find('msgid "bar"') < value.find('msgid "foo"') + def test_silent_location_fallback(self): + buf = StringIO('''\ +#: broken_file.py +msgid "missing line number" +msgstr "" + +#: broken_file.py:broken_line_number +msgid "broken line number" +msgstr ""''') + catalog = pofile.read_po(buf) + self.assertEqual(catalog['missing line number'].locations, []) + self.assertEqual(catalog['broken line number'].locations, []) + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(pofile)) diff --git a/babel/numbers.py b/babel/numbers.py --- a/babel/numbers.py +++ b/babel/numbers.py @@ -463,7 +463,10 @@ is_negative = int(value < 0) if self.exp_prec: # Scientific notation value = abs(value) - exp = int(math.floor(math.log(value, 10))) + if value: + exp = int(math.floor(math.log(value, 10))) + else: + exp = 0 # Minimum number of integer digits if self.int_prec[0] == self.int_prec[1]: exp -= self.int_prec[0] - 1 diff --git a/babel/support.py b/babel/support.py --- a/babel/support.py +++ b/babel/support.py @@ -20,6 +20,11 @@ from datetime import date, datetime, 
time import gettext +try: + set +except NameError: + from sets import set + from babel.core import Locale from babel.dates import format_date, format_datetime, format_time, LC_TIME from babel.numbers import format_number, format_decimal, format_currency, \ diff --git a/babel/tests/numbers.py b/babel/tests/numbers.py --- a/babel/tests/numbers.py +++ b/babel/tests/numbers.py @@ -138,6 +138,9 @@ fmt = numbers.format_scientific(Decimal('12345'), '#.##E+00 m/s', locale='en_US') self.assertEqual(fmt, '1.23E+04 m/s') + # 0 (see ticket #99) + fmt = numbers.format_scientific(0, '#E0', locale='en_US') + self.assertEqual(fmt, '0E0') def suite(): diff --git a/babel/util.py b/babel/util.py --- a/babel/util.py +++ b/babel/util.py @@ -30,6 +30,7 @@ 'LOCALTZ'] __docformat__ = 'restructuredtext en' + def distinct(iterable): """Yield all items in an iterable collection that are distinct. diff --git a/contrib/babel.js b/contrib/babel.js new file mode 100644 --- /dev/null +++ b/contrib/babel.js @@ -0,0 +1,160 @@ +/** + * Babel JavaScript Support + * + * Copyright (C) 2008 Edgewall Software + * All rights reserved. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://babel.edgewall.org/wiki/License. + * + * This software consists of voluntary contributions made by many + * individuals. For the exact contribution history, see the revision + * history and logs, available at http://babel.edgewall.org/log/. + */ + +/** + * A simple module that provides a gettext like translation interface. + * The catalog passed to load() must be an object conforming to this + * interface:: + * + * { + * messages: an object of {msgid: translations} items where + * translations is an array of messages or a single + * string if the message is not pluralizable. + * plural_expr: the plural expression for the language. + * locale: the identifier for this locale.
+ * domain: the name of the domain. + * } + * + * Missing elements in the object are ignored. + * + * Typical usage:: + * + * var translations = babel.Translations.load(...).install(); + */ +var babel = new function() { + + var defaultPluralExpr = function(n) { return n == 1 ? 0 : 1; }; + var formatRegex = /%?%(?:\(([^\)]+)\))?([disr])/g; + + /** + * A translations object implementing the gettext interface + */ + var Translations = this.Translations = function(locale, domain) { + this.messages = {}; + this.locale = locale || 'unknown'; + this.domain = domain || 'messages'; + this.pluralexpr = defaultPluralExpr; + }; + + /** + * Create a new translations object from the catalog and return it. + * See the babel-module comment for more details. + */ + Translations.load = function(catalog) { + var rv = new Translations(); + rv.load(catalog); + return rv; + }; + + Translations.prototype = { + /** + * translate a single string. + */ + gettext: function(string) { + var translated = this.messages[string]; + if (typeof translated == 'undefined') + return string; + return (typeof translated == 'string') ? translated : translated[0]; + }, + + /** + * translate a pluralizable string + */ + ngettext: function(singular, plural, n) { + var translated = this.messages[singular]; + if (typeof translated == 'undefined') + return (n == 1) ? singular : plural; + return translated[this.pluralexpr(n)]; + }, + + /** + * Install this translation document wide. After this call, there are + * three new methods on the window object: _, gettext and ngettext + */ + install: function() { + var self = this; + window._ = window.gettext = function(string) { + return self.gettext(string); + }; + window.ngettext = function(singular, plural, n) { + return self.ngettext(singular, plural, n); + }; + return this; + }, + + /** + * Works like Translations.load but updates the instance rather + * than creating a new one.
+ */ + load: function(catalog) { + if (catalog.messages) + this.update(catalog.messages) + if (catalog.plural_expr) + this.setPluralExpr(catalog.plural_expr); + if (catalog.locale) + this.locale = catalog.locale; + if (catalog.domain) + this.domain = catalog.domain; + return this; + }, + + /** + * Updates the translations with the object of messages. + */ + update: function(mapping) { + for (var key in mapping) + if (mapping.hasOwnProperty(key)) + this.messages[key] = mapping[key]; + return this; + }, + + /** + * Sets the plural expression + */ + setPluralExpr: function(expr) { + this.pluralexpr = new Function('n', 'return +(' + expr + ')'); + return this; + } + }; + + /** + * A python inspired string formatting function. Supports named and + * positional placeholders and "s", "d" and "i" as type characters + * without any formatting specifications. + * + * Examples:: + * + * babel.format(_('Hello %s'), name) + * babel.format(_('Progress: %(percent)s%%'), {percent: 100}) + */ + this.format = function() { + var arg, string = arguments[0], idx = 0; + if (arguments.length == 1) + return string; + else if (arguments.length == 2 && typeof arguments[1] == 'object') + arg = arguments[1]; + else { + arg = []; + for (var i = 1, n = arguments.length; i != n; ++i) + arg[i - 1] = arguments[i]; + } + return string.replace(formatRegex, function(all, name, type) { + if (all[0] == all[1]) return all.substring(1); + var value = arg[name || idx++]; + return (type == 'i' || type == 'd') ? +value : value; + }); + } + +};