# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Various routines that help with validation of translations.

:since: version 0.9
"""

from itertools import izip
from babel.messages.catalog import TranslationError, PYTHON_FORMAT

#: list of format chars that are compatible to each other
_string_format_compatibilities = [
    set(['i', 'd', 'u']),
    set(['x', 'X']),
    set(['f', 'F', 'g', 'G'])
]


def num_plurals(catalog, message):
    """Verify the number of plurals in the translation.

    :param catalog: the catalog the message belongs to, or `None`; when it
                    is `None` the plural count of a pluralizable message is
                    not checked
    :param message: the message to check
    :raises TranslationError: if a non-pluralizable message carries a
                              non-string translation, or if the number of
                              translated forms differs from
                              ``catalog.num_plurals``
    """
    if not message.pluralizable:
        if not isinstance(message.string, basestring):
            raise TranslationError("Found plural forms for non-pluralizable "
                                   "message")
        return

    # skip further tests if no catalog is provided.
    elif catalog is None:
        return

    msgstrs = message.string
    if not isinstance(msgstrs, (list, tuple)):
        msgstrs = (msgstrs,)
    if len(msgstrs) != catalog.num_plurals:
        raise TranslationError("Wrong number of plural forms (expected %d)" %
                               catalog.num_plurals)


def python_format(catalog, message):
    """Verify the format string placeholders in the translation.

    Messages that are not flagged ``python-format`` are ignored.  Every
    non-empty translated string is validated: the n-th translation is
    compared with the n-th msgid, and translations beyond the last msgid
    (additional plural forms) are compared with the last msgid.

    :param catalog: unused by this checker; accepted so that all checkers
                    share the same signature
    :param message: the message to check
    :raises TranslationError: if a translation's placeholders are not
                              compatible with those of its msgid
    """
    if 'python-format' not in message.flags:
        return
    msgids = message.id
    if not isinstance(msgids, (list, tuple)):
        msgids = (msgids,)
    msgstrs = message.string
    if not isinstance(msgstrs, (list, tuple)):
        msgstrs = (msgstrs,)
    if not msgids:
        return

    # Pairing the sequences with izip() would stop at the shorter one and
    # silently skip plural forms beyond the second; clamp the msgid index
    # instead so that every translated form gets validated.
    last = len(msgids) - 1
    for index, msgstr in enumerate(msgstrs):
        if msgstr:
            _validate_format(msgids[min(index, last)], msgstr)


def _validate_format(format, alternative):
    """Test format string `alternative` against `format`.  `format` can be the
    msgid of a message and `alternative` one of its `msgstr` values.  The two
    arguments are not interchangeable as `alternative` may contain less
    placeholders if `format` uses named placeholders.

    The behavior of this function is undefined if the string does not use
    string formattings.

    If the string formatting of `alternative` is compatible to `format` the
    function returns `None`, otherwise a `TranslationError` is raised.

    Examples for compatible format strings:

    >>> _validate_format('Hello %s!', 'Hallo %s!')
    >>> _validate_format('Hello %i!', 'Hallo %d!')

    Example for an incompatible format strings:

    >>> _validate_format('Hello %(name)s!', 'Hallo %s!')
    Traceback (most recent call last):
      ...
    TranslationError: the format strings are of different kinds

    This function is used by the `python_format` checker.

    :param format: The original format string
    :param alternative: The alternative format string that should be checked
                        against format
    :return: None on success
    :raises TranslationError: on formatting errors
    """

    def _parse(string):
        # Collect a (name, typechar) pair for every placeholder; `name` is
        # None for positional placeholders.
        result = []
        for match in PYTHON_FORMAT.finditer(string):
            name, _, typechar = match.groups()
            # an unnamed '%' conversion is skipped, it is not a placeholder
            if typechar == '%' and name is None:
                continue
            result.append((name, str(typechar)))
        return result

    def _compatible(a, b):
        # Two type chars are compatible when they are equal or belong to
        # the same group of `_string_format_compatibilities`.
        if a == b:
            return True
        for group in _string_format_compatibilities:
            if a in group and b in group:
                return True
        return False

    def _check_positional(results):
        # Return True if all placeholders are positional and False if they
        # are all named or if there are none; mixing both kinds is an error.
        positional = None
        for name, char in results:
            if positional is None:
                positional = name is None
            else:
                if (name is None) != positional:
                    raise TranslationError('format string mixes positional '
                                           'and named placeholders')
        return bool(positional)

    a, b = map(_parse, (format, alternative))

    # now check if both strings are positional or named
    a_positional, b_positional = map(_check_positional, (a, b))
    if a_positional and not b_positional and not b:
        # the original is positional but the alternative has no
        # placeholders at all
        raise TranslationError('placeholders are incompatible')
    elif a_positional != b_positional:
        raise TranslationError('the format strings are of different kinds')

    # if we are operating on positional strings both must have the
    # same number of format chars and those must be compatible
    if a_positional:
        if len(a) != len(b):
            raise TranslationError('positional format placeholders are '
                                   'unbalanced')
        for idx, ((_, first), (_, second)) in enumerate(izip(a, b)):
            if not _compatible(first, second):
                raise TranslationError('incompatible format for placeholder '
                                       '%d: %r and %r are not compatible' %
                                       (idx + 1, first, second))

    # otherwise the second string must not have names the first one
    # doesn't have and the types of those included must be compatible
    else:
        type_map = dict(a)
        for name, typechar in b:
            if name not in type_map:
                raise TranslationError('unknown named placeholder %r' % name)
            elif not _compatible(typechar, type_map[name]):
                raise TranslationError('incompatible format for '
                                       'placeholder %r: '
                                       '%r and %r are not compatible' %
                                       (name, typechar, type_map[name]))


def _find_checkers():
    """Return the list of checker callables to use.

    Checkers registered under the ``babel.checkers`` entry point group are
    loaded through `pkg_resources`.  If `pkg_resources` cannot be imported
    or no entry point is found, the built-in checkers are returned instead.
    """
    checkers = []
    try:
        from pkg_resources import working_set
    except ImportError:
        pass
    else:
        for entry_point in working_set.iter_entry_points('babel.checkers'):
            checkers.append(entry_point.load())
    if not checkers:
        # if pkg_resources is not available or no usable egg-info was found
        # (see #230), just resort to hard-coded checkers
        return [num_plurals, python_format]
    return checkers


checkers = _find_checkers()