Mercurial > genshi > genshi-test
diff genshi/filters/i18n.py @ 500:0742f421caba experimental-inline
Merged revisions 487-603 via svnmerge from
http://svn.edgewall.org/repos/genshi/trunk
author | cmlenz |
---|---|
date | Fri, 01 Jun 2007 17:21:47 +0000 |
parents | |
children | 1837f39efd6f |
line wrap: on
line diff
# genshi/filters/i18n.py (new file, mode 100644)
"""Utilities for internationalization and localization of templates."""

try:
    frozenset
except NameError:
    from sets import ImmutableSet as frozenset
from gettext import gettext
from opcode import opmap
import re

from genshi.core import Attrs, Namespace, QName, START, END, TEXT, _ensure
from genshi.template.base import Template, EXPR, SUB
from genshi.template.markup import EXEC

# Single-character forms of the opcodes that `Translator.extract` looks for
# when it scans the raw bytecode string of compiled template expressions.
_LOAD_NAME = chr(opmap['LOAD_NAME'])
_LOAD_CONST = chr(opmap['LOAD_CONST'])
_CALL_FUNCTION = chr(opmap['CALL_FUNCTION'])
_BINARY_ADD = chr(opmap['BINARY_ADD'])


class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.

    For example, assume the following template:

    >>> from genshi.template import MarkupTemplate
    >>>
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...   </body>
    ... </html>''', filename='example.html')

    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:

    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>>
    >>> translator = Translator(pseudo_gettext)

    Next, the translator needs to be prepended to any already defined filters
    on the template:

    >>> tmpl.filters.insert(0, translator)

    When generating the template output, our hard-coded translations should be
    applied as expected:

    >>> print tmpl.generate(username='Hans', _=pseudo_gettext)
    <html>
      <head>
        <title>Beispiel</title>
      </head>
      <body>
        <h1>Beispiel</h1>
        <p>Hallo, Hans</p>
      </body>
    </html>
    """

    # Elements whose content is never translated or extracted.
    IGNORE_TAGS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose values are treated as localizable text.
    INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
                               'summary', 'title'])

    def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS):
        """Initialize the translator.

        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be
                              localized
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs

    def __call__(self, stream, ctxt=None, search_text=True):
        """Translate any localizable strings in the given stream.

        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.

        :param stream: the markup event stream
        :param ctxt: the template context (not used)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in ignore_tags:
                        skip += 1
                elif kind is END:
                    # The data of an END event is the tag name itself. Test
                    # that, not the name left over from the last START event;
                    # otherwise an ignored element containing any other
                    # element would never be closed and the remainder of the
                    # stream would pass through untranslated.
                    if data in ignore_tags:
                        skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                if tag in ignore_tags:
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if isinstance(value, basestring):
                        if name in include_attrs:
                            newval = translate(value)
                    else:
                        # Non-string attribute values are sub-streams; only
                        # translate their text if the attribute is localizable.
                        newval = list(self(_ensure(value), ctxt,
                            search_text=name in include_attrs)
                        )
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                if changed:
                    # Hand on an `Attrs` instance rather than a bare list so
                    # that later filters still see the type they received.
                    attrs = Attrs(new_attrs)

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                text = data.strip()
                if text:
                    # Replace only the stripped part, which preserves the
                    # leading and trailing whitespace of the text node.
                    data = data.replace(text, translate(text))
                yield kind, data, pos

            elif kind is SUB:
                subkind, substream = data
                new_substream = list(self(substream, ctxt))
                yield kind, (subkind, new_substream), pos

            else:
                yield kind, data, pos

    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True):
        """Extract localizable strings from the given template stream.

        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:

        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        * ``message`` is the string itself (a ``unicode`` object, or a tuple
          of ``unicode`` objects for functions with multiple string arguments).

        >>> from genshi.template import MarkupTemplate
        >>>
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ...   <head>
        ...     <title>Example</title>
        ...   </head>
        ...   <body>
        ...     <h1>Example</h1>
        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ...   </body>
        ... </html>''', filename='example.html')
        >>>
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items')

        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)

        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead of an item for each string argument.
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in ignore_tags:
                        skip += 1
                elif kind is END:
                    if data in ignore_tags:
                        skip -= 1
                continue

            if kind is START:
                tag, attrs = data
                if tag in ignore_tags:
                    skip += 1
                    continue

                for name, value in attrs:
                    if isinstance(value, basestring):
                        if name in include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        for lineno, funcname, text in self.extract(
                                _ensure(value), gettext_functions,
                                search_text=name in include_attrs):
                            yield lineno, funcname, text

            elif search_text and kind is TEXT:
                text = data.strip()
                # Only text containing at least one letter is reported. A
                # list is built so that the truth test does not depend on
                # what kind of object ``filter()`` returns.
                if text and [ch for ch in text if ch.isalpha()]:
                    yield pos[1], None, text

            elif kind is EXPR or kind is EXEC:
                code = data.code
                # Map each constant to the two argument bytes that address it
                # (low byte, then a zero high byte). The pattern below only
                # understands a zero high byte, so constants beyond the first
                # 256 are ignored instead of making ``chr()`` raise.
                consts = dict([(n, chr(i) + '\x00') for i, n in
                               enumerate(code.co_consts[:256])])
                # Escape the raw bytes: an index may well be a character that
                # is special in regular expressions.
                gettext_locs = [re.escape(consts[n])
                                for n in gettext_functions if n in consts]
                if not gettext_locs:
                    # None of the functions is referenced by this code object.
                    # An empty alternation would match with an empty function
                    # group and break the constant lookup below.
                    continue
                ops = [
                    _LOAD_CONST, '(', '|'.join(gettext_locs), ')',
                    _CALL_FUNCTION, '.\x00',
                    '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)'
                ]
                # DOTALL, so that ``.`` also matches an argument byte of 10.
                pattern = re.compile(''.join(ops), re.DOTALL)
                for loc, opcodes in pattern.findall(code.co_code):
                    funcname = code.co_consts[ord(loc[0])]
                    strings = []
                    opcodes = iter(opcodes)
                    for opcode in opcodes:
                        if opcode == _BINARY_ADD:
                            if len(strings) < 2:
                                # Not a concatenation of two collected string
                                # constants: discard this match.
                                strings = []
                                break
                            arg = strings.pop()
                            strings[-1] += arg
                        else:
                            arg = code.co_consts[ord(opcodes.next())]
                            opcodes.next() # skip second byte
                            if not isinstance(arg, basestring):
                                break
                            strings.append(unicode(arg))
                    if not strings:
                        # Nothing localizable was passed to the function.
                        continue
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield pos[1], funcname, strings

            elif kind is SUB:
                subkind, substream = data
                for lineno, funcname, text in self.extract(substream,
                                                           gettext_functions):
                    yield lineno, funcname, text