Mercurial > genshi > genshi-test
diff genshi/filters/i18n.py @ 902:09cc3627654c experimental-inline
Sync `experimental/inline` branch with [source:trunk@1126].
author | cmlenz |
---|---|
date | Fri, 23 Apr 2010 21:08:26 +0000 |
parents | 1837f39efd6f |
children |
line wrap: on
line diff
--- a/genshi/filters/i18n.py +++ b/genshi/filters/i18n.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2007 Edgewall Software +# Copyright (C) 2007-2010 Edgewall Software # All rights reserved. # # This software is licensed as described in the file COPYING, which @@ -11,71 +11,544 @@ # individuals. For the exact contribution history, see the revision # history and logs, available at http://genshi.edgewall.org/log/. -"""Utilities for internationalization and localization of templates. +"""Directives and utilities for internationalization and localization of +templates. :since: version 0.4 +:note: Directives support added since version 0.6 """ +try: + any +except NameError: + from genshi.util import any from gettext import NullTranslations +import os import re from types import FunctionType -from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ - END_NS, XML_NAMESPACE, _ensure +from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ + XML_NAMESPACE, _ensure, StreamEventKind from genshi.template.eval import _ast from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives -from genshi.template.directives import Directive +from genshi.template.directives import Directive, StripDirective from genshi.template.markup import MarkupTemplate, EXEC __all__ = ['Translator', 'extract'] __docformat__ = 'restructuredtext en' + I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') +MSGBUF = StreamEventKind('MSGBUF') +SUB_START = StreamEventKind('SUB_START') +SUB_END = StreamEventKind('SUB_END') + +GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', + 'ugettext', 'ungettext') + -class CommentDirective(Directive): +class I18NDirective(Directive): + """Simple interface for i18n directives to support messages extraction.""" - __slots__ = [] + def __call__(self, stream, directives, ctxt, **vars): + return _apply_directives(stream, directives, ctxt, vars) + + +class ExtractableI18NDirective(I18NDirective): + """Simple interface for directives to support messages extraction.""" + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None): + raise NotImplementedError + + +class CommentDirective(I18NDirective): + """Implementation of the ``i18n:comment`` template directive which adds + translation comments. + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <p i18n:comment="As in Foo Bar">Foo</p> + ... </html>''') + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) + [(2, None, u'Foo', [u'As in Foo Bar'])] + """ + __slots__ = ['comment'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.comment = value + + +class MsgDirective(ExtractableI18NDirective): + r"""Implementation of the ``i18n:msg`` directive which marks inner content + as translatable. Consider the following examples: + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg=""> + ... <p>Foo</p> + ... <p>Bar</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) + [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])] + >>> print(tmpl.generate().render()) + <html> + <div><p>Foo</p> + <p>Bar</p></div> + <p>Foo <em>bar</em>!</p> + </html> + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg="fname, lname"> + ... <p>First Name: ${fname}</p> + ... <p>Last Name: ${lname}</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []), + (6, None, u'Foo [1:bar]!', [])] + + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:msg="fname, lname"> + ... <p>First Name: ${fname}</p> + ... <p>Last Name: ${lname}</p> + ... </div> + ... <p i18n:msg="">Foo <em>bar</em>!</p> + ... </html>''') + >>> translator.setup(tmpl) + >>> print(tmpl.generate(fname='John', lname='Doe').render()) + <html> + <div><p>First Name: John</p> + <p>Last Name: Doe</p></div> + <p>Foo <em>bar</em>!</p> + </html> + + Starting and ending white-space is stripped of to make it simpler for + translators. Stripping it is not that important since it's on the html + source, the rendered output will remain the same. + """ + __slots__ = ['params', 'lineno'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.params = [param.strip() for param in value.split(',') if param] + self.lineno = lineno @classmethod def attach(cls, template, stream, value, namespaces, pos): - return None, stream + if type(value) is dict: + value = value.get('params', '').strip() + return super(MsgDirective, cls).attach(template, stream, value.strip(), + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + gettext = ctxt.get('_i18n.gettext') + if ctxt.get('_i18n.domain'): + dgettext = ctxt.get('_i18n.dgettext') + assert hasattr(dgettext, '__call__'), \ + 'No domain gettext function passed' + gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) + + def _generate(): + msgbuf = MessageBuffer(self) + previous = stream.next() + if previous[0] is START: + yield previous + else: + msgbuf.append(*previous) + previous = stream.next() + for kind, data, pos in stream: + msgbuf.append(*previous) + previous = kind, data, pos + if previous[0] is not END: + msgbuf.append(*previous) + previous = None + for event in msgbuf.translate(gettext(msgbuf.format())): + yield event + if previous: + yield previous + + return _apply_directives(_generate(), directives, ctxt, vars) + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None): + msgbuf = MessageBuffer(self) + strip = False + + stream = iter(stream) + previous = stream.next() + if previous[0] is START: + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + strip = True + for event in stream: + if event[0] is START: + for message in translator._extract_attrs(event, + gettext_functions, + search_text=search_text): + yield message + msgbuf.append(*previous) + previous = event + if not strip: + msgbuf.append(*previous) + + yield self.lineno, None, msgbuf.format(), comment_stack[-1:] -class MsgDirective(Directive): - +class ChooseBranchDirective(I18NDirective): __slots__ = ['params'] - def __init__(self, value, template, hints=None, namespaces=None, - lineno=-1, offset=-1): - Directive.__init__(self, None, template, namespaces, lineno, offset) - self.params = [name.strip() for name in value.split(',')] + def __call__(self, stream, directives, ctxt, **vars): + self.params = ctxt.get('_i18n.choose.params', [])[:] + msgbuf = MessageBuffer(self) + stream = _apply_directives(stream, directives, ctxt, vars) - def __call__(self, stream, directives, ctxt, **vars): - msgbuf = MessageBuffer(self.params) + previous = stream.next() + if previous[0] is START: + yield previous + else: + msgbuf.append(*previous) - stream = iter(stream) - yield stream.next() # the outer start tag - previous = stream.next() + try: + previous = stream.next() + except StopIteration: + # For example <i18n:singular> or <i18n:plural> directives + yield MSGBUF, (), -1 # the place holder for msgbuf output + ctxt['_i18n.choose.%s' % self.tagname] = msgbuf + return + for event in stream: msgbuf.append(*previous) previous = event + yield MSGBUF, (), -1 # the place holder for msgbuf output - gettext = ctxt.get('_i18n.gettext') - for event in msgbuf.translate(gettext(msgbuf.format())): + if previous[0] is END: + yield previous # the outer end tag + else: + msgbuf.append(*previous) + ctxt['_i18n.choose.%s' % self.tagname] = msgbuf + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None, msgbuf=None): + stream = iter(stream) + previous = stream.next() + + if previous[0] is START: + # skip the enclosing element + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + + for event in stream: + if previous[0] is START: + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + msgbuf.append(*previous) + previous = event + + if previous[0] is not END: + msgbuf.append(*previous) + + +class SingularDirective(ChooseBranchDirective): + """Implementation of the ``i18n:singular`` directive to be used with the + ``i18n:choose`` directive.""" + + +class PluralDirective(ChooseBranchDirective): + """Implementation of the ``i18n:plural`` directive to be used with the + ``i18n:choose`` directive.""" + + +class ChooseDirective(ExtractableI18NDirective): + """Implementation of the ``i18n:choose`` directive which provides plural + internationalisation of strings. + + This directive requires at least one parameter, the one which evaluates to + an integer which will allow to choose the plural/singular form. If you also + have expressions inside the singular and plural version of the string you + also need to pass a name for those parameters. Consider the following + examples: + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:choose="num; num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </div> + ... </html>''') + >>> translator = Translator() + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, 'ngettext', (u'There is %(num)s coin', + u'There are %(num)s coins'), [])] + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <div i18n:choose="num; num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </div> + ... </html>''') + >>> translator.setup(tmpl) + >>> print(tmpl.generate(num=1).render()) + <html> + <div> + <p>There is 1 coin</p> + </div> + </html> + >>> print(tmpl.generate(num=2).render()) + <html> + <div> + <p>There are 2 coins</p> + </div> + </html> + + When used as a element and not as an attribute: + + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <i18n:choose numeral="num" params="num"> + ... <p i18n:singular="">There is $num coin</p> + ... <p i18n:plural="">There are $num coins</p> + ... </i18n:choose> + ... </html>''') + >>> translator.setup(tmpl) + >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE + [(2, 'ngettext', (u'There is %(num)s coin', + u'There are %(num)s coins'), [])] + """ + __slots__ = ['numeral', 'params', 'lineno'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + params = [v.strip() for v in value.split(';')] + self.numeral = self._parse_expr(params.pop(0), template, lineno, offset) + self.params = params and [name.strip() for name in + params[0].split(',') if name] or [] + self.lineno = lineno + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + numeral = value.get('numeral', '').strip() + assert numeral is not '', "at least pass the numeral param" + params = [v.strip() for v in value.get('params', '').split(',')] + value = '%s; ' % numeral + ', '.join(params) + return super(ChooseDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + ctxt.push({'_i18n.choose.params': self.params, + '_i18n.choose.singular': None, + '_i18n.choose.plural': None}) + + ngettext = ctxt.get('_i18n.ngettext') + assert hasattr(ngettext, '__call__'), 'No ngettext function available' + dngettext = ctxt.get('_i18n.dngettext') + if not dngettext: + dngettext = lambda d, s, p, n: ngettext(s, p, n) + + new_stream = [] + singular_stream = None + singular_msgbuf = None + plural_stream = None + plural_msgbuf = None + + numeral = self.numeral.evaluate(ctxt) + is_plural = self._is_plural(numeral, ngettext) + + for event in stream: + if event[0] is SUB and any(isinstance(d, ChooseBranchDirective) + for d in event[1][0]): + subdirectives, substream = event[1] + + if isinstance(subdirectives[0], SingularDirective): + singular_stream = list(_apply_directives(substream, + subdirectives, + ctxt, vars)) + new_stream.append((MSGBUF, None, (None, -1, -1))) + + elif isinstance(subdirectives[0], PluralDirective): + if is_plural: + plural_stream = list(_apply_directives(substream, + subdirectives, + ctxt, vars)) + + else: + new_stream.append(event) + + if ctxt.get('_i18n.domain'): + ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), + s, p, n) + + singular_msgbuf = ctxt.get('_i18n.choose.singular') + if is_plural: + plural_msgbuf = ctxt.get('_i18n.choose.plural') + msgbuf, choice = plural_msgbuf, plural_stream + else: + msgbuf, choice = singular_msgbuf, singular_stream + plural_msgbuf = MessageBuffer(self) + + for kind, data, pos in new_stream: + if kind is MSGBUF: + for event in choice: + if event[0] is MSGBUF: + translation = ngettext(singular_msgbuf.format(), + plural_msgbuf.format(), + numeral) + for subevent in msgbuf.translate(translation): + yield subevent + else: + yield event + else: + yield kind, data, pos + + ctxt.pop() + + def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, + search_text=True, comment_stack=None): + strip = False + stream = iter(stream) + previous = stream.next() + + if previous[0] is START: + # skip the enclosing element + for message in translator._extract_attrs(previous, + gettext_functions, + search_text=search_text): + yield message + previous = stream.next() + strip = True + + singular_msgbuf = MessageBuffer(self) + plural_msgbuf = MessageBuffer(self) + + for event in stream: + if previous[0] is SUB: + directives, substream = previous[1] + for directive in directives: + if isinstance(directive, SingularDirective): + for message in directive.extract(translator, + substream, gettext_functions, search_text, + comment_stack, msgbuf=singular_msgbuf): + yield message + elif isinstance(directive, PluralDirective): + for message in directive.extract(translator, + substream, gettext_functions, search_text, + comment_stack, msgbuf=plural_msgbuf): + yield message + elif not isinstance(directive, StripDirective): + singular_msgbuf.append(*previous) + plural_msgbuf.append(*previous) + else: + if previous[0] is START: + for message in translator._extract_attrs(previous, + gettext_functions, + search_text): + yield message + singular_msgbuf.append(*previous) + plural_msgbuf.append(*previous) + previous = event + + if not strip: + singular_msgbuf.append(*previous) + plural_msgbuf.append(*previous) + + yield self.lineno, 'ngettext', \ + (singular_msgbuf.format(), plural_msgbuf.format()), \ + comment_stack[-1:] + + def _is_plural(self, numeral, ngettext): + # XXX: should we test which form was chosen like this!?!?!? + # There should be no match in any catalogue for these singular and + # plural test strings + singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' + plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' + return ngettext(singular, plural, numeral) == plural + + +class DomainDirective(I18NDirective): + """Implementation of the ``i18n:domain`` directive which allows choosing + another i18n domain(catalog) to translate from. + + >>> from genshi.filters.tests.i18n import DummyTranslations + >>> tmpl = MarkupTemplate('''\ + <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + ... <p i18n:msg="">Bar</p> + ... <div i18n:domain="foo"> + ... <p i18n:msg="">FooBar</p> + ... <p>Bar</p> + ... <p i18n:domain="bar" i18n:msg="">Bar</p> + ... <p i18n:domain="">Bar</p> + ... </div> + ... <p>Bar</p> + ... </html>''') + + >>> translations = DummyTranslations({'Bar': 'Voh'}) + >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'}) + >>> translations.add_domain('bar', {'Bar': 'bar_Bar'}) + >>> translator = Translator(translations) + >>> translator.setup(tmpl) + + >>> print(tmpl.generate().render()) + <html> + <p>Voh</p> + <div> + <p>BarFoo</p> + <p>foo_Bar</p> + <p>bar_Bar</p> + <p>Voh</p> + </div> + <p>Voh</p> + </html> + """ + __slots__ = ['domain'] + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + Directive.__init__(self, None, template, namespaces, lineno, offset) + self.domain = value and value.strip() or '__DEFAULT__' + + @classmethod + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('name') + return super(DomainDirective, cls).attach(template, stream, value, + namespaces, pos) + + def __call__(self, stream, directives, ctxt, **vars): + ctxt.push({'_i18n.domain': self.domain}) + for event in _apply_directives(stream, directives, ctxt, vars): yield event - - yield previous # the outer end tag + ctxt.pop() class Translator(DirectiveFactory): """Can extract and translate localizable strings from markup streams and templates. - For example, assume the followng template: + For example, assume the following template: - >>> from genshi.template import MarkupTemplate - >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> @@ -94,7 +567,6 @@ ... 'Example': 'Beispiel', ... 'Hello, %(name)s': 'Hallo, %(name)s' ... }[string] - >>> >>> translator = Translator(pseudo_gettext) Next, the translator needs to be prepended to any already defined filters @@ -105,7 +577,7 @@ When generating the template output, our hard-coded translations should be applied as expected: - >>> print tmpl.generate(username='Hans', _=pseudo_gettext) + >>> print(tmpl.generate(username='Hans', _=pseudo_gettext)) <html> <head> <title>Beispiel</title> @@ -115,23 +587,28 @@ <p>Hallo, Hans</p> </body> </html> - + Note that elements defining ``xml:lang`` attributes that do not contain variable expressions are ignored by this filter. That can be used to exclude specific parts of a template from being extracted and translated. """ directives = [ + ('domain', DomainDirective), ('comment', CommentDirective), - ('msg', MsgDirective) + ('msg', MsgDirective), + ('choose', ChooseDirective), + ('singular', SingularDirective), + ('plural', PluralDirective) ] IGNORE_TAGS = frozenset([ QName('script'), QName('http://www.w3.org/1999/xhtml}script'), QName('style'), QName('http://www.w3.org/1999/xhtml}style') ]) - INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby', - 'summary', 'title']) + INCLUDE_ATTRS = frozenset([ + 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title' + ]) NAMESPACE = I18N_NAMESPACE def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, @@ -145,7 +622,7 @@ :param extract_text: whether the content of text nodes should be extracted, or only text in explicit ``gettext`` function calls - + :note: Changed in 0.6: the `translate` parameter can now be either a ``gettext``-style function, or an object compatible with the ``NullTransalations`` or ``GNUTranslations`` interface @@ -155,7 +632,8 @@ self.include_attrs = include_attrs self.extract_text = extract_text - def __call__(self, stream, ctxt=None, search_text=True): + def __call__(self, stream, ctxt=None, translate_text=True, + translate_attrs=True): """Translate any localizable strings in the given stream. This function shouldn't be called directly. Instead, an instance of @@ -166,25 +644,41 @@ :param stream: the markup event stream :param ctxt: the template context (not used) - :param search_text: whether text nodes should be translated (used - internally) + :param translate_text: whether text nodes should be translated (used + internally) + :param translate_attrs: whether attribute values should be translated + (used internally) :return: the localized stream """ ignore_tags = self.ignore_tags include_attrs = self.include_attrs skip = 0 xml_lang = XML_NAMESPACE['lang'] + if not self.extract_text: + translate_text = False + translate_attrs = False if type(self.translate) is FunctionType: gettext = self.translate + if ctxt: + ctxt['_i18n.gettext'] = gettext else: gettext = self.translate.ugettext - if ctxt: - ctxt['_i18n.gettext'] = gettext + ngettext = self.translate.ungettext + try: + dgettext = self.translate.dugettext + dngettext = self.translate.dungettext + except AttributeError: + dgettext = lambda _, y: gettext(y) + dngettext = lambda _, s, p, n: ngettext(s, p, n) + if ctxt: + ctxt['_i18n.gettext'] = gettext + ctxt['_i18n.ngettext'] = ngettext + ctxt['_i18n.dgettext'] = dgettext + ctxt['_i18n.dngettext'] = dngettext - extract_text = self.extract_text - if not extract_text: - search_text = False + if ctxt and ctxt.get('_i18n.domain'): + gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) for kind, data, pos in stream: @@ -208,14 +702,15 @@ new_attrs = [] changed = False + for name, value in attrs: newval = value - if extract_text and isinstance(value, basestring): - if name in include_attrs: + if isinstance(value, basestring): + if translate_attrs and name in include_attrs: newval = gettext(value) else: - newval = list(self(_ensure(value), ctxt, - search_text=False) + newval = list( + self(_ensure(value), ctxt, translate_text=False) ) if newval != value: value = newval @@ -226,7 +721,7 @@ yield kind, (tag, attrs), pos - elif search_text and kind is TEXT: + elif translate_text and kind is TEXT: text = data.strip() if text: data = data.replace(text, unicode(gettext(text))) @@ -234,22 +729,36 @@ elif kind is SUB: directives, substream = data - # If this is an i18n:msg directive, no need to translate text + current_domain = None + for idx, directive in enumerate(directives): + # Organize directives to make everything work + # FIXME: There's got to be a better way to do this! + if isinstance(directive, DomainDirective): + # Grab current domain and update context + current_domain = directive.domain + ctxt.push({'_i18n.domain': current_domain}) + # Put domain directive as the first one in order to + # update context before any other directives evaluation + directives.insert(0, directives.pop(idx)) + + # If this is an i18n directive, no need to translate text # nodes here - is_msg = filter(None, [isinstance(d, MsgDirective) - for d in directives]) + is_i18n_directive = any([ + isinstance(d, ExtractableI18NDirective) + for d in directives + ]) substream = list(self(substream, ctxt, - search_text=not is_msg)) + translate_text=not is_i18n_directive, + translate_attrs=translate_attrs)) yield kind, (directives, substream), pos + if current_domain: + ctxt.pop() else: yield kind, data, pos - GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', - 'ugettext', 'ungettext') - def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, - search_text=True, msgbuf=None): + search_text=True, comment_stack=None): """Extract localizable strings from the given template stream. For every string found, this function yields a ``(lineno, function, @@ -264,8 +773,6 @@ * ``comments`` is a list of comments related to the message, extracted from ``i18n:comment`` attributes found in the markup - >>> from genshi.template import MarkupTemplate - >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> @@ -276,9 +783,8 @@ ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> ... </body> ... </html>''', filename='example.html') - >>> >>> for line, func, msg, comments in Translator().extract(tmpl.stream): - ... print "%d, %r, %r" % (line, func, msg) + ... print('%d, %r, %r' % (line, func, msg)) 3, None, u'Example' 6, None, u'Example' 7, '_', u'Hello, %(name)s' @@ -295,18 +801,18 @@ :note: Changed in 0.4.1: For a function with multiple string arguments (such as ``ngettext``), a single item with a tuple of strings is yielded, instead an item for each string argument. - :note: Changed in 0.6: The returned tuples now include a 4th element, - which is a list of comments for the translator + :note: Changed in 0.6: The returned tuples now include a fourth + element, which is a list of comments for the translator. """ if not self.extract_text: search_text = False + if comment_stack is None: + comment_stack = [] skip = 0 - i18n_comment = I18N_NAMESPACE['comment'] - i18n_msg = I18N_NAMESPACE['msg'] + xml_lang = XML_NAMESPACE['lang'] for kind, data, pos in stream: - if skip: if kind is START: skip += 1 @@ -315,64 +821,103 @@ if kind is START and not skip: tag, attrs = data - if tag in self.ignore_tags or \ isinstance(attrs.get(xml_lang), basestring): skip += 1 continue - for name, value in attrs: - if search_text and isinstance(value, basestring): - if name in self.include_attrs: - text = value.strip() - if text: - yield pos[1], None, text, [] - else: - for lineno, funcname, text, comments in self.extract( - _ensure(value), gettext_functions, - search_text=False): - yield lineno, funcname, text, comments - - if msgbuf: - msgbuf.append(kind, data, pos) - else: - msg_params = attrs.get(i18n_msg) - if msg_params is not None: - if type(msg_params) is list: # event tuple - msg_params = msg_params[0][1] - msgbuf = MessageBuffer( - msg_params, attrs.get(i18n_comment), pos[1] - ) + for message in self._extract_attrs((kind, data, pos), + gettext_functions, + search_text=search_text): + yield message elif not skip and search_text and kind is TEXT: - if not msgbuf: - text = data.strip() - if text and filter(None, [ch.isalpha() for ch in text]): - yield pos[1], None, text, [] - else: - msgbuf.append(kind, data, pos) - - elif not skip and msgbuf and kind is END: - msgbuf.append(kind, data, pos) - if not msgbuf.depth: - yield msgbuf.lineno, None, msgbuf.format(), \ - filter(None, [msgbuf.comment]) - msgbuf = None + text = data.strip() + if text and [ch for ch in text if ch.isalpha()]: + yield pos[1], None, text, comment_stack[-1:] elif kind is EXPR or kind is EXEC: - if msgbuf: - msgbuf.append(kind, data, pos) for funcname, strings in extract_from_code(data, gettext_functions): + # XXX: Do we need to grab i18n:comment from comment_stack ??? yield pos[1], funcname, strings, [] elif kind is SUB: - subkind, substream = data - messages = self.extract(substream, gettext_functions, - search_text=search_text and not skip, - msgbuf=msgbuf) - for lineno, funcname, text, comments in messages: - yield lineno, funcname, text, comments + directives, substream = data + in_comment = False + + for idx, directive in enumerate(directives): + # Do a first loop to see if there's a comment directive + # If there is update context and pop it from directives + if isinstance(directive, CommentDirective): + in_comment = True + comment_stack.append(directive.comment) + if len(directives) == 1: + # in case we're in the presence of something like: + # <p i18n:comment="foo">Foo</p> + for message in self.extract( + substream, gettext_functions, + search_text=search_text and not skip, + comment_stack=comment_stack): + yield message + directives.pop(idx) + elif not isinstance(directive, I18NDirective): + # Remove all other non i18n directives from the process + directives.pop(idx) + + if not directives and not in_comment: + # Extract content if there's no directives because + # strip was pop'ed and not because comment was pop'ed. + # Extraction in this case has been taken care of. + for message in self.extract( + substream, gettext_functions, + search_text=search_text and not skip): + yield message + + for directive in directives: + if isinstance(directive, ExtractableI18NDirective): + for message in directive.extract(self, + substream, gettext_functions, + search_text=search_text and not skip, + comment_stack=comment_stack): + yield message + else: + for message in self.extract( + substream, gettext_functions, + search_text=search_text and not skip, + comment_stack=comment_stack): + yield message + + if in_comment: + comment_stack.pop() + + def get_directive_index(self, dir_cls): + total = len(self._dir_order) + if dir_cls in self._dir_order: + return self._dir_order.index(dir_cls) - total + return total + + def setup(self, template): + """Convenience function to register the `Translator` filter and the + related directives with the given template. + + :param template: a `Template` instance + """ + template.filters.insert(0, self) + if hasattr(template, 'add_directives'): + template.add_directives(Translator.NAMESPACE, self) + + def _extract_attrs(self, event, gettext_functions, search_text): + for name, value in event[1][1]: + if search_text and isinstance(value, basestring): + if name in self.include_attrs: + text = value.strip() + if text: + yield event[2][1], None, text, [] + else: + for message in self.extract(_ensure(value), gettext_functions, + search_text=False): + yield message class MessageBuffer(object): @@ -381,25 +926,23 @@ :since: version 0.5 """ - def __init__(self, params=u'', comment=None, lineno=-1): + def __init__(self, directive=None): """Initialize the message buffer. - :param params: comma-separated list of parameter names - :type params: `basestring` - :param lineno: the line number on which the first stream event - belonging to the message was found + :param directive: the directive owning the buffer + :type directive: I18NDirective """ - if isinstance(params, basestring): - params = [name.strip() for name in params.split(',')] - self.params = params - self.comment = comment - self.lineno = lineno + # params list needs to be copied so that directives can be evaluated + # more than once + self.orig_params = self.params = directive.params[:] + self.directive = directive self.string = [] self.events = {} self.values = {} self.depth = 1 self.order = 1 self.stack = [0] + self.subdirectives = {} def append(self, kind, data, pos): """Append a stream event to the buffer. @@ -408,33 +951,62 @@ :param data: the event data :param pos: the position of the event in the source """ - if kind is TEXT: + if kind is SUB: + # The order needs to be +1 because a new START kind event will + # happen and we we need to wrap those events into our custom kind(s) + order = self.stack[-1] + 1 + subdirectives, substream = data + # Store the directives that should be applied after translation + self.subdirectives.setdefault(order, []).extend(subdirectives) + self.events.setdefault(order, []).append((SUB_START, None, pos)) + for skind, sdata, spos in substream: + self.append(skind, sdata, spos) + self.events.setdefault(order, []).append((SUB_END, None, pos)) + elif kind is TEXT: + if '[' in data or ']' in data: + # Quote [ and ] if it ain't us adding it, ie, if the user is + # using those chars in his templates, escape them + data = data.replace('[', '\[').replace(']', '\]') self.string.append(data) - self.events.setdefault(self.stack[-1], []).append(None) + self.events.setdefault(self.stack[-1], []).append((kind, data, pos)) elif kind is EXPR: - param = self.params.pop(0) + if self.params: + param = self.params.pop(0) + else: + params = ', '.join(['"%s"' % p for p in self.orig_params if p]) + if params: + params = "(%s)" % params + raise IndexError("%d parameters%s given to 'i18n:%s' but " + "%d or more expressions used in '%s', line %s" + % (len(self.orig_params), params, + self.directive.tagname, + len(self.orig_params) + 1, + os.path.basename(pos[0] or + 'In-memory Template'), + pos[1])) self.string.append('%%(%s)s' % param) - self.events.setdefault(self.stack[-1], []).append(None) + self.events.setdefault(self.stack[-1], []).append((kind, data, pos)) self.values[param] = (kind, data, pos) else: - if kind is START: - self.string.append(u'[%d:' % self.order) - self.events.setdefault(self.order, []).append((kind, data, pos)) + if kind is START: + self.string.append('[%d:' % self.order) self.stack.append(self.order) + self.events.setdefault(self.stack[-1], + []).append((kind, data, pos)) self.depth += 1 self.order += 1 elif kind is END: self.depth -= 1 if self.depth: self.events[self.stack[-1]].append((kind, data, pos)) - self.string.append(u']') + self.string.append(']') self.stack.pop() def format(self): """Return a message identifier representing the content in the buffer. """ - return u''.join(self.string).strip() + return ''.join(self.string).strip() def translate(self, string, regex=re.compile(r'%\((\w+)\)s')): """Interpolate the given message translation with the events in the @@ -442,41 +1014,108 @@ :param string: the translated message string """ + substream = None + + def yield_parts(string): + for idx, part in enumerate(regex.split(string)): + if idx % 2: + yield self.values[part] + elif part: + yield (TEXT, + part.replace('\[', '[').replace('\]', ']'), + (None, -1, -1) + ) + parts = parse_msg(string) + parts_counter = {} for order, string in parts: - events = self.events[order] - while events: - event = events.pop(0) - if event: - yield event + parts_counter.setdefault(order, []).append(None) + + while parts: + order, string = parts.pop(0) + if len(parts_counter[order]) == 1: + events = self.events[order] + else: + events = [self.events[order].pop(0)] + parts_counter[order].pop() + + for event in events: + if event[0] is SUB_START: + substream = [] + elif event[0] is SUB_END: + # Yield a substream which might have directives to be + # applied to it (after translation events) + yield SUB, (self.subdirectives[order], substream), event[2] + substream = None + elif event[0] is TEXT: + if string: + for part in yield_parts(string): + if substream is not None: + substream.append(part) + else: + yield part + # String handled, reset it + string = None + elif event[0] is START: + if substream is not None: + substream.append(event) + else: + yield event + if string: + for part in yield_parts(string): + if substream is not None: + substream.append(part) + else: + yield part + # String handled, reset it + string = None + elif event[0] is END: + if string: + for part in yield_parts(string): + if substream is not None: + substream.append(part) + else: + yield part + # String handled, reset it + string = None + if substream is not None: + substream.append(event) + else: + yield event + elif event[0] is EXPR: + # These are handled on the strings itself + continue else: - if not string: - break - for idx, part in enumerate(regex.split(string)): - if idx % 2: - yield self.values[part] - elif part: - yield TEXT, part, (None, -1, -1) - if not self.events[order] or not self.events[order][0]: - break + if string: + for part in yield_parts(string): + if substream is not None: + substream.append(part) + else: + yield part + # String handled, reset it + string = None + if substream is not None: + substream.append(event) + else: + yield event -def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|\]')): +def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')): """Parse a translated message using Genshi mixed content message formatting. - + >>> parse_msg("See [1:Help].") [(0, 'See '), (1, 'Help'), (0, '.')] - + >>> parse_msg("See [1:our [2:Help] page] for details.") [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')] - + >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].") [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')] - + >>> parse_msg("[1:] Bilder pro Seite anzeigen.") [(1, ''), (0, ' Bilder pro Seite anzeigen.')] - + :param string: the translated message string :return: a list of ``(order, string)`` tuples :rtype: `list` @@ -510,14 +1149,13 @@ """Extract strings from Python bytecode. >>> from genshi.template.eval import Expression - >>> expr = Expression('_("Hello")') - >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) + >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) [('_', u'Hello')] - + >>> expr = Expression('ngettext("You have %(num)s item", ' ... '"You have %(num)s items", num)') - >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) + >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] :param code: the `Code` object @@ -591,6 +1229,10 @@ tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), encoding=encoding) + tmpl.loader = None + translator = Translator(None, ignore_tags, include_attrs, extract_text) + if hasattr(tmpl, 'add_directives'): + tmpl.add_directives(Translator.NAMESPACE, translator) for message in translator.extract(tmpl.stream, gettext_functions=keywords): yield message