cmlenz@531: # -*- coding: utf-8 -*- cmlenz@531: # cmlenz@897: # Copyright (C) 2007-2010 Edgewall Software cmlenz@531: # All rights reserved. cmlenz@531: # cmlenz@531: # This software is licensed as described in the file COPYING, which cmlenz@531: # you should have received as part of this distribution. The terms cmlenz@531: # are also available at http://genshi.edgewall.org/wiki/License. cmlenz@531: # cmlenz@531: # This software consists of voluntary contributions made by many cmlenz@531: # individuals. For the exact contribution history, see the revision cmlenz@531: # history and logs, available at http://genshi.edgewall.org/log/. cmlenz@531: cmlenz@849: """Directives and utilities for internationalization and localization of cmlenz@849: templates. cmlenz@576: cmlenz@576: :since: version 0.4 cmlenz@849: :note: Directives support added since version 0.6 cmlenz@576: """ cmlenz@446: cmlenz@856: try: cmlenz@856: any cmlenz@856: except NameError: cmlenz@856: from genshi.util import any cmlenz@788: from gettext import NullTranslations cmlenz@849: import os cmlenz@446: import re cmlenz@788: from types import FunctionType cmlenz@446: cmlenz@895: from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ cmlenz@895: XML_NAMESPACE, _ensure, StreamEventKind cmlenz@794: from genshi.template.eval import _ast cmlenz@790: from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives cmlenz@849: from genshi.template.directives import Directive, StripDirective cmlenz@528: from genshi.template.markup import MarkupTemplate, EXEC hodgestar@933: from genshi.compat import IS_PYTHON2 cmlenz@446: cmlenz@528: __all__ = ['Translator', 'extract'] cmlenz@501: __docformat__ = 'restructuredtext en' cmlenz@501: cmlenz@849: cmlenz@560: I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') cmlenz@560: cmlenz@849: MSGBUF = StreamEventKind('MSGBUF') cmlenz@849: SUB_START = StreamEventKind('SUB_START') cmlenz@849: SUB_END = StreamEventKind('SUB_END') cmlenz@849: 
cmlenz@892: GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', cmlenz@892: 'ugettext', 'ungettext') cmlenz@892: cmlenz@446: cmlenz@849: class I18NDirective(Directive): cmlenz@849: """Simple interface for i18n directives to support messages extraction.""" cmlenz@790: cmlenz@849: def __call__(self, stream, directives, ctxt, **vars): cmlenz@849: return _apply_directives(stream, directives, ctxt, vars) cmlenz@790: cmlenz@790: cmlenz@849: class ExtractableI18NDirective(I18NDirective): cmlenz@849: """Simple interface for directives to support messages extraction.""" cmlenz@790: cmlenz@892: def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, cmlenz@892: search_text=True, comment_stack=None): cmlenz@849: raise NotImplementedError cmlenz@849: cmlenz@849: cmlenz@849: class CommentDirective(I18NDirective): cmlenz@849: """Implementation of the ``i18n:comment`` template directive which adds cmlenz@849: translation comments. cmlenz@849: cmlenz@849: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...

Foo

cmlenz@849: ... ''') cmlenz@849: >>> translator = Translator() cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: >>> list(translator.extract(tmpl.stream)) cmlenz@849: [(2, None, u'Foo', [u'As in Foo Bar'])] cmlenz@849: """ cmlenz@849: __slots__ = ['comment'] cmlenz@849: cmlenz@892: def __init__(self, value, template=None, namespaces=None, lineno=-1, cmlenz@892: offset=-1): cmlenz@849: Directive.__init__(self, None, template, namespaces, lineno, offset) cmlenz@849: self.comment = value cmlenz@849: cmlenz@849: cmlenz@849: class MsgDirective(ExtractableI18NDirective): cmlenz@849: r"""Implementation of the ``i18n:msg`` directive which marks inner content cmlenz@849: as translatable. Consider the following examples: cmlenz@849: cmlenz@849: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...
cmlenz@849: ...

Foo

cmlenz@849: ...

Bar

cmlenz@849: ...
cmlenz@849: ...

Foo bar!

cmlenz@849: ... ''') cmlenz@849: cmlenz@849: >>> translator = Translator() cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: >>> list(translator.extract(tmpl.stream)) cmlenz@849: [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])] cmlenz@853: >>> print(tmpl.generate().render()) cmlenz@849: cmlenz@849:

Foo

cmlenz@849:

Bar

cmlenz@849:

Foo bar!

cmlenz@849: cmlenz@849: cmlenz@849: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...
cmlenz@849: ...

First Name: ${fname}

cmlenz@849: ...

Last Name: ${lname}

cmlenz@849: ...
cmlenz@849: ...

Foo bar!

cmlenz@849: ... ''') cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE cmlenz@849: [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []), cmlenz@849: (6, None, u'Foo [1:bar]!', [])] cmlenz@849: cmlenz@849: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...
cmlenz@849: ...

First Name: ${fname}

cmlenz@849: ...

Last Name: ${lname}

cmlenz@849: ...
cmlenz@849: ...

Foo bar!

cmlenz@849: ... ''') cmlenz@849: >>> translator.setup(tmpl) cmlenz@853: >>> print(tmpl.generate(fname='John', lname='Doe').render()) cmlenz@849: cmlenz@849:

First Name: John

cmlenz@849:

Last Name: Doe

cmlenz@849:

Foo bar!

cmlenz@849: cmlenz@849: cmlenz@849: Starting and ending white-space is stripped off to make it simpler for cmlenz@849: translators. Stripping it is not that important since it's on the html cmlenz@849: source, the rendered output will remain the same. cmlenz@849: """ cmlenz@892: __slots__ = ['params', 'lineno'] cmlenz@790: cmlenz@892: def __init__(self, value, template=None, namespaces=None, lineno=-1, cmlenz@892: offset=-1): cmlenz@790: Directive.__init__(self, None, template, namespaces, lineno, offset) cmlenz@849: self.params = [param.strip() for param in value.split(',') if param] cmlenz@892: self.lineno = lineno cmlenz@849: cmlenz@849: @classmethod cmlenz@849: def attach(cls, template, stream, value, namespaces, pos): cmlenz@849: if type(value) is dict: cmlenz@849: value = value.get('params', '').strip() cmlenz@849: return super(MsgDirective, cls).attach(template, stream, value.strip(), cmlenz@849: namespaces, pos) cmlenz@790: cmlenz@790: def __call__(self, stream, directives, ctxt, **vars): cmlenz@849: gettext = ctxt.get('_i18n.gettext') cmlenz@849: if ctxt.get('_i18n.domain'): cmlenz@891: dgettext = ctxt.get('_i18n.dgettext') cmlenz@854: assert hasattr(dgettext, '__call__'), \ cmlenz@854: 'No domain gettext function passed' cmlenz@849: gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) cmlenz@849: cmlenz@849: def _generate(): cmlenz@849: msgbuf = MessageBuffer(self) cmlenz@849: previous = stream.next() cmlenz@849: if previous[0] is START: cmlenz@849: yield previous cmlenz@849: else: cmlenz@849: msgbuf.append(*previous) cmlenz@849: previous = stream.next() cmlenz@849: for kind, data, pos in stream: cmlenz@849: msgbuf.append(*previous) cmlenz@849: previous = kind, data, pos cmlenz@849: if previous[0] is not END: cmlenz@849: msgbuf.append(*previous) cmlenz@849: previous = None cmlenz@849: for event in msgbuf.translate(gettext(msgbuf.format())): cmlenz@849: yield event cmlenz@849: if previous: cmlenz@849: yield previous cmlenz@849: cmlenz@849: return 
_apply_directives(_generate(), directives, ctxt, vars) cmlenz@849: cmlenz@892: def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, cmlenz@892: search_text=True, comment_stack=None): cmlenz@849: msgbuf = MessageBuffer(self) cmlenz@892: strip = False cmlenz@790: cmlenz@790: stream = iter(stream) cmlenz@790: previous = stream.next() cmlenz@849: if previous[0] is START: cmlenz@892: for message in translator._extract_attrs(previous, cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@849: previous = stream.next() cmlenz@892: strip = True cmlenz@790: for event in stream: cmlenz@892: if event[0] is START: cmlenz@892: for message in translator._extract_attrs(event, cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@790: msgbuf.append(*previous) cmlenz@790: previous = event cmlenz@892: if not strip: cmlenz@892: msgbuf.append(*previous) cmlenz@790: cmlenz@892: yield self.lineno, None, msgbuf.format(), comment_stack[-1:] cmlenz@849: cmlenz@849: cmlenz@849: class ChooseBranchDirective(I18NDirective): cmlenz@849: __slots__ = ['params'] cmlenz@892: cmlenz@849: def __call__(self, stream, directives, ctxt, **vars): cmlenz@849: self.params = ctxt.get('_i18n.choose.params', [])[:] cmlenz@849: msgbuf = MessageBuffer(self) cmlenz@895: stream = _apply_directives(stream, directives, ctxt, vars) cmlenz@849: cmlenz@849: previous = stream.next() palgarvio@871: if previous[0] is START: palgarvio@871: yield previous palgarvio@871: else: palgarvio@871: msgbuf.append(*previous) cmlenz@895: palgarvio@871: try: palgarvio@871: previous = stream.next() palgarvio@871: except StopIteration: palgarvio@871: # For example or directives palgarvio@871: yield MSGBUF, (), -1 # the place holder for msgbuf output cmlenz@895: ctxt['_i18n.choose.%s' % self.tagname] = msgbuf palgarvio@871: return cmlenz@895: cmlenz@895: for event in stream: cmlenz@849: msgbuf.append(*previous) cmlenz@895: 
previous = event cmlenz@849: yield MSGBUF, (), -1 # the place holder for msgbuf output palgarvio@871: palgarvio@871: if previous[0] is END: palgarvio@871: yield previous # the outer end tag palgarvio@871: else: palgarvio@871: msgbuf.append(*previous) cmlenz@895: ctxt['_i18n.choose.%s' % self.tagname] = msgbuf cmlenz@849: cmlenz@892: def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, cmlenz@892: search_text=True, comment_stack=None, msgbuf=None): cmlenz@849: stream = iter(stream) cmlenz@849: previous = stream.next() cmlenz@892: cmlenz@849: if previous[0] is START: cmlenz@892: # skip the enclosing element cmlenz@892: for message in translator._extract_attrs(previous, cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@849: previous = stream.next() cmlenz@892: cmlenz@849: for event in stream: cmlenz@892: if previous[0] is START: cmlenz@892: for message in translator._extract_attrs(previous, cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@849: msgbuf.append(*previous) cmlenz@849: previous = event cmlenz@892: cmlenz@849: if previous[0] is not END: cmlenz@849: msgbuf.append(*previous) cmlenz@849: cmlenz@849: cmlenz@849: class SingularDirective(ChooseBranchDirective): cmlenz@849: """Implementation of the ``i18n:singular`` directive to be used with the cmlenz@849: ``i18n:choose`` directive.""" cmlenz@849: cmlenz@849: cmlenz@849: class PluralDirective(ChooseBranchDirective): cmlenz@849: """Implementation of the ``i18n:plural`` directive to be used with the cmlenz@849: ``i18n:choose`` directive.""" cmlenz@849: cmlenz@849: cmlenz@849: class ChooseDirective(ExtractableI18NDirective): cmlenz@849: """Implementation of the ``i18n:choose`` directive which provides plural cmlenz@849: internationalisation of strings. 
cmlenz@849: cmlenz@849: This directive requires at least one parameter: an expression that evaluates to cmlenz@849: an integer, which is used to choose between the singular and plural forms. If you also cmlenz@849: have expressions inside the singular and plural versions of the string, you cmlenz@849: also need to pass names for those parameters. Consider the following cmlenz@849: examples: cmlenz@849: hodgestar@933: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...
cmlenz@849: ...

There is $num coin

cmlenz@849: ...

There are $num coins

cmlenz@849: ...
cmlenz@849: ... ''') cmlenz@849: >>> translator = Translator() cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE cmlenz@849: [(2, 'ngettext', (u'There is %(num)s coin', cmlenz@849: u'There are %(num)s coins'), [])] cmlenz@849: hodgestar@933: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...
cmlenz@849: ...

There is $num coin

cmlenz@849: ...

There are $num coins

cmlenz@849: ...
cmlenz@849: ... ''') cmlenz@849: >>> translator.setup(tmpl) cmlenz@853: >>> print(tmpl.generate(num=1).render()) cmlenz@849: cmlenz@849:
cmlenz@849:

There is 1 coin

cmlenz@849:
cmlenz@849: cmlenz@853: >>> print(tmpl.generate(num=2).render()) cmlenz@849: cmlenz@849:
cmlenz@849:

There are 2 coins

cmlenz@849:
cmlenz@849: cmlenz@849: cmlenz@888: When used as an element and not as an attribute: cmlenz@849: hodgestar@933: >>> tmpl = MarkupTemplate(''' cmlenz@849: ... cmlenz@849: ...

There is $num coin

cmlenz@849: ...

There are $num coins

cmlenz@849: ...
cmlenz@849: ... ''') cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE cmlenz@849: [(2, 'ngettext', (u'There is %(num)s coin', cmlenz@849: u'There are %(num)s coins'), [])] cmlenz@849: """ cmlenz@892: __slots__ = ['numeral', 'params', 'lineno'] cmlenz@849: cmlenz@892: def __init__(self, value, template=None, namespaces=None, lineno=-1, cmlenz@892: offset=-1): cmlenz@849: Directive.__init__(self, None, template, namespaces, lineno, offset) cmlenz@849: params = [v.strip() for v in value.split(';')] cmlenz@849: self.numeral = self._parse_expr(params.pop(0), template, lineno, offset) cmlenz@849: self.params = params and [name.strip() for name in cmlenz@849: params[0].split(',') if name] or [] cmlenz@892: self.lineno = lineno cmlenz@849: cmlenz@849: @classmethod cmlenz@849: def attach(cls, template, stream, value, namespaces, pos): cmlenz@849: if type(value) is dict: cmlenz@849: numeral = value.get('numeral', '').strip() cmlenz@849: assert numeral is not '', "at least pass the numeral param" cmlenz@849: params = [v.strip() for v in value.get('params', '').split(',')] cmlenz@849: value = '%s; ' % numeral + ', '.join(params) cmlenz@849: return super(ChooseDirective, cls).attach(template, stream, value, cmlenz@849: namespaces, pos) cmlenz@849: cmlenz@849: def __call__(self, stream, directives, ctxt, **vars): cmlenz@849: ctxt.push({'_i18n.choose.params': self.params, cmlenz@895: '_i18n.choose.singular': None, cmlenz@895: '_i18n.choose.plural': None}) cmlenz@895: cmlenz@895: ngettext = ctxt.get('_i18n.ngettext') cmlenz@895: assert hasattr(ngettext, '__call__'), 'No ngettext function available' cmlenz@895: dngettext = ctxt.get('_i18n.dngettext') cmlenz@895: if not dngettext: cmlenz@895: dngettext = lambda d, s, p, n: ngettext(s, p, n) cmlenz@849: cmlenz@849: new_stream = [] cmlenz@849: singular_stream = None cmlenz@849: singular_msgbuf = None cmlenz@849: plural_stream = None cmlenz@849: plural_msgbuf = 
None cmlenz@849: cmlenz@895: numeral = self.numeral.evaluate(ctxt) cmlenz@895: is_plural = self._is_plural(numeral, ngettext) cmlenz@892: cmlenz@895: for event in stream: cmlenz@895: if event[0] is SUB and any(isinstance(d, ChooseBranchDirective) cmlenz@895: for d in event[1][0]): cmlenz@895: subdirectives, substream = event[1] cmlenz@895: cmlenz@895: if isinstance(subdirectives[0], SingularDirective): cmlenz@849: singular_stream = list(_apply_directives(substream, cmlenz@849: subdirectives, cmlenz@849: ctxt, vars)) cmlenz@895: new_stream.append((MSGBUF, None, (None, -1, -1))) cmlenz@895: cmlenz@895: elif isinstance(subdirectives[0], PluralDirective): cmlenz@895: if is_plural: cmlenz@895: plural_stream = list(_apply_directives(substream, cmlenz@895: subdirectives, cmlenz@895: ctxt, vars)) cmlenz@895: cmlenz@849: else: cmlenz@895: new_stream.append(event) cmlenz@849: cmlenz@849: if ctxt.get('_i18n.domain'): cmlenz@849: ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), cmlenz@849: s, p, n) cmlenz@849: cmlenz@895: singular_msgbuf = ctxt.get('_i18n.choose.singular') cmlenz@895: if is_plural: cmlenz@895: plural_msgbuf = ctxt.get('_i18n.choose.plural') cmlenz@895: msgbuf, choice = plural_msgbuf, plural_stream palgarvio@873: else: cmlenz@895: msgbuf, choice = singular_msgbuf, singular_stream cmlenz@895: plural_msgbuf = MessageBuffer(self) palgarvio@873: cmlenz@849: for kind, data, pos in new_stream: cmlenz@849: if kind is MSGBUF: cmlenz@895: for event in choice: cmlenz@895: if event[0] is MSGBUF: cmlenz@849: translation = ngettext(singular_msgbuf.format(), cmlenz@849: plural_msgbuf.format(), cmlenz@895: numeral) cmlenz@895: for subevent in msgbuf.translate(translation): cmlenz@895: yield subevent cmlenz@849: else: cmlenz@895: yield event cmlenz@849: else: cmlenz@849: yield kind, data, pos cmlenz@849: cmlenz@849: ctxt.pop() cmlenz@849: cmlenz@892: def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, cmlenz@892: search_text=True, 
comment_stack=None): cmlenz@892: strip = False cmlenz@849: stream = iter(stream) cmlenz@849: previous = stream.next() cmlenz@892: cmlenz@892: if previous[0] is START: cmlenz@892: # skip the enclosing element cmlenz@892: for message in translator._extract_attrs(previous, cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@892: previous = stream.next() cmlenz@892: strip = True cmlenz@849: cmlenz@849: singular_msgbuf = MessageBuffer(self) cmlenz@849: plural_msgbuf = MessageBuffer(self) cmlenz@849: cmlenz@892: for event in stream: cmlenz@892: if previous[0] is SUB: cmlenz@892: directives, substream = previous[1] cmlenz@892: for directive in directives: cmlenz@892: if isinstance(directive, SingularDirective): cmlenz@892: for message in directive.extract(translator, cmlenz@892: substream, gettext_functions, search_text, cmlenz@892: comment_stack, msgbuf=singular_msgbuf): cmlenz@892: yield message cmlenz@892: elif isinstance(directive, PluralDirective): cmlenz@892: for message in directive.extract(translator, cmlenz@892: substream, gettext_functions, search_text, cmlenz@892: comment_stack, msgbuf=plural_msgbuf): cmlenz@892: yield message cmlenz@892: elif not isinstance(directive, StripDirective): cmlenz@892: singular_msgbuf.append(*previous) cmlenz@892: plural_msgbuf.append(*previous) cmlenz@849: else: cmlenz@892: if previous[0] is START: cmlenz@892: for message in translator._extract_attrs(previous, cmlenz@892: gettext_functions, cmlenz@892: search_text): cmlenz@892: yield message cmlenz@892: singular_msgbuf.append(*previous) cmlenz@892: plural_msgbuf.append(*previous) cmlenz@892: previous = event cmlenz@849: cmlenz@892: if not strip: cmlenz@892: singular_msgbuf.append(*previous) cmlenz@892: plural_msgbuf.append(*previous) cmlenz@892: cmlenz@892: yield self.lineno, 'ngettext', \ cmlenz@849: (singular_msgbuf.format(), plural_msgbuf.format()), \ cmlenz@849: comment_stack[-1:] cmlenz@849: cmlenz@895: def _is_plural(self, 
numeral, ngettext): cmlenz@895: # XXX: should we test which form was chosen like this!?!?!? cmlenz@895: # There should be no match in any catalogue for these singular and cmlenz@895: # plural test strings cmlenz@895: singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' cmlenz@895: plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' cmlenz@895: return ngettext(singular, plural, numeral) == plural cmlenz@895: cmlenz@849: cmlenz@849: class DomainDirective(I18NDirective): cmlenz@849: """Implementation of the ``i18n:domain`` directive which allows choosing cmlenz@849: another i18n domain(catalog) to translate from. cmlenz@849: cmlenz@849: >>> from genshi.filters.tests.i18n import DummyTranslations hodgestar@933: >>> tmpl = MarkupTemplate(''' cmlenz@849: ...

Bar

cmlenz@849: ...
cmlenz@849: ...

FooBar

cmlenz@849: ...

Bar

cmlenz@849: ...

Bar

cmlenz@849: ...

Bar

cmlenz@849: ...
cmlenz@849: ...

Bar

cmlenz@849: ... ''') cmlenz@849: cmlenz@849: >>> translations = DummyTranslations({'Bar': 'Voh'}) cmlenz@849: >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'}) cmlenz@849: >>> translations.add_domain('bar', {'Bar': 'bar_Bar'}) cmlenz@849: >>> translator = Translator(translations) cmlenz@849: >>> translator.setup(tmpl) cmlenz@849: cmlenz@853: >>> print(tmpl.generate().render()) cmlenz@849: cmlenz@849:

Voh

cmlenz@849:
cmlenz@849:

BarFoo

cmlenz@849:

foo_Bar

cmlenz@849:

bar_Bar

cmlenz@849:

Voh

cmlenz@849:
cmlenz@849:

Voh

cmlenz@849: cmlenz@849: """ cmlenz@849: __slots__ = ['domain'] cmlenz@849: cmlenz@892: def __init__(self, value, template=None, namespaces=None, lineno=-1, cmlenz@892: offset=-1): cmlenz@849: Directive.__init__(self, None, template, namespaces, lineno, offset) cmlenz@895: self.domain = value and value.strip() or '__DEFAULT__' cmlenz@849: cmlenz@849: @classmethod cmlenz@849: def attach(cls, template, stream, value, namespaces, pos): cmlenz@849: if type(value) is dict: cmlenz@849: value = value.get('name') cmlenz@849: return super(DomainDirective, cls).attach(template, stream, value, cmlenz@849: namespaces, pos) cmlenz@849: cmlenz@849: def __call__(self, stream, directives, ctxt, **vars): cmlenz@849: ctxt.push({'_i18n.domain': self.domain}) cmlenz@849: for event in _apply_directives(stream, directives, ctxt, vars): cmlenz@790: yield event cmlenz@849: ctxt.pop() cmlenz@790: cmlenz@790: cmlenz@790: class Translator(DirectiveFactory): cmlenz@446: """Can extract and translate localizable strings from markup streams and cmlenz@450: templates. cmlenz@446: cmlenz@849: For example, assume the following template: cmlenz@446: cmlenz@446: >>> tmpl = MarkupTemplate(''' cmlenz@446: ... cmlenz@446: ... Example cmlenz@446: ... cmlenz@446: ... cmlenz@446: ...

Example

cmlenz@446: ...

${_("Hello, %(name)s") % dict(name=username)}

cmlenz@446: ... cmlenz@446: ... ''', filename='example.html') cmlenz@446: cmlenz@446: For demonstration, we define a dummy ``gettext``-style function with a cmlenz@446: hard-coded translation table, and pass that to the `Translator` initializer: cmlenz@446: cmlenz@446: >>> def pseudo_gettext(string): cmlenz@446: ... return { cmlenz@446: ... 'Example': 'Beispiel', cmlenz@446: ... 'Hello, %(name)s': 'Hallo, %(name)s' cmlenz@446: ... }[string] cmlenz@446: >>> translator = Translator(pseudo_gettext) cmlenz@446: cmlenz@446: Next, the translator needs to be prepended to any already defined filters cmlenz@446: on the template: cmlenz@446: cmlenz@446: >>> tmpl.filters.insert(0, translator) cmlenz@446: cmlenz@446: When generating the template output, our hard-coded translations should be cmlenz@446: applied as expected: cmlenz@446: cmlenz@853: >>> print(tmpl.generate(username='Hans', _=pseudo_gettext)) cmlenz@446: cmlenz@446: cmlenz@446: Beispiel cmlenz@446: cmlenz@446: cmlenz@446:

Beispiel

cmlenz@446:

Hallo, Hans

cmlenz@446: cmlenz@446: cmlenz@849: cmlenz@522: Note that elements defining ``xml:lang`` attributes that do not contain cmlenz@522: variable expressions are ignored by this filter. That can be used to cmlenz@522: exclude specific parts of a template from being extracted and translated. cmlenz@446: """ cmlenz@446: cmlenz@790: directives = [ cmlenz@849: ('domain', DomainDirective), cmlenz@790: ('comment', CommentDirective), cmlenz@849: ('msg', MsgDirective), cmlenz@849: ('choose', ChooseDirective), cmlenz@849: ('singular', SingularDirective), cmlenz@849: ('plural', PluralDirective) cmlenz@790: ] cmlenz@790: cmlenz@450: IGNORE_TAGS = frozenset([ cmlenz@450: QName('script'), QName('http://www.w3.org/1999/xhtml}script'), cmlenz@450: QName('style'), QName('http://www.w3.org/1999/xhtml}style') cmlenz@450: ]) cmlenz@849: INCLUDE_ATTRS = frozenset([ hodgestar@1037: 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title', hodgestar@1037: 'placeholder', cmlenz@849: ]) cmlenz@790: NAMESPACE = I18N_NAMESPACE cmlenz@446: cmlenz@788: def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, cmlenz@594: include_attrs=INCLUDE_ATTRS, extract_text=True): cmlenz@446: """Initialize the translator. cmlenz@446: cmlenz@446: :param translate: the translation function, for example ``gettext`` or cmlenz@446: ``ugettext``. 
cmlenz@446: :param ignore_tags: a set of tag names that should not be localized cmlenz@446: :param include_attrs: a set of attribute names that should be localized cmlenz@594: :param extract_text: whether the content of text nodes should be cmlenz@594: extracted, or only text in explicit ``gettext`` cmlenz@594: function calls cmlenz@849: cmlenz@788: :note: Changed in 0.6: the `translate` parameter can now be either cmlenz@788: a ``gettext``-style function, or an object compatible with the cmlenz@788: ``NullTranslations`` or ``GNUTranslations`` interface cmlenz@446: """ cmlenz@450: self.translate = translate cmlenz@446: self.ignore_tags = ignore_tags cmlenz@446: self.include_attrs = include_attrs cmlenz@594: self.extract_text = extract_text cmlenz@446: cmlenz@895: def __call__(self, stream, ctxt=None, translate_text=True, cmlenz@895: translate_attrs=True): cmlenz@448: """Translate any localizable strings in the given stream. cmlenz@448: cmlenz@448: This function shouldn't be called directly. Instead, an instance of cmlenz@448: the `Translator` class should be registered as a filter with the cmlenz@448: `Template` or the `TemplateLoader`, or applied as a regular stream cmlenz@448: filter. If used as a template filter, it should be inserted in front of cmlenz@448: all the default filters. 
cmlenz@448: cmlenz@448: :param stream: the markup event stream cmlenz@448: :param ctxt: the template context; when given, the gettext functions are stored in it and the active ``_i18n.domain`` is read from it cmlenz@895: :param translate_text: whether text nodes should be translated (used cmlenz@895: internally) cmlenz@895: :param translate_attrs: whether attribute values should be translated cmlenz@895: (used internally) cmlenz@448: :return: the localized stream cmlenz@448: """ cmlenz@450: ignore_tags = self.ignore_tags cmlenz@450: include_attrs = self.include_attrs cmlenz@790: skip = 0 cmlenz@790: xml_lang = XML_NAMESPACE['lang'] cmlenz@895: if not self.extract_text: cmlenz@895: translate_text = False cmlenz@895: translate_attrs = False cmlenz@790: cmlenz@788: if type(self.translate) is FunctionType: cmlenz@788: gettext = self.translate cmlenz@849: if ctxt: cmlenz@849: ctxt['_i18n.gettext'] = gettext cmlenz@788: else: hodgestar@933: if IS_PYTHON2: hodgestar@933: gettext = self.translate.ugettext hodgestar@933: ngettext = self.translate.ungettext hodgestar@933: else: hodgestar@933: gettext = self.translate.gettext hodgestar@933: ngettext = self.translate.ngettext cmlenz@849: try: hodgestar@933: if IS_PYTHON2: hodgestar@933: dgettext = self.translate.dugettext hodgestar@933: dngettext = self.translate.dungettext hodgestar@933: else: hodgestar@933: dgettext = self.translate.dgettext hodgestar@933: dngettext = self.translate.dngettext cmlenz@849: except AttributeError: cmlenz@895: dgettext = lambda _, y: gettext(y) cmlenz@895: dngettext = lambda _, s, p, n: ngettext(s, p, n) cmlenz@849: if ctxt: cmlenz@849: ctxt['_i18n.gettext'] = gettext cmlenz@895: ctxt['_i18n.ngettext'] = ngettext cmlenz@849: ctxt['_i18n.dgettext'] = dgettext cmlenz@849: ctxt['_i18n.dngettext'] = dngettext cmlenz@787: cmlenz@849: if ctxt and ctxt.get('_i18n.domain'): hodgestar@933: # TODO: This can cause infinite recursion if dgettext is defined hodgestar@933: # via the AttributeError case above! 
cmlenz@849: gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) cmlenz@849: cmlenz@446: for kind, data, pos in stream: cmlenz@446: cmlenz@446: # skip chunks that should not be localized cmlenz@446: if skip: cmlenz@446: if kind is START: cmlenz@522: skip += 1 cmlenz@446: elif kind is END: cmlenz@522: skip -= 1 cmlenz@446: yield kind, data, pos cmlenz@446: continue cmlenz@446: cmlenz@446: # handle different events that can be localized cmlenz@446: if kind is START: cmlenz@446: tag, attrs = data cmlenz@522: if tag in self.ignore_tags or \ cmlenz@522: isinstance(attrs.get(xml_lang), basestring): cmlenz@446: skip += 1 cmlenz@446: yield kind, data, pos cmlenz@446: continue cmlenz@446: cmlenz@493: new_attrs = [] cmlenz@446: changed = False cmlenz@849: cmlenz@446: for name, value in attrs: cmlenz@483: newval = value cmlenz@895: if isinstance(value, basestring): cmlenz@895: if translate_attrs and name in include_attrs: cmlenz@788: newval = gettext(value) cmlenz@483: else: cmlenz@849: newval = list( cmlenz@895: self(_ensure(value), ctxt, translate_text=False) cmlenz@483: ) cmlenz@483: if newval != value: cmlenz@483: value = newval cmlenz@483: changed = True cmlenz@446: new_attrs.append((name, value)) cmlenz@446: if changed: cmlenz@667: attrs = Attrs(new_attrs) cmlenz@446: cmlenz@446: yield kind, (tag, attrs), pos cmlenz@446: cmlenz@895: elif translate_text and kind is TEXT: cmlenz@790: text = data.strip() cmlenz@790: if text: cmlenz@790: data = data.replace(text, unicode(gettext(text))) cmlenz@790: yield kind, data, pos cmlenz@446: cmlenz@446: elif kind is SUB: cmlenz@790: directives, substream = data cmlenz@849: current_domain = None cmlenz@849: for idx, directive in enumerate(directives): cmlenz@849: # Organize directives to make everything work cmlenz@895: # FIXME: There's got to be a better way to do this! 
cmlenz@849: if isinstance(directive, DomainDirective): cmlenz@849: # Grab current domain and update context cmlenz@849: current_domain = directive.domain cmlenz@849: ctxt.push({'_i18n.domain': current_domain}) cmlenz@849: # Put domain directive as the first one in order to cmlenz@849: # update context before any other directives evaluation cmlenz@849: directives.insert(0, directives.pop(idx)) cmlenz@849: cmlenz@849: # If this is an i18n directive, no need to translate text cmlenz@790: # nodes here cmlenz@856: is_i18n_directive = any([ cmlenz@856: isinstance(d, ExtractableI18NDirective) cmlenz@856: for d in directives cmlenz@856: ]) cmlenz@790: substream = list(self(substream, ctxt, cmlenz@895: translate_text=not is_i18n_directive, cmlenz@895: translate_attrs=translate_attrs)) cmlenz@790: yield kind, (directives, substream), pos cmlenz@560: cmlenz@849: if current_domain: cmlenz@849: ctxt.pop() cmlenz@446: else: cmlenz@446: yield kind, data, pos cmlenz@446: cmlenz@485: def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, cmlenz@891: search_text=True, comment_stack=None): cmlenz@446: """Extract localizable strings from the given template stream. cmlenz@446: cmlenz@450: For every string found, this function yields a ``(lineno, function, cmlenz@787: message, comments)`` tuple, where: cmlenz@446: cmlenz@450: * ``lineno`` is the number of the line on which the string was found, cmlenz@450: * ``function`` is the name of the ``gettext`` function used (if the cmlenz@450: string was extracted from embedded Python code), and cmlenz@469: * ``message`` is the string itself (a ``unicode`` object, or a tuple cmlenz@787: of ``unicode`` objects for functions with multiple string cmlenz@787: arguments). cmlenz@787: * ``comments`` is a list of comments related to the message, extracted cmlenz@787: from ``i18n:comment`` attributes found in the markup cmlenz@446: cmlenz@446: >>> tmpl = MarkupTemplate(''' cmlenz@446: ... cmlenz@446: ... Example cmlenz@446: ... cmlenz@446: ... 
cmlenz@446: ...

Example

cmlenz@446: ...

${_("Hello, %(name)s") % dict(name=username)}

cmlenz@469: ...

${ngettext("You have %d item", "You have %d items", num)}

cmlenz@446: ... cmlenz@446: ... ''', filename='example.html') cmlenz@787: >>> for line, func, msg, comments in Translator().extract(tmpl.stream): cmlenz@853: ... print('%d, %r, %r' % (line, func, msg)) cmlenz@450: 3, None, u'Example' cmlenz@450: 6, None, u'Example' cmlenz@450: 7, '_', u'Hello, %(name)s' cmlenz@565: 8, 'ngettext', (u'You have %d item', u'You have %d items', None) cmlenz@469: cmlenz@450: :param stream: the event stream to extract strings from; can be a cmlenz@450: regular stream or a template stream cmlenz@450: :param gettext_functions: a sequence of function names that should be cmlenz@450: treated as gettext-style localization cmlenz@450: functions cmlenz@485: :param search_text: whether the content of text nodes should be cmlenz@485: extracted (used internally) cmlenz@469: cmlenz@469: :note: Changed in 0.4.1: For a function with multiple string arguments cmlenz@469: (such as ``ngettext``), a single item with a tuple of strings is cmlenz@469: yielded, instead of an item for each string argument. cmlenz@849: :note: Changed in 0.6: The returned tuples now include a fourth cmlenz@849: element, which is a list of comments for the translator. 
cmlenz@446: """ cmlenz@594: if not self.extract_text: cmlenz@594: search_text = False cmlenz@849: if comment_stack is None: cmlenz@849: comment_stack = [] cmlenz@446: skip = 0 cmlenz@849: cmlenz@522: xml_lang = XML_NAMESPACE['lang'] cmlenz@446: cmlenz@446: for kind, data, pos in stream: cmlenz@446: if skip: cmlenz@446: if kind is START: cmlenz@522: skip += 1 cmlenz@446: if kind is END: cmlenz@522: skip -= 1 cmlenz@446: cmlenz@549: if kind is START and not skip: cmlenz@446: tag, attrs = data cmlenz@522: if tag in self.ignore_tags or \ cmlenz@522: isinstance(attrs.get(xml_lang), basestring): cmlenz@446: skip += 1 cmlenz@446: continue cmlenz@446: cmlenz@892: for message in self._extract_attrs((kind, data, pos), cmlenz@892: gettext_functions, cmlenz@892: search_text=search_text): cmlenz@892: yield message cmlenz@446: cmlenz@549: elif not skip and search_text and kind is TEXT: cmlenz@891: text = data.strip() cmlenz@891: if text and [ch for ch in text if ch.isalpha()]: cmlenz@891: yield pos[1], None, text, comment_stack[-1:] cmlenz@446: cmlenz@446: elif kind is EXPR or kind is EXEC: cmlenz@565: for funcname, strings in extract_from_code(data, cmlenz@561: gettext_functions): cmlenz@849: # XXX: Do we need to grab i18n:comment from comment_stack ??? cmlenz@787: yield pos[1], funcname, strings, [] cmlenz@446: cmlenz@446: elif kind is SUB: cmlenz@849: directives, substream = data cmlenz@849: in_comment = False cmlenz@849: cmlenz@849: for idx, directive in enumerate(directives): cmlenz@849: # Do a first loop to see if there's a comment directive cmlenz@849: # If there is update context and pop it from directives cmlenz@849: if isinstance(directive, CommentDirective): cmlenz@849: in_comment = True cmlenz@849: comment_stack.append(directive.comment) cmlenz@849: if len(directives) == 1: cmlenz@849: # in case we're in the presence of something like: cmlenz@849: #

Foo

cmlenz@892: for message in self.extract( cmlenz@892: substream, gettext_functions, cmlenz@892: search_text=search_text and not skip, cmlenz@892: comment_stack=comment_stack): cmlenz@892: yield message cmlenz@849: directives.pop(idx) cmlenz@849: elif not isinstance(directive, I18NDirective): cmlenz@849: # Remove all other non i18n directives from the process cmlenz@849: directives.pop(idx) cmlenz@849: cmlenz@849: if not directives and not in_comment: cmlenz@849: # Extract content if there's no directives because cmlenz@849: # strip was pop'ed and not because comment was pop'ed. cmlenz@849: # Extraction in this case has been taken care of. cmlenz@892: for message in self.extract( cmlenz@892: substream, gettext_functions, cmlenz@892: search_text=search_text and not skip): cmlenz@892: yield message cmlenz@849: cmlenz@849: for directive in directives: cmlenz@849: if isinstance(directive, ExtractableI18NDirective): cmlenz@892: for message in directive.extract(self, cmlenz@892: substream, gettext_functions, cmlenz@892: search_text=search_text and not skip, cmlenz@892: comment_stack=comment_stack): cmlenz@892: yield message cmlenz@849: else: cmlenz@892: for message in self.extract( cmlenz@892: substream, gettext_functions, cmlenz@892: search_text=search_text and not skip, cmlenz@892: comment_stack=comment_stack): cmlenz@892: yield message cmlenz@849: cmlenz@849: if in_comment: cmlenz@849: comment_stack.pop() cmlenz@849: cmlenz@849: def get_directive_index(self, dir_cls): cmlenz@849: total = len(self._dir_order) cmlenz@849: if dir_cls in self._dir_order: cmlenz@849: return self._dir_order.index(dir_cls) - total cmlenz@849: return total cmlenz@849: cmlenz@849: def setup(self, template): cmlenz@849: """Convenience function to register the `Translator` filter and the cmlenz@849: related directives with the given template. 
cmlenz@849: cmlenz@849: :param template: a `Template` instance cmlenz@849: """ cmlenz@849: template.filters.insert(0, self) cmlenz@849: if hasattr(template, 'add_directives'): cmlenz@849: template.add_directives(Translator.NAMESPACE, self) cmlenz@528: cmlenz@892: def _extract_attrs(self, event, gettext_functions, search_text): cmlenz@892: for name, value in event[1][1]: cmlenz@892: if search_text and isinstance(value, basestring): cmlenz@892: if name in self.include_attrs: cmlenz@892: text = value.strip() cmlenz@892: if text: cmlenz@892: yield event[2][1], None, text, [] cmlenz@892: else: cmlenz@892: for message in self.extract(_ensure(value), gettext_functions, cmlenz@892: search_text=False): cmlenz@892: yield message cmlenz@892: cmlenz@528: cmlenz@560: class MessageBuffer(object): cmlenz@738: """Helper class for managing internationalized mixed content. cmlenz@576: cmlenz@576: :since: version 0.5 cmlenz@576: """ cmlenz@560: cmlenz@849: def __init__(self, directive=None): cmlenz@738: """Initialize the message buffer. 
cmlenz@738: cmlenz@882: :param directive: the directive owning the buffer cmlenz@882: :type directive: I18NDirective cmlenz@738: """ cmlenz@849: # params list needs to be copied so that directives can be evaluated cmlenz@849: # more than once cmlenz@849: self.orig_params = self.params = directive.params[:] cmlenz@849: self.directive = directive cmlenz@738: self.string = [] cmlenz@560: self.events = {} cmlenz@775: self.values = {} cmlenz@560: self.depth = 1 cmlenz@560: self.order = 1 hodgestar@952: self._prev_order = None cmlenz@560: self.stack = [0] cmlenz@849: self.subdirectives = {} cmlenz@560: hodgestar@952: def _add_event(self, order, event): hodgestar@952: if order == self._prev_order: hodgestar@952: self.events[order][-1].append(event) hodgestar@952: else: hodgestar@952: self._prev_order = order hodgestar@952: self.events.setdefault(order, []) hodgestar@952: self.events[order].append([event]) hodgestar@952: cmlenz@560: def append(self, kind, data, pos): cmlenz@738: """Append a stream event to the buffer. 
cmlenz@738: cmlenz@738: :param kind: the stream event kind cmlenz@738: :param data: the event data cmlenz@738: :param pos: the position of the event in the source cmlenz@738: """ cmlenz@849: if kind is SUB: cmlenz@849: # The order needs to be +1 because a new START kind event will cmlenz@849: # happen and we we need to wrap those events into our custom kind(s) cmlenz@849: order = self.stack[-1] + 1 cmlenz@849: subdirectives, substream = data cmlenz@849: # Store the directives that should be applied after translation cmlenz@849: self.subdirectives.setdefault(order, []).extend(subdirectives) hodgestar@952: self._add_event(order, (SUB_START, None, pos)) cmlenz@849: for skind, sdata, spos in substream: cmlenz@849: self.append(skind, sdata, spos) hodgestar@952: self._add_event(order, (SUB_END, None, pos)) cmlenz@849: elif kind is TEXT: cmlenz@849: if '[' in data or ']' in data: cmlenz@849: # Quote [ and ] if it ain't us adding it, ie, if the user is cmlenz@849: # using those chars in his templates, escape them cmlenz@849: data = data.replace('[', '\[').replace(']', '\]') cmlenz@738: self.string.append(data) hodgestar@952: self._add_event(self.stack[-1], (kind, data, pos)) cmlenz@775: elif kind is EXPR: cmlenz@849: if self.params: cmlenz@849: param = self.params.pop(0) cmlenz@849: else: cmlenz@849: params = ', '.join(['"%s"' % p for p in self.orig_params if p]) cmlenz@849: if params: cmlenz@849: params = "(%s)" % params cmlenz@849: raise IndexError("%d parameters%s given to 'i18n:%s' but " cmlenz@849: "%d or more expressions used in '%s', line %s" cmlenz@849: % (len(self.orig_params), params, cmlenz@849: self.directive.tagname, cmlenz@892: len(self.orig_params) + 1, cmlenz@849: os.path.basename(pos[0] or palgarvio@872: 'In-memory Template'), cmlenz@849: pos[1])) cmlenz@775: self.string.append('%%(%s)s' % param) hodgestar@952: self._add_event(self.stack[-1], (kind, data, pos)) cmlenz@775: self.values[param] = (kind, data, pos) cmlenz@560: else: cmlenz@849: if kind is 
START: cmlenz@854: self.string.append('[%d:' % self.order) cmlenz@560: self.stack.append(self.order) hodgestar@952: self._add_event(self.stack[-1], (kind, data, pos)) cmlenz@560: self.depth += 1 cmlenz@560: self.order += 1 cmlenz@560: elif kind is END: cmlenz@560: self.depth -= 1 cmlenz@560: if self.depth: hodgestar@952: self._add_event(self.stack[-1], (kind, data, pos)) cmlenz@854: self.string.append(']') cmlenz@560: self.stack.pop() cmlenz@560: cmlenz@560: def format(self): cmlenz@738: """Return a message identifier representing the content in the cmlenz@738: buffer. cmlenz@738: """ cmlenz@854: return ''.join(self.string).strip() cmlenz@560: cmlenz@775: def translate(self, string, regex=re.compile(r'%\((\w+)\)s')): cmlenz@738: """Interpolate the given message translation with the events in the cmlenz@738: buffer and return the translated stream. cmlenz@738: cmlenz@738: :param string: the translated message string cmlenz@738: """ cmlenz@849: substream = None cmlenz@895: cmlenz@849: def yield_parts(string): cmlenz@849: for idx, part in enumerate(regex.split(string)): cmlenz@849: if idx % 2: cmlenz@849: yield self.values[part] cmlenz@849: elif part: cmlenz@849: yield (TEXT, cmlenz@849: part.replace('\[', '[').replace('\]', ']'), cmlenz@849: (None, -1, -1) cmlenz@849: ) cmlenz@849: cmlenz@560: parts = parse_msg(string) cmlenz@849: parts_counter = {} cmlenz@560: for order, string in parts: cmlenz@849: parts_counter.setdefault(order, []).append(None) cmlenz@849: cmlenz@849: while parts: cmlenz@849: order, string = parts.pop(0) hodgestar@1016: events = self.events[order] hodgestar@1016: if events: hodgestar@1016: events = events.pop(0) hodgestar@1016: else: hodgestar@1016: # create a dummy empty text event so any remaining hodgestar@1016: # part of the translation can be processed. 
hodgestar@1016: events = [(TEXT, "", (None, -1, -1))] cmlenz@849: parts_counter[order].pop() cmlenz@849: cmlenz@849: for event in events: cmlenz@849: if event[0] is SUB_START: cmlenz@849: substream = [] cmlenz@849: elif event[0] is SUB_END: cmlenz@849: # Yield a substream which might have directives to be cmlenz@849: # applied to it (after translation events) cmlenz@849: yield SUB, (self.subdirectives[order], substream), event[2] cmlenz@849: substream = None cmlenz@849: elif event[0] is TEXT: cmlenz@849: if string: cmlenz@849: for part in yield_parts(string): cmlenz@849: if substream is not None: cmlenz@849: substream.append(part) cmlenz@849: else: cmlenz@849: yield part cmlenz@849: # String handled, reset it cmlenz@849: string = None cmlenz@849: elif event[0] is START: cmlenz@849: if substream is not None: cmlenz@849: substream.append(event) cmlenz@849: else: cmlenz@849: yield event cmlenz@849: if string: cmlenz@849: for part in yield_parts(string): cmlenz@849: if substream is not None: cmlenz@849: substream.append(part) cmlenz@849: else: cmlenz@849: yield part cmlenz@849: # String handled, reset it cmlenz@849: string = None cmlenz@849: elif event[0] is END: cmlenz@849: if string: cmlenz@849: for part in yield_parts(string): cmlenz@849: if substream is not None: cmlenz@849: substream.append(part) cmlenz@849: else: cmlenz@849: yield part cmlenz@849: # String handled, reset it cmlenz@849: string = None cmlenz@849: if substream is not None: cmlenz@849: substream.append(event) cmlenz@849: else: cmlenz@849: yield event cmlenz@849: elif event[0] is EXPR: cmlenz@849: # These are handled on the strings itself cmlenz@849: continue cmlenz@775: else: cmlenz@849: if string: cmlenz@849: for part in yield_parts(string): cmlenz@849: if substream is not None: cmlenz@849: substream.append(part) cmlenz@849: else: cmlenz@849: yield part cmlenz@849: # String handled, reset it cmlenz@849: string = None cmlenz@849: if substream is not None: cmlenz@849: substream.append(event) 
cmlenz@849: else: cmlenz@849: yield event cmlenz@560: cmlenz@892: cmlenz@849: def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?>> parse_msg("See [1:Help].") cmlenz@738: [(0, 'See '), (1, 'Help'), (0, '.')] cmlenz@849: cmlenz@738: >>> parse_msg("See [1:our [2:Help] page] for details.") cmlenz@738: [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')] cmlenz@849: cmlenz@738: >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].") cmlenz@738: [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')] cmlenz@849: cmlenz@738: >>> parse_msg("[1:] Bilder pro Seite anzeigen.") cmlenz@738: [(1, ''), (0, ' Bilder pro Seite anzeigen.')] cmlenz@849: cmlenz@738: :param string: the translated message string cmlenz@738: :return: a list of ``(order, string)`` tuples cmlenz@738: :rtype: `list` cmlenz@738: """ cmlenz@738: parts = [] cmlenz@738: stack = [0] cmlenz@738: while True: cmlenz@738: mo = regex.search(string) cmlenz@738: if not mo: cmlenz@738: break cmlenz@738: cmlenz@738: if mo.start() or stack[-1]: cmlenz@738: parts.append((stack[-1], string[:mo.start()])) cmlenz@738: string = string[mo.end():] cmlenz@738: cmlenz@738: orderno = mo.group(1) cmlenz@738: if orderno is not None: cmlenz@738: stack.append(int(orderno)) cmlenz@738: else: cmlenz@738: stack.pop() cmlenz@738: if not stack: cmlenz@738: break cmlenz@738: cmlenz@738: if string: cmlenz@738: parts.append((stack[-1], string)) cmlenz@738: cmlenz@738: return parts cmlenz@738: cmlenz@776: cmlenz@561: def extract_from_code(code, gettext_functions): cmlenz@561: """Extract strings from Python bytecode. cmlenz@561: cmlenz@561: >>> from genshi.template.eval import Expression cmlenz@561: >>> expr = Expression('_("Hello")') cmlenz@892: >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) cmlenz@561: [('_', u'Hello')] cmlenz@849: cmlenz@561: >>> expr = Expression('ngettext("You have %(num)s item", ' cmlenz@561: ... 
'"You have %(num)s items", num)') cmlenz@892: >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS)) cmlenz@565: [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] cmlenz@561: cmlenz@565: :param code: the `Code` object cmlenz@565: :type code: `genshi.template.eval.Code` cmlenz@561: :param gettext_functions: a sequence of function names cmlenz@576: :since: version 0.5 cmlenz@561: """ cmlenz@565: def _walk(node): cmlenz@794: if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \ cmlenz@794: and node.func.id in gettext_functions: cmlenz@565: strings = [] cmlenz@600: def _add(arg): hodgestar@933: if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode): hodgestar@933: strings.append(arg.s) hodgestar@933: elif isinstance(arg, _ast.Str): cmlenz@794: strings.append(unicode(arg.s, 'utf-8')) cmlenz@794: elif arg: cmlenz@565: strings.append(None) cmlenz@600: [_add(arg) for arg in node.args] cmlenz@794: _add(node.starargs) cmlenz@794: _add(node.kwargs) cmlenz@565: if len(strings) == 1: cmlenz@565: strings = strings[0] cmlenz@561: else: cmlenz@565: strings = tuple(strings) cmlenz@794: yield node.func.id, strings cmlenz@794: elif node._fields: cmlenz@794: children = [] cmlenz@794: for field in node._fields: cmlenz@794: child = getattr(node, field, None) cmlenz@794: if isinstance(child, list): cmlenz@794: for elem in child: cmlenz@794: children.append(elem) cmlenz@794: elif isinstance(child, _ast.AST): cmlenz@794: children.append(child) cmlenz@794: for child in children: cmlenz@565: for funcname, strings in _walk(child): cmlenz@565: yield funcname, strings cmlenz@565: return _walk(code.ast) cmlenz@561: cmlenz@776: cmlenz@528: def extract(fileobj, keywords, comment_tags, options): cmlenz@528: """Babel extraction method for Genshi templates. cmlenz@528: cmlenz@528: :param fileobj: the file-like object the messages should be extracted from cmlenz@528: :param keywords: a list of keywords (i.e. 
function names) that should be cmlenz@528: recognized as translation functions cmlenz@528: :param comment_tags: a list of translator tags to search for and include cmlenz@528: in the results cmlenz@528: :param options: a dictionary of additional options (optional) cmlenz@528: :return: an iterator over ``(lineno, funcname, message, comments)`` tuples cmlenz@528: :rtype: ``iterator`` cmlenz@528: """ cmlenz@528: template_class = options.get('template_class', MarkupTemplate) cmlenz@528: if isinstance(template_class, basestring): cmlenz@528: module, clsname = template_class.split(':', 1) cmlenz@528: template_class = getattr(__import__(module, {}, {}, [clsname]), clsname) cmlenz@528: encoding = options.get('encoding', None) cmlenz@528: cmlenz@596: extract_text = options.get('extract_text', True) cmlenz@596: if isinstance(extract_text, basestring): cmlenz@596: extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true') cmlenz@596: cmlenz@528: ignore_tags = options.get('ignore_tags', Translator.IGNORE_TAGS) cmlenz@528: if isinstance(ignore_tags, basestring): cmlenz@528: ignore_tags = ignore_tags.split() cmlenz@528: ignore_tags = [QName(tag) for tag in ignore_tags] cmlenz@596: cmlenz@528: include_attrs = options.get('include_attrs', Translator.INCLUDE_ATTRS) cmlenz@528: if isinstance(include_attrs, basestring): cmlenz@528: include_attrs = include_attrs.split() cmlenz@528: include_attrs = [QName(attr) for attr in include_attrs] cmlenz@528: cmlenz@528: tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), cmlenz@528: encoding=encoding) cmlenz@882: tmpl.loader = None cmlenz@849: cmlenz@596: translator = Translator(None, ignore_tags, include_attrs, extract_text) cmlenz@849: if hasattr(tmpl, 'add_directives'): cmlenz@849: tmpl.add_directives(Translator.NAMESPACE, translator) cmlenz@787: for message in translator.extract(tmpl.stream, gettext_functions=keywords): cmlenz@787: yield message