genshi/genshi-test: genshi/filters/i18n.py comparison

comparison genshi/filters/i18n.py @ 902:09cc3627654c experimental-inline

Sync `experimental/inline` branch with [source:trunk@1126].

author	cmlenz
date	Fri, 23 Apr 2010 21:08:26 +0000
parents	1837f39efd6f
children

comparison

equal deleted inserted replaced

-:de82830f8816
+:09cc3627654c
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2007 Edgewall Software
+# Copyright (C) 2007-2010 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
 # you should have received as part of this distribution. The terms
 # are also available at http://genshi.edgewall.org/wiki/License.
 #
 # This software consists of voluntary contributions made by many
 # individuals. For the exact contribution history, see the revision
 # history and logs, available at http://genshi.edgewall.org/log/.
-"""Utilities for internationalization and localization of templates.
+"""Directives and utilities for internationalization and localization of
+templates.
 :since: version 0.4
+:note: Directives support added since version 0.6
 """
+try:
+any
+except NameError:
+from genshi.util import any
 from gettext import NullTranslations
+import os
 import re
 from types import FunctionType
-from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \
+from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
-END_NS, XML_NAMESPACE, _ensure
+XML_NAMESPACE, _ensure, StreamEventKind
 from genshi.template.eval import _ast
 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
-from genshi.template.directives import Directive
+from genshi.template.directives import Directive, StripDirective
 from genshi.template.markup import MarkupTemplate, EXEC
 __all__ = ['Translator', 'extract']
 __docformat__ = 'restructuredtext en'
 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
+MSGBUF = StreamEventKind('MSGBUF')
-class CommentDirective(Directive):
+SUB_START = StreamEventKind('SUB_START')
+SUB_END = StreamEventKind('SUB_END')
-__slots__ = []
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
+'ugettext', 'ungettext')
+class I18NDirective(Directive):
+"""Simple interface for i18n directives to support messages extraction."""
+def __call__(self, stream, directives, ctxt, **vars):
+return _apply_directives(stream, directives, ctxt, vars)
+class ExtractableI18NDirective(I18NDirective):
+"""Simple interface for directives to support messages extraction."""
+def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+search_text=True, comment_stack=None):
+raise NotImplementedError
+class CommentDirective(I18NDirective):
+"""Implementation of the ``i18n:comment`` template directive which adds
+translation comments.
+>>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <p i18n:comment="As in Foo Bar">Foo</p>
+... </html>''')
+>>> translator = Translator()
+>>> translator.setup(tmpl)
+>>> list(translator.extract(tmpl.stream))
+[(2, None, u'Foo', [u'As in Foo Bar'])]
+"""
+__slots__ = ['comment']
+def __init__(self, value, template=None, namespaces=None, lineno=-1,
+offset=-1):
+Directive.__init__(self, None, template, namespaces, lineno, offset)
+self.comment = value
+class MsgDirective(ExtractableI18NDirective):
+r"""Implementation of the ``i18n:msg`` directive which marks inner content
+as translatable. Consider the following examples:
+>>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <div i18n:msg="">
+...     <p>Foo</p>
+...     <p>Bar</p>
+...   </div>
+...   <p i18n:msg="">Foo <em>bar</em>!</p>
+... </html>''')
+>>> translator = Translator()
+>>> translator.setup(tmpl)
+>>> list(translator.extract(tmpl.stream))
+[(2, None, u'[1:Foo]\n    [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
+>>> print(tmpl.generate().render())
+<html>
+<div><p>Foo</p>
+<p>Bar</p></div>
+<p>Foo <em>bar</em>!</p>
+</html>
+>>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <div i18n:msg="fname, lname">
+...     <p>First Name: ${fname}</p>
+...     <p>Last Name: ${lname}</p>
+...   </div>
+...   <p i18n:msg="">Foo <em>bar</em>!</p>
+... </html>''')
+>>> translator.setup(tmpl)
+>>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+[(2, None, u'[1:First Name: %(fname)s]\n    [2:Last Name: %(lname)s]', []),
+(6, None, u'Foo [1:bar]!', [])]
+>>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <div i18n:msg="fname, lname">
+...     <p>First Name: ${fname}</p>
+...     <p>Last Name: ${lname}</p>
+...   </div>
+...   <p i18n:msg="">Foo <em>bar</em>!</p>
+... </html>''')
+>>> translator.setup(tmpl)
+>>> print(tmpl.generate(fname='John', lname='Doe').render())
+<html>
+<div><p>First Name: John</p>
+<p>Last Name: Doe</p></div>
+<p>Foo <em>bar</em>!</p>
+</html>
+Starting and ending white-space is stripped off to make it simpler for
+translators. Stripping it is not that important since it's on the html
+source, the rendered output will remain the same.
+"""
+__slots__ = ['params', 'lineno']
+def __init__(self, value, template=None, namespaces=None, lineno=-1,
+offset=-1):
+Directive.__init__(self, None, template, namespaces, lineno, offset)
+self.params = [param.strip() for param in value.split(',') if param]
+self.lineno = lineno
 @classmethod
 def attach(cls, template, stream, value, namespaces, pos):
-return None, stream
+if type(value) is dict:
+value = value.get('params', '').strip()
+return super(MsgDirective, cls).attach(template, stream, value.strip(),
-class MsgDirective(Directive):
+namespaces, pos)
+def __call__(self, stream, directives, ctxt, **vars):
+gettext = ctxt.get('_i18n.gettext')
+if ctxt.get('_i18n.domain'):
+dgettext = ctxt.get('_i18n.dgettext')
+assert hasattr(dgettext, '__call__'), \
+'No domain gettext function passed'
+gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
+def _generate():
+msgbuf = MessageBuffer(self)
+previous = stream.next()
+if previous[0] is START:
+yield previous
+else:
+msgbuf.append(*previous)
+previous = stream.next()
+for kind, data, pos in stream:
+msgbuf.append(*previous)
+previous = kind, data, pos
+if previous[0] is not END:
+msgbuf.append(*previous)
+previous = None
+for event in msgbuf.translate(gettext(msgbuf.format())):
+yield event
+if previous:
+yield previous
+return _apply_directives(_generate(), directives, ctxt, vars)
+def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+search_text=True, comment_stack=None):
+msgbuf = MessageBuffer(self)
+strip = False
+stream = iter(stream)
+previous = stream.next()
+if previous[0] is START:
+for message in translator._extract_attrs(previous,
+gettext_functions,
+search_text=search_text):
+yield message
+previous = stream.next()
+strip = True
+for event in stream:
+if event[0] is START:
+for message in translator._extract_attrs(event,
+gettext_functions,
+search_text=search_text):
+yield message
+msgbuf.append(*previous)
+previous = event
+if not strip:
+msgbuf.append(*previous)
+yield self.lineno, None, msgbuf.format(), comment_stack[-1:]
+class ChooseBranchDirective(I18NDirective):
 __slots__ = ['params']
-def __init__(self, value, template, hints=None, namespaces=None,
-lineno=-1, offset=-1):
-Directive.__init__(self, None, template, namespaces, lineno, offset)
-self.params = [name.strip() for name in value.split(',')]
 def __call__(self, stream, directives, ctxt, **vars):
-msgbuf = MessageBuffer(self.params)
+self.params = ctxt.get('_i18n.choose.params', [])[:]
+msgbuf = MessageBuffer(self)
-stream = iter(stream)
+stream = _apply_directives(stream, directives, ctxt, vars)
-yield stream.next() # the outer start tag
 previous = stream.next()
+if previous[0] is START:
+yield previous
+else:
+msgbuf.append(*previous)
+try:
+previous = stream.next()
+except StopIteration:
+# For example <i18n:singular> or <i18n:plural> directives
+yield MSGBUF, (), -1 # the place holder for msgbuf output
+ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
+return
 for event in stream:
 msgbuf.append(*previous)
 previous = event
+yield MSGBUF, (), -1 # the place holder for msgbuf output
-gettext = ctxt.get('_i18n.gettext')
-for event in msgbuf.translate(gettext(msgbuf.format())):
+if previous[0] is END:
+yield previous # the outer end tag
+else:
+msgbuf.append(*previous)
+ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
+def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+search_text=True, comment_stack=None, msgbuf=None):
+stream = iter(stream)
+previous = stream.next()
+if previous[0] is START:
+# skip the enclosing element
+for message in translator._extract_attrs(previous,
+gettext_functions,
+search_text=search_text):
+yield message
+previous = stream.next()
+for event in stream:
+if previous[0] is START:
+for message in translator._extract_attrs(previous,
+gettext_functions,
+search_text=search_text):
+yield message
+msgbuf.append(*previous)
+previous = event
+if previous[0] is not END:
+msgbuf.append(*previous)
+class SingularDirective(ChooseBranchDirective):
+"""Implementation of the ``i18n:singular`` directive to be used with the
+``i18n:choose`` directive."""
+class PluralDirective(ChooseBranchDirective):
+"""Implementation of the ``i18n:plural`` directive to be used with the
+``i18n:choose`` directive."""
+class ChooseDirective(ExtractableI18NDirective):
+"""Implementation of the ``i18n:choose`` directive which provides plural
+internationalisation of strings.
+This directive requires at least one parameter, the one which evaluates to
+an integer which will allow to choose the plural/singular form. If you also
+have expressions inside the singular and plural version of the string you
+also need to pass a name for those parameters. Consider the following
+examples:
+>>> tmpl = MarkupTemplate('''\
+<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <div i18n:choose="num; num">
+...     <p i18n:singular="">There is $num coin</p>
+...     <p i18n:plural="">There are $num coins</p>
+...   </div>
+... </html>''')
+>>> translator = Translator()
+>>> translator.setup(tmpl)
+>>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+[(2, 'ngettext', (u'There is %(num)s coin',
+u'There are %(num)s coins'), [])]
+>>> tmpl = MarkupTemplate('''\
+<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <div i18n:choose="num; num">
+...     <p i18n:singular="">There is $num coin</p>
+...     <p i18n:plural="">There are $num coins</p>
+...   </div>
+... </html>''')
+>>> translator.setup(tmpl)
+>>> print(tmpl.generate(num=1).render())
+<html>
+<div>
+<p>There is 1 coin</p>
+</div>
+</html>
+>>> print(tmpl.generate(num=2).render())
+<html>
+<div>
+<p>There are 2 coins</p>
+</div>
+</html>
+When used as an element and not as an attribute:
+>>> tmpl = MarkupTemplate('''\
+<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <i18n:choose numeral="num" params="num">
+...     <p i18n:singular="">There is $num coin</p>
+...     <p i18n:plural="">There are $num coins</p>
+...   </i18n:choose>
+... </html>''')
+>>> translator.setup(tmpl)
+>>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
+[(2, 'ngettext', (u'There is %(num)s coin',
+u'There are %(num)s coins'), [])]
+"""
+__slots__ = ['numeral', 'params', 'lineno']
+def __init__(self, value, template=None, namespaces=None, lineno=-1,
+offset=-1):
+Directive.__init__(self, None, template, namespaces, lineno, offset)
+params = [v.strip() for v in value.split(';')]
+self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
+self.params = params and [name.strip() for name in
+params[0].split(',') if name] or []
+self.lineno = lineno
+@classmethod
+def attach(cls, template, stream, value, namespaces, pos):
+if type(value) is dict:
+numeral = value.get('numeral', '').strip()
+assert numeral is not '', "at least pass the numeral param"
+params = [v.strip() for v in value.get('params', '').split(',')]
+value = '%s; ' % numeral + ', '.join(params)
+return super(ChooseDirective, cls).attach(template, stream, value,
+namespaces, pos)
+def __call__(self, stream, directives, ctxt, **vars):
+ctxt.push({'_i18n.choose.params': self.params,
+'_i18n.choose.singular': None,
+'_i18n.choose.plural': None})
+ngettext = ctxt.get('_i18n.ngettext')
+assert hasattr(ngettext, '__call__'), 'No ngettext function available'
+dngettext = ctxt.get('_i18n.dngettext')
+if not dngettext:
+dngettext = lambda d, s, p, n: ngettext(s, p, n)
+new_stream = []
+singular_stream = None
+singular_msgbuf = None
+plural_stream = None
+plural_msgbuf = None
+numeral = self.numeral.evaluate(ctxt)
+is_plural = self._is_plural(numeral, ngettext)
+for event in stream:
+if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
+for d in event[1][0]):
+subdirectives, substream = event[1]
+if isinstance(subdirectives[0], SingularDirective):
+singular_stream = list(_apply_directives(substream,
+subdirectives,
+ctxt, vars))
+new_stream.append((MSGBUF, None, (None, -1, -1)))
+elif isinstance(subdirectives[0], PluralDirective):
+if is_plural:
+plural_stream = list(_apply_directives(substream,
+subdirectives,
+ctxt, vars))
+else:
+new_stream.append(event)
+if ctxt.get('_i18n.domain'):
+ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
+s, p, n)
+singular_msgbuf = ctxt.get('_i18n.choose.singular')
+if is_plural:
+plural_msgbuf = ctxt.get('_i18n.choose.plural')
+msgbuf, choice = plural_msgbuf, plural_stream
+else:
+msgbuf, choice = singular_msgbuf, singular_stream
+plural_msgbuf = MessageBuffer(self)
+for kind, data, pos in new_stream:
+if kind is MSGBUF:
+for event in choice:
+if event[0] is MSGBUF:
+translation = ngettext(singular_msgbuf.format(),
+plural_msgbuf.format(),
+numeral)
+for subevent in msgbuf.translate(translation):
+yield subevent
+else:
+yield event
+else:
+yield kind, data, pos
+ctxt.pop()
+def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
+search_text=True, comment_stack=None):
+strip = False
+stream = iter(stream)
+previous = stream.next()
+if previous[0] is START:
+# skip the enclosing element
+for message in translator._extract_attrs(previous,
+gettext_functions,
+search_text=search_text):
+yield message
+previous = stream.next()
+strip = True
+singular_msgbuf = MessageBuffer(self)
+plural_msgbuf = MessageBuffer(self)
+for event in stream:
+if previous[0] is SUB:
+directives, substream = previous[1]
+for directive in directives:
+if isinstance(directive, SingularDirective):
+for message in directive.extract(translator,
+substream, gettext_functions, search_text,
+comment_stack, msgbuf=singular_msgbuf):
+yield message
+elif isinstance(directive, PluralDirective):
+for message in directive.extract(translator,
+substream, gettext_functions, search_text,
+comment_stack, msgbuf=plural_msgbuf):
+yield message
+elif not isinstance(directive, StripDirective):
+singular_msgbuf.append(*previous)
+plural_msgbuf.append(*previous)
+else:
+if previous[0] is START:
+for message in translator._extract_attrs(previous,
+gettext_functions,
+search_text):
+yield message
+singular_msgbuf.append(*previous)
+plural_msgbuf.append(*previous)
+previous = event
+if not strip:
+singular_msgbuf.append(*previous)
+plural_msgbuf.append(*previous)
+yield self.lineno, 'ngettext', \
+(singular_msgbuf.format(), plural_msgbuf.format()), \
+comment_stack[-1:]
+def _is_plural(self, numeral, ngettext):
+# XXX: should we test which form was chosen like this!?!?!?
+# There should be no match in any catalogue for these singular and
+# plural test strings
+singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
+plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
+return ngettext(singular, plural, numeral) == plural
+class DomainDirective(I18NDirective):
+"""Implementation of the ``i18n:domain`` directive which allows choosing
+another i18n domain(catalog) to translate from.
+>>> from genshi.filters.tests.i18n import DummyTranslations
+>>> tmpl = MarkupTemplate('''\
+<html xmlns:i18n="http://genshi.edgewall.org/i18n">
+...   <p i18n:msg="">Bar</p>
+...   <div i18n:domain="foo">
+...     <p i18n:msg="">FooBar</p>
+...     <p>Bar</p>
+...     <p i18n:domain="bar" i18n:msg="">Bar</p>
+...     <p i18n:domain="">Bar</p>
+...   </div>
+...   <p>Bar</p>
+... </html>''')
+>>> translations = DummyTranslations({'Bar': 'Voh'})
+>>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
+>>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
+>>> translator = Translator(translations)
+>>> translator.setup(tmpl)
+>>> print(tmpl.generate().render())
+<html>
+<p>Voh</p>
+<div>
+<p>BarFoo</p>
+<p>foo_Bar</p>
+<p>bar_Bar</p>
+<p>Voh</p>
+</div>
+<p>Voh</p>
+</html>
+"""
+__slots__ = ['domain']
+def __init__(self, value, template=None, namespaces=None, lineno=-1,
+offset=-1):
+Directive.__init__(self, None, template, namespaces, lineno, offset)
+self.domain = value and value.strip() or '__DEFAULT__'
+@classmethod
+def attach(cls, template, stream, value, namespaces, pos):
+if type(value) is dict:
+value = value.get('name')
+return super(DomainDirective, cls).attach(template, stream, value,
+namespaces, pos)
+def __call__(self, stream, directives, ctxt, **vars):
+ctxt.push({'_i18n.domain': self.domain})
+for event in _apply_directives(stream, directives, ctxt, vars):
 yield event
+ctxt.pop()
-yield previous # the outer end tag
 class Translator(DirectiveFactory):
 """Can extract and translate localizable strings from markup streams and
 templates.
-For example, assume the followng template:
+For example, assume the following template:
->>> from genshi.template import MarkupTemplate
->>>
 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
 ...   <head>
 ...     <title>Example</title>
 ...   </head>
 ...   <body>
 >>> def pseudo_gettext(string):
 ...     return {
 ...         'Example': 'Beispiel',
 ...         'Hello, %(name)s': 'Hallo, %(name)s'
 ...     }[string]
->>>
 >>> translator = Translator(pseudo_gettext)
 Next, the translator needs to be prepended to any already defined filters
 on the template:
 >>> tmpl.filters.insert(0, translator)
 When generating the template output, our hard-coded translations should be
 applied as expected:
->>> print tmpl.generate(username='Hans', _=pseudo_gettext)
+>>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
 <html>
 <head>
 <title>Beispiel</title>
 </head>
 <body>
 <h1>Beispiel</h1>
 <p>Hallo, Hans</p>
 </body>
 </html>
 Note that elements defining ``xml:lang`` attributes that do not contain
 variable expressions are ignored by this filter. That can be used to
 exclude specific parts of a template from being extracted and translated.
 """
 directives = [
+('domain', DomainDirective),
 ('comment', CommentDirective),
-('msg', MsgDirective)
+('msg', MsgDirective),
+('choose', ChooseDirective),
+('singular', SingularDirective),
+('plural', PluralDirective)
 ]
 IGNORE_TAGS = frozenset([
 QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
 QName('style'), QName('http://www.w3.org/1999/xhtml}style')
 ])
-INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
+INCLUDE_ATTRS = frozenset([
-'summary', 'title'])
+'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
+])
 NAMESPACE = I18N_NAMESPACE
 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
 include_attrs=INCLUDE_ATTRS, extract_text=True):
 """Initialize the translator.
 :param ignore_tags: a set of tag names that should not be localized
 :param include_attrs: a set of attribute names that should be localized
 :param extract_text: whether the content of text nodes should be
 extracted, or only text in explicit ``gettext``
 function calls
 :note: Changed in 0.6: the `translate` parameter can now be either
 a ``gettext``-style function, or an object compatible with the
 ``NullTranslations`` or ``GNUTranslations`` interface
 """
 self.translate = translate
 self.ignore_tags = ignore_tags
 self.include_attrs = include_attrs
 self.extract_text = extract_text
-def __call__(self, stream, ctxt=None, search_text=True):
+def __call__(self, stream, ctxt=None, translate_text=True,
+translate_attrs=True):
 """Translate any localizable strings in the given stream.
 This function shouldn't be called directly. Instead, an instance of
 the `Translator` class should be registered as a filter with the
 `Template` or the `TemplateLoader`, or applied as a regular stream
 filter. If used as a template filter, it should be inserted in front of
 all the default filters.
 :param stream: the markup event stream
 :param ctxt: the template context (not used)
-:param search_text: whether text nodes should be translated (used
+:param translate_text: whether text nodes should be translated (used
 internally)
+:param translate_attrs: whether attribute values should be translated
+(used internally)
 :return: the localized stream
 """
 ignore_tags = self.ignore_tags
 include_attrs = self.include_attrs
 skip = 0
 xml_lang = XML_NAMESPACE['lang']
+if not self.extract_text:
+translate_text = False
+translate_attrs = False
 if type(self.translate) is FunctionType:
 gettext = self.translate
+if ctxt:
+ctxt['_i18n.gettext'] = gettext
 else:
 gettext = self.translate.ugettext
-if ctxt:
+ngettext = self.translate.ungettext
-ctxt['_i18n.gettext'] = gettext
+try:
+dgettext = self.translate.dugettext
-extract_text = self.extract_text
+dngettext = self.translate.dungettext
-if not extract_text:
+except AttributeError:
-search_text = False
+dgettext = lambda _, y: gettext(y)
+dngettext = lambda _, s, p, n: ngettext(s, p, n)
+if ctxt:
+ctxt['_i18n.gettext'] = gettext
+ctxt['_i18n.ngettext'] = ngettext
+ctxt['_i18n.dgettext'] = dgettext
+ctxt['_i18n.dngettext'] = dngettext
+if ctxt and ctxt.get('_i18n.domain'):
+gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
 for kind, data, pos in stream:
 # skip chunks that should not be localized
 if skip:
 yield kind, data, pos
 continue
 new_attrs = []
 changed = False
 for name, value in attrs:
 newval = value
-if extract_text and isinstance(value, basestring):
+if isinstance(value, basestring):
-if name in include_attrs:
+if translate_attrs and name in include_attrs:
 newval = gettext(value)
 else:
-newval = list(self(_ensure(value), ctxt,
+newval = list(
-search_text=False)
+self(_ensure(value), ctxt, translate_text=False)
 )
 if newval != value:
 value = newval
 changed = True
 new_attrs.append((name, value))
 if changed:
 attrs = Attrs(new_attrs)
 yield kind, (tag, attrs), pos
-elif search_text and kind is TEXT:
+elif translate_text and kind is TEXT:
 text = data.strip()
 if text:
 data = data.replace(text, unicode(gettext(text)))
 yield kind, data, pos
 elif kind is SUB:
 directives, substream = data
-# If this is an i18n:msg directive, no need to translate text
+current_domain = None
+for idx, directive in enumerate(directives):
+# Organize directives to make everything work
+# FIXME: There's got to be a better way to do this!
+if isinstance(directive, DomainDirective):
+# Grab current domain and update context
+current_domain = directive.domain
+ctxt.push({'_i18n.domain': current_domain})
+# Put domain directive as the first one in order to
+# update context before any other directives evaluation
+directives.insert(0, directives.pop(idx))
+# If this is an i18n directive, no need to translate text
 # nodes here
-is_msg = filter(None, [isinstance(d, MsgDirective)
+is_i18n_directive = any([
-for d in directives])
+isinstance(d, ExtractableI18NDirective)
+for d in directives
+])
 substream = list(self(substream, ctxt,
-search_text=not is_msg))
+translate_text=not is_i18n_directive,
+translate_attrs=translate_attrs))
 yield kind, (directives, substream), pos
+if current_domain:
+ctxt.pop()
 else:
 yield kind, data, pos
-GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
-'ugettext', 'ungettext')
 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
-search_text=True, msgbuf=None):
+search_text=True, comment_stack=None):
 """Extract localizable strings from the given template stream.
 For every string found, this function yields a ``(lineno, function,
 message, comments)`` tuple, where:
 of ``unicode`` objects for functions with multiple string
 arguments).
 *  ``comments`` is a list of comments related to the message, extracted
 from ``i18n:comment`` attributes found in the markup
->>> from genshi.template import MarkupTemplate
->>>
 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
 ...   <head>
 ...     <title>Example</title>
 ...   </head>
 ...   <body>
 ...     <h1>Example</h1>
 ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
 ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
 ...   </body>
 ... </html>''', filename='example.html')
->>>
 >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
-...    print "%d, %r, %r" % (line, func, msg)
+...    print('%d, %r, %r' % (line, func, msg))
 3, None, u'Example'
 6, None, u'Example'
 7, '_', u'Hello, %(name)s'
 8, 'ngettext', (u'You have %d item', u'You have %d items', None)
 extracted (used internally)
 :note: Changed in 0.4.1: For a function with multiple string arguments
 (such as ``ngettext``), a single item with a tuple of strings is
 yielded, instead of an item for each string argument.
-:note: Changed in 0.6: The returned tuples now include a 4th element,
+:note: Changed in 0.6: The returned tuples now include a fourth
-which is a list of comments for the translator
+element, which is a list of comments for the translator.
 """
 if not self.extract_text:
 search_text = False
+if comment_stack is None:
+comment_stack = []
 skip = 0
-i18n_comment = I18N_NAMESPACE['comment']
-i18n_msg = I18N_NAMESPACE['msg']
 xml_lang = XML_NAMESPACE['lang']
 for kind, data, pos in stream:
 if skip:
 if kind is START:
 skip += 1
 if kind is END:
 skip -= 1
 if kind is START and not skip:
 tag, attrs = data
 if tag in self.ignore_tags or \
 isinstance(attrs.get(xml_lang), basestring):
 skip += 1
 continue
-for name, value in attrs:
+for message in self._extract_attrs((kind, data, pos),
-if search_text and isinstance(value, basestring):
+gettext_functions,
-if name in self.include_attrs:
+search_text=search_text):
-text = value.strip()
+yield message
-if text:
-yield pos[1], None, text, []
-else:
-for lineno, funcname, text, comments in self.extract(
-_ensure(value), gettext_functions,
-search_text=False):
-yield lineno, funcname, text, comments
-if msgbuf:
-msgbuf.append(kind, data, pos)
-else:
-msg_params = attrs.get(i18n_msg)
-if msg_params is not None:
-if type(msg_params) is list: # event tuple
-msg_params = msg_params[0][1]
-msgbuf = MessageBuffer(
-msg_params, attrs.get(i18n_comment), pos[1]
-)
 elif not skip and search_text and kind is TEXT:
-if not msgbuf:
+text = data.strip()
-text = data.strip()
+if text and [ch for ch in text if ch.isalpha()]:
-if text and filter(None, [ch.isalpha() for ch in text]):
+yield pos[1], None, text, comment_stack[-1:]
-yield pos[1], None, text, []
-else:
-msgbuf.append(kind, data, pos)
-elif not skip and msgbuf and kind is END:
-msgbuf.append(kind, data, pos)
-if not msgbuf.depth:
-yield msgbuf.lineno, None, msgbuf.format(), \
-filter(None, [msgbuf.comment])
-msgbuf = None
 elif kind is EXPR or kind is EXEC:
-if msgbuf:
-msgbuf.append(kind, data, pos)
 for funcname, strings in extract_from_code(data,
 gettext_functions):
+# XXX: Do we need to grab i18n:comment from comment_stack ???
 yield pos[1], funcname, strings, []
 elif kind is SUB:
-subkind, substream = data
+directives, substream = data
-messages = self.extract(substream, gettext_functions,
+in_comment = False
-search_text=search_text and not skip,
-msgbuf=msgbuf)
+for idx, directive in enumerate(directives):
-for lineno, funcname, text, comments in messages:
+# Do a first loop to see if there's a comment directive
-yield lineno, funcname, text, comments
+# If there is, update the context and pop it from directives
+if isinstance(directive, CommentDirective):
+in_comment = True
+comment_stack.append(directive.comment)
+if len(directives) == 1:
+# in case we're in the presence of something like:
+# <p i18n:comment="foo">Foo</p>
+for message in self.extract(
+substream, gettext_functions,
+search_text=search_text and not skip,
+comment_stack=comment_stack):
+yield message
+directives.pop(idx)
+elif not isinstance(directive, I18NDirective):
+# Remove all other non i18n directives from the process
+directives.pop(idx)
+if not directives and not in_comment:
+# Extract content if there's no directives because
+# strip was pop'ed and not because comment was pop'ed.
+# Extraction in this case has been taken care of.
+for message in self.extract(
+substream, gettext_functions,
+search_text=search_text and not skip):
+yield message
+for directive in directives:
+if isinstance(directive, ExtractableI18NDirective):
+for message in directive.extract(self,
+substream, gettext_functions,
+search_text=search_text and not skip,
+comment_stack=comment_stack):
+yield message
+else:
+for message in self.extract(
+substream, gettext_functions,
+search_text=search_text and not skip,
+comment_stack=comment_stack):
+yield message
+if in_comment:
+comment_stack.pop()
+def get_directive_index(self, dir_cls):
+total = len(self._dir_order)
+if dir_cls in self._dir_order:
+return self._dir_order.index(dir_cls) - total
+return total
+def setup(self, template):
+"""Convenience function to register the `Translator` filter and the
+related directives with the given template.
+:param template: a `Template` instance
+"""
+template.filters.insert(0, self)
+if hasattr(template, 'add_directives'):
+template.add_directives(Translator.NAMESPACE, self)
+def _extract_attrs(self, event, gettext_functions, search_text):
+for name, value in event[1][1]:
+if search_text and isinstance(value, basestring):
+if name in self.include_attrs:
+text = value.strip()
+if text:
+yield event[2][1], None, text, []
+else:
+for message in self.extract(_ensure(value), gettext_functions,
+search_text=False):
+yield message
 class MessageBuffer(object):
 """Helper class for managing internationalized mixed content.
 :since: version 0.5
 """
-def __init__(self, params=u'', comment=None, lineno=-1):
+def __init__(self, directive=None):
 """Initialize the message buffer.
-:param params: comma-separated list of parameter names
+:param directive: the directive owning the buffer
-:type params: `basestring`
+:type directive: I18NDirective
-:param lineno: the line number on which the first stream event
-belonging to the message was found
 """
-if isinstance(params, basestring):
+# params list needs to be copied so that directives can be evaluated
-params = [name.strip() for name in params.split(',')]
+# more than once
-self.params = params
+self.orig_params = self.params = directive.params[:]
-self.comment = comment
+self.directive = directive
-self.lineno = lineno
 self.string = []
 self.events = {}
 self.values = {}
 self.depth = 1
 self.order = 1
 self.stack = [0]
+self.subdirectives = {}
 def append(self, kind, data, pos):
 """Append a stream event to the buffer.
 :param kind: the stream event kind
 :param data: the event data
 :param pos: the position of the event in the source
 """
-if kind is TEXT:
+if kind is SUB:
+# The order needs to be +1 because a new START kind event will
+# happen and we need to wrap those events into our custom kind(s)
+order = self.stack[-1] + 1
+subdirectives, substream = data
+# Store the directives that should be applied after translation
+self.subdirectives.setdefault(order, []).extend(subdirectives)
+self.events.setdefault(order, []).append((SUB_START, None, pos))
+for skind, sdata, spos in substream:
+self.append(skind, sdata, spos)
+self.events.setdefault(order, []).append((SUB_END, None, pos))
+elif kind is TEXT:
+if '[' in data or ']' in data:
+# Quote [ and ] when we are not the ones adding them, i.e. if the user
+# is using those characters in their templates, escape them
+data = data.replace('[', '\[').replace(']', '\]')
 self.string.append(data)
-self.events.setdefault(self.stack[-1], []).append(None)
+self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
 elif kind is EXPR:
-param = self.params.pop(0)
+if self.params:
+param = self.params.pop(0)
+else:
+params = ', '.join(['"%s"' % p for p in self.orig_params if p])
+if params:
+params = "(%s)" % params
+raise IndexError("%d parameters%s given to 'i18n:%s' but "
+"%d or more expressions used in '%s', line %s"
+% (len(self.orig_params), params,
+self.directive.tagname,
+len(self.orig_params) + 1,
+os.path.basename(pos[0] or
+'In-memory Template'),
+pos[1]))
 self.string.append('%%(%s)s' % param)
-self.events.setdefault(self.stack[-1], []).append(None)
+self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
 self.values[param] = (kind, data, pos)
 else:
 if kind is START:
-self.string.append(u'[%d:' % self.order)
+self.string.append('[%d:' % self.order)
-self.events.setdefault(self.order, []).append((kind, data, pos))
 self.stack.append(self.order)
+self.events.setdefault(self.stack[-1],
+[]).append((kind, data, pos))
 self.depth += 1
 self.order += 1
 elif kind is END:
 self.depth -= 1
 if self.depth:
 self.events[self.stack[-1]].append((kind, data, pos))
-self.string.append(u']')
+self.string.append(']')
 self.stack.pop()
 def format(self):
 """Return a message identifier representing the content in the
 buffer.
 """
-return u''.join(self.string).strip()
+return ''.join(self.string).strip()
 def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
 """Interpolate the given message translation with the events in the
 buffer and return the translated stream.
 :param string: the translated message string
 """
+substream = None
+def yield_parts(string):
+for idx, part in enumerate(regex.split(string)):
+if idx % 2:
+yield self.values[part]
+elif part:
+yield (TEXT,
+part.replace('\[', '[').replace('\]', ']'),
+(None, -1, -1)
+)
 parts = parse_msg(string)
+parts_counter = {}
 for order, string in parts:
-events = self.events[order]
+parts_counter.setdefault(order, []).append(None)
-while events:
-event = events.pop(0)
+while parts:
-if event:
+order, string = parts.pop(0)
-yield event
+if len(parts_counter[order]) == 1:
+events = self.events[order]
+else:
+events = [self.events[order].pop(0)]
+parts_counter[order].pop()
+for event in events:
+if event[0] is SUB_START:
+substream = []
+elif event[0] is SUB_END:
+# Yield a substream which might have directives to be
+# applied to it (after translation events)
+yield SUB, (self.subdirectives[order], substream), event[2]
+substream = None
+elif event[0] is TEXT:
+if string:
+for part in yield_parts(string):
+if substream is not None:
+substream.append(part)
+else:
+yield part
+# String handled, reset it
+string = None
+elif event[0] is START:
+if substream is not None:
+substream.append(event)
+else:
+yield event
+if string:
+for part in yield_parts(string):
+if substream is not None:
+substream.append(part)
+else:
+yield part
+# String handled, reset it
+string = None
+elif event[0] is END:
+if string:
+for part in yield_parts(string):
+if substream is not None:
+substream.append(part)
+else:
+yield part
+# String handled, reset it
+string = None
+if substream is not None:
+substream.append(event)
+else:
+yield event
+elif event[0] is EXPR:
+# These are handled on the string itself
+continue
 else:
-if not string:
+if string:
-break
+for part in yield_parts(string):
-for idx, part in enumerate(regex.split(string)):
+if substream is not None:
-if idx % 2:
+substream.append(part)
-yield self.values[part]
+else:
-elif part:
+yield part
-yield TEXT, part, (None, -1, -1)
+# String handled, reset it
-if not self.events[order] or not self.events[order][0]:
+string = None
-break
+if substream is not None:
+substream.append(event)
+else:
-def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|\]')):
+yield event
+def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
 """Parse a translated message using Genshi mixed content message
 formatting.
 >>> parse_msg("See [1:Help].")
 [(0, 'See '), (1, 'Help'), (0, '.')]
 >>> parse_msg("See [1:our [2:Help] page] for details.")
 [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
 >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
 [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
 >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
 [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
 :param string: the translated message string
 :return: a list of ``(order, string)`` tuples
 :rtype: `list`
 """
 parts = []
 def extract_from_code(code, gettext_functions):
 """Extract strings from Python bytecode.
 >>> from genshi.template.eval import Expression
 >>> expr = Expression('_("Hello")')
->>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
+>>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
 [('_', u'Hello')]
 >>> expr = Expression('ngettext("You have %(num)s item", '
 ...                            '"You have %(num)s items", num)')
->>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
+>>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
 :param code: the `Code` object
 :type code: `genshi.template.eval.Code`
 :param gettext_functions: a sequence of function names
 include_attrs = include_attrs.split()
 include_attrs = [QName(attr) for attr in include_attrs]
 tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
 encoding=encoding)
+tmpl.loader = None
 translator = Translator(None, ignore_tags, include_attrs, extract_text)
+if hasattr(tmpl, 'add_directives'):
+tmpl.add_directives(Translator.NAMESPACE, translator)
 for message in translator.extract(tmpl.stream, gettext_functions=keywords):
 yield message

Mercurial > genshi > genshi-test

comparison genshi/filters/i18n.py @ 902:09cc3627654c experimental-inline