genshi/mirror: genshi/filters/i18n.py comparison

comparison genshi/filters/i18n.py @ 450:94601511cd68 trunk

Extend the I18n extraction to also yield function names if applicable.

author	cmlenz
date	Fri, 13 Apr 2007 20:58:48 +0000
parents	1154f2aadb6c
children	4b6dc4978691

comparison

equal deleted inserted replaced

-:b07e65580175
+:94601511cd68
 from sets import ImmutableSet as frozenset
 from gettext import gettext
 from opcode import opmap
 import re
-from genshi.core import Attrs, START, END, TEXT
+from genshi.core import Attrs, Namespace, QName, START, END, TEXT
 from genshi.template.base import Template, EXPR, SUB
 from genshi.template.markup import EXEC
 _LOAD_NAME = chr(opmap['LOAD_NAME'])
 _LOAD_CONST = chr(opmap['LOAD_CONST'])
 _BINARY_ADD = chr(opmap['BINARY_ADD'])
 class Translator(object):
 """Can extract and translate localizable strings from markup streams and
-templates
+templates.
 For example, assume the following template:
 >>> from genshi.template import MarkupTemplate
+>>>
 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
 ...   <head>
 ...     <title>Example</title>
 ...   </head>
 ...   <body>
 >>> def pseudo_gettext(string):
 ...     return {
 ...         'Example': 'Beispiel',
 ...         'Hello, %(name)s': 'Hallo, %(name)s'
 ...     }[string]
+>>>
 >>> translator = Translator(pseudo_gettext)
 Next, the translator needs to be prepended to any already defined filters
 on the template:
 <p>Hallo, Hans</p>
 </body>
 </html>
 """
-IGNORE_TAGS = frozenset(['script', 'style'])
+IGNORE_TAGS = frozenset([
+QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
+QName('style'), QName('http://www.w3.org/1999/xhtml}style')
+])
 INCLUDE_ATTRS = frozenset(['title', 'alt'])
 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
 include_attrs=INCLUDE_ATTRS):
 """Initialize the translator.
 :param translate: the translation function, for example ``gettext`` or
 ``ugettext``.
 :param ignore_tags: a set of tag names that should not be localized
 :param include_attrs: a set of attribute names that should be localized
 """
-self.gettext = translate
+self.translate = translate
 self.ignore_tags = ignore_tags
 self.include_attrs = include_attrs
 def __call__(self, stream, ctxt=None, search_text=True):
 """Translate any localizable strings in the given stream.
 :param ctxt: the template context (not used)
 :param search_text: whether text nodes should be translated (used
 internally)
 :return: the localized stream
 """
+ignore_tags = self.ignore_tags
+include_attrs = self.include_attrs
+translate = self.translate
 skip = 0
 for kind, data, pos in stream:
 # skip chunks that should not be localized
 if skip:
 if kind is START:
 tag, attrs = data
-tag = tag.localname
+if tag in ignore_tags:
-if tag.localname in self.ignore_tags:
 skip += 1
 elif kind is END:
-if tag.localname in self.ignore_tags:
+if tag in ignore_tags:
 skip -= 1
 yield kind, data, pos
 continue
 # handle different events that can be localized
 if kind is START:
 tag, attrs = data
-if tag.localname in self.ignore_tags:
+if tag in ignore_tags:
 skip += 1
 yield kind, data, pos
 continue
 new_attrs = list(attrs)
 for name, value in attrs:
 if name in include_attrs:
 if isinstance(value, basestring):
 newval = ugettext(value)
 else:
-newval = list(self(value, ctxt, search_text=name in self.include_attrs))
+newval = list(self(value, ctxt, search_text=name in include_attrs))
 if newval != value:
 value = new_val
 changed = True
 new_attrs.append((name, value))
 if changed:
 yield kind, (tag, attrs), pos
 elif kind is TEXT:
 text = data.strip()
 if text:
-data = data.replace(text, self.gettext(text))
+data = data.replace(text, translate(text))
 yield kind, data, pos
 elif kind is SUB:
 subkind, substream = data
 new_substream = list(self(substream, ctxt))
 yield kind, (subkind, new_substream), pos
 else:
 yield kind, data, pos
-def extract(self, stream, gettext_functions=('_', 'gettext', 'ngettext')):
+GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
+'ugettext', 'ungettext')
+def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS):
 """Extract localizable strings from the given template stream.
-For every string found, this function yields a ``(lineno, message)``
+For every string found, this function yields a ``(lineno, function,
-tuple.
+message)`` tuple, where:
-:param stream: the event stream to extract strings from; can be a
+* ``lineno`` is the number of the line on which the string was found,
-regular stream or a template stream
+* ``function`` is the name of the ``gettext`` function used (if the
+string was extracted from embedded Python code), and
+*  ``message`` is the string itself (a ``unicode`` object).
 >>> from genshi.template import MarkupTemplate
+>>>
 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
 ...   <head>
 ...     <title>Example</title>
 ...   </head>
 ...   <body>
 ...     <h1>Example</h1>
 ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
 ...   </body>
 ... </html>''', filename='example.html')
->>> for lineno, message in Translator().extract(tmpl.stream):
+>>>
-...    print "Line %d: %r" % (lineno, message)
+>>> for lineno, funcname, message in Translator().extract(tmpl.stream):
-Line 3: u'Example'
+...    print "%d, %r, %r" % (lineno, funcname, message)
-Line 6: u'Example'
+3, None, u'Example'
-Line 7: u'Hello, %(name)s'
+6, None, u'Example'
+7, '_', u'Hello, %(name)s'
+:param stream: the event stream to extract strings from; can be a
+regular stream or a template stream
+:param gettext_functions: a sequence of function names that should be
+treated as gettext-style localization
+functions
 """
 tagname = None
 skip = 0
 for kind, data, pos in stream:
 if skip:
 if kind is START:
 tag, attrs = data
-if tag.localname in self.ignore_tags:
+if tag in self.ignore_tags:
 skip += 1
 if kind is END:
 tag = data
-if tag.localname in self.ignore_tags:
+if tag in self.ignore_tags:
 skip -= 1
 continue
 if kind is START:
 tag, attrs = data
-if tag.localname in self.ignore_tags:
+if tag in self.ignore_tags:
 skip += 1
 continue
 for name, value in attrs:
 if name in self.include_attrs:
 if isinstance(value, basestring):
 text = value.strip()
 if text:
-yield pos[1], text
+yield pos[1], None, text
 else:
-for lineno, text in harvest(value):
+for lineno, funcname, text in harvest(value):
-yield lineno, text
+yield lineno, funcname, text
 elif kind is TEXT:
 text = data.strip()
 if text and filter(None, [ch.isalpha() for ch in text]):
-yield pos[1], text
+yield pos[1], None, text
 elif kind is EXPR or kind is EXEC:
 consts = dict([(n, chr(i) + '\x00') for i, n in
 enumerate(data.code.co_consts)])
 gettext_locs = [consts[n] for n in gettext_functions
 ops = [
 _LOAD_CONST, '(', '|'.join(gettext_locs), ')',
 _CALL_FUNCTION, '.\x00',
 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)'
 ]
-for _, opcodes in re.findall(''.join(ops), data.code.co_code):
+for loc, opcodes in re.findall(''.join(ops), data.code.co_code):
+funcname = data.code.co_consts[ord(loc[0])]
 strings = []
 opcodes = iter(opcodes)
 for opcode in opcodes:
 if opcode == _BINARY_ADD:
 arg = strings.pop()
 opcodes.next() # skip second byte
 if not isinstance(arg, basestring):
 break
 strings.append(unicode(arg))
 for string in strings:
-yield pos[1], string
+yield pos[1], funcname, string
 elif kind is SUB:
 subkind, substream = data
-for lineno, text in self.harvest(substream):
+for lineno, funcname, text in self.harvest(substream):
-yield lineno, text
+yield lineno, funcname, text

Mercurial > genshi > mirror

comparison genshi/filters/i18n.py @ 450:94601511cd68 trunk