Mercurial > genshi > mirror
changeset 450:94601511cd68 trunk
Extend the I18n extraction to also yield function names if applicable.
author | cmlenz |
---|---|
date | Fri, 13 Apr 2007 20:58:48 +0000 |
parents | b07e65580175 |
children | 4183fd29fa4e |
files | genshi/filters/i18n.py |
diffstat | 1 files changed, 52 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/genshi/filters/i18n.py +++ b/genshi/filters/i18n.py @@ -8,7 +8,7 @@ from opcode import opmap import re -from genshi.core import Attrs, START, END, TEXT +from genshi.core import Attrs, Namespace, QName, START, END, TEXT from genshi.template.base import Template, EXPR, SUB from genshi.template.markup import EXEC @@ -20,11 +20,12 @@ class Translator(object): """Can extract and translate localizable strings from markup streams and - templates + templates. For example, assume the followng template: >>> from genshi.template import MarkupTemplate + >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> @@ -43,6 +44,7 @@ ... 'Example': 'Beispiel', ... 'Hello, %(name)s': 'Hallo, %(name)s' ... }[string] + >>> >>> translator = Translator(pseudo_gettext) Next, the translator needs to be prepended to any already defined filters @@ -65,7 +67,10 @@ </html> """ - IGNORE_TAGS = frozenset(['script', 'style']) + IGNORE_TAGS = frozenset([ + QName('script'), QName('http://www.w3.org/1999/xhtml}script'), + QName('style'), QName('http://www.w3.org/1999/xhtml}style') + ]) INCLUDE_ATTRS = frozenset(['title', 'alt']) def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, @@ -77,7 +82,7 @@ :param ignore_tags: a set of tag names that should not be localized :param include_attrs: a set of attribute names should be localized """ - self.gettext = translate + self.translate = translate self.ignore_tags = ignore_tags self.include_attrs = include_attrs @@ -96,6 +101,9 @@ internally) :return: the localized stream """ + ignore_tags = self.ignore_tags + include_attrs = self.include_attrs + translate = self.translate skip = 0 for kind, data, pos in stream: @@ -104,11 +112,10 @@ if skip: if kind is START: tag, attrs = data - tag = tag.localname - if tag.localname in self.ignore_tags: + if tag in ignore_tags: skip += 1 elif kind is END: - if tag.localname in self.ignore_tags: + if tag in ignore_tags: skip -= 1 yield kind, data, pos continue @@ -116,7 +123,7 @@ # handle different events that can be localized if kind is START: tag, attrs = data - if tag.localname in self.ignore_tags: + if tag in ignore_tags: skip += 1 yield kind, data, pos continue @@ -128,7 +135,7 @@ if isinstance(value, basestring): newval = ugettext(value) else: - newval = list(self(value, ctxt, search_text=name in self.include_attrs)) + newval = list(self(value, ctxt, search_text=name in include_attrs)) if newval != value: value = new_val changed = True @@ -141,7 +148,7 @@ elif kind is TEXT: text = data.strip() if text: - data = data.replace(text, self.gettext(text)) + data = data.replace(text, translate(text)) yield kind, data, pos elif kind is SUB: @@ -152,16 +159,22 @@ else: yield kind, data, pos - def extract(self, stream, gettext_functions=('_', 'gettext', 'ngettext')): + GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', + 'ugettext', 'ungettext') + + def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS): """Extract localizable strings from the given template stream. - For every string found, this function yields a ``(lineno, message)`` - tuple. + For every string found, this function yields a ``(lineno, function, + message)`` tuple, where: - :param stream: the event stream to extract strings from; can be a - regular stream or a template stream + * ``lineno`` is the number of the line on which the string was found, + * ``function`` is the name of the ``gettext`` function used (if the + string was extracted from embedded Python code), and + * ``message`` is the string itself (a ``unicode`` object). >>> from genshi.template import MarkupTemplate + >>> >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> ... <head> ... <title>Example</title> @@ -171,11 +184,18 @@ ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> ... </body> ... </html>''', filename='example.html') - >>> for lineno, message in Translator().extract(tmpl.stream): - ... print "Line %d: %r" % (lineno, message) - Line 3: u'Example' - Line 6: u'Example' - Line 7: u'Hello, %(name)s' + >>> + >>> for lineno, funcname, message in Translator().extract(tmpl.stream): + ... print "%d, %r, %r" % (lineno, funcname, message) + 3, None, u'Example' + 6, None, u'Example' + 7, '_', u'Hello, %(name)s' + + :param stream: the event stream to extract strings from; can be a + regular stream or a template stream + :param gettext_functions: a sequence of function names that should be + treated as gettext-style localization + functions """ tagname = None skip = 0 @@ -184,17 +204,17 @@ if skip: if kind is START: tag, attrs = data - if tag.localname in self.ignore_tags: + if tag in self.ignore_tags: skip += 1 if kind is END: tag = data - if tag.localname in self.ignore_tags: + if tag in self.ignore_tags: skip -= 1 continue if kind is START: tag, attrs = data - if tag.localname in self.ignore_tags: + if tag in self.ignore_tags: skip += 1 continue @@ -203,15 +223,15 @@ if isinstance(value, basestring): text = value.strip() if text: - yield pos[1], text + yield pos[1], None, text else: - for lineno, text in harvest(value): - yield lineno, text + for lineno, funcname, text in harvest(value): + yield lineno, funcname, text elif kind is TEXT: text = data.strip() if text and filter(None, [ch.isalpha() for ch in text]): - yield pos[1], text + yield pos[1], None, text elif kind is EXPR or kind is EXEC: consts = dict([(n, chr(i) + '\x00') for i, n in @@ -223,7 +243,8 @@ _CALL_FUNCTION, '.\x00', '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' ] - for _, opcodes in re.findall(''.join(ops), data.code.co_code): + for loc, opcodes in re.findall(''.join(ops), data.code.co_code): + funcname = data.code.co_consts[ord(loc[0])] strings = [] opcodes = iter(opcodes) for opcode in opcodes: @@ -237,9 +258,9 @@ break strings.append(unicode(arg)) for string in strings: - yield pos[1], string + yield pos[1], funcname, string elif kind is SUB: subkind, substream = data - for lineno, text in self.harvest(substream): - yield lineno, text + for lineno, funcname, text in self.harvest(substream): + yield lineno, funcname, text