comparison genshi/filters/i18n.py @ 500:0742f421caba experimental-inline

Merged revisions 487-603 via svnmerge from http://svn.edgewall.org/repos/genshi/trunk
author cmlenz
date Fri, 01 Jun 2007 17:21:47 +0000
parents
children 1837f39efd6f
comparison
equal deleted inserted replaced
499:869b7885a516 500:0742f421caba
1 """Utilities for internationalization and localization of templates."""
2
3 try:
4 frozenset
5 except NameError:
6 from sets import ImmutableSet as frozenset
7 from gettext import gettext
8 from opcode import opmap
9 import re
10
11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, _ensure
12 from genshi.template.base import Template, EXPR, SUB
13 from genshi.template.markup import EXEC
14
# One-character strings holding the numeric value of individual bytecode
# opcodes (looked up in ``opcode.opmap``). ``Translator.extract`` joins
# ``_LOAD_CONST``, ``_CALL_FUNCTION`` and ``_BINARY_ADD`` into a regular
# expression that is matched against the raw ``co_code`` string of compiled
# template expressions.
# NOTE(review): ``_LOAD_NAME`` is not referenced in the visible part of this
# module -- confirm whether it is still needed.
_LOAD_NAME = chr(opmap['LOAD_NAME'])
_LOAD_CONST = chr(opmap['LOAD_CONST'])
_CALL_FUNCTION = chr(opmap['CALL_FUNCTION'])
_BINARY_ADD = chr(opmap['BINARY_ADD'])
19
20
class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.

    For example, assume the following template:

    >>> from genshi.template import MarkupTemplate
    >>>
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...   </body>
    ... </html>''', filename='example.html')

    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:

    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>>
    >>> translator = Translator(pseudo_gettext)

    Next, the translator needs to be prepended to any already defined filters
    on the template:

    >>> tmpl.filters.insert(0, translator)

    When generating the template output, our hard-coded translations should be
    applied as expected:

    >>> print tmpl.generate(username='Hans', _=pseudo_gettext)
    <html>
      <head>
        <title>Beispiel</title>
      </head>
      <body>
        <h1>Beispiel</h1>
        <p>Hallo, Hans</p>
      </body>
    </html>
    """

    # Elements whose content is never translated or extracted.
    IGNORE_TAGS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose values are treated as localizable text.
    INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
                               'summary', 'title'])

    def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS):
        """Initialize the translator.

        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be
                              localized
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs

    def __call__(self, stream, ctxt=None, search_text=True):
        """Translate any localizable strings in the given stream.

        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.

        :param stream: the markup event stream
        :param ctxt: the template context (not used by the translation itself,
                     only handed on to the recursive calls made for nested
                     streams)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in ignore_tags:
                        skip += 1
                elif kind is END:
                    # The data of an END event is the tag name itself. It has
                    # to be read here: relying on the ``tag`` left over from
                    # the most recent START event would test the wrong element
                    # and leave ``skip`` stuck above zero.
                    tag = data
                    if tag in ignore_tags:
                        skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                if tag in ignore_tags:
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if isinstance(value, basestring):
                        if name in include_attrs:
                            newval = translate(value)
                    else:
                        # Non-string values are nested event streams (e.g.
                        # attribute values containing expressions); translate
                        # them recursively, touching text only for the
                        # attributes that are meant to be localized.
                        newval = list(self(_ensure(value), ctxt,
                                           search_text=name in include_attrs))
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                if changed:
                    # Hand on an ``Attrs`` object rather than a bare list so
                    # that later filters see the same container type as for
                    # untouched elements.
                    # NOTE(review): assumes ``Attrs`` accepts a sequence of
                    # ``(name, value)`` pairs -- confirm against genshi.core.
                    attrs = Attrs(new_attrs)

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                text = data.strip()
                if text:
                    # Replace only the stripped part so that surrounding
                    # whitespace of the text node is preserved.
                    data = data.replace(text, translate(text))
                yield kind, data, pos

            elif kind is SUB:
                subkind, substream = data
                new_substream = list(self(substream, ctxt))
                yield kind, (subkind, new_substream), pos

            else:
                yield kind, data, pos

    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True):
        """Extract localizable strings from the given template stream.

        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:

        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        * ``message`` is the string itself (a ``unicode`` object, or a tuple
          of ``unicode`` objects for functions with multiple string arguments).

        >>> from genshi.template import MarkupTemplate
        >>>
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ...   <head>
        ...     <title>Example</title>
        ...   </head>
        ...   <body>
        ...     <h1>Example</h1>
        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ...   </body>
        ... </html>''', filename='example.html')
        >>>
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items')

        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)

        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead of an item for each string argument.
        """
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in self.ignore_tags:
                        skip += 1
                elif kind is END:
                    tag = data
                    if tag in self.ignore_tags:
                        skip -= 1
                continue

            if kind is START:
                tag, attrs = data
                if tag in self.ignore_tags:
                    skip += 1
                    continue

                for name, value in attrs:
                    if isinstance(value, basestring):
                        if name in self.include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        # Nested stream as attribute value: recurse, taking
                        # plain text only from localizable attributes.
                        for lineno, funcname, text in self.extract(
                                _ensure(value), gettext_functions,
                                search_text=name in self.include_attrs):
                            yield lineno, funcname, text

            elif search_text and kind is TEXT:
                text = data.strip()
                # Only text containing at least one alphabetic character is
                # reported.
                if text and filter(None, [ch.isalpha() for ch in text]):
                    yield pos[1], None, text

            elif kind is EXPR or kind is EXEC:
                # Map every constant of the compiled code to the two-character
                # operand string (index character followed by NUL) under which
                # it is looked for in the bytecode.
                consts = dict([(n, chr(i) + '\x00') for i, n in
                               enumerate(data.code.co_consts)])
                gettext_locs = [consts[n] for n in gettext_functions
                                if n in consts]
                # Regular expression over the raw bytecode string: a
                # LOAD_CONST of one of the gettext function names, a
                # CALL_FUNCTION, and then a run of LOAD_CONST / BINARY_ADD
                # operations that is captured as the argument opcodes.
                ops = [
                    _LOAD_CONST, '(', '|'.join(gettext_locs), ')',
                    _CALL_FUNCTION, '.\x00',
                    '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)'
                ]
                for loc, opcodes in re.findall(''.join(ops),
                                               data.code.co_code):
                    funcname = data.code.co_consts[ord(loc[0])]
                    strings = []
                    opcodes = iter(opcodes)
                    for opcode in opcodes:
                        if opcode == _BINARY_ADD:
                            # Fold the two most recent strings into one.
                            arg = strings.pop()
                            strings[-1] += arg
                        else:
                            # LOAD_CONST: the next character is the index of
                            # the constant, the one after that is discarded.
                            arg = data.code.co_consts[ord(opcodes.next())]
                            opcodes.next() # skip second byte
                            if not isinstance(arg, basestring):
                                break
                            strings.append(unicode(arg))
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield pos[1], funcname, strings

            elif kind is SUB:
                subkind, substream = data
                for lineno, funcname, text in self.extract(substream,
                                                           gettext_functions):
                    yield lineno, funcname, text
Copyright (C) 2012-2017 Edgewall Software