Mercurial > genshi > genshi-test
comparison genshi/filters/i18n.py @ 500:0742f421caba experimental-inline
Merged revisions 487-603 via svnmerge from
http://svn.edgewall.org/repos/genshi/trunk
author | cmlenz |
---|---|
date | Fri, 01 Jun 2007 17:21:47 +0000 |
parents | |
children | 1837f39efd6f |
comparison
equal
deleted
inserted
replaced
499:869b7885a516 | 500:0742f421caba |
---|---|
1 """Utilities for internationalization and localization of templates.""" | |
2 | |
3 try: | |
4 frozenset | |
5 except NameError: | |
6 from sets import ImmutableSet as frozenset | |
7 from gettext import gettext | |
8 from opcode import opmap | |
9 import re | |
10 | |
11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, _ensure | |
12 from genshi.template.base import Template, EXPR, SUB | |
13 from genshi.template.markup import EXEC | |
14 | |
# Raw opcode values, as one-character strings, that the message extractor
# searches for in the bytecode of compiled template expressions.
_LOAD_NAME, _LOAD_CONST, _CALL_FUNCTION, _BINARY_ADD = map(chr, (
    opmap['LOAD_NAME'],
    opmap['LOAD_CONST'],
    opmap['CALL_FUNCTION'],
    opmap['BINARY_ADD'],
))
19 | |
20 | |
class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.

    For example, assume the following template:

    >>> from genshi.template import MarkupTemplate
    >>>
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...   </body>
    ... </html>''', filename='example.html')

    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:

    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>>
    >>> translator = Translator(pseudo_gettext)

    Next, the translator needs to be prepended to any already defined filters
    on the template:

    >>> tmpl.filters.insert(0, translator)

    When generating the template output, our hard-coded translations should be
    applied as expected:

    >>> print tmpl.generate(username='Hans', _=pseudo_gettext)
    <html>
      <head>
        <title>Beispiel</title>
      </head>
      <body>
        <h1>Beispiel</h1>
        <p>Hallo, Hans</p>
      </body>
    </html>
    """

    # Elements whose content is never translated or extracted.
    IGNORE_TAGS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose values are treated as localizable text.
    INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
                               'summary', 'title'])

    def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS):
        """Initialize the translator.

        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be
                              localized
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs

    def __call__(self, stream, ctxt=None, search_text=True):
        """Translate any localizable strings in the given stream.

        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.

        :param stream: the markup event stream
        :param ctxt: the template context (not used here, only passed on to
                     the recursive calls for attribute values and
                     sub-streams)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in ignore_tags:
                        skip += 1
                elif kind is END:
                    # The data of an END event is the tag name itself (the
                    # same way `extract` reads it); testing a name left over
                    # from an earlier START event would leave `skip` stuck
                    # after any nested element.
                    if data in ignore_tags:
                        skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                if tag in ignore_tags:
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if isinstance(value, basestring):
                        if name in include_attrs:
                            newval = translate(value)
                    else:
                        # Non-string values are event streams themselves, so
                        # they are filtered recursively; their text is only
                        # translated for the localizable attributes.
                        newval = list(self(_ensure(value), ctxt,
                                           search_text=name in include_attrs))
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                if changed:
                    # Hand on the same attribute container type that was
                    # received rather than a bare list.
                    # NOTE(review): assumes `Attrs` accepts a sequence of
                    # (name, value) pairs -- confirm against genshi.core.
                    attrs = Attrs(new_attrs)

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                text = data.strip()
                if text:
                    # Only the stripped text is translated, so surrounding
                    # whitespace is preserved.
                    data = data.replace(text, translate(text))
                yield kind, data, pos

            elif kind is SUB:
                subkind, substream = data
                new_substream = list(self(substream, ctxt))
                yield kind, (subkind, new_substream), pos

            else:
                yield kind, data, pos

    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True):
        """Extract localizable strings from the given template stream.

        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:

        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        * ``message`` is the string itself (a ``unicode`` object, or a tuple
          of ``unicode`` objects for functions with multiple string arguments).

        >>> from genshi.template import MarkupTemplate
        >>>
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ...   <head>
        ...     <title>Example</title>
        ...   </head>
        ...   <body>
        ...     <h1>Example</h1>
        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ...   </body>
        ... </html>''', filename='example.html')
        >>>
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items')

        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)

        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead of an item for each string argument.
        """
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in self.ignore_tags:
                        skip += 1
                if kind is END:
                    tag = data
                    if tag in self.ignore_tags:
                        skip -= 1
                continue

            if kind is START:
                tag, attrs = data
                if tag in self.ignore_tags:
                    skip += 1
                    continue

                for name, value in attrs:
                    if isinstance(value, basestring):
                        if name in self.include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        # Non-string values are event streams themselves.
                        for lineno, funcname, text in self.extract(
                                _ensure(value), gettext_functions,
                                search_text=name in self.include_attrs):
                            yield lineno, funcname, text

            elif search_text and kind is TEXT:
                text = data.strip()
                # Text without a single alphabetic character is not worth
                # translating.
                if text and filter(None, [ch.isalpha() for ch in text]):
                    yield pos[1], None, text

            elif kind is EXPR or kind is EXEC:
                code = data.code
                # Map every constant to the two bytes that reference it as an
                # opcode argument (index byte followed by a zero byte).
                consts = dict([(n, chr(i) + '\x00') for i, n in
                               enumerate(code.co_consts)])
                # The bytes are arbitrary, so they have to be escaped before
                # they become part of a regular expression.
                gettext_locs = [re.escape(consts[n])
                                for n in gettext_functions if n in consts]
                if not gettext_locs:
                    # None of the gettext functions is referenced by this
                    # code object, so there is nothing to look for.
                    continue

                load_const = re.escape(_LOAD_CONST)
                # DOTALL, because the wildcard stands for an arbitrary byte,
                # and that includes 0x0A.
                pattern = re.compile(''.join([
                    load_const, '(', '|'.join(gettext_locs), ')',
                    re.escape(_CALL_FUNCTION), '.\x00',
                    '((?:', re.escape(_BINARY_ADD), '|', load_const,
                    '.\x00)+)'
                ]), re.DOTALL)

                for loc, opcodes in pattern.findall(code.co_code):
                    funcname = code.co_consts[ord(loc[0])]
                    strings = []
                    opcodes = iter(opcodes)
                    for opcode in opcodes:
                        if opcode == _BINARY_ADD:
                            if len(strings) < 2:
                                # Nothing to concatenate; not a sequence of
                                # string arguments we understand.
                                break
                            arg = strings.pop()
                            strings[-1] += arg
                        else:
                            arg = code.co_consts[ord(opcodes.next())]
                            opcodes.next() # skip second byte
                            if not isinstance(arg, basestring):
                                break
                            strings.append(unicode(arg))
                    if not strings:
                        # No string argument was found, so there is no
                        # message to report.
                        continue
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield pos[1], funcname, strings

            elif kind is SUB:
                subkind, substream = data
                for lineno, funcname, text in self.extract(substream,
                                                           gettext_functions):
                    yield lineno, funcname, text