500
|
1 """Utilities for internationalization and localization of templates."""
|
|
2
|
|
3 try:
|
|
4 frozenset
|
|
5 except NameError:
|
|
6 from sets import ImmutableSet as frozenset
|
|
7 from gettext import gettext
|
|
8 from opcode import opmap
|
|
9 import re
|
|
10
|
|
11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, _ensure
|
|
12 from genshi.template.base import Template, EXPR, SUB
|
|
13 from genshi.template.markup import EXEC
|
|
14
|
|
# One-character strings holding the numeric value of each CPython opcode this
# module matches against when scanning the compiled code of template
# expressions for gettext-style calls.
_LOAD_NAME, _LOAD_CONST, _CALL_FUNCTION, _BINARY_ADD = map(
    lambda opname: chr(opmap[opname]),
    ('LOAD_NAME', 'LOAD_CONST', 'CALL_FUNCTION', 'BINARY_ADD')
)
|
|
19
|
|
20
|
|
class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.

    For example, assume the following template:

    >>> from genshi.template import MarkupTemplate
    >>>
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...   </body>
    ... </html>''', filename='example.html')

    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:

    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>>
    >>> translator = Translator(pseudo_gettext)

    Next, the translator needs to be prepended to any already defined filters
    on the template:

    >>> tmpl.filters.insert(0, translator)

    When generating the template output, our hard-coded translations should be
    applied as expected:

    >>> print tmpl.generate(username='Hans', _=pseudo_gettext)
    <html>
      <head>
        <title>Beispiel</title>
      </head>
      <body>
        <h1>Beispiel</h1>
        <p>Hallo, Hans</p>
      </body>
    </html>
    """

    # Elements whose content is never translated or extracted.
    IGNORE_TAGS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose values are treated as localizable text.
    INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
                               'summary', 'title'])

    def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS):
        """Initialize the translator.

        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be
                              localized
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs

    def __call__(self, stream, ctxt=None, search_text=True):
        """Translate any localizable strings in the given stream.

        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.

        :param stream: the markup event stream
        :param ctxt: the template context (not used)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        # Nesting depth of ignored elements we are currently inside of;
        # non-zero means events are passed through untouched.
        skip = 0

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in ignore_tags:
                        skip += 1
                elif kind is END:
                    # The tag being closed is carried by this END event. It
                    # must be read from ``data``: a name left over from an
                    # earlier START event may belong to a nested element, in
                    # which case ``skip`` would never drop back to zero.
                    tag = data
                    if tag in ignore_tags:
                        skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                if tag in ignore_tags:
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if isinstance(value, basestring):
                        if name in include_attrs:
                            newval = translate(value)
                    else:
                        # Non-string attribute values are filtered
                        # recursively as streams of their own; their text is
                        # only translated for localizable attributes.
                        newval = list(self(_ensure(value), ctxt,
                            search_text=name in include_attrs)
                        )
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                # Only replace the attributes when something was translated.
                if changed:
                    attrs = new_attrs

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                text = data.strip()
                if text:
                    # Translate the stripped text but keep the surrounding
                    # whitespace of the original node.
                    data = data.replace(text, translate(text))
                yield kind, data, pos

            elif kind is SUB:
                subkind, substream = data
                new_substream = list(self(substream, ctxt))
                yield kind, (subkind, new_substream), pos

            else:
                yield kind, data, pos

    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True):
        """Extract localizable strings from the given template stream.

        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:

        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        * ``message`` is the string itself (a ``unicode`` object, or a tuple
          of ``unicode`` objects for functions with multiple string arguments).

        >>> from genshi.template import MarkupTemplate
        >>>
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ...   <head>
        ...     <title>Example</title>
        ...   </head>
        ...   <body>
        ...     <h1>Example</h1>
        ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ...   </body>
        ... </html>''', filename='example.html')
        >>>
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items')

        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)

        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead of an item for each string argument.
        """
        tagname = None
        # Nesting depth of ignored elements we are currently inside of.
        skip = 0

        for kind, data, pos in stream:
            if skip:
                if kind is START:
                    tag, attrs = data
                    if tag in self.ignore_tags:
                        skip += 1
                elif kind is END:
                    tag = data
                    if tag in self.ignore_tags:
                        skip -= 1
                continue

            if kind is START:
                tag, attrs = data
                if tag in self.ignore_tags:
                    skip += 1
                    continue

                for name, value in attrs:
                    if isinstance(value, basestring):
                        if name in self.include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        # Non-string attribute values are searched
                        # recursively as streams of their own.
                        for lineno, funcname, text in self.extract(
                                _ensure(value), gettext_functions,
                                search_text=name in self.include_attrs):
                            yield lineno, funcname, text

            elif search_text and kind is TEXT:
                text = data.strip()
                # Only text containing at least one letter is reported.
                if text and filter(None, [ch.isalpha() for ch in text]):
                    yield pos[1], None, text

            elif kind is EXPR or kind is EXEC:
                # Scan the raw bytecode of the embedded Python code for calls
                # to gettext-style functions with constant string arguments.
                # NOTE(review): this relies on the function name appearing in
                # ``co_consts`` and on every operand being two bytes with a
                # zero high byte -- confirm against the interpreter versions
                # that are supported.
                consts = dict([(n, chr(i) + '\x00') for i, n in
                               enumerate(data.code.co_consts)])
                gettext_locs = [consts[n] for n in gettext_functions
                                if n in consts]
                if not gettext_locs:
                    # No gettext-style function is referenced. An empty
                    # alternation would let the group match the empty string
                    # and make ``loc[0]`` below fail.
                    continue
                ops = [
                    # Operand bytes are arbitrary, so ``.`` must also match
                    # a newline byte (value 10).
                    '(?s)',
                    _LOAD_CONST, '(',
                    # Operand bytes may coincide with regex metacharacters.
                    '|'.join([re.escape(loc) for loc in gettext_locs]), ')',
                    _CALL_FUNCTION, '.\x00',
                    '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)'
                ]
                for loc, opcodes in re.findall(''.join(ops), data.code.co_code):
                    funcname = data.code.co_consts[ord(loc[0])]
                    strings = []
                    opcodes = iter(opcodes)
                    for opcode in opcodes:
                        if opcode == _BINARY_ADD:
                            # Fold a compile-time string concatenation into
                            # the preceding argument.
                            arg = strings.pop()
                            strings[-1] += arg
                        else:
                            arg = data.code.co_consts[ord(opcodes.next())]
                            opcodes.next() # skip second byte
                            if not isinstance(arg, basestring):
                                break
                            strings.append(unicode(arg))
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield pos[1], funcname, strings

            elif kind is SUB:
                subkind, substream = data
                for lineno, funcname, text in self.extract(substream,
                                                           gettext_functions):
                    yield lineno, funcname, text
|