Mercurial > genshi > genshi-test
comparison genshi/filters/i18n.py @ 820:1837f39efd6f experimental-inline
Sync (old) experimental inline branch with trunk@1027.
author | cmlenz |
---|---|
date | Wed, 11 Mar 2009 17:51:06 +0000 |
parents | 0742f421caba |
children | 09cc3627654c |
comparison
equal
deleted
inserted
replaced
500:0742f421caba | 820:1837f39efd6f |
---|---|
1 """Utilities for internationalization and localization of templates.""" | 1 # -*- coding: utf-8 -*- |
2 | 2 # |
3 try: | 3 # Copyright (C) 2007 Edgewall Software |
4 frozenset | 4 # All rights reserved. |
5 except NameError: | 5 # |
6 from sets import ImmutableSet as frozenset | 6 # This software is licensed as described in the file COPYING, which |
7 from gettext import gettext | 7 # you should have received as part of this distribution. The terms |
8 from opcode import opmap | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://genshi.edgewall.org/log/. | |
13 | |
14 """Utilities for internationalization and localization of templates. | |
15 | |
16 :since: version 0.4 | |
17 """ | |
18 | |
19 from gettext import NullTranslations | |
9 import re | 20 import re |
10 | 21 from types import FunctionType |
11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, _ensure | 22 |
12 from genshi.template.base import Template, EXPR, SUB | 23 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ |
13 from genshi.template.markup import EXEC | 24 END_NS, XML_NAMESPACE, _ensure |
14 | 25 from genshi.template.eval import _ast |
15 _LOAD_NAME = chr(opmap['LOAD_NAME']) | 26 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives |
16 _LOAD_CONST = chr(opmap['LOAD_CONST']) | 27 from genshi.template.directives import Directive |
17 _CALL_FUNCTION = chr(opmap['CALL_FUNCTION']) | 28 from genshi.template.markup import MarkupTemplate, EXEC |
18 _BINARY_ADD = chr(opmap['BINARY_ADD']) | 29 |
19 | 30 __all__ = ['Translator', 'extract'] |
20 | 31 __docformat__ = 'restructuredtext en' |
21 class Translator(object): | 32 |
33 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') | |
34 | |
35 | |
class CommentDirective(Directive):
    """Placeholder directive registered for the ``i18n:comment`` attribute.

    The directive does nothing at render time: `attach` never creates a
    directive instance and hands the stream back untouched.
    """

    __slots__ = []

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        """Decline to attach anything to the stream.

        :param template: the template the directive was found in (unused)
        :param stream: the event stream the directive would apply to
        :param value: the attribute value, i.e. the comment text (unused)
        :param namespaces: the namespace mapping in effect (unused)
        :param pos: the position of the directive in the source (unused)
        :return: a ``(None, stream)`` tuple; ``None`` stands in for the
                 directive instance, and the stream is passed through as-is
        """
        directive = None
        return directive, stream
43 | |
44 | |
class MsgDirective(Directive):
    """Implementation of the ``i18n:msg`` directive.

    The content of the element carrying the directive is collected into a
    `MessageBuffer`, the resulting message identifier is passed through the
    ``gettext`` function stored in the context under ``'_i18n.gettext'``,
    and the translated message is expanded back into stream events.
    """

    __slots__ = ['params']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        """Initialize the directive.

        :param value: comma-separated list of parameter names; the names are
                      assigned, in order, to the expressions found inside
                      the element
        :param template: the template the directive belongs to
        :param hints: accepted for signature compatibility (unused here)
        :param namespaces: the namespace mapping in effect
        :param lineno: line number of the directive in the source
        :param offset: column offset of the directive in the source
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.params = [name.strip() for name in value.split(',')]

    def __call__(self, stream, directives, ctxt, **vars):
        """Translate the content of the element the directive applies to.

        :param stream: the event stream of the element, including its outer
                       start and end tags
        :param directives: remaining directives (not used by this method)
        :param ctxt: the template context; must provide the translation
                     function under the ``'_i18n.gettext'`` key
        :return: a generator over the translated stream events
        """
        # The buffer consumes parameter names as it sees expressions, so it
        # must get its own copy; otherwise the directive would lose its
        # parameters after the first rendering.
        msgbuf = MessageBuffer(list(self.params))

        stream = iter(stream)
        yield stream.next() # the outer start tag

        # Stay one event behind, so that the last event (the outer end tag)
        # is never appended to the buffer.
        previous = stream.next()
        for event in stream:
            msgbuf.append(*previous)
            previous = event

        gettext = ctxt.get('_i18n.gettext')
        for event in msgbuf.translate(gettext(msgbuf.format())):
            yield event

        yield previous # the outer end tag
69 | |
70 | |
71 class Translator(DirectiveFactory): | |
22 """Can extract and translate localizable strings from markup streams and | 72 """Can extract and translate localizable strings from markup streams and |
23 templates. | 73 templates. |
24 | 74 |
25 For example, assume the following template: | 75 For example, assume the following template: |
26 | 76 |
63 <body> | 113 <body> |
64 <h1>Beispiel</h1> | 114 <h1>Beispiel</h1> |
65 <p>Hallo, Hans</p> | 115 <p>Hallo, Hans</p> |
66 </body> | 116 </body> |
67 </html> | 117 </html> |
118 | |
119 Note that elements defining ``xml:lang`` attributes that do not contain | |
120 variable expressions are ignored by this filter. That can be used to | |
121 exclude specific parts of a template from being extracted and translated. | |
68 """ | 122 """ |
123 | |
124 directives = [ | |
125 ('comment', CommentDirective), | |
126 ('msg', MsgDirective) | |
127 ] | |
69 | 128 |
70 IGNORE_TAGS = frozenset([ | 129 IGNORE_TAGS = frozenset([ |
71 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), | 130 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), |
72 QName('style'), QName('http://www.w3.org/1999/xhtml}style') | 131 QName('style'), QName('http://www.w3.org/1999/xhtml}style') |
73 ]) | 132 ]) |
74 INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby', | 133 INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby', |
75 'summary', 'title']) | 134 'summary', 'title']) |
76 | 135 NAMESPACE = I18N_NAMESPACE |
77 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, | 136 |
78 include_attrs=INCLUDE_ATTRS): | 137 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, |
138 include_attrs=INCLUDE_ATTRS, extract_text=True): | |
79 """Initialize the translator. | 139 """Initialize the translator. |
80 | 140 |
81 :param translate: the translation function, for example ``gettext`` or | 141 :param translate: the translation function, for example ``gettext`` or |
82 ``ugettext``. | 142 ``ugettext``. |
83 :param ignore_tags: a set of tag names that should not be localized | 143 :param ignore_tags: a set of tag names that should not be localized |
84 :param include_attrs: a set of attribute names that should be localized | 144 :param include_attrs: a set of attribute names that should be localized |
145 :param extract_text: whether the content of text nodes should be | |
146 extracted, or only text in explicit ``gettext`` | |
147 function calls | |
148 | |
149 :note: Changed in 0.6: the `translate` parameter can now be either | |
150 a ``gettext``-style function, or an object compatible with the | |
151 ``NullTranslations`` or ``GNUTranslations`` interface | |
85 """ | 152 """ |
86 self.translate = translate | 153 self.translate = translate |
87 self.ignore_tags = ignore_tags | 154 self.ignore_tags = ignore_tags |
88 self.include_attrs = include_attrs | 155 self.include_attrs = include_attrs |
156 self.extract_text = extract_text | |
89 | 157 |
90 def __call__(self, stream, ctxt=None, search_text=True): | 158 def __call__(self, stream, ctxt=None, search_text=True): |
91 """Translate any localizable strings in the given stream. | 159 """Translate any localizable strings in the given stream. |
92 | 160 |
93 This function shouldn't be called directly. Instead, an instance of | 161 This function shouldn't be called directly. Instead, an instance of |
102 internally) | 170 internally) |
103 :return: the localized stream | 171 :return: the localized stream |
104 """ | 172 """ |
105 ignore_tags = self.ignore_tags | 173 ignore_tags = self.ignore_tags |
106 include_attrs = self.include_attrs | 174 include_attrs = self.include_attrs |
107 translate = self.translate | |
108 skip = 0 | 175 skip = 0 |
176 xml_lang = XML_NAMESPACE['lang'] | |
177 | |
178 if type(self.translate) is FunctionType: | |
179 gettext = self.translate | |
180 else: | |
181 gettext = self.translate.ugettext | |
182 if ctxt: | |
183 ctxt['_i18n.gettext'] = gettext | |
184 | |
185 extract_text = self.extract_text | |
186 if not extract_text: | |
187 search_text = False | |
109 | 188 |
110 for kind, data, pos in stream: | 189 for kind, data, pos in stream: |
111 | 190 |
112 # skip chunks that should not be localized | 191 # skip chunks that should not be localized |
113 if skip: | 192 if skip: |
114 if kind is START: | 193 if kind is START: |
115 tag, attrs = data | 194 skip += 1 |
116 if tag in ignore_tags: | |
117 skip += 1 | |
118 elif kind is END: | 195 elif kind is END: |
119 if tag in ignore_tags: | 196 skip -= 1 |
120 skip -= 1 | |
121 yield kind, data, pos | 197 yield kind, data, pos |
122 continue | 198 continue |
123 | 199 |
124 # handle different events that can be localized | 200 # handle different events that can be localized |
125 if kind is START: | 201 if kind is START: |
126 tag, attrs = data | 202 tag, attrs = data |
127 if tag in ignore_tags: | 203 if tag in self.ignore_tags or \ |
204 isinstance(attrs.get(xml_lang), basestring): | |
128 skip += 1 | 205 skip += 1 |
129 yield kind, data, pos | 206 yield kind, data, pos |
130 continue | 207 continue |
131 | 208 |
132 new_attrs = [] | 209 new_attrs = [] |
133 changed = False | 210 changed = False |
134 for name, value in attrs: | 211 for name, value in attrs: |
135 newval = value | 212 newval = value |
136 if isinstance(value, basestring): | 213 if extract_text and isinstance(value, basestring): |
137 if name in include_attrs: | 214 if name in include_attrs: |
138 newval = self.translate(value) | 215 newval = gettext(value) |
139 else: | 216 else: |
140 newval = list(self(_ensure(value), ctxt, | 217 newval = list(self(_ensure(value), ctxt, |
141 search_text=name in include_attrs) | 218 search_text=False) |
142 ) | 219 ) |
143 if newval != value: | 220 if newval != value: |
144 value = newval | 221 value = newval |
145 changed = True | 222 changed = True |
146 new_attrs.append((name, value)) | 223 new_attrs.append((name, value)) |
147 if changed: | 224 if changed: |
148 attrs = new_attrs | 225 attrs = Attrs(new_attrs) |
149 | 226 |
150 yield kind, (tag, attrs), pos | 227 yield kind, (tag, attrs), pos |
151 | 228 |
152 elif search_text and kind is TEXT: | 229 elif search_text and kind is TEXT: |
153 text = data.strip() | 230 text = data.strip() |
154 if text: | 231 if text: |
155 data = data.replace(text, translate(text)) | 232 data = data.replace(text, unicode(gettext(text))) |
156 yield kind, data, pos | 233 yield kind, data, pos |
157 | 234 |
158 elif kind is SUB: | 235 elif kind is SUB: |
159 subkind, substream = data | 236 directives, substream = data |
160 new_substream = list(self(substream, ctxt)) | 237 # If this is an i18n:msg directive, no need to translate text |
161 yield kind, (subkind, new_substream), pos | 238 # nodes here |
239 is_msg = filter(None, [isinstance(d, MsgDirective) | |
240 for d in directives]) | |
241 substream = list(self(substream, ctxt, | |
242 search_text=not is_msg)) | |
243 yield kind, (directives, substream), pos | |
162 | 244 |
163 else: | 245 else: |
164 yield kind, data, pos | 246 yield kind, data, pos |
165 | 247 |
166 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', | 248 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', |
167 'ugettext', 'ungettext') | 249 'ugettext', 'ungettext') |
168 | 250 |
169 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, | 251 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, |
170 search_text=True): | 252 search_text=True, msgbuf=None): |
171 """Extract localizable strings from the given template stream. | 253 """Extract localizable strings from the given template stream. |
172 | 254 |
173 For every string found, this function yields a ``(lineno, function, | 255 For every string found, this function yields a ``(lineno, function, |
174 message)`` tuple, where: | 256 message, comments)`` tuple, where: |
175 | 257 |
176 * ``lineno`` is the number of the line on which the string was found, | 258 * ``lineno`` is the number of the line on which the string was found, |
177 * ``function`` is the name of the ``gettext`` function used (if the | 259 * ``function`` is the name of the ``gettext`` function used (if the |
178 string was extracted from embedded Python code), and | 260 string was extracted from embedded Python code), and |
179 * ``message`` is the string itself (a ``unicode`` object, or a tuple | 261 * ``message`` is the string itself (a ``unicode`` object, or a tuple |
180 of ``unicode`` objects for functions with multiple string arguments). | 262 of ``unicode`` objects for functions with multiple string |
263 arguments). | |
264 * ``comments`` is a list of comments related to the message, extracted | |
265 from ``i18n:comment`` attributes found in the markup | |
181 | 266 |
182 >>> from genshi.template import MarkupTemplate | 267 >>> from genshi.template import MarkupTemplate |
183 >>> | 268 >>> |
184 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> | 269 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> |
185 ... <head> | 270 ... <head> |
190 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> | 275 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> |
191 ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> | 276 ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> |
192 ... </body> | 277 ... </body> |
193 ... </html>''', filename='example.html') | 278 ... </html>''', filename='example.html') |
194 >>> | 279 >>> |
195 >>> for lineno, funcname, message in Translator().extract(tmpl.stream): | 280 >>> for line, func, msg, comments in Translator().extract(tmpl.stream): |
196 ... print "%d, %r, %r" % (lineno, funcname, message) | 281 ... print "%d, %r, %r" % (line, func, msg) |
197 3, None, u'Example' | 282 3, None, u'Example' |
198 6, None, u'Example' | 283 6, None, u'Example' |
199 7, '_', u'Hello, %(name)s' | 284 7, '_', u'Hello, %(name)s' |
200 8, 'ngettext', (u'You have %d item', u'You have %d items') | 285 8, 'ngettext', (u'You have %d item', u'You have %d items', None) |
201 | 286 |
202 :param stream: the event stream to extract strings from; can be a | 287 :param stream: the event stream to extract strings from; can be a |
203 regular stream or a template stream | 288 regular stream or a template stream |
204 :param gettext_functions: a sequence of function names that should be | 289 :param gettext_functions: a sequence of function names that should be |
205 treated as gettext-style localization | 290 treated as gettext-style localization |
208 extracted (used internally) | 293 extracted (used internally) |
209 | 294 |
210 :note: Changed in 0.4.1: For a function with multiple string arguments | 295 :note: Changed in 0.4.1: For a function with multiple string arguments |
211 (such as ``ngettext``), a single item with a tuple of strings is | 296 (such as ``ngettext``), a single item with a tuple of strings is |
212 yielded, instead of an item for each string argument. | 297 yielded, instead of an item for each string argument. |
213 """ | 298 :note: Changed in 0.6: The returned tuples now include a 4th element, |
214 tagname = None | 299 which is a list of comments for the translator |
300 """ | |
301 if not self.extract_text: | |
302 search_text = False | |
215 skip = 0 | 303 skip = 0 |
304 i18n_comment = I18N_NAMESPACE['comment'] | |
305 i18n_msg = I18N_NAMESPACE['msg'] | |
306 xml_lang = XML_NAMESPACE['lang'] | |
216 | 307 |
217 for kind, data, pos in stream: | 308 for kind, data, pos in stream: |
309 | |
218 if skip: | 310 if skip: |
219 if kind is START: | 311 if kind is START: |
220 tag, attrs = data | 312 skip += 1 |
221 if tag in self.ignore_tags: | |
222 skip += 1 | |
223 if kind is END: | 313 if kind is END: |
224 tag = data | 314 skip -= 1 |
225 if tag in self.ignore_tags: | 315 |
226 skip -= 1 | 316 if kind is START and not skip: |
227 continue | |
228 | |
229 if kind is START: | |
230 tag, attrs = data | 317 tag, attrs = data |
231 if tag in self.ignore_tags: | 318 |
319 if tag in self.ignore_tags or \ | |
320 isinstance(attrs.get(xml_lang), basestring): | |
232 skip += 1 | 321 skip += 1 |
233 continue | 322 continue |
234 | 323 |
235 for name, value in attrs: | 324 for name, value in attrs: |
236 if isinstance(value, basestring): | 325 if search_text and isinstance(value, basestring): |
237 if name in self.include_attrs: | 326 if name in self.include_attrs: |
238 text = value.strip() | 327 text = value.strip() |
239 if text: | 328 if text: |
240 yield pos[1], None, text | 329 yield pos[1], None, text, [] |
241 else: | 330 else: |
242 for lineno, funcname, text in self.extract( | 331 for lineno, funcname, text, comments in self.extract( |
243 _ensure(value), gettext_functions, | 332 _ensure(value), gettext_functions, |
244 search_text=name in self.include_attrs): | 333 search_text=False): |
245 yield lineno, funcname, text | 334 yield lineno, funcname, text, comments |
246 | 335 |
247 elif search_text and kind is TEXT: | 336 if msgbuf: |
248 text = data.strip() | 337 msgbuf.append(kind, data, pos) |
249 if text and filter(None, [ch.isalpha() for ch in text]): | 338 else: |
250 yield pos[1], None, text | 339 msg_params = attrs.get(i18n_msg) |
340 if msg_params is not None: | |
341 if type(msg_params) is list: # event tuple | |
342 msg_params = msg_params[0][1] | |
343 msgbuf = MessageBuffer( | |
344 msg_params, attrs.get(i18n_comment), pos[1] | |
345 ) | |
346 | |
347 elif not skip and search_text and kind is TEXT: | |
348 if not msgbuf: | |
349 text = data.strip() | |
350 if text and filter(None, [ch.isalpha() for ch in text]): | |
351 yield pos[1], None, text, [] | |
352 else: | |
353 msgbuf.append(kind, data, pos) | |
354 | |
355 elif not skip and msgbuf and kind is END: | |
356 msgbuf.append(kind, data, pos) | |
357 if not msgbuf.depth: | |
358 yield msgbuf.lineno, None, msgbuf.format(), \ | |
359 filter(None, [msgbuf.comment]) | |
360 msgbuf = None | |
251 | 361 |
252 elif kind is EXPR or kind is EXEC: | 362 elif kind is EXPR or kind is EXEC: |
253 consts = dict([(n, chr(i) + '\x00') for i, n in | 363 if msgbuf: |
254 enumerate(data.code.co_consts)]) | 364 msgbuf.append(kind, data, pos) |
255 gettext_locs = [consts[n] for n in gettext_functions | 365 for funcname, strings in extract_from_code(data, |
256 if n in consts] | 366 gettext_functions): |
257 ops = [ | 367 yield pos[1], funcname, strings, [] |
258 _LOAD_CONST, '(', '|'.join(gettext_locs), ')', | |
259 _CALL_FUNCTION, '.\x00', | |
260 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' | |
261 ] | |
262 for loc, opcodes in re.findall(''.join(ops), data.code.co_code): | |
263 funcname = data.code.co_consts[ord(loc[0])] | |
264 strings = [] | |
265 opcodes = iter(opcodes) | |
266 for opcode in opcodes: | |
267 if opcode == _BINARY_ADD: | |
268 arg = strings.pop() | |
269 strings[-1] += arg | |
270 else: | |
271 arg = data.code.co_consts[ord(opcodes.next())] | |
272 opcodes.next() # skip second byte | |
273 if not isinstance(arg, basestring): | |
274 break | |
275 strings.append(unicode(arg)) | |
276 if len(strings) == 1: | |
277 strings = strings[0] | |
278 else: | |
279 strings = tuple(strings) | |
280 yield pos[1], funcname, strings | |
281 | 368 |
282 elif kind is SUB: | 369 elif kind is SUB: |
283 subkind, substream = data | 370 subkind, substream = data |
284 for lineno, funcname, text in self.extract(substream, | 371 messages = self.extract(substream, gettext_functions, |
285 gettext_functions): | 372 search_text=search_text and not skip, |
286 yield lineno, funcname, text | 373 msgbuf=msgbuf) |
374 for lineno, funcname, text, comments in messages: | |
375 yield lineno, funcname, text, comments | |
376 | |
377 | |
class MessageBuffer(object):
    """Helper class for managing internationalized mixed content.

    Stream events are fed in through `append`; `format` then produces the
    message identifier, in which nested elements appear as ``[N:...]``
    groups and expressions as ``%(name)s`` placeholders. `translate` maps a
    translated message back onto the buffered events.

    :since: version 0.5
    """

    def __init__(self, params=u'', comment=None, lineno=-1):
        """Initialize the message buffer.

        :param params: comma-separated list of parameter names, or an
                       already split sequence of names
        :type params: `basestring`
        :param comment: an optional comment associated with the message
        :param lineno: the line number on which the first stream event
                       belonging to the message was found
        """
        if isinstance(params, basestring):
            params = [name.strip() for name in params.split(',')]
        else:
            # `append` pops names off this list, so work on a private copy
            # rather than consuming the caller's sequence.
            params = list(params)
        self.params = params
        self.comment = comment
        self.lineno = lineno
        self.string = []   # fragments of the message identifier
        self.events = {}   # order number -> events (None marks text slots)
        self.values = {}   # parameter name -> buffered expression event
        self.depth = 1     # nesting depth; reaches 0 on the closing END
        self.order = 1     # number given to the next nested element
        self.stack = [0]   # order numbers of the currently open elements

    def append(self, kind, data, pos):
        """Append a stream event to the buffer.

        :param kind: the stream event kind
        :param data: the event data
        :param pos: the position of the event in the source
        """
        if kind is TEXT:
            self.string.append(data)
            self.events.setdefault(self.stack[-1], []).append(None)
        elif kind is EXPR:
            # Expressions take the declared parameter names in order.
            param = self.params.pop(0)
            self.string.append('%%(%s)s' % param)
            self.events.setdefault(self.stack[-1], []).append(None)
            self.values[param] = (kind, data, pos)
        else:
            if kind is START:
                self.string.append(u'[%d:' % self.order)
                self.events.setdefault(self.order, []).append((kind, data, pos))
                self.stack.append(self.order)
                self.depth += 1
                self.order += 1
            elif kind is END:
                self.depth -= 1
                # Depth 0 means this END closes the message itself; it is
                # neither recorded nor rendered into the identifier.
                if self.depth:
                    self.events[self.stack[-1]].append((kind, data, pos))
                    self.string.append(u']')
                    self.stack.pop()

    def format(self):
        """Return a message identifier representing the content in the
        buffer.
        """
        return u''.join(self.string).strip()

    def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
        """Interpolate the given message translation with the events in the
        buffer and return the translated stream.

        The buffered events are consumed in the process.

        :param string: the translated message string
        :param regex: the pattern used to find ``%(name)s`` placeholders
        :return: a generator over the resulting stream events
        """
        parts = parse_msg(string)
        for order, string in parts:
            events = self.events[order]
            while events:
                event = events.pop(0)
                if event:
                    yield event
                else:
                    if not string:
                        break
                    # Odd indexes of the split hold the captured names.
                    for idx, part in enumerate(regex.split(string)):
                        if idx % 2:
                            yield self.values[part]
                        elif part:
                            yield TEXT, part, (None, -1, -1)
                    if not self.events[order] or not self.events[order][0]:
                        break
462 | |
463 | |
def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|\]')):
    """Parse a translated message using Genshi mixed content message
    formatting.

    >>> parse_msg("See [1:Help].")
    [(0, 'See '), (1, 'Help'), (0, '.')]

    >>> parse_msg("See [1:our [2:Help] page] for details.")
    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]

    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]

    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]

    A closing bracket that has no matching ``[N:`` is kept as literal text
    instead of breaking the parser:

    >>> parse_msg("a] b")
    [(0, 'a] b')]

    :param string: the translated message string
    :param regex: the pattern matching ``[N:`` and ``]`` markers
    :return: a list of ``(order, string)`` tuples
    :rtype: `list`
    """
    parts = []
    stack = [0]
    literal = ''  # top-level text already scanned, ending in a stray "]"
    while True:
        mo = regex.search(string)
        if not mo:
            break

        orderno = mo.group(1)
        if orderno is None and len(stack) == 1:
            # Unbalanced "]" at top level: there is nothing to close, so
            # treat it as ordinary text and never pop the base level.
            literal += string[:mo.end()]
            string = string[mo.end():]
            continue

        text = literal + string[:mo.start()]
        literal = ''
        # Empty text is only recorded inside a group (e.g. "[1:]").
        if text or stack[-1]:
            parts.append((stack[-1], text))
        string = string[mo.end():]

        if orderno is not None:
            stack.append(int(orderno))
        else:
            stack.pop()

    string = literal + string
    if string:
        parts.append((stack[-1], string))

    return parts
507 | |
508 | |
def extract_from_code(code, gettext_functions):
    """Extract strings from Python bytecode.

    >>> from genshi.template.eval import Expression

    >>> expr = Expression('_("Hello")')
    >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
    [('_', u'Hello')]

    >>> expr = Expression('ngettext("You have %(num)s item", '
    ...                            '"You have %(num)s items", num)')
    >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
    [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]

    The syntax tree of the code object is walked; for every call to a plain
    name listed in `gettext_functions`, the arguments are collected. String
    literals are reported as ``unicode`` objects, any other argument as
    ``None``.

    :param code: the `Code` object
    :type code: `genshi.template.eval.Code`
    :param gettext_functions: a sequence of function names
    :return: an iterator over ``(funcname, strings)`` tuples, where
             ``strings`` is a single item if the call had exactly one
             argument, and a tuple otherwise
    :since: version 0.5
    """
    def _walk(node):
        if isinstance(node, _ast.Call) and isinstance(node.func, _ast.Name) \
                and node.func.id in gettext_functions:
            strings = []
            def _add(arg):
                if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
                    if isinstance(arg.s, unicode):
                        # Already decoded (e.g. a u"..." literal); calling
                        # unicode() with an encoding would raise TypeError.
                        strings.append(arg.s)
                    else:
                        strings.append(unicode(arg.s, 'utf-8'))
                elif arg:
                    # Non-literal argument: keep its position in the tuple.
                    strings.append(None)
            for arg in node.args:
                _add(arg)
            _add(node.starargs)
            _add(node.kwargs)
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
            yield node.func.id, strings
        elif node._fields:
            children = []
            for field in node._fields:
                child = getattr(node, field, None)
                if isinstance(child, list):
                    for elem in child:
                        children.append(elem)
                elif isinstance(child, _ast.AST):
                    children.append(child)
            for child in children:
                for funcname, strings in _walk(child):
                    yield funcname, strings
    return _walk(code.ast)
558 | |
559 | |
def extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Genshi templates.

    Recognized keys in `options` are ``template_class`` (a class, or a
    ``module:ClassName`` string), ``encoding``, ``extract_text``,
    ``ignore_tags`` and ``include_attrs``; the last two may be given as
    whitespace-separated strings.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results (not used by this function)
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    def _qnames(key, default):
        # Accept either a whitespace-separated string or a collection of
        # names, and normalize the result to a list of QName objects.
        names = options.get(key, default)
        if isinstance(names, basestring):
            names = names.split()
        return [QName(name) for name in names]

    template_class = options.get('template_class', MarkupTemplate)
    if isinstance(template_class, basestring):
        modname, clsname = template_class.split(':', 1)
        imported = __import__(modname, {}, {}, [clsname])
        template_class = getattr(imported, clsname)
    encoding = options.get('encoding', None)

    extract_text = options.get('extract_text', True)
    if isinstance(extract_text, basestring):
        extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')

    ignore_tags = _qnames('ignore_tags', Translator.IGNORE_TAGS)
    include_attrs = _qnames('include_attrs', Translator.INCLUDE_ATTRS)

    filename = getattr(fileobj, 'name', None)
    tmpl = template_class(fileobj, filename=filename, encoding=encoding)

    # No translation function is needed, only extraction is performed.
    translator = Translator(None, ignore_tags, include_attrs, extract_text)
    for message in translator.extract(tmpl.stream, gettext_functions=keywords):
        yield message