comparison genshi/filters/i18n.py @ 902:09cc3627654c experimental-inline

Sync `experimental/inline` branch with [source:trunk@1126].
author cmlenz
date Fri, 23 Apr 2010 21:08:26 +0000
parents 1837f39efd6f
children
comparison
equal deleted inserted replaced
830:de82830f8816 902:09cc3627654c
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 # 2 #
3 # Copyright (C) 2007 Edgewall Software 3 # Copyright (C) 2007-2010 Edgewall Software
4 # All rights reserved. 4 # All rights reserved.
5 # 5 #
6 # This software is licensed as described in the file COPYING, which 6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms 7 # you should have received as part of this distribution. The terms
8 # are also available at http://genshi.edgewall.org/wiki/License. 8 # are also available at http://genshi.edgewall.org/wiki/License.
9 # 9 #
10 # This software consists of voluntary contributions made by many 10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision 11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://genshi.edgewall.org/log/. 12 # history and logs, available at http://genshi.edgewall.org/log/.
13 13
14 """Utilities for internationalization and localization of templates. 14 """Directives and utilities for internationalization and localization of
15 templates.
15 16
16 :since: version 0.4 17 :since: version 0.4
18 :note: Directives support added since version 0.6
17 """ 19 """
18 20
21 try:
22 any
23 except NameError:
24 from genshi.util import any
19 from gettext import NullTranslations 25 from gettext import NullTranslations
26 import os
20 import re 27 import re
21 from types import FunctionType 28 from types import FunctionType
22 29
23 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ 30 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
24 END_NS, XML_NAMESPACE, _ensure 31 XML_NAMESPACE, _ensure, StreamEventKind
25 from genshi.template.eval import _ast 32 from genshi.template.eval import _ast
26 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives 33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
27 from genshi.template.directives import Directive 34 from genshi.template.directives import Directive, StripDirective
28 from genshi.template.markup import MarkupTemplate, EXEC 35 from genshi.template.markup import MarkupTemplate, EXEC
29 36
30 __all__ = ['Translator', 'extract'] 37 __all__ = ['Translator', 'extract']
31 __docformat__ = 'restructuredtext en' 38 __docformat__ = 'restructuredtext en'
32 39
40
33 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') 41 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
34 42
35 43 MSGBUF = StreamEventKind('MSGBUF')
36 class CommentDirective(Directive): 44 SUB_START = StreamEventKind('SUB_START')
37 45 SUB_END = StreamEventKind('SUB_END')
38 __slots__ = [] 46
47 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
48 'ugettext', 'ungettext')
49
50
51 class I18NDirective(Directive):
52 """Simple interface for i18n directives to support messages extraction."""
53
54 def __call__(self, stream, directives, ctxt, **vars):
55 return _apply_directives(stream, directives, ctxt, vars)
56
57
58 class ExtractableI18NDirective(I18NDirective):
59 """Simple interface for directives to support messages extraction."""
60
61 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
62 search_text=True, comment_stack=None):
63 raise NotImplementedError
64
65
66 class CommentDirective(I18NDirective):
67 """Implementation of the ``i18n:comment`` template directive which adds
68 translation comments.
69
70 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
71 ... <p i18n:comment="As in Foo Bar">Foo</p>
72 ... </html>''')
73 >>> translator = Translator()
74 >>> translator.setup(tmpl)
75 >>> list(translator.extract(tmpl.stream))
76 [(2, None, u'Foo', [u'As in Foo Bar'])]
77 """
78 __slots__ = ['comment']
79
80 def __init__(self, value, template=None, namespaces=None, lineno=-1,
81 offset=-1):
82 Directive.__init__(self, None, template, namespaces, lineno, offset)
83 self.comment = value
84
85
86 class MsgDirective(ExtractableI18NDirective):
87 r"""Implementation of the ``i18n:msg`` directive which marks inner content
88 as translatable. Consider the following examples:
89
90 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
91 ... <div i18n:msg="">
92 ... <p>Foo</p>
93 ... <p>Bar</p>
94 ... </div>
95 ... <p i18n:msg="">Foo <em>bar</em>!</p>
96 ... </html>''')
97
98 >>> translator = Translator()
99 >>> translator.setup(tmpl)
100 >>> list(translator.extract(tmpl.stream))
101 [(2, None, u'[1:Foo]\n [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
102 >>> print(tmpl.generate().render())
103 <html>
104 <div><p>Foo</p>
105 <p>Bar</p></div>
106 <p>Foo <em>bar</em>!</p>
107 </html>
108
109 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
110 ... <div i18n:msg="fname, lname">
111 ... <p>First Name: ${fname}</p>
112 ... <p>Last Name: ${lname}</p>
113 ... </div>
114 ... <p i18n:msg="">Foo <em>bar</em>!</p>
115 ... </html>''')
116 >>> translator.setup(tmpl)
117 >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
118 [(2, None, u'[1:First Name: %(fname)s]\n [2:Last Name: %(lname)s]', []),
119 (6, None, u'Foo [1:bar]!', [])]
120
121 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
122 ... <div i18n:msg="fname, lname">
123 ... <p>First Name: ${fname}</p>
124 ... <p>Last Name: ${lname}</p>
125 ... </div>
126 ... <p i18n:msg="">Foo <em>bar</em>!</p>
127 ... </html>''')
128 >>> translator.setup(tmpl)
129 >>> print(tmpl.generate(fname='John', lname='Doe').render())
130 <html>
131 <div><p>First Name: John</p>
132 <p>Last Name: Doe</p></div>
133 <p>Foo <em>bar</em>!</p>
134 </html>
135
136 Starting and ending white-space is stripped off to make it simpler for
137 translators. Stripping it is not that important since it's on the html
138 source, the rendered output will remain the same.
139 """
140 __slots__ = ['params', 'lineno']
141
142 def __init__(self, value, template=None, namespaces=None, lineno=-1,
143 offset=-1):
144 Directive.__init__(self, None, template, namespaces, lineno, offset)
145 self.params = [param.strip() for param in value.split(',') if param]
146 self.lineno = lineno
39 147
40 @classmethod 148 @classmethod
41 def attach(cls, template, stream, value, namespaces, pos): 149 def attach(cls, template, stream, value, namespaces, pos):
42 return None, stream 150 if type(value) is dict:
43 151 value = value.get('params', '').strip()
44 152 return super(MsgDirective, cls).attach(template, stream, value.strip(),
45 class MsgDirective(Directive): 153 namespaces, pos)
46 154
155 def __call__(self, stream, directives, ctxt, **vars):
156 gettext = ctxt.get('_i18n.gettext')
157 if ctxt.get('_i18n.domain'):
158 dgettext = ctxt.get('_i18n.dgettext')
159 assert hasattr(dgettext, '__call__'), \
160 'No domain gettext function passed'
161 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
162
163 def _generate():
164 msgbuf = MessageBuffer(self)
165 previous = stream.next()
166 if previous[0] is START:
167 yield previous
168 else:
169 msgbuf.append(*previous)
170 previous = stream.next()
171 for kind, data, pos in stream:
172 msgbuf.append(*previous)
173 previous = kind, data, pos
174 if previous[0] is not END:
175 msgbuf.append(*previous)
176 previous = None
177 for event in msgbuf.translate(gettext(msgbuf.format())):
178 yield event
179 if previous:
180 yield previous
181
182 return _apply_directives(_generate(), directives, ctxt, vars)
183
184 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
185 search_text=True, comment_stack=None):
186 msgbuf = MessageBuffer(self)
187 strip = False
188
189 stream = iter(stream)
190 previous = stream.next()
191 if previous[0] is START:
192 for message in translator._extract_attrs(previous,
193 gettext_functions,
194 search_text=search_text):
195 yield message
196 previous = stream.next()
197 strip = True
198 for event in stream:
199 if event[0] is START:
200 for message in translator._extract_attrs(event,
201 gettext_functions,
202 search_text=search_text):
203 yield message
204 msgbuf.append(*previous)
205 previous = event
206 if not strip:
207 msgbuf.append(*previous)
208
209 yield self.lineno, None, msgbuf.format(), comment_stack[-1:]
210
211
212 class ChooseBranchDirective(I18NDirective):
47 __slots__ = ['params'] 213 __slots__ = ['params']
48 214
49 def __init__(self, value, template, hints=None, namespaces=None,
50 lineno=-1, offset=-1):
51 Directive.__init__(self, None, template, namespaces, lineno, offset)
52 self.params = [name.strip() for name in value.split(',')]
53
54 def __call__(self, stream, directives, ctxt, **vars): 215 def __call__(self, stream, directives, ctxt, **vars):
55 msgbuf = MessageBuffer(self.params) 216 self.params = ctxt.get('_i18n.choose.params', [])[:]
56 217 msgbuf = MessageBuffer(self)
57 stream = iter(stream) 218 stream = _apply_directives(stream, directives, ctxt, vars)
58 yield stream.next() # the outer start tag 219
59 previous = stream.next() 220 previous = stream.next()
221 if previous[0] is START:
222 yield previous
223 else:
224 msgbuf.append(*previous)
225
226 try:
227 previous = stream.next()
228 except StopIteration:
229 # For example <i18n:singular> or <i18n:plural> directives
230 yield MSGBUF, (), -1 # the place holder for msgbuf output
231 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
232 return
233
60 for event in stream: 234 for event in stream:
61 msgbuf.append(*previous) 235 msgbuf.append(*previous)
62 previous = event 236 previous = event
63 237 yield MSGBUF, (), -1 # the place holder for msgbuf output
64 gettext = ctxt.get('_i18n.gettext') 238
65 for event in msgbuf.translate(gettext(msgbuf.format())): 239 if previous[0] is END:
240 yield previous # the outer end tag
241 else:
242 msgbuf.append(*previous)
243 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
244
245 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
246 search_text=True, comment_stack=None, msgbuf=None):
247 stream = iter(stream)
248 previous = stream.next()
249
250 if previous[0] is START:
251 # skip the enclosing element
252 for message in translator._extract_attrs(previous,
253 gettext_functions,
254 search_text=search_text):
255 yield message
256 previous = stream.next()
257
258 for event in stream:
259 if previous[0] is START:
260 for message in translator._extract_attrs(previous,
261 gettext_functions,
262 search_text=search_text):
263 yield message
264 msgbuf.append(*previous)
265 previous = event
266
267 if previous[0] is not END:
268 msgbuf.append(*previous)
269
270
271 class SingularDirective(ChooseBranchDirective):
272 """Implementation of the ``i18n:singular`` directive to be used with the
273 ``i18n:choose`` directive."""
274
275
276 class PluralDirective(ChooseBranchDirective):
277 """Implementation of the ``i18n:plural`` directive to be used with the
278 ``i18n:choose`` directive."""
279
280
281 class ChooseDirective(ExtractableI18NDirective):
282 """Implementation of the ``i18n:choose`` directive which provides plural
283 internationalisation of strings.
284
285 This directive requires at least one parameter: an expression that evaluates
286 to an integer, which is used to choose between the singular and plural forms.
287 If there are also expressions inside the singular and plural versions of the
288 string, you need to pass names for those parameters as well. Consider the
289 following examples:
290
291 >>> tmpl = MarkupTemplate('''\
292 <html xmlns:i18n="http://genshi.edgewall.org/i18n">
293 ... <div i18n:choose="num; num">
294 ... <p i18n:singular="">There is $num coin</p>
295 ... <p i18n:plural="">There are $num coins</p>
296 ... </div>
297 ... </html>''')
298 >>> translator = Translator()
299 >>> translator.setup(tmpl)
300 >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
301 [(2, 'ngettext', (u'There is %(num)s coin',
302 u'There are %(num)s coins'), [])]
303
304 >>> tmpl = MarkupTemplate('''\
305 <html xmlns:i18n="http://genshi.edgewall.org/i18n">
306 ... <div i18n:choose="num; num">
307 ... <p i18n:singular="">There is $num coin</p>
308 ... <p i18n:plural="">There are $num coins</p>
309 ... </div>
310 ... </html>''')
311 >>> translator.setup(tmpl)
312 >>> print(tmpl.generate(num=1).render())
313 <html>
314 <div>
315 <p>There is 1 coin</p>
316 </div>
317 </html>
318 >>> print(tmpl.generate(num=2).render())
319 <html>
320 <div>
321 <p>There are 2 coins</p>
322 </div>
323 </html>
324
325 When used as an element and not as an attribute:
326
327 >>> tmpl = MarkupTemplate('''\
328 <html xmlns:i18n="http://genshi.edgewall.org/i18n">
329 ... <i18n:choose numeral="num" params="num">
330 ... <p i18n:singular="">There is $num coin</p>
331 ... <p i18n:plural="">There are $num coins</p>
332 ... </i18n:choose>
333 ... </html>''')
334 >>> translator.setup(tmpl)
335 >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
336 [(2, 'ngettext', (u'There is %(num)s coin',
337 u'There are %(num)s coins'), [])]
338 """
339 __slots__ = ['numeral', 'params', 'lineno']
340
341 def __init__(self, value, template=None, namespaces=None, lineno=-1,
342 offset=-1):
343 Directive.__init__(self, None, template, namespaces, lineno, offset)
344 params = [v.strip() for v in value.split(';')]
345 self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
346 self.params = params and [name.strip() for name in
347 params[0].split(',') if name] or []
348 self.lineno = lineno
349
350 @classmethod
351 def attach(cls, template, stream, value, namespaces, pos):
352 if type(value) is dict:
353 numeral = value.get('numeral', '').strip()
354 assert numeral is not '', "at least pass the numeral param"
355 params = [v.strip() for v in value.get('params', '').split(',')]
356 value = '%s; ' % numeral + ', '.join(params)
357 return super(ChooseDirective, cls).attach(template, stream, value,
358 namespaces, pos)
359
360 def __call__(self, stream, directives, ctxt, **vars):
361 ctxt.push({'_i18n.choose.params': self.params,
362 '_i18n.choose.singular': None,
363 '_i18n.choose.plural': None})
364
365 ngettext = ctxt.get('_i18n.ngettext')
366 assert hasattr(ngettext, '__call__'), 'No ngettext function available'
367 dngettext = ctxt.get('_i18n.dngettext')
368 if not dngettext:
369 dngettext = lambda d, s, p, n: ngettext(s, p, n)
370
371 new_stream = []
372 singular_stream = None
373 singular_msgbuf = None
374 plural_stream = None
375 plural_msgbuf = None
376
377 numeral = self.numeral.evaluate(ctxt)
378 is_plural = self._is_plural(numeral, ngettext)
379
380 for event in stream:
381 if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
382 for d in event[1][0]):
383 subdirectives, substream = event[1]
384
385 if isinstance(subdirectives[0], SingularDirective):
386 singular_stream = list(_apply_directives(substream,
387 subdirectives,
388 ctxt, vars))
389 new_stream.append((MSGBUF, None, (None, -1, -1)))
390
391 elif isinstance(subdirectives[0], PluralDirective):
392 if is_plural:
393 plural_stream = list(_apply_directives(substream,
394 subdirectives,
395 ctxt, vars))
396
397 else:
398 new_stream.append(event)
399
400 if ctxt.get('_i18n.domain'):
401 ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
402 s, p, n)
403
404 singular_msgbuf = ctxt.get('_i18n.choose.singular')
405 if is_plural:
406 plural_msgbuf = ctxt.get('_i18n.choose.plural')
407 msgbuf, choice = plural_msgbuf, plural_stream
408 else:
409 msgbuf, choice = singular_msgbuf, singular_stream
410 plural_msgbuf = MessageBuffer(self)
411
412 for kind, data, pos in new_stream:
413 if kind is MSGBUF:
414 for event in choice:
415 if event[0] is MSGBUF:
416 translation = ngettext(singular_msgbuf.format(),
417 plural_msgbuf.format(),
418 numeral)
419 for subevent in msgbuf.translate(translation):
420 yield subevent
421 else:
422 yield event
423 else:
424 yield kind, data, pos
425
426 ctxt.pop()
427
428 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
429 search_text=True, comment_stack=None):
430 strip = False
431 stream = iter(stream)
432 previous = stream.next()
433
434 if previous[0] is START:
435 # skip the enclosing element
436 for message in translator._extract_attrs(previous,
437 gettext_functions,
438 search_text=search_text):
439 yield message
440 previous = stream.next()
441 strip = True
442
443 singular_msgbuf = MessageBuffer(self)
444 plural_msgbuf = MessageBuffer(self)
445
446 for event in stream:
447 if previous[0] is SUB:
448 directives, substream = previous[1]
449 for directive in directives:
450 if isinstance(directive, SingularDirective):
451 for message in directive.extract(translator,
452 substream, gettext_functions, search_text,
453 comment_stack, msgbuf=singular_msgbuf):
454 yield message
455 elif isinstance(directive, PluralDirective):
456 for message in directive.extract(translator,
457 substream, gettext_functions, search_text,
458 comment_stack, msgbuf=plural_msgbuf):
459 yield message
460 elif not isinstance(directive, StripDirective):
461 singular_msgbuf.append(*previous)
462 plural_msgbuf.append(*previous)
463 else:
464 if previous[0] is START:
465 for message in translator._extract_attrs(previous,
466 gettext_functions,
467 search_text):
468 yield message
469 singular_msgbuf.append(*previous)
470 plural_msgbuf.append(*previous)
471 previous = event
472
473 if not strip:
474 singular_msgbuf.append(*previous)
475 plural_msgbuf.append(*previous)
476
477 yield self.lineno, 'ngettext', \
478 (singular_msgbuf.format(), plural_msgbuf.format()), \
479 comment_stack[-1:]
480
481 def _is_plural(self, numeral, ngettext):
482 # XXX: should we test which form was chosen like this!?!?!?
483 # There should be no match in any catalogue for these singular and
484 # plural test strings
485 singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
486 plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
487 return ngettext(singular, plural, numeral) == plural
488
489
490 class DomainDirective(I18NDirective):
491 """Implementation of the ``i18n:domain`` directive which allows choosing
492 another i18n domain (catalog) to translate from.
493
494 >>> from genshi.filters.tests.i18n import DummyTranslations
495 >>> tmpl = MarkupTemplate('''\
496 <html xmlns:i18n="http://genshi.edgewall.org/i18n">
497 ... <p i18n:msg="">Bar</p>
498 ... <div i18n:domain="foo">
499 ... <p i18n:msg="">FooBar</p>
500 ... <p>Bar</p>
501 ... <p i18n:domain="bar" i18n:msg="">Bar</p>
502 ... <p i18n:domain="">Bar</p>
503 ... </div>
504 ... <p>Bar</p>
505 ... </html>''')
506
507 >>> translations = DummyTranslations({'Bar': 'Voh'})
508 >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
509 >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
510 >>> translator = Translator(translations)
511 >>> translator.setup(tmpl)
512
513 >>> print(tmpl.generate().render())
514 <html>
515 <p>Voh</p>
516 <div>
517 <p>BarFoo</p>
518 <p>foo_Bar</p>
519 <p>bar_Bar</p>
520 <p>Voh</p>
521 </div>
522 <p>Voh</p>
523 </html>
524 """
525 __slots__ = ['domain']
526
527 def __init__(self, value, template=None, namespaces=None, lineno=-1,
528 offset=-1):
529 Directive.__init__(self, None, template, namespaces, lineno, offset)
530 self.domain = value and value.strip() or '__DEFAULT__'
531
532 @classmethod
533 def attach(cls, template, stream, value, namespaces, pos):
534 if type(value) is dict:
535 value = value.get('name')
536 return super(DomainDirective, cls).attach(template, stream, value,
537 namespaces, pos)
538
539 def __call__(self, stream, directives, ctxt, **vars):
540 ctxt.push({'_i18n.domain': self.domain})
541 for event in _apply_directives(stream, directives, ctxt, vars):
66 yield event 542 yield event
67 543 ctxt.pop()
68 yield previous # the outer end tag
69 544
70 545
71 class Translator(DirectiveFactory): 546 class Translator(DirectiveFactory):
72 """Can extract and translate localizable strings from markup streams and 547 """Can extract and translate localizable strings from markup streams and
73 templates. 548 templates.
74 549
75 For example, assume the followng template: 550 For example, assume the following template:
76 551
77 >>> from genshi.template import MarkupTemplate
78 >>>
79 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> 552 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
80 ... <head> 553 ... <head>
81 ... <title>Example</title> 554 ... <title>Example</title>
82 ... </head> 555 ... </head>
83 ... <body> 556 ... <body>
92 >>> def pseudo_gettext(string): 565 >>> def pseudo_gettext(string):
93 ... return { 566 ... return {
94 ... 'Example': 'Beispiel', 567 ... 'Example': 'Beispiel',
95 ... 'Hello, %(name)s': 'Hallo, %(name)s' 568 ... 'Hello, %(name)s': 'Hallo, %(name)s'
96 ... }[string] 569 ... }[string]
97 >>>
98 >>> translator = Translator(pseudo_gettext) 570 >>> translator = Translator(pseudo_gettext)
99 571
100 Next, the translator needs to be prepended to any already defined filters 572 Next, the translator needs to be prepended to any already defined filters
101 on the template: 573 on the template:
102 574
103 >>> tmpl.filters.insert(0, translator) 575 >>> tmpl.filters.insert(0, translator)
104 576
105 When generating the template output, our hard-coded translations should be 577 When generating the template output, our hard-coded translations should be
106 applied as expected: 578 applied as expected:
107 579
108 >>> print tmpl.generate(username='Hans', _=pseudo_gettext) 580 >>> print(tmpl.generate(username='Hans', _=pseudo_gettext))
109 <html> 581 <html>
110 <head> 582 <head>
111 <title>Beispiel</title> 583 <title>Beispiel</title>
112 </head> 584 </head>
113 <body> 585 <body>
114 <h1>Beispiel</h1> 586 <h1>Beispiel</h1>
115 <p>Hallo, Hans</p> 587 <p>Hallo, Hans</p>
116 </body> 588 </body>
117 </html> 589 </html>
118 590
119 Note that elements defining ``xml:lang`` attributes that do not contain 591 Note that elements defining ``xml:lang`` attributes that do not contain
120 variable expressions are ignored by this filter. That can be used to 592 variable expressions are ignored by this filter. That can be used to
121 exclude specific parts of a template from being extracted and translated. 593 exclude specific parts of a template from being extracted and translated.
122 """ 594 """
123 595
124 directives = [ 596 directives = [
597 ('domain', DomainDirective),
125 ('comment', CommentDirective), 598 ('comment', CommentDirective),
126 ('msg', MsgDirective) 599 ('msg', MsgDirective),
600 ('choose', ChooseDirective),
601 ('singular', SingularDirective),
602 ('plural', PluralDirective)
127 ] 603 ]
128 604
129 IGNORE_TAGS = frozenset([ 605 IGNORE_TAGS = frozenset([
130 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), 606 QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
131 QName('style'), QName('http://www.w3.org/1999/xhtml}style') 607 QName('style'), QName('http://www.w3.org/1999/xhtml}style')
132 ]) 608 ])
133 INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby', 609 INCLUDE_ATTRS = frozenset([
134 'summary', 'title']) 610 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
611 ])
135 NAMESPACE = I18N_NAMESPACE 612 NAMESPACE = I18N_NAMESPACE
136 613
137 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, 614 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
138 include_attrs=INCLUDE_ATTRS, extract_text=True): 615 include_attrs=INCLUDE_ATTRS, extract_text=True):
139 """Initialize the translator. 616 """Initialize the translator.
143 :param ignore_tags: a set of tag names that should not be localized 620 :param ignore_tags: a set of tag names that should not be localized
144 :param include_attrs: a set of attribute names that should be localized 621 :param include_attrs: a set of attribute names that should be localized
145 :param extract_text: whether the content of text nodes should be 622 :param extract_text: whether the content of text nodes should be
146 extracted, or only text in explicit ``gettext`` 623 extracted, or only text in explicit ``gettext``
147 function calls 624 function calls
148 625
149 :note: Changed in 0.6: the `translate` parameter can now be either 626 :note: Changed in 0.6: the `translate` parameter can now be either
150 a ``gettext``-style function, or an object compatible with the 627 a ``gettext``-style function, or an object compatible with the
151 ``NullTranslations`` or ``GNUTranslations`` interface 628 ``NullTranslations`` or ``GNUTranslations`` interface
152 """ 629 """
153 self.translate = translate 630 self.translate = translate
154 self.ignore_tags = ignore_tags 631 self.ignore_tags = ignore_tags
155 self.include_attrs = include_attrs 632 self.include_attrs = include_attrs
156 self.extract_text = extract_text 633 self.extract_text = extract_text
157 634
158 def __call__(self, stream, ctxt=None, search_text=True): 635 def __call__(self, stream, ctxt=None, translate_text=True,
636 translate_attrs=True):
159 """Translate any localizable strings in the given stream. 637 """Translate any localizable strings in the given stream.
160 638
161 This function shouldn't be called directly. Instead, an instance of 639 This function shouldn't be called directly. Instead, an instance of
162 the `Translator` class should be registered as a filter with the 640 the `Translator` class should be registered as a filter with the
163 `Template` or the `TemplateLoader`, or applied as a regular stream 641 `Template` or the `TemplateLoader`, or applied as a regular stream
164 filter. If used as a template filter, it should be inserted in front of 642 filter. If used as a template filter, it should be inserted in front of
165 all the default filters. 643 all the default filters.
166 644
167 :param stream: the markup event stream 645 :param stream: the markup event stream
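168 :param ctxt: the template context (if given, the gettext function(s) are stored in it for use by the i18n directives) 646 :param ctxt: the template context (if given, the gettext function(s) are stored in it for use by the i18n directives)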
169 :param search_text: whether text nodes should be translated (used 647 :param translate_text: whether text nodes should be translated (used
170 internally) 648 internally)
649 :param translate_attrs: whether attribute values should be translated
650 (used internally)
171 :return: the localized stream 651 :return: the localized stream
172 """ 652 """
173 ignore_tags = self.ignore_tags 653 ignore_tags = self.ignore_tags
174 include_attrs = self.include_attrs 654 include_attrs = self.include_attrs
175 skip = 0 655 skip = 0
176 xml_lang = XML_NAMESPACE['lang'] 656 xml_lang = XML_NAMESPACE['lang']
657 if not self.extract_text:
658 translate_text = False
659 translate_attrs = False
177 660
178 if type(self.translate) is FunctionType: 661 if type(self.translate) is FunctionType:
179 gettext = self.translate 662 gettext = self.translate
663 if ctxt:
664 ctxt['_i18n.gettext'] = gettext
180 else: 665 else:
181 gettext = self.translate.ugettext 666 gettext = self.translate.ugettext
182 if ctxt: 667 ngettext = self.translate.ungettext
183 ctxt['_i18n.gettext'] = gettext 668 try:
184 669 dgettext = self.translate.dugettext
185 extract_text = self.extract_text 670 dngettext = self.translate.dungettext
186 if not extract_text: 671 except AttributeError:
187 search_text = False 672 dgettext = lambda _, y: gettext(y)
673 dngettext = lambda _, s, p, n: ngettext(s, p, n)
674 if ctxt:
675 ctxt['_i18n.gettext'] = gettext
676 ctxt['_i18n.ngettext'] = ngettext
677 ctxt['_i18n.dgettext'] = dgettext
678 ctxt['_i18n.dngettext'] = dngettext
679
680 if ctxt and ctxt.get('_i18n.domain'):
681 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
188 682
189 for kind, data, pos in stream: 683 for kind, data, pos in stream:
190 684
191 # skip chunks that should not be localized 685 # skip chunks that should not be localized
192 if skip: 686 if skip:
206 yield kind, data, pos 700 yield kind, data, pos
207 continue 701 continue
208 702
209 new_attrs = [] 703 new_attrs = []
210 changed = False 704 changed = False
705
211 for name, value in attrs: 706 for name, value in attrs:
212 newval = value 707 newval = value
213 if extract_text and isinstance(value, basestring): 708 if isinstance(value, basestring):
214 if name in include_attrs: 709 if translate_attrs and name in include_attrs:
215 newval = gettext(value) 710 newval = gettext(value)
216 else: 711 else:
217 newval = list(self(_ensure(value), ctxt, 712 newval = list(
218 search_text=False) 713 self(_ensure(value), ctxt, translate_text=False)
219 ) 714 )
220 if newval != value: 715 if newval != value:
221 value = newval 716 value = newval
222 changed = True 717 changed = True
223 new_attrs.append((name, value)) 718 new_attrs.append((name, value))
224 if changed: 719 if changed:
225 attrs = Attrs(new_attrs) 720 attrs = Attrs(new_attrs)
226 721
227 yield kind, (tag, attrs), pos 722 yield kind, (tag, attrs), pos
228 723
229 elif search_text and kind is TEXT: 724 elif translate_text and kind is TEXT:
230 text = data.strip() 725 text = data.strip()
231 if text: 726 if text:
232 data = data.replace(text, unicode(gettext(text))) 727 data = data.replace(text, unicode(gettext(text)))
233 yield kind, data, pos 728 yield kind, data, pos
234 729
235 elif kind is SUB: 730 elif kind is SUB:
236 directives, substream = data 731 directives, substream = data
237 # If this is an i18n:msg directive, no need to translate text 732 current_domain = None
733 for idx, directive in enumerate(directives):
734 # Organize directives to make everything work
735 # FIXME: There's got to be a better way to do this!
736 if isinstance(directive, DomainDirective):
737 # Grab current domain and update context
738 current_domain = directive.domain
739 ctxt.push({'_i18n.domain': current_domain})
740 # Put domain directive as the first one in order to
741 # update context before any other directives evaluation
742 directives.insert(0, directives.pop(idx))
743
744 # If this is an i18n directive, no need to translate text
238 # nodes here 745 # nodes here
239 is_msg = filter(None, [isinstance(d, MsgDirective) 746 is_i18n_directive = any([
240 for d in directives]) 747 isinstance(d, ExtractableI18NDirective)
748 for d in directives
749 ])
241 substream = list(self(substream, ctxt, 750 substream = list(self(substream, ctxt,
242 search_text=not is_msg)) 751 translate_text=not is_i18n_directive,
752 translate_attrs=translate_attrs))
243 yield kind, (directives, substream), pos 753 yield kind, (directives, substream), pos
244 754
755 if current_domain:
756 ctxt.pop()
245 else: 757 else:
246 yield kind, data, pos 758 yield kind, data, pos
247 759
248 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
249 'ugettext', 'ungettext')
250
def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
            search_text=True, comment_stack=None):
    """Extract localizable strings from the given template stream.

    For every string found, this function yields a ``(lineno, function,
    message, comments)`` tuple, where:

    * ``lineno`` is the number of the line on which the string was found,
    * ``function`` is the name of the gettext-style function the string
      was passed to, or ``None`` for strings taken from text nodes and
      attributes,
    * ``message`` is the string itself (a ``unicode`` object, or a tuple
      of ``unicode`` objects for functions with multiple string
      arguments).
    * ``comments`` is a list of comments related to the message, extracted
      from ``i18n:comment`` attributes found in the markup

    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ...   <head>
    ...     <title>Example</title>
    ...   </head>
    ...   <body>
    ...     <h1>Example</h1>
    ...     <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ...     <p>${ngettext("You have %d item", "You have %d items", num)}</p>
    ...   </body>
    ... </html>''', filename='example.html')
    >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
    ...    print('%d, %r, %r' % (line, func, msg))
    3, None, u'Example'
    6, None, u'Example'
    7, '_', u'Hello, %(name)s'
    8, 'ngettext', (u'You have %d item', u'You have %d items', None)

    :param stream: the event stream to extract strings from
    :param gettext_functions: a sequence of function names that are
                              treated as gettext-style functions when
                              scanning embedded code
    :param search_text: whether the content of text nodes should be
                        extracted (used internally); forced to ``False``
                        when ``self.extract_text`` is false
    :param comment_stack: list of the ``i18n:comment`` values currently in
                          effect (used internally by recursive calls); a
                          fresh list is created when omitted

    :note: Changed in 0.4.1: For a function with multiple string arguments
           (such as ``ngettext``), a single item with a tuple of strings is
           yielded, instead an item for each string argument.
    :note: Changed in 0.6: The returned tuples now include a fourth
           element, which is a list of comments for the translator.
    """
    if not self.extract_text:
        search_text = False
    if comment_stack is None:
        comment_stack = []
    # Nesting depth inside an ignored element; zero means "not skipping"
    skip = 0

    xml_lang = XML_NAMESPACE['lang']

    for kind, data, pos in stream:
        # While inside an ignored element, only track how deep we are
        if skip:
            if kind is START:
                skip += 1
            if kind is END:
                skip -= 1

        if kind is START and not skip:
            tag, attrs = data
            # Ignored tags, and elements carrying a literal xml:lang
            # value, are skipped together with everything they contain
            if tag in self.ignore_tags or \
                    isinstance(attrs.get(xml_lang), basestring):
                skip += 1
                continue

            for message in self._extract_attrs((kind, data, pos),
                                               gettext_functions,
                                               search_text=search_text):
                yield message

        elif not skip and search_text and kind is TEXT:
            text = data.strip()
            # Only text with at least one alphabetic character is reported
            if text and [ch for ch in text if ch.isalpha()]:
                # Attach just the innermost comment currently in effect
                yield pos[1], None, text, comment_stack[-1:]

        elif kind is EXPR or kind is EXEC:
            # NOTE(review): this branch does not test ``skip``, so code
            # inside ignored elements is still scanned -- confirm intended.
            for funcname, strings in extract_from_code(data,
                                                       gettext_functions):
                # XXX: Do we need to grab i18n:comment from comment_stack ???
                yield pos[1], funcname, strings, []

        elif kind is SUB:
            directives, substream = data
            in_comment = False

            # NOTE(review): ``directives`` is the list carried by the stream
            # event, and it is popped from while being enumerated. After a
            # pop the following entry moves into the freed index and is
            # never visited, and the event's own list is altered in place.
            # The loops below work on whatever is left, so changing this
            # changes what gets extracted -- verify before touching.
            for idx, directive in enumerate(directives):
                # Do a first loop to see if there's a comment directive
                # If there is update context and pop it from directives
                if isinstance(directive, CommentDirective):
                    in_comment = True
                    comment_stack.append(directive.comment)
                    if len(directives) == 1:
                        # in case we're in the presence of something like:
                        # <p i18n:comment="foo">Foo</p>
                        for message in self.extract(
                                substream, gettext_functions,
                                search_text=search_text and not skip,
                                comment_stack=comment_stack):
                            yield message
                    directives.pop(idx)
                elif not isinstance(directive, I18NDirective):
                    # Remove all other non i18n directives from the process
                    directives.pop(idx)

            if not directives and not in_comment:
                # Extract content if there's no directives because
                # strip was pop'ed and not because comment was pop'ed.
                # Extraction in this case has been taken care of.
                # NOTE(review): ``comment_stack`` is not forwarded here, so
                # an enclosing comment is not attached to these messages.
                for message in self.extract(
                        substream, gettext_functions,
                        search_text=search_text and not skip):
                    yield message

            # Whatever survived the first loop decides how the substream is
            # extracted: extractable i18n directives do it themselves, any
            # other remaining directive falls back to a plain recursive call
            for directive in directives:
                if isinstance(directive, ExtractableI18NDirective):
                    for message in directive.extract(self,
                            substream, gettext_functions,
                            search_text=search_text and not skip,
                            comment_stack=comment_stack):
                        yield message
                else:
                    for message in self.extract(
                            substream, gettext_functions,
                            search_text=search_text and not skip,
                            comment_stack=comment_stack):
                        yield message

            # Drop the comment pushed for this SUB event, if any
            if in_comment:
                comment_stack.pop()
893
def get_directive_index(self, dir_cls):
    """Return an ordering index for the given directive class.

    Classes listed in ``self._dir_order`` get a negative number (their
    position counted back from the end of that list); any other class
    gets the length of the list, which is larger than every listed
    class's index.

    :param dir_cls: the directive class to look up
    :return: the index as an ``int``
    """
    known = self._dir_order
    count = len(known)
    try:
        return known.index(dir_cls) - count
    except ValueError:
        # Not one of the ordered directive classes
        return count
899
900 def setup(self, template):
901 """Convenience function to register the `Translator` filter and the
902 related directives with the given template.
903
904 :param template: a `Template` instance
905 """
906 template.filters.insert(0, self)
907 if hasattr(template, 'add_directives'):
908 template.add_directives(Translator.NAMESPACE, self)
909
910 def _extract_attrs(self, event, gettext_functions, search_text):
911 for name, value in event[1][1]:
912 if search_text and isinstance(value, basestring):
913 if name in self.include_attrs:
914 text = value.strip()
915 if text:
916 yield event[2][1], None, text, []
917 else:
918 for message in self.extract(_ensure(value), gettext_functions,
919 search_text=False):
920 yield message
376 921
377 922
class MessageBuffer(object):
    """Helper class for managing internationalized mixed content.

    Stream events are fed in through `append`, which builds the message
    identifier returned by `format` and remembers the events so that
    `translate` can rebuild a stream from a translated message.

    :since: version 0.5
    """

    def __init__(self, directive=None):
        """Initialize the message buffer.

        :param directive: the directive owning the buffer; its ``params``
                          list names the expressions of the message, in
                          order. When omitted the buffer has no parameters.
        :type directive: I18NDirective
        """
        if directive is None:
            params = []
        else:
            params = directive.params
        # params list needs to be copied so that directives can be evaluated
        # more than once. The two copies are independent: ``params`` is
        # consumed by `append`, ``orig_params`` must stay intact so error
        # messages can report what was originally given.
        self.orig_params = list(params)
        self.params = list(params)
        self.directive = directive
        # Fragments of the message identifier, joined by `format`
        self.string = []
        # Maps an order number to the events recorded for it
        self.events = {}
        # Maps a parameter name to the EXPR event providing its value
        self.values = {}
        # Element nesting depth; starts at 1 and drops to 0 on the END
        # matching the element the buffer was opened for
        self.depth = 1
        # Order number given to the next START event
        self.order = 1
        # Order numbers of the currently open elements; 0 is the top level
        self.stack = [0]
        # Maps an order number to the directives of SUB events seen there
        self.subdirectives = {}

    def append(self, kind, data, pos):
        """Append a stream event to the buffer.

        :param kind: the stream event kind
        :param data: the event data
        :param pos: the position of the event in the source
        :raise IndexError: if an expression is appended after every
                           parameter name has been used up
        """
        if kind is SUB:
            # The order needs to be +1 because a new START kind event will
            # happen and we need to wrap those events into our custom
            # kind(s)
            # NOTE(review): START events are numbered from ``self.order``,
            # which differs from ``stack[-1] + 1`` once a sibling element
            # has been seen -- confirm both always agree for SUB events.
            order = self.stack[-1] + 1
            subdirectives, substream = data
            # Store the directives that should be applied after translation
            self.subdirectives.setdefault(order, []).extend(subdirectives)
            self.events.setdefault(order, []).append((SUB_START, None, pos))
            for skind, sdata, spos in substream:
                self.append(skind, sdata, spos)
            self.events.setdefault(order, []).append((SUB_END, None, pos))
        elif kind is TEXT:
            if '[' in data or ']' in data:
                # Quote [ and ] if it ain't us adding it, ie, if the user is
                # using those chars in his templates, escape them
                data = data.replace('[', '\\[').replace(']', '\\]')
            self.string.append(data)
            self.events.setdefault(self.stack[-1], []).append(
                (kind, data, pos))
        elif kind is EXPR:
            if not self.params:
                given = len(self.orig_params)
                names = ', '.join(['"%s"' % p for p in self.orig_params if p])
                if names:
                    names = "(%s)" % names
                raise IndexError("%d parameters%s given to 'i18n:%s' but "
                                 "%d or more expressions used in '%s', line %s"
                                 % (given, names,
                                    getattr(self.directive, 'tagname', None),
                                    given + 1,
                                    os.path.basename(pos[0] or
                                                     'In-memory Template'),
                                    pos[1]))
            param = self.params.pop(0)
            self.string.append('%%(%s)s' % param)
            self.events.setdefault(self.stack[-1], []).append(
                (kind, data, pos))
            self.values[param] = (kind, data, pos)
        elif kind is START:
            # Opening tags show up in the message as ``[N:``
            self.string.append('[%d:' % self.order)
            self.stack.append(self.order)
            self.events.setdefault(self.stack[-1], []).append(
                (kind, data, pos))
            self.depth += 1
            self.order += 1
        elif kind is END:
            self.depth -= 1
            # The END that brings the depth to zero is not recorded
            if self.depth:
                self.events[self.stack[-1]].append((kind, data, pos))
                self.string.append(']')
                self.stack.pop()

    def format(self):
        """Return a message identifier representing the content in the
        buffer.
        """
        return ''.join(self.string).strip()

    def translate(self, string, regex=re.compile(r'%\((\w+)\)s')):
        """Interpolate the given message translation with the events in the
        buffer and return the translated stream.

        Note that event lists stored in the buffer are consumed for orders
        that occur more than once in the translation.

        :param string: the translated message string
        :param regex: pattern matching ``%(name)s`` placeholders
        """
        def interpolate(text):
            # regex.split() alternates literal text (even positions) with
            # parameter names (odd positions)
            for idx, part in enumerate(regex.split(text)):
                if idx % 2:
                    yield self.values[part]
                elif part:
                    unescaped = part.replace('\\[', '[').replace('\\]', ']')
                    yield TEXT, unescaped, (None, -1, -1)

        def route(items, substream):
            # Items are collected while a substream is open, and are passed
            # on to the output otherwise
            for item in items:
                if substream is None:
                    yield item
                else:
                    substream.append(item)

        parts = parse_msg(string)
        # Number of parts still to be processed for each order
        remaining = {}
        for part in parts:
            remaining[part[0]] = remaining.get(part[0], 0) + 1

        substream = None
        for order, string in parts:
            if remaining[order] == 1:
                # Last part of this order: replay all events still stored
                events = self.events[order]
            else:
                events = [self.events[order].pop(0)]
            remaining[order] -= 1

            for event in events:
                kind = event[0]
                if kind is SUB_START:
                    substream = []
                    continue
                if kind is SUB_END:
                    # Yield a substream which might have directives to be
                    # applied to it (after translation events)
                    yield SUB, (self.subdirectives[order], substream), event[2]
                    substream = None
                    continue
                if kind is EXPR:
                    # These are handled on the strings itself
                    continue

                # The translated text goes after an opening tag but before
                # any other event; original TEXT events are replaced by it
                if kind is START:
                    for output in route([event], substream):
                        yield output
                if string:
                    for output in route(interpolate(string), substream):
                        yield output
                    # String handled, reset it
                    string = None
                if kind is not START and kind is not TEXT:
                    for output in route([event], substream):
                        yield output
1101
1102
1103 def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
465 """Parse a translated message using Genshi mixed content message 1104 """Parse a translated message using Genshi mixed content message
466 formatting. 1105 formatting.
467 1106
468 >>> parse_msg("See [1:Help].") 1107 >>> parse_msg("See [1:Help].")
469 [(0, 'See '), (1, 'Help'), (0, '.')] 1108 [(0, 'See '), (1, 'Help'), (0, '.')]
470 1109
471 >>> parse_msg("See [1:our [2:Help] page] for details.") 1110 >>> parse_msg("See [1:our [2:Help] page] for details.")
472 [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')] 1111 [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
473 1112
474 >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].") 1113 >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
475 [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')] 1114 [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
476 1115
477 >>> parse_msg("[1:] Bilder pro Seite anzeigen.") 1116 >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
478 [(1, ''), (0, ' Bilder pro Seite anzeigen.')] 1117 [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
479 1118
480 :param string: the translated message string 1119 :param string: the translated message string
481 :return: a list of ``(order, string)`` tuples 1120 :return: a list of ``(order, string)`` tuples
482 :rtype: `list` 1121 :rtype: `list`
483 """ 1122 """
484 parts = [] 1123 parts = []
508 1147
509 def extract_from_code(code, gettext_functions): 1148 def extract_from_code(code, gettext_functions):
510 """Extract strings from Python bytecode. 1149 """Extract strings from Python bytecode.
511 1150
512 >>> from genshi.template.eval import Expression 1151 >>> from genshi.template.eval import Expression
513
514 >>> expr = Expression('_("Hello")') 1152 >>> expr = Expression('_("Hello")')
515 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1153 >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
516 [('_', u'Hello')] 1154 [('_', u'Hello')]
517 1155
518 >>> expr = Expression('ngettext("You have %(num)s item", ' 1156 >>> expr = Expression('ngettext("You have %(num)s item", '
519 ... '"You have %(num)s items", num)') 1157 ... '"You have %(num)s items", num)')
520 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1158 >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
521 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] 1159 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
522 1160
523 :param code: the `Code` object 1161 :param code: the `Code` object
524 :type code: `genshi.template.eval.Code` 1162 :type code: `genshi.template.eval.Code`
525 :param gettext_functions: a sequence of function names 1163 :param gettext_functions: a sequence of function names
589 include_attrs = include_attrs.split() 1227 include_attrs = include_attrs.split()
590 include_attrs = [QName(attr) for attr in include_attrs] 1228 include_attrs = [QName(attr) for attr in include_attrs]
591 1229
592 tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), 1230 tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
593 encoding=encoding) 1231 encoding=encoding)
1232 tmpl.loader = None
1233
594 translator = Translator(None, ignore_tags, include_attrs, extract_text) 1234 translator = Translator(None, ignore_tags, include_attrs, extract_text)
1235 if hasattr(tmpl, 'add_directives'):
1236 tmpl.add_directives(Translator.NAMESPACE, translator)
595 for message in translator.extract(tmpl.stream, gettext_functions=keywords): 1237 for message in translator.extract(tmpl.stream, gettext_functions=keywords):
596 yield message 1238 yield message
Copyright (C) 2012-2017 Edgewall Software