comparison genshi/filters/i18n.py @ 849:e43633b320db

Merged advanced-i18n branch back into trunk.
author cmlenz
date Tue, 10 Nov 2009 20:54:06 +0000
parents ada9d53ea751
children 47297fd93363
comparison
equal deleted inserted replaced
848:6c66e274198d 849:e43633b320db
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 # 2 #
3 # Copyright (C) 2007 Edgewall Software 3 # Copyright (C) 2007-2008 Edgewall Software
4 # All rights reserved. 4 # All rights reserved.
5 # 5 #
6 # This software is licensed as described in the file COPYING, which 6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms 7 # you should have received as part of this distribution. The terms
8 # are also available at http://genshi.edgewall.org/wiki/License. 8 # are also available at http://genshi.edgewall.org/wiki/License.
9 # 9 #
10 # This software consists of voluntary contributions made by many 10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision 11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://genshi.edgewall.org/log/. 12 # history and logs, available at http://genshi.edgewall.org/log/.
13 13
14 """Utilities for internationalization and localization of templates. 14 """Directives and utilities for internationalization and localization of
15 templates.
15 16
16 :since: version 0.4 17 :since: version 0.4
18 :note: Directives support added since version 0.6
17 """ 19 """
18 20
19 from gettext import NullTranslations 21 from gettext import NullTranslations
22 import os
20 import re 23 import re
21 from types import FunctionType 24 from types import FunctionType
22 25
23 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ 26 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \
24 END_NS, XML_NAMESPACE, _ensure 27 END_NS, XML_NAMESPACE, _ensure, StreamEventKind
25 from genshi.template.eval import _ast 28 from genshi.template.eval import _ast
26 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives 29 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
27 from genshi.template.directives import Directive 30 from genshi.template.directives import Directive, StripDirective
28 from genshi.template.markup import MarkupTemplate, EXEC 31 from genshi.template.markup import MarkupTemplate, EXEC
29 32
30 __all__ = ['Translator', 'extract'] 33 __all__ = ['Translator', 'extract']
31 __docformat__ = 'restructuredtext en' 34 __docformat__ = 'restructuredtext en'
32 35
36
33 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n') 37 I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
34 38
35 39 MSGBUF = StreamEventKind('MSGBUF')
36 class CommentDirective(Directive): 40 SUB_START = StreamEventKind('SUB_START')
37 41 SUB_END = StreamEventKind('SUB_END')
38 __slots__ = [] 42
39 43
class I18NDirective(Directive):
    """Common base class of the i18n template directives.

    An instance does no processing of its own when called: the stream and
    the remaining directives are passed straight to `_apply_directives`.
    """

    def __call__(self, stream, directives, ctxt, **vars):
        # Nothing to do at this level; let the rest of the chain run.
        remaining = _apply_directives(stream, directives, ctxt, vars)
        return remaining
45 class MsgDirective(Directive): 49
46 50
class ExtractableI18NDirective(I18NDirective):
    """Base class for i18n directives that extract their own messages.

    Concrete directives are expected to override `extract`.
    """

    def extract(self, stream, comment_stack):
        """Extract messages from `stream`.

        :param stream: the events wrapped by the directive
        :param comment_stack: list of translator comments currently in effect
        :raise NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError
56
57
class CommentDirective(I18NDirective):
    """The ``i18n:comment`` template directive, which attaches a comment for
    the translator to the messages it wraps.

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <p i18n:comment="As in Foo Bar">Foo</p>
    ... </html>''')
    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream))
    [(2, None, u'Foo', [u'As in Foo Bar'])]
    """
    __slots__ = ['comment']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        """Store the attribute value as the comment text.

        `hints` is accepted for signature compatibility but is not used
        here.
        """
        # The base directive is initialised without an expression (``None``):
        # the attribute value is plain text, not code to be evaluated.
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.comment = value
76
77
class MsgDirective(ExtractableI18NDirective):
    r"""Implementation of the ``i18n:msg`` directive which marks inner content
    as translatable. Consider the following examples:

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="">
    ...     <p>Foo</p>
    ...     <p>Bar</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')

    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream))
    [(2, None, u'[1:Foo]\n    [2:Bar]', []), (6, None, u'Foo [1:bar]!', [])]
    >>> print tmpl.generate().render()
    <html>
      <div><p>Foo</p>
        <p>Bar</p></div>
      <p>Foo <em>bar</em>!</p>
    </html>

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="fname, lname">
    ...     <p>First Name: ${fname}</p>
    ...     <p>Last Name: ${lname}</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, None, u'[1:First Name: %(fname)s]\n    [2:Last Name: %(lname)s]', []),
    (6, None, u'Foo [1:bar]!', [])]

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:msg="fname, lname">
    ...     <p>First Name: ${fname}</p>
    ...     <p>Last Name: ${lname}</p>
    ...   </div>
    ...   <p i18n:msg="">Foo <em>bar</em>!</p>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> print tmpl.generate(fname='John', lname='Doe').render()
    <html>
      <div><p>First Name: John</p>
        <p>Last Name: Doe</p></div>
      <p>Foo <em>bar</em>!</p>
    </html>

    Starting and ending white-space is stripped off to make it simpler for
    translators. Stripping it is not that important since it's on the html
    source, the rendered output will remain the same.
    """
    __slots__ = ['params']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        """Parse the comma separated list of parameter names in `value`.

        `hints` is accepted for signature compatibility but is not used
        here.
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        # Strip first and filter afterwards, so that whitespace-only entries
        # (as in ``"fname, "``) do not end up as empty parameter names.
        names = [name.strip() for name in value.split(',')]
        self.params = [name for name in names if name]

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        """Normalise `value` and delegate to the inherited ``attach``.

        When `value` is a dict (the directive was written as an element with
        attributes rather than as an attribute), the ``params`` entry is
        used as the parameter list.
        """
        if isinstance(value, dict):
            value = value.get('params', '').strip()
        return super(MsgDirective, cls).attach(template, stream, value.strip(),
                                               namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Buffer the wrapped events, translate the resulting message and
        pass the translated events on to the remaining directives.

        The translation function is read from ``ctxt['_i18n.gettext']``; if
        ``ctxt['_i18n.domain']`` is set, ``ctxt['_i18n.dgettext']`` is used
        with that domain instead.
        """
        gettext = ctxt.get('_i18n.gettext')
        dgettext = ctxt.get('_i18n.dgettext')
        if ctxt.get('_i18n.domain'):
            assert callable(dgettext), "No domain gettext function passed"
            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)

        def _generate():
            msgbuf = MessageBuffer(self)
            previous = stream.next()
            if previous[0] is START:
                # Enclosing start tag: emit it as is, it is not part of the
                # message.
                yield previous
            else:
                msgbuf.append(*previous)
            previous = stream.next()
            # One event of look-ahead, so that the last event can be told
            # apart from the message content once the stream is exhausted.
            for kind, data, pos in stream:
                msgbuf.append(*previous)
                previous = kind, data, pos
            if previous[0] is not END:
                # No enclosing end tag: the last event is message content.
                msgbuf.append(*previous)
                previous = None
            for event in msgbuf.translate(gettext(msgbuf.format())):
                yield event
            if previous:
                yield previous  # the enclosing end tag

        return _apply_directives(_generate(), directives, ctxt, vars)

    def extract(self, stream, comment_stack):
        """Yield a single ``(funcname, message, comments)`` tuple for the
        wrapped content.

        ``funcname`` is always ``None``; ``comments`` holds at most the
        innermost entry of `comment_stack`.
        """
        msgbuf = MessageBuffer(self)

        stream = iter(stream)
        previous = stream.next()
        if previous[0] is START:
            # Skip the enclosing start tag.
            previous = stream.next()
        for event in stream:
            msgbuf.append(*previous)
            previous = event
        if previous[0] is not END:
            # The last event is only dropped when it is an end tag.
            msgbuf.append(*previous)

        yield None, msgbuf.format(), comment_stack[-1:]
188
189
class ChooseBranchDirective(I18NDirective):
    """Shared implementation of the branches of an ``i18n:choose`` block.

    When rendered, the content of the branch is collected in a message
    buffer and replaced in the output by a single `MSGBUF` placeholder
    event. The buffer is published in the context under
    ``'_i18n.choose.<ClassName>'``.
    """
    __slots__ = ['params']

    def __call__(self, stream, directives, ctxt, **vars):
        # The parameter names are copied from the context entry
        # '_i18n.choose.params'.
        self.params = ctxt.get('_i18n.choose.params', [])[:]
        buf = MessageBuffer(self)

        events = iter(_apply_directives(stream, directives, ctxt, vars))
        yield events.next() # the outer start tag
        held = events.next()
        # One event of look-ahead: when the loop ends, `held` is the last
        # event of the stream and has not been added to the buffer.
        for kind, data, pos in events:
            buf.append(*held)
            held = kind, data, pos
        yield MSGBUF, (), -1 # the place holder for msgbuf output
        yield held # the outer end tag
        # Only reached once the consumer has exhausted this generator.
        ctxt['_i18n.choose.%s' % type(self).__name__] = buf

    def extract(self, stream, comment_stack, msgbuf):
        """Append the content of the branch to `msgbuf` and return `msgbuf`.

        A leading START event and a trailing END event are left out.
        `comment_stack` is not used.
        """
        events = iter(stream)
        held = events.next()
        if held[0] is START:
            held = events.next()
        for current in events:
            msgbuf.append(*held)
            held = current
        if held[0] is not END:
            msgbuf.append(*held)
        return msgbuf
219
220
class SingularDirective(ChooseBranchDirective):
    """The ``i18n:singular`` directive: marks the singular form of the
    message inside an ``i18n:choose`` block."""
224
225
class PluralDirective(ChooseBranchDirective):
    """The ``i18n:plural`` directive: marks the plural form of the message
    inside an ``i18n:choose`` block."""
229
230
class ChooseDirective(ExtractableI18NDirective):
    """Implementation of the ``i18n:choose`` directive which provides plural
    internationalisation of strings.

    This directive requires at least one parameter, the one which evaluates to
    an integer which will allow to choose the plural/singular form. If you also
    have expressions inside the singular and plural version of the string you
    also need to pass a name for those parameters. Consider the following
    examples:

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:choose="num; num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </div>
    ... </html>''')
    >>> translator = Translator()
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, 'ngettext', (u'There is %(num)s coin',
                      u'There are %(num)s coins'), [])]

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <div i18n:choose="num; num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </div>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> print tmpl.generate(num=1).render()
    <html>
      <div>
        <p>There is 1 coin</p>
      </div>
    </html>
    >>> print tmpl.generate(num=2).render()
    <html>
      <div>
        <p>There are 2 coins</p>
      </div>
    </html>

    When used as a directive and not as an attribute:

    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <i18n:choose numeral="num" params="num">
    ...     <p i18n:singular="">There is $num coin</p>
    ...     <p i18n:plural="">There are $num coins</p>
    ...   </i18n:choose>
    ... </html>''')
    >>> translator.setup(tmpl)
    >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
    [(2, 'ngettext', (u'There is %(num)s coin',
                      u'There are %(num)s coins'), [])]
    """
    __slots__ = ['numeral', 'params']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        """Parse `value`, which has the form ``"numeral[; name, name...]"``.

        The part before the first ``;`` is parsed as the numeral expression;
        the following part, if any, is a comma separated list of parameter
        names. `hints` is accepted for signature compatibility but is not
        used here.
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        parts = [part.strip() for part in value.split(';')]
        self.numeral = self._parse_expr(parts.pop(0), template, lineno, offset)
        names = []
        if parts:
            # Strip first and filter afterwards, so that whitespace-only
            # entries do not end up as empty parameter names.
            stripped = [name.strip() for name in parts[0].split(',')]
            names = [name for name in stripped if name]
        self.params = names

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        """Normalise `value` and delegate to the inherited ``attach``.

        When `value` is a dict (the directive was written as an element),
        its ``numeral`` and ``params`` entries are combined into the
        ``"numeral; name, name"`` form that ``__init__`` parses.
        """
        if isinstance(value, dict):
            numeral = value.get('numeral', '').strip()
            # Compare by value: identity against a string literal depends on
            # interning and is not a reliable emptiness test.
            assert numeral != '', "at least pass the numeral param"
            params = [v.strip() for v in value.get('params', '').split(',')]
            value = '%s; ' % numeral + ', '.join(params)
        return super(ChooseDirective, cls).attach(template, stream, value,
                                                  namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Render the block with the translation selected by the numeral.

        The first singular branch is rendered and its position in the output
        is remembered; the first plural branch is rendered only to obtain
        its message buffer. Both message ids and the evaluated numeral are
        then passed to the ngettext-style function found in the context.
        """
        ctxt.push({'_i18n.choose.params': self.params,
                   '_i18n.choose.SingularDirective': None,
                   '_i18n.choose.PluralDirective': None})

        new_stream = []
        singular_stream = None
        singular_msgbuf = None
        plural_stream = None
        plural_msgbuf = None

        ngettext = ctxt.get('_i18n.ungettext')
        assert callable(ngettext), "No ngettext function available"
        dngettext = ctxt.get('_i18n.dngettext')
        if not dngettext:
            # No domain aware function available: ignore the domain.
            dngettext = lambda d, s, p, n: ngettext(s, p, n)

        for kind, event, pos in stream:
            if kind is SUB:
                subdirectives, substream = event
                if isinstance(subdirectives[0],
                              SingularDirective) and not singular_stream:
                    # Apply directives to update context
                    singular_stream = list(_apply_directives(substream,
                                                             subdirectives,
                                                             ctxt, vars))
                    new_stream.append((MSGBUF, (), ('', -1))) # msgbuf place holder
                    singular_msgbuf = ctxt.get('_i18n.choose.SingularDirective')
                elif isinstance(subdirectives[0],
                                PluralDirective) and not plural_stream:
                    # Apply directives to update context
                    plural_stream = list(_apply_directives(substream,
                                                           subdirectives,
                                                           ctxt, vars))
                    plural_msgbuf = ctxt.get('_i18n.choose.PluralDirective')
                else:
                    new_stream.append((kind, event, pos))
            else:
                new_stream.append((kind, event, pos))

        if ctxt.get('_i18n.domain'):
            ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
                                                 s, p, n)

        for kind, data, pos in new_stream:
            if kind is MSGBUF:
                # Replay the rendered singular branch; the place holder it
                # contains is where the translated message goes.
                for skind, sdata, spos in singular_stream:
                    if skind is MSGBUF:
                        translation = ngettext(singular_msgbuf.format(),
                                               plural_msgbuf.format(),
                                               self.numeral.evaluate(ctxt))
                        for event in singular_msgbuf.translate(translation):
                            yield event
                    else:
                        yield skind, sdata, spos
            else:
                yield kind, data, pos

        # Only reached once the consumer has exhausted this generator.
        ctxt.pop()

    def extract(self, stream, comment_stack):
        """Yield one ``('ngettext', (singular, plural), comments)`` tuple.

        ``comments`` holds at most the innermost entry of `comment_stack`.
        """
        from itertools import chain

        stream = iter(stream)
        first = stream.next()
        if first[0] is not START:
            # Only an enclosing start tag is skipped. Any other first event
            # (which occurs when the directive is written as an element) is
            # content and is put back to be processed like the rest.
            stream = chain([first], stream)

        singular_msgbuf = MessageBuffer(self)
        plural_msgbuf = MessageBuffer(self)

        for kind, event, pos in stream:
            if kind is SUB:
                subdirectives, substream = event
                for subdirective in subdirectives:
                    if isinstance(subdirective, SingularDirective):
                        singular_msgbuf = subdirective.extract(
                            substream, comment_stack, singular_msgbuf)
                    elif isinstance(subdirective, PluralDirective):
                        plural_msgbuf = subdirective.extract(
                            substream, comment_stack, plural_msgbuf)
                    elif not isinstance(subdirective, StripDirective):
                        singular_msgbuf.append(kind, event, pos)
                        plural_msgbuf.append(kind, event, pos)
            else:
                # Everything outside the branches belongs to both forms.
                singular_msgbuf.append(kind, event, pos)
                plural_msgbuf.append(kind, event, pos)

        yield 'ngettext', \
            (singular_msgbuf.format(), plural_msgbuf.format()), \
            comment_stack[-1:]
398
399
class DomainDirective(I18NDirective):
    """The ``i18n:domain`` directive, which selects another i18n domain
    (catalog) for the translation of the content it wraps.

    >>> from genshi.filters.tests.i18n import DummyTranslations
    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    ...   <p i18n:msg="">Bar</p>
    ...   <div i18n:domain="foo">
    ...     <p i18n:msg="">FooBar</p>
    ...     <p>Bar</p>
    ...     <p i18n:domain="bar" i18n:msg="">Bar</p>
    ...     <p i18n:domain="">Bar</p>
    ...   </div>
    ...   <p>Bar</p>
    ... </html>''')

    >>> translations = DummyTranslations({'Bar': 'Voh'})
    >>> translations.add_domain('foo', {'FooBar': 'BarFoo', 'Bar': 'foo_Bar'})
    >>> translations.add_domain('bar', {'Bar': 'bar_Bar'})
    >>> translator = Translator(translations)
    >>> translator.setup(tmpl)

    >>> print tmpl.generate().render()
    <html>
      <p>Voh</p>
      <div>
        <p>BarFoo</p>
        <p>foo_Bar</p>
        <p>bar_Bar</p>
        <p>Voh</p>
      </div>
      <p>Voh</p>
    </html>
    """
    __slots__ = ['domain']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        """Store the stripped domain name.

        A missing, empty or whitespace-only `value` selects the name
        ``'__DEFAULT__'``. `hints` is accepted for signature compatibility
        but is not used here.
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        name = value
        if name:
            name = name.strip()
        self.domain = name or '__DEFAULT__'

    @classmethod
    def attach(cls, template, stream, value, namespaces, pos):
        """Delegate to the inherited ``attach``; when `value` is a dict (the
        directive was written as an element), its ``name`` entry is used."""
        if type(value) is dict:
            value = value.get('name')
        parent = super(DomainDirective, cls)
        return parent.attach(template, stream, value, namespaces, pos)

    def __call__(self, stream, directives, ctxt, **vars):
        """Run the remaining directives with ``ctxt['_i18n.domain']`` set to
        this directive's domain."""
        ctxt.push({'_i18n.domain': self.domain})
        for item in _apply_directives(stream, directives, ctxt, vars):
            yield item
        # Only reached once the consumer has exhausted this generator.
        ctxt.pop()
68 yield previous # the outer end tag
69 454
70 455
71 class Translator(DirectiveFactory): 456 class Translator(DirectiveFactory):
72 """Can extract and translate localizable strings from markup streams and 457 """Can extract and translate localizable strings from markup streams and
73 templates. 458 templates.
74 459
75 For example, assume the followng template: 460 For example, assume the following template:
76 461
77 >>> from genshi.template import MarkupTemplate
78 >>>
79 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> 462 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
80 ... <head> 463 ... <head>
81 ... <title>Example</title> 464 ... <title>Example</title>
82 ... </head> 465 ... </head>
83 ... <body> 466 ... <body>
92 >>> def pseudo_gettext(string): 475 >>> def pseudo_gettext(string):
93 ... return { 476 ... return {
94 ... 'Example': 'Beispiel', 477 ... 'Example': 'Beispiel',
95 ... 'Hello, %(name)s': 'Hallo, %(name)s' 478 ... 'Hello, %(name)s': 'Hallo, %(name)s'
96 ... }[string] 479 ... }[string]
97 >>>
98 >>> translator = Translator(pseudo_gettext) 480 >>> translator = Translator(pseudo_gettext)
99 481
100 Next, the translator needs to be prepended to any already defined filters 482 Next, the translator needs to be prepended to any already defined filters
101 on the template: 483 on the template:
102 484
113 <body> 495 <body>
114 <h1>Beispiel</h1> 496 <h1>Beispiel</h1>
115 <p>Hallo, Hans</p> 497 <p>Hallo, Hans</p>
116 </body> 498 </body>
117 </html> 499 </html>
118 500
119 Note that elements defining ``xml:lang`` attributes that do not contain 501 Note that elements defining ``xml:lang`` attributes that do not contain
120 variable expressions are ignored by this filter. That can be used to 502 variable expressions are ignored by this filter. That can be used to
121 exclude specific parts of a template from being extracted and translated. 503 exclude specific parts of a template from being extracted and translated.
122 """ 504 """
123 505
124 directives = [ 506 directives = [
507 ('domain', DomainDirective),
125 ('comment', CommentDirective), 508 ('comment', CommentDirective),
126 ('msg', MsgDirective) 509 ('msg', MsgDirective),
510 ('choose', ChooseDirective),
511 ('singular', SingularDirective),
512 ('plural', PluralDirective)
127 ] 513 ]
128 514
129 IGNORE_TAGS = frozenset([ 515 IGNORE_TAGS = frozenset([
130 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), 516 QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
131 QName('style'), QName('http://www.w3.org/1999/xhtml}style') 517 QName('style'), QName('http://www.w3.org/1999/xhtml}style')
132 ]) 518 ])
133 INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby', 519 INCLUDE_ATTRS = frozenset([
134 'summary', 'title']) 520 'abbr', 'alt', 'label', 'prompt', 'standby', 'summary', 'title'
521 ])
135 NAMESPACE = I18N_NAMESPACE 522 NAMESPACE = I18N_NAMESPACE
136 523
137 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS, 524 def __init__(self, translate=NullTranslations(), ignore_tags=IGNORE_TAGS,
138 include_attrs=INCLUDE_ATTRS, extract_text=True): 525 include_attrs=INCLUDE_ATTRS, extract_text=True):
139 """Initialize the translator. 526 """Initialize the translator.
143 :param ignore_tags: a set of tag names that should not be localized 530 :param ignore_tags: a set of tag names that should not be localized
144 :param include_attrs: a set of attribute names should be localized 531 :param include_attrs: a set of attribute names should be localized
145 :param extract_text: whether the content of text nodes should be 532 :param extract_text: whether the content of text nodes should be
146 extracted, or only text in explicit ``gettext`` 533 extracted, or only text in explicit ``gettext``
147 function calls 534 function calls
148 535
149 :note: Changed in 0.6: the `translate` parameter can now be either 536 :note: Changed in 0.6: the `translate` parameter can now be either
150 a ``gettext``-style function, or an object compatible with the 537 a ``gettext``-style function, or an object compatible with the
151 ``NullTransalations`` or ``GNUTranslations`` interface 538 ``NullTransalations`` or ``GNUTranslations`` interface
152 """ 539 """
153 self.translate = translate 540 self.translate = translate
175 skip = 0 562 skip = 0
176 xml_lang = XML_NAMESPACE['lang'] 563 xml_lang = XML_NAMESPACE['lang']
177 564
178 if type(self.translate) is FunctionType: 565 if type(self.translate) is FunctionType:
179 gettext = self.translate 566 gettext = self.translate
567 if ctxt:
568 ctxt['_i18n.gettext'] = gettext
180 else: 569 else:
181 gettext = self.translate.ugettext 570 gettext = self.translate.ugettext
182 if ctxt: 571 try:
183 ctxt['_i18n.gettext'] = gettext 572 dgettext = self.translate.dugettext
573 except AttributeError:
574 dgettext = lambda x, y: gettext(y)
575 ngettext = self.translate.ungettext
576 try:
577 dngettext = self.translate.dungettext
578 except AttributeError:
579 dngettext = lambda d, s, p, n: ngettext(s, p, n)
580
581 if ctxt:
582 ctxt['_i18n.gettext'] = gettext
583 ctxt['_i18n.ugettext'] = gettext
584 ctxt['_i18n.dgettext'] = dgettext
585 ctxt['_i18n.ngettext'] = ngettext
586 ctxt['_i18n.ungettext'] = ngettext
587 ctxt['_i18n.dngettext'] = dngettext
184 588
185 extract_text = self.extract_text 589 extract_text = self.extract_text
186 if not extract_text: 590 if not extract_text:
187 search_text = False 591 search_text = False
592
593 if ctxt and ctxt.get('_i18n.domain'):
594 old_gettext = gettext
595 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
188 596
189 for kind, data, pos in stream: 597 for kind, data, pos in stream:
190 598
191 # skip chunks that should not be localized 599 # skip chunks that should not be localized
192 if skip: 600 if skip:
206 yield kind, data, pos 614 yield kind, data, pos
207 continue 615 continue
208 616
209 new_attrs = [] 617 new_attrs = []
210 changed = False 618 changed = False
619
211 for name, value in attrs: 620 for name, value in attrs:
212 newval = value 621 newval = value
213 if extract_text and isinstance(value, basestring): 622 if extract_text and isinstance(value, basestring):
214 if name in include_attrs: 623 if name in include_attrs:
215 newval = gettext(value) 624 newval = gettext(value)
216 else: 625 else:
217 newval = list(self(_ensure(value), ctxt, 626 newval = list(
218 search_text=False) 627 self(_ensure(value), ctxt, search_text=False)
219 ) 628 )
220 if newval != value: 629 if newval != value:
221 value = newval 630 value = newval
222 changed = True 631 changed = True
223 new_attrs.append((name, value)) 632 new_attrs.append((name, value))
232 data = data.replace(text, unicode(gettext(text))) 641 data = data.replace(text, unicode(gettext(text)))
233 yield kind, data, pos 642 yield kind, data, pos
234 643
235 elif kind is SUB: 644 elif kind is SUB:
236 directives, substream = data 645 directives, substream = data
237 # If this is an i18n:msg directive, no need to translate text 646 current_domain = None
647 for idx, directive in enumerate(directives):
648 # Organize directives to make everything work
649 if isinstance(directive, DomainDirective):
650 # Grab current domain and update context
651 current_domain = directive.domain
652 ctxt.push({'_i18n.domain': current_domain})
653 # Put domain directive as the first one in order to
654 # update context before any other directives evaluation
655 directives.insert(0, directives.pop(idx))
656
657 # If this is an i18n directive, no need to translate text
238 # nodes here 658 # nodes here
239 is_msg = filter(None, [isinstance(d, MsgDirective) 659 is_i18n_directive = filter(None,
240 for d in directives]) 660 [isinstance(d, ExtractableI18NDirective)
661 for d in directives])
241 substream = list(self(substream, ctxt, 662 substream = list(self(substream, ctxt,
242 search_text=not is_msg)) 663 search_text=not is_i18n_directive))
243 yield kind, (directives, substream), pos 664 yield kind, (directives, substream), pos
244 665
666 if current_domain:
667 ctxt.pop()
245 else: 668 else:
246 yield kind, data, pos 669 yield kind, data, pos
247 670
248 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', 671 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
249 'ugettext', 'ungettext') 672 'ugettext', 'ungettext')
250 673
251 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, 674 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
252 search_text=True, msgbuf=None): 675 search_text=True, msgbuf=None, comment_stack=None):
253 """Extract localizable strings from the given template stream. 676 """Extract localizable strings from the given template stream.
254 677
255 For every string found, this function yields a ``(lineno, function, 678 For every string found, this function yields a ``(lineno, function,
256 message, comments)`` tuple, where: 679 message, comments)`` tuple, where:
257 680
262 of ``unicode`` objects for functions with multiple string 685 of ``unicode`` objects for functions with multiple string
263 arguments). 686 arguments).
264 * ``comments`` is a list of comments related to the message, extracted 687 * ``comments`` is a list of comments related to the message, extracted
265 from ``i18n:comment`` attributes found in the markup 688 from ``i18n:comment`` attributes found in the markup
266 689
267 >>> from genshi.template import MarkupTemplate
268 >>>
269 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> 690 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
270 ... <head> 691 ... <head>
271 ... <title>Example</title> 692 ... <title>Example</title>
272 ... </head> 693 ... </head>
273 ... <body> 694 ... <body>
274 ... <h1>Example</h1> 695 ... <h1>Example</h1>
275 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> 696 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
276 ... <p>${ngettext("You have %d item", "You have %d items", num)}</p> 697 ... <p>${ngettext("You have %d item", "You have %d items", num)}</p>
277 ... </body> 698 ... </body>
278 ... </html>''', filename='example.html') 699 ... </html>''', filename='example.html')
279 >>>
280 >>> for line, func, msg, comments in Translator().extract(tmpl.stream): 700 >>> for line, func, msg, comments in Translator().extract(tmpl.stream):
281 ... print "%d, %r, %r" % (line, func, msg) 701 ... print "%d, %r, %r" % (line, func, msg)
282 3, None, u'Example' 702 3, None, u'Example'
283 6, None, u'Example' 703 6, None, u'Example'
284 7, '_', u'Hello, %(name)s' 704 7, '_', u'Hello, %(name)s'
293 extracted (used internally) 713 extracted (used internally)
294 714
295 :note: Changed in 0.4.1: For a function with multiple string arguments 715 :note: Changed in 0.4.1: For a function with multiple string arguments
296 (such as ``ngettext``), a single item with a tuple of strings is 716 (such as ``ngettext``), a single item with a tuple of strings is
297 yielded, instead an item for each string argument. 717 yielded, instead an item for each string argument.
298 :note: Changed in 0.6: The returned tuples now include a 4th element, 718 :note: Changed in 0.6: The returned tuples now include a fourth
299 which is a list of comments for the translator 719 element, which is a list of comments for the translator.
300 """ 720 """
301 if not self.extract_text: 721 if not self.extract_text:
302 search_text = False 722 search_text = False
723 if comment_stack is None:
724 comment_stack = []
303 skip = 0 725 skip = 0
304 i18n_comment = I18N_NAMESPACE['comment'] 726
305 i18n_msg = I18N_NAMESPACE['msg'] 727 # Un-comment bellow to extract messages without adding directives
306 xml_lang = XML_NAMESPACE['lang'] 728 xml_lang = XML_NAMESPACE['lang']
307 729
308 for kind, data, pos in stream: 730 for kind, data, pos in stream:
309
310 if skip: 731 if skip:
311 if kind is START: 732 if kind is START:
312 skip += 1 733 skip += 1
313 if kind is END: 734 if kind is END:
314 skip -= 1 735 skip -= 1
324 for name, value in attrs: 745 for name, value in attrs:
325 if search_text and isinstance(value, basestring): 746 if search_text and isinstance(value, basestring):
326 if name in self.include_attrs: 747 if name in self.include_attrs:
327 text = value.strip() 748 text = value.strip()
328 if text: 749 if text:
750 # XXX: Do we need to grab i18n:comment from comment_stack ???
329 yield pos[1], None, text, [] 751 yield pos[1], None, text, []
330 else: 752 else:
331 for lineno, funcname, text, comments in self.extract( 753 for lineno, funcname, text, comments in self.extract(
332 _ensure(value), gettext_functions, 754 _ensure(value), gettext_functions,
333 search_text=False): 755 search_text=False):
334 yield lineno, funcname, text, comments 756 yield lineno, funcname, text, comments
335 757
336 if msgbuf: 758 if msgbuf:
337 msgbuf.append(kind, data, pos) 759 msgbuf.append(kind, data, pos)
338 else:
339 msg_params = attrs.get(i18n_msg)
340 if msg_params is not None:
341 if type(msg_params) is list: # event tuple
342 msg_params = msg_params[0][1]
343 msgbuf = MessageBuffer(
344 msg_params, attrs.get(i18n_comment), pos[1]
345 )
346 760
347 elif not skip and search_text and kind is TEXT: 761 elif not skip and search_text and kind is TEXT:
348 if not msgbuf: 762 if not msgbuf:
349 text = data.strip() 763 text = data.strip()
350 if text and filter(None, [ch.isalpha() for ch in text]): 764 if text and filter(None, [ch.isalpha() for ch in text]):
351 yield pos[1], None, text, [] 765 yield pos[1], None, text, comment_stack[-1:]
352 else: 766 else:
353 msgbuf.append(kind, data, pos) 767 msgbuf.append(kind, data, pos)
354 768
355 elif not skip and msgbuf and kind is END: 769 elif not skip and msgbuf and kind is END:
356 msgbuf.append(kind, data, pos) 770 msgbuf.append(kind, data, pos)
357 if not msgbuf.depth: 771 if not msgbuf.depth:
358 yield msgbuf.lineno, None, msgbuf.format(), \ 772 yield msgbuf.lineno, None, msgbuf.format(), \
359 filter(None, [msgbuf.comment]) 773 filter(None, [msgbuf.comment])
360 msgbuf = None 774 msgbuf = None
361 775
362 elif kind is EXPR or kind is EXEC: 776 elif kind is EXPR or kind is EXEC:
363 if msgbuf: 777 if msgbuf:
364 msgbuf.append(kind, data, pos) 778 msgbuf.append(kind, data, pos)
365 for funcname, strings in extract_from_code(data, 779 for funcname, strings in extract_from_code(data,
366 gettext_functions): 780 gettext_functions):
781 # XXX: Do we need to grab i18n:comment from comment_stack ???
367 yield pos[1], funcname, strings, [] 782 yield pos[1], funcname, strings, []
368 783
369 elif kind is SUB: 784 elif kind is SUB:
370 subkind, substream = data 785 directives, substream = data
371 messages = self.extract(substream, gettext_functions, 786 in_comment = False
372 search_text=search_text and not skip, 787
373 msgbuf=msgbuf) 788 for idx, directive in enumerate(directives):
374 for lineno, funcname, text, comments in messages: 789 # Do a first loop to see if there's a comment directive
375 yield lineno, funcname, text, comments 790 # If there is update context and pop it from directives
791 if isinstance(directive, CommentDirective):
792 in_comment = True
793 comment_stack.append(directive.comment)
794 if len(directives) == 1:
795 # in case we're in the presence of something like:
796 # <p i18n:comment="foo">Foo</p>
797 messages = self.extract(
798 substream, gettext_functions,
799 search_text=search_text and not skip,
800 msgbuf=msgbuf, comment_stack=comment_stack)
801 for lineno, funcname, text, comments in messages:
802 yield lineno, funcname, text, comments
803 directives.pop(idx)
804 elif not isinstance(directive, I18NDirective):
805 # Remove all other non i18n directives from the process
806 directives.pop(idx)
807
808 if not directives and not in_comment:
809 # Extract content if there's no directives because
810 # strip was pop'ed and not because comment was pop'ed.
811 # Extraction in this case has been taken care of.
812 messages = self.extract(
813 substream, gettext_functions,
814 search_text=search_text and not skip, msgbuf=msgbuf)
815 for lineno, funcname, text, comments in messages:
816 yield lineno, funcname, text, comments
817
818 for directive in directives:
819 if isinstance(directive, ExtractableI18NDirective):
820 messages = directive.extract(substream, comment_stack)
821 for funcname, text, comments in messages:
822 yield pos[1], funcname, text, comments
823 else:
824 messages = self.extract(
825 substream, gettext_functions,
826 search_text=search_text and not skip, msgbuf=msgbuf)
827 for lineno, funcname, text, comments in messages:
828 yield lineno, funcname, text, comments
829
830 if in_comment:
831 comment_stack.pop()
832
def get_directive_index(self, dir_cls):
    """Return the index associated with the given directive class.

    A class listed in ``self._dir_order`` yields its position in that
    list minus the length of the list (always a negative number); any
    other class yields the length of the list.

    :param dir_cls: the directive class to look up
    :return: the computed index
    """
    order = self._dir_order
    count = len(order)
    try:
        position = order.index(dir_cls)
    except ValueError:
        # Not one of the known directive classes
        return count
    return position - count
838
839 def setup(self, template):
840 """Convenience function to register the `Translator` filter and the
841 related directives with the given template.
842
843 :param template: a `Template` instance
844 """
845 template.filters.insert(0, self)
846 if hasattr(template, 'add_directives'):
847 template.add_directives(Translator.NAMESPACE, self)
376 848
377 849
378 class MessageBuffer(object): 850 class MessageBuffer(object):
379 """Helper class for managing internationalized mixed content. 851 """Helper class for managing internationalized mixed content.
380 852
381 :since: version 0.5 853 :since: version 0.5
382 """ 854 """
383 855
def __init__(self, directive=None):
    """Initialize the message buffer.

    :param directive: the directive this buffer collects events for; its
                      ``params`` sequence supplies the parameter names
                      that expressions in the message are bound to. When
                      omitted, the buffer starts without parameter names.
    """
    # The parameter names are copied so that the directive's own list is
    # left untouched and the directive can be evaluated more than once.
    if directive is None:
        declared = []
    else:
        declared = list(directive.params)
    # ``params`` is consumed (popped) while expressions are appended,
    # whereas ``orig_params`` is read afterwards to report how many
    # parameters were declared. They must therefore be two distinct lists,
    # otherwise the error report always sees an empty list.
    self.orig_params = declared
    self.params = declared[:]
    self.directive = directive
    self.string = []
    self.events = {}
    self.values = {}
    self.depth = 1
    self.order = 1
    self.stack = [0]
    self.subdirectives = {}
403 875
404 def append(self, kind, data, pos): 876 def append(self, kind, data, pos):
405 """Append a stream event to the buffer. 877 """Append a stream event to the buffer.
406 878
407 :param kind: the stream event kind 879 :param kind: the stream event kind
408 :param data: the event data 880 :param data: the event data
409 :param pos: the position of the event in the source 881 :param pos: the position of the event in the source
410 """ 882 """
411 if kind is TEXT: 883 if kind is SUB:
884 # The order needs to be +1 because a new START kind event will
885 # happen and we need to wrap those events into our custom kind(s)
886 order = self.stack[-1] + 1
887 subdirectives, substream = data
888 # Store the directives that should be applied after translation
889 self.subdirectives.setdefault(order, []).extend(subdirectives)
890 self.events.setdefault(order, []).append((SUB_START, None, pos))
891 for skind, sdata, spos in substream:
892 self.append(skind, sdata, spos)
893 self.events.setdefault(order, []).append((SUB_END, None, pos))
894 elif kind is TEXT:
895 if '[' in data or ']' in data:
896 # Quote [ and ] if it ain't us adding it, ie, if the user is
897 # using those chars in his templates, escape them
898 data = data.replace('[', '\[').replace(']', '\]')
412 self.string.append(data) 899 self.string.append(data)
413 self.events.setdefault(self.stack[-1], []).append(None) 900 self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
414 elif kind is EXPR: 901 elif kind is EXPR:
415 param = self.params.pop(0) 902 if self.params:
903 param = self.params.pop(0)
904 else:
905 params = ', '.join(['"%s"' % p for p in self.orig_params if p])
906 if params:
907 params = "(%s)" % params
908 raise IndexError("%d parameters%s given to 'i18n:%s' but "
909 "%d or more expressions used in '%s', line %s"
910 % (len(self.orig_params), params,
911 self.directive.tagname,
912 len(self.orig_params)+1,
913 os.path.basename(pos[0] or
914 'In Memmory Template'),
915 pos[1]))
416 self.string.append('%%(%s)s' % param) 916 self.string.append('%%(%s)s' % param)
417 self.events.setdefault(self.stack[-1], []).append(None) 917 self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
418 self.values[param] = (kind, data, pos) 918 self.values[param] = (kind, data, pos)
419 else: 919 else:
420 if kind is START: 920 if kind is START:
421 self.string.append(u'[%d:' % self.order) 921 self.string.append(u'[%d:' % self.order)
422 self.events.setdefault(self.order, []).append((kind, data, pos))
423 self.stack.append(self.order) 922 self.stack.append(self.order)
923 self.events.setdefault(self.stack[-1],
924 []).append((kind, data, pos))
424 self.depth += 1 925 self.depth += 1
425 self.order += 1 926 self.order += 1
426 elif kind is END: 927 elif kind is END:
427 self.depth -= 1 928 self.depth -= 1
428 if self.depth: 929 if self.depth:
440 """Interpolate the given message translation with the events in the 941 """Interpolate the given message translation with the events in the
441 buffer and return the translated stream. 942 buffer and return the translated stream.
442 943
443 :param string: the translated message string 944 :param string: the translated message string
444 """ 945 """
946 substream = None
947
948 def yield_parts(string):
949 for idx, part in enumerate(regex.split(string)):
950 if idx % 2:
951 yield self.values[part]
952 elif part:
953 yield (TEXT,
954 part.replace('\[', '[').replace('\]', ']'),
955 (None, -1, -1)
956 )
957
445 parts = parse_msg(string) 958 parts = parse_msg(string)
959 parts_counter = {}
446 for order, string in parts: 960 for order, string in parts:
447 events = self.events[order] 961 parts_counter.setdefault(order, []).append(None)
448 while events: 962
449 event = events.pop(0) 963 while parts:
450 if event: 964 order, string = parts.pop(0)
451 yield event 965 if len(parts_counter[order]) == 1:
966 events = self.events[order]
967 else:
968 events = [self.events[order].pop(0)]
969 parts_counter[order].pop()
970
971 for event in events:
972 if event[0] is SUB_START:
973 substream = []
974 elif event[0] is SUB_END:
975 # Yield a substream which might have directives to be
976 # applied to it (after translation events)
977 yield SUB, (self.subdirectives[order], substream), event[2]
978 substream = None
979 elif event[0] is TEXT:
980 if string:
981 for part in yield_parts(string):
982 if substream is not None:
983 substream.append(part)
984 else:
985 yield part
986 # String handled, reset it
987 string = None
988 elif event[0] is START:
989 if substream is not None:
990 substream.append(event)
991 else:
992 yield event
993 if string:
994 for part in yield_parts(string):
995 if substream is not None:
996 substream.append(part)
997 else:
998 yield part
999 # String handled, reset it
1000 string = None
1001 elif event[0] is END:
1002 if string:
1003 for part in yield_parts(string):
1004 if substream is not None:
1005 substream.append(part)
1006 else:
1007 yield part
1008 # String handled, reset it
1009 string = None
1010 if substream is not None:
1011 substream.append(event)
1012 else:
1013 yield event
1014 elif event[0] is EXPR:
1015 # These are handled on the strings itself
1016 continue
452 else: 1017 else:
453 if not string: 1018 if string:
454 break 1019 for part in yield_parts(string):
455 for idx, part in enumerate(regex.split(string)): 1020 if substream is not None:
456 if idx % 2: 1021 substream.append(part)
457 yield self.values[part] 1022 else:
458 elif part: 1023 yield part
459 yield TEXT, part, (None, -1, -1) 1024 # String handled, reset it
460 if not self.events[order] or not self.events[order][0]: 1025 string = None
461 break 1026 if substream is not None:
462 1027 substream.append(event)
463 1028 else:
464 def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|\]')): 1029 yield event
1030
1031 def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
465 """Parse a translated message using Genshi mixed content message 1032 """Parse a translated message using Genshi mixed content message
466 formatting. 1033 formatting.
467 1034
468 >>> parse_msg("See [1:Help].") 1035 >>> parse_msg("See [1:Help].")
469 [(0, 'See '), (1, 'Help'), (0, '.')] 1036 [(0, 'See '), (1, 'Help'), (0, '.')]
470 1037
471 >>> parse_msg("See [1:our [2:Help] page] for details.") 1038 >>> parse_msg("See [1:our [2:Help] page] for details.")
472 [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')] 1039 [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]
473 1040
474 >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].") 1041 >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
475 [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')] 1042 [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]
476 1043
477 >>> parse_msg("[1:] Bilder pro Seite anzeigen.") 1044 >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
478 [(1, ''), (0, ' Bilder pro Seite anzeigen.')] 1045 [(1, ''), (0, ' Bilder pro Seite anzeigen.')]
479 1046
480 :param string: the translated message string 1047 :param string: the translated message string
481 :return: a list of ``(order, string)`` tuples 1048 :return: a list of ``(order, string)`` tuples
482 :rtype: `list` 1049 :rtype: `list`
483 """ 1050 """
484 parts = [] 1051 parts = []
508 1075
509 def extract_from_code(code, gettext_functions): 1076 def extract_from_code(code, gettext_functions):
510 """Extract strings from Python bytecode. 1077 """Extract strings from Python bytecode.
511 1078
512 >>> from genshi.template.eval import Expression 1079 >>> from genshi.template.eval import Expression
513
514 >>> expr = Expression('_("Hello")') 1080 >>> expr = Expression('_("Hello")')
515 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1081 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
516 [('_', u'Hello')] 1082 [('_', u'Hello')]
517 1083
518 >>> expr = Expression('ngettext("You have %(num)s item", ' 1084 >>> expr = Expression('ngettext("You have %(num)s item", '
519 ... '"You have %(num)s items", num)') 1085 ... '"You have %(num)s items", num)')
520 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1086 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
521 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] 1087 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
522 1088
589 include_attrs = include_attrs.split() 1155 include_attrs = include_attrs.split()
590 include_attrs = [QName(attr) for attr in include_attrs] 1156 include_attrs = [QName(attr) for attr in include_attrs]
591 1157
592 tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None), 1158 tmpl = template_class(fileobj, filename=getattr(fileobj, 'name', None),
593 encoding=encoding) 1159 encoding=encoding)
1160
594 translator = Translator(None, ignore_tags, include_attrs, extract_text) 1161 translator = Translator(None, ignore_tags, include_attrs, extract_text)
1162 if hasattr(tmpl, 'add_directives'):
1163 tmpl.add_directives(Translator.NAMESPACE, translator)
595 for message in translator.extract(tmpl.stream, gettext_functions=keywords): 1164 for message in translator.extract(tmpl.stream, gettext_functions=keywords):
596 yield message 1165 yield message
Copyright (C) 2012-2017 Edgewall Software