comparison genshi/filters/i18n.py @ 895:f30c9fb10272

i18n: some cleanup, especially for the pluralization directives.
author cmlenz
date Wed, 21 Apr 2010 20:00:37 +0000
parents 1de952fd479e
children 85e4678337cf
comparison
equal deleted inserted replaced
894:6fe4feb2635b 895:f30c9fb10272
25 from gettext import NullTranslations 25 from gettext import NullTranslations
26 import os 26 import os
27 import re 27 import re
28 from types import FunctionType 28 from types import FunctionType
29 29
30 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ 30 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \
31 END_NS, XML_NAMESPACE, _ensure, StreamEventKind 31 XML_NAMESPACE, _ensure, StreamEventKind
32 from genshi.template.eval import _ast 32 from genshi.template.eval import _ast
33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives 33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
34 from genshi.template.directives import Directive, StripDirective 34 from genshi.template.directives import Directive, StripDirective
35 from genshi.template.markup import MarkupTemplate, EXEC 35 from genshi.template.markup import MarkupTemplate, EXEC
36 36
213 __slots__ = ['params'] 213 __slots__ = ['params']
214 214
215 def __call__(self, stream, directives, ctxt, **vars): 215 def __call__(self, stream, directives, ctxt, **vars):
216 self.params = ctxt.get('_i18n.choose.params', [])[:] 216 self.params = ctxt.get('_i18n.choose.params', [])[:]
217 msgbuf = MessageBuffer(self) 217 msgbuf = MessageBuffer(self)
218 218 stream = _apply_directives(stream, directives, ctxt, vars)
219 stream = iter(_apply_directives(stream, directives, ctxt, vars)) 219
220
221 previous = stream.next() 220 previous = stream.next()
222 if previous[0] is START: 221 if previous[0] is START:
223 yield previous 222 yield previous
224 else: 223 else:
225 msgbuf.append(*previous) 224 msgbuf.append(*previous)
226 225
227 try: 226 try:
228 previous = stream.next() 227 previous = stream.next()
229 except StopIteration: 228 except StopIteration:
230 # For example <i18n:singular> or <i18n:plural> directives 229 # For example <i18n:singular> or <i18n:plural> directives
231 yield MSGBUF, (), -1 # the place holder for msgbuf output 230 yield MSGBUF, (), -1 # the place holder for msgbuf output
232 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf 231 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
233 return 232 return
234 233
235 for kind, data, pos in stream: 234 for event in stream:
236 msgbuf.append(*previous) 235 msgbuf.append(*previous)
237 previous = kind, data, pos 236 previous = event
238 yield MSGBUF, (), -1 # the place holder for msgbuf output 237 yield MSGBUF, (), -1 # the place holder for msgbuf output
239 238
240 if previous[0] is END: 239 if previous[0] is END:
241 yield previous # the outer end tag 240 yield previous # the outer end tag
242 else: 241 else:
243 msgbuf.append(*previous) 242 msgbuf.append(*previous)
244 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf 243 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf
245
246 244
247 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, 245 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
248 search_text=True, comment_stack=None, msgbuf=None): 246 search_text=True, comment_stack=None, msgbuf=None):
249 stream = iter(stream) 247 stream = iter(stream)
250 previous = stream.next() 248 previous = stream.next()
359 return super(ChooseDirective, cls).attach(template, stream, value, 357 return super(ChooseDirective, cls).attach(template, stream, value,
360 namespaces, pos) 358 namespaces, pos)
361 359
362 def __call__(self, stream, directives, ctxt, **vars): 360 def __call__(self, stream, directives, ctxt, **vars):
363 ctxt.push({'_i18n.choose.params': self.params, 361 ctxt.push({'_i18n.choose.params': self.params,
364 '_i18n.choose.SingularDirective': None, 362 '_i18n.choose.singular': None,
365 '_i18n.choose.PluralDirective': None}) 363 '_i18n.choose.plural': None})
364
365 ngettext = ctxt.get('_i18n.ngettext')
366 assert hasattr(ngettext, '__call__'), 'No ngettext function available'
367 dngettext = ctxt.get('_i18n.dngettext')
368 if not dngettext:
369 dngettext = lambda d, s, p, n: ngettext(s, p, n)
366 370
367 new_stream = [] 371 new_stream = []
368 singular_stream = None 372 singular_stream = None
369 singular_msgbuf = None 373 singular_msgbuf = None
370 plural_stream = None 374 plural_stream = None
371 plural_msgbuf = None 375 plural_msgbuf = None
372 376
373 ngettext = ctxt.get('_i18n.ungettext') 377 numeral = self.numeral.evaluate(ctxt)
374 assert hasattr(ngettext, '__call__'), 'No ngettext function available' 378 is_plural = self._is_plural(numeral, ngettext)
375 dngettext = ctxt.get('_i18n.dngettext') 379
376 if not dngettext: 380 for event in stream:
377 dngettext = lambda d, s, p, n: ngettext(s, p, n) 381 if event[0] is SUB and any(isinstance(d, ChooseBranchDirective)
378 382 for d in event[1][0]):
379 for kind, event, pos in stream: 383 subdirectives, substream = event[1]
380 if kind is SUB: 384
381 subdirectives, substream = event 385 if isinstance(subdirectives[0], SingularDirective):
382 if isinstance(subdirectives[0],
383 SingularDirective) and not singular_stream:
384 strip_directive_present = []
385 for idx, subdirective in enumerate(subdirectives):
386 if isinstance(subdirective, StripDirective):
387 # Any strip directive should be applied AFTER
388 # the events have been translated.
389 strip_directive_present.append(
390 subdirectives.pop(idx)
391 )
392 # Apply directives to update context
393 singular_stream = list(_apply_directives(substream, 386 singular_stream = list(_apply_directives(substream,
394 subdirectives, 387 subdirectives,
395 ctxt, vars)) 388 ctxt, vars))
396 if strip_directive_present: 389 new_stream.append((MSGBUF, None, (None, -1, -1)))
397 singular_stream = list( 390
398 _apply_directives(singular_stream, 391 elif isinstance(subdirectives[0], PluralDirective):
399 strip_directive_present, 392 if is_plural:
400 ctxt, vars) 393 plural_stream = list(_apply_directives(substream,
401 ) 394 subdirectives,
402 del strip_directive_present 395 ctxt, vars))
403 new_stream.append((MSGBUF, (), ('', -1))) # msgbuf place holder 396
404 singular_msgbuf = ctxt.get('_i18n.choose.SingularDirective')
405 elif isinstance(subdirectives[0],
406 PluralDirective) and not plural_stream:
407 strip_directive_present = []
408 for idx, subdirective in enumerate(subdirectives):
409 if isinstance(subdirective, StripDirective):
410 # Any strip directive should be applied AFTER
411 # the events have been translated.
412 strip_directive_present.append(
413 subdirectives.pop(idx)
414 )
415 # Apply directives to update context
416 plural_stream = list(_apply_directives(substream,
417 subdirectives,
418 ctxt, vars))
419 if strip_directive_present:
420 plural_stream = list(
421 _apply_directives(plural_stream,
422 strip_directive_present,
423 ctxt, vars)
424 )
425 del strip_directive_present
426 plural_msgbuf = ctxt.get('_i18n.choose.PluralDirective')
427 else:
428 new_stream.append((kind, event, pos))
429 else: 397 else:
430 new_stream.append((kind, event, pos)) 398 new_stream.append(event)
431 399
432 if ctxt.get('_i18n.domain'): 400 if ctxt.get('_i18n.domain'):
433 ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), 401 ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'),
434 s, p, n) 402 s, p, n)
435 403
436 # XXX: should we test which form was chosen like this!?!?!? 404 singular_msgbuf = ctxt.get('_i18n.choose.singular')
437 # There should be no match in any catalogue for these singular and 405 if is_plural:
438 # plural test strings 406 plural_msgbuf = ctxt.get('_i18n.choose.plural')
439 singular_test = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' 407 msgbuf, choice = plural_msgbuf, plural_stream
440 plural_test = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
441 translation = ngettext(singular_test, plural_test,
442 self.numeral.evaluate(ctxt))
443 if translation == singular_test:
444 chosen_msgbuf = singular_msgbuf
445 chosen_stream = singular_stream
446 else: 408 else:
447 chosen_msgbuf = plural_msgbuf 409 msgbuf, choice = singular_msgbuf, singular_stream
448 chosen_stream = plural_stream 410 plural_msgbuf = MessageBuffer(self)
449 del singular_test, plural_test, translation
450 411
451 for kind, data, pos in new_stream: 412 for kind, data, pos in new_stream:
452 if kind is MSGBUF: 413 if kind is MSGBUF:
453 for skind, sdata, spos in chosen_stream: 414 for event in choice:
454 if skind is MSGBUF: 415 if event[0] is MSGBUF:
455 translation = ngettext(singular_msgbuf.format(), 416 translation = ngettext(singular_msgbuf.format(),
456 plural_msgbuf.format(), 417 plural_msgbuf.format(),
457 self.numeral.evaluate(ctxt)) 418 numeral)
458 for event in chosen_msgbuf.translate(translation): 419 for subevent in msgbuf.translate(translation):
459 yield event 420 yield subevent
460 else: 421 else:
461 yield skind, sdata, spos 422 yield event
462 else: 423 else:
463 yield kind, data, pos 424 yield kind, data, pos
464 425
465 ctxt.pop() 426 ctxt.pop()
466 427
515 476
516 yield self.lineno, 'ngettext', \ 477 yield self.lineno, 'ngettext', \
517 (singular_msgbuf.format(), plural_msgbuf.format()), \ 478 (singular_msgbuf.format(), plural_msgbuf.format()), \
518 comment_stack[-1:] 479 comment_stack[-1:]
519 480
481 def _is_plural(self, numeral, ngettext):
482 # XXX: should we test which form was chosen like this!?!?!?
483 # There should be no match in any catalogue for these singular and
484 # plural test strings
485 singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
486 plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
487 return ngettext(singular, plural, numeral) == plural
488
520 489
521 class DomainDirective(I18NDirective): 490 class DomainDirective(I18NDirective):
522 """Implementation of the ``i18n:domain`` directive which allows choosing 491 """Implementation of the ``i18n:domain`` directive which allows choosing
523 another i18n domain(catalog) to translate from. 492 another i18n domain(catalog) to translate from.
524 493
556 __slots__ = ['domain'] 525 __slots__ = ['domain']
557 526
558 def __init__(self, value, template=None, namespaces=None, lineno=-1, 527 def __init__(self, value, template=None, namespaces=None, lineno=-1,
559 offset=-1): 528 offset=-1):
560 Directive.__init__(self, None, template, namespaces, lineno, offset) 529 Directive.__init__(self, None, template, namespaces, lineno, offset)
561 self.domain = value and value.strip() or '__DEFAULT__' 530 self.domain = value and value.strip() or '__DEFAULT__'
562 531
563 @classmethod 532 @classmethod
564 def attach(cls, template, stream, value, namespaces, pos): 533 def attach(cls, template, stream, value, namespaces, pos):
565 if type(value) is dict: 534 if type(value) is dict:
566 value = value.get('name') 535 value = value.get('name')
661 self.translate = translate 630 self.translate = translate
662 self.ignore_tags = ignore_tags 631 self.ignore_tags = ignore_tags
663 self.include_attrs = include_attrs 632 self.include_attrs = include_attrs
664 self.extract_text = extract_text 633 self.extract_text = extract_text
665 634
666 def __call__(self, stream, ctxt=None, search_text=True): 635 def __call__(self, stream, ctxt=None, translate_text=True,
636 translate_attrs=True):
667 """Translate any localizable strings in the given stream. 637 """Translate any localizable strings in the given stream.
668 638
669 This function shouldn't be called directly. Instead, an instance of 639 This function shouldn't be called directly. Instead, an instance of
670 the `Translator` class should be registered as a filter with the 640 the `Translator` class should be registered as a filter with the
671 `Template` or the `TemplateLoader`, or applied as a regular stream 641 `Template` or the `TemplateLoader`, or applied as a regular stream
672 filter. If used as a template filter, it should be inserted in front of 642 filter. If used as a template filter, it should be inserted in front of
673 all the default filters. 643 all the default filters.
674 644
675 :param stream: the markup event stream 645 :param stream: the markup event stream
676 :param ctxt: the template context (not used) 646 :param ctxt: the template context (not used)
677 :param search_text: whether text nodes should be translated (used 647 :param translate_text: whether text nodes should be translated (used
678 internally) 648 internally)
649 :param translate_attrs: whether attribute values should be translated
650 (used internally)
679 :return: the localized stream 651 :return: the localized stream
680 """ 652 """
681 ignore_tags = self.ignore_tags 653 ignore_tags = self.ignore_tags
682 include_attrs = self.include_attrs 654 include_attrs = self.include_attrs
683 skip = 0 655 skip = 0
684 xml_lang = XML_NAMESPACE['lang'] 656 xml_lang = XML_NAMESPACE['lang']
657 if not self.extract_text:
658 translate_text = False
659 translate_attrs = False
685 660
686 if type(self.translate) is FunctionType: 661 if type(self.translate) is FunctionType:
687 gettext = self.translate 662 gettext = self.translate
688 if ctxt: 663 if ctxt:
689 ctxt['_i18n.gettext'] = gettext 664 ctxt['_i18n.gettext'] = gettext
690 else: 665 else:
691 gettext = self.translate.ugettext 666 gettext = self.translate.ugettext
667 ngettext = self.translate.ungettext
692 try: 668 try:
693 dgettext = self.translate.dugettext 669 dgettext = self.translate.dugettext
694 except AttributeError:
695 dgettext = lambda x, y: gettext(y)
696 ngettext = self.translate.ungettext
697 try:
698 dngettext = self.translate.dungettext 670 dngettext = self.translate.dungettext
699 except AttributeError: 671 except AttributeError:
700 dngettext = lambda d, s, p, n: ngettext(s, p, n) 672 dgettext = lambda _, y: gettext(y)
701 673 dngettext = lambda _, s, p, n: ngettext(s, p, n)
702 if ctxt: 674 if ctxt:
703 ctxt['_i18n.gettext'] = gettext 675 ctxt['_i18n.gettext'] = gettext
704 ctxt['_i18n.ugettext'] = gettext 676 ctxt['_i18n.ngettext'] = ngettext
705 ctxt['_i18n.dgettext'] = dgettext 677 ctxt['_i18n.dgettext'] = dgettext
706 ctxt['_i18n.ngettext'] = ngettext
707 ctxt['_i18n.ungettext'] = ngettext
708 ctxt['_i18n.dngettext'] = dngettext 678 ctxt['_i18n.dngettext'] = dngettext
709 679
710 extract_text = self.extract_text
711 if not extract_text:
712 search_text = False
713
714 if ctxt and ctxt.get('_i18n.domain'): 680 if ctxt and ctxt.get('_i18n.domain'):
715 old_gettext = gettext
716 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) 681 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
717 682
718 for kind, data, pos in stream: 683 for kind, data, pos in stream:
719 684
720 # skip chunks that should not be localized 685 # skip chunks that should not be localized
738 new_attrs = [] 703 new_attrs = []
739 changed = False 704 changed = False
740 705
741 for name, value in attrs: 706 for name, value in attrs:
742 newval = value 707 newval = value
743 if extract_text and isinstance(value, basestring): 708 if isinstance(value, basestring):
744 if name in include_attrs: 709 if translate_attrs and name in include_attrs:
745 newval = gettext(value) 710 newval = gettext(value)
746 else: 711 else:
747 newval = list( 712 newval = list(
748 self(_ensure(value), ctxt, search_text=False) 713 self(_ensure(value), ctxt, translate_text=False)
749 ) 714 )
750 if newval != value: 715 if newval != value:
751 value = newval 716 value = newval
752 changed = True 717 changed = True
753 new_attrs.append((name, value)) 718 new_attrs.append((name, value))
754 if changed: 719 if changed:
755 attrs = Attrs(new_attrs) 720 attrs = Attrs(new_attrs)
756 721
757 yield kind, (tag, attrs), pos 722 yield kind, (tag, attrs), pos
758 723
759 elif search_text and kind is TEXT: 724 elif translate_text and kind is TEXT:
760 text = data.strip() 725 text = data.strip()
761 if text: 726 if text:
762 data = data.replace(text, unicode(gettext(text))) 727 data = data.replace(text, unicode(gettext(text)))
763 yield kind, data, pos 728 yield kind, data, pos
764 729
765 elif kind is SUB: 730 elif kind is SUB:
766 directives, substream = data 731 directives, substream = data
767 current_domain = None 732 current_domain = None
768 for idx, directive in enumerate(directives): 733 for idx, directive in enumerate(directives):
769 # Organize directives to make everything work 734 # Organize directives to make everything work
735 # FIXME: There's got to be a better way to do this!
770 if isinstance(directive, DomainDirective): 736 if isinstance(directive, DomainDirective):
771 # Grab current domain and update context 737 # Grab current domain and update context
772 current_domain = directive.domain 738 current_domain = directive.domain
773 ctxt.push({'_i18n.domain': current_domain}) 739 ctxt.push({'_i18n.domain': current_domain})
774 # Put domain directive as the first one in order to 740 # Put domain directive as the first one in order to
780 is_i18n_directive = any([ 746 is_i18n_directive = any([
781 isinstance(d, ExtractableI18NDirective) 747 isinstance(d, ExtractableI18NDirective)
782 for d in directives 748 for d in directives
783 ]) 749 ])
784 substream = list(self(substream, ctxt, 750 substream = list(self(substream, ctxt,
785 search_text=not is_i18n_directive)) 751 translate_text=not is_i18n_directive,
752 translate_attrs=translate_attrs))
786 yield kind, (directives, substream), pos 753 yield kind, (directives, substream), pos
787 754
788 if current_domain: 755 if current_domain:
789 ctxt.pop() 756 ctxt.pop()
790 else: 757 else:
1046 buffer and return the translated stream. 1013 buffer and return the translated stream.
1047 1014
1048 :param string: the translated message string 1015 :param string: the translated message string
1049 """ 1016 """
1050 substream = None 1017 substream = None
1051 1018
1052 def yield_parts(string): 1019 def yield_parts(string):
1053 for idx, part in enumerate(regex.split(string)): 1020 for idx, part in enumerate(regex.split(string)):
1054 if idx % 2: 1021 if idx % 2:
1055 yield self.values[part] 1022 yield self.values[part]
1056 elif part: 1023 elif part:
Copyright (C) 2012-2017 Edgewall Software