Mercurial > genshi > genshi-test
comparison genshi/filters/i18n.py @ 895:f30c9fb10272
i18n: some cleanup, especially for the pluralization directives.
author | cmlenz |
---|---|
date | Wed, 21 Apr 2010 20:00:37 +0000 |
parents | 1de952fd479e |
children | 85e4678337cf |
comparison
legend: equal, deleted, inserted, replaced
894:6fe4feb2635b | 895:f30c9fb10272 |
---|---|
25 from gettext import NullTranslations | 25 from gettext import NullTranslations |
26 import os | 26 import os |
27 import re | 27 import re |
28 from types import FunctionType | 28 from types import FunctionType |
29 | 29 |
30 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \ | 30 from genshi.core import Attrs, Namespace, QName, START, END, TEXT, \ |
31 END_NS, XML_NAMESPACE, _ensure, StreamEventKind | 31 XML_NAMESPACE, _ensure, StreamEventKind |
32 from genshi.template.eval import _ast | 32 from genshi.template.eval import _ast |
33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives | 33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives |
34 from genshi.template.directives import Directive, StripDirective | 34 from genshi.template.directives import Directive, StripDirective |
35 from genshi.template.markup import MarkupTemplate, EXEC | 35 from genshi.template.markup import MarkupTemplate, EXEC |
36 | 36 |
213 __slots__ = ['params'] | 213 __slots__ = ['params'] |
214 | 214 |
215 def __call__(self, stream, directives, ctxt, **vars): | 215 def __call__(self, stream, directives, ctxt, **vars): |
216 self.params = ctxt.get('_i18n.choose.params', [])[:] | 216 self.params = ctxt.get('_i18n.choose.params', [])[:] |
217 msgbuf = MessageBuffer(self) | 217 msgbuf = MessageBuffer(self) |
218 | 218 stream = _apply_directives(stream, directives, ctxt, vars) |
219 stream = iter(_apply_directives(stream, directives, ctxt, vars)) | 219 |
220 | |
221 previous = stream.next() | 220 previous = stream.next() |
222 if previous[0] is START: | 221 if previous[0] is START: |
223 yield previous | 222 yield previous |
224 else: | 223 else: |
225 msgbuf.append(*previous) | 224 msgbuf.append(*previous) |
226 | 225 |
227 try: | 226 try: |
228 previous = stream.next() | 227 previous = stream.next() |
229 except StopIteration: | 228 except StopIteration: |
230 # For example <i18n:singular> or <i18n:plural> directives | 229 # For example <i18n:singular> or <i18n:plural> directives |
231 yield MSGBUF, (), -1 # the place holder for msgbuf output | 230 yield MSGBUF, (), -1 # the place holder for msgbuf output |
232 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf | 231 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf |
233 return | 232 return |
234 | 233 |
235 for kind, data, pos in stream: | 234 for event in stream: |
236 msgbuf.append(*previous) | 235 msgbuf.append(*previous) |
237 previous = kind, data, pos | 236 previous = event |
238 yield MSGBUF, (), -1 # the place holder for msgbuf output | 237 yield MSGBUF, (), -1 # the place holder for msgbuf output |
239 | 238 |
240 if previous[0] is END: | 239 if previous[0] is END: |
241 yield previous # the outer end tag | 240 yield previous # the outer end tag |
242 else: | 241 else: |
243 msgbuf.append(*previous) | 242 msgbuf.append(*previous) |
244 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf | 243 ctxt['_i18n.choose.%s' % self.tagname] = msgbuf |
245 | |
246 | 244 |
247 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, | 245 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS, |
248 search_text=True, comment_stack=None, msgbuf=None): | 246 search_text=True, comment_stack=None, msgbuf=None): |
249 stream = iter(stream) | 247 stream = iter(stream) |
250 previous = stream.next() | 248 previous = stream.next() |
359 return super(ChooseDirective, cls).attach(template, stream, value, | 357 return super(ChooseDirective, cls).attach(template, stream, value, |
360 namespaces, pos) | 358 namespaces, pos) |
361 | 359 |
362 def __call__(self, stream, directives, ctxt, **vars): | 360 def __call__(self, stream, directives, ctxt, **vars): |
363 ctxt.push({'_i18n.choose.params': self.params, | 361 ctxt.push({'_i18n.choose.params': self.params, |
364 '_i18n.choose.SingularDirective': None, | 362 '_i18n.choose.singular': None, |
365 '_i18n.choose.PluralDirective': None}) | 363 '_i18n.choose.plural': None}) |
364 | |
365 ngettext = ctxt.get('_i18n.ngettext') | |
366 assert hasattr(ngettext, '__call__'), 'No ngettext function available' | |
367 dngettext = ctxt.get('_i18n.dngettext') | |
368 if not dngettext: | |
369 dngettext = lambda d, s, p, n: ngettext(s, p, n) | |
366 | 370 |
367 new_stream = [] | 371 new_stream = [] |
368 singular_stream = None | 372 singular_stream = None |
369 singular_msgbuf = None | 373 singular_msgbuf = None |
370 plural_stream = None | 374 plural_stream = None |
371 plural_msgbuf = None | 375 plural_msgbuf = None |
372 | 376 |
373 ngettext = ctxt.get('_i18n.ungettext') | 377 numeral = self.numeral.evaluate(ctxt) |
374 assert hasattr(ngettext, '__call__'), 'No ngettext function available' | 378 is_plural = self._is_plural(numeral, ngettext) |
375 dngettext = ctxt.get('_i18n.dngettext') | 379 |
376 if not dngettext: | 380 for event in stream: |
377 dngettext = lambda d, s, p, n: ngettext(s, p, n) | 381 if event[0] is SUB and any(isinstance(d, ChooseBranchDirective) |
378 | 382 for d in event[1][0]): |
379 for kind, event, pos in stream: | 383 subdirectives, substream = event[1] |
380 if kind is SUB: | 384 |
381 subdirectives, substream = event | 385 if isinstance(subdirectives[0], SingularDirective): |
382 if isinstance(subdirectives[0], | |
383 SingularDirective) and not singular_stream: | |
384 strip_directive_present = [] | |
385 for idx, subdirective in enumerate(subdirectives): | |
386 if isinstance(subdirective, StripDirective): | |
387 # Any strip directive should be applied AFTER | |
388 # the event's have been translated. | |
389 strip_directive_present.append( | |
390 subdirectives.pop(idx) | |
391 ) | |
392 # Apply directives to update context | |
393 singular_stream = list(_apply_directives(substream, | 386 singular_stream = list(_apply_directives(substream, |
394 subdirectives, | 387 subdirectives, |
395 ctxt, vars)) | 388 ctxt, vars)) |
396 if strip_directive_present: | 389 new_stream.append((MSGBUF, None, (None, -1, -1))) |
397 singular_stream = list( | 390 |
398 _apply_directives(singular_stream, | 391 elif isinstance(subdirectives[0], PluralDirective): |
399 strip_directive_present, | 392 if is_plural: |
400 ctxt, vars) | 393 plural_stream = list(_apply_directives(substream, |
401 ) | 394 subdirectives, |
402 del strip_directive_present | 395 ctxt, vars)) |
403 new_stream.append((MSGBUF, (), ('', -1))) # msgbuf place holder | 396 |
404 singular_msgbuf = ctxt.get('_i18n.choose.SingularDirective') | |
405 elif isinstance(subdirectives[0], | |
406 PluralDirective) and not plural_stream: | |
407 strip_directive_present = [] | |
408 for idx, subdirective in enumerate(subdirectives): | |
409 if isinstance(subdirective, StripDirective): | |
410 # Any strip directive should be applied AFTER | |
411 # the event's have been translated. | |
412 strip_directive_present.append( | |
413 subdirectives.pop(idx) | |
414 ) | |
415 # Apply directives to update context | |
416 plural_stream = list(_apply_directives(substream, | |
417 subdirectives, | |
418 ctxt, vars)) | |
419 if strip_directive_present: | |
420 plural_stream = list( | |
421 _apply_directives(plural_stream, | |
422 strip_directive_present, | |
423 ctxt, vars) | |
424 ) | |
425 del strip_directive_present | |
426 plural_msgbuf = ctxt.get('_i18n.choose.PluralDirective') | |
427 else: | |
428 new_stream.append((kind, event, pos)) | |
429 else: | 397 else: |
430 new_stream.append((kind, event, pos)) | 398 new_stream.append(event) |
431 | 399 |
432 if ctxt.get('_i18n.domain'): | 400 if ctxt.get('_i18n.domain'): |
433 ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), | 401 ngettext = lambda s, p, n: dngettext(ctxt.get('_i18n.domain'), |
434 s, p, n) | 402 s, p, n) |
435 | 403 |
436 # XXX: should we test which form was chosen like this!?!?!? | 404 singular_msgbuf = ctxt.get('_i18n.choose.singular') |
437 # There should be no match in any catalogue for these singular and | 405 if is_plural: |
438 # plural test strings | 406 plural_msgbuf = ctxt.get('_i18n.choose.plural') |
439 singular_test = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' | 407 msgbuf, choice = plural_msgbuf, plural_stream |
440 plural_test = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' | |
441 translation = ngettext(singular_test, plural_test, | |
442 self.numeral.evaluate(ctxt)) | |
443 if translation == singular_test: | |
444 chosen_msgbuf = singular_msgbuf | |
445 chosen_stream = singular_stream | |
446 else: | 408 else: |
447 chosen_msgbuf = plural_msgbuf | 409 msgbuf, choice = singular_msgbuf, singular_stream |
448 chosen_stream = plural_stream | 410 plural_msgbuf = MessageBuffer(self) |
449 del singular_test, plural_test, translation | |
450 | 411 |
451 for kind, data, pos in new_stream: | 412 for kind, data, pos in new_stream: |
452 if kind is MSGBUF: | 413 if kind is MSGBUF: |
453 for skind, sdata, spos in chosen_stream: | 414 for event in choice: |
454 if skind is MSGBUF: | 415 if event[0] is MSGBUF: |
455 translation = ngettext(singular_msgbuf.format(), | 416 translation = ngettext(singular_msgbuf.format(), |
456 plural_msgbuf.format(), | 417 plural_msgbuf.format(), |
457 self.numeral.evaluate(ctxt)) | 418 numeral) |
458 for event in chosen_msgbuf.translate(translation): | 419 for subevent in msgbuf.translate(translation): |
459 yield event | 420 yield subevent |
460 else: | 421 else: |
461 yield skind, sdata, spos | 422 yield event |
462 else: | 423 else: |
463 yield kind, data, pos | 424 yield kind, data, pos |
464 | 425 |
465 ctxt.pop() | 426 ctxt.pop() |
466 | 427 |
515 | 476 |
516 yield self.lineno, 'ngettext', \ | 477 yield self.lineno, 'ngettext', \ |
517 (singular_msgbuf.format(), plural_msgbuf.format()), \ | 478 (singular_msgbuf.format(), plural_msgbuf.format()), \ |
518 comment_stack[-1:] | 479 comment_stack[-1:] |
519 | 480 |
481 def _is_plural(self, numeral, ngettext): | |
482 # XXX: should we test which form was chosen like this!?!?!? | |
483 # There should be no match in any catalogue for these singular and | |
484 # plural test strings | |
485 singular = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' | |
486 plural = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' | |
487 return ngettext(singular, plural, numeral) == plural | |
488 | |
520 | 489 |
521 class DomainDirective(I18NDirective): | 490 class DomainDirective(I18NDirective): |
522 """Implementation of the ``i18n:domain`` directive which allows choosing | 491 """Implementation of the ``i18n:domain`` directive which allows choosing |
523 another i18n domain(catalog) to translate from. | 492 another i18n domain(catalog) to translate from. |
524 | 493 |
556 __slots__ = ['domain'] | 525 __slots__ = ['domain'] |
557 | 526 |
558 def __init__(self, value, template=None, namespaces=None, lineno=-1, | 527 def __init__(self, value, template=None, namespaces=None, lineno=-1, |
559 offset=-1): | 528 offset=-1): |
560 Directive.__init__(self, None, template, namespaces, lineno, offset) | 529 Directive.__init__(self, None, template, namespaces, lineno, offset) |
561 self.domain = value and value.strip() or '__DEFAULT__' | 530 self.domain = value and value.strip() or '__DEFAULT__' |
562 | 531 |
563 @classmethod | 532 @classmethod |
564 def attach(cls, template, stream, value, namespaces, pos): | 533 def attach(cls, template, stream, value, namespaces, pos): |
565 if type(value) is dict: | 534 if type(value) is dict: |
566 value = value.get('name') | 535 value = value.get('name') |
661 self.translate = translate | 630 self.translate = translate |
662 self.ignore_tags = ignore_tags | 631 self.ignore_tags = ignore_tags |
663 self.include_attrs = include_attrs | 632 self.include_attrs = include_attrs |
664 self.extract_text = extract_text | 633 self.extract_text = extract_text |
665 | 634 |
666 def __call__(self, stream, ctxt=None, search_text=True): | 635 def __call__(self, stream, ctxt=None, translate_text=True, |
636 translate_attrs=True): | |
667 """Translate any localizable strings in the given stream. | 637 """Translate any localizable strings in the given stream. |
668 | 638 |
669 This function shouldn't be called directly. Instead, an instance of | 639 This function shouldn't be called directly. Instead, an instance of |
670 the `Translator` class should be registered as a filter with the | 640 the `Translator` class should be registered as a filter with the |
671 `Template` or the `TemplateLoader`, or applied as a regular stream | 641 `Template` or the `TemplateLoader`, or applied as a regular stream |
672 filter. If used as a template filter, it should be inserted in front of | 642 filter. If used as a template filter, it should be inserted in front of |
673 all the default filters. | 643 all the default filters. |
674 | 644 |
675 :param stream: the markup event stream | 645 :param stream: the markup event stream |
676 :param ctxt: the template context (not used) | 646 :param ctxt: the template context (not used) |
677 :param search_text: whether text nodes should be translated (used | 647 :param translate_text: whether text nodes should be translated (used |
678 internally) | 648 internally) |
649 :param translate_attrs: whether attribute values should be translated | |
650 (used internally) | |
679 :return: the localized stream | 651 :return: the localized stream |
680 """ | 652 """ |
681 ignore_tags = self.ignore_tags | 653 ignore_tags = self.ignore_tags |
682 include_attrs = self.include_attrs | 654 include_attrs = self.include_attrs |
683 skip = 0 | 655 skip = 0 |
684 xml_lang = XML_NAMESPACE['lang'] | 656 xml_lang = XML_NAMESPACE['lang'] |
657 if not self.extract_text: | |
658 translate_text = False | |
659 translate_attrs = False | |
685 | 660 |
686 if type(self.translate) is FunctionType: | 661 if type(self.translate) is FunctionType: |
687 gettext = self.translate | 662 gettext = self.translate |
688 if ctxt: | 663 if ctxt: |
689 ctxt['_i18n.gettext'] = gettext | 664 ctxt['_i18n.gettext'] = gettext |
690 else: | 665 else: |
691 gettext = self.translate.ugettext | 666 gettext = self.translate.ugettext |
667 ngettext = self.translate.ungettext | |
692 try: | 668 try: |
693 dgettext = self.translate.dugettext | 669 dgettext = self.translate.dugettext |
694 except AttributeError: | |
695 dgettext = lambda x, y: gettext(y) | |
696 ngettext = self.translate.ungettext | |
697 try: | |
698 dngettext = self.translate.dungettext | 670 dngettext = self.translate.dungettext |
699 except AttributeError: | 671 except AttributeError: |
700 dngettext = lambda d, s, p, n: ngettext(s, p, n) | 672 dgettext = lambda _, y: gettext(y) |
701 | 673 dngettext = lambda _, s, p, n: ngettext(s, p, n) |
702 if ctxt: | 674 if ctxt: |
703 ctxt['_i18n.gettext'] = gettext | 675 ctxt['_i18n.gettext'] = gettext |
704 ctxt['_i18n.ugettext'] = gettext | 676 ctxt['_i18n.ngettext'] = ngettext |
705 ctxt['_i18n.dgettext'] = dgettext | 677 ctxt['_i18n.dgettext'] = dgettext |
706 ctxt['_i18n.ngettext'] = ngettext | |
707 ctxt['_i18n.ungettext'] = ngettext | |
708 ctxt['_i18n.dngettext'] = dngettext | 678 ctxt['_i18n.dngettext'] = dngettext |
709 | 679 |
710 extract_text = self.extract_text | |
711 if not extract_text: | |
712 search_text = False | |
713 | |
714 if ctxt and ctxt.get('_i18n.domain'): | 680 if ctxt and ctxt.get('_i18n.domain'): |
715 old_gettext = gettext | |
716 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) | 681 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) |
717 | 682 |
718 for kind, data, pos in stream: | 683 for kind, data, pos in stream: |
719 | 684 |
720 # skip chunks that should not be localized | 685 # skip chunks that should not be localized |
738 new_attrs = [] | 703 new_attrs = [] |
739 changed = False | 704 changed = False |
740 | 705 |
741 for name, value in attrs: | 706 for name, value in attrs: |
742 newval = value | 707 newval = value |
743 if extract_text and isinstance(value, basestring): | 708 if isinstance(value, basestring): |
744 if name in include_attrs: | 709 if translate_attrs and name in include_attrs: |
745 newval = gettext(value) | 710 newval = gettext(value) |
746 else: | 711 else: |
747 newval = list( | 712 newval = list( |
748 self(_ensure(value), ctxt, search_text=False) | 713 self(_ensure(value), ctxt, translate_text=False) |
749 ) | 714 ) |
750 if newval != value: | 715 if newval != value: |
751 value = newval | 716 value = newval |
752 changed = True | 717 changed = True |
753 new_attrs.append((name, value)) | 718 new_attrs.append((name, value)) |
754 if changed: | 719 if changed: |
755 attrs = Attrs(new_attrs) | 720 attrs = Attrs(new_attrs) |
756 | 721 |
757 yield kind, (tag, attrs), pos | 722 yield kind, (tag, attrs), pos |
758 | 723 |
759 elif search_text and kind is TEXT: | 724 elif translate_text and kind is TEXT: |
760 text = data.strip() | 725 text = data.strip() |
761 if text: | 726 if text: |
762 data = data.replace(text, unicode(gettext(text))) | 727 data = data.replace(text, unicode(gettext(text))) |
763 yield kind, data, pos | 728 yield kind, data, pos |
764 | 729 |
765 elif kind is SUB: | 730 elif kind is SUB: |
766 directives, substream = data | 731 directives, substream = data |
767 current_domain = None | 732 current_domain = None |
768 for idx, directive in enumerate(directives): | 733 for idx, directive in enumerate(directives): |
769 # Organize directives to make everything work | 734 # Organize directives to make everything work |
735 # FIXME: There's got to be a better way to do this! | |
770 if isinstance(directive, DomainDirective): | 736 if isinstance(directive, DomainDirective): |
771 # Grab current domain and update context | 737 # Grab current domain and update context |
772 current_domain = directive.domain | 738 current_domain = directive.domain |
773 ctxt.push({'_i18n.domain': current_domain}) | 739 ctxt.push({'_i18n.domain': current_domain}) |
774 # Put domain directive as the first one in order to | 740 # Put domain directive as the first one in order to |
780 is_i18n_directive = any([ | 746 is_i18n_directive = any([ |
781 isinstance(d, ExtractableI18NDirective) | 747 isinstance(d, ExtractableI18NDirective) |
782 for d in directives | 748 for d in directives |
783 ]) | 749 ]) |
784 substream = list(self(substream, ctxt, | 750 substream = list(self(substream, ctxt, |
785 search_text=not is_i18n_directive)) | 751 translate_text=not is_i18n_directive, |
752 translate_attrs=translate_attrs)) | |
786 yield kind, (directives, substream), pos | 753 yield kind, (directives, substream), pos |
787 | 754 |
788 if current_domain: | 755 if current_domain: |
789 ctxt.pop() | 756 ctxt.pop() |
790 else: | 757 else: |
1046 buffer and return the translated stream. | 1013 buffer and return the translated stream. |
1047 | 1014 |
1048 :param string: the translated message string | 1015 :param string: the translated message string |
1049 """ | 1016 """ |
1050 substream = None | 1017 substream = None |
1051 | 1018 |
1052 def yield_parts(string): | 1019 def yield_parts(string): |
1053 for idx, part in enumerate(regex.split(string)): | 1020 for idx, part in enumerate(regex.split(string)): |
1054 if idx % 2: | 1021 if idx % 2: |
1055 yield self.values[part] | 1022 yield self.values[part] |
1056 elif part: | 1023 elif part: |