comparison genshi/filters/i18n.py @ 892:1de952fd479e

i18n: Support extraction of attributes in markup embedded in ``i18n:msg`` and ``i18n:choose`` directives. See also #380.
author cmlenz
date Wed, 21 Apr 2010 10:42:41 +0000
parents b40dbfee9ba6
children f30c9fb10272
comparison
equal deleted inserted replaced
891:b40dbfee9ba6 892:1de952fd479e
42 42
43 MSGBUF = StreamEventKind('MSGBUF') 43 MSGBUF = StreamEventKind('MSGBUF')
44 SUB_START = StreamEventKind('SUB_START') 44 SUB_START = StreamEventKind('SUB_START')
45 SUB_END = StreamEventKind('SUB_END') 45 SUB_END = StreamEventKind('SUB_END')
46 46
47 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
48 'ugettext', 'ungettext')
49
47 50
48 class I18NDirective(Directive): 51 class I18NDirective(Directive):
49 """Simple interface for i18n directives to support messages extraction.""" 52 """Simple interface for i18n directives to support messages extraction."""
50 53
51 def __call__(self, stream, directives, ctxt, **vars): 54 def __call__(self, stream, directives, ctxt, **vars):
53 56
54 57
55 class ExtractableI18NDirective(I18NDirective): 58 class ExtractableI18NDirective(I18NDirective):
56 """Simple interface for directives to support messages extraction.""" 59 """Simple interface for directives to support messages extraction."""
57 60
58 def extract(self, stream, comment_stack): 61 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
62 search_text=True, comment_stack=None):
59 raise NotImplementedError 63 raise NotImplementedError
60 64
61 65
62 class CommentDirective(I18NDirective): 66 class CommentDirective(I18NDirective):
63 """Implementation of the ``i18n:comment`` template directive which adds 67 """Implementation of the ``i18n:comment`` template directive which adds
71 >>> list(translator.extract(tmpl.stream)) 75 >>> list(translator.extract(tmpl.stream))
72 [(2, None, u'Foo', [u'As in Foo Bar'])] 76 [(2, None, u'Foo', [u'As in Foo Bar'])]
73 """ 77 """
74 __slots__ = ['comment'] 78 __slots__ = ['comment']
75 79
76 def __init__(self, value, template, hints=None, namespaces=None, 80 def __init__(self, value, template=None, namespaces=None, lineno=-1,
77 lineno=-1, offset=-1): 81 offset=-1):
78 Directive.__init__(self, None, template, namespaces, lineno, offset) 82 Directive.__init__(self, None, template, namespaces, lineno, offset)
79 self.comment = value 83 self.comment = value
80 84
81 85
82 class MsgDirective(ExtractableI18NDirective): 86 class MsgDirective(ExtractableI18NDirective):
131 135
132 Starting and ending white-space is stripped off to make it simpler for 136 Starting and ending white-space is stripped off to make it simpler for
133 translators. Stripping it is not that important since it's on the html 137 translators. Stripping it is not that important since it's on the html
134 source, the rendered output will remain the same. 138 source, the rendered output will remain the same.
135 """ 139 """
136 __slots__ = ['params'] 140 __slots__ = ['params', 'lineno']
137 141
138 def __init__(self, value, template, hints=None, namespaces=None, 142 def __init__(self, value, template=None, namespaces=None, lineno=-1,
139 lineno=-1, offset=-1): 143 offset=-1):
140 Directive.__init__(self, None, template, namespaces, lineno, offset) 144 Directive.__init__(self, None, template, namespaces, lineno, offset)
141 self.params = [param.strip() for param in value.split(',') if param] 145 self.params = [param.strip() for param in value.split(',') if param]
146 self.lineno = lineno
142 147
143 @classmethod 148 @classmethod
144 def attach(cls, template, stream, value, namespaces, pos): 149 def attach(cls, template, stream, value, namespaces, pos):
145 if type(value) is dict: 150 if type(value) is dict:
146 value = value.get('params', '').strip() 151 value = value.get('params', '').strip()
174 if previous: 179 if previous:
175 yield previous 180 yield previous
176 181
177 return _apply_directives(_generate(), directives, ctxt, vars) 182 return _apply_directives(_generate(), directives, ctxt, vars)
178 183
179 def extract(self, stream, comment_stack): 184 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
185 search_text=True, comment_stack=None):
180 msgbuf = MessageBuffer(self) 186 msgbuf = MessageBuffer(self)
187 strip = False
181 188
182 stream = iter(stream) 189 stream = iter(stream)
183 previous = stream.next() 190 previous = stream.next()
184 if previous[0] is START: 191 if previous[0] is START:
192 for message in translator._extract_attrs(previous,
193 gettext_functions,
194 search_text=search_text):
195 yield message
185 previous = stream.next() 196 previous = stream.next()
197 strip = True
186 for event in stream: 198 for event in stream:
199 if event[0] is START:
200 for message in translator._extract_attrs(event,
201 gettext_functions,
202 search_text=search_text):
203 yield message
187 msgbuf.append(*previous) 204 msgbuf.append(*previous)
188 previous = event 205 previous = event
189 msgbuf.append(*previous) 206 if not strip:
190 207 msgbuf.append(*previous)
191 yield None, msgbuf.format(), comment_stack[-1:] 208
209 yield self.lineno, None, msgbuf.format(), comment_stack[-1:]
192 210
193 211
194 class ChooseBranchDirective(I18NDirective): 212 class ChooseBranchDirective(I18NDirective):
195 __slots__ = ['params'] 213 __slots__ = ['params']
196 214
197 def __call__(self, stream, directives, ctxt, **vars): 215 def __call__(self, stream, directives, ctxt, **vars):
198 self.params = ctxt.get('_i18n.choose.params', [])[:] 216 self.params = ctxt.get('_i18n.choose.params', [])[:]
199 msgbuf = MessageBuffer(self) 217 msgbuf = MessageBuffer(self)
200 218
201 stream = iter(_apply_directives(stream, directives, ctxt, vars)) 219 stream = iter(_apply_directives(stream, directives, ctxt, vars))
224 else: 242 else:
225 msgbuf.append(*previous) 243 msgbuf.append(*previous)
226 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf 244 ctxt['_i18n.choose.%s' % type(self).__name__] = msgbuf
227 245
228 246
229 def extract(self, stream, comment_stack, msgbuf): 247 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
248 search_text=True, comment_stack=None, msgbuf=None):
230 stream = iter(stream) 249 stream = iter(stream)
231 previous = stream.next() 250 previous = stream.next()
251
232 if previous[0] is START: 252 if previous[0] is START:
253 # skip the enclosing element
254 for message in translator._extract_attrs(previous,
255 gettext_functions,
256 search_text=search_text):
257 yield message
233 previous = stream.next() 258 previous = stream.next()
259
234 for event in stream: 260 for event in stream:
261 if previous[0] is START:
262 for message in translator._extract_attrs(previous,
263 gettext_functions,
264 search_text=search_text):
265 yield message
235 msgbuf.append(*previous) 266 msgbuf.append(*previous)
236 previous = event 267 previous = event
268
237 if previous[0] is not END: 269 if previous[0] is not END:
238 msgbuf.append(*previous) 270 msgbuf.append(*previous)
239 return msgbuf
240 271
241 272
242 class SingularDirective(ChooseBranchDirective): 273 class SingularDirective(ChooseBranchDirective):
243 """Implementation of the ``i18n:singular`` directive to be used with the 274 """Implementation of the ``i18n:singular`` directive to be used with the
244 ``i18n:choose`` directive.""" 275 ``i18n:choose`` directive."""
305 >>> translator.setup(tmpl) 336 >>> translator.setup(tmpl)
306 >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE 337 >>> list(translator.extract(tmpl.stream)) #doctest: +NORMALIZE_WHITESPACE
307 [(2, 'ngettext', (u'There is %(num)s coin', 338 [(2, 'ngettext', (u'There is %(num)s coin',
308 u'There are %(num)s coins'), [])] 339 u'There are %(num)s coins'), [])]
309 """ 340 """
310 __slots__ = ['numeral', 'params'] 341 __slots__ = ['numeral', 'params', 'lineno']
311 342
312 def __init__(self, value, template, hints=None, namespaces=None, 343 def __init__(self, value, template=None, namespaces=None, lineno=-1,
313 lineno=-1, offset=-1): 344 offset=-1):
314 Directive.__init__(self, None, template, namespaces, lineno, offset) 345 Directive.__init__(self, None, template, namespaces, lineno, offset)
315 params = [v.strip() for v in value.split(';')] 346 params = [v.strip() for v in value.split(';')]
316 self.numeral = self._parse_expr(params.pop(0), template, lineno, offset) 347 self.numeral = self._parse_expr(params.pop(0), template, lineno, offset)
317 self.params = params and [name.strip() for name in 348 self.params = params and [name.strip() for name in
318 params[0].split(',') if name] or [] 349 params[0].split(',') if name] or []
350 self.lineno = lineno
319 351
320 @classmethod 352 @classmethod
321 def attach(cls, template, stream, value, namespaces, pos): 353 def attach(cls, template, stream, value, namespaces, pos):
322 if type(value) is dict: 354 if type(value) is dict:
323 numeral = value.get('numeral', '').strip() 355 numeral = value.get('numeral', '').strip()
341 ngettext = ctxt.get('_i18n.ungettext') 373 ngettext = ctxt.get('_i18n.ungettext')
342 assert hasattr(ngettext, '__call__'), 'No ngettext function available' 374 assert hasattr(ngettext, '__call__'), 'No ngettext function available'
343 dngettext = ctxt.get('_i18n.dngettext') 375 dngettext = ctxt.get('_i18n.dngettext')
344 if not dngettext: 376 if not dngettext:
345 dngettext = lambda d, s, p, n: ngettext(s, p, n) 377 dngettext = lambda d, s, p, n: ngettext(s, p, n)
378
346 for kind, event, pos in stream: 379 for kind, event, pos in stream:
347 if kind is SUB: 380 if kind is SUB:
348 subdirectives, substream = event 381 subdirectives, substream = event
349 if isinstance(subdirectives[0], 382 if isinstance(subdirectives[0],
350 SingularDirective) and not singular_stream: 383 SingularDirective) and not singular_stream:
405 # plural test strings 438 # plural test strings
406 singular_test = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93' 439 singular_test = u'O\x85\xbe\xa9\xa8az\xc3?\xe6\xa1\x02n\x84\x93'
407 plural_test = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00' 440 plural_test = u'\xcc\xfb+\xd3Pn\x9d\tT\xec\x1d\xda\x1a\x88\x00'
408 translation = ngettext(singular_test, plural_test, 441 translation = ngettext(singular_test, plural_test,
409 self.numeral.evaluate(ctxt)) 442 self.numeral.evaluate(ctxt))
410 if translation==singular_test: 443 if translation == singular_test:
411 chosen_msgbuf = singular_msgbuf 444 chosen_msgbuf = singular_msgbuf
412 chosen_stream = singular_stream 445 chosen_stream = singular_stream
413 else: 446 else:
414 chosen_msgbuf = plural_msgbuf 447 chosen_msgbuf = plural_msgbuf
415 chosen_stream = plural_stream 448 chosen_stream = plural_stream
429 else: 462 else:
430 yield kind, data, pos 463 yield kind, data, pos
431 464
432 ctxt.pop() 465 ctxt.pop()
433 466
434 def extract(self, stream, comment_stack): 467 def extract(self, translator, stream, gettext_functions=GETTEXT_FUNCTIONS,
468 search_text=True, comment_stack=None):
469 strip = False
435 stream = iter(stream) 470 stream = iter(stream)
436 previous = stream.next() 471 previous = stream.next()
437 if previous is START: 472
438 stream.next() 473 if previous[0] is START:
474 # skip the enclosing element
475 for message in translator._extract_attrs(previous,
476 gettext_functions,
477 search_text=search_text):
478 yield message
479 previous = stream.next()
480 strip = True
439 481
440 singular_msgbuf = MessageBuffer(self) 482 singular_msgbuf = MessageBuffer(self)
441 plural_msgbuf = MessageBuffer(self) 483 plural_msgbuf = MessageBuffer(self)
442 484
443 for kind, event, pos in stream: 485 for event in stream:
444 if kind is SUB: 486 if previous[0] is SUB:
445 subdirectives, substream = event 487 directives, substream = previous[1]
446 for subdirective in subdirectives: 488 for directive in directives:
447 if isinstance(subdirective, SingularDirective): 489 if isinstance(directive, SingularDirective):
448 singular_msgbuf = subdirective.extract(substream, comment_stack, 490 for message in directive.extract(translator,
449 singular_msgbuf) 491 substream, gettext_functions, search_text,
450 elif isinstance(subdirective, PluralDirective): 492 comment_stack, msgbuf=singular_msgbuf):
451 plural_msgbuf = subdirective.extract(substream, comment_stack, 493 yield message
452 plural_msgbuf) 494 elif isinstance(directive, PluralDirective):
453 elif not isinstance(subdirective, StripDirective): 495 for message in directive.extract(translator,
454 singular_msgbuf.append(kind, event, pos) 496 substream, gettext_functions, search_text,
455 plural_msgbuf.append(kind, event, pos) 497 comment_stack, msgbuf=plural_msgbuf):
498 yield message
499 elif not isinstance(directive, StripDirective):
500 singular_msgbuf.append(*previous)
501 plural_msgbuf.append(*previous)
456 else: 502 else:
457 singular_msgbuf.append(kind, event, pos) 503 if previous[0] is START:
458 plural_msgbuf.append(kind, event, pos) 504 for message in translator._extract_attrs(previous,
459 505 gettext_functions,
460 yield 'ngettext', \ 506 search_text):
507 yield message
508 singular_msgbuf.append(*previous)
509 plural_msgbuf.append(*previous)
510 previous = event
511
512 if not strip:
513 singular_msgbuf.append(*previous)
514 plural_msgbuf.append(*previous)
515
516 yield self.lineno, 'ngettext', \
461 (singular_msgbuf.format(), plural_msgbuf.format()), \ 517 (singular_msgbuf.format(), plural_msgbuf.format()), \
462 comment_stack[-1:] 518 comment_stack[-1:]
463 519
464 520
465 class DomainDirective(I18NDirective): 521 class DomainDirective(I18NDirective):
497 <p>Voh</p> 553 <p>Voh</p>
498 </html> 554 </html>
499 """ 555 """
500 __slots__ = ['domain'] 556 __slots__ = ['domain']
501 557
502 def __init__(self, value, template, hints=None, namespaces=None, 558 def __init__(self, value, template=None, namespaces=None, lineno=-1,
503 lineno=-1, offset=-1): 559 offset=-1):
504 Directive.__init__(self, None, template, namespaces, lineno, offset) 560 Directive.__init__(self, None, template, namespaces, lineno, offset)
505 self.domain = value and value.strip() or '__DEFAULT__' 561 self.domain = value and value.strip() or '__DEFAULT__'
506 562
507 @classmethod 563 @classmethod
508 def attach(cls, template, stream, value, namespaces, pos): 564 def attach(cls, template, stream, value, namespaces, pos):
731 787
732 if current_domain: 788 if current_domain:
733 ctxt.pop() 789 ctxt.pop()
734 else: 790 else:
735 yield kind, data, pos 791 yield kind, data, pos
736
737 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
738 'ugettext', 'ungettext')
739 792
740 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS, 793 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
741 search_text=True, comment_stack=None): 794 search_text=True, comment_stack=None):
742 """Extract localizable strings from the given template stream. 795 """Extract localizable strings from the given template stream.
743 796
799 if kind is END: 852 if kind is END:
800 skip -= 1 853 skip -= 1
801 854
802 if kind is START and not skip: 855 if kind is START and not skip:
803 tag, attrs = data 856 tag, attrs = data
804
805 if tag in self.ignore_tags or \ 857 if tag in self.ignore_tags or \
806 isinstance(attrs.get(xml_lang), basestring): 858 isinstance(attrs.get(xml_lang), basestring):
807 skip += 1 859 skip += 1
808 continue 860 continue
809 861
810 for name, value in attrs: 862 for message in self._extract_attrs((kind, data, pos),
811 if search_text and isinstance(value, basestring): 863 gettext_functions,
812 if name in self.include_attrs: 864 search_text=search_text):
813 text = value.strip() 865 yield message
814 if text:
815 # XXX: Do we need to grab i18n:comment from comment_stack ???
816 yield pos[1], None, text, []
817 else:
818 for lineno, funcname, text, comments in self.extract(
819 _ensure(value), gettext_functions,
820 search_text=False):
821 yield lineno, funcname, text, comments
822 866
823 elif not skip and search_text and kind is TEXT: 867 elif not skip and search_text and kind is TEXT:
824 text = data.strip() 868 text = data.strip()
825 if text and [ch for ch in text if ch.isalpha()]: 869 if text and [ch for ch in text if ch.isalpha()]:
826 yield pos[1], None, text, comment_stack[-1:] 870 yield pos[1], None, text, comment_stack[-1:]
842 in_comment = True 886 in_comment = True
843 comment_stack.append(directive.comment) 887 comment_stack.append(directive.comment)
844 if len(directives) == 1: 888 if len(directives) == 1:
845 # in case we're in the presence of something like: 889 # in case we're in the presence of something like:
846 # <p i18n:comment="foo">Foo</p> 890 # <p i18n:comment="foo">Foo</p>
847 messages = self.extract( 891 for message in self.extract(
848 substream, gettext_functions, 892 substream, gettext_functions,
849 search_text=search_text and not skip, 893 search_text=search_text and not skip,
850 comment_stack=comment_stack) 894 comment_stack=comment_stack):
851 for lineno, funcname, text, comments in messages: 895 yield message
852 yield lineno, funcname, text, comments
853 directives.pop(idx) 896 directives.pop(idx)
854 elif not isinstance(directive, I18NDirective): 897 elif not isinstance(directive, I18NDirective):
855 # Remove all other non i18n directives from the process 898 # Remove all other non i18n directives from the process
856 directives.pop(idx) 899 directives.pop(idx)
857 900
858 if not directives and not in_comment: 901 if not directives and not in_comment:
859 # Extract content if there's no directives because 902 # Extract content if there's no directives because
860 # strip was pop'ed and not because comment was pop'ed. 903 # strip was pop'ed and not because comment was pop'ed.
861 # Extraction in this case has been taken care of. 904 # Extraction in this case has been taken care of.
862 messages = self.extract( 905 for message in self.extract(
863 substream, gettext_functions, 906 substream, gettext_functions,
864 search_text=search_text and not skip) 907 search_text=search_text and not skip):
865 for lineno, funcname, text, comments in messages: 908 yield message
866 yield lineno, funcname, text, comments
867 909
868 for directive in directives: 910 for directive in directives:
869 if isinstance(directive, ExtractableI18NDirective): 911 if isinstance(directive, ExtractableI18NDirective):
870 messages = directive.extract(substream, comment_stack) 912 for message in directive.extract(self,
871 for funcname, text, comments in messages: 913 substream, gettext_functions,
872 yield pos[1], funcname, text, comments 914 search_text=search_text and not skip,
915 comment_stack=comment_stack):
916 yield message
873 else: 917 else:
874 messages = self.extract( 918 for message in self.extract(
875 substream, gettext_functions, 919 substream, gettext_functions,
876 search_text=search_text and not skip) 920 search_text=search_text and not skip,
877 for lineno, funcname, text, comments in messages: 921 comment_stack=comment_stack):
878 yield lineno, funcname, text, comments 922 yield message
879 923
880 if in_comment: 924 if in_comment:
881 comment_stack.pop() 925 comment_stack.pop()
882 926
883 def get_directive_index(self, dir_cls): 927 def get_directive_index(self, dir_cls):
893 :param template: a `Template` instance 937 :param template: a `Template` instance
894 """ 938 """
895 template.filters.insert(0, self) 939 template.filters.insert(0, self)
896 if hasattr(template, 'add_directives'): 940 if hasattr(template, 'add_directives'):
897 template.add_directives(Translator.NAMESPACE, self) 941 template.add_directives(Translator.NAMESPACE, self)
942
943 def _extract_attrs(self, event, gettext_functions, search_text):
944 for name, value in event[1][1]:
945 if search_text and isinstance(value, basestring):
946 if name in self.include_attrs:
947 text = value.strip()
948 if text:
949 yield event[2][1], None, text, []
950 else:
951 for message in self.extract(_ensure(value), gettext_functions,
952 search_text=False):
953 yield message
898 954
899 955
900 class MessageBuffer(object): 956 class MessageBuffer(object):
901 """Helper class for managing internationalized mixed content. 957 """Helper class for managing internationalized mixed content.
902 958
955 params = "(%s)" % params 1011 params = "(%s)" % params
956 raise IndexError("%d parameters%s given to 'i18n:%s' but " 1012 raise IndexError("%d parameters%s given to 'i18n:%s' but "
957 "%d or more expressions used in '%s', line %s" 1013 "%d or more expressions used in '%s', line %s"
958 % (len(self.orig_params), params, 1014 % (len(self.orig_params), params,
959 self.directive.tagname, 1015 self.directive.tagname,
960 len(self.orig_params)+1, 1016 len(self.orig_params) + 1,
961 os.path.basename(pos[0] or 1017 os.path.basename(pos[0] or
962 'In-memory Template'), 1018 'In-memory Template'),
963 pos[1])) 1019 pos[1]))
964 self.string.append('%%(%s)s' % param) 1020 self.string.append('%%(%s)s' % param)
965 self.events.setdefault(self.stack[-1], []).append((kind, data, pos)) 1021 self.events.setdefault(self.stack[-1], []).append((kind, data, pos))
1074 if substream is not None: 1130 if substream is not None:
1075 substream.append(event) 1131 substream.append(event)
1076 else: 1132 else:
1077 yield event 1133 yield event
1078 1134
1135
1079 def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')): 1136 def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|(?<!\\)\]')):
1080 """Parse a translated message using Genshi mixed content message 1137 """Parse a translated message using Genshi mixed content message
1081 formatting. 1138 formatting.
1082 1139
1083 >>> parse_msg("See [1:Help].") 1140 >>> parse_msg("See [1:Help].")
1124 def extract_from_code(code, gettext_functions): 1181 def extract_from_code(code, gettext_functions):
1125 """Extract strings from Python bytecode. 1182 """Extract strings from Python bytecode.
1126 1183
1127 >>> from genshi.template.eval import Expression 1184 >>> from genshi.template.eval import Expression
1128 >>> expr = Expression('_("Hello")') 1185 >>> expr = Expression('_("Hello")')
1129 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1186 >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
1130 [('_', u'Hello')] 1187 [('_', u'Hello')]
1131 1188
1132 >>> expr = Expression('ngettext("You have %(num)s item", ' 1189 >>> expr = Expression('ngettext("You have %(num)s item", '
1133 ... '"You have %(num)s items", num)') 1190 ... '"You have %(num)s items", num)')
1134 >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS)) 1191 >>> list(extract_from_code(expr, GETTEXT_FUNCTIONS))
1135 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))] 1192 [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]
1136 1193
1137 :param code: the `Code` object 1194 :param code: the `Code` object
1138 :type code: `genshi.template.eval.Code` 1195 :type code: `genshi.template.eval.Code`
1139 :param gettext_functions: a sequence of function names 1196 :param gettext_functions: a sequence of function names
Copyright (C) 2012-2017 Edgewall Software