comparison genshi/filters/i18n.py @ 450:94601511cd68 trunk

Extend the I18n extraction to also yield function names if applicable.
author cmlenz
date Fri, 13 Apr 2007 20:58:48 +0000
parents 1154f2aadb6c
children 4b6dc4978691
comparison
equal deleted inserted replaced
449:b07e65580175 450:94601511cd68
6 from sets import ImmutableSet as frozenset 6 from sets import ImmutableSet as frozenset
7 from gettext import gettext 7 from gettext import gettext
8 from opcode import opmap 8 from opcode import opmap
9 import re 9 import re
10 10
11 from genshi.core import Attrs, START, END, TEXT 11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT
12 from genshi.template.base import Template, EXPR, SUB 12 from genshi.template.base import Template, EXPR, SUB
13 from genshi.template.markup import EXEC 13 from genshi.template.markup import EXEC
14 14
15 _LOAD_NAME = chr(opmap['LOAD_NAME']) 15 _LOAD_NAME = chr(opmap['LOAD_NAME'])
16 _LOAD_CONST = chr(opmap['LOAD_CONST']) 16 _LOAD_CONST = chr(opmap['LOAD_CONST'])
18 _BINARY_ADD = chr(opmap['BINARY_ADD']) 18 _BINARY_ADD = chr(opmap['BINARY_ADD'])
19 19
20 20
21 class Translator(object): 21 class Translator(object):
22 """Can extract and translate localizable strings from markup streams and 22 """Can extract and translate localizable strings from markup streams and
23 templates 23 templates.
24 24
25 For example, assume the following template: 25 For example, assume the following template:
26 26
27 >>> from genshi.template import MarkupTemplate 27 >>> from genshi.template import MarkupTemplate
28 >>>
28 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> 29 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
29 ... <head> 30 ... <head>
30 ... <title>Example</title> 31 ... <title>Example</title>
31 ... </head> 32 ... </head>
32 ... <body> 33 ... <body>
41 >>> def pseudo_gettext(string): 42 >>> def pseudo_gettext(string):
42 ... return { 43 ... return {
43 ... 'Example': 'Beispiel', 44 ... 'Example': 'Beispiel',
44 ... 'Hello, %(name)s': 'Hallo, %(name)s' 45 ... 'Hello, %(name)s': 'Hallo, %(name)s'
45 ... }[string] 46 ... }[string]
47 >>>
46 >>> translator = Translator(pseudo_gettext) 48 >>> translator = Translator(pseudo_gettext)
47 49
48 Next, the translator needs to be prepended to any already defined filters 50 Next, the translator needs to be prepended to any already defined filters
49 on the template: 51 on the template:
50 52
63 <p>Hallo, Hans</p> 65 <p>Hallo, Hans</p>
64 </body> 66 </body>
65 </html> 67 </html>
66 """ 68 """
67 69
68 IGNORE_TAGS = frozenset(['script', 'style']) 70 IGNORE_TAGS = frozenset([
71 QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
72 QName('style'), QName('http://www.w3.org/1999/xhtml}style')
73 ])
69 INCLUDE_ATTRS = frozenset(['title', 'alt']) 74 INCLUDE_ATTRS = frozenset(['title', 'alt'])
70 75
71 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, 76 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
72 include_attrs=INCLUDE_ATTRS): 77 include_attrs=INCLUDE_ATTRS):
73 """Initialize the translator. 78 """Initialize the translator.
75 :param translate: the translation function, for example ``gettext`` or 80 :param translate: the translation function, for example ``gettext`` or
76 ``ugettext``. 81 ``ugettext``.
77 :param ignore_tags: a set of tag names that should not be localized 82 :param ignore_tags: a set of tag names that should not be localized
78 :param include_attrs: a set of attribute names that should be localized 83 :param include_attrs: a set of attribute names that should be localized
79 """ 84 """
80 self.gettext = translate 85 self.translate = translate
81 self.ignore_tags = ignore_tags 86 self.ignore_tags = ignore_tags
82 self.include_attrs = include_attrs 87 self.include_attrs = include_attrs
83 88
84 def __call__(self, stream, ctxt=None, search_text=True): 89 def __call__(self, stream, ctxt=None, search_text=True):
85 """Translate any localizable strings in the given stream. 90 """Translate any localizable strings in the given stream.
94 :param ctxt: the template context (not used) 99 :param ctxt: the template context (not used)
95 :param search_text: whether text nodes should be translated (used 100 :param search_text: whether text nodes should be translated (used
96 internally) 101 internally)
97 :return: the localized stream 102 :return: the localized stream
98 """ 103 """
104 ignore_tags = self.ignore_tags
105 include_attrs = self.include_attrs
106 translate = self.translate
99 skip = 0 107 skip = 0
100 108
101 for kind, data, pos in stream: 109 for kind, data, pos in stream:
102 110
103 # skip chunks that should not be localized 111 # skip chunks that should not be localized
104 if skip: 112 if skip:
105 if kind is START: 113 if kind is START:
106 tag, attrs = data 114 tag, attrs = data
107 tag = tag.localname 115 if tag in ignore_tags:
108 if tag.localname in self.ignore_tags:
109 skip += 1 116 skip += 1
110 elif kind is END: 117 elif kind is END:
111 if tag.localname in self.ignore_tags: 118 if tag in ignore_tags:
112 skip -= 1 119 skip -= 1
113 yield kind, data, pos 120 yield kind, data, pos
114 continue 121 continue
115 122
116 # handle different events that can be localized 123 # handle different events that can be localized
117 if kind is START: 124 if kind is START:
118 tag, attrs = data 125 tag, attrs = data
119 if tag.localname in self.ignore_tags: 126 if tag in ignore_tags:
120 skip += 1 127 skip += 1
121 yield kind, data, pos 128 yield kind, data, pos
122 continue 129 continue
123 130
124 new_attrs = list(attrs) 131 new_attrs = list(attrs)
126 for name, value in attrs: 133 for name, value in attrs:
127 if name in include_attrs: 134 if name in include_attrs:
128 if isinstance(value, basestring): 135 if isinstance(value, basestring):
129 newval = ugettext(value) 136 newval = ugettext(value)
130 else: 137 else:
131 newval = list(self(value, ctxt, search_text=name in self.include_attrs)) 138 newval = list(self(value, ctxt, search_text=name in include_attrs))
132 if newval != value: 139 if newval != value:
133 value = new_val 140 value = new_val
134 changed = True 141 changed = True
135 new_attrs.append((name, value)) 142 new_attrs.append((name, value))
136 if changed: 143 if changed:
139 yield kind, (tag, attrs), pos 146 yield kind, (tag, attrs), pos
140 147
141 elif kind is TEXT: 148 elif kind is TEXT:
142 text = data.strip() 149 text = data.strip()
143 if text: 150 if text:
144 data = data.replace(text, self.gettext(text)) 151 data = data.replace(text, translate(text))
145 yield kind, data, pos 152 yield kind, data, pos
146 153
147 elif kind is SUB: 154 elif kind is SUB:
148 subkind, substream = data 155 subkind, substream = data
149 new_substream = list(self(substream, ctxt)) 156 new_substream = list(self(substream, ctxt))
150 yield kind, (subkind, new_substream), pos 157 yield kind, (subkind, new_substream), pos
151 158
152 else: 159 else:
153 yield kind, data, pos 160 yield kind, data, pos
154 161
155 def extract(self, stream, gettext_functions=('_', 'gettext', 'ngettext')): 162 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
163 'ugettext', 'ungettext')
164
165 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS):
156 """Extract localizable strings from the given template stream. 166 """Extract localizable strings from the given template stream.
157 167
158 For every string found, this function yields a ``(lineno, message)`` 168 For every string found, this function yields a ``(lineno, function,
159 tuple. 169 message)`` tuple, where:
160 170
161 :param stream: the event stream to extract strings from; can be a 171 * ``lineno`` is the number of the line on which the string was found,
162 regular stream or a template stream 172 * ``function`` is the name of the ``gettext`` function used (if the
173 string was extracted from embedded Python code), and
174 * ``message`` is the string itself (a ``unicode`` object).
163 175
164 >>> from genshi.template import MarkupTemplate 176 >>> from genshi.template import MarkupTemplate
177 >>>
165 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> 178 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
166 ... <head> 179 ... <head>
167 ... <title>Example</title> 180 ... <title>Example</title>
168 ... </head> 181 ... </head>
169 ... <body> 182 ... <body>
170 ... <h1>Example</h1> 183 ... <h1>Example</h1>
171 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> 184 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
172 ... </body> 185 ... </body>
173 ... </html>''', filename='example.html') 186 ... </html>''', filename='example.html')
174 >>> for lineno, message in Translator().extract(tmpl.stream): 187 >>>
175 ... print "Line %d: %r" % (lineno, message) 188 >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
176 Line 3: u'Example' 189 ... print "%d, %r, %r" % (lineno, funcname, message)
177 Line 6: u'Example' 190 3, None, u'Example'
178 Line 7: u'Hello, %(name)s' 191 6, None, u'Example'
192 7, '_', u'Hello, %(name)s'
193
194 :param stream: the event stream to extract strings from; can be a
195 regular stream or a template stream
196 :param gettext_functions: a sequence of function names that should be
197 treated as gettext-style localization
198 functions
179 """ 199 """
180 tagname = None 200 tagname = None
181 skip = 0 201 skip = 0
182 202
183 for kind, data, pos in stream: 203 for kind, data, pos in stream:
184 if skip: 204 if skip:
185 if kind is START: 205 if kind is START:
186 tag, attrs = data 206 tag, attrs = data
187 if tag.localname in self.ignore_tags: 207 if tag in self.ignore_tags:
188 skip += 1 208 skip += 1
189 if kind is END: 209 if kind is END:
190 tag = data 210 tag = data
191 if tag.localname in self.ignore_tags: 211 if tag in self.ignore_tags:
192 skip -= 1 212 skip -= 1
193 continue 213 continue
194 214
195 if kind is START: 215 if kind is START:
196 tag, attrs = data 216 tag, attrs = data
197 if tag.localname in self.ignore_tags: 217 if tag in self.ignore_tags:
198 skip += 1 218 skip += 1
199 continue 219 continue
200 220
201 for name, value in attrs: 221 for name, value in attrs:
202 if name in self.include_attrs: 222 if name in self.include_attrs:
203 if isinstance(value, basestring): 223 if isinstance(value, basestring):
204 text = value.strip() 224 text = value.strip()
205 if text: 225 if text:
206 yield pos[1], text 226 yield pos[1], None, text
207 else: 227 else:
208 for lineno, text in harvest(value): 228 for lineno, funcname, text in harvest(value):
209 yield lineno, text 229 yield lineno, funcname, text
210 230
211 elif kind is TEXT: 231 elif kind is TEXT:
212 text = data.strip() 232 text = data.strip()
213 if text and filter(None, [ch.isalpha() for ch in text]): 233 if text and filter(None, [ch.isalpha() for ch in text]):
214 yield pos[1], text 234 yield pos[1], None, text
215 235
216 elif kind is EXPR or kind is EXEC: 236 elif kind is EXPR or kind is EXEC:
217 consts = dict([(n, chr(i) + '\x00') for i, n in 237 consts = dict([(n, chr(i) + '\x00') for i, n in
218 enumerate(data.code.co_consts)]) 238 enumerate(data.code.co_consts)])
219 gettext_locs = [consts[n] for n in gettext_functions 239 gettext_locs = [consts[n] for n in gettext_functions
221 ops = [ 241 ops = [
222 _LOAD_CONST, '(', '|'.join(gettext_locs), ')', 242 _LOAD_CONST, '(', '|'.join(gettext_locs), ')',
223 _CALL_FUNCTION, '.\x00', 243 _CALL_FUNCTION, '.\x00',
224 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' 244 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)'
225 ] 245 ]
226 for _, opcodes in re.findall(''.join(ops), data.code.co_code): 246 for loc, opcodes in re.findall(''.join(ops), data.code.co_code):
247 funcname = data.code.co_consts[ord(loc[0])]
227 strings = [] 248 strings = []
228 opcodes = iter(opcodes) 249 opcodes = iter(opcodes)
229 for opcode in opcodes: 250 for opcode in opcodes:
230 if opcode == _BINARY_ADD: 251 if opcode == _BINARY_ADD:
231 arg = strings.pop() 252 arg = strings.pop()
235 opcodes.next() # skip second byte 256 opcodes.next() # skip second byte
236 if not isinstance(arg, basestring): 257 if not isinstance(arg, basestring):
237 break 258 break
238 strings.append(unicode(arg)) 259 strings.append(unicode(arg))
239 for string in strings: 260 for string in strings:
240 yield pos[1], string 261 yield pos[1], funcname, string
241 262
242 elif kind is SUB: 263 elif kind is SUB:
243 subkind, substream = data 264 subkind, substream = data
244 for lineno, text in self.harvest(substream): 265 for lineno, funcname, text in self.harvest(substream):
245 yield lineno, text 266 yield lineno, funcname, text
Copyright (C) 2012-2017 Edgewall Software