Mercurial > genshi > mirror
comparison genshi/filters/i18n.py @ 450:94601511cd68 trunk
Extend the I18n extraction to also yield function names if applicable.
author | cmlenz |
---|---|
date | Fri, 13 Apr 2007 20:58:48 +0000 |
parents | 1154f2aadb6c |
children | 4b6dc4978691 |
comparison
equal
deleted
inserted
replaced
449:b07e65580175 | 450:94601511cd68 |
---|---|
6 from sets import ImmutableSet as frozenset | 6 from sets import ImmutableSet as frozenset |
7 from gettext import gettext | 7 from gettext import gettext |
8 from opcode import opmap | 8 from opcode import opmap |
9 import re | 9 import re |
10 | 10 |
11 from genshi.core import Attrs, START, END, TEXT | 11 from genshi.core import Attrs, Namespace, QName, START, END, TEXT |
12 from genshi.template.base import Template, EXPR, SUB | 12 from genshi.template.base import Template, EXPR, SUB |
13 from genshi.template.markup import EXEC | 13 from genshi.template.markup import EXEC |
14 | 14 |
15 _LOAD_NAME = chr(opmap['LOAD_NAME']) | 15 _LOAD_NAME = chr(opmap['LOAD_NAME']) |
16 _LOAD_CONST = chr(opmap['LOAD_CONST']) | 16 _LOAD_CONST = chr(opmap['LOAD_CONST']) |
18 _BINARY_ADD = chr(opmap['BINARY_ADD']) | 18 _BINARY_ADD = chr(opmap['BINARY_ADD']) |
19 | 19 |
20 | 20 |
21 class Translator(object): | 21 class Translator(object): |
22 """Can extract and translate localizable strings from markup streams and | 22 """Can extract and translate localizable strings from markup streams and |
23 templates | 23 templates. |
24 | 24 |
25 For example, assume the following template: | 25 For example, assume the following template: |
26 | 26 |
27 >>> from genshi.template import MarkupTemplate | 27 >>> from genshi.template import MarkupTemplate |
28 >>> | |
28 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> | 29 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> |
29 ... <head> | 30 ... <head> |
30 ... <title>Example</title> | 31 ... <title>Example</title> |
31 ... </head> | 32 ... </head> |
32 ... <body> | 33 ... <body> |
41 >>> def pseudo_gettext(string): | 42 >>> def pseudo_gettext(string): |
42 ... return { | 43 ... return { |
43 ... 'Example': 'Beispiel', | 44 ... 'Example': 'Beispiel', |
44 ... 'Hello, %(name)s': 'Hallo, %(name)s' | 45 ... 'Hello, %(name)s': 'Hallo, %(name)s' |
45 ... }[string] | 46 ... }[string] |
47 >>> | |
46 >>> translator = Translator(pseudo_gettext) | 48 >>> translator = Translator(pseudo_gettext) |
47 | 49 |
48 Next, the translator needs to be prepended to any already defined filters | 50 Next, the translator needs to be prepended to any already defined filters |
49 on the template: | 51 on the template: |
50 | 52 |
63 <p>Hallo, Hans</p> | 65 <p>Hallo, Hans</p> |
64 </body> | 66 </body> |
65 </html> | 67 </html> |
66 """ | 68 """ |
67 | 69 |
68 IGNORE_TAGS = frozenset(['script', 'style']) | 70 IGNORE_TAGS = frozenset([ |
71 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), | |
72 QName('style'), QName('http://www.w3.org/1999/xhtml}style') | |
73 ]) | |
69 INCLUDE_ATTRS = frozenset(['title', 'alt']) | 74 INCLUDE_ATTRS = frozenset(['title', 'alt']) |
70 | 75 |
71 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, | 76 def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS, |
72 include_attrs=INCLUDE_ATTRS): | 77 include_attrs=INCLUDE_ATTRS): |
73 """Initialize the translator. | 78 """Initialize the translator. |
75 :param translate: the translation function, for example ``gettext`` or | 80 :param translate: the translation function, for example ``gettext`` or |
76 ``ugettext``. | 81 ``ugettext``. |
77 :param ignore_tags: a set of tag names that should not be localized | 82 :param ignore_tags: a set of tag names that should not be localized |
78 :param include_attrs: a set of attribute names that should be localized | 83 :param include_attrs: a set of attribute names that should be localized |
79 """ | 84 """ |
80 self.gettext = translate | 85 self.translate = translate |
81 self.ignore_tags = ignore_tags | 86 self.ignore_tags = ignore_tags |
82 self.include_attrs = include_attrs | 87 self.include_attrs = include_attrs |
83 | 88 |
84 def __call__(self, stream, ctxt=None, search_text=True): | 89 def __call__(self, stream, ctxt=None, search_text=True): |
85 """Translate any localizable strings in the given stream. | 90 """Translate any localizable strings in the given stream. |
94 :param ctxt: the template context (not used) | 99 :param ctxt: the template context (not used) |
95 :param search_text: whether text nodes should be translated (used | 100 :param search_text: whether text nodes should be translated (used |
96 internally) | 101 internally) |
97 :return: the localized stream | 102 :return: the localized stream |
98 """ | 103 """ |
104 ignore_tags = self.ignore_tags | |
105 include_attrs = self.include_attrs | |
106 translate = self.translate | |
99 skip = 0 | 107 skip = 0 |
100 | 108 |
101 for kind, data, pos in stream: | 109 for kind, data, pos in stream: |
102 | 110 |
103 # skip chunks that should not be localized | 111 # skip chunks that should not be localized |
104 if skip: | 112 if skip: |
105 if kind is START: | 113 if kind is START: |
106 tag, attrs = data | 114 tag, attrs = data |
107 tag = tag.localname | 115 if tag in ignore_tags: |
108 if tag.localname in self.ignore_tags: | |
109 skip += 1 | 116 skip += 1 |
110 elif kind is END: | 117 elif kind is END: |
111 if tag.localname in self.ignore_tags: | 118 if tag in ignore_tags: |
112 skip -= 1 | 119 skip -= 1 |
113 yield kind, data, pos | 120 yield kind, data, pos |
114 continue | 121 continue |
115 | 122 |
116 # handle different events that can be localized | 123 # handle different events that can be localized |
117 if kind is START: | 124 if kind is START: |
118 tag, attrs = data | 125 tag, attrs = data |
119 if tag.localname in self.ignore_tags: | 126 if tag in ignore_tags: |
120 skip += 1 | 127 skip += 1 |
121 yield kind, data, pos | 128 yield kind, data, pos |
122 continue | 129 continue |
123 | 130 |
124 new_attrs = list(attrs) | 131 new_attrs = list(attrs) |
126 for name, value in attrs: | 133 for name, value in attrs: |
127 if name in include_attrs: | 134 if name in include_attrs: |
128 if isinstance(value, basestring): | 135 if isinstance(value, basestring): |
129 newval = ugettext(value) | 136 newval = ugettext(value) |
130 else: | 137 else: |
131 newval = list(self(value, ctxt, search_text=name in self.include_attrs)) | 138 newval = list(self(value, ctxt, search_text=name in include_attrs)) |
132 if newval != value: | 139 if newval != value: |
133 value = new_val | 140 value = new_val |
134 changed = True | 141 changed = True |
135 new_attrs.append((name, value)) | 142 new_attrs.append((name, value)) |
136 if changed: | 143 if changed: |
139 yield kind, (tag, attrs), pos | 146 yield kind, (tag, attrs), pos |
140 | 147 |
141 elif kind is TEXT: | 148 elif kind is TEXT: |
142 text = data.strip() | 149 text = data.strip() |
143 if text: | 150 if text: |
144 data = data.replace(text, self.gettext(text)) | 151 data = data.replace(text, translate(text)) |
145 yield kind, data, pos | 152 yield kind, data, pos |
146 | 153 |
147 elif kind is SUB: | 154 elif kind is SUB: |
148 subkind, substream = data | 155 subkind, substream = data |
149 new_substream = list(self(substream, ctxt)) | 156 new_substream = list(self(substream, ctxt)) |
150 yield kind, (subkind, new_substream), pos | 157 yield kind, (subkind, new_substream), pos |
151 | 158 |
152 else: | 159 else: |
153 yield kind, data, pos | 160 yield kind, data, pos |
154 | 161 |
155 def extract(self, stream, gettext_functions=('_', 'gettext', 'ngettext')): | 162 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext', |
163 'ugettext', 'ungettext') | |
164 | |
165 def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS): | |
156 """Extract localizable strings from the given template stream. | 166 """Extract localizable strings from the given template stream. |
157 | 167 |
158 For every string found, this function yields a ``(lineno, message)`` | 168 For every string found, this function yields a ``(lineno, function, |
159 tuple. | 169 message)`` tuple, where: |
160 | 170 |
161 :param stream: the event stream to extract strings from; can be a | 171 * ``lineno`` is the number of the line on which the string was found, |
162 regular stream or a template stream | 172 * ``function`` is the name of the ``gettext`` function used (if the |
173 string was extracted from embedded Python code), and | |
174 * ``message`` is the string itself (a ``unicode`` object). | |
163 | 175 |
164 >>> from genshi.template import MarkupTemplate | 176 >>> from genshi.template import MarkupTemplate |
177 >>> | |
165 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> | 178 >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/"> |
166 ... <head> | 179 ... <head> |
167 ... <title>Example</title> | 180 ... <title>Example</title> |
168 ... </head> | 181 ... </head> |
169 ... <body> | 182 ... <body> |
170 ... <h1>Example</h1> | 183 ... <h1>Example</h1> |
171 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> | 184 ... <p>${_("Hello, %(name)s") % dict(name=username)}</p> |
172 ... </body> | 185 ... </body> |
173 ... </html>''', filename='example.html') | 186 ... </html>''', filename='example.html') |
174 >>> for lineno, message in Translator().extract(tmpl.stream): | 187 >>> |
175 ... print "Line %d: %r" % (lineno, message) | 188 >>> for lineno, funcname, message in Translator().extract(tmpl.stream): |
176 Line 3: u'Example' | 189 ... print "%d, %r, %r" % (lineno, funcname, message) |
177 Line 6: u'Example' | 190 3, None, u'Example' |
178 Line 7: u'Hello, %(name)s' | 191 6, None, u'Example' |
192 7, '_', u'Hello, %(name)s' | |
193 | |
194 :param stream: the event stream to extract strings from; can be a | |
195 regular stream or a template stream | |
196 :param gettext_functions: a sequence of function names that should be | |
197 treated as gettext-style localization | |
198 functions | |
179 """ | 199 """ |
180 tagname = None | 200 tagname = None |
181 skip = 0 | 201 skip = 0 |
182 | 202 |
183 for kind, data, pos in stream: | 203 for kind, data, pos in stream: |
184 if skip: | 204 if skip: |
185 if kind is START: | 205 if kind is START: |
186 tag, attrs = data | 206 tag, attrs = data |
187 if tag.localname in self.ignore_tags: | 207 if tag in self.ignore_tags: |
188 skip += 1 | 208 skip += 1 |
189 if kind is END: | 209 if kind is END: |
190 tag = data | 210 tag = data |
191 if tag.localname in self.ignore_tags: | 211 if tag in self.ignore_tags: |
192 skip -= 1 | 212 skip -= 1 |
193 continue | 213 continue |
194 | 214 |
195 if kind is START: | 215 if kind is START: |
196 tag, attrs = data | 216 tag, attrs = data |
197 if tag.localname in self.ignore_tags: | 217 if tag in self.ignore_tags: |
198 skip += 1 | 218 skip += 1 |
199 continue | 219 continue |
200 | 220 |
201 for name, value in attrs: | 221 for name, value in attrs: |
202 if name in self.include_attrs: | 222 if name in self.include_attrs: |
203 if isinstance(value, basestring): | 223 if isinstance(value, basestring): |
204 text = value.strip() | 224 text = value.strip() |
205 if text: | 225 if text: |
206 yield pos[1], text | 226 yield pos[1], None, text |
207 else: | 227 else: |
208 for lineno, text in harvest(value): | 228 for lineno, funcname, text in harvest(value): |
209 yield lineno, text | 229 yield lineno, funcname, text |
210 | 230 |
211 elif kind is TEXT: | 231 elif kind is TEXT: |
212 text = data.strip() | 232 text = data.strip() |
213 if text and filter(None, [ch.isalpha() for ch in text]): | 233 if text and filter(None, [ch.isalpha() for ch in text]): |
214 yield pos[1], text | 234 yield pos[1], None, text |
215 | 235 |
216 elif kind is EXPR or kind is EXEC: | 236 elif kind is EXPR or kind is EXEC: |
217 consts = dict([(n, chr(i) + '\x00') for i, n in | 237 consts = dict([(n, chr(i) + '\x00') for i, n in |
218 enumerate(data.code.co_consts)]) | 238 enumerate(data.code.co_consts)]) |
219 gettext_locs = [consts[n] for n in gettext_functions | 239 gettext_locs = [consts[n] for n in gettext_functions |
221 ops = [ | 241 ops = [ |
222 _LOAD_CONST, '(', '|'.join(gettext_locs), ')', | 242 _LOAD_CONST, '(', '|'.join(gettext_locs), ')', |
223 _CALL_FUNCTION, '.\x00', | 243 _CALL_FUNCTION, '.\x00', |
224 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' | 244 '((?:', _BINARY_ADD, '|', _LOAD_CONST, '.\x00)+)' |
225 ] | 245 ] |
226 for _, opcodes in re.findall(''.join(ops), data.code.co_code): | 246 for loc, opcodes in re.findall(''.join(ops), data.code.co_code): |
247 funcname = data.code.co_consts[ord(loc[0])] | |
227 strings = [] | 248 strings = [] |
228 opcodes = iter(opcodes) | 249 opcodes = iter(opcodes) |
229 for opcode in opcodes: | 250 for opcode in opcodes: |
230 if opcode == _BINARY_ADD: | 251 if opcode == _BINARY_ADD: |
231 arg = strings.pop() | 252 arg = strings.pop() |
235 opcodes.next() # skip second byte | 256 opcodes.next() # skip second byte |
236 if not isinstance(arg, basestring): | 257 if not isinstance(arg, basestring): |
237 break | 258 break |
238 strings.append(unicode(arg)) | 259 strings.append(unicode(arg)) |
239 for string in strings: | 260 for string in strings: |
240 yield pos[1], string | 261 yield pos[1], funcname, string |
241 | 262 |
242 elif kind is SUB: | 263 elif kind is SUB: |
243 subkind, substream = data | 264 subkind, substream = data |
244 for lineno, text in self.harvest(substream): | 265 for lineno, funcname, text in self.harvest(substream): |
245 yield lineno, text | 266 yield lineno, funcname, text |