Mercurial > genshi > genshi-test
comparison markup/output.py @ 136:636e0100fcaf
Minor performance improvements in serialization.
author | cmlenz |
---|---|
date | Sun, 06 Aug 2006 21:22:21 +0000 |
parents | 93bbdcf9428b |
children | a2edde90ad24 |
comparison
equal
deleted
inserted
replaced
134:df44110ca91d | 136:636e0100fcaf |
---|---|
78 | 78 |
79 stream = chain(self.preamble, stream) | 79 stream = chain(self.preamble, stream) |
80 for filter_ in self.filters: | 80 for filter_ in self.filters: |
81 stream = filter_(stream) | 81 stream = filter_(stream) |
82 stream = _PushbackIterator(stream) | 82 stream = _PushbackIterator(stream) |
83 pushback = stream.pushback | |
83 for kind, data, pos in stream: | 84 for kind, data, pos in stream: |
84 | 85 |
85 if kind is START: | 86 if kind is START: |
86 tag, attrib = data | 87 tag, attrib = data |
87 | 88 |
92 prefix = ns_mapping[namespace] | 93 prefix = ns_mapping[namespace] |
93 if prefix: | 94 if prefix: |
94 tagname = '%s:%s' % (prefix, tagname) | 95 tagname = '%s:%s' % (prefix, tagname) |
95 else: | 96 else: |
96 ns_attrib.append((QName('xmlns'), namespace)) | 97 ns_attrib.append((QName('xmlns'), namespace)) |
97 buf = ['<%s' % tagname] | 98 buf = ['<', tagname] |
98 | 99 |
99 for attr, value in attrib + ns_attrib: | 100 for attr, value in attrib + ns_attrib: |
100 attrname = attr.localname | 101 attrname = attr.localname |
101 if attr.namespace: | 102 if attr.namespace: |
102 prefix = ns_mapping.get(attr.namespace) | 103 prefix = ns_mapping.get(attr.namespace) |
103 if prefix: | 104 if prefix: |
104 attrname = '%s:%s' % (prefix, attrname) | 105 attrname = '%s:%s' % (prefix, attrname) |
105 buf.append(' %s="%s"' % (attrname, escape(value))) | 106 buf += [' ', attrname, '="', escape(value), '"'] |
106 ns_attrib = [] | 107 ns_attrib = [] |
107 | 108 |
108 kind, data, pos = stream.next() | 109 kind, data, pos = stream.next() |
109 if kind is END: | 110 if kind is END: |
110 buf.append('/>') | 111 buf += ['/>'] |
111 else: | 112 else: |
112 buf.append('>') | 113 buf += ['>'] |
113 stream.pushback((kind, data, pos)) | 114 pushback((kind, data, pos)) |
114 | 115 |
115 yield Markup(''.join(buf)) | 116 yield Markup(''.join(buf)) |
116 | 117 |
117 elif kind is END: | 118 elif kind is END: |
118 tag = data | 119 tag = data |
127 yield escape(data, quotes=False) | 128 yield escape(data, quotes=False) |
128 | 129 |
129 elif kind is COMMENT: | 130 elif kind is COMMENT: |
130 yield Markup('<!--%s-->' % data) | 131 yield Markup('<!--%s-->' % data) |
131 | 132 |
132 elif kind is DOCTYPE: | 133 elif kind is DOCTYPE and not have_doctype: |
133 if not have_doctype: | 134 name, pubid, sysid = data |
134 name, pubid, sysid = data | 135 buf = ['<!DOCTYPE %s'] |
135 buf = ['<!DOCTYPE %s'] | 136 if pubid: |
136 if pubid: | 137 buf += [' PUBLIC "%s"'] |
137 buf.append(' PUBLIC "%s"') | 138 elif sysid: |
138 elif sysid: | 139 buf += [' SYSTEM'] |
139 buf.append(' SYSTEM') | 140 if sysid: |
140 if sysid: | 141 buf += [' "%s"'] |
141 buf.append(' "%s"') | 142 buf += ['>\n'] |
142 buf.append('>\n') | 143 yield Markup(''.join(buf), *filter(None, data)) |
143 yield Markup(''.join(buf), *filter(None, data)) | 144 have_doctype = True |
144 have_doctype = True | |
145 | 145 |
146 elif kind is START_NS: | 146 elif kind is START_NS: |
147 prefix, uri = data | 147 prefix, uri = data |
148 if uri not in ns_mapping: | 148 if uri not in ns_mapping: |
149 ns_mapping[uri] = prefix | 149 ns_mapping[uri] = prefix |
174 'defer', 'disabled', 'ismap', 'multiple', | 174 'defer', 'disabled', 'ismap', 'multiple', |
175 'nohref', 'noresize', 'noshade', 'nowrap']) | 175 'nohref', 'noresize', 'noshade', 'nowrap']) |
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) | 176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) |
177 | 177 |
178 def __call__(self, stream): | 178 def __call__(self, stream): |
179 namespace = self.NAMESPACE | |
180 ns_mapping = {} | |
181 boolean_attrs = self._BOOLEAN_ATTRS | |
182 empty_elems = self._EMPTY_ELEMS | |
179 have_doctype = False | 183 have_doctype = False |
180 ns_mapping = {} | |
181 | 184 |
182 stream = chain(self.preamble, stream) | 185 stream = chain(self.preamble, stream) |
183 for filter_ in self.filters: | 186 for filter_ in self.filters: |
184 stream = filter_(stream) | 187 stream = filter_(stream) |
185 stream = _PushbackIterator(stream) | 188 stream = _PushbackIterator(stream) |
189 pushback = stream.pushback | |
186 for kind, data, pos in stream: | 190 for kind, data, pos in stream: |
187 | 191 |
188 if kind is START: | 192 if kind is START: |
189 tag, attrib = data | 193 tag, attrib = data |
190 if tag.namespace and tag not in self.NAMESPACE: | 194 if not tag.namespace or tag in namespace: |
191 continue # not in the HTML namespace, so don't emit | 195 tagname = tag.localname |
192 buf = ['<', tag.localname] | 196 buf = ['<', tagname] |
193 | 197 |
194 for attr, value in attrib: | 198 for attr, value in attrib: |
195 if attr.namespace and attr not in self.NAMESPACE: | 199 if not attr.namespace or attr in namespace: |
196 continue # not in the HTML namespace, so don't emit | 200 attrname = attr.localname |
197 if attr.localname in self._BOOLEAN_ATTRS: | 201 if attrname in boolean_attrs: |
198 if value: | 202 if value: |
199 buf.append(' %s="%s"' % (attr.localname, attr.localname)) | 203 buf += [' ', attrname, '="', attrname, '"'] |
204 else: | |
205 buf += [' ', attrname, '="', escape(value), '"'] | |
206 | |
207 if tagname in empty_elems: | |
208 kind, data, pos = stream.next() | |
209 if kind is END: | |
210 buf += [' />'] | |
211 else: | |
212 buf += ['>'] | |
213 pushback((kind, data, pos)) | |
200 else: | 214 else: |
201 buf.append(' %s="%s"' % (attr.localname, escape(value))) | 215 buf += ['>'] |
202 | 216 |
203 if tag.localname in self._EMPTY_ELEMS: | 217 yield Markup(''.join(buf)) |
204 kind, data, pos = stream.next() | |
205 if kind is END: | |
206 buf.append(' />') | |
207 else: | |
208 buf.append('>') | |
209 stream.pushback((kind, data, pos)) | |
210 else: | |
211 buf.append('>') | |
212 | |
213 yield Markup(''.join(buf)) | |
214 | 218 |
215 elif kind is END: | 219 elif kind is END: |
216 tag = data | 220 tag = data |
217 if tag.namespace and tag not in self.NAMESPACE: | 221 if not tag.namespace or tag in namespace: |
218 continue # not in the HTML namespace, so don't emit | 222 yield Markup('</%s>' % tag.localname) |
219 yield Markup('</%s>' % tag.localname) | |
220 | 223 |
221 elif kind is TEXT: | 224 elif kind is TEXT: |
222 yield escape(data, quotes=False) | 225 yield escape(data, quotes=False) |
223 | 226 |
224 elif kind is COMMENT: | 227 elif kind is COMMENT: |
225 yield Markup('<!--%s-->' % data) | 228 yield Markup('<!--%s-->' % data) |
226 | 229 |
227 elif kind is DOCTYPE: | 230 elif kind is DOCTYPE and not have_doctype: |
228 if not have_doctype: | 231 name, pubid, sysid = data |
229 name, pubid, sysid = data | 232 buf = ['<!DOCTYPE %s'] |
230 buf = ['<!DOCTYPE %s'] | 233 if pubid: |
231 if pubid: | 234 buf += [' PUBLIC "%s"'] |
232 buf.append(' PUBLIC "%s"') | 235 elif sysid: |
233 elif sysid: | 236 buf += [' SYSTEM'] |
234 buf.append(' SYSTEM') | 237 if sysid: |
235 if sysid: | 238 buf += [' "%s"'] |
236 buf.append(' "%s"') | 239 buf += ['>\n'] |
237 buf.append('>\n') | 240 yield Markup(''.join(buf), *filter(None, data)) |
238 yield Markup(''.join(buf), *filter(None, data)) | 241 have_doctype = True |
239 have_doctype = True | 242 |
240 | 243 elif kind is START_NS and data[1] not in ns_mapping: |
241 elif kind is START_NS: | 244 ns_mapping[data[1]] = data[0] |
242 prefix, uri = data | |
243 if uri not in ns_mapping: | |
244 ns_mapping[uri] = prefix | |
245 | 245 |
246 elif kind is PI: | 246 elif kind is PI: |
247 yield Markup('<?%s %s?>' % data) | 247 yield Markup('<?%s %s?>' % data) |
248 | 248 |
249 | 249 |
255 >>> print ''.join(HTMLSerializer()(elem.generate())) | 255 >>> print ''.join(HTMLSerializer()(elem.generate())) |
256 <div><a href="foo"></a><br><hr noshade></div> | 256 <div><a href="foo"></a><br><hr noshade></div> |
257 """ | 257 """ |
258 | 258 |
259 def __call__(self, stream): | 259 def __call__(self, stream): |
260 namespace = self.NAMESPACE | |
261 ns_mapping = {} | |
262 boolean_attrs = self._BOOLEAN_ATTRS | |
263 empty_elems = self._EMPTY_ELEMS | |
260 have_doctype = False | 264 have_doctype = False |
261 ns_mapping = {} | |
262 | 265 |
263 stream = chain(self.preamble, stream) | 266 stream = chain(self.preamble, stream) |
264 for filter_ in self.filters: | 267 for filter_ in self.filters: |
265 stream = filter_(stream) | 268 stream = filter_(stream) |
266 stream = _PushbackIterator(stream) | 269 stream = _PushbackIterator(stream) |
267 for kind, data, pos in stream: | 270 for kind, data, pos in stream: |
268 | 271 |
269 if kind is START: | 272 if kind is START: |
270 tag, attrib = data | 273 tag, attrib = data |
271 if tag.namespace and tag not in self.NAMESPACE: | 274 if not tag.namespace or tag in namespace: |
272 continue # not in the HTML namespace, so don't emit | 275 tagname = tag.localname |
273 buf = ['<', tag.localname] | 276 buf = ['<', tagname] |
274 | 277 |
275 for attr, value in attrib: | 278 for attr, value in attrib: |
276 if attr.namespace and attr not in self.NAMESPACE \ | 279 attrname = attr.localname |
277 or attr.localname.startswith('xml:'): | 280 if not attr.namespace and not \ |
278 continue # not in the HTML namespace, so don't emit | 281 attrname.startswith('xml:') or \ |
279 if attr.localname in self._BOOLEAN_ATTRS: | 282 attr in namespace: |
280 if value: | 283 if attrname in boolean_attrs: |
281 buf.append(' %s' % attr.localname) | 284 if value: |
282 else: | 285 buf += [' ', attrname] |
283 buf.append(' %s="%s"' % (attr.localname, escape(value))) | 286 else: |
284 | 287 buf += [' ', attrname, '="', escape(value), '"'] |
285 if tag.localname in self._EMPTY_ELEMS: | 288 |
286 kind, data, pos = stream.next() | 289 if tagname in empty_elems: |
287 if kind is not END: | 290 kind, data, pos = stream.next() |
288 stream.pushback((kind, data, pos)) | 291 if kind is not END: |
289 | 292 stream.pushback((kind, data, pos)) |
290 yield Markup(''.join(buf + ['>'])) | 293 |
294 buf += ['>'] | |
295 yield Markup(''.join(buf)) | |
291 | 296 |
292 elif kind is END: | 297 elif kind is END: |
293 tag = data | 298 tag = data |
294 if tag.namespace and tag not in self.NAMESPACE: | 299 if not tag.namespace or tag in namespace: |
295 continue # not in the HTML namespace, so don't emit | 300 yield Markup('</%s>' % tag.localname) |
296 yield Markup('</%s>' % tag.localname) | |
297 | 301 |
298 elif kind is TEXT: | 302 elif kind is TEXT: |
299 yield escape(data, quotes=False) | 303 yield escape(data, quotes=False) |
300 | 304 |
301 elif kind is COMMENT: | 305 elif kind is COMMENT: |
302 yield Markup('<!--%s-->' % data) | 306 yield Markup('<!--%s-->' % data) |
303 | 307 |
304 elif kind is DOCTYPE: | 308 elif kind is DOCTYPE and not have_doctype: |
305 if not have_doctype: | 309 name, pubid, sysid = data |
306 name, pubid, sysid = data | 310 buf = ['<!DOCTYPE %s'] |
307 buf = ['<!DOCTYPE %s'] | 311 if pubid: |
308 if pubid: | 312 buf += [' PUBLIC "%s"'] |
309 buf.append(' PUBLIC "%s"') | 313 elif sysid: |
310 elif sysid: | 314 buf += [' SYSTEM'] |
311 buf.append(' SYSTEM') | 315 if sysid: |
312 if sysid: | 316 buf += [' "%s"'] |
313 buf.append(' "%s"') | 317 buf += ['>\n'] |
314 buf.append('>\n') | 318 yield Markup(''.join(buf), *filter(None, data)) |
315 yield Markup(''.join(buf), *filter(None, data)) | 319 have_doctype = True |
316 have_doctype = True | 320 |
317 | 321 elif kind is START_NS and data[1] not in ns_mapping: |
318 elif kind is START_NS: | 322 ns_mapping[data[1]] = data[0] |
319 prefix, uri = data | |
320 if uri not in ns_mapping: | |
321 ns_mapping[uri] = prefix | |
322 | 323 |
323 elif kind is PI: | 324 elif kind is PI: |
324 yield Markup('<?%s %s?>' % data) | 325 yield Markup('<?%s %s?>' % data) |
325 | 326 |
326 | 327 |
344 def __call__(self, stream, ctxt=None): | 345 def __call__(self, stream, ctxt=None): |
345 trim_trailing_space = self._TRAILING_SPACE.sub | 346 trim_trailing_space = self._TRAILING_SPACE.sub |
346 collapse_lines = self._LINE_COLLAPSE.sub | 347 collapse_lines = self._LINE_COLLAPSE.sub |
347 mjoin = Markup('').join | 348 mjoin = Markup('').join |
348 preserve = [False] | 349 preserve = [False] |
350 append_preserve = preserve.append | |
351 pop_preserve = preserve.pop | |
349 | 352 |
350 textbuf = [] | 353 textbuf = [] |
354 append_text = textbuf.append | |
355 pop_text = textbuf.pop | |
351 for kind, data, pos in chain(stream, [(None, None, None)]): | 356 for kind, data, pos in chain(stream, [(None, None, None)]): |
352 if kind is TEXT: | 357 if kind is TEXT: |
353 textbuf.append(data) | 358 append_text(data) |
354 else: | 359 else: |
355 if kind is START: | 360 if kind is START: |
356 preserve.append(data[0] in self.preserve or | 361 append_preserve(data[0] in self.preserve or |
357 data[1].get('xml:space') == 'preserve') | 362 data[1].get('xml:space') == 'preserve') |
358 if textbuf: | 363 if textbuf: |
359 if len(textbuf) > 1: | 364 if len(textbuf) > 1: |
360 text = mjoin(textbuf, escape_quotes=False) | 365 text = mjoin(textbuf, escape_quotes=False) |
361 del textbuf[:] | 366 del textbuf[:] |
362 else: | 367 else: |
363 text = escape(textbuf.pop(), quotes=False) | 368 text = escape(pop_text(), quotes=False) |
364 if not preserve[-1]: | 369 if not preserve[-1]: |
365 text = collapse_lines('\n', trim_trailing_space('', text)) | 370 text = collapse_lines('\n', trim_trailing_space('', text)) |
366 yield TEXT, Markup(text), pos | 371 yield TEXT, Markup(text), pos |
367 if kind is END: | 372 if kind is END: |
368 preserve.pop() | 373 pop_preserve() |
369 if kind is not None: | 374 if kind: |
370 yield kind, data, pos | 375 yield kind, data, pos |
371 | 376 |
372 | 377 |
373 class _PushbackIterator(object): | 378 class _PushbackIterator(object): |
374 """A simple wrapper for iterators that allows pushing items back on the | 379 """A simple wrapper for iterators that allows pushing items back on the |