comparison markup/output.py @ 136:636e0100fcaf

Minor performance improvements in serialization.
author cmlenz
date Sun, 06 Aug 2006 21:22:21 +0000
parents 93bbdcf9428b
children a2edde90ad24
comparison
Legend: equal | deleted | inserted | replaced
134:df44110ca91d 136:636e0100fcaf
78 78
79 stream = chain(self.preamble, stream) 79 stream = chain(self.preamble, stream)
80 for filter_ in self.filters: 80 for filter_ in self.filters:
81 stream = filter_(stream) 81 stream = filter_(stream)
82 stream = _PushbackIterator(stream) 82 stream = _PushbackIterator(stream)
83 pushback = stream.pushback
83 for kind, data, pos in stream: 84 for kind, data, pos in stream:
84 85
85 if kind is START: 86 if kind is START:
86 tag, attrib = data 87 tag, attrib = data
87 88
92 prefix = ns_mapping[namespace] 93 prefix = ns_mapping[namespace]
93 if prefix: 94 if prefix:
94 tagname = '%s:%s' % (prefix, tagname) 95 tagname = '%s:%s' % (prefix, tagname)
95 else: 96 else:
96 ns_attrib.append((QName('xmlns'), namespace)) 97 ns_attrib.append((QName('xmlns'), namespace))
97 buf = ['<%s' % tagname] 98 buf = ['<', tagname]
98 99
99 for attr, value in attrib + ns_attrib: 100 for attr, value in attrib + ns_attrib:
100 attrname = attr.localname 101 attrname = attr.localname
101 if attr.namespace: 102 if attr.namespace:
102 prefix = ns_mapping.get(attr.namespace) 103 prefix = ns_mapping.get(attr.namespace)
103 if prefix: 104 if prefix:
104 attrname = '%s:%s' % (prefix, attrname) 105 attrname = '%s:%s' % (prefix, attrname)
105 buf.append(' %s="%s"' % (attrname, escape(value))) 106 buf += [' ', attrname, '="', escape(value), '"']
106 ns_attrib = [] 107 ns_attrib = []
107 108
108 kind, data, pos = stream.next() 109 kind, data, pos = stream.next()
109 if kind is END: 110 if kind is END:
110 buf.append('/>') 111 buf += ['/>']
111 else: 112 else:
112 buf.append('>') 113 buf += ['>']
113 stream.pushback((kind, data, pos)) 114 pushback((kind, data, pos))
114 115
115 yield Markup(''.join(buf)) 116 yield Markup(''.join(buf))
116 117
117 elif kind is END: 118 elif kind is END:
118 tag = data 119 tag = data
127 yield escape(data, quotes=False) 128 yield escape(data, quotes=False)
128 129
129 elif kind is COMMENT: 130 elif kind is COMMENT:
130 yield Markup('<!--%s-->' % data) 131 yield Markup('<!--%s-->' % data)
131 132
132 elif kind is DOCTYPE: 133 elif kind is DOCTYPE and not have_doctype:
133 if not have_doctype: 134 name, pubid, sysid = data
134 name, pubid, sysid = data 135 buf = ['<!DOCTYPE %s']
135 buf = ['<!DOCTYPE %s'] 136 if pubid:
136 if pubid: 137 buf += [' PUBLIC "%s"']
137 buf.append(' PUBLIC "%s"') 138 elif sysid:
138 elif sysid: 139 buf += [' SYSTEM']
139 buf.append(' SYSTEM') 140 if sysid:
140 if sysid: 141 buf += [' "%s"']
141 buf.append(' "%s"') 142 buf += ['>\n']
142 buf.append('>\n') 143 yield Markup(''.join(buf), *filter(None, data))
143 yield Markup(''.join(buf), *filter(None, data)) 144 have_doctype = True
144 have_doctype = True
145 145
146 elif kind is START_NS: 146 elif kind is START_NS:
147 prefix, uri = data 147 prefix, uri = data
148 if uri not in ns_mapping: 148 if uri not in ns_mapping:
149 ns_mapping[uri] = prefix 149 ns_mapping[uri] = prefix
174 'defer', 'disabled', 'ismap', 'multiple', 174 'defer', 'disabled', 'ismap', 'multiple',
175 'nohref', 'noresize', 'noshade', 'nowrap']) 175 'nohref', 'noresize', 'noshade', 'nowrap'])
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) 176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
177 177
178 def __call__(self, stream): 178 def __call__(self, stream):
179 namespace = self.NAMESPACE
180 ns_mapping = {}
181 boolean_attrs = self._BOOLEAN_ATTRS
182 empty_elems = self._EMPTY_ELEMS
179 have_doctype = False 183 have_doctype = False
180 ns_mapping = {}
181 184
182 stream = chain(self.preamble, stream) 185 stream = chain(self.preamble, stream)
183 for filter_ in self.filters: 186 for filter_ in self.filters:
184 stream = filter_(stream) 187 stream = filter_(stream)
185 stream = _PushbackIterator(stream) 188 stream = _PushbackIterator(stream)
189 pushback = stream.pushback
186 for kind, data, pos in stream: 190 for kind, data, pos in stream:
187 191
188 if kind is START: 192 if kind is START:
189 tag, attrib = data 193 tag, attrib = data
190 if tag.namespace and tag not in self.NAMESPACE: 194 if not tag.namespace or tag in namespace:
191 continue # not in the HTML namespace, so don't emit 195 tagname = tag.localname
192 buf = ['<', tag.localname] 196 buf = ['<', tagname]
193 197
194 for attr, value in attrib: 198 for attr, value in attrib:
195 if attr.namespace and attr not in self.NAMESPACE: 199 if not attr.namespace or attr in namespace:
196 continue # not in the HTML namespace, so don't emit 200 attrname = attr.localname
197 if attr.localname in self._BOOLEAN_ATTRS: 201 if attrname in boolean_attrs:
198 if value: 202 if value:
199 buf.append(' %s="%s"' % (attr.localname, attr.localname)) 203 buf += [' ', attrname, '="', attrname, '"']
204 else:
205 buf += [' ', attrname, '="', escape(value), '"']
206
207 if tagname in empty_elems:
208 kind, data, pos = stream.next()
209 if kind is END:
210 buf += [' />']
211 else:
212 buf += ['>']
213 pushback((kind, data, pos))
200 else: 214 else:
201 buf.append(' %s="%s"' % (attr.localname, escape(value))) 215 buf += ['>']
202 216
203 if tag.localname in self._EMPTY_ELEMS: 217 yield Markup(''.join(buf))
204 kind, data, pos = stream.next()
205 if kind is END:
206 buf.append(' />')
207 else:
208 buf.append('>')
209 stream.pushback((kind, data, pos))
210 else:
211 buf.append('>')
212
213 yield Markup(''.join(buf))
214 218
215 elif kind is END: 219 elif kind is END:
216 tag = data 220 tag = data
217 if tag.namespace and tag not in self.NAMESPACE: 221 if not tag.namespace or tag in namespace:
218 continue # not in the HTML namespace, so don't emit 222 yield Markup('</%s>' % tag.localname)
219 yield Markup('</%s>' % tag.localname)
220 223
221 elif kind is TEXT: 224 elif kind is TEXT:
222 yield escape(data, quotes=False) 225 yield escape(data, quotes=False)
223 226
224 elif kind is COMMENT: 227 elif kind is COMMENT:
225 yield Markup('<!--%s-->' % data) 228 yield Markup('<!--%s-->' % data)
226 229
227 elif kind is DOCTYPE: 230 elif kind is DOCTYPE and not have_doctype:
228 if not have_doctype: 231 name, pubid, sysid = data
229 name, pubid, sysid = data 232 buf = ['<!DOCTYPE %s']
230 buf = ['<!DOCTYPE %s'] 233 if pubid:
231 if pubid: 234 buf += [' PUBLIC "%s"']
232 buf.append(' PUBLIC "%s"') 235 elif sysid:
233 elif sysid: 236 buf += [' SYSTEM']
234 buf.append(' SYSTEM') 237 if sysid:
235 if sysid: 238 buf += [' "%s"']
236 buf.append(' "%s"') 239 buf += ['>\n']
237 buf.append('>\n') 240 yield Markup(''.join(buf), *filter(None, data))
238 yield Markup(''.join(buf), *filter(None, data)) 241 have_doctype = True
239 have_doctype = True 242
240 243 elif kind is START_NS and data[1] not in ns_mapping:
241 elif kind is START_NS: 244 ns_mapping[data[1]] = data[0]
242 prefix, uri = data
243 if uri not in ns_mapping:
244 ns_mapping[uri] = prefix
245 245
246 elif kind is PI: 246 elif kind is PI:
247 yield Markup('<?%s %s?>' % data) 247 yield Markup('<?%s %s?>' % data)
248 248
249 249
255 >>> print ''.join(HTMLSerializer()(elem.generate())) 255 >>> print ''.join(HTMLSerializer()(elem.generate()))
256 <div><a href="foo"></a><br><hr noshade></div> 256 <div><a href="foo"></a><br><hr noshade></div>
257 """ 257 """
258 258
259 def __call__(self, stream): 259 def __call__(self, stream):
260 namespace = self.NAMESPACE
261 ns_mapping = {}
262 boolean_attrs = self._BOOLEAN_ATTRS
263 empty_elems = self._EMPTY_ELEMS
260 have_doctype = False 264 have_doctype = False
261 ns_mapping = {}
262 265
263 stream = chain(self.preamble, stream) 266 stream = chain(self.preamble, stream)
264 for filter_ in self.filters: 267 for filter_ in self.filters:
265 stream = filter_(stream) 268 stream = filter_(stream)
266 stream = _PushbackIterator(stream) 269 stream = _PushbackIterator(stream)
267 for kind, data, pos in stream: 270 for kind, data, pos in stream:
268 271
269 if kind is START: 272 if kind is START:
270 tag, attrib = data 273 tag, attrib = data
271 if tag.namespace and tag not in self.NAMESPACE: 274 if not tag.namespace or tag in namespace:
272 continue # not in the HTML namespace, so don't emit 275 tagname = tag.localname
273 buf = ['<', tag.localname] 276 buf = ['<', tagname]
274 277
275 for attr, value in attrib: 278 for attr, value in attrib:
276 if attr.namespace and attr not in self.NAMESPACE \ 279 attrname = attr.localname
277 or attr.localname.startswith('xml:'): 280 if not attr.namespace and not \
278 continue # not in the HTML namespace, so don't emit 281 attrname.startswith('xml:') or \
279 if attr.localname in self._BOOLEAN_ATTRS: 282 attr in namespace:
280 if value: 283 if attrname in boolean_attrs:
281 buf.append(' %s' % attr.localname) 284 if value:
282 else: 285 buf += [' ', attrname]
283 buf.append(' %s="%s"' % (attr.localname, escape(value))) 286 else:
284 287 buf += [' ', attrname, '="', escape(value), '"']
285 if tag.localname in self._EMPTY_ELEMS: 288
286 kind, data, pos = stream.next() 289 if tagname in empty_elems:
287 if kind is not END: 290 kind, data, pos = stream.next()
288 stream.pushback((kind, data, pos)) 291 if kind is not END:
289 292 stream.pushback((kind, data, pos))
290 yield Markup(''.join(buf + ['>'])) 293
294 buf += ['>']
295 yield Markup(''.join(buf))
291 296
292 elif kind is END: 297 elif kind is END:
293 tag = data 298 tag = data
294 if tag.namespace and tag not in self.NAMESPACE: 299 if not tag.namespace or tag in namespace:
295 continue # not in the HTML namespace, so don't emit 300 yield Markup('</%s>' % tag.localname)
296 yield Markup('</%s>' % tag.localname)
297 301
298 elif kind is TEXT: 302 elif kind is TEXT:
299 yield escape(data, quotes=False) 303 yield escape(data, quotes=False)
300 304
301 elif kind is COMMENT: 305 elif kind is COMMENT:
302 yield Markup('<!--%s-->' % data) 306 yield Markup('<!--%s-->' % data)
303 307
304 elif kind is DOCTYPE: 308 elif kind is DOCTYPE and not have_doctype:
305 if not have_doctype: 309 name, pubid, sysid = data
306 name, pubid, sysid = data 310 buf = ['<!DOCTYPE %s']
307 buf = ['<!DOCTYPE %s'] 311 if pubid:
308 if pubid: 312 buf += [' PUBLIC "%s"']
309 buf.append(' PUBLIC "%s"') 313 elif sysid:
310 elif sysid: 314 buf += [' SYSTEM']
311 buf.append(' SYSTEM') 315 if sysid:
312 if sysid: 316 buf += [' "%s"']
313 buf.append(' "%s"') 317 buf += ['>\n']
314 buf.append('>\n') 318 yield Markup(''.join(buf), *filter(None, data))
315 yield Markup(''.join(buf), *filter(None, data)) 319 have_doctype = True
316 have_doctype = True 320
317 321 elif kind is START_NS and data[1] not in ns_mapping:
318 elif kind is START_NS: 322 ns_mapping[data[1]] = data[0]
319 prefix, uri = data
320 if uri not in ns_mapping:
321 ns_mapping[uri] = prefix
322 323
323 elif kind is PI: 324 elif kind is PI:
324 yield Markup('<?%s %s?>' % data) 325 yield Markup('<?%s %s?>' % data)
325 326
326 327
344 def __call__(self, stream, ctxt=None): 345 def __call__(self, stream, ctxt=None):
345 trim_trailing_space = self._TRAILING_SPACE.sub 346 trim_trailing_space = self._TRAILING_SPACE.sub
346 collapse_lines = self._LINE_COLLAPSE.sub 347 collapse_lines = self._LINE_COLLAPSE.sub
347 mjoin = Markup('').join 348 mjoin = Markup('').join
348 preserve = [False] 349 preserve = [False]
350 append_preserve = preserve.append
351 pop_preserve = preserve.pop
349 352
350 textbuf = [] 353 textbuf = []
354 append_text = textbuf.append
355 pop_text = textbuf.pop
351 for kind, data, pos in chain(stream, [(None, None, None)]): 356 for kind, data, pos in chain(stream, [(None, None, None)]):
352 if kind is TEXT: 357 if kind is TEXT:
353 textbuf.append(data) 358 append_text(data)
354 else: 359 else:
355 if kind is START: 360 if kind is START:
356 preserve.append(data[0] in self.preserve or 361 append_preserve(data[0] in self.preserve or
357 data[1].get('xml:space') == 'preserve') 362 data[1].get('xml:space') == 'preserve')
358 if textbuf: 363 if textbuf:
359 if len(textbuf) > 1: 364 if len(textbuf) > 1:
360 text = mjoin(textbuf, escape_quotes=False) 365 text = mjoin(textbuf, escape_quotes=False)
361 del textbuf[:] 366 del textbuf[:]
362 else: 367 else:
363 text = escape(textbuf.pop(), quotes=False) 368 text = escape(pop_text(), quotes=False)
364 if not preserve[-1]: 369 if not preserve[-1]:
365 text = collapse_lines('\n', trim_trailing_space('', text)) 370 text = collapse_lines('\n', trim_trailing_space('', text))
366 yield TEXT, Markup(text), pos 371 yield TEXT, Markup(text), pos
367 if kind is END: 372 if kind is END:
368 preserve.pop() 373 pop_preserve()
369 if kind is not None: 374 if kind:
370 yield kind, data, pos 375 yield kind, data, pos
371 376
372 377
373 class _PushbackIterator(object): 378 class _PushbackIterator(object):
374 """A simple wrapper for iterators that allows pushing items back on the 379 """A simple wrapper for iterators that allows pushing items back on the
Copyright (C) 2012-2017 Edgewall Software