comparison markup/output.py @ 212:e8c43127d9a9

Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
author cmlenz
date Wed, 30 Aug 2006 12:40:44 +0000
parents 0f16c907077e
children bafa1cc49c2f
comparison
equal deleted inserted replaced
211:0a14c2a06be3 212:e8c43127d9a9
20 frozenset 20 frozenset
21 except NameError: 21 except NameError:
22 from sets import ImmutableSet as frozenset 22 from sets import ImmutableSet as frozenset
23 import re 23 import re
24 24
25 from markup.core import escape, Markup, Namespace, QName 25 from markup.core import escape, Markup, Namespace, QName, StreamEventKind
26 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ 26 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
27 END_CDATA, PI, COMMENT, XML_NAMESPACE 27 END_CDATA, PI, COMMENT, XML_NAMESPACE
28 28
29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer', 29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
30 'TextSerializer'] 30 'TextSerializer']
67 stripped from the output 67 stripped from the output
68 """ 68 """
69 self.preamble = [] 69 self.preamble = []
70 if doctype: 70 if doctype:
71 self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) 71 self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
72 self.filters = [] 72 self.filters = [EmptyTagFilter()]
73 if strip_whitespace: 73 if strip_whitespace:
74 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) 74 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
75 75
76 def __call__(self, stream): 76 def __call__(self, stream):
77 ns_attrib = [] 77 ns_attrib = []
80 in_cdata = False 80 in_cdata = False
81 81
82 stream = chain(self.preamble, stream) 82 stream = chain(self.preamble, stream)
83 for filter_ in self.filters: 83 for filter_ in self.filters:
84 stream = filter_(stream) 84 stream = filter_(stream)
85 stream = _PushbackIterator(stream)
86 pushback = stream.pushback
87 for kind, data, pos in stream: 85 for kind, data, pos in stream:
88 86
89 if kind is START: 87 if kind is START or kind is EMPTY:
90 tag, attrib = data 88 tag, attrib = data
91 89
92 tagname = tag.localname 90 tagname = tag.localname
93 namespace = tag.namespace 91 namespace = tag.namespace
94 if namespace: 92 if namespace:
107 if prefix: 105 if prefix:
108 attrname = '%s:%s' % (prefix, attrname) 106 attrname = '%s:%s' % (prefix, attrname)
109 buf += [' ', attrname, '="', escape(value), '"'] 107 buf += [' ', attrname, '="', escape(value), '"']
110 ns_attrib = [] 108 ns_attrib = []
111 109
112 kind, data, pos = stream.next() 110 if kind is EMPTY:
113 if kind is END:
114 buf += ['/>'] 111 buf += ['/>']
115 else: 112 else:
116 buf += ['>'] 113 buf += ['>']
117 pushback((kind, data, pos))
118 114
119 yield Markup(''.join(buf)) 115 yield Markup(''.join(buf))
120 116
121 elif kind is END: 117 elif kind is END:
122 tag = data 118 tag = data
199 in_cdata = False 195 in_cdata = False
200 196
201 stream = chain(self.preamble, stream) 197 stream = chain(self.preamble, stream)
202 for filter_ in self.filters: 198 for filter_ in self.filters:
203 stream = filter_(stream) 199 stream = filter_(stream)
204 stream = _PushbackIterator(stream)
205 pushback = stream.pushback
206 for kind, data, pos in stream: 200 for kind, data, pos in stream:
207 201
208 if kind is START: 202 if kind is START or kind is EMPTY:
209 tag, attrib = data 203 tag, attrib = data
210 204
211 tagname = tag.localname 205 tagname = tag.localname
212 tagns = tag.namespace 206 tagns = tag.namespace
213 if tagns: 207 if tagns:
230 buf += [' ', attrname, '="', attrname, '"'] 224 buf += [' ', attrname, '="', attrname, '"']
231 else: 225 else:
232 buf += [' ', attrname, '="', escape(value), '"'] 226 buf += [' ', attrname, '="', escape(value), '"']
233 ns_attrib = [] 227 ns_attrib = []
234 228
235 if (tagns and tagns != namespace.uri) or tagname in empty_elems: 229 if kind is EMPTY:
236 kind, data, pos = stream.next() 230 if (tagns and tagns != namespace.uri) \
237 if kind is END: 231 or tag.localname in empty_elems:
238 buf += [' />'] 232 buf += [' />']
239 else: 233 else:
240 buf += ['>'] 234 buf += ['></%s>' % tagname]
241 pushback((kind, data, pos))
242 else: 235 else:
243 buf += ['>'] 236 buf += ['>']
244 237
245 yield Markup(''.join(buf)) 238 yield Markup(''.join(buf))
246 239
331 noescape = False 324 noescape = False
332 325
333 stream = chain(self.preamble, stream) 326 stream = chain(self.preamble, stream)
334 for filter_ in self.filters: 327 for filter_ in self.filters:
335 stream = filter_(stream) 328 stream = filter_(stream)
336 stream = _PushbackIterator(stream)
337 pushback = stream.pushback
338 for kind, data, pos in stream: 329 for kind, data, pos in stream:
339 330
340 if kind is START: 331 if kind is START or kind is EMPTY:
341 tag, attrib = data 332 tag, attrib = data
342 if not tag.namespace or tag in namespace: 333 if not tag.namespace or tag in namespace:
343 tagname = tag.localname 334 tagname = tag.localname
344 buf = ['<', tagname] 335 buf = ['<', tagname]
345 336
350 if value: 341 if value:
351 buf += [' ', attrname] 342 buf += [' ', attrname]
352 else: 343 else:
353 buf += [' ', attrname, '="', escape(value), '"'] 344 buf += [' ', attrname, '="', escape(value), '"']
354 345
355 if tagname in empty_elems:
356 kind, data, pos = stream.next()
357 if kind is not END:
358 pushback((kind, data, pos))
359
360 buf += ['>'] 346 buf += ['>']
347
348 if kind is EMPTY:
349 if tagname not in empty_elems:
350 buf.append('</%s>' % tagname)
351
361 yield Markup(''.join(buf)) 352 yield Markup(''.join(buf))
362 353
363 if tagname in noescape_elems: 354 if tagname in noescape_elems:
364 noescape = True 355 noescape = True
365 356
426 for kind, data, pos in stream: 417 for kind, data, pos in stream:
427 if kind is TEXT: 418 if kind is TEXT:
428 if type(data) is Markup: 419 if type(data) is Markup:
429 data = data.striptags().stripentities() 420 data = data.striptags().stripentities()
430 yield unicode(data) 421 yield unicode(data)
422
423
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.

    A `START` event is held back until the following event has been seen. If
    that event is an `END`, the pair is replaced by a single `EMPTY` event
    that carries the data and position of the `START` event. Otherwise the
    `START` event is passed through unchanged.
    """

    # Event kind emitted in place of an adjacent START/END pair.
    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Generate the events of `stream` with adjacent `START`/`END` pairs
        collapsed into `EMPTY` events.

        `stream` is an iterable of `(kind, data, pos)` tuples; the generated
        events have the same shape.
        """
        prev = (None, None, None)
        for kind, data, pos in stream:
            if prev[0] is START:
                if kind is END:
                    # No content between the tags: emit one EMPTY event that
                    # reuses the data and position of the START event. The
                    # END event itself is swallowed.
                    prev = EMPTY, prev[1], prev[2]
                    yield prev
                    continue
                else:
                    # The element has content, so release the START event
                    # that was being held back.
                    yield prev
            if kind is not START:
                yield kind, data, pos
            # START events are not yielded right away; they are remembered
            # here and emitted on the next iteration.
            prev = kind, data, pos

        # If the stream stops right after a START event, that event is still
        # pending; emit it instead of silently dropping it.
        if prev[0] is START:
            yield prev


EMPTY = EmptyTagFilter.EMPTY
431 447
432 448
433 class WhitespaceFilter(object): 449 class WhitespaceFilter(object):
434 """A filter that removes extraneous ignorable white space from the 450 """A filter that removes extraneous ignorable white space from the
435 stream.""" 451 stream."""
505 elif kind is END_CDATA and not escape_cdata: 521 elif kind is END_CDATA and not escape_cdata:
506 noescape = False 522 noescape = False
507 523
508 if kind: 524 if kind:
509 yield kind, data, pos 525 yield kind, data, pos
510
511
512 class _PushbackIterator(object):
513 """A simple wrapper for iterators that allows pushing items back on the
514 queue via the `pushback()` method.
515
516 That can effectively be used to peek at the next item."""
517 __slots__ = ['iterable', 'buf']
518
519 def __init__(self, iterable):
520 self.iterable = iter(iterable)
521 self.buf = []
522
523 def __iter__(self):
524 return self
525
526 def next(self):
527 if self.buf:
528 return self.buf.pop(0)
529 return self.iterable.next()
530
531 def pushback(self, item):
532 self.buf.append(item)
Copyright (C) 2012-2017 Edgewall Software