comparison markup/output.py @ 143:ef761afcedff

CDATA sections in XML input now appear as CDATA sections in the output. This should fix the problem with escaping the contents of `<style>` and `<script>` elements: the escaped contents were interpreted correctly only when the output was served as `application/xhtml+xml`. Closes #24.
author cmlenz
date Fri, 11 Aug 2006 14:08:13 +0000
parents b3ceaa35fb6b
children 56d534eb53f9
comparison
equal deleted inserted replaced
142:b49cc51999b9 143:ef761afcedff
21 except NameError: 21 except NameError:
22 from sets import ImmutableSet as frozenset 22 from sets import ImmutableSet as frozenset
23 import re 23 import re
24 24
25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE 25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI 26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
27 START_CDATA, END_CDATA, PI, COMMENT
27 28
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] 29 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
29 30
30 31
31 class DocType(object): 32 class DocType(object):
70 self.filters = [] 71 self.filters = []
71 if strip_whitespace: 72 if strip_whitespace:
72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) 73 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
73 74
74 def __call__(self, stream): 75 def __call__(self, stream):
75 have_doctype = False
76 ns_attrib = [] 76 ns_attrib = []
77 ns_mapping = {XML_NAMESPACE.uri: 'xml'} 77 ns_mapping = {XML_NAMESPACE.uri: 'xml'}
78 have_doctype = False
79 in_cdata = False
78 80
79 stream = chain(self.preamble, stream) 81 stream = chain(self.preamble, stream)
80 for filter_ in self.filters: 82 for filter_ in self.filters:
81 stream = filter_(stream) 83 stream = filter_(stream)
82 stream = _PushbackIterator(stream) 84 stream = _PushbackIterator(stream)
123 if prefix: 125 if prefix:
124 tagname = '%s:%s' % (prefix, tag.localname) 126 tagname = '%s:%s' % (prefix, tag.localname)
125 yield Markup('</%s>' % tagname) 127 yield Markup('</%s>' % tagname)
126 128
127 elif kind is TEXT: 129 elif kind is TEXT:
128 yield escape(data, quotes=False) 130 if in_cdata:
131 yield data
132 else:
133 yield escape(data, quotes=False)
129 134
130 elif kind is COMMENT: 135 elif kind is COMMENT:
131 yield Markup('<!--%s-->' % data) 136 yield Markup('<!--%s-->' % data)
132 137
133 elif kind is DOCTYPE and not have_doctype: 138 elif kind is DOCTYPE and not have_doctype:
150 if not prefix: 155 if not prefix:
151 ns_attrib.append((QName('xmlns'), uri)) 156 ns_attrib.append((QName('xmlns'), uri))
152 else: 157 else:
153 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) 158 ns_attrib.append((QName('xmlns:%s' % prefix), uri))
154 159
160 elif kind is START_CDATA:
161 yield Markup('<![CDATA[')
162 in_cdata = True
163
164 elif kind is END_CDATA:
165 yield Markup(']]>')
166 in_cdata = False
167
155 elif kind is PI: 168 elif kind is PI:
156 yield Markup('<?%s %s?>' % data) 169 yield Markup('<?%s %s?>' % data)
157 170
158 171
159 class XHTMLSerializer(XMLSerializer): 172 class XHTMLSerializer(XMLSerializer):
180 ns_attrib = [] 193 ns_attrib = []
181 ns_mapping = {XML_NAMESPACE.uri: 'xml'} 194 ns_mapping = {XML_NAMESPACE.uri: 'xml'}
182 boolean_attrs = self._BOOLEAN_ATTRS 195 boolean_attrs = self._BOOLEAN_ATTRS
183 empty_elems = self._EMPTY_ELEMS 196 empty_elems = self._EMPTY_ELEMS
184 have_doctype = False 197 have_doctype = False
198 in_cdata = False
185 199
186 stream = chain(self.preamble, stream) 200 stream = chain(self.preamble, stream)
187 for filter_ in self.filters: 201 for filter_ in self.filters:
188 stream = filter_(stream) 202 stream = filter_(stream)
189 stream = _PushbackIterator(stream) 203 stream = _PushbackIterator(stream)
238 if prefix: 252 if prefix:
239 tagname = '%s:%s' % (prefix, tag.localname) 253 tagname = '%s:%s' % (prefix, tag.localname)
240 yield Markup('</%s>' % tagname) 254 yield Markup('</%s>' % tagname)
241 255
242 elif kind is TEXT: 256 elif kind is TEXT:
243 yield escape(data, quotes=False) 257 if in_cdata:
258 yield data
259 else:
260 yield escape(data, quotes=False)
244 261
245 elif kind is COMMENT: 262 elif kind is COMMENT:
246 yield Markup('<!--%s-->' % data) 263 yield Markup('<!--%s-->' % data)
247 264
248 elif kind is DOCTYPE and not have_doctype: 265 elif kind is DOCTYPE and not have_doctype:
265 if not prefix: 282 if not prefix:
266 ns_attrib.append((QName('xmlns'), uri)) 283 ns_attrib.append((QName('xmlns'), uri))
267 else: 284 else:
268 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) 285 ns_attrib.append((QName('xmlns:%s' % prefix), uri))
269 286
287 elif kind is START_CDATA:
288 yield Markup('<![CDATA[')
289 in_cdata = True
290
291 elif kind is END_CDATA:
292 yield Markup(']]>')
293 in_cdata = False
294
270 elif kind is PI: 295 elif kind is PI:
271 yield Markup('<?%s %s?>' % data) 296 yield Markup('<?%s %s?>' % data)
272 297
273 298
274 class HTMLSerializer(XHTMLSerializer): 299 class HTMLSerializer(XHTMLSerializer):
292 stripped from the output 317 stripped from the output
293 """ 318 """
294 super(HTMLSerializer, self).__init__(doctype, False) 319 super(HTMLSerializer, self).__init__(doctype, False)
295 if strip_whitespace: 320 if strip_whitespace:
296 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, 321 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
297 self._NOESCAPE_ELEMS)) 322 self._NOESCAPE_ELEMS, True))
298 323
299 def __call__(self, stream): 324 def __call__(self, stream):
300 namespace = self.NAMESPACE 325 namespace = self.NAMESPACE
301 ns_mapping = {} 326 ns_mapping = {}
302 boolean_attrs = self._BOOLEAN_ATTRS 327 boolean_attrs = self._BOOLEAN_ATTRS
380 405
381 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') 406 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
382 _LINE_COLLAPSE = re.compile('\n{2,}') 407 _LINE_COLLAPSE = re.compile('\n{2,}')
383 _XML_SPACE = XML_NAMESPACE['space'] 408 _XML_SPACE = XML_NAMESPACE['space']
384 409
385 def __init__(self, preserve=None, noescape=None): 410 def __init__(self, preserve=None, noescape=None, escape_cdata=False):
386 """Initialize the filter. 411 """Initialize the filter.
387 412
388 @param preserve: a set or sequence of tag names for which white-space 413 @param preserve: a set or sequence of tag names for which white-space
389 should be ignored. 414 should be ignored.
390 @param noescape: a set or sequence of tag names for which text content 415 @param noescape: a set or sequence of tag names for which text content
397 preserve = [] 422 preserve = []
398 self.preserve = frozenset(preserve) 423 self.preserve = frozenset(preserve)
399 if noescape is None: 424 if noescape is None:
400 noescape = [] 425 noescape = []
401 self.noescape = frozenset(noescape) 426 self.noescape = frozenset(noescape)
427 self.escape_cdata = escape_cdata
402 428
403 def __call__(self, stream, ctxt=None): 429 def __call__(self, stream, ctxt=None):
404 trim_trailing_space = self._TRAILING_SPACE.sub 430 trim_trailing_space = self._TRAILING_SPACE.sub
405 collapse_lines = self._LINE_COLLAPSE.sub 431 collapse_lines = self._LINE_COLLAPSE.sub
406 xml_space = self._XML_SPACE 432 xml_space = self._XML_SPACE
407 mjoin = Markup('').join 433 mjoin = Markup('').join
408 preserve_elems = self.preserve 434 preserve_elems = self.preserve
409 preserve = False 435 preserve = False
410 noescape_elems = self.noescape 436 noescape_elems = self.noescape
411 noescape = False 437 noescape = False
438 escape_cdata = self.escape_cdata
412 439
413 textbuf = [] 440 textbuf = []
414 push_text = textbuf.append 441 push_text = textbuf.append
415 pop_text = textbuf.pop 442 pop_text = textbuf.pop
416 for kind, data, pos in chain(stream, [(None, None, None)]): 443 for kind, data, pos in chain(stream, [(None, None, None)]):
439 noescape = True 466 noescape = True
440 467
441 elif kind is END: 468 elif kind is END:
442 preserve = noescape = False 469 preserve = noescape = False
443 470
471 elif kind is START_CDATA and not escape_cdata:
472 noescape = True
473
474 elif kind is END_CDATA and not escape_cdata:
475 noescape = False
476
444 if kind: 477 if kind:
445 yield kind, data, pos 478 yield kind, data, pos
446 479
447 480
448 class _PushbackIterator(object): 481 class _PushbackIterator(object):
Copyright (C) 2012-2017 Edgewall Software