Mercurial > genshi > genshi-test
comparison markup/output.py @ 143:ef761afcedff
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements: the escaped contents were only interpreted correctly when the output was served as `application/xhtml+xml`. Closes #24.
author | cmlenz |
---|---|
date | Fri, 11 Aug 2006 14:08:13 +0000 |
parents | b3ceaa35fb6b |
children | 56d534eb53f9 |
comparison
equal
deleted
inserted
replaced
142:b49cc51999b9 | 143:ef761afcedff |
---|---|
21 except NameError: | 21 except NameError: |
22 from sets import ImmutableSet as frozenset | 22 from sets import ImmutableSet as frozenset |
23 import re | 23 import re |
24 | 24 |
25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE | 25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE |
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI | 26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ |
27 START_CDATA, END_CDATA, PI, COMMENT | |
27 | 28 |
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] | 29 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] |
29 | 30 |
30 | 31 |
31 class DocType(object): | 32 class DocType(object): |
70 self.filters = [] | 71 self.filters = [] |
71 if strip_whitespace: | 72 if strip_whitespace: |
72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) | 73 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
73 | 74 |
74 def __call__(self, stream): | 75 def __call__(self, stream): |
75 have_doctype = False | |
76 ns_attrib = [] | 76 ns_attrib = [] |
77 ns_mapping = {XML_NAMESPACE.uri: 'xml'} | 77 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
78 have_doctype = False | |
79 in_cdata = False | |
78 | 80 |
79 stream = chain(self.preamble, stream) | 81 stream = chain(self.preamble, stream) |
80 for filter_ in self.filters: | 82 for filter_ in self.filters: |
81 stream = filter_(stream) | 83 stream = filter_(stream) |
82 stream = _PushbackIterator(stream) | 84 stream = _PushbackIterator(stream) |
123 if prefix: | 125 if prefix: |
124 tagname = '%s:%s' % (prefix, tag.localname) | 126 tagname = '%s:%s' % (prefix, tag.localname) |
125 yield Markup('</%s>' % tagname) | 127 yield Markup('</%s>' % tagname) |
126 | 128 |
127 elif kind is TEXT: | 129 elif kind is TEXT: |
128 yield escape(data, quotes=False) | 130 if in_cdata: |
131 yield data | |
132 else: | |
133 yield escape(data, quotes=False) | |
129 | 134 |
130 elif kind is COMMENT: | 135 elif kind is COMMENT: |
131 yield Markup('<!--%s-->' % data) | 136 yield Markup('<!--%s-->' % data) |
132 | 137 |
133 elif kind is DOCTYPE and not have_doctype: | 138 elif kind is DOCTYPE and not have_doctype: |
150 if not prefix: | 155 if not prefix: |
151 ns_attrib.append((QName('xmlns'), uri)) | 156 ns_attrib.append((QName('xmlns'), uri)) |
152 else: | 157 else: |
153 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) | 158 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) |
154 | 159 |
160 elif kind is START_CDATA: | |
161 yield Markup('<![CDATA[') | |
162 in_cdata = True | |
163 | |
164 elif kind is END_CDATA: | |
165 yield Markup(']]>') | |
166 in_cdata = False | |
167 | |
155 elif kind is PI: | 168 elif kind is PI: |
156 yield Markup('<?%s %s?>' % data) | 169 yield Markup('<?%s %s?>' % data) |
157 | 170 |
158 | 171 |
159 class XHTMLSerializer(XMLSerializer): | 172 class XHTMLSerializer(XMLSerializer): |
180 ns_attrib = [] | 193 ns_attrib = [] |
181 ns_mapping = {XML_NAMESPACE.uri: 'xml'} | 194 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
182 boolean_attrs = self._BOOLEAN_ATTRS | 195 boolean_attrs = self._BOOLEAN_ATTRS |
183 empty_elems = self._EMPTY_ELEMS | 196 empty_elems = self._EMPTY_ELEMS |
184 have_doctype = False | 197 have_doctype = False |
198 in_cdata = False | |
185 | 199 |
186 stream = chain(self.preamble, stream) | 200 stream = chain(self.preamble, stream) |
187 for filter_ in self.filters: | 201 for filter_ in self.filters: |
188 stream = filter_(stream) | 202 stream = filter_(stream) |
189 stream = _PushbackIterator(stream) | 203 stream = _PushbackIterator(stream) |
238 if prefix: | 252 if prefix: |
239 tagname = '%s:%s' % (prefix, tag.localname) | 253 tagname = '%s:%s' % (prefix, tag.localname) |
240 yield Markup('</%s>' % tagname) | 254 yield Markup('</%s>' % tagname) |
241 | 255 |
242 elif kind is TEXT: | 256 elif kind is TEXT: |
243 yield escape(data, quotes=False) | 257 if in_cdata: |
258 yield data | |
259 else: | |
260 yield escape(data, quotes=False) | |
244 | 261 |
245 elif kind is COMMENT: | 262 elif kind is COMMENT: |
246 yield Markup('<!--%s-->' % data) | 263 yield Markup('<!--%s-->' % data) |
247 | 264 |
248 elif kind is DOCTYPE and not have_doctype: | 265 elif kind is DOCTYPE and not have_doctype: |
265 if not prefix: | 282 if not prefix: |
266 ns_attrib.append((QName('xmlns'), uri)) | 283 ns_attrib.append((QName('xmlns'), uri)) |
267 else: | 284 else: |
268 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) | 285 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) |
269 | 286 |
287 elif kind is START_CDATA: | |
288 yield Markup('<![CDATA[') | |
289 in_cdata = True | |
290 | |
291 elif kind is END_CDATA: | |
292 yield Markup(']]>') | |
293 in_cdata = False | |
294 | |
270 elif kind is PI: | 295 elif kind is PI: |
271 yield Markup('<?%s %s?>' % data) | 296 yield Markup('<?%s %s?>' % data) |
272 | 297 |
273 | 298 |
274 class HTMLSerializer(XHTMLSerializer): | 299 class HTMLSerializer(XHTMLSerializer): |
292 stripped from the output | 317 stripped from the output |
293 """ | 318 """ |
294 super(HTMLSerializer, self).__init__(doctype, False) | 319 super(HTMLSerializer, self).__init__(doctype, False) |
295 if strip_whitespace: | 320 if strip_whitespace: |
296 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, | 321 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, |
297 self._NOESCAPE_ELEMS)) | 322 self._NOESCAPE_ELEMS, True)) |
298 | 323 |
299 def __call__(self, stream): | 324 def __call__(self, stream): |
300 namespace = self.NAMESPACE | 325 namespace = self.NAMESPACE |
301 ns_mapping = {} | 326 ns_mapping = {} |
302 boolean_attrs = self._BOOLEAN_ATTRS | 327 boolean_attrs = self._BOOLEAN_ATTRS |
380 | 405 |
381 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') | 406 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') |
382 _LINE_COLLAPSE = re.compile('\n{2,}') | 407 _LINE_COLLAPSE = re.compile('\n{2,}') |
383 _XML_SPACE = XML_NAMESPACE['space'] | 408 _XML_SPACE = XML_NAMESPACE['space'] |
384 | 409 |
385 def __init__(self, preserve=None, noescape=None): | 410 def __init__(self, preserve=None, noescape=None, escape_cdata=False): |
386 """Initialize the filter. | 411 """Initialize the filter. |
387 | 412 |
388 @param preserve: a set or sequence of tag names for which white-space | 413 @param preserve: a set or sequence of tag names for which white-space |
389 should be ignored. | 414 should be ignored. |
390 @param noescape: a set or sequence of tag names for which text content | 415 @param noescape: a set or sequence of tag names for which text content |
397 preserve = [] | 422 preserve = [] |
398 self.preserve = frozenset(preserve) | 423 self.preserve = frozenset(preserve) |
399 if noescape is None: | 424 if noescape is None: |
400 noescape = [] | 425 noescape = [] |
401 self.noescape = frozenset(noescape) | 426 self.noescape = frozenset(noescape) |
427 self.escape_cdata = escape_cdata | |
402 | 428 |
403 def __call__(self, stream, ctxt=None): | 429 def __call__(self, stream, ctxt=None): |
404 trim_trailing_space = self._TRAILING_SPACE.sub | 430 trim_trailing_space = self._TRAILING_SPACE.sub |
405 collapse_lines = self._LINE_COLLAPSE.sub | 431 collapse_lines = self._LINE_COLLAPSE.sub |
406 xml_space = self._XML_SPACE | 432 xml_space = self._XML_SPACE |
407 mjoin = Markup('').join | 433 mjoin = Markup('').join |
408 preserve_elems = self.preserve | 434 preserve_elems = self.preserve |
409 preserve = False | 435 preserve = False |
410 noescape_elems = self.noescape | 436 noescape_elems = self.noescape |
411 noescape = False | 437 noescape = False |
438 escape_cdata = self.escape_cdata | |
412 | 439 |
413 textbuf = [] | 440 textbuf = [] |
414 push_text = textbuf.append | 441 push_text = textbuf.append |
415 pop_text = textbuf.pop | 442 pop_text = textbuf.pop |
416 for kind, data, pos in chain(stream, [(None, None, None)]): | 443 for kind, data, pos in chain(stream, [(None, None, None)]): |
439 noescape = True | 466 noescape = True |
440 | 467 |
441 elif kind is END: | 468 elif kind is END: |
442 preserve = noescape = False | 469 preserve = noescape = False |
443 | 470 |
471 elif kind is START_CDATA and not escape_cdata: | |
472 noescape = True | |
473 | |
474 elif kind is END_CDATA and not escape_cdata: | |
475 noescape = False | |
476 | |
444 if kind: | 477 if kind: |
445 yield kind, data, pos | 478 yield kind, data, pos |
446 | 479 |
447 | 480 |
448 class _PushbackIterator(object): | 481 class _PushbackIterator(object): |