Mercurial > genshi > mirror
changeset 143:3d4c214c979a trunk
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements: the escaped contents would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
author | cmlenz |
---|---|
date | Fri, 11 Aug 2006 14:08:13 +0000 |
parents | 349b3ff5367d |
children | d1ce85a7f296 |
files | markup/core.py markup/input.py markup/output.py markup/tests/output.py |
diffstat | 4 files changed, 62 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/markup/core.py +++ b/markup/core.py @@ -48,8 +48,10 @@ END = StreamEventKind('END') # an end tag TEXT = StreamEventKind('TEXT') # literal text DOCTYPE = StreamEventKind('DOCTYPE') # doctype declaration - START_NS = StreamEventKind('START-NS') # start namespace mapping - END_NS = StreamEventKind('END-NS') # end namespace mapping + START_NS = StreamEventKind('START_NS') # start namespace mapping + END_NS = StreamEventKind('END_NS') # end namespace mapping + START_CDATA = StreamEventKind('START_CDATA') # start CDATA section + END_CDATA = StreamEventKind('END_CDATA') # end CDATA section PI = StreamEventKind('PI') # processing instruction COMMENT = StreamEventKind('COMMENT') # comment @@ -135,6 +137,8 @@ DOCTYPE = Stream.DOCTYPE START_NS = Stream.START_NS END_NS = Stream.END_NS +START_CDATA = Stream.START_CDATA +END_CDATA = Stream.END_CDATA PI = Stream.PI COMMENT = Stream.COMMENT
--- a/markup/input.py +++ b/markup/input.py @@ -69,6 +69,8 @@ parser.StartDoctypeDeclHandler = self._handle_doctype parser.StartNamespaceDeclHandler = self._handle_start_ns parser.EndNamespaceDeclHandler = self._handle_end_ns + parser.StartCdataSectionHandler = self._handle_start_cdata + parser.EndCdataSectionHandler = self._handle_end_cdata parser.ProcessingInstructionHandler = self._handle_pi parser.CommentHandler = self._handle_comment parser.DefaultHandler = self._handle_other @@ -105,7 +107,7 @@ msg += ', in ' + self.filename raise ParseError(msg, self.filename, e.lineno, e.offset) - def _enqueue(self, kind, data, pos=None): + def _enqueue(self, kind, data=None, pos=None): if pos is None: pos = self._getpos() if kind is Stream.TEXT: @@ -149,6 +151,12 @@ def _handle_end_ns(self, prefix): self._enqueue(Stream.END_NS, prefix or '') + def _handle_start_cdata(self): + self._enqueue(Stream.START_CDATA) + + def _handle_end_cdata(self): + self._enqueue(Stream.END_CDATA) + def _handle_pi(self, target, data): self._enqueue(Stream.PI, (target, data))
--- a/markup/output.py +++ b/markup/output.py @@ -23,7 +23,8 @@ import re from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE -from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI +from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ + START_CDATA, END_CDATA, PI, COMMENT __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] @@ -72,9 +73,10 @@ self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) def __call__(self, stream): - have_doctype = False ns_attrib = [] ns_mapping = {XML_NAMESPACE.uri: 'xml'} + have_doctype = False + in_cdata = False stream = chain(self.preamble, stream) for filter_ in self.filters: @@ -125,7 +127,10 @@ yield Markup('</%s>' % tagname) elif kind is TEXT: - yield escape(data, quotes=False) + if in_cdata: + yield data + else: + yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) @@ -152,6 +157,14 @@ else: ns_attrib.append((QName('xmlns:%s' % prefix), uri)) + elif kind is START_CDATA: + yield Markup('<![CDATA[') + in_cdata = True + + elif kind is END_CDATA: + yield Markup(']]>') + in_cdata = False + elif kind is PI: yield Markup('<?%s %s?>' % data) @@ -182,6 +195,7 @@ boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS have_doctype = False + in_cdata = False stream = chain(self.preamble, stream) for filter_ in self.filters: @@ -240,7 +254,10 @@ yield Markup('</%s>' % tagname) elif kind is TEXT: - yield escape(data, quotes=False) + if in_cdata: + yield data + else: + yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) @@ -267,6 +284,14 @@ else: ns_attrib.append((QName('xmlns:%s' % prefix), uri)) + elif kind is START_CDATA: + yield Markup('<![CDATA[') + in_cdata = True + + elif kind is END_CDATA: + yield Markup(']]>') + in_cdata = False + elif kind is PI: yield Markup('<?%s %s?>' % data) @@ -294,7 +319,7 @@ super(HTMLSerializer, self).__init__(doctype, False) if strip_whitespace: 
self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, - self._NOESCAPE_ELEMS)) + self._NOESCAPE_ELEMS, True)) def __call__(self, stream): namespace = self.NAMESPACE @@ -382,7 +407,7 @@ _LINE_COLLAPSE = re.compile('\n{2,}') _XML_SPACE = XML_NAMESPACE['space'] - def __init__(self, preserve=None, noescape=None): + def __init__(self, preserve=None, noescape=None, escape_cdata=False): """Initialize the filter. @param preserve: a set or sequence of tag names for which white-space @@ -399,6 +424,7 @@ if noescape is None: noescape = [] self.noescape = frozenset(noescape) + self.escape_cdata = escape_cdata def __call__(self, stream, ctxt=None): trim_trailing_space = self._TRAILING_SPACE.sub @@ -409,6 +435,7 @@ preserve = False noescape_elems = self.noescape noescape = False + escape_cdata = self.escape_cdata textbuf = [] push_text = textbuf.append @@ -441,6 +468,12 @@ elif kind is END: preserve = noescape = False + elif kind is START_CDATA and not escape_cdata: + noescape = True + + elif kind is END_CDATA and not escape_cdata: + noescape = False + if kind: yield kind, data, pos
--- a/markup/tests/output.py +++ b/markup/tests/output.py @@ -95,16 +95,18 @@ self.assertEqual(text, output) def test_script_escaping(self): - text = '<script><![CDATA[if (1 < 2) { alert("Doh"); }]]></script>' + text = """<script>/*<![CDATA[*/ + if (1 < 2) { alert("Doh"); } + /*]]>*/</script>""" output = XML(text).render(XHTMLSerializer) - self.assertEqual('<script>if (1 < 2) { alert("Doh"); }</script>', - output) + self.assertEqual(text, output) def test_style_escaping(self): - text = '<style><![CDATA[html > body { display: none; }]]></style>' + text = """<style>/*<![CDATA[*/ + html > body { display: none; } + /*]]>*/</style>""" output = XML(text).render(XHTMLSerializer) - self.assertEqual('<style>html > body { display: none; }</style>', - output) + self.assertEqual(text, output) class HTMLSerializerTestCase(unittest.TestCase):