changeset 143:3d4c214c979a trunk

CDATA sections in XML input are now preserved as CDATA sections in the output. This should address the problem that the contents of `<style>` and `<script>` elements were being escaped, so that they were only interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
author cmlenz
date Fri, 11 Aug 2006 14:08:13 +0000
parents 349b3ff5367d
children d1ce85a7f296
files markup/core.py markup/input.py markup/output.py markup/tests/output.py
diffstat 4 files changed, 62 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/markup/core.py
+++ b/markup/core.py
@@ -48,8 +48,10 @@
     END = StreamEventKind('END') # an end tag
     TEXT = StreamEventKind('TEXT') # literal text
     DOCTYPE = StreamEventKind('DOCTYPE') # doctype declaration
-    START_NS = StreamEventKind('START-NS') # start namespace mapping
-    END_NS = StreamEventKind('END-NS') # end namespace mapping
+    START_NS = StreamEventKind('START_NS') # start namespace mapping
+    END_NS = StreamEventKind('END_NS') # end namespace mapping
+    START_CDATA = StreamEventKind('START_CDATA') # start CDATA section
+    END_CDATA = StreamEventKind('END_CDATA') # end CDATA section
     PI = StreamEventKind('PI') # processing instruction
     COMMENT = StreamEventKind('COMMENT') # comment
 
@@ -135,6 +137,8 @@
 DOCTYPE = Stream.DOCTYPE
 START_NS = Stream.START_NS
 END_NS = Stream.END_NS
+START_CDATA = Stream.START_CDATA
+END_CDATA = Stream.END_CDATA
 PI = Stream.PI
 COMMENT = Stream.COMMENT
 
--- a/markup/input.py
+++ b/markup/input.py
@@ -69,6 +69,8 @@
         parser.StartDoctypeDeclHandler = self._handle_doctype
         parser.StartNamespaceDeclHandler = self._handle_start_ns
         parser.EndNamespaceDeclHandler = self._handle_end_ns
+        parser.StartCdataSectionHandler = self._handle_start_cdata
+        parser.EndCdataSectionHandler = self._handle_end_cdata
         parser.ProcessingInstructionHandler = self._handle_pi
         parser.CommentHandler = self._handle_comment
         parser.DefaultHandler = self._handle_other
@@ -105,7 +107,7 @@
                 msg += ', in ' + self.filename
             raise ParseError(msg, self.filename, e.lineno, e.offset)
 
-    def _enqueue(self, kind, data, pos=None):
+    def _enqueue(self, kind, data=None, pos=None):
         if pos is None:
             pos = self._getpos()
         if kind is Stream.TEXT:
@@ -149,6 +151,12 @@
     def _handle_end_ns(self, prefix):
         self._enqueue(Stream.END_NS, prefix or '')
 
+    def _handle_start_cdata(self):
+        self._enqueue(Stream.START_CDATA)
+
+    def _handle_end_cdata(self):
+        self._enqueue(Stream.END_CDATA)
+
     def _handle_pi(self, target, data):
         self._enqueue(Stream.PI, (target, data))
 
--- a/markup/output.py
+++ b/markup/output.py
@@ -23,7 +23,8 @@
 import re
 
 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE
-from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI
+from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
+                        START_CDATA, END_CDATA, PI, COMMENT
 
 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
 
@@ -72,9 +73,10 @@
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
 
     def __call__(self, stream):
-        have_doctype = False
         ns_attrib = []
         ns_mapping = {XML_NAMESPACE.uri: 'xml'}
+        have_doctype = False
+        in_cdata = False
 
         stream = chain(self.preamble, stream)
         for filter_ in self.filters:
@@ -125,7 +127,10 @@
                 yield Markup('</%s>' % tagname)
 
             elif kind is TEXT:
-                yield escape(data, quotes=False)
+                if in_cdata:
+                    yield data
+                else:
+                    yield escape(data, quotes=False)
 
             elif kind is COMMENT:
                 yield Markup('<!--%s-->' % data)
@@ -152,6 +157,14 @@
                     else:
                         ns_attrib.append((QName('xmlns:%s' % prefix), uri))
 
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
             elif kind is PI:
                 yield Markup('<?%s %s?>' % data)
 
@@ -182,6 +195,7 @@
         boolean_attrs = self._BOOLEAN_ATTRS
         empty_elems = self._EMPTY_ELEMS
         have_doctype = False
+        in_cdata = False
 
         stream = chain(self.preamble, stream)
         for filter_ in self.filters:
@@ -240,7 +254,10 @@
                 yield Markup('</%s>' % tagname)
 
             elif kind is TEXT:
-                yield escape(data, quotes=False)
+                if in_cdata:
+                    yield data
+                else:
+                    yield escape(data, quotes=False)
 
             elif kind is COMMENT:
                 yield Markup('<!--%s-->' % data)
@@ -267,6 +284,14 @@
                     else:
                         ns_attrib.append((QName('xmlns:%s' % prefix), uri))
 
+            elif kind is START_CDATA:
+                yield Markup('<![CDATA[')
+                in_cdata = True
+
+            elif kind is END_CDATA:
+                yield Markup(']]>')
+                in_cdata = False
+
             elif kind is PI:
                 yield Markup('<?%s %s?>' % data)
 
@@ -294,7 +319,7 @@
         super(HTMLSerializer, self).__init__(doctype, False)
         if strip_whitespace:
             self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
-                                                 self._NOESCAPE_ELEMS))
+                                                 self._NOESCAPE_ELEMS, True))
 
     def __call__(self, stream):
         namespace = self.NAMESPACE
@@ -382,7 +407,7 @@
     _LINE_COLLAPSE = re.compile('\n{2,}')
     _XML_SPACE = XML_NAMESPACE['space']
 
-    def __init__(self, preserve=None, noescape=None):
+    def __init__(self, preserve=None, noescape=None, escape_cdata=False):
         """Initialize the filter.
         
         @param preserve: a set or sequence of tag names for which white-space
@@ -399,6 +424,7 @@
         if noescape is None:
             noescape = []
         self.noescape = frozenset(noescape)
+        self.escape_cdata = escape_cdata
 
     def __call__(self, stream, ctxt=None):
         trim_trailing_space = self._TRAILING_SPACE.sub
@@ -409,6 +435,7 @@
         preserve = False
         noescape_elems = self.noescape
         noescape = False
+        escape_cdata = self.escape_cdata
 
         textbuf = []
         push_text = textbuf.append
@@ -441,6 +468,12 @@
                 elif kind is END:
                     preserve = noescape = False
 
+                elif kind is START_CDATA and not escape_cdata:
+                    noescape = True
+
+                elif kind is END_CDATA and not escape_cdata:
+                    noescape = False
+
                 if kind:
                     yield kind, data, pos
 
--- a/markup/tests/output.py
+++ b/markup/tests/output.py
@@ -95,16 +95,18 @@
         self.assertEqual(text, output)
 
     def test_script_escaping(self):
-        text = '<script><![CDATA[if (1 < 2) { alert("Doh"); }]]></script>'
+        text = """<script>/*<![CDATA[*/
+            if (1 < 2) { alert("Doh"); }
+        /*]]>*/</script>"""
         output = XML(text).render(XHTMLSerializer)
-        self.assertEqual('<script>if (1 &lt; 2) { alert("Doh"); }</script>',
-                         output)
+        self.assertEqual(text, output)
 
     def test_style_escaping(self):
-        text = '<style><![CDATA[html > body { display: none; }]]></style>'
+        text = """<style>/*<![CDATA[*/
+            html > body { display: none; }
+        /*]]>*/</style>"""
         output = XML(text).render(XHTMLSerializer)
-        self.assertEqual('<style>html &gt; body { display: none; }</style>',
-                         output)
+        self.assertEqual(text, output)
 
 
 class HTMLSerializerTestCase(unittest.TestCase):
Copyright (C) 2012-2017 Edgewall Software