changeset 209:fc6b2fb66518 trunk

* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
* Enable the `XMLParser` to handle HTML entities without requiring the declaration of an HTML document type.
author cmlenz
date Tue, 29 Aug 2006 16:34:40 +0000
parents bc146e63c159
children 9fd7535883f2
files markup/input.py markup/tests/input.py
diffstat 2 files changed, 42 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/markup/input.py
+++ b/markup/input.py
@@ -80,7 +80,11 @@
         parser.EndCdataSectionHandler = self._handle_end_cdata
         parser.ProcessingInstructionHandler = self._handle_pi
         parser.CommentHandler = self._handle_comment
+
+        # Tell Expat that we'll handle non-XML entities ourselves
+        # (in _handle_other)
         parser.DefaultHandler = self._handle_other
+        parser.UseForeignDTD()
 
         # Location reporting is only support in Python >= 2.4
         if not hasattr(parser, 'CurrentLineNumber'):
@@ -184,9 +188,13 @@
                 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
                 self._enqueue(TEXT, text)
             except KeyError:
-                lineno, offset = self._getpos()
-                raise expat.error("undefined entity %s: line %d, column %d" %
-                                  (text, lineno, offset))
+                filename, lineno, offset = self._getpos()
+                error = expat.error('undefined entity "%s": line %d, column %d'
+                                    % (text, lineno, offset))
+                error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
+                error.lineno = lineno
+                error.offset = offset
+                raise error
 
 
 def XML(text):
--- a/markup/tests/input.py
+++ b/markup/tests/input.py
@@ -17,7 +17,7 @@
 import unittest
 
 from markup.core import Stream
-from markup.input import XMLParser, HTMLParser
+from markup.input import XMLParser, HTMLParser, ParseError
 
 
 class XMLParserTestCase(unittest.TestCase):
@@ -59,6 +59,36 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\u2013', data)
 
+    def test_html_entity_with_dtd(self):
+        text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+        <html>&nbsp;</html>
+        """
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[2]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xa0', data)
+
+    def test_html_entity_without_dtd(self):
+        text = '<html>&nbsp;</html>'
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xa0', data)
+
+    def test_undefined_entity_with_dtd(self):
+        text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+        <html>&junk;</html>
+        """
+        events = XMLParser(StringIO(text))
+        self.assertRaises(ParseError, list, events)
+
+    def test_undefined_entity_without_dtd(self):
+        text = '<html>&junk;</html>'
+        events = XMLParser(StringIO(text))
+        self.assertRaises(ParseError, list, events)
+
 
 class HTMLParserTestCase(unittest.TestCase):
 
Copyright (C) 2012-2017 Edgewall Software