# HG changeset patch # User cmlenz # Date 1156869280 0 # Node ID fc6b2fb6651840b850dba79f5317d5b005076254 # Parent bc146e63c15978ccada1e3f8f5c64e56b2455451 * Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC. * Enable the `XMLParser` to handle HTML entities without requiring the declaration of a HTML document type. diff --git a/markup/input.py b/markup/input.py --- a/markup/input.py +++ b/markup/input.py @@ -80,7 +80,11 @@ parser.EndCdataSectionHandler = self._handle_end_cdata parser.ProcessingInstructionHandler = self._handle_pi parser.CommentHandler = self._handle_comment + + # Tell Expat that we'll handle non-XML entities ourselves + # (in _handle_other) parser.DefaultHandler = self._handle_other + parser.UseForeignDTD() # Location reporting is only support in Python >= 2.4 if not hasattr(parser, 'CurrentLineNumber'): @@ -184,9 +188,13 @@ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) self._enqueue(TEXT, text) except KeyError: - lineno, offset = self._getpos() - raise expat.error("undefined entity %s: line %d, column %d" % - (text, lineno, offset)) + filename, lineno, offset = self._getpos() + error = expat.error('undefined entity "%s": line %d, column %d' + % (text, lineno, offset)) + error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY + error.lineno = lineno + error.offset = offset + raise error def XML(text): diff --git a/markup/tests/input.py b/markup/tests/input.py --- a/markup/tests/input.py +++ b/markup/tests/input.py @@ -17,7 +17,7 @@ import unittest from markup.core import Stream -from markup.input import XMLParser, HTMLParser +from markup.input import XMLParser, HTMLParser, ParseError class XMLParserTestCase(unittest.TestCase): @@ -59,6 +59,36 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_html_entity_with_dtd(self): + text = """ +   + """ + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[2] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + + def test_html_entity_without_dtd(self): + text = ' ' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + + def test_undefined_entity_with_dtd(self): + text = """ + &junk; + """ + events = XMLParser(StringIO(text)) + self.assertRaises(ParseError, list, events) + + def test_undefined_entity_without_dtd(self): + text = '&junk;' + events = XMLParser(StringIO(text)) + self.assertRaises(ParseError, list, events) + class HTMLParserTestCase(unittest.TestCase):