Mercurial > genshi > genshi-test
changeset 209:5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
* Enable the `XMLParser` to handle HTML entities without requiring the declaration of an HTML document type.
author | cmlenz |
---|---|
date | Tue, 29 Aug 2006 16:34:40 +0000 |
parents | 835203f3b8fd |
children | c0c70dc5bf95 |
files | markup/input.py markup/tests/input.py |
diffstat | 2 files changed, 42 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/markup/input.py +++ b/markup/input.py @@ -80,7 +80,11 @@ parser.EndCdataSectionHandler = self._handle_end_cdata parser.ProcessingInstructionHandler = self._handle_pi parser.CommentHandler = self._handle_comment + + # Tell Expat that we'll handle non-XML entities ourselves + # (in _handle_other) parser.DefaultHandler = self._handle_other + parser.UseForeignDTD() # Location reporting is only support in Python >= 2.4 if not hasattr(parser, 'CurrentLineNumber'): @@ -184,9 +188,13 @@ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) self._enqueue(TEXT, text) except KeyError: - lineno, offset = self._getpos() - raise expat.error("undefined entity %s: line %d, column %d" % - (text, lineno, offset)) + filename, lineno, offset = self._getpos() + error = expat.error('undefined entity "%s": line %d, column %d' + % (text, lineno, offset)) + error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY + error.lineno = lineno + error.offset = offset + raise error def XML(text):
--- a/markup/tests/input.py +++ b/markup/tests/input.py @@ -17,7 +17,7 @@ import unittest from markup.core import Stream -from markup.input import XMLParser, HTMLParser +from markup.input import XMLParser, HTMLParser, ParseError class XMLParserTestCase(unittest.TestCase): @@ -59,6 +59,36 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_html_entity_with_dtd(self): + text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + <html>&nbsp;</html> + """ + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[2] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + + def test_html_entity_without_dtd(self): + text = '<html>&nbsp;</html>' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + + def test_undefined_entity_with_dtd(self): + text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + <html>&junk;</html> + """ + events = XMLParser(StringIO(text)) + self.assertRaises(ParseError, list, events) + + def test_undefined_entity_without_dtd(self): + text = '<html>&junk;</html>' + events = XMLParser(StringIO(text)) + self.assertRaises(ParseError, list, events) + class HTMLParserTestCase(unittest.TestCase):