Mercurial > genshi > mirror
changeset 295:689da4ab5082 stable-0.3.x
Ported [361:362] to 0.3.x branch.
author | cmlenz |
---|---|
date | Fri, 13 Oct 2006 13:46:09 +0000 |
parents | 060ade245194 |
children | 3df37819534b |
files | ChangeLog genshi/input.py genshi/tests/input.py |
diffstat | 3 files changed, 40 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ * Fixed bug introduced in 0.3.2 that broke the parsing of templates which declare the same namespace more than once in a nested fashion. + * Fixed the parsing of HTML entity references inside attribute values, both + in the `XMLParser` and the `HTMLParser` classes. Version 0.3.2
--- a/genshi/input.py +++ b/genshi/input.py @@ -21,7 +21,7 @@ import htmlentitydefs from StringIO import StringIO -from genshi.core import Attrs, QName, Stream +from genshi.core import Attrs, QName, Stream, stripentities from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ START_CDATA, END_CDATA, PI, COMMENT @@ -56,6 +56,10 @@ END root """ + _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in + htmlentitydefs.name2codepoint.items()] + _external_dtd = '\n'.join(_entitydefs) + def __init__(self, source, filename=None): """Initialize the parser for the given XML text. @@ -85,7 +89,9 @@ # Tell Expat that we'll handle non-XML entities ourselves # (in _handle_other) parser.DefaultHandler = self._handle_other + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) parser.UseForeignDTD() + parser.ExternalEntityRefHandler = self._build_foreign # Location reporting is only support in Python >= 2.4 if not hasattr(parser, 'CurrentLineNumber'): @@ -126,6 +132,11 @@ def __iter__(self): return iter(self.parse()) + def _build_foreign(self, context, base, sysid, pubid): + parser = self.expat.ExternalEntityParserCreate(context) + parser.ParseFile(StringIO(self._external_dtd)) + return 1 + def _enqueue(self, kind, data=None, pos=None): if pos is None: pos = self._getpos() @@ -277,7 +288,7 @@ for name, value in attrib: # Fixup minimized attributes if value is None: value = name - fixed_attrib.append((name, unicode(value))) + fixed_attrib.append((name, unicode(stripentities(value)))) self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) if tag in self._EMPTY_ELEMS:
--- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -76,6 +76,15 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\xa0', data) + def test_html_entity_in_attribute(self): + text = '<p title="&nbsp;"/>' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[0] + self.assertEqual(Stream.START, kind) + self.assertEqual(u'\xa0', data[1].get('title')) + kind, data, pos = events[1] + self.assertEqual(Stream.END, kind) + def test_undefined_entity_with_dtd(self): text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> @@ -118,6 +127,22 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_html_entity_in_attribute(self): + text = '<p title="&nbsp;"></p>' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[0] + self.assertEqual(Stream.START, kind) + self.assertEqual(u'\xa0', data[1].get('title')) + kind, data, pos = events[1] + self.assertEqual(Stream.END, kind) + + def test_html_entity_in_text(self): + text = '<p>&nbsp;</p>' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + def suite(): suite = unittest.TestSuite()