# HG changeset patch # User cmlenz # Date 1160747169 0 # Node ID 2fd22ce67ed08bf9b4ae42cb0bc23f50f2bc860e # Parent 7487632f467cdec5d0373da5ed2c0ee62cea63ea Ported [361:362] to 0.3.x branch. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ * Fixed bug introduced in 0.3.2 that broke the parsing of templates which declare the same namespace more than once in a nested fashion. + * Fixed the parsing of HTML entity references inside attribute values, both + in the `XMLParser` and the `HTMLParser` classes. Version 0.3.2 diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -21,7 +21,7 @@ import htmlentitydefs from StringIO import StringIO -from genshi.core import Attrs, QName, Stream +from genshi.core import Attrs, QName, Stream, stripentities from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ START_CDATA, END_CDATA, PI, COMMENT @@ -56,6 +56,10 @@ END root """ + _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in + htmlentitydefs.name2codepoint.items()] + _external_dtd = '\n'.join(_entitydefs) + def __init__(self, source, filename=None): """Initialize the parser for the given XML text. 
@@ -85,7 +89,9 @@ # Tell Expat that we'll handle non-XML entities ourselves # (in _handle_other) parser.DefaultHandler = self._handle_other + parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) parser.UseForeignDTD() + parser.ExternalEntityRefHandler = self._build_foreign # Location reporting is only support in Python >= 2.4 if not hasattr(parser, 'CurrentLineNumber'): @@ -126,6 +132,11 @@ def __iter__(self): return iter(self.parse()) + def _build_foreign(self, context, base, sysid, pubid): + parser = self.expat.ExternalEntityParserCreate(context) + parser.ParseFile(StringIO(self._external_dtd)) + return 1 + def _enqueue(self, kind, data=None, pos=None): if pos is None: pos = self._getpos() @@ -277,7 +288,7 @@ for name, value in attrib: # Fixup minimized attributes if value is None: value = name - fixed_attrib.append((name, unicode(value))) + fixed_attrib.append((name, unicode(stripentities(value)))) self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) if tag in self._EMPTY_ELEMS: diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -76,6 +76,15 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\xa0', data) + def test_html_entity_in_attribute(self): + text = '<p title="&nbsp;"/>' + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[0] + self.assertEqual(Stream.START, kind) + self.assertEqual(u'\xa0', data[1].get('title')) + kind, data, pos = events[1] + self.assertEqual(Stream.END, kind) + def test_undefined_entity_with_dtd(self): text = """ @@ -118,6 +127,22 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_html_entity_in_attribute(self): + text = '<p title="&nbsp;"></p>' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[0] + self.assertEqual(Stream.START, kind) + self.assertEqual(u'\xa0', data[1].get('title')) + kind, data, pos = events[1] + self.assertEqual(Stream.END, kind) + + def test_html_entity_in_text(self): + text = '<p>&nbsp;</p>' + events = list(HTMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xa0', data) + def suite(): suite = unittest.TestSuite()