Mercurial > genshi > mirror
diff genshi/input.py @ 293:e17b7459b515 trunk
Fix a bug in the XML parser where HTML entity references inside attribute values would get pulled out of the attribute value and instead be added as a text node just before the associated start tag. Thanks to Hamish Lawson for pointing out the problem (http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d).
author | cmlenz |
---|---|
date | Fri, 13 Oct 2006 13:42:38 +0000 |
parents | 94f9f2cc66c8 |
children | 8de1ff534d22 |
line wrap: on
line diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -21,7 +21,7 @@
 import htmlentitydefs
 from StringIO import StringIO
 
-from genshi.core import Attrs, QName, Stream
+from genshi.core import Attrs, QName, Stream, stripentities
 from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
                         START_CDATA, END_CDATA, PI, COMMENT
 
@@ -71,6 +71,10 @@
         END root
     """
 
+    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
+                   htmlentitydefs.name2codepoint.items()]
+    _external_dtd = '\n'.join(_entitydefs)
+
     def __init__(self, source, filename=None):
         """Initialize the parser for the given XML text.
 
@@ -100,7 +104,9 @@
         # Tell Expat that we'll handle non-XML entities ourselves
         # (in _handle_other)
         parser.DefaultHandler = self._handle_other
+        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
         parser.UseForeignDTD()
+        parser.ExternalEntityRefHandler = self._build_foreign
 
         # Location reporting is only support in Python >= 2.4
         if not hasattr(parser, 'CurrentLineNumber'):
@@ -141,6 +147,11 @@
     def __iter__(self):
         return iter(self.parse())
 
+    def _build_foreign(self, context, base, sysid, pubid):
+        parser = self.expat.ExternalEntityParserCreate(context)
+        parser.ParseFile(StringIO(self._external_dtd))
+        return 1
+
     def _enqueue(self, kind, data=None, pos=None):
         if pos is None:
             pos = self._getpos()
@@ -292,7 +303,7 @@
         for name, value in attrib: # Fixup minimized attributes
             if value is None:
                 value = name
-            fixed_attrib.append((name, unicode(value)))
+            fixed_attrib.append((name, unicode(stripentities(value))))
 
         self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
         if tag in self._EMPTY_ELEMS: