changeset 295:689da4ab5082 stable-0.3.x

Ported [361:362] to 0.3.x branch.
author cmlenz
date Fri, 13 Oct 2006 13:46:09 +0000
parents 060ade245194
children 3df37819534b
files ChangeLog genshi/input.py genshi/tests/input.py
diffstat 3 files changed, 40 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,8 @@
 
  * Fixed bug introduced in 0.3.2 that broke the parsing of templates which
    declare the same namespace more than once in a nested fashion.
+ * Fixed the parsing of HTML entity references inside attribute values, both
+   in the `XMLParser` and the `HTMLParser` classes.
 
 
 Version 0.3.2
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -21,7 +21,7 @@
 import htmlentitydefs
 from StringIO import StringIO
 
-from genshi.core import Attrs, QName, Stream
+from genshi.core import Attrs, QName, Stream, stripentities
 from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \
                         START_CDATA, END_CDATA, PI, COMMENT
 
@@ -56,6 +56,10 @@
     END root
     """
 
+    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
+                   htmlentitydefs.name2codepoint.items()]
+    _external_dtd = '\n'.join(_entitydefs)
+
     def __init__(self, source, filename=None):
         """Initialize the parser for the given XML text.
         
@@ -85,7 +89,9 @@
         # Tell Expat that we'll handle non-XML entities ourselves
         # (in _handle_other)
         parser.DefaultHandler = self._handle_other
+        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
         parser.UseForeignDTD()
+        parser.ExternalEntityRefHandler = self._build_foreign
 
         # Location reporting is only support in Python >= 2.4
         if not hasattr(parser, 'CurrentLineNumber'):
@@ -126,6 +132,11 @@
     def __iter__(self):
         return iter(self.parse())
 
+    def _build_foreign(self, context, base, sysid, pubid):
+        parser = self.expat.ExternalEntityParserCreate(context)
+        parser.ParseFile(StringIO(self._external_dtd))
+        return 1
+
     def _enqueue(self, kind, data=None, pos=None):
         if pos is None:
             pos = self._getpos()
@@ -277,7 +288,7 @@
         for name, value in attrib: # Fixup minimized attributes
             if value is None:
                 value = name
-            fixed_attrib.append((name, unicode(value)))
+            fixed_attrib.append((name, unicode(stripentities(value))))
 
         self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
         if tag in self._EMPTY_ELEMS:
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -76,6 +76,15 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xa0', data)
 
+    def test_html_entity_in_attribute(self):
+        text = '<p title="&nbsp;"/>'
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[0]
+        self.assertEqual(Stream.START, kind)
+        self.assertEqual(u'\xa0', data[1].get('title'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.END, kind)
+
     def test_undefined_entity_with_dtd(self):
         text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -118,6 +127,22 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\u2013', data)
 
+    def test_html_entity_in_attribute(self):
+        text = '<p title="&nbsp;"></p>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, data, pos = events[0]
+        self.assertEqual(Stream.START, kind)
+        self.assertEqual(u'\xa0', data[1].get('title'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.END, kind)
+
+    def test_html_entity_in_text(self):
+        text = '<p>&nbsp;</p>'
+        events = list(HTMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xa0', data)
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software