# HG changeset patch # User cmlenz # Date 1174415272 0 # Node ID 7589a0e51001d80532b38ca0dd6c8a97d604fb60 # Parent 95089b6e37ca3ddfb1ce22c2645b01dcc4357f4a Applied patch for #106 (handling of hex charrefs in HTML parser). diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -338,7 +338,10 @@ self._enqueue(TEXT, text) def handle_charref(self, name): - text = unichr(int(name)) + if name.lower().startswith('x'): + text = unichr(int(name[1:], 16)) + else: + text = unichr(int(name)) self._enqueue(TEXT, text) def handle_entityref(self, name): diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -222,6 +222,14 @@ self.assertEqual((Stream.END, 'b'), events[3][:2]) self.assertEqual((Stream.END, 'span'), events[4][:2]) + def test_hex_charref(self): + text = '<span>&#x27;</span>' + events = list(HTMLParser(StringIO(text))) + self.assertEqual(3, len(events)) + self.assertEqual((Stream.START, ('span', ())), events[0][:2]) + self.assertEqual((Stream.TEXT, "'"), events[1][:2]) + self.assertEqual((Stream.END, 'span'), events[2][:2]) + def suite(): suite = unittest.TestSuite()