Mercurial > genshi > genshi-test
comparison genshi/tests/input.py @ 932:e53161c2773c
Merge r1140 from py3k:
Add support for Python 3 to core Genshi components (genshi.core, genshi.input and genshi.output):
* default input and output encodings changed from UTF-8 to None (i.e. unicode strings)
* Namespace and QName objects do not call stringrepr in __repr__ in Python 3 since repr() returns a unicode string there.
* track changes to expat parser in Python 3 (mostly it accepts bytes instead of strings)
author | hodgestar |
---|---|
date | Fri, 18 Mar 2011 09:08:12 +0000 |
parents | 0d9e87c6cf6e |
children |
comparison
equal
deleted
inserted
replaced
931:ade3abe742e9 | 932:e53161c2773c |
---|---|
10 # This software consists of voluntary contributions made by many | 10 # This software consists of voluntary contributions made by many |
11 # individuals. For the exact contribution history, see the revision | 11 # individuals. For the exact contribution history, see the revision |
12 # history and logs, available at http://genshi.edgewall.org/log/. | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
13 | 13 |
14 import doctest | 14 import doctest |
15 from StringIO import StringIO | |
16 import sys | 15 import sys |
17 import unittest | 16 import unittest |
18 | 17 |
19 from genshi.core import Attrs, Stream | 18 from genshi.core import Attrs, Stream |
20 from genshi.input import XMLParser, HTMLParser, ParseError | 19 from genshi.input import XMLParser, HTMLParser, ParseError |
20 from genshi.compat import StringIO, BytesIO | |
21 | 21 |
22 | 22 |
23 class XMLParserTestCase(unittest.TestCase): | 23 class XMLParserTestCase(unittest.TestCase): |
24 | 24 |
25 def test_text_node_pos_single_line(self): | 25 def test_text_node_pos_single_line(self): |
57 self.assertEqual(Stream.TEXT, kind) | 57 self.assertEqual(Stream.TEXT, kind) |
58 self.assertEqual(u'\u2013', data) | 58 self.assertEqual(u'\u2013', data) |
59 | 59 |
60 def test_latin1_encoded(self): | 60 def test_latin1_encoded(self): |
61 text = u'<div>\xf6</div>'.encode('iso-8859-1') | 61 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
62 events = list(XMLParser(StringIO(text), encoding='iso-8859-1')) | 62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1')) |
63 kind, data, pos = events[1] | 63 kind, data, pos = events[1] |
64 self.assertEqual(Stream.TEXT, kind) | 64 self.assertEqual(Stream.TEXT, kind) |
65 self.assertEqual(u'\xf6', data) | 65 self.assertEqual(u'\xf6', data) |
66 | 66 |
67 def test_latin1_encoded_xmldecl(self): | 67 def test_latin1_encoded_xmldecl(self): |
68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> | 68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> |
69 <div>\xf6</div> | 69 <div>\xf6</div> |
70 """.encode('iso-8859-1') | 70 """.encode('iso-8859-1') |
71 events = list(XMLParser(StringIO(text))) | 71 events = list(XMLParser(BytesIO(text))) |
72 kind, data, pos = events[2] | 72 kind, data, pos = events[2] |
73 self.assertEqual(Stream.TEXT, kind) | 73 self.assertEqual(Stream.TEXT, kind) |
74 self.assertEqual(u'\xf6', data) | 74 self.assertEqual(u'\xf6', data) |
75 | 75 |
76 def test_html_entity_with_dtd(self): | 76 def test_html_entity_with_dtd(self): |
114 | 114 |
115 | 115 |
116 class HTMLParserTestCase(unittest.TestCase): | 116 class HTMLParserTestCase(unittest.TestCase): |
117 | 117 |
118 def test_text_node_pos_single_line(self): | 118 def test_text_node_pos_single_line(self): |
119 text = '<elem>foo bar</elem>' | 119 text = u'<elem>foo bar</elem>' |
120 events = list(HTMLParser(StringIO(text))) | 120 events = list(HTMLParser(StringIO(text))) |
121 kind, data, pos = events[1] | 121 kind, data, pos = events[1] |
122 self.assertEqual(Stream.TEXT, kind) | 122 self.assertEqual(Stream.TEXT, kind) |
123 self.assertEqual('foo bar', data) | 123 self.assertEqual('foo bar', data) |
124 self.assertEqual((None, 1, 6), pos) | 124 self.assertEqual((None, 1, 6), pos) |
125 | 125 |
126 def test_text_node_pos_multi_line(self): | 126 def test_text_node_pos_multi_line(self): |
127 text = '''<elem>foo | 127 text = u'''<elem>foo |
128 bar</elem>''' | 128 bar</elem>''' |
129 events = list(HTMLParser(StringIO(text))) | 129 events = list(HTMLParser(StringIO(text))) |
130 kind, data, pos = events[1] | 130 kind, data, pos = events[1] |
131 self.assertEqual(Stream.TEXT, kind) | 131 self.assertEqual(Stream.TEXT, kind) |
132 self.assertEqual('foo\nbar', data) | 132 self.assertEqual('foo\nbar', data) |
133 self.assertEqual((None, 1, 6), pos) | 133 self.assertEqual((None, 1, 6), pos) |
134 | 134 |
135 def test_input_encoding_text(self): | 135 def test_input_encoding_text(self): |
136 text = u'<div>\xf6</div>'.encode('iso-8859-1') | 136 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
137 events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) | 137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
138 kind, data, pos = events[1] | 138 kind, data, pos = events[1] |
139 self.assertEqual(Stream.TEXT, kind) | 139 self.assertEqual(Stream.TEXT, kind) |
140 self.assertEqual(u'\xf6', data) | 140 self.assertEqual(u'\xf6', data) |
141 | 141 |
142 def test_input_encoding_attribute(self): | 142 def test_input_encoding_attribute(self): |
143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') | 143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') |
144 events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) | 144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
145 kind, (tag, attrib), pos = events[0] | 145 kind, (tag, attrib), pos = events[0] |
146 self.assertEqual(Stream.START, kind) | 146 self.assertEqual(Stream.START, kind) |
147 self.assertEqual(u'\xf6', attrib.get('title')) | 147 self.assertEqual(u'\xf6', attrib.get('title')) |
148 | 148 |
149 def test_unicode_input(self): | 149 def test_unicode_input(self): |
152 kind, data, pos = events[1] | 152 kind, data, pos = events[1] |
153 self.assertEqual(Stream.TEXT, kind) | 153 self.assertEqual(Stream.TEXT, kind) |
154 self.assertEqual(u'\u2013', data) | 154 self.assertEqual(u'\u2013', data) |
155 | 155 |
156 def test_html_entity_in_attribute(self): | 156 def test_html_entity_in_attribute(self): |
157 text = '<p title="&nbsp;"></p>' | 157 text = u'<p title="&nbsp;"></p>' |
158 events = list(HTMLParser(StringIO(text))) | 158 events = list(HTMLParser(StringIO(text))) |
159 kind, data, pos = events[0] | 159 kind, data, pos = events[0] |
160 self.assertEqual(Stream.START, kind) | 160 self.assertEqual(Stream.START, kind) |
161 self.assertEqual(u'\xa0', data[1].get('title')) | 161 self.assertEqual(u'\xa0', data[1].get('title')) |
162 kind, data, pos = events[1] | 162 kind, data, pos = events[1] |
163 self.assertEqual(Stream.END, kind) | 163 self.assertEqual(Stream.END, kind) |
164 | 164 |
165 def test_html_entity_in_text(self): | 165 def test_html_entity_in_text(self): |
166 text = '<p>&nbsp;</p>' | 166 text = u'<p>&nbsp;</p>' |
167 events = list(HTMLParser(StringIO(text))) | 167 events = list(HTMLParser(StringIO(text))) |
168 kind, data, pos = events[1] | 168 kind, data, pos = events[1] |
169 self.assertEqual(Stream.TEXT, kind) | 169 self.assertEqual(Stream.TEXT, kind) |
170 self.assertEqual(u'\xa0', data) | 170 self.assertEqual(u'\xa0', data) |
171 | 171 |
172 def test_processing_instruction(self): | 172 def test_processing_instruction(self): |
173 text = '<?php echo "Foobar" ?>' | 173 text = u'<?php echo "Foobar" ?>' |
174 events = list(HTMLParser(StringIO(text))) | 174 events = list(HTMLParser(StringIO(text))) |
175 kind, (target, data), pos = events[0] | 175 kind, (target, data), pos = events[0] |
176 self.assertEqual(Stream.PI, kind) | 176 self.assertEqual(Stream.PI, kind) |
177 self.assertEqual('php', target) | 177 self.assertEqual('php', target) |
178 self.assertEqual('echo "Foobar"', data) | 178 self.assertEqual('echo "Foobar"', data) |
203 self.assertEqual('1.0', version) | 203 self.assertEqual('1.0', version) |
204 self.assertEqual(None, encoding) | 204 self.assertEqual(None, encoding) |
205 self.assertEqual(1, standalone) | 205 self.assertEqual(1, standalone) |
206 | 206 |
207 def test_processing_instruction_trailing_qmark(self): | 207 def test_processing_instruction_trailing_qmark(self): |
208 text = '<?php echo "Foobar" ??>' | 208 text = u'<?php echo "Foobar" ??>' |
209 events = list(HTMLParser(StringIO(text))) | 209 events = list(HTMLParser(StringIO(text))) |
210 kind, (target, data), pos = events[0] | 210 kind, (target, data), pos = events[0] |
211 self.assertEqual(Stream.PI, kind) | 211 self.assertEqual(Stream.PI, kind) |
212 self.assertEqual('php', target) | 212 self.assertEqual('php', target) |
213 self.assertEqual('echo "Foobar" ?', data) | 213 self.assertEqual('echo "Foobar" ?', data) |
214 | 214 |
215 def test_out_of_order_tags1(self): | 215 def test_out_of_order_tags1(self): |
216 text = '<span><b>Foobar</span></b>' | 216 text = u'<span><b>Foobar</span></b>' |
217 events = list(HTMLParser(StringIO(text))) | 217 events = list(HTMLParser(StringIO(text))) |
218 self.assertEqual(5, len(events)) | 218 self.assertEqual(5, len(events)) |
219 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) | 219 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
220 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) | 220 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
221 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) | 221 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
222 self.assertEqual((Stream.END, 'b'), events[3][:2]) | 222 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
223 self.assertEqual((Stream.END, 'span'), events[4][:2]) | 223 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
224 | 224 |
225 def test_out_of_order_tags2(self): | 225 def test_out_of_order_tags2(self): |
226 text = '<span class="baz"><b><i>Foobar</span></b></i>' | 226 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8') |
227 events = list(HTMLParser(StringIO(text))) | 227 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) |
228 self.assertEqual(7, len(events)) | 228 self.assertEqual(7, len(events)) |
229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), | 229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), |
230 events[0][:2]) | 230 events[0][:2]) |
231 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) | 231 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
232 self.assertEqual((Stream.START, ('i', ())), events[2][:2]) | 232 self.assertEqual((Stream.START, ('i', ())), events[2][:2]) |
234 self.assertEqual((Stream.END, 'i'), events[4][:2]) | 234 self.assertEqual((Stream.END, 'i'), events[4][:2]) |
235 self.assertEqual((Stream.END, 'b'), events[5][:2]) | 235 self.assertEqual((Stream.END, 'b'), events[5][:2]) |
236 self.assertEqual((Stream.END, 'span'), events[6][:2]) | 236 self.assertEqual((Stream.END, 'span'), events[6][:2]) |
237 | 237 |
238 def test_out_of_order_tags3(self): | 238 def test_out_of_order_tags3(self): |
239 text = '<span><b>Foobar</i>' | 239 text = u'<span><b>Foobar</i>'.encode('utf-8') |
240 events = list(HTMLParser(StringIO(text))) | 240 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) |
241 self.assertEqual(5, len(events)) | 241 self.assertEqual(5, len(events)) |
242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) | 242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
243 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) | 243 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
244 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) | 244 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
245 self.assertEqual((Stream.END, 'b'), events[3][:2]) | 245 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
246 self.assertEqual((Stream.END, 'span'), events[4][:2]) | 246 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
247 | 247 |
248 def test_hex_charref(self): | 248 def test_hex_charref(self): |
249 text = '<span>&#x27;</span>' | 249 text = u'<span>&#x27;</span>' |
250 events = list(HTMLParser(StringIO(text))) | 250 events = list(HTMLParser(StringIO(text))) |
251 self.assertEqual(3, len(events)) | 251 self.assertEqual(3, len(events)) |
252 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) | 252 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
253 self.assertEqual((Stream.TEXT, "'"), events[1][:2]) | 253 self.assertEqual((Stream.TEXT, "'"), events[1][:2]) |
254 self.assertEqual((Stream.END, 'span'), events[2][:2]) | 254 self.assertEqual((Stream.END, 'span'), events[2][:2]) |