comparison genshi/tests/input.py @ 932:e53161c2773c

Merge r1140 from py3k: add Python 3 support to the core Genshi components (genshi.core, genshi.input and genshi.output):
* The default input and output encodings changed from UTF-8 to None (i.e. unicode strings).
* Namespace and QName objects do not call stringrepr in __repr__ on Python 3, since repr() returns a unicode string there.
* Track changes to the expat parser in Python 3 (mostly, it accepts bytes instead of strings).
author hodgestar
date Fri, 18 Mar 2011 09:08:12 +0000
parents 0d9e87c6cf6e
children
comparison
equal deleted inserted replaced
931:ade3abe742e9 932:e53161c2773c
10 # This software consists of voluntary contributions made by many 10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision 11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://genshi.edgewall.org/log/. 12 # history and logs, available at http://genshi.edgewall.org/log/.
13 13
14 import doctest 14 import doctest
15 from StringIO import StringIO
16 import sys 15 import sys
17 import unittest 16 import unittest
18 17
19 from genshi.core import Attrs, Stream 18 from genshi.core import Attrs, Stream
20 from genshi.input import XMLParser, HTMLParser, ParseError 19 from genshi.input import XMLParser, HTMLParser, ParseError
20 from genshi.compat import StringIO, BytesIO
21 21
22 22
23 class XMLParserTestCase(unittest.TestCase): 23 class XMLParserTestCase(unittest.TestCase):
24 24
25 def test_text_node_pos_single_line(self): 25 def test_text_node_pos_single_line(self):
57 self.assertEqual(Stream.TEXT, kind) 57 self.assertEqual(Stream.TEXT, kind)
58 self.assertEqual(u'\u2013', data) 58 self.assertEqual(u'\u2013', data)
59 59
60 def test_latin1_encoded(self): 60 def test_latin1_encoded(self):
61 text = u'<div>\xf6</div>'.encode('iso-8859-1') 61 text = u'<div>\xf6</div>'.encode('iso-8859-1')
62 events = list(XMLParser(StringIO(text), encoding='iso-8859-1')) 62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1'))
63 kind, data, pos = events[1] 63 kind, data, pos = events[1]
64 self.assertEqual(Stream.TEXT, kind) 64 self.assertEqual(Stream.TEXT, kind)
65 self.assertEqual(u'\xf6', data) 65 self.assertEqual(u'\xf6', data)
66 66
67 def test_latin1_encoded_xmldecl(self): 67 def test_latin1_encoded_xmldecl(self):
68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> 68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
69 <div>\xf6</div> 69 <div>\xf6</div>
70 """.encode('iso-8859-1') 70 """.encode('iso-8859-1')
71 events = list(XMLParser(StringIO(text))) 71 events = list(XMLParser(BytesIO(text)))
72 kind, data, pos = events[2] 72 kind, data, pos = events[2]
73 self.assertEqual(Stream.TEXT, kind) 73 self.assertEqual(Stream.TEXT, kind)
74 self.assertEqual(u'\xf6', data) 74 self.assertEqual(u'\xf6', data)
75 75
76 def test_html_entity_with_dtd(self): 76 def test_html_entity_with_dtd(self):
114 114
115 115
116 class HTMLParserTestCase(unittest.TestCase): 116 class HTMLParserTestCase(unittest.TestCase):
117 117
118 def test_text_node_pos_single_line(self): 118 def test_text_node_pos_single_line(self):
119 text = '<elem>foo bar</elem>' 119 text = u'<elem>foo bar</elem>'
120 events = list(HTMLParser(StringIO(text))) 120 events = list(HTMLParser(StringIO(text)))
121 kind, data, pos = events[1] 121 kind, data, pos = events[1]
122 self.assertEqual(Stream.TEXT, kind) 122 self.assertEqual(Stream.TEXT, kind)
123 self.assertEqual('foo bar', data) 123 self.assertEqual('foo bar', data)
124 self.assertEqual((None, 1, 6), pos) 124 self.assertEqual((None, 1, 6), pos)
125 125
126 def test_text_node_pos_multi_line(self): 126 def test_text_node_pos_multi_line(self):
127 text = '''<elem>foo 127 text = u'''<elem>foo
128 bar</elem>''' 128 bar</elem>'''
129 events = list(HTMLParser(StringIO(text))) 129 events = list(HTMLParser(StringIO(text)))
130 kind, data, pos = events[1] 130 kind, data, pos = events[1]
131 self.assertEqual(Stream.TEXT, kind) 131 self.assertEqual(Stream.TEXT, kind)
132 self.assertEqual('foo\nbar', data) 132 self.assertEqual('foo\nbar', data)
133 self.assertEqual((None, 1, 6), pos) 133 self.assertEqual((None, 1, 6), pos)
134 134
135 def test_input_encoding_text(self): 135 def test_input_encoding_text(self):
136 text = u'<div>\xf6</div>'.encode('iso-8859-1') 136 text = u'<div>\xf6</div>'.encode('iso-8859-1')
137 events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) 137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
138 kind, data, pos = events[1] 138 kind, data, pos = events[1]
139 self.assertEqual(Stream.TEXT, kind) 139 self.assertEqual(Stream.TEXT, kind)
140 self.assertEqual(u'\xf6', data) 140 self.assertEqual(u'\xf6', data)
141 141
142 def test_input_encoding_attribute(self): 142 def test_input_encoding_attribute(self):
143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') 143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
144 events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) 144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
145 kind, (tag, attrib), pos = events[0] 145 kind, (tag, attrib), pos = events[0]
146 self.assertEqual(Stream.START, kind) 146 self.assertEqual(Stream.START, kind)
147 self.assertEqual(u'\xf6', attrib.get('title')) 147 self.assertEqual(u'\xf6', attrib.get('title'))
148 148
149 def test_unicode_input(self): 149 def test_unicode_input(self):
152 kind, data, pos = events[1] 152 kind, data, pos = events[1]
153 self.assertEqual(Stream.TEXT, kind) 153 self.assertEqual(Stream.TEXT, kind)
154 self.assertEqual(u'\u2013', data) 154 self.assertEqual(u'\u2013', data)
155 155
156 def test_html_entity_in_attribute(self): 156 def test_html_entity_in_attribute(self):
157 text = '<p title="&nbsp;"></p>' 157 text = u'<p title="&nbsp;"></p>'
158 events = list(HTMLParser(StringIO(text))) 158 events = list(HTMLParser(StringIO(text)))
159 kind, data, pos = events[0] 159 kind, data, pos = events[0]
160 self.assertEqual(Stream.START, kind) 160 self.assertEqual(Stream.START, kind)
161 self.assertEqual(u'\xa0', data[1].get('title')) 161 self.assertEqual(u'\xa0', data[1].get('title'))
162 kind, data, pos = events[1] 162 kind, data, pos = events[1]
163 self.assertEqual(Stream.END, kind) 163 self.assertEqual(Stream.END, kind)
164 164
165 def test_html_entity_in_text(self): 165 def test_html_entity_in_text(self):
166 text = '<p>&nbsp;</p>' 166 text = u'<p>&nbsp;</p>'
167 events = list(HTMLParser(StringIO(text))) 167 events = list(HTMLParser(StringIO(text)))
168 kind, data, pos = events[1] 168 kind, data, pos = events[1]
169 self.assertEqual(Stream.TEXT, kind) 169 self.assertEqual(Stream.TEXT, kind)
170 self.assertEqual(u'\xa0', data) 170 self.assertEqual(u'\xa0', data)
171 171
172 def test_processing_instruction(self): 172 def test_processing_instruction(self):
173 text = '<?php echo "Foobar" ?>' 173 text = u'<?php echo "Foobar" ?>'
174 events = list(HTMLParser(StringIO(text))) 174 events = list(HTMLParser(StringIO(text)))
175 kind, (target, data), pos = events[0] 175 kind, (target, data), pos = events[0]
176 self.assertEqual(Stream.PI, kind) 176 self.assertEqual(Stream.PI, kind)
177 self.assertEqual('php', target) 177 self.assertEqual('php', target)
178 self.assertEqual('echo "Foobar"', data) 178 self.assertEqual('echo "Foobar"', data)
203 self.assertEqual('1.0', version) 203 self.assertEqual('1.0', version)
204 self.assertEqual(None, encoding) 204 self.assertEqual(None, encoding)
205 self.assertEqual(1, standalone) 205 self.assertEqual(1, standalone)
206 206
207 def test_processing_instruction_trailing_qmark(self): 207 def test_processing_instruction_trailing_qmark(self):
208 text = '<?php echo "Foobar" ??>' 208 text = u'<?php echo "Foobar" ??>'
209 events = list(HTMLParser(StringIO(text))) 209 events = list(HTMLParser(StringIO(text)))
210 kind, (target, data), pos = events[0] 210 kind, (target, data), pos = events[0]
211 self.assertEqual(Stream.PI, kind) 211 self.assertEqual(Stream.PI, kind)
212 self.assertEqual('php', target) 212 self.assertEqual('php', target)
213 self.assertEqual('echo "Foobar" ?', data) 213 self.assertEqual('echo "Foobar" ?', data)
214 214
215 def test_out_of_order_tags1(self): 215 def test_out_of_order_tags1(self):
216 text = '<span><b>Foobar</span></b>' 216 text = u'<span><b>Foobar</span></b>'
217 events = list(HTMLParser(StringIO(text))) 217 events = list(HTMLParser(StringIO(text)))
218 self.assertEqual(5, len(events)) 218 self.assertEqual(5, len(events))
219 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) 219 self.assertEqual((Stream.START, ('span', ())), events[0][:2])
220 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) 220 self.assertEqual((Stream.START, ('b', ())), events[1][:2])
221 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) 221 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
222 self.assertEqual((Stream.END, 'b'), events[3][:2]) 222 self.assertEqual((Stream.END, 'b'), events[3][:2])
223 self.assertEqual((Stream.END, 'span'), events[4][:2]) 223 self.assertEqual((Stream.END, 'span'), events[4][:2])
224 224
225 def test_out_of_order_tags2(self): 225 def test_out_of_order_tags2(self):
226 text = '<span class="baz"><b><i>Foobar</span></b></i>' 226 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8')
227 events = list(HTMLParser(StringIO(text))) 227 events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
228 self.assertEqual(7, len(events)) 228 self.assertEqual(7, len(events))
229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), 229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
230 events[0][:2]) 230 events[0][:2])
231 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) 231 self.assertEqual((Stream.START, ('b', ())), events[1][:2])
232 self.assertEqual((Stream.START, ('i', ())), events[2][:2]) 232 self.assertEqual((Stream.START, ('i', ())), events[2][:2])
234 self.assertEqual((Stream.END, 'i'), events[4][:2]) 234 self.assertEqual((Stream.END, 'i'), events[4][:2])
235 self.assertEqual((Stream.END, 'b'), events[5][:2]) 235 self.assertEqual((Stream.END, 'b'), events[5][:2])
236 self.assertEqual((Stream.END, 'span'), events[6][:2]) 236 self.assertEqual((Stream.END, 'span'), events[6][:2])
237 237
238 def test_out_of_order_tags3(self): 238 def test_out_of_order_tags3(self):
239 text = '<span><b>Foobar</i>' 239 text = u'<span><b>Foobar</i>'.encode('utf-8')
240 events = list(HTMLParser(StringIO(text))) 240 events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
241 self.assertEqual(5, len(events)) 241 self.assertEqual(5, len(events))
242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) 242 self.assertEqual((Stream.START, ('span', ())), events[0][:2])
243 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) 243 self.assertEqual((Stream.START, ('b', ())), events[1][:2])
244 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) 244 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
245 self.assertEqual((Stream.END, 'b'), events[3][:2]) 245 self.assertEqual((Stream.END, 'b'), events[3][:2])
246 self.assertEqual((Stream.END, 'span'), events[4][:2]) 246 self.assertEqual((Stream.END, 'span'), events[4][:2])
247 247
248 def test_hex_charref(self): 248 def test_hex_charref(self):
249 text = '<span>&#x27;</span>' 249 text = u'<span>&#x27;</span>'
250 events = list(HTMLParser(StringIO(text))) 250 events = list(HTMLParser(StringIO(text)))
251 self.assertEqual(3, len(events)) 251 self.assertEqual(3, len(events))
252 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) 252 self.assertEqual((Stream.START, ('span', ())), events[0][:2])
253 self.assertEqual((Stream.TEXT, "'"), events[1][:2]) 253 self.assertEqual((Stream.TEXT, "'"), events[1][:2])
254 self.assertEqual((Stream.END, 'span'), events[2][:2]) 254 self.assertEqual((Stream.END, 'span'), events[2][:2])
Copyright (C) 2012-2017 Edgewall Software