Mercurial > genshi > genshi-test
annotate genshi/tests/input.py @ 932:e53161c2773c
Merge r1140 from py3k:
add support for python 3 to core genshi components (genshi.core, genshi.input and genshi.output):
* default input and output encodings changed from UTF-8 to None (i.e. unicode strings)
* Namespace and QName objects do not call stringrepr in __repr__ in Python 3 since repr() returns a unicode string there.
* track changes to expat parser in Python 3 (mostly it accepts bytes instead of strings)
author | hodgestar |
---|---|
date | Fri, 18 Mar 2011 09:08:12 +0000 |
parents | 0d9e87c6cf6e |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
3 # Copyright (C) 2006-2009 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
1
diff
changeset
|
14 import doctest |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
15 import sys |
1 | 16 import unittest |
17 | |
378
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
18 from genshi.core import Attrs, Stream |
230 | 19 from genshi.input import XMLParser, HTMLParser, ParseError |
932 | 20 from genshi.compat import StringIO, BytesIO |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
21 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
22 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
23 class XMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
24 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
25 def test_text_node_pos_single_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
26 text = '<elem>foo bar</elem>' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
27 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
28 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
29 self.assertEqual(Stream.TEXT, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
30 self.assertEqual('foo bar', data) |
750 | 31 self.assertEqual((None, 1, 6), pos) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
32 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
33 def test_text_node_pos_multi_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
34 text = '''<elem>foo |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
35 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
36 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
37 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
38 self.assertEqual(Stream.TEXT, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
39 self.assertEqual('foo\nbar', data) |
750 | 40 self.assertEqual((None, 1, -1), pos) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
41 |
160 | 42 def test_element_attribute_order(self): |
43 text = '<elem title="baz" id="foo" class="bar" />' | |
44 events = list(XMLParser(StringIO(text))) | |
45 kind, data, pos = events[0] | |
46 self.assertEqual(Stream.START, kind) | |
47 tag, attrib = data | |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
48 self.assertEqual('elem', tag) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
49 self.assertEqual(('title', 'baz'), attrib[0]) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
50 self.assertEqual(('id', 'foo'), attrib[1]) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
51 self.assertEqual(('class', 'bar'), attrib[2]) |
160 | 52 |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
53 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
54 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
55 events = list(XMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
56 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
57 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
58 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
59 |
316
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
60 def test_latin1_encoded(self): |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
61 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
932 | 62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1')) |
316
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
63 kind, data, pos = events[1] |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
64 self.assertEqual(Stream.TEXT, kind) |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
65 self.assertEqual(u'\xf6', data) |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
66 |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
67 def test_latin1_encoded_xmldecl(self): |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
69 <div>\xf6</div> |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
70 """.encode('iso-8859-1') |
932 | 71 events = list(XMLParser(BytesIO(text))) |
460
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
72 kind, data, pos = events[2] |
316
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
73 self.assertEqual(Stream.TEXT, kind) |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
74 self.assertEqual(u'\xf6', data) |
4ab9edf5e83b
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
75 |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
76 def test_html_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
77 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
79 <html>&nbsp;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
80 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
81 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
82 kind, data, pos = events[2] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
83 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
84 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
85 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
86 def test_html_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
87 text = '<html>&nbsp;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
88 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
89 kind, data, pos = events[1] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
90 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
91 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
92 |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
93 def test_html_entity_in_attribute(self): |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
94 text = '<p title="&nbsp;"/>' |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
95 events = list(XMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
96 kind, data, pos = events[0] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
97 self.assertEqual(Stream.START, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
98 self.assertEqual(u'\xa0', data[1].get('title')) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
99 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
100 self.assertEqual(Stream.END, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
101 |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
102 def test_undefined_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
103 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
104 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
105 <html>&junk;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
106 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
107 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
108 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
109 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
110 def test_undefined_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
111 text = '<html>&junk;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
112 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
113 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
114 |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
115 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
116 class HTMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
117 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
118 def test_text_node_pos_single_line(self): |
932 | 119 text = u'<elem>foo bar</elem>' |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
120 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
121 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
122 self.assertEqual(Stream.TEXT, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
123 self.assertEqual('foo bar', data) |
750 | 124 self.assertEqual((None, 1, 6), pos) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
125 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
126 def test_text_node_pos_multi_line(self): |
932 | 127 text = u'''<elem>foo |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
128 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
129 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
130 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
131 self.assertEqual(Stream.TEXT, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
132 self.assertEqual('foo\nbar', data) |
750 | 133 self.assertEqual((None, 1, 6), pos) |
1 | 134 |
312
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
135 def test_input_encoding_text(self): |
311
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
136 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
932 | 137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
311
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
138 kind, data, pos = events[1] |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
139 self.assertEqual(Stream.TEXT, kind) |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
140 self.assertEqual(u'\xf6', data) |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
141 |
312
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
142 def test_input_encoding_attribute(self): |
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') |
932 | 144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
312
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
145 kind, (tag, attrib), pos = events[0] |
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
146 self.assertEqual(Stream.START, kind) |
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
147 self.assertEqual(u'\xf6', attrib.get('title')) |
7e743338a799
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
148 |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
149 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
150 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
151 events = list(HTMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
152 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
153 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
154 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
155 |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
156 def test_html_entity_in_attribute(self): |
932 | 157 text = u'<p title="&nbsp;"></p>' |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
158 events = list(HTMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
159 kind, data, pos = events[0] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
160 self.assertEqual(Stream.START, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
161 self.assertEqual(u'\xa0', data[1].get('title')) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
162 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
163 self.assertEqual(Stream.END, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
164 |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
165 def test_html_entity_in_text(self): |
932 | 166 text = u'<p>&nbsp;</p>' |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
167 events = list(HTMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
168 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
169 self.assertEqual(Stream.TEXT, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
170 self.assertEqual(u'\xa0', data) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
171 |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
172 def test_processing_instruction(self): |
932 | 173 text = u'<?php echo "Foobar" ?>' |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
174 events = list(HTMLParser(StringIO(text))) |
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
175 kind, (target, data), pos = events[0] |
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
176 self.assertEqual(Stream.PI, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
177 self.assertEqual('php', target) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
178 self.assertEqual('echo "Foobar"', data) |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
179 |
460
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
180 def test_xmldecl(self): |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
181 text = '<?xml version="1.0" ?><root />' |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
182 events = list(XMLParser(StringIO(text))) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
183 kind, (version, encoding, standalone), pos = events[0] |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
184 self.assertEqual(Stream.XML_DECL, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
185 self.assertEqual('1.0', version) |
460
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
186 self.assertEqual(None, encoding) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
187 self.assertEqual(-1, standalone) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
188 |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
189 def test_xmldecl_encoding(self): |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
190 text = '<?xml version="1.0" encoding="utf-8" ?><root />' |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
191 events = list(XMLParser(StringIO(text))) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
192 kind, (version, encoding, standalone), pos = events[0] |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
193 self.assertEqual(Stream.XML_DECL, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
194 self.assertEqual('1.0', version) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
195 self.assertEqual('utf-8', encoding) |
460
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
196 self.assertEqual(-1, standalone) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
197 |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
198 def test_xmldecl_standalone(self): |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
199 text = '<?xml version="1.0" standalone="yes" ?><root />' |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
200 events = list(XMLParser(StringIO(text))) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
201 kind, (version, encoding, standalone), pos = events[0] |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
202 self.assertEqual(Stream.XML_DECL, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
203 self.assertEqual('1.0', version) |
460
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
204 self.assertEqual(None, encoding) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
205 self.assertEqual(1, standalone) |
6b5544bb5a99
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
206 |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
207 def test_processing_instruction_trailing_qmark(self): |
932 | 208 text = u'<?php echo "Foobar" ??>' |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
209 events = list(HTMLParser(StringIO(text))) |
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
210 kind, (target, data), pos = events[0] |
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
211 self.assertEqual(Stream.PI, kind) |
854
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
212 self.assertEqual('php', target) |
0d9e87c6cf6e
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
213 self.assertEqual('echo "Foobar" ?', data) |
376
74b6bf92f0cd
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
214 |
378
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
215 def test_out_of_order_tags1(self): |
932 | 216 text = u'<span><b>Foobar</span></b>' |
378
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
217 events = list(HTMLParser(StringIO(text))) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
218 self.assertEqual(5, len(events)) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
219 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
220 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
221 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
222 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
223 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
224 |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
225 def test_out_of_order_tags2(self): |
932 | 226 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8') |
227 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) | |
378
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
228 self.assertEqual(7, len(events)) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
230 events[0][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
231 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
232 self.assertEqual((Stream.START, ('i', ())), events[2][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
233 self.assertEqual((Stream.TEXT, 'Foobar'), events[3][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
234 self.assertEqual((Stream.END, 'i'), events[4][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
235 self.assertEqual((Stream.END, 'b'), events[5][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
236 self.assertEqual((Stream.END, 'span'), events[6][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
237 |
383 | 238 def test_out_of_order_tags3(self): |
932 | 239 text = u'<span><b>Foobar</i>'.encode('utf-8') |
240 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) | |
378
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
241 self.assertEqual(5, len(events)) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
243 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
244 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
245 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
246 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
fff4a81ffc56
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
247 |
423
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
248 def test_hex_charref(self): |
932 | 249 text = u'<span>&#x27;</span>' |
423
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
250 events = list(HTMLParser(StringIO(text))) |
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
251 self.assertEqual(3, len(events)) |
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
252 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
253 self.assertEqual((Stream.TEXT, "'"), events[1][:2]) |
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
254 self.assertEqual((Stream.END, 'span'), events[2][:2]) |
7589a0e51001
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
255 |
1 | 256 |
257 def suite(): | |
258 suite = unittest.TestSuite() | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
134
diff
changeset
|
259 suite.addTest(doctest.DocTestSuite(XMLParser.__module__)) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
260 suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test')) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
261 suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test')) |
1 | 262 return suite |
263 | |
264 if __name__ == '__main__': | |
265 unittest.main(defaultTest='suite') |