Mercurial > genshi > mirror
annotate genshi/tests/input.py @ 996:0f4b2e892a48 trunk
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
author | hodgestar |
---|---|
date | Sat, 26 Jan 2013 17:26:47 +0000 |
parents | 2bfd8f8d241c |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
3 # Copyright (C) 2006-2009 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
1
diff
changeset
|
14 import doctest |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
15 import sys |
1 | 16 import unittest |
17 | |
378
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
18 from genshi.core import Attrs, Stream |
230 | 19 from genshi.input import XMLParser, HTMLParser, ParseError |
932 | 20 from genshi.compat import StringIO, BytesIO |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
21 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
22 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
23 class XMLParserTestCase(unittest.TestCase): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
24 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
25 def test_text_node_pos_single_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
26 text = '<elem>foo bar</elem>' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
27 events = list(XMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
28 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
29 self.assertEqual(Stream.TEXT, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
30 self.assertEqual('foo bar', data) |
750 | 31 self.assertEqual((None, 1, 6), pos) |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
32 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
33 def test_text_node_pos_multi_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
34 text = '''<elem>foo |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
35 bar</elem>''' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
36 events = list(XMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
37 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
38 self.assertEqual(Stream.TEXT, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
39 self.assertEqual('foo\nbar', data) |
750 | 40 self.assertEqual((None, 1, -1), pos) |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
41 |
160 | 42 def test_element_attribute_order(self): |
43 text = '<elem title="baz" id="foo" class="bar" />' | |
44 events = list(XMLParser(StringIO(text))) | |
45 kind, data, pos = events[0] | |
46 self.assertEqual(Stream.START, kind) | |
47 tag, attrib = data | |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
48 self.assertEqual('elem', tag) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
49 self.assertEqual(('title', 'baz'), attrib[0]) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
50 self.assertEqual(('id', 'foo'), attrib[1]) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
51 self.assertEqual(('class', 'bar'), attrib[2]) |
160 | 52 |
207
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
53 def test_unicode_input(self): |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
54 text = u'<div>\u2013</div>' |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
55 events = list(XMLParser(StringIO(text))) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
56 kind, data, pos = events[1] |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
57 self.assertEqual(Stream.TEXT, kind) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
58 self.assertEqual(u'\u2013', data) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
59 |
316
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
60 def test_latin1_encoded(self): |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
61 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
932 | 62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1')) |
316
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
63 kind, data, pos = events[1] |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
64 self.assertEqual(Stream.TEXT, kind) |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
65 self.assertEqual(u'\xf6', data) |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
66 |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
67 def test_latin1_encoded_xmldecl(self): |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
69 <div>\xf6</div> |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
70 """.encode('iso-8859-1') |
932 | 71 events = list(XMLParser(BytesIO(text))) |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
72 kind, data, pos = events[2] |
316
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
73 self.assertEqual(Stream.TEXT, kind) |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
74 self.assertEqual(u'\xf6', data) |
a946edefac40
Configurable encoding of template files, closing #65.
cmlenz
parents:
312
diff
changeset
|
75 |
209
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
76 def test_html_entity_with_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
77 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
79 <html> </html> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
80 """ |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
81 events = list(XMLParser(StringIO(text))) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
82 kind, data, pos = events[2] |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
83 self.assertEqual(Stream.TEXT, kind) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
84 self.assertEqual(u'\xa0', data) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
85 |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
86 def test_html_entity_without_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
87 text = '<html> </html>' |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
88 events = list(XMLParser(StringIO(text))) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
89 kind, data, pos = events[1] |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
90 self.assertEqual(Stream.TEXT, kind) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
91 self.assertEqual(u'\xa0', data) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
92 |
293
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
93 def test_html_entity_in_attribute(self): |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
94 text = '<p title=" "/>' |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
95 events = list(XMLParser(StringIO(text))) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
96 kind, data, pos = events[0] |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
97 self.assertEqual(Stream.START, kind) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
98 self.assertEqual(u'\xa0', data[1].get('title')) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
99 kind, data, pos = events[1] |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
100 self.assertEqual(Stream.END, kind) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
101 |
209
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
102 def test_undefined_entity_with_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
103 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
104 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
105 <html>&junk;</html> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
106 """ |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
107 events = XMLParser(StringIO(text)) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
108 self.assertRaises(ParseError, list, events) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
109 |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
110 def test_undefined_entity_without_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
111 text = '<html>&junk;</html>' |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
112 events = XMLParser(StringIO(text)) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
113 self.assertRaises(ParseError, list, events) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
114 |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
115 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
116 class HTMLParserTestCase(unittest.TestCase): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
117 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
118 def test_text_node_pos_single_line(self): |
932 | 119 text = u'<elem>foo bar</elem>' |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
120 events = list(HTMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
121 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
122 self.assertEqual(Stream.TEXT, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
123 self.assertEqual('foo bar', data) |
750 | 124 self.assertEqual((None, 1, 6), pos) |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
125 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
126 def test_text_node_pos_multi_line(self): |
932 | 127 text = u'''<elem>foo |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
128 bar</elem>''' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
129 events = list(HTMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
130 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
131 self.assertEqual(Stream.TEXT, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
132 self.assertEqual('foo\nbar', data) |
750 | 133 self.assertEqual((None, 1, 6), pos) |
1 | 134 |
312
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
135 def test_input_encoding_text(self): |
311
8de1ff534d22
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
136 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
932 | 137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
311
8de1ff534d22
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
138 kind, data, pos = events[1] |
8de1ff534d22
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
139 self.assertEqual(Stream.TEXT, kind) |
8de1ff534d22
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
140 self.assertEqual(u'\xf6', data) |
8de1ff534d22
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
141 |
312
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
142 def test_input_encoding_attribute(self): |
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') |
932 | 144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) |
312
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
145 kind, (tag, attrib), pos = events[0] |
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
146 self.assertEqual(Stream.START, kind) |
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
147 self.assertEqual(u'\xf6', attrib.get('title')) |
cb7326367f91
Follow-up to [385]: also decode attribute values in the `HTMLParser`.
cmlenz
parents:
311
diff
changeset
|
148 |
207
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
149 def test_unicode_input(self): |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
150 text = u'<div>\u2013</div>' |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
151 events = list(HTMLParser(StringIO(text))) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
152 kind, data, pos = events[1] |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
153 self.assertEqual(Stream.TEXT, kind) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
154 self.assertEqual(u'\u2013', data) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
155 |
293
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
156 def test_html_entity_in_attribute(self): |
932 | 157 text = u'<p title=" "></p>' |
293
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
158 events = list(HTMLParser(StringIO(text))) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
159 kind, data, pos = events[0] |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
160 self.assertEqual(Stream.START, kind) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
161 self.assertEqual(u'\xa0', data[1].get('title')) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
162 kind, data, pos = events[1] |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
163 self.assertEqual(Stream.END, kind) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
164 |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
165 def test_html_entity_in_text(self): |
932 | 166 text = u'<p> </p>' |
293
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
167 events = list(HTMLParser(StringIO(text))) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
168 kind, data, pos = events[1] |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
169 self.assertEqual(Stream.TEXT, kind) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
170 self.assertEqual(u'\xa0', data) |
e17b7459b515
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
171 |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
172 def test_processing_instruction(self): |
932 | 173 text = u'<?php echo "Foobar" ?>' |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
174 events = list(HTMLParser(StringIO(text))) |
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
175 kind, (target, data), pos = events[0] |
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
176 self.assertEqual(Stream.PI, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
177 self.assertEqual('php', target) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
178 self.assertEqual('echo "Foobar"', data) |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
179 |
996
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
180 def test_processing_instruction_no_data_1(self): |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
181 text = u'<?foo ?>' |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
182 events = list(HTMLParser(StringIO(text))) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
183 kind, (target, data), pos = events[0] |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
184 self.assertEqual(Stream.PI, kind) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
185 self.assertEqual('foo', target) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
186 self.assertEqual('', data) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
187 |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
188 def test_processing_instruction_no_data_2(self): |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
189 text = u'<?experiment>...<?/experiment>' |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
190 events = list(HTMLParser(StringIO(text))) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
191 kind, (target, data), pos = events[0] |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
192 self.assertEqual(Stream.PI, kind) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
193 self.assertEqual('experiment', target) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
194 self.assertEqual('', data) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
195 kind, (target, data), pos = events[2] |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
196 self.assertEqual('/experiment', target) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
197 self.assertEqual('', data) |
0f4b2e892a48
Fix handling of processing instructions that don't have data attached (patch from Neil Muller, fixes #368).
hodgestar
parents:
965
diff
changeset
|
198 |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
199 def test_xmldecl(self): |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
200 text = '<?xml version="1.0" ?><root />' |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
201 events = list(XMLParser(StringIO(text))) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
202 kind, (version, encoding, standalone), pos = events[0] |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
203 self.assertEqual(Stream.XML_DECL, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
204 self.assertEqual('1.0', version) |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
205 self.assertEqual(None, encoding) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
206 self.assertEqual(-1, standalone) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
207 |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
208 def test_xmldecl_encoding(self): |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
209 text = '<?xml version="1.0" encoding="utf-8" ?><root />' |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
210 events = list(XMLParser(StringIO(text))) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
211 kind, (version, encoding, standalone), pos = events[0] |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
212 self.assertEqual(Stream.XML_DECL, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
213 self.assertEqual('1.0', version) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
214 self.assertEqual('utf-8', encoding) |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
215 self.assertEqual(-1, standalone) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
216 |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
217 def test_xmldecl_standalone(self): |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
218 text = '<?xml version="1.0" standalone="yes" ?><root />' |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
219 events = list(XMLParser(StringIO(text))) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
220 kind, (version, encoding, standalone), pos = events[0] |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
221 self.assertEqual(Stream.XML_DECL, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
222 self.assertEqual('1.0', version) |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
223 self.assertEqual(None, encoding) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
224 self.assertEqual(1, standalone) |
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
423
diff
changeset
|
225 |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
226 def test_processing_instruction_trailing_qmark(self): |
932 | 227 text = u'<?php echo "Foobar" ??>' |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
228 events = list(HTMLParser(StringIO(text))) |
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
229 kind, (target, data), pos = events[0] |
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
230 self.assertEqual(Stream.PI, kind) |
854
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
231 self.assertEqual('php', target) |
4d9bef447df9
More work on reducing the size of the diff produced by 2to3.
cmlenz
parents:
750
diff
changeset
|
232 self.assertEqual('echo "Foobar" ?', data) |
376
0e0952d85d97
Fix parsing of processing instructions in HTML input.
cmlenz
parents:
316
diff
changeset
|
233 |
378
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
234 def test_out_of_order_tags1(self): |
932 | 235 text = u'<span><b>Foobar</span></b>' |
378
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
236 events = list(HTMLParser(StringIO(text))) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
237 self.assertEqual(5, len(events)) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
238 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
239 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
240 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
241 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
242 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
243 |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
244 def test_out_of_order_tags2(self): |
932 | 245 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8') |
246 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) | |
378
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
247 self.assertEqual(7, len(events)) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
248 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
249 events[0][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
250 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
251 self.assertEqual((Stream.START, ('i', ())), events[2][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
252 self.assertEqual((Stream.TEXT, 'Foobar'), events[3][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
253 self.assertEqual((Stream.END, 'i'), events[4][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
254 self.assertEqual((Stream.END, 'b'), events[5][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
255 self.assertEqual((Stream.END, 'span'), events[6][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
256 |
383 | 257 def test_out_of_order_tags3(self): |
932 | 258 text = u'<span><b>Foobar</i>'.encode('utf-8') |
259 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) | |
378
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
260 self.assertEqual(5, len(events)) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
261 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
262 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
263 self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
264 self.assertEqual((Stream.END, 'b'), events[3][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
265 self.assertEqual((Stream.END, 'span'), events[4][:2]) |
873ca2a7ec05
Improve handling of incorrectly nested tags in the HTML parser.
cmlenz
parents:
376
diff
changeset
|
266 |
423
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
267 def test_hex_charref(self): |
932 | 268 text = u'<span>&#x27;</span>' |
423
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
269 events = list(HTMLParser(StringIO(text))) |
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
270 self.assertEqual(3, len(events)) |
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
271 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) |
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
272 self.assertEqual((Stream.TEXT, "'"), events[1][:2]) |
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
273 self.assertEqual((Stream.END, 'span'), events[2][:2]) |
56bbe1d94da0
Applied patch for #106 (handling of hex charrefs in HTML parser).
cmlenz
parents:
383
diff
changeset
|
274 |
965
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
275 def test_multibyte_character_on_chunk_boundary(self): |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
276 text = u'a' * ((4 * 1024) - 1) + u'\xe6' |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
277 events = list(HTMLParser(BytesIO(text.encode('utf-8')), |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
278 encoding='utf-8')) |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
279 self.assertEqual(1, len(events)) |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
280 self.assertEqual((Stream.TEXT, text), events[0][:2]) |
2bfd8f8d241c
Fix parsing of multi-byte characters that occur on 4K boundaries of HTML files (fixes #538).
hodgestar
parents:
932
diff
changeset
|
281 |
1 | 282 |
283 def suite(): | |
284 suite = unittest.TestSuite() | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
134
diff
changeset
|
285 suite.addTest(doctest.DocTestSuite(XMLParser.__module__)) |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
286 suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test')) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
287 suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test')) |
1 | 288 return suite |
289 | |
290 if __name__ == '__main__': | |
291 unittest.main(defaultTest='suite') |