Mercurial > genshi > genshi-test
annotate genshi/tests/input.py @ 311:01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
* The `TemplateLoader` class can now also be initialized from a string for the search path, for cases where the search path contains only a single directory.
author | cmlenz |
---|---|
date | Sun, 22 Oct 2006 14:57:40 +0000 |
parents | 38adb4aa7df5 |
children | 7e743338a799 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
1
diff
changeset
|
14 import doctest |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
15 from StringIO import StringIO |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
16 import sys |
1 | 17 import unittest |
18 | |
230 | 19 from genshi.core import Stream |
20 from genshi.input import XMLParser, HTMLParser, ParseError | |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
21 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
22 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
23 class XMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
24 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
25 def test_text_node_pos_single_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
26 text = '<elem>foo bar</elem>' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
27 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
28 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
29 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
30 self.assertEqual(u'foo bar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
31 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
32 self.assertEqual((None, 1, 6), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
33 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
34 def test_text_node_pos_multi_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
35 text = '''<elem>foo |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
36 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
37 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
38 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
39 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
40 self.assertEqual(u'foo\nbar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
41 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
42 self.assertEqual((None, 1, -1), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
43 |
160 | 44 def test_element_attribute_order(self): |
45 text = '<elem title="baz" id="foo" class="bar" />' | |
46 events = list(XMLParser(StringIO(text))) | |
47 kind, data, pos = events[0] | |
48 self.assertEqual(Stream.START, kind) | |
49 tag, attrib = data | |
50 self.assertEqual(u'elem', tag) | |
51 self.assertEqual((u'title', u'baz'), attrib[0]) | |
52 self.assertEqual((u'id', u'foo'), attrib[1]) | |
53 self.assertEqual((u'class', u'bar'), attrib[2]) | |
54 | |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
55 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
56 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
57 events = list(XMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
58 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
59 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
60 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
61 |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
62 def test_html_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
63 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
64 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
65 <html>&nbsp;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
66 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
67 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
68 kind, data, pos = events[2] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
69 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
70 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
71 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
72 def test_html_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
73 text = '<html>&nbsp;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
74 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
75 kind, data, pos = events[1] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
76 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
77 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
78 |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
79 def test_html_entity_in_attribute(self): |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
80 text = '<p title="&nbsp;"/>' |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
81 events = list(XMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
82 kind, data, pos = events[0] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
83 self.assertEqual(Stream.START, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
84 self.assertEqual(u'\xa0', data[1].get('title')) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
85 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
86 self.assertEqual(Stream.END, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
87 |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
88 def test_undefined_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
89 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
90 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
91 <html>&junk;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
92 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
93 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
94 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
95 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
96 def test_undefined_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
97 text = '<html>&junk;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
98 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
99 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
100 |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
101 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
102 class HTMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
103 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
104 def test_text_node_pos_single_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
105 text = '<elem>foo bar</elem>' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
106 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
107 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
108 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
109 self.assertEqual(u'foo bar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
110 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
111 self.assertEqual((None, 1, 6), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
112 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
113 def test_text_node_pos_multi_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
114 text = '''<elem>foo |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
115 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
116 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
117 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
118 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
119 self.assertEqual(u'foo\nbar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
120 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
121 self.assertEqual((None, 1, 6), pos) |
1 | 122 |
311
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
123 def test_input_encoding(self): |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
124 text = u'<div>\xf6</div>'.encode('iso-8859-1') |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
125 events = list(HTMLParser(StringIO(text), encoding='iso-8859-1')) |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
126 kind, data, pos = events[1] |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
127 self.assertEqual(Stream.TEXT, kind) |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
128 self.assertEqual(u'\xf6', data) |
01e2c48f6dfb
* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
cmlenz
parents:
293
diff
changeset
|
129 |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
130 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
131 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
132 events = list(HTMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
133 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
134 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
135 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
136 |
293
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
137 def test_html_entity_in_attribute(self): |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
138 text = '<p title="&nbsp;"></p>' |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
139 events = list(HTMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
140 kind, data, pos = events[0] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
141 self.assertEqual(Stream.START, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
142 self.assertEqual(u'\xa0', data[1].get('title')) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
143 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
144 self.assertEqual(Stream.END, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
145 |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
146 def test_html_entity_in_text(self): |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
147 text = '<p>&nbsp;</p>' |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
148 events = list(HTMLParser(StringIO(text))) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
149 kind, data, pos = events[1] |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
150 self.assertEqual(Stream.TEXT, kind) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
151 self.assertEqual(u'\xa0', data) |
38adb4aa7df5
Fix a bug in the XML parser, where attributes containing HTML entity references would get pulled out of the attribute value, and instead added as a text node just before the associated start tag. Thanks to Hamish Lawson for [http://groups.google.com/group/genshi/browse_thread/thread/c64eb48676b0ff96/0e6ce786e8820f3d pointing out the problem].
cmlenz
parents:
230
diff
changeset
|
152 |
1 | 153 |
154 def suite(): | |
155 suite = unittest.TestSuite() | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
134
diff
changeset
|
156 suite.addTest(doctest.DocTestSuite(XMLParser.__module__)) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
157 suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test')) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
158 suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test')) |
1 | 159 return suite |
160 | |
161 if __name__ == '__main__': | |
162 unittest.main(defaultTest='suite') |