Mercurial > genshi > mirror
annotate markup/tests/input.py @ 219:ebceef564b79 trunk
Minor improvements to `WhitespaceFilter`.
author | cmlenz |
---|---|
date | Tue, 05 Sep 2006 13:35:53 +0000 |
parents | fc6b2fb66518 |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
1
diff
changeset
|
14 import doctest |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
15 from StringIO import StringIO |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
16 import sys |
1 | 17 import unittest |
18 | |
19 from markup.core import Stream | |
209
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
20 from markup.input import XMLParser, HTMLParser, ParseError |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
21 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
22 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
23 class XMLParserTestCase(unittest.TestCase): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
24 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
25 def test_text_node_pos_single_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
26 text = '<elem>foo bar</elem>' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
27 events = list(XMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
28 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
29 self.assertEqual(Stream.TEXT, kind) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
30 self.assertEqual(u'foo bar', data) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
31 if sys.version_info[:2] >= (2, 4): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
32 self.assertEqual((None, 1, 6), pos) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
33 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
34 def test_text_node_pos_multi_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
35 text = '''<elem>foo |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
36 bar</elem>''' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
37 events = list(XMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
38 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
39 self.assertEqual(Stream.TEXT, kind) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
40 self.assertEqual(u'foo\nbar', data) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
41 if sys.version_info[:2] >= (2, 4): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
42 self.assertEqual((None, 1, -1), pos) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
43 |
160 | 44 def test_element_attribute_order(self): |
45 text = '<elem title="baz" id="foo" class="bar" />' | |
46 events = list(XMLParser(StringIO(text))) | |
47 kind, data, pos = events[0] | |
48 self.assertEqual(Stream.START, kind) | |
49 tag, attrib = data | |
50 self.assertEqual(u'elem', tag) | |
51 self.assertEqual((u'title', u'baz'), attrib[0]) | |
52 self.assertEqual((u'id', u'foo'), attrib[1]) | |
53 self.assertEqual((u'class', u'bar'), attrib[2]) | |
54 | |
207
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
55 def test_unicode_input(self): |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
56 text = u'<div>\u2013</div>' |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
57 events = list(XMLParser(StringIO(text))) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
58 kind, data, pos = events[1] |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
59 self.assertEqual(Stream.TEXT, kind) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
60 self.assertEqual(u'\u2013', data) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
61 |
209
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
62 def test_html_entity_with_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
63 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
64 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
65 <html>&nbsp;</html> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
66 """ |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
67 events = list(XMLParser(StringIO(text))) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
68 kind, data, pos = events[2] |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
69 self.assertEqual(Stream.TEXT, kind) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
70 self.assertEqual(u'\xa0', data) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
71 |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
72 def test_html_entity_without_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
73 text = '<html>&nbsp;</html>' |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
74 events = list(XMLParser(StringIO(text))) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
75 kind, data, pos = events[1] |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
76 self.assertEqual(Stream.TEXT, kind) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
77 self.assertEqual(u'\xa0', data) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
78 |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
79 def test_undefined_entity_with_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
80 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
81 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
82 <html>&junk;</html> |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
83 """ |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
84 events = XMLParser(StringIO(text)) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
85 self.assertRaises(ParseError, list, events) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
86 |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
87 def test_undefined_entity_without_dtd(self): |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
88 text = '<html>&junk;</html>' |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
89 events = XMLParser(StringIO(text)) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
90 self.assertRaises(ParseError, list, events) |
fc6b2fb66518
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
91 |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
92 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
93 class HTMLParserTestCase(unittest.TestCase): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
94 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
95 def test_text_node_pos_single_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
96 text = '<elem>foo bar</elem>' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
97 events = list(HTMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
98 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
99 self.assertEqual(Stream.TEXT, kind) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
100 self.assertEqual(u'foo bar', data) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
101 if sys.version_info[:2] >= (2, 4): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
102 self.assertEqual((None, 1, 6), pos) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
103 |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
104 def test_text_node_pos_multi_line(self): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
105 text = '''<elem>foo |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
106 bar</elem>''' |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
107 events = list(HTMLParser(StringIO(text))) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
108 kind, data, pos = events[1] |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
109 self.assertEqual(Stream.TEXT, kind) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
110 self.assertEqual(u'foo\nbar', data) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
111 if sys.version_info[:2] >= (2, 4): |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
112 self.assertEqual((None, 1, 6), pos) |
1 | 113 |
207
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
114 def test_unicode_input(self): |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
115 text = u'<div>\u2013</div>' |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
116 events = list(HTMLParser(StringIO(text))) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
117 kind, data, pos = events[1] |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
118 self.assertEqual(Stream.TEXT, kind) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
119 self.assertEqual(u'\u2013', data) |
28bfc6aafab7
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
120 |
1 | 121 |
122 def suite(): | |
123 suite = unittest.TestSuite() | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
134
diff
changeset
|
124 suite.addTest(doctest.DocTestSuite(XMLParser.__module__)) |
134
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
125 suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test')) |
d681d2c3cd8d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
126 suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test')) |
1 | 127 return suite |
128 | |
129 if __name__ == '__main__': | |
130 unittest.main(defaultTest='suite') |