Mercurial > genshi > genshi-test
annotate markup/tests/input.py @ 215:e92135672812
A couple of minor XPath fixes.
author | cmlenz |
---|---|
date | Fri, 01 Sep 2006 13:45:42 +0000 |
parents | 5b422db07359 |
children |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
1
diff
changeset
|
14 import doctest |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
15 from StringIO import StringIO |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
16 import sys |
1 | 17 import unittest |
18 | |
19 from markup.core import Stream | |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
20 from markup.input import XMLParser, HTMLParser, ParseError |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
21 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
22 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
23 class XMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
24 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
25 def test_text_node_pos_single_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
26 text = '<elem>foo bar</elem>' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
27 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
28 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
29 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
30 self.assertEqual(u'foo bar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
31 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
32 self.assertEqual((None, 1, 6), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
33 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
34 def test_text_node_pos_multi_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
35 text = '''<elem>foo |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
36 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
37 events = list(XMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
38 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
39 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
40 self.assertEqual(u'foo\nbar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
41 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
42 self.assertEqual((None, 1, -1), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
43 |
160 | 44 def test_element_attribute_order(self): |
45 text = '<elem title="baz" id="foo" class="bar" />' | |
46 events = list(XMLParser(StringIO(text))) | |
47 kind, data, pos = events[0] | |
48 self.assertEqual(Stream.START, kind) | |
49 tag, attrib = data | |
50 self.assertEqual(u'elem', tag) | |
51 self.assertEqual((u'title', u'baz'), attrib[0]) | |
52 self.assertEqual((u'id', u'foo'), attrib[1]) | |
53 self.assertEqual((u'class', u'bar'), attrib[2]) | |
54 | |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
55 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
56 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
57 events = list(XMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
58 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
59 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
60 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
61 |
209
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
62 def test_html_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
63 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
64 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
65 <html>&nbsp;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
66 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
67 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
68 kind, data, pos = events[2] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
69 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
70 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
71 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
72 def test_html_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
73 text = '<html>&nbsp;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
74 events = list(XMLParser(StringIO(text))) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
75 kind, data, pos = events[1] |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
76 self.assertEqual(Stream.TEXT, kind) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
77 self.assertEqual(u'\xa0', data) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
78 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
79 def test_undefined_entity_with_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
80 text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
81 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
82 <html>&junk;</html> |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
83 """ |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
84 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
85 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
86 |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
87 def test_undefined_entity_without_dtd(self): |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
88 text = '<html>&junk;</html>' |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
89 events = XMLParser(StringIO(text)) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
90 self.assertRaises(ParseError, list, events) |
5b422db07359
* Fix bug in handling of undefined entities. Thanks to Arnar for reporting the issue on IRC.
cmlenz
parents:
207
diff
changeset
|
91 |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
92 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
93 class HTMLParserTestCase(unittest.TestCase): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
94 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
95 def test_text_node_pos_single_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
96 text = '<elem>foo bar</elem>' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
97 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
98 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
99 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
100 self.assertEqual(u'foo bar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
101 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
102 self.assertEqual((None, 1, 6), pos) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
103 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
104 def test_text_node_pos_multi_line(self): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
105 text = '''<elem>foo |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
106 bar</elem>''' |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
107 events = list(HTMLParser(StringIO(text))) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
108 kind, data, pos = events[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
109 self.assertEqual(Stream.TEXT, kind) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
110 self.assertEqual(u'foo\nbar', data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
111 if sys.version_info[:2] >= (2, 4): |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
112 self.assertEqual((None, 1, 6), pos) |
1 | 113 |
207
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
114 def test_unicode_input(self): |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
115 text = u'<div>\u2013</div>' |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
116 events = list(HTMLParser(StringIO(text))) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
117 kind, data, pos = events[1] |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
118 self.assertEqual(Stream.TEXT, kind) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
119 self.assertEqual(u'\u2013', data) |
0619a27f5e67
The `XMLParser` now correctly handles unicode input. Closes #43.
cmlenz
parents:
160
diff
changeset
|
120 |
1 | 121 |
122 def suite(): | |
123 suite = unittest.TestSuite() | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
134
diff
changeset
|
124 suite.addTest(doctest.DocTestSuite(XMLParser.__module__)) |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
125 suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test')) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
66
diff
changeset
|
126 suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test')) |
1 | 127 return suite |
128 | |
129 if __name__ == '__main__': | |
130 unittest.main(defaultTest='suite') |