# -*- coding: utf-8 -*-
#
# Copyright (C) 2006 Christopher Lenz
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
|
|
13
|
|
14 import doctest
|
|
15 from HTMLParser import HTMLParseError
|
|
16 import unittest
|
|
17
|
|
18 from markup.core import Markup, escape, unescape
|
|
19
|
|
20
|
|
class MarkupTestCase(unittest.TestCase):
    """Tests for ``Markup`` and the ``escape``/``unescape`` helpers.

    String literals spell HTML entities out explicitly (``&lt;``,
    ``&#34;``, ``&#x09;`` ...) so that escaped text, raw text and
    entity-encoded attack payloads can be told apart in the assertions.
    """

    # -- escape() / unescape() -------------------------------------------

    def test_escape(self):
        # Markup characters and both quote characters must be escaped.
        markup = escape('<b>"&"</b>')
        assert isinstance(markup, Markup)
        self.assertEqual('&lt;b&gt;&#34;&amp;&#34;&lt;/b&gt;', markup)

    def test_escape_noquotes(self):
        # With quotes=False the double quotes are passed through as-is.
        markup = escape('<b>"&"</b>', quotes=False)
        assert isinstance(markup, Markup)
        self.assertEqual('&lt;b&gt;"&amp;"&lt;/b&gt;', markup)

    def test_unescape_markup(self):
        # escape followed by unescape is a round trip.
        string = '<b>"&"</b>'
        markup = Markup.escape(string)
        assert isinstance(markup, Markup)
        self.assertEqual(string, unescape(markup))

    # -- operators --------------------------------------------------------

    def test_add_str(self):
        # A plain string on the right-hand side is escaped when appended.
        markup = Markup('<b>foo</b>') + '<br/>'
        assert isinstance(markup, Markup)
        self.assertEqual('<b>foo</b>&lt;br/&gt;', markup)

    def test_add_markup(self):
        # A Markup instance on the right-hand side is appended verbatim.
        markup = Markup('<b>foo</b>') + Markup('<br/>')
        assert isinstance(markup, Markup)
        self.assertEqual('<b>foo</b><br/>', markup)

    def test_add_reverse(self):
        # str + Markup yields a plain unicode string.
        markup = 'foo' + Markup('<b>bar</b>')
        assert isinstance(markup, unicode)
        self.assertEqual('foo<b>bar</b>', markup)

    def test_mod(self):
        # Interpolated arguments are escaped.
        markup = Markup('<b>%s</b>') % '&'
        assert isinstance(markup, Markup)
        self.assertEqual('<b>&amp;</b>', markup)

    def test_mod_multi(self):
        markup = Markup('<b>%s</b> %s') % ('&', 'boo')
        assert isinstance(markup, Markup)
        self.assertEqual('<b>&amp;</b> boo', markup)

    def test_mul(self):
        markup = Markup('<b>foo</b>') * 2
        assert isinstance(markup, Markup)
        self.assertEqual('<b>foo</b><b>foo</b>', markup)

    def test_join(self):
        # Plain strings in the sequence are escaped, Markup items are not.
        markup = Markup('<br />').join(['foo', '<bar />', Markup('<baz />')])
        assert isinstance(markup, Markup)
        self.assertEqual('foo<br />&lt;bar /&gt;<br /><baz />', markup)

    # -- stripentities() / striptags() ------------------------------------

    def test_stripentities_all(self):
        # Named and numeric entities are both replaced by their characters.
        markup = Markup('&amp; &#106;').stripentities()
        assert isinstance(markup, Markup)
        self.assertEqual('& j', markup)

    def test_stripentities_keepxml(self):
        # Previously this test exercised striptags() (a copy of
        # test_striptags_mid) and never touched stripentities().
        # NOTE(review): assumes stripentities() accepts ``keepxmlentities``
        # and then leaves the XML entities (&amp; etc.) alone -- confirm
        # against markup.core.
        markup = Markup('&amp; &#106;').stripentities(keepxmlentities=True)
        assert isinstance(markup, Markup)
        self.assertEqual('&amp; j', markup)

    def test_striptags_empty(self):
        markup = Markup('<br />').striptags()
        assert isinstance(markup, Markup)
        self.assertEqual('', markup)

    def test_striptags_mid(self):
        markup = Markup('<a href="#">fo<br />o</a>').striptags()
        assert isinstance(markup, Markup)
        self.assertEqual('foo', markup)

    # -- sanitize() -------------------------------------------------------

    def test_sanitize_unchanged(self):
        markup = Markup('<a href="#">fo<br />o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>', str(markup.sanitize()))

    def test_sanitize_escape_text(self):
        # Entities in text content must still be escaped in the output.
        markup = Markup('<a href="#">fo&amp;</a>')
        self.assertEqual('<a href="#">fo&amp;</a>', str(markup.sanitize()))
        markup = Markup('<a href="#">&lt;foo&gt;</a>')
        self.assertEqual('<a href="#">&lt;foo&gt;</a>',
                         str(markup.sanitize()))

    def test_sanitize_entityref_text(self):
        # A named entity reference is resolved to its character (U+00F6).
        markup = Markup('<a href="#">fo&ouml;</a>')
        self.assertEqual(u'<a href="#">fo\xf6</a>',
                         unicode(markup.sanitize()))

    def test_sanitize_escape_attr(self):
        markup = Markup('<div title="&lt;foo&gt;"></div>')
        self.assertEqual('<div title="&lt;foo&gt;"/>',
                         str(markup.sanitize()))

    def test_sanitize_close_empty_tag(self):
        markup = Markup('<a href="#">fo<br>o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>', str(markup.sanitize()))

    def test_sanitize_invalid_entity(self):
        # An unknown entity is emitted as literal, escaped text.
        markup = Markup('&junk;')
        self.assertEqual('&amp;junk;', str(markup.sanitize()))

    def test_sanitize_remove_script_elem(self):
        markup = Markup('<script>alert("Foo")</script>')
        self.assertEqual('', str(markup.sanitize()))
        markup = Markup('<SCRIPT SRC="http://example.com/"></SCRIPT>')
        self.assertEqual('', str(markup.sanitize()))
        # Malformed tag names are rejected by the parser when rendering.
        markup = Markup('<SCR\0IPT>alert("foo")</SCR\0IPT>')
        self.assertRaises(HTMLParseError, markup.sanitize().render)
        markup = Markup('<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
        self.assertRaises(HTMLParseError, markup.sanitize().render)

    def test_sanitize_remove_onclick_attr(self):
        markup = Markup('<div onclick=\'alert("foo")\' />')
        self.assertEqual('<div/>', str(markup.sanitize()))

    def test_sanitize_remove_style_scripts(self):
        # Inline style with url() using javascript: scheme
        markup = Markup('<DIV STYLE=\'background: '
                        'url(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', str(markup.sanitize()))
        # Inline style with url() using javascript: scheme, using control char
        markup = Markup('<DIV STYLE=\'background: '
                        'url(&#1;javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', str(markup.sanitize()))
        # Inline style with url() using javascript: scheme, in quotes
        markup = Markup('<DIV STYLE=\'background: '
                        'url("javascript:alert(foo)")\'>')
        self.assertEqual('<div/>', str(markup.sanitize()))
        # IE expressions in CSS not allowed
        markup = Markup('<DIV STYLE=\'width: expression(alert("foo"));\'>')
        self.assertEqual('<div/>', str(markup.sanitize()))
        # Only the offending declaration is dropped; the safe one is kept.
        markup = Markup('<DIV STYLE=\'background: '
                        'url(javascript:alert("foo"));'
                        'color: #fff\'>')
        self.assertEqual('<div style="color: #fff"/>',
                         str(markup.sanitize()))

    def test_sanitize_remove_src_javascript(self):
        markup = Markup('<img src=\'javascript:alert("foo")\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Case-insensitive protocol matching
        markup = Markup('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Grave accents (not parsed)
        markup = Markup('<IMG SRC=`javascript:alert("RSnake says, '
                        '\'foo\'")`>')
        self.assertRaises(HTMLParseError, markup.sanitize().render)
        # Protocol encoded using UTF-8 numeric entities
        markup = Markup('<IMG SRC=\''
                        '&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                        '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        markup = Markup('<IMG SRC=\''
                        '&#0000106&#0000097&#0000118&#0000097'
                        '&#0000115&#0000099&#0000114&#0000105'
                        '&#0000112&#0000116'
                        '&#0000058alert("foo")\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
        # (which is allowed because the max number of digits is used)
        markup = Markup('<IMG SRC=\''
                        '&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                        '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Embedded tab character in protocol
        markup = Markup('<IMG SRC=\'jav\tascript:alert("foo");\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
        # Embedded tab character in protocol, but encoded this time
        markup = Markup('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        self.assertEqual('<img/>', str(markup.sanitize()))
|
|
181
|
|
182
|
|
def suite():
    """Return a ``unittest.TestSuite`` with every ``MarkupTestCase`` test.

    Uses ``TestLoader.loadTestsFromTestCase`` (default ``test`` method
    prefix) instead of the deprecated ``unittest.makeSuite`` helper.
    """
    loader = unittest.TestLoader()
    # Named ``tests`` so the local does not shadow this function's name.
    tests = unittest.TestSuite()
    tests.addTest(loader.loadTestsFromTestCase(MarkupTestCase))
    return tests
|
|
187
|
|
if __name__ == '__main__':
    # Executed as a script: let unittest build and run the tests returned
    # by the module-level suite() callable.
    unittest.main(defaultTest='suite')
|