comparison markup/tests/filters.py @ 115:e92fb402ee04 trunk

Various fixes for breakage introduced in [132].
author cmlenz
date Tue, 01 Aug 2006 10:42:48 +0000
parents d10fbba1d5e0
children d1ce85a7f296
comparison
Legend: equal | deleted | inserted | replaced
Left column: 114:4c4e81d12649 (old) — Right column: 115:e92fb402ee04 (new)
22 class HTMLSanitizerTestCase(unittest.TestCase): 22 class HTMLSanitizerTestCase(unittest.TestCase):
23 23
24 def test_sanitize_unchanged(self): 24 def test_sanitize_unchanged(self):
25 html = HTML('<a href="#">fo<br />o</a>') 25 html = HTML('<a href="#">fo<br />o</a>')
26 self.assertEquals('<a href="#">fo<br/>o</a>', 26 self.assertEquals('<a href="#">fo<br/>o</a>',
27 str(html.filter(HTMLSanitizer())) 27 str(html.filter(HTMLSanitizer())))
28 28
29 def test_sanitize_escape_text(self): 29 def test_sanitize_escape_text(self):
30 html = HTML('<a href="#">fo&amp;</a>') 30 html = HTML('<a href="#">fo&amp;</a>')
31 self.assertEquals('<a href="#">fo&amp;</a>', 31 self.assertEquals('<a href="#">fo&amp;</a>',
32 str(html.filter(HTMLSanitizer())) 32 str(html.filter(HTMLSanitizer())))
33 html = HTML('<a href="#">&lt;foo&gt;</a>') 33 html = HTML('<a href="#">&lt;foo&gt;</a>')
34 self.assertEquals('<a href="#">&lt;foo&gt;</a>', 34 self.assertEquals('<a href="#">&lt;foo&gt;</a>',
35 str(html.filter(HTMLSanitizer())) 35 str(html.filter(HTMLSanitizer())))
36 36
37 def test_sanitize_entityref_text(self): 37 def test_sanitize_entityref_text(self):
38 html = HTML('<a href="#">fo&ouml;</a>') 38 html = HTML('<a href="#">fo&ouml;</a>')
39 self.assertEquals(u'<a href="#">fo&ouml;</a>', 39 self.assertEquals(u'<a href="#">fo&ouml;</a>',
40 str(html.filter(HTMLSanitizer())) 40 str(html.filter(HTMLSanitizer())))
41 41
42 def test_sanitize_escape_attr(self): 42 def test_sanitize_escape_attr(self):
43 html = HTML('<div title="&lt;foo&gt;"></div>') 43 html = HTML('<div title="&lt;foo&gt;"></div>')
44 self.assertEquals('<div title="&lt;foo&gt;"/>', 44 self.assertEquals('<div title="&lt;foo&gt;"/>',
45 str(html.filter(HTMLSanitizer()))) 45 str(html.filter(HTMLSanitizer())))
46 46
47 def test_sanitize_close_empty_tag(self): 47 def test_sanitize_close_empty_tag(self):
48 html = HTML('<a href="#">fo<br>o</a>') 48 html = HTML('<a href="#">fo<br>o</a>')
49 self.assertEquals('<a href="#">fo<br/>o</a>', 49 self.assertEquals('<a href="#">fo<br/>o</a>',
50 str(html.filter(HTMLSanitizer())) 50 str(html.filter(HTMLSanitizer())))
51 51
52 def test_sanitize_invalid_entity(self): 52 def test_sanitize_invalid_entity(self):
53 html = HTML('&junk;') 53 html = HTML('&junk;')
54 self.assertEquals('&junk;', str(html.filter(HTMLSanitizer())) 54 self.assertEquals('&junk;', str(html.filter(HTMLSanitizer())))
55 55
56 def test_sanitize_remove_script_elem(self): 56 def test_sanitize_remove_script_elem(self):
57 html = HTML('<script>alert("Foo")</script>') 57 html = HTML('<script>alert("Foo")</script>')
58 self.assertEquals('', str(html.filter(HTMLSanitizer())) 58 self.assertEquals('', str(html.filter(HTMLSanitizer())))
59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') 59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
60 self.assertEquals('', str(html.filter(HTMLSanitizer())) 60 self.assertEquals('', str(html.filter(HTMLSanitizer())))
61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') 61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
62 self.assertRaises(ParseError, HTML, 62 self.assertRaises(ParseError, HTML,
63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') 63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
64 64
65 def test_sanitize_remove_onclick_attr(self): 65 def test_sanitize_remove_onclick_attr(self):
66 html = HTML('<div onclick=\'alert("foo")\' />') 66 html = HTML('<div onclick=\'alert("foo")\' />')
67 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) 67 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())))
68 68
69 def test_sanitize_remove_style_scripts(self): 69 def test_sanitize_remove_style_scripts(self):
70 # Inline style with url() using javascript: scheme 70 # Inline style with url() using javascript: scheme
71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') 71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
72 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) 72 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())))
73 # Inline style with url() using javascript: scheme, using control char 73 # Inline style with url() using javascript: scheme, using control char
74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>') 74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
75 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) 75 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())))
76 # Inline style with url() using javascript: scheme, in quotes 76 # Inline style with url() using javascript: scheme, in quotes
77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') 77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
78 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) 78 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())))
79 # IE expressions in CSS not allowed 79 # IE expressions in CSS not allowed
80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') 80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
81 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) 81 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())))
82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' 82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
83 'color: #fff\'>') 83 'color: #fff\'>')
84 self.assertEquals('<div style="color: #fff"/>', 84 self.assertEquals('<div style="color: #fff"/>',
85 str(html.filter(HTMLSanitizer())) 85 str(html.filter(HTMLSanitizer())))
86 86
87 def test_sanitize_remove_src_javascript(self): 87 def test_sanitize_remove_src_javascript(self):
88 html = HTML('<img src=\'javascript:alert("foo")\'>') 88 html = HTML('<img src=\'javascript:alert("foo")\'>')
89 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 89 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
90 # Case-insensitive protocol matching 90 # Case-insensitive protocol matching
91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') 91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
92 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 92 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
93 # Grave accents (not parsed) 93 # Grave accents (not parsed)
94 self.assertRaises(ParseError, HTML, 94 self.assertRaises(ParseError, HTML,
95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') 95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
96 # Protocol encoded using UTF-8 numeric entities 96 # Protocol encoded using UTF-8 numeric entities
97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;' 97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
98 '&#112;&#116;&#58;alert("foo")\'>') 98 '&#112;&#116;&#58;alert("foo")\'>')
99 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 99 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
100 # Protocol encoded using UTF-8 numeric entities without a semicolon 100 # Protocol encoded using UTF-8 numeric entities without a semicolon
101 # (which is allowed because the max number of digits is used) 101 # (which is allowed because the max number of digits is used)
102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097' 102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116' 103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
104 '&#0000058alert("foo")\'>') 104 '&#0000058alert("foo")\'>')
105 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 105 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon 106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon
107 # (which is allowed because the max number of digits is used) 107 # (which is allowed because the max number of digits is used)
108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69' 108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
109 '&#x70&#x74&#x3A;alert("foo")\'>') 109 '&#x70&#x74&#x3A;alert("foo")\'>')
110 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 110 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
111 # Embedded tab character in protocol 111 # Embedded tab character in protocol
112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') 112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
113 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 113 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
114 # Embedded tab character in protocol, but encoded this time 114 # Embedded tab character in protocol, but encoded this time
115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>') 115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
116 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) 116 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())))
117 117
118 118
119 def suite(): 119 def suite():
120 suite = unittest.TestSuite() 120 suite = unittest.TestSuite()
121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) 121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
Copyright (C) 2012-2017 Edgewall Software