comparison markup/tests/filters.py @ 204:516a6cae0aa8

* Implement reverse add/mul operators for the `Markup` class, so that the result is also a `Markup` instance.
* Override the bitwise OR (`|`) operator on the `Stream` class, which allows chaining stream filters with a syntax similar to Unix shell pipes.
author cmlenz
date Fri, 25 Aug 2006 23:58:36 +0000
parents 28b56f09a7e1
children
comparison
equal deleted inserted replaced
203:6addf7af09f6 204:516a6cae0aa8
22 class HTMLSanitizerTestCase(unittest.TestCase): 22 class HTMLSanitizerTestCase(unittest.TestCase):
23 23
24 def test_sanitize_unchanged(self): 24 def test_sanitize_unchanged(self):
25 html = HTML('<a href="#">fo<br />o</a>') 25 html = HTML('<a href="#">fo<br />o</a>')
26 self.assertEquals(u'<a href="#">fo<br/>o</a>', 26 self.assertEquals(u'<a href="#">fo<br/>o</a>',
27 unicode(html.filter(HTMLSanitizer()))) 27 unicode(html | HTMLSanitizer()))
28 28
29 def test_sanitize_escape_text(self): 29 def test_sanitize_escape_text(self):
30 html = HTML('<a href="#">fo&amp;</a>') 30 html = HTML('<a href="#">fo&amp;</a>')
31 self.assertEquals(u'<a href="#">fo&amp;</a>', 31 self.assertEquals(u'<a href="#">fo&amp;</a>',
32 unicode(html.filter(HTMLSanitizer()))) 32 unicode(html | HTMLSanitizer()))
33 html = HTML('<a href="#">&lt;foo&gt;</a>') 33 html = HTML('<a href="#">&lt;foo&gt;</a>')
34 self.assertEquals(u'<a href="#">&lt;foo&gt;</a>', 34 self.assertEquals(u'<a href="#">&lt;foo&gt;</a>',
35 unicode(html.filter(HTMLSanitizer()))) 35 unicode(html | HTMLSanitizer()))
36 36
37 def test_sanitize_entityref_text(self): 37 def test_sanitize_entityref_text(self):
38 html = HTML('<a href="#">fo&ouml;</a>') 38 html = HTML('<a href="#">fo&ouml;</a>')
39 self.assertEquals(u'<a href="#">foö</a>', 39 self.assertEquals(u'<a href="#">foö</a>',
40 unicode(html.filter(HTMLSanitizer()))) 40 unicode(html | HTMLSanitizer()))
41 41
42 def test_sanitize_escape_attr(self): 42 def test_sanitize_escape_attr(self):
43 html = HTML('<div title="&lt;foo&gt;"></div>') 43 html = HTML('<div title="&lt;foo&gt;"></div>')
44 self.assertEquals(u'<div title="&lt;foo&gt;"/>', 44 self.assertEquals(u'<div title="&lt;foo&gt;"/>',
45 unicode(html.filter(HTMLSanitizer()))) 45 unicode(html | HTMLSanitizer()))
46 46
47 def test_sanitize_close_empty_tag(self): 47 def test_sanitize_close_empty_tag(self):
48 html = HTML('<a href="#">fo<br>o</a>') 48 html = HTML('<a href="#">fo<br>o</a>')
49 self.assertEquals(u'<a href="#">fo<br/>o</a>', 49 self.assertEquals(u'<a href="#">fo<br/>o</a>',
50 unicode(html.filter(HTMLSanitizer()))) 50 unicode(html | HTMLSanitizer()))
51 51
52 def test_sanitize_invalid_entity(self): 52 def test_sanitize_invalid_entity(self):
53 html = HTML('&junk;') 53 html = HTML('&junk;')
54 self.assertEquals('&amp;junk;', unicode(html.filter(HTMLSanitizer()))) 54 self.assertEquals('&amp;junk;', unicode(html | HTMLSanitizer()))
55 55
56 def test_sanitize_remove_script_elem(self): 56 def test_sanitize_remove_script_elem(self):
57 html = HTML('<script>alert("Foo")</script>') 57 html = HTML('<script>alert("Foo")</script>')
58 self.assertEquals(u'', unicode(html.filter(HTMLSanitizer()))) 58 self.assertEquals(u'', unicode(html | HTMLSanitizer()))
59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') 59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
60 self.assertEquals(u'', unicode(html.filter(HTMLSanitizer()))) 60 self.assertEquals(u'', unicode(html | HTMLSanitizer()))
61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') 61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
62 self.assertRaises(ParseError, HTML, 62 self.assertRaises(ParseError, HTML,
63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') 63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
64 64
65 def test_sanitize_remove_onclick_attr(self): 65 def test_sanitize_remove_onclick_attr(self):
66 html = HTML('<div onclick=\'alert("foo")\' />') 66 html = HTML('<div onclick=\'alert("foo")\' />')
67 self.assertEquals(u'<div/>', unicode(html.filter(HTMLSanitizer()))) 67 self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer()))
68 68
69 def test_sanitize_remove_style_scripts(self): 69 def test_sanitize_remove_style_scripts(self):
70 # Inline style with url() using javascript: scheme 70 # Inline style with url() using javascript: scheme
71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') 71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
72 self.assertEquals(u'<div/>', unicode(html.filter(HTMLSanitizer()))) 72 self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer()))
73 # Inline style with url() using javascript: scheme, using control char 73 # Inline style with url() using javascript: scheme, using control char
74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>') 74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
75 self.assertEquals(u'<div/>', unicode(html.filter(HTMLSanitizer()))) 75 self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer()))
76 # Inline style with url() using javascript: scheme, in quotes 76 # Inline style with url() using javascript: scheme, in quotes
77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') 77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
78 self.assertEquals(u'<div/>', unicode(html.filter(HTMLSanitizer()))) 78 self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer()))
79 # IE expressions in CSS not allowed 79 # IE expressions in CSS not allowed
80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') 80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
81 self.assertEquals(u'<div/>', unicode(html.filter(HTMLSanitizer()))) 81 self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer()))
82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' 82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
83 'color: #fff\'>') 83 'color: #fff\'>')
84 self.assertEquals(u'<div style="color: #fff"/>', 84 self.assertEquals(u'<div style="color: #fff"/>',
85 unicode(html.filter(HTMLSanitizer()))) 85 unicode(html | HTMLSanitizer()))
86 86
87 def test_sanitize_remove_src_javascript(self): 87 def test_sanitize_remove_src_javascript(self):
88 html = HTML('<img src=\'javascript:alert("foo")\'>') 88 html = HTML('<img src=\'javascript:alert("foo")\'>')
89 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 89 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
90 # Case-insensitive protocol matching 90 # Case-insensitive protocol matching
91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') 91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
92 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 92 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
93 # Grave accents (not parsed) 93 # Grave accents (not parsed)
94 self.assertRaises(ParseError, HTML, 94 self.assertRaises(ParseError, HTML,
95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') 95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
96 # Protocol encoded using UTF-8 numeric entities 96 # Protocol encoded using UTF-8 numeric entities
97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;' 97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
98 '&#112;&#116;&#58;alert("foo")\'>') 98 '&#112;&#116;&#58;alert("foo")\'>')
99 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 99 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
100 # Protocol encoded using UTF-8 numeric entities without a semicolon 100 # Protocol encoded using UTF-8 numeric entities without a semicolon
101 # (which is allowed because the max number of digits is used) 101 # (which is allowed because the max number of digits is used)
102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097' 102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116' 103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
104 '&#0000058alert("foo")\'>') 104 '&#0000058alert("foo")\'>')
105 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 105 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon 106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon
107 # (which is allowed because the max number of digits is used) 107 # (which is allowed because the max number of digits is used)
108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69' 108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
109 '&#x70&#x74&#x3A;alert("foo")\'>') 109 '&#x70&#x74&#x3A;alert("foo")\'>')
110 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 110 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
111 # Embedded tab character in protocol 111 # Embedded tab character in protocol
112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') 112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
113 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 113 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
114 # Embedded tab character in protocol, but encoded this time 114 # Embedded tab character in protocol, but encoded this time
115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>') 115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
116 self.assertEquals(u'<img/>', unicode(html.filter(HTMLSanitizer()))) 116 self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer()))
117 117
118 118
119 def suite(): 119 def suite():
120 suite = unittest.TestSuite() 120 suite = unittest.TestSuite()
121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) 121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
Copyright (C) 2012-2017 Edgewall Software