Mercurial > genshi > mirror
comparison markup/tests/filters.py @ 115:e92fb402ee04 trunk
Various fixes for breakage introduced in [132].
author | cmlenz |
---|---|
date | Tue, 01 Aug 2006 10:42:48 +0000 |
parents | d10fbba1d5e0 |
children | d1ce85a7f296 |
comparison
equal
deleted
inserted
replaced
114:4c4e81d12649 | 115:e92fb402ee04 |
---|---|
22 class HTMLSanitizerTestCase(unittest.TestCase): | 22 class HTMLSanitizerTestCase(unittest.TestCase): |
23 | 23 |
24 def test_sanitize_unchanged(self): | 24 def test_sanitize_unchanged(self): |
25 html = HTML('<a href="#">fo<br />o</a>') | 25 html = HTML('<a href="#">fo<br />o</a>') |
26 self.assertEquals('<a href="#">fo<br/>o</a>', | 26 self.assertEquals('<a href="#">fo<br/>o</a>', |
27 str(html.filter(HTMLSanitizer())) | 27 str(html.filter(HTMLSanitizer()))) |
28 | 28 |
29 def test_sanitize_escape_text(self): | 29 def test_sanitize_escape_text(self): |
30 html = HTML('<a href="#">fo&amp;</a>') | 30 html = HTML('<a href="#">fo&amp;</a>') |
31 self.assertEquals('<a href="#">fo&amp;</a>', | 31 self.assertEquals('<a href="#">fo&amp;</a>', |
32 str(html.filter(HTMLSanitizer())) | 32 str(html.filter(HTMLSanitizer()))) |
33 html = HTML('<a href="#">&lt;foo&gt;</a>') | 33 html = HTML('<a href="#">&lt;foo&gt;</a>') |
34 self.assertEquals('<a href="#">&lt;foo&gt;</a>', | 34 self.assertEquals('<a href="#">&lt;foo&gt;</a>', |
35 str(html.filter(HTMLSanitizer())) | 35 str(html.filter(HTMLSanitizer()))) |
36 | 36 |
37 def test_sanitize_entityref_text(self): | 37 def test_sanitize_entityref_text(self): |
38 html = HTML('<a href="#">fo&ouml;</a>') | 38 html = HTML('<a href="#">fo&ouml;</a>') |
39 self.assertEquals(u'<a href="#">foö</a>', | 39 self.assertEquals(u'<a href="#">foö</a>', |
40 str(html.filter(HTMLSanitizer())) | 40 str(html.filter(HTMLSanitizer()))) |
41 | 41 |
42 def test_sanitize_escape_attr(self): | 42 def test_sanitize_escape_attr(self): |
43 html = HTML('<div title="&lt;foo&gt;"></div>') | 43 html = HTML('<div title="&lt;foo&gt;"></div>') |
44 self.assertEquals('<div title="&lt;foo&gt;"/>', | 44 self.assertEquals('<div title="&lt;foo&gt;"/>', |
45 str(html.filter(HTMLSanitizer()))) | 45 str(html.filter(HTMLSanitizer()))) |
46 | 46 |
47 def test_sanitize_close_empty_tag(self): | 47 def test_sanitize_close_empty_tag(self): |
48 html = HTML('<a href="#">fo<br>o</a>') | 48 html = HTML('<a href="#">fo<br>o</a>') |
49 self.assertEquals('<a href="#">fo<br/>o</a>', | 49 self.assertEquals('<a href="#">fo<br/>o</a>', |
50 str(html.filter(HTMLSanitizer())) | 50 str(html.filter(HTMLSanitizer()))) |
51 | 51 |
52 def test_sanitize_invalid_entity(self): | 52 def test_sanitize_invalid_entity(self): |
53 html = HTML('&junk;') | 53 html = HTML('&junk;') |
54 self.assertEquals('&amp;junk;', str(html.filter(HTMLSanitizer())) | 54 self.assertEquals('&amp;junk;', str(html.filter(HTMLSanitizer()))) |
55 | 55 |
56 def test_sanitize_remove_script_elem(self): | 56 def test_sanitize_remove_script_elem(self): |
57 html = HTML('<script>alert("Foo")</script>') | 57 html = HTML('<script>alert("Foo")</script>') |
58 self.assertEquals('', str(html.filter(HTMLSanitizer())) | 58 self.assertEquals('', str(html.filter(HTMLSanitizer()))) |
59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') | 59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') |
60 self.assertEquals('', str(html.filter(HTMLSanitizer())) | 60 self.assertEquals('', str(html.filter(HTMLSanitizer()))) |
61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') | 61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') |
62 self.assertRaises(ParseError, HTML, | 62 self.assertRaises(ParseError, HTML, |
63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') | 63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') |
64 | 64 |
65 def test_sanitize_remove_onclick_attr(self): | 65 def test_sanitize_remove_onclick_attr(self): |
66 html = HTML('<div onclick=\'alert("foo")\' />') | 66 html = HTML('<div onclick=\'alert("foo")\' />') |
67 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | 67 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer()))) |
68 | 68 |
69 def test_sanitize_remove_style_scripts(self): | 69 def test_sanitize_remove_style_scripts(self): |
70 # Inline style with url() using javascript: scheme | 70 # Inline style with url() using javascript: scheme |
71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') | 71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') |
72 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | 72 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer()))) |
73 # Inline style with url() using javascript: scheme, using control char | 73 # Inline style with url() using javascript: scheme, using control char |
74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>') | 74 html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>') |
75 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | 75 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer()))) |
76 # Inline style with url() using javascript: scheme, in quotes | 76 # Inline style with url() using javascript: scheme, in quotes |
77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') | 77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') |
78 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | 78 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer()))) |
79 # IE expressions in CSS not allowed | 79 # IE expressions in CSS not allowed |
80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') | 80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') |
81 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | 81 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer()))) |
82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' | 82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' |
83 'color: #fff\'>') | 83 'color: #fff\'>') |
84 self.assertEquals('<div style="color: #fff"/>', | 84 self.assertEquals('<div style="color: #fff"/>', |
85 str(html.filter(HTMLSanitizer())) | 85 str(html.filter(HTMLSanitizer()))) |
86 | 86 |
87 def test_sanitize_remove_src_javascript(self): | 87 def test_sanitize_remove_src_javascript(self): |
88 html = HTML('<img src=\'javascript:alert("foo")\'>') | 88 html = HTML('<img src=\'javascript:alert("foo")\'>') |
89 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 89 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
90 # Case-insensitive protocol matching | 90 # Case-insensitive protocol matching |
91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') | 91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') |
92 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 92 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
93 # Grave accents (not parsed) | 93 # Grave accents (not parsed) |
94 self.assertRaises(ParseError, HTML, | 94 self.assertRaises(ParseError, HTML, |
95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') | 95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') |
96 # Protocol encoded using UTF-8 numeric entities | 96 # Protocol encoded using UTF-8 numeric entities |
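97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;' | 97 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;' |
98 '&#112;&#116;&#58;alert("foo")\'>') | 98 '&#112;&#116;&#58;alert("foo")\'>') |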
99 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 99 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
100 # Protocol encoded using UTF-8 numeric entities without a semicolon | 100 # Protocol encoded using UTF-8 numeric entities without a semicolon |
101 # (which is allowed because the max number of digits is used) | 101 # (which is allowed because the max number of digits is used) |
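102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097' | 102 html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097' |
103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116' | 103 '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116' |
104 '&#0000058alert("foo")\'>') | 104 '&#0000058alert("foo")\'>') |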
105 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 105 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon | 106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon |
107 # (which is allowed because the max number of digits is used) | 107 # (which is allowed because the max number of digits is used) |
108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69' | 108 html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69' |
109 '&#x70&#x74&#x3A;alert("foo")\'>') | 109 '&#x70&#x74&#x3A;alert("foo")\'>') |
110 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 110 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
111 # Embedded tab character in protocol | 111 # Embedded tab character in protocol |
112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') | 112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') |
113 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 113 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
114 # Embedded tab character in protocol, but encoded this time | 114 # Embedded tab character in protocol, but encoded this time |
115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>') | 115 html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>') |
116 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | 116 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer()))) |
117 | 117 |
118 | 118 |
119 def suite(): | 119 def suite(): |
120 suite = unittest.TestSuite() | 120 suite = unittest.TestSuite() |
121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) | 121 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) |