Mercurial > genshi > genshi-test
comparison markup/tests/filters.py @ 113:e815c2c07572
Removed the `sanitize()` method from the `Markup` class and migrated the existing unit tests to `markup.tests.filters`. A `Stream.filter()` method is provided instead, which can be used to conveniently apply a filter to a stream.
author | cmlenz |
---|---|
date | Mon, 31 Jul 2006 23:00:06 +0000 |
parents | |
children | d4ea684655d9 |
comparison
equal
deleted
inserted
replaced
112:a834a6669681 | 113:e815c2c07572 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2006 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://markup.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://markup.edgewall.org/log/. | |
13 | |
14 import doctest | |
15 import unittest | |
16 | |
17 from markup.core import Stream | |
18 from markup.input import HTML, ParseError | |
19 from markup.filters import HTMLSanitizer | |
20 | |
21 | |
22 class HTMLSanitizerTestCase(unittest.TestCase): | |
23 | |
24 def test_sanitize_unchanged(self): | |
25 html = HTML('<a href="#">fo<br />o</a>') | |
26 self.assertEquals('<a href="#">fo<br/>o</a>', | |
27 str(html.filter(HTMLSanitizer())) | |
28 | |
29 def test_sanitize_escape_text(self): | |
30 html = HTML('<a href="#">fo&</a>') | |
31 self.assertEquals('<a href="#">fo&</a>', | |
32 str(html.filter(HTMLSanitizer())) | |
33 html = HTML('<a href="#"><foo></a>') | |
34 self.assertEquals('<a href="#"><foo></a>', | |
35 str(html.filter(HTMLSanitizer())) | |
36 | |
37 def test_sanitize_entityref_text(self): | |
38 html = HTML('<a href="#">foö</a>') | |
39 self.assertEquals(u'<a href="#">foö</a>', | |
40 str(html.filter(HTMLSanitizer())) | |
41 | |
42 def test_sanitize_escape_attr(self): | |
43 html = HTML('<div title="<foo>"></div>') | |
44 self.assertEquals('<div title="<foo>"/>', | |
45 str(html.filter(HTMLSanitizer()))) | |
46 | |
47 def test_sanitize_close_empty_tag(self): | |
48 html = HTML('<a href="#">fo<br>o</a>') | |
49 self.assertEquals('<a href="#">fo<br/>o</a>', | |
50 str(html.filter(HTMLSanitizer())) | |
51 | |
52 def test_sanitize_invalid_entity(self): | |
53 html = HTML('&junk;') | |
54 self.assertEquals('&junk;', str(html.filter(HTMLSanitizer())) | |
55 | |
56 def test_sanitize_remove_script_elem(self): | |
57 html = HTML('<script>alert("Foo")</script>') | |
58 self.assertEquals('', str(html.filter(HTMLSanitizer())) | |
59 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') | |
60 self.assertEquals('', str(html.filter(HTMLSanitizer())) | |
61 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') | |
62 self.assertRaises(ParseError, HTML, | |
63 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') | |
64 | |
65 def test_sanitize_remove_onclick_attr(self): | |
66 html = HTML('<div onclick=\'alert("foo")\' />') | |
67 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | |
68 | |
69 def test_sanitize_remove_style_scripts(self): | |
70 # Inline style with url() using javascript: scheme | |
71 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') | |
72 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | |
73 # Inline style with url() using javascript: scheme, using control char | |
74 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') | |
75 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | |
76 # Inline style with url() using javascript: scheme, in quotes | |
77 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') | |
78 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | |
79 # IE expressions in CSS not allowed | |
80 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') | |
81 self.assertEquals('<div/>', str(html.filter(HTMLSanitizer())) | |
82 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' | |
83 'color: #fff\'>') | |
84 self.assertEquals('<div style="color: #fff"/>', | |
85 str(html.filter(HTMLSanitizer())) | |
86 | |
87 def test_sanitize_remove_src_javascript(self): | |
88 html = HTML('<img src=\'javascript:alert("foo")\'>') | |
89 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
90 # Case-insensitive protocol matching | |
91 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') | |
92 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
93 # Grave accents (not parsed) | |
94 self.assertRaises(ParseError, HTML, | |
95 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') | |
96 # Protocol encoded using UTF-8 numeric entities | |
97 html = HTML('<IMG SRC=\'javascri' | |
98 'pt:alert("foo")\'>') | |
99 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
100 # Protocol encoded using UTF-8 numeric entities without a semicolon | |
101 # (which is allowed because the max number of digits is used) | |
102 html = HTML('<IMG SRC=\'java' | |
103 'script' | |
104 ':alert("foo")\'>') | |
105 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
106 # Protocol encoded using UTF-8 numeric hex entities without a semicolon | |
107 # (which is allowed because the max number of digits is used) | |
108 html = HTML('<IMG SRC=\'javascri' | |
109 'pt:alert("foo")\'>') | |
110 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
111 # Embedded tab character in protocol | |
112 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') | |
113 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
114 # Embedded tab character in protocol, but encoded this time | |
115 html = HTML('<IMG SRC=\'jav	ascript:alert("foo");\'>') | |
116 self.assertEquals('<img/>', str(html.filter(HTMLSanitizer())) | |
117 | |
118 | |
def suite():
    """Return a ``TestSuite`` holding the ``HTMLSanitizerTestCase`` tests."""
    tests = unittest.TestSuite()
    # loadTestsFromTestCase() collects methods using the loader's default
    # ``test`` prefix, i.e. the same selection the deprecated
    # unittest.makeSuite(HTMLSanitizerTestCase, 'test') call produced.
    loader = unittest.TestLoader()
    tests.addTest(loader.loadTestsFromTestCase(HTMLSanitizerTestCase))
    return tests
123 | |
# Allow running this test module directly as a script; unittest resolves
# the 'suite' name in this module to find the tests to run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')