comparison markup/tests/filters.py @ 113:e815c2c07572

Removed the `sanitize()` method from the `Markup` class, and migrated the existing unit tests to `markup.tests.filters`. Provided a `Stream.filter()` method instead, which can be used to conveniently apply a filter to a stream.
author cmlenz
date Mon, 31 Jul 2006 23:00:06 +0000
parents
children d4ea684655d9
comparison
equal deleted inserted replaced
112:a834a6669681 113:e815c2c07572
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://markup.edgewall.org/wiki/License.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://markup.edgewall.org/log/.
13
14 import doctest
15 import unittest
16
17 from markup.core import Stream
18 from markup.input import HTML, ParseError
19 from markup.filters import HTMLSanitizer
20
21
class HTMLSanitizerTestCase(unittest.TestCase):
    """Tests for `HTMLSanitizer` applied to parsed HTML through `filter()`.

    Every test parses a snippet with `HTML()`, passes the result through a
    fresh `HTMLSanitizer` instance and compares the `str()` serialization
    with the expected markup.  Inputs the parser is expected to refuse are
    checked with `assertRaises(ParseError, ...)` instead.
    """

    def test_sanitize_unchanged(self):
        # Harmless markup is kept; the empty element is serialized as <br/>.
        html = HTML('<a href="#">fo<br />o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_escape_text(self):
        # Escaped characters in text must still be escaped in the output.
        html = HTML('<a href="#">fo&amp;</a>')
        self.assertEqual('<a href="#">fo&amp;</a>',
                         str(html.filter(HTMLSanitizer())))
        html = HTML('<a href="#">&lt;foo&gt;</a>')
        self.assertEqual('<a href="#">&lt;foo&gt;</a>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_entityref_text(self):
        # Named entity reference inside text content.
        html = HTML('<a href="#">fo&ouml;</a>')
        self.assertEqual(u'<a href="#">fo&ouml;</a>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_escape_attr(self):
        # Escaped characters in attribute values must stay escaped.
        html = HTML('<div title="&lt;foo&gt;"></div>')
        self.assertEqual('<div title="&lt;foo&gt;"/>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_close_empty_tag(self):
        # An unclosed <br> comes out as a properly closed empty element.
        html = HTML('<a href="#">fo<br>o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_invalid_entity(self):
        # An unknown entity reference is passed through as-is.
        html = HTML('&junk;')
        self.assertEqual('&junk;', str(html.filter(HTMLSanitizer())))

    def test_sanitize_remove_script_elem(self):
        # <script> elements are dropped entirely, regardless of tag case.
        html = HTML('<script>alert("Foo")</script>')
        self.assertEqual('', str(html.filter(HTMLSanitizer())))
        html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
        self.assertEqual('', str(html.filter(HTMLSanitizer())))
        # Malformed tag names never reach the sanitizer: parsing fails first.
        self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
        self.assertRaises(ParseError, HTML,
                          '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')

    def test_sanitize_remove_onclick_attr(self):
        # Event handler attributes are stripped from the element.
        html = HTML('<div onclick=\'alert("foo")\' />')
        self.assertEqual('<div/>', str(html.filter(HTMLSanitizer())))

    def test_sanitize_remove_style_scripts(self):
        # Inline style with url() using javascript: scheme
        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', str(html.filter(HTMLSanitizer())))
        # Inline style with url() using javascript: scheme, using control char
        html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', str(html.filter(HTMLSanitizer())))
        # Inline style with url() using javascript: scheme, in quotes
        html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
        self.assertEqual('<div/>', str(html.filter(HTMLSanitizer())))
        # IE expressions in CSS not allowed
        html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
        self.assertEqual('<div/>', str(html.filter(HTMLSanitizer())))
        # Only the offending declaration is removed; the safe one is kept.
        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
                    'color: #fff\'>')
        self.assertEqual('<div style="color: #fff"/>',
                         str(html.filter(HTMLSanitizer())))

    def test_sanitize_remove_src_javascript(self):
        html = HTML('<img src=\'javascript:alert("foo")\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Case-insensitive protocol matching
        html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Grave accents (not parsed)
        self.assertRaises(ParseError, HTML,
                          '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
        # Protocol encoded using UTF-8 numeric entities
        html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                    '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                    '&#0000058alert("foo")\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                    '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Embedded tab character in protocol
        html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
        # Embedded tab character in protocol, but encoded this time
        html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        self.assertEqual('<img/>', str(html.filter(HTMLSanitizer())))
117
118
def suite():
    """Return a `unittest.TestSuite` holding all `HTMLSanitizerTestCase` tests.

    The test case's methods are collected with the default test loader
    (method prefix ``test``) and wrapped in an outer suite, so callers get
    the same nested suite structure as before.
    """
    # `unittest.makeSuite` is deprecated and no longer exists in newer Python
    # releases; the default loader collects the same `test*` methods.
    tests = unittest.TestSuite()
    tests.addTest(
        unittest.defaultTestLoader.loadTestsFromTestCase(HTMLSanitizerTestCase)
    )
    return tests
123
# When executed as a script, hand control to unittest and let it run the
# tests produced by the module-level suite() function.
if __name__ == "__main__":
    unittest.main(defaultTest="suite")
Copyright (C) 2012-2017 Edgewall Software