comparison genshi/filters/tests/html.py @ 964:cadb6703ac18 stable-0.6.x

Merge r1187 from trunk (fix HTMLSanitizer tests for Python 2.7 -- see #501).
author hodgestar
date Sat, 29 Dec 2012 12:00:00 +0000
parents 40415173f513
children
comparison
equal deleted inserted replaced
953:ea40c6ff63da 964:cadb6703ac18
328 return HTMLSanitizer(safe_attrs=safe_attrs) 328 return HTMLSanitizer(safe_attrs=safe_attrs)
329 329
330 330
331 class HTMLSanitizerTestCase(unittest.TestCase): 331 class HTMLSanitizerTestCase(unittest.TestCase):
332 332
333 def assert_parse_error_or_equal(self, expected, exploit):
334 try:
335 html = HTML(exploit)
336 except ParseError:
337 return
338 self.assertEquals(expected, (html | HTMLSanitizer()).render())
339
333 def test_sanitize_unchanged(self): 340 def test_sanitize_unchanged(self):
334 html = HTML('<a href="#">fo<br />o</a>') 341 html = HTML('<a href="#">fo<br />o</a>')
335 self.assertEquals('<a href="#">fo<br/>o</a>', 342 self.assertEquals('<a href="#">fo<br/>o</a>',
336 (html | HTMLSanitizer()).render()) 343 (html | HTMLSanitizer()).render())
337 html = HTML('<a href="#with:colon">foo</a>') 344 html = HTML('<a href="#with:colon">foo</a>')
368 def test_sanitize_remove_script_elem(self): 375 def test_sanitize_remove_script_elem(self):
369 html = HTML('<script>alert("Foo")</script>') 376 html = HTML('<script>alert("Foo")</script>')
370 self.assertEquals('', (html | HTMLSanitizer()).render()) 377 self.assertEquals('', (html | HTMLSanitizer()).render())
371 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') 378 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
372 self.assertEquals('', (html | HTMLSanitizer()).render()) 379 self.assertEquals('', (html | HTMLSanitizer()).render())
373 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') 380 src = u'<SCR\0IPT>alert("foo")</SCR\0IPT>'
374 self.assertRaises(ParseError, HTML, 381 self.assert_parse_error_or_equal('&lt;SCR\x00IPT&gt;alert("foo")', src)
375 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') 382 src = u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>'
383 self.assert_parse_error_or_equal('&lt;SCRIPT&amp;XYZ; '
384 'SRC="http://example.com/"&gt;', src)
376 385
377 def test_sanitize_remove_onclick_attr(self): 386 def test_sanitize_remove_onclick_attr(self):
378 html = HTML('<div onclick=\'alert("foo")\' />') 387 html = HTML('<div onclick=\'alert("foo")\' />')
379 self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) 388 self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
380 389
441 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 450 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
442 # Case-insensitive protocol matching 451 # Case-insensitive protocol matching
443 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') 452 html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
444 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 453 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
445 # Grave accents (not parsed) 454 # Grave accents (not parsed)
446 self.assertRaises(ParseError, HTML, 455 src = u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>'
447 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') 456 self.assert_parse_error_or_equal('<img/>', src)
448 # Protocol encoded using UTF-8 numeric entities 457 # Protocol encoded using UTF-8 numeric entities
449 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;' 458 html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
450 '&#112;&#116;&#58;alert("foo")\'>') 459 '&#112;&#116;&#58;alert("foo")\'>')
451 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 460 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
452 # Protocol encoded using UTF-8 numeric entities without a semicolon 461 # Protocol encoded using UTF-8 numeric entities without a semicolon
Copyright (C) 2012-2017 Edgewall Software