cmlenz@113: # -*- coding: utf-8 -*- cmlenz@113: # cmlenz@113: # Copyright (C) 2006 Edgewall Software cmlenz@113: # All rights reserved. cmlenz@113: # cmlenz@113: # This software is licensed as described in the file COPYING, which cmlenz@113: # you should have received as part of this distribution. The terms cmlenz@230: # are also available at http://genshi.edgewall.org/wiki/License. cmlenz@113: # cmlenz@113: # This software consists of voluntary contributions made by many cmlenz@113: # individuals. For the exact contribution history, see the revision cmlenz@230: # history and logs, available at http://genshi.edgewall.org/log/. cmlenz@113: cmlenz@113: import doctest cmlenz@113: import unittest cmlenz@113: cmlenz@275: from genshi import filters cmlenz@230: from genshi.input import HTML, ParseError cmlenz@275: from genshi.filters import HTMLFormFiller, HTMLSanitizer cmlenz@113: cmlenz@113: cmlenz@275: class HTMLFormFillerTestCase(unittest.TestCase): cmlenz@275: cmlenz@275: def test_fill_input_text_no_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_text_single_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': 'bar'}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_text_multi_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': ['bar']}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_hidden_no_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_hidden_single_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': 'bar'}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_hidden_multi_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': ['bar']}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_textarea_no_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': 'bar'}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_textarea_multi_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': ['bar']}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_checkbox_no_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_checkbox_single_value_auto(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ''}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': 'on'}))) cmlenz@275: cmlenz@275: def test_fill_input_checkbox_single_value_defined(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': '1'}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': '2'}))) cmlenz@275: cmlenz@275: def test_fill_input_checkbox_multi_value_auto(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': []}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ['on']}))) cmlenz@275: cmlenz@275: def test_fill_input_checkbox_multi_value_defined(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ['1']}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ['2']}))) cmlenz@275: cmlenz@275: def test_fill_input_radio_no_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_input_radio_single_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': '1'}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': '2'}))) cmlenz@275: cmlenz@275: def test_fill_input_radio_multi_value(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ['1']}))) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html | HTMLFormFiller(data={'foo': ['2']}))) cmlenz@275: cmlenz@275: def test_fill_select_no_value_auto(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_select_no_value_defined(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller() cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_select_single_value_auto(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': '1'}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_select_single_value_defined(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': '1'}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_select_multi_value_auto(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': ['1', '3']}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: def test_fill_select_multi_value_defined(self): cmlenz@275: html = HTML("""

cmlenz@275: cmlenz@275:

""") | HTMLFormFiller(data={'foo': ['1', '3']}) cmlenz@275: self.assertEquals("""

cmlenz@275: cmlenz@275:

""", unicode(html)) cmlenz@275: cmlenz@275: cmlenz@113: class HTMLSanitizerTestCase(unittest.TestCase): cmlenz@113: cmlenz@113: def test_sanitize_unchanged(self): cmlenz@113: html = HTML('fo
o
') cmlenz@144: self.assertEquals(u'fo
o
', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_escape_text(self): cmlenz@113: html = HTML('fo&') cmlenz@144: self.assertEquals(u'fo&', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: html = HTML('<foo>') cmlenz@144: self.assertEquals(u'<foo>', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_entityref_text(self): cmlenz@113: html = HTML('foö') cmlenz@144: self.assertEquals(u'foƶ', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_escape_attr(self): cmlenz@113: html = HTML('
') cmlenz@144: self.assertEquals(u'
', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_close_empty_tag(self): cmlenz@113: html = HTML('fo
o
') cmlenz@144: self.assertEquals(u'fo
o
', cmlenz@204: unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_invalid_entity(self): cmlenz@113: html = HTML('&junk;') cmlenz@204: self.assertEquals('&junk;', unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_remove_script_elem(self): cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: self.assertRaises(ParseError, HTML, 'alert("foo")') cmlenz@113: self.assertRaises(ParseError, HTML, cmlenz@113: '') cmlenz@113: cmlenz@113: def test_sanitize_remove_onclick_attr(self): cmlenz@113: html = HTML('
') cmlenz@204: self.assertEquals(u'
', unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: def test_sanitize_remove_style_scripts(self): cmlenz@431: sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) cmlenz@113: # Inline style with url() using javascript: scheme cmlenz@113: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@113: # Inline style with url() using javascript: scheme, using control char cmlenz@113: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@113: # Inline style with url() using javascript: scheme, in quotes cmlenz@113: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@113: # IE expressions in CSS not allowed cmlenz@113: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@113: html = HTML('
') cmlenz@144: self.assertEquals(u'
', cmlenz@431: unicode(html | sanitizer)) cmlenz@431: # Inline style with url() using javascript: scheme, using unicode cmlenz@431: # escapes cmlenz@431: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@431: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@431: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@431: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@431: html = HTML('
') cmlenz@431: self.assertEquals(u'
', unicode(html | sanitizer)) cmlenz@113: cmlenz@113: def test_sanitize_remove_src_javascript(self): cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Case-insensitive protocol matching cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Grave accents (not parsed) cmlenz@113: self.assertRaises(ParseError, HTML, cmlenz@113: '') cmlenz@113: # Protocol encoded using UTF-8 numeric entities cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Protocol encoded using UTF-8 numeric entities without a semicolon cmlenz@113: # (which is allowed because the max number of digits is used) cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Protocol encoded using UTF-8 numeric hex entities without a semicolon cmlenz@113: # (which is allowed because the max number of digits is used) cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Embedded tab character in protocol cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: # Embedded tab character in protocol, but encoded this time cmlenz@113: html = HTML('') cmlenz@204: self.assertEquals(u'', unicode(html | HTMLSanitizer())) cmlenz@113: cmlenz@113: cmlenz@113: def suite(): cmlenz@113: suite = unittest.TestSuite() cmlenz@275: suite.addTest(doctest.DocTestSuite(filters)) cmlenz@275: suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test')) cmlenz@113: suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) cmlenz@113: return suite cmlenz@113: cmlenz@113: if __name__ == '__main__': cmlenz@113: unittest.main(defaultTest='suite')