Mercurial > genshi > genshi-test
changeset 840:878306a5b465
Ported some of the HTML sanitization improvements from Trac (see [T7658]).
author | cmlenz |
---|---|
date | Tue, 17 Mar 2009 17:50:50 +0000 |
parents | 80bce2f6f9fe |
children | 67ec9a402bdc |
files | ChangeLog genshi/filters/html.py genshi/filters/tests/html.py |
diffstat | 3 files changed, 72 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,7 @@ with Google App Engine. This, too, is the result of integrating work done by Marcin Kurczych during GSoC 2008. * Added caching in the serialization stage for improved performance. + * Various improvements to the HTML sanitization filter. Version 0.5.2
--- a/genshi/filters/html.py +++ b/genshi/filters/html.py @@ -211,6 +211,10 @@ default because it is very hard for such sanitizing to be completely safe, especially considering how much error recovery current web browsers perform. + It also does some basic filtering of CSS properties that may be used for + typical phishing attacks. For more sophisticated filtering, this class + provides a couple of hooks that can be overridden in sub-classes. + :warn: Note that this special processing of CSS is currently only applied to style attributes, **not** style elements. """ @@ -274,7 +278,7 @@ if waiting_for: continue tag, attrs = data - if tag not in self.safe_tags: + if not self.is_safe_elem(tag, attrs): waiting_for = tag continue @@ -309,6 +313,43 @@ if not waiting_for: yield kind, data, pos + def is_safe_css(self, propname, value): + """Determine whether the given css property declaration is to be + considered safe for inclusion in the output. + + :param propname: the CSS property name + :param value: the value of the property + :return: whether the property value should be considered safe + :rtype: bool + :since: version 0.6 + """ + if propname == 'position': + return False + if propname.startswith('margin') and '-' in value: + # Negative margins can be used for phishing + return False + return True + + def is_safe_elem(self, tag, attrs): + """Determine whether the given element should be considered safe for + inclusion in the output. + + :param tag: the tag name of the element + :type tag: QName + :param attrs: the element attributes + :type attrs: Attrs + :return: whether the element should be considered safe + :rtype: bool + :since: version 0.6 + """ + if tag not in self.safe_tags: + return False + if tag.localname == 'input': + input_type = attrs.get('type', '').lower() + if input_type == 'password': + return False + return True + def is_safe_uri(self, uri): """Determine whether the given URI is to be considered safe for inclusion in the output. @@ -369,10 +410,16 @@ decl = decl.strip() if not decl: continue + try: + propname, value = decl.split(':', 1) + except ValueError: + continue + if not self.is_safe_css(propname.strip().lower(), value.strip()): + continue is_evil = False - if 'expression' in decl: + if 'expression' in value: is_evil = True - for match in re.finditer(r'url\s*\(([^)]+)', decl): + for match in re.finditer(r'url\s*\(([^)]+)', value): if not self.is_safe_uri(match.group(1)): is_evil = True break
--- a/genshi/filters/tests/html.py +++ b/genshi/filters/tests/html.py @@ -357,6 +357,10 @@ html = HTML('<div onclick=\'alert("foo")\' />') self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer())) + def test_sanitize_remove_input_password(self): + html = HTML('<form><input type="password" /></form>') + self.assertEquals(u'<form/>', unicode(html | HTMLSanitizer())) + def test_sanitize_remove_comments(self): html = HTML('''<div><!-- conditional comment crap --></div>''') self.assertEquals(u'<div/>', unicode(html | HTMLSanitizer())) @@ -394,6 +398,22 @@ html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>') self.assertEquals(u'<div/>', unicode(html | sanitizer)) + def test_sanitize_remove_style_phishing(self): + sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + # The position property is not allowed + html = HTML('<div style="position:absolute;top:0"></div>') + self.assertEquals(u'<div style="top:0"/>', unicode(html | sanitizer)) + # Normal margins get passed through + html = HTML('<div style="margin:10px 20px"></div>') + self.assertEquals(u'<div style="margin:10px 20px"/>', unicode(html | sanitizer)) + # But not negative margins + html = HTML('<div style="margin:-1000px 0 0"></div>') + self.assertEquals(u'<div/>', unicode(html | sanitizer)) + html = HTML('<div style="margin-left:-2000px 0 0"></div>') + self.assertEquals(u'<div/>', unicode(html | sanitizer)) + html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>') + self.assertEquals(u'<div/>', unicode(html | sanitizer)) + def test_sanitize_remove_src_javascript(self): html = HTML('<img src=\'javascript:alert("foo")\'>') self.assertEquals(u'<img/>', unicode(html | HTMLSanitizer())) @@ -433,5 +453,6 @@ suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) return suite + if __name__ == '__main__': unittest.main(defaultTest='suite')