Mercurial > genshi > mirror
changeset 949:8bc6f32fdd45 trunk
Improve sanitizing of CSS in style attributes (note that the Genshi documentation already warns users that enabling the style attribute is dangerous -- now it is slightly less dangerous). Fixes #455. Patch taken from jomae's Trac commit trac:r10788 and modified for Genshi -- thanks!
author | hodgestar |
---|---|
date | Fri, 02 Sep 2011 22:01:42 +0000 |
parents | 0f9fe59dfa00 |
children | 981f3fc8c3ed |
files | genshi/filters/html.py genshi/filters/tests/test_html.py |
diffstat | 2 files changed, 195 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/genshi/filters/html.py +++ b/genshi/filters/html.py @@ -230,7 +230,7 @@ :warn: Note that this special processing of CSS is currently only applied to style attributes, **not** style elements. - """ + " """ SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', @@ -254,13 +254,42 @@ 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) + SAFE_CSS = frozenset([ + # CSS 3 properties <http://www.w3.org/TR/CSS/#properties> + 'background', 'background-attachment', 'background-color', + 'background-image', 'background-position', 'background-repeat', + 'border', 'border-bottom', 'border-bottom-color', + 'border-bottom-style', 'border-bottom-width', 'border-collapse', + 'border-color', 'border-left', 'border-left-color', + 'border-left-style', 'border-left-width', 'border-right', + 'border-right-color', 'border-right-style', 'border-right-width', + 'border-spacing', 'border-style', 'border-top', 'border-top-color', + 'border-top-style', 'border-top-width', 'border-width', 'bottom', + 'caption-side', 'clear', 'clip', 'color', 'content', + 'counter-increment', 'counter-reset', 'cursor', 'direction', 'display', + 'empty-cells', 'float', 'font', 'font-family', 'font-size', + 'font-style', 'font-variant', 'font-weight', 'height', 'left', + 'letter-spacing', 'line-height', 'list-style', 'list-style-image', + 'list-style-position', 'list-style-type', 'margin', 'margin-bottom', + 'margin-left', 'margin-right', 'margin-top', 'max-height', 'max-width', + 'min-height', 'min-width', 'opacity', 'orphans', 'outline', + 'outline-color', 'outline-style', 'outline-width', 'overflow', + 'padding', 'padding-bottom', 'padding-left', 'padding-right', + 'padding-top', 'page-break-after', 'page-break-before', + 'page-break-inside', 'quotes', 'right', 'table-layout', + 'text-align', 'text-decoration', 'text-indent', 'text-transform', + 'top', 'unicode-bidi', 'vertical-align', 'visibility', 'white-space', + 'widows', 'width', 'word-spacing', 'z-index', + ]) + SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc', 'src']) def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS, - safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS): + safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS, + safe_css=SAFE_CSS): """Create the sanitizer. The exact set of allowed elements and attributes can be configured. @@ -271,13 +300,63 @@ :param uri_attrs: a set of names of attributes that contain URIs """ self.safe_tags = safe_tags - "The set of tag names that are considered safe." + # The set of tag names that are considered safe. self.safe_attrs = safe_attrs - "The set of attribute names that are considered safe." + # The set of attribute names that are considered safe. + self.safe_css = safe_css + # The set of CSS properties that are considered safe. self.uri_attrs = uri_attrs - "The set of names of attributes that may contain URIs." + # The set of names of attributes that may contain URIs. self.safe_schemes = safe_schemes - "The set of URI schemes that are considered safe." + # The set of URI schemes that are considered safe. + + # IE6 <http://heideri.ch/jso/#80> + _EXPRESSION_SEARCH = re.compile(u""" + [eE + \uFF25 # FULLWIDTH LATIN CAPITAL LETTER E + \uFF45 # FULLWIDTH LATIN SMALL LETTER E + ] + [xX + \uFF38 # FULLWIDTH LATIN CAPITAL LETTER X + \uFF58 # FULLWIDTH LATIN SMALL LETTER X + ] + [pP + \uFF30 # FULLWIDTH LATIN CAPITAL LETTER P + \uFF50 # FULLWIDTH LATIN SMALL LETTER P + ] + [rR + \u0280 # LATIN LETTER SMALL CAPITAL R + \uFF32 # FULLWIDTH LATIN CAPITAL LETTER R + \uFF52 # FULLWIDTH LATIN SMALL LETTER R + ] + [eE + \uFF25 # FULLWIDTH LATIN CAPITAL LETTER E + \uFF45 # FULLWIDTH LATIN SMALL LETTER E + ] + [sS + \uFF33 # FULLWIDTH LATIN CAPITAL LETTER S + \uFF53 # FULLWIDTH LATIN SMALL LETTER S + ]{2} + [iI + \u026A # LATIN LETTER SMALL CAPITAL I + \uFF29 # FULLWIDTH LATIN CAPITAL LETTER I + \uFF49 # FULLWIDTH LATIN SMALL LETTER I + ] + [oO + \uFF2F # FULLWIDTH LATIN CAPITAL LETTER O + \uFF4F # FULLWIDTH LATIN SMALL LETTER O + ] + [nN + \u0274 # LATIN LETTER SMALL CAPITAL N + \uFF2E # FULLWIDTH LATIN CAPITAL LETTER N + \uFF4E # FULLWIDTH LATIN SMALL LETTER N + ] + """, re.VERBOSE).search + + # IE6 <http://openmya.hacker.jp/hasegawa/security/expression.txt> + # 7) Particular bit of Unicode characters + _URL_FINDITER = re.compile( + u'[Uu][Rr\u0280][Ll\u029F]\s*\(([^)]+)').finditer def __call__(self, stream): """Apply the filter to the given stream. @@ -336,7 +415,7 @@ :rtype: bool :since: version 0.6 """ - if propname == 'position': + if propname not in self.safe_css: return False if propname.startswith('margin') and '-' in value: # Negative margins can be used for phishing @@ -430,9 +509,9 @@ if not self.is_safe_css(propname.strip().lower(), value.strip()): continue is_evil = False - if 'expression' in value: + if self._EXPRESSION_SEARCH(value): is_evil = True - for match in re.finditer(r'url\s*\(([^)]+)', value): + for match in self._URL_FINDITER(value): if not self.is_safe_uri(match.group(1)): is_evil = True break @@ -441,11 +520,20 @@ return decls _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub - _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub + _UNICODE_ESCAPE = re.compile( + r"""\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'"{};:()#*])""", + re.UNICODE).sub def _replace_unicode_escapes(self, text): def _repl(match): - return unichr(int(match.group(1), 16)) + t = match.group(1) + if t: + return unichr(int(t, 16)) + t = match.group(2) + if t == '\\': + return r'\\' + else: + return t return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub
--- a/genshi/filters/tests/test_html.py +++ b/genshi/filters/tests/test_html.py @@ -361,6 +361,11 @@ </p></form>""", html.render()) +def StyleSanitizer(): + safe_attrs = HTMLSanitizer.SAFE_ATTRS | frozenset(['style']) + return HTMLSanitizer(safe_attrs=safe_attrs) + + class HTMLSanitizerTestCase(unittest.TestCase): def test_sanitize_unchanged(self): @@ -420,7 +425,7 @@ self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) def test_sanitize_remove_style_scripts(self): - sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + sanitizer = StyleSanitizer() # Inline style with url() using javascript: scheme html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') self.assertEquals('<div/>', (html | sanitizer).render()) @@ -453,7 +458,7 @@ self.assertEquals('<div/>', (html | sanitizer).render()) def test_sanitize_remove_style_phishing(self): - sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + sanitizer = StyleSanitizer() # The position property is not allowed html = HTML(u'<div style="position:absolute;top:0"></div>') self.assertEquals('<div style="top:0"/>', (html | sanitizer).render()) @@ -500,6 +505,95 @@ html = HTML(u'<IMG SRC=\'jav	ascript:alert("foo");\'>') self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + def test_sanitize_expression(self): + html = HTML(ur'<div style="top:expression(alert())">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_capital_expression(self): + html = HTML(ur'<div style="top:EXPRESSION(alert())">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_url_with_javascript(self): + html = HTML(u'<div style="background-image:url(javascript:alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_capital_url_with_javascript(self): + html = HTML(u'<div style="background-image:URL(javascript:alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_unicode_escapes(self): + html = HTML(ur'<div style="top:exp\72 ess\000069 on(alert())">' + ur'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_backslash_without_hex(self): + html = HTML(ur'<div style="top:e\xp\ression(alert())">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + html = HTML(ur'<div style="top:e\\xp\\ression(alert())">XSS</div>') + self.assertEqual(r'<div style="top:e\\xp\\ression(alert())">' + 'XSS</div>', + unicode(html | StyleSanitizer())) + + def test_sanitize_unsafe_props(self): + html = HTML(u'<div style="POSITION:RELATIVE">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + html = HTML(u'<div style="behavior:url(test.htc)">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + html = HTML(u'<div style="-ms-behavior:url(test.htc) url(#obj)">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + html = HTML(u"""<div style="-o-link:'javascript:alert(1)';""" + u"""-o-link-source:current">XSS</div>""") + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + html = HTML(u"""<div style="-moz-binding:url(xss.xbl)">XSS</div>""") + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_negative_margin(self): + html = HTML(u'<div style="margin-top:-9999px">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + html = HTML(u'<div style="margin:0 -9999px">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_css_hack(self): + html = HTML(u'<div style="*position:static">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + html = HTML(u'<div style="_margin:-10px">XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_property_name(self): + html = HTML(u'<div style="display:none;border-left-color:red;' + u'user_defined:1;-moz-user-selct:-moz-all">prop</div>') + self.assertEqual('<div style="display:none; border-left-color:red' + '">prop</div>', + unicode(html | StyleSanitizer())) + + def test_sanitize_unicode_expression(self): + # Fullwidth small letters + html = HTML(u'<div style="top:expression(alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + # Fullwidth capital letters + html = HTML(u'<div style="top:EXPRESSION(alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + # IPA extensions + html = HTML(u'<div style="top:expʀessɪoɴ(alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + + def test_sanitize_unicode_url(self): + # IPA extensions + html = HTML(u'<div style="background-image:uʀʟ(javascript:alert())">' + u'XSS</div>') + self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) + def suite(): suite = unittest.TestSuite()