# HG changeset patch # User cmlenz # Date 1183494628 0 # Node ID b590da4a45e887d1b4e537eac1b0412a935a7981 # Parent 0c4e30a3cac18996183d2b476c9d23a0ef70265d Ported [667] to 0.4.x branch. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,8 @@ it is not available for use through configuration files. * The I18n filter now extracts messages from gettext functions even inside ignored tags (ticket #132). + * The HTML sanitizer now strips any CSS comments in style attributes, which + could previously be used to hide malicious property values. Version 0.4.2 diff --git a/genshi/filters/html.py b/genshi/filters/html.py --- a/genshi/filters/html.py +++ b/genshi/filters/html.py @@ -285,7 +285,9 @@ elif attr == 'style': # Remove dangerous CSS declarations from inline styles decls = [] - value = self._replace_unicode_escapes(value) + value = self._strip_css_comments( + self._replace_unicode_escapes(value) + ) for decl in filter(None, value.split(';')): is_evil = False if 'expression' in decl: @@ -322,3 +324,8 @@ def _repl(match): return unichr(int(match.group(1), 16)) return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) + + _CSS_COMMENTS = re.compile(r'/\*.*?\*/').sub + + def _strip_css_comments(self, text): + return self._CSS_COMMENTS('', text) diff --git a/genshi/filters/tests/html.py b/genshi/filters/tests/html.py --- a/genshi/filters/tests/html.py +++ b/genshi/filters/tests/html.py @@ -332,6 +332,8 @@ # IE expressions in CSS not allowed html = HTML('
') self.assertEquals(u'
', unicode(html | sanitizer)) + html = HTML('
') + self.assertEquals(u'
', unicode(html | sanitizer)) html = HTML('
') self.assertEquals(u'
',