genshi/genshi-test: markup/filters.py annotate

annotate markup/filters.py @ 15:f083101b8e8a

Port HTML sanitizer fix from trac:changeset:3417.

author	cmlenz
date	Fri, 16 Jun 2006 09:55:48 +0000
parents	76b5d4b189e6
children	ad63ad459524

rev	line source
1 821114ec4f69 Initial import. cmlenz parents: diff changeset	1 # -*- coding: utf-8 -*-
821114ec4f69 Initial import. cmlenz parents: diff changeset	2 #
821114ec4f69 Initial import. cmlenz parents: diff changeset	3 # Copyright (C) 2006 Christopher Lenz
821114ec4f69 Initial import. cmlenz parents: diff changeset	4 # All rights reserved.
821114ec4f69 Initial import. cmlenz parents: diff changeset	5 #
821114ec4f69 Initial import. cmlenz parents: diff changeset	6 # This software is licensed as described in the file COPYING, which
821114ec4f69 Initial import. cmlenz parents: diff changeset	7 # you should have received as part of this distribution. The terms
821114ec4f69 Initial import. cmlenz parents: diff changeset	8 # are also available at http://trac.edgewall.com/license.html.
821114ec4f69 Initial import. cmlenz parents: diff changeset	9 #
821114ec4f69 Initial import. cmlenz parents: diff changeset	10 # This software consists of voluntary contributions made by many
821114ec4f69 Initial import. cmlenz parents: diff changeset	11 # individuals. For the exact contribution history, see the revision
821114ec4f69 Initial import. cmlenz parents: diff changeset	12 # history and logs, available at http://projects.edgewall.com/trac/.
821114ec4f69 Initial import. cmlenz parents: diff changeset	13
821114ec4f69 Initial import. cmlenz parents: diff changeset	14 """Implementation of a number of stream filters."""
821114ec4f69 Initial import. cmlenz parents: diff changeset	15
821114ec4f69 Initial import. cmlenz parents: diff changeset	16 try:
821114ec4f69 Initial import. cmlenz parents: diff changeset	17 frozenset
821114ec4f69 Initial import. cmlenz parents: diff changeset	18 except NameError:
821114ec4f69 Initial import. cmlenz parents: diff changeset	19 from sets import ImmutableSet as frozenset
821114ec4f69 Initial import. cmlenz parents: diff changeset	20 import re
821114ec4f69 Initial import. cmlenz parents: diff changeset	21
821114ec4f69 Initial import. cmlenz parents: diff changeset	22 from markup.core import Attributes, Markup, Stream
821114ec4f69 Initial import. cmlenz parents: diff changeset	23 from markup.path import Path
821114ec4f69 Initial import. cmlenz parents: diff changeset	24
14 76b5d4b189e6 The `<py:match>` directive now protects itself against simple infinite recursion (see MatchDirective), while still allowing recursion in general. cmlenz parents: 13 diff changeset	25 __all__ = ['EvalFilter', 'IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer']
1 821114ec4f69 Initial import. cmlenz parents: diff changeset	26
821114ec4f69 Initial import. cmlenz parents: diff changeset	27
class EvalFilter(object):
    """Responsible for evaluating expressions in a template.

    The filter walks a stream of `(kind, data, pos)` events and replaces
    template expressions by their evaluated results, both inside attribute
    values of start tags and as stand-alone expression events.
    """

    def __call__(self, stream, ctxt=None):
        """Evaluate the expressions found in the stream.

        @param stream: the markup event stream to filter
        @param ctxt: the template context the expressions are evaluated
            against (passed through to `evaluate()`)
        @return: a generator over the resulting events
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- confirm).
        from markup.template import Template

        for kind, data, pos in stream:

            if kind is Stream.START:
                # Attributes may still contain expressions in start tags at
                # this point, so do some evaluation
                tag, attrib = data
                new_attrib = []
                for name, substream in attrib:
                    if isinstance(substream, basestring):
                        value = substream
                    else:
                        value = []
                        for subkind, subdata, subpos in substream:
                            if subkind is Template.EXPR:
                                subdata = subdata.evaluate(ctxt)
                            if subdata is None:
                                continue
                            # Expressions may evaluate to numbers or other
                            # objects; those must be converted to text, or
                            # the join below would raise a TypeError.
                            if not isinstance(subdata, basestring):
                                subdata = unicode(subdata)
                            value.append(subdata)
                        if not value:
                            # Every part evaluated to None: drop the attribute
                            continue
                    new_attrib.append((name, ''.join(value)))
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Template.EXPR:
                result = data.evaluate(ctxt)
                if result is None:
                    continue

                # First check for a string, otherwise the iterable
                # test below succeeds, and the string will be
                # chopped up into characters
                if isinstance(result, basestring):
                    yield Stream.TEXT, result, pos
                else:
                    # Test if the expression evaluated to an
                    # iterable, in which case we yield the
                    # individual items
                    try:
                        yield Template.SUB, ([], iter(result)), pos
                    except TypeError:
                        # Neither a string nor an iterable, so just
                        # pass it through
                        yield Stream.TEXT, unicode(result), pos

            else:
                yield kind, data, pos
821114ec4f69 Initial import. cmlenz parents: diff changeset	80
821114ec4f69 Initial import. cmlenz parents: diff changeset	81
class IncludeFilter(object):
    """Template filter providing (very) basic XInclude support
    (see http://www.w3.org/TR/xinclude/) in templates.

    Only the `include` and `fallback` elements of the XInclude namespace are
    acted upon, and of the `include` attributes only `href` is read.
    """

    # Namespace URI that identifies XInclude elements in the stream.
    _NAMESPACE = 'http://www.w3.org/2001/XInclude'

    def __init__(self, loader, template):
        """Initialize the filter.

        @param loader: the `TemplateLoader` to use for resolving references to
            external template files
        @param template: the template this filter is attached to; filters
            picked up from included templates are accumulated on its
            `_included_filters` list, which is created here if missing
        """
        self.loader = loader
        self.template = template
        if not hasattr(template, '_included_filters'):
            template._included_filters = []

    def __call__(self, stream, ctxt=None, ns_prefixes=None):
        """Filter the stream, processing any XInclude directives it may
        contain.

        Events of an included template are yielded in place of the `include`
        element. If the referenced template cannot be found, the events
        collected from a `fallback` element are yielded instead.

        @param stream: the markup event stream to filter
        @param ctxt: the template context
        @param ns_prefixes: list of prefixes currently bound to the XInclude
            namespace; used internally when the filter re-enters itself
            after an include, callers normally leave it as `None`
        @raise TemplateError: if an `include` element has no `href` attribute
        @raise TemplateNotFound: if the included template cannot be loaded
            and no fallback was provided
        """
        from markup.template import Template, TemplateError, TemplateNotFound

        if ns_prefixes is None:
            ns_prefixes = []
        in_fallback = False
        include_href, fallback_stream = None, None
        indent = 0

        for kind, data, pos in stream:

            if kind is Stream.START and data[0].namespace == self._NAMESPACE \
                    and not in_fallback:
                tag, attrib = data
                if tag.localname == 'include':
                    include_href = attrib.get('href')
                    # presumably pos[1] is the column offset of the include
                    # element -- TODO confirm against the parser
                    indent = pos[1]
                elif tag.localname == 'fallback':
                    # Start buffering the fallback content
                    in_fallback = True
                    fallback_stream = []

            elif kind is Stream.END and data.namespace == self._NAMESPACE:
                if data.localname == 'include':
                    try:
                        if not include_href:
                            raise TemplateError('Include misses required '
                                                'attribute "href"')
                        template = self.loader.load(include_href)
                        for ikind, idata, ipos in template.generate(ctxt):
                            # Fixup indentation of included markup
                            if ikind is Stream.TEXT:
                                idata = idata.replace('\n', '\n' + ' ' * indent)
                            yield ikind, idata, ipos

                        # If the included template defines any filters added at
                        # runtime (such as py:match templates), those need to be
                        # applied to the including template, too.
                        # NOTE(review): assumes the loaded template already has
                        # an `_included_filters` attribute -- confirm.
                        filters = template._included_filters + template.filters
                        for filter_ in filters:
                            stream = filter_(stream, ctxt)

                        # Runtime filters included need to be propagated up
                        self.template._included_filters += filters

                    except TemplateNotFound:
                        if fallback_stream is None:
                            raise
                        for event in fallback_stream:
                            yield event

                    # Reset the per-include state and leave the loop: `stream`
                    # may have been re-wrapped above, so the remaining events
                    # are processed by the re-entrant call at the bottom.
                    include_href = None
                    fallback_stream = None
                    indent = 0
                    break
                elif data.localname == 'fallback':
                    in_fallback = False

            elif in_fallback:
                fallback_stream.append((kind, data, pos))

            elif kind is Stream.START_NS and data[1] == self._NAMESPACE:
                # Remember the prefix and drop the namespace declaration
                ns_prefixes.append(data[0])
                continue

            elif kind is Stream.END_NS and data in ns_prefixes:
                # Drop the matching END_NS event as well.
                # NOTE(review): pop() removes the most recently added prefix,
                # which is not necessarily `data` -- confirm this is intended.
                ns_prefixes.pop()
                continue

            else:
                yield kind, data, pos
        else:
            # The loop exited normally, so there shouldn't be further events to
            # process
            return

        # Only reached via `break` after an include: continue with the rest of
        # the stream, keeping the namespace prefixes seen so far.
        for event in self(stream, ctxt, ns_prefixes=ns_prefixes):
            yield event
821114ec4f69 Initial import. cmlenz parents: diff changeset	183
821114ec4f69 Initial import. cmlenz parents: diff changeset	184
class WhitespaceFilter(object):
    """A filter that removes extraneous white space from the stream.

    Adjacent text events are merged into one; spaces and tabs directly in
    front of a newline are removed, and runs of two or more newlines are
    collapsed into a single newline.

    Todo:
     * Support for xml:space
    """

    # Spaces/tabs immediately preceding a newline
    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    # Two or more consecutive newlines
    _LINE_COLLAPSE = re.compile('\n{2,}')

    def __call__(self, stream, ctxt=None):
        """Filter the stream, normalizing white space in text events.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (unused by this filter)
        @return: a generator over the resulting events
        """
        textbuf = []
        for kind, data, pos in stream:
            if kind is Stream.TEXT:
                # Buffer text until the next non-text event shows up
                textbuf.append(data)
                continue
            if textbuf:
                yield Stream.TEXT, self._normalize(''.join(textbuf)), pos
                del textbuf[:]
            yield kind, data, pos

        if textbuf:
            # Text at the very end of the stream gets the same treatment as
            # text in the middle of it (trailing space was not stripped here
            # before). `pos` is that of the last event seen.
            yield Stream.TEXT, self._normalize(''.join(textbuf)), pos

    def _normalize(self, text):
        """Strip trailing space from lines, then collapse empty lines."""
        text = self._TRAILING_SPACE.sub('', text)
        return self._LINE_COLLAPSE.sub('\n', text)
821114ec4f69 Initial import. cmlenz parents: diff changeset	214
821114ec4f69 Initial import. cmlenz parents: diff changeset	215
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    Elements not listed in `_SAFE_TAGS` are dropped together with all of
    their content. Of the remaining elements, attributes not listed in
    `_SAFE_ATTRS` are removed, URI attributes are removed unless their scheme
    is listed in `_SAFE_SCHEMES`, and inline styles are stripped of
    declarations that look dangerous.
    """

    _SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    _SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target',
        'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
    _URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])
    _SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    # CSS comments, removed before looking for `expression` so that
    # `exp/**/ression(...)` does not slip through
    _CSS_COMMENT = re.compile(r'/\*.*?\*/', re.DOTALL)
    # `url(...)` references in CSS; function names are case-insensitive
    _CSS_URL = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE)

    def __call__(self, stream, ctxt=None):
        """Filter the stream, removing unsafe elements and attributes.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (unused by this filter)
        @return: a generator over the sanitized events
        """
        # Name of the unsafe element currently being skipped, and the number
        # of open elements of that name. The depth is needed so that a nested
        # element of the same name does not end the skipping prematurely.
        waiting_for = None
        depth = 0

        for kind, data, pos in stream:
            if kind is Stream.START:
                tag, attrib = data
                if waiting_for is not None:
                    if tag == waiting_for:
                        depth += 1
                    continue
                if tag not in self._SAFE_TAGS:
                    waiting_for = tag
                    depth = 1
                    continue

                new_attrib = []
                for attr, value in attrib:
                    if attr not in self._SAFE_ATTRS:
                        continue
                    elif attr in self._URI_ATTRS:
                        # Don't allow URI schemes such as "javascript:"
                        if self._get_scheme(value) not in self._SAFE_SCHEMES:
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self._sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrib.append((attr, value))

                # Wrap in Attributes, like EvalFilter does, so that
                # downstream consumers get the attribute API they expect
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Stream.END:
                if waiting_for is not None:
                    if data == waiting_for:
                        depth -= 1
                        if depth == 0:
                            waiting_for = None
                else:
                    yield kind, data, pos

            else:
                if waiting_for is None:
                    yield kind, data, pos

    def _sanitize_css(self, text):
        """Return the list of declarations in `text` considered safe.

        A declaration is dropped if it contains `expression` (checked
        case-insensitively, ignoring CSS comments) or a `url(...)` reference
        whose scheme is not in `_SAFE_SCHEMES`. Empty declarations are
        skipped.

        @param text: the value of a `style` attribute
        @return: list of the remaining declarations, stripped of surrounding
            white space
        """
        decls = []
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            probe = self._CSS_COMMENT.sub('', decl).lower()
            if 'expression' in probe:
                continue
            is_evil = False
            for match in self._CSS_URL.finditer(decl):
                if self._get_scheme(match.group(1)) not in self._SAFE_SCHEMES:
                    is_evil = True
                    break
            if not is_evil:
                decls.append(decl)
        return decls

    def _get_scheme(self, text):
        """Return the lower-cased URI scheme of `text`, or `None`.

        Everything before the first colon is taken as the scheme; characters
        that are not alphanumeric are discarded from it, so that schemes
        obfuscated with white space are still recognized.

        @param text: the URI to inspect
        @return: the scheme, or `None` if `text` contains no colon
        """
        if ':' not in text:
            return None
        chars = [char for char in text.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower()

Mercurial > genshi > genshi-test

annotate markup/filters.py @ 15:f083101b8e8a