comparison markup/filters.py @ 1:821114ec4f69

Initial import.
author cmlenz
date Sat, 03 Jun 2006 07:16:01 +0000
parents
children c5890ef863ba
comparison
equal deleted inserted replaced
0:20f3417d4171 1:821114ec4f69
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Christopher Lenz
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://trac.edgewall.com/license.html.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://projects.edgewall.com/trac/.
13
14 """Implementation of a number of stream filters."""
15
16 try:
17 frozenset
18 except NameError:
19 from sets import ImmutableSet as frozenset
20 import re
21
22 from markup.core import Attributes, Markup, Stream
23 from markup.path import Path
24
# Names exported by a wildcard import of this module: the stream filter
# classes defined below.
__all__ = ['EvalFilter', 'IncludeFilter', 'MatchFilter', 'WhitespaceFilter',
           'HTMLSanitizer']
27
28
class EvalFilter(object):
    """Responsible for evaluating expressions in a template.

    Expressions are looked for in two places: inside the attribute values of
    `START` events, and as stand-alone `EXPR` events.
    """

    def __call__(self, stream, ctxt=None):
        """Filter the stream, replacing expressions by their results.

        @param stream: the markup event stream to filter
        @param ctxt: the template context passed to `evaluate()` of every
            expression
        """
        for kind, data, pos in stream:

            if kind is Stream.START:
                # Attributes may still contain expressions in start tags at
                # this point, so do some evaluation
                tag, attrib = data
                new_attrib = []
                for name, substream in attrib:
                    if isinstance(substream, basestring):
                        value = substream
                    else:
                        values = []
                        for subkind, subdata, subpos in substream:
                            if subkind is Stream.EXPR:
                                values.append(subdata.evaluate(ctxt))
                            else:
                                values.append(subdata)
                        # Drop `None` results.  Everything that is not
                        # already a string (numbers, for example) has to be
                        # converted to text, otherwise the join below raises
                        # a TypeError.
                        value = []
                        for item in values:
                            if item is None:
                                continue
                            if not isinstance(item, basestring):
                                item = unicode(item)
                            value.append(item)
                        if not value:
                            # Nothing left: omit the attribute altogether
                            continue
                    new_attrib.append((name, ''.join(value)))
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Stream.EXPR:
                result = data.evaluate(ctxt)
                if result is None:
                    continue

                # First check for a string, otherwise the iterable
                # test below succeeds, and the string will be
                # chopped up into characters
                if isinstance(result, basestring):
                    yield Stream.TEXT, result, pos
                else:
                    # Test if the expression evaluated to an
                    # iterable, in which case we yield the
                    # individual items
                    try:
                        yield Stream.SUB, ([], iter(result)), pos
                    except TypeError:
                        # Neither a string nor an iterable, so just
                        # pass it through
                        yield Stream.TEXT, unicode(result), pos

            else:
                yield kind, data, pos
79
80
class IncludeFilter(object):
    """Stream filter implementing a small subset of XInclude
    (see http://www.w3.org/TR/xinclude/) for templates.

    `include` elements are replaced by the output of the template named by
    their `href` attribute; the content of a `fallback` element is used when
    that template cannot be found.  Other start and end tags in the XInclude
    namespace, as well as the namespace declaration itself, are dropped from
    the output.
    """

    _NAMESPACE = 'http://www.w3.org/2001/XInclude'

    def __init__(self, loader):
        """Create the filter.

        @param loader: the `TemplateLoader` used to resolve the templates
            referenced by `href` attributes
        """
        self.loader = loader

    def __call__(self, stream, ctxt=None):
        """Expand the XInclude directives contained in the stream.

        @param stream: the markup event stream to filter
        @param ctxt: the template context handed to included templates
        """
        from markup.template import TemplateError, TemplateNotFound

        xi_ns = self._NAMESPACE
        href = None
        fallback_events = None
        collecting_fallback = False
        # NOTE(review): pos[1] is used as the indentation width, presumably
        # the column of the include element -- confirm against the parser.
        column = 0

        for kind, data, pos in stream:

            if kind is Stream.START and data[0].namespace == xi_ns \
                    and not collecting_fallback:
                element, attributes = data
                if element.localname == 'include':
                    href = attributes.get('href')
                    column = pos[1]
                elif element.localname == 'fallback':
                    collecting_fallback = True
                    fallback_events = []
                continue

            if kind is Stream.END and data.namespace == xi_ns:
                if data.localname == 'fallback':
                    collecting_fallback = False
                    continue
                if data.localname != 'include':
                    continue

                # End of an include element: emit the referenced template
                try:
                    if not href:
                        raise TemplateError('Include misses required '
                                            'attribute "href"')
                    included = self.loader.load(href)
                    for ikind, idata, ipos in included.generate(ctxt):
                        if ikind is Stream.TEXT:
                            # Keep the included markup aligned with the
                            # position of the include element
                            idata = idata.replace('\n', '\n' + ' ' * column)
                        yield ikind, idata, ipos

                    # Filters the included template registered at runtime
                    # (such as py:match templates) must also see the rest of
                    # the including stream.
                    for extra_filter in included.filters:
                        stream = extra_filter(stream, ctxt)

                except TemplateNotFound:
                    if fallback_events is None:
                        raise
                    for event in fallback_events:
                        yield event

                href = None
                fallback_events = None
                column = 0
                # Leave the loop so that the remaining events are read from
                # the (possibly re-wrapped) stream by the recursive call
                # below.
                break

            if collecting_fallback:
                fallback_events.append((kind, data, pos))
                continue

            if kind is Stream.START_NS and data[1] == xi_ns:
                continue

            yield kind, data, pos
        else:
            # The stream was exhausted without hitting an include, so there
            # is nothing left to process
            return

        for event in self(stream, ctxt):
            yield event
168
169
class MatchFilter(object):
    """A filter that delegates to a given handler function when the input stream
    matches some path expression.
    """

    def __init__(self, path, handler):
        """Initialize the filter.

        @param path: the path expression, compiled with `Path`
        @param handler: callable invoked as `handler(content, ctxt)`, where
            `content` is the list of events making up the match
        """
        self.path = Path(path)
        self.handler = handler

    def __call__(self, stream, ctxt=None):
        """Filter the stream, replacing matched content by a `SUB` event.

        @param stream: the markup event stream to filter
        @param ctxt: the template context, passed on to the handler
        """
        # The events of a match are pulled with `next()` from inside the loop
        # below.  Both must read from the very same iterator, and `stream`
        # may be a plain iterable (e.g. a list) that has no `next()` method.
        stream = iter(stream)
        test = self.path.test()
        for kind, data, pos in stream:
            result = test(kind, data, pos)
            if result is True:
                # Collect everything up to the matching end tag.
                # NOTE(review): the depth counting assumes the matched event
                # is a START event -- confirm `Path` never matches others.
                content = [(kind, data, pos)]
                depth = 1
                while depth > 0:
                    ev = stream.next()
                    if ev[0] is Stream.START:
                        depth += 1
                    elif ev[0] is Stream.END:
                        depth -= 1
                    content.append(ev)
                    # Keep the path test in sync with the consumed events
                    test(*ev)

                # `content` is bound as a default argument: the consumer may
                # call the function only after this loop has moved on to the
                # next match and rebound the local name.
                yield (Stream.SUB,
                       ([lambda stream, ctxt, content=content:
                             self.handler(content, ctxt)], []),
                       pos)
            else:
                yield kind, data, pos
200
201
class WhitespaceFilter(object):
    """A filter that removes extraneous white space from the stream.

    Consecutive `TEXT` events are merged into a single event; spaces and
    tabs directly in front of a line break are removed, and runs of line
    breaks are collapsed into one.

    Todo:
     * Support for xml:space
    """

    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    _LINE_COLLAPSE = re.compile('\n{2,}')

    def __call__(self, stream, ctxt=None):
        """Filter the stream, normalizing the white space of text events.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used by this filter)
        """
        textbuf = []
        prev_kind = None
        for kind, data, pos in stream:
            if kind is Stream.TEXT:
                textbuf.append(data)
            elif prev_kind is Stream.TEXT:
                # A run of text just ended: emit it as one event.  Note that
                # it carries the position of the event that follows it.
                yield Stream.TEXT, self._normalize(''.join(textbuf)), pos
                del textbuf[:]
            prev_kind = kind
            if kind is not Stream.TEXT:
                yield kind, data, pos

        if textbuf:
            # Text at the very end of the stream gets the same treatment as
            # text in the middle of it
            yield Stream.TEXT, self._normalize(''.join(textbuf)), pos

    def _normalize(self, text):
        """Return `text` without trailing blanks and repeated line breaks."""
        text = self._TRAILING_SPACE.sub('', text)
        return self._LINE_COLLAPSE.sub('\n', text)
231
232
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    Elements that are not in `_SAFE_TAGS` are dropped together with their
    content.  On the remaining elements only attributes in `_SAFE_ATTRS` are
    kept; URI attributes must use a scheme in `_SAFE_SCHEMES`, and inline
    styles are reduced to the declarations considered harmless.
    """

    _SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    _SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target',
        'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
    _URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])
    _SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    # Argument of a CSS `url(...)` token.  CSS is case-insensitive, so
    # `URL(...)` has to be recognized as well.
    _CSS_URL = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE)

    def __call__(self, stream, ctxt=None):
        """Filter the stream, dropping unsafe elements and attributes.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used by this filter)
        """
        # Name of the unsafe element currently being skipped, and how many
        # elements of that name are open.  Counting is needed so that an
        # unsafe element nested in another one of the same name does not end
        # the skipping too early.
        waiting_for = None
        depth = 0

        for kind, data, pos in stream:
            if kind is Stream.START:
                tag, attrib = data
                if waiting_for:
                    if tag == waiting_for:
                        depth += 1
                    continue
                if tag not in self._SAFE_TAGS:
                    waiting_for = tag
                    depth = 1
                    continue

                new_attrib = []
                for attr, value in attrib:
                    if attr not in self._SAFE_ATTRS:
                        continue
                    elif attr in self._URI_ATTRS:
                        # Don't allow URI schemes such as "javascript:"
                        if self._get_scheme(value) not in self._SAFE_SCHEMES:
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        value = self._sanitize_css(value)
                        if value is None:
                            continue
                    new_attrib.append((attr, value))

                yield kind, (tag, new_attrib), pos

            elif kind is Stream.END:
                tag = data
                if waiting_for:
                    if waiting_for == tag:
                        depth -= 1
                        if depth <= 0:
                            waiting_for = None
                            depth = 0
                else:
                    yield kind, data, pos

            else:
                if not waiting_for:
                    yield kind, data, pos

    def _sanitize_css(self, text):
        """Return the harmless declarations of an inline style.

        A declaration is dropped when it contains `expression` or a `url()`
        whose scheme is not in `_SAFE_SCHEMES`; both checks ignore case.

        @param text: the value of a `style` attribute
        @return: the remaining declarations joined by `'; '`, or `None` if
            none is left
        """
        decls = []
        for decl in filter(None, text.split(';')):
            if 'expression' in decl.lower():
                continue
            for match in self._CSS_URL.finditer(decl):
                if self._get_scheme(match.group(1)) not in self._SAFE_SCHEMES:
                    break
            else:
                # No unsafe URL was found in this declaration
                decls.append(decl.strip())
        if not decls:
            return None
        return '; '.join(decls)

    def _get_scheme(self, text):
        """Return the lower-cased URI scheme of `text`, or `None` if the text
        contains no colon.

        Characters that are not alphanumeric are removed from the scheme, so
        that a scheme obscured by embedded white space or control characters
        is still recognized.
        """
        if ':' not in text:
            return None
        chars = [char for char in text.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower()
Copyright (C) 2012-2017 Edgewall Software