1
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright (C) 2006 Christopher Lenz
|
|
4 # All rights reserved.
|
|
5 #
|
|
6 # This software is licensed as described in the file COPYING, which
|
|
7 # you should have received as part of this distribution. The terms
|
|
8 # are also available at http://trac.edgewall.com/license.html.
|
|
9 #
|
|
10 # This software consists of voluntary contributions made by many
|
|
11 # individuals. For the exact contribution history, see the revision
|
|
12 # history and logs, available at http://projects.edgewall.com/trac/.
|
|
13
|
|
14 """Implementation of a number of stream filters."""
|
|
15
|
|
# Fall back to `sets.ImmutableSet` on interpreters that do not provide a
# built-in `frozenset` (looking the name up raises NameError there).
try:
    frozenset
except NameError:
    from sets import ImmutableSet as frozenset
import re

from markup.core import Attributes, Markup, Stream
from markup.path import Path

# Public names exported by a star-import of this module.
__all__ = ['EvalFilter', 'IncludeFilter', 'MatchFilter', 'WhitespaceFilter',
           'HTMLSanitizer']
|
|
27
|
|
28
|
|
class EvalFilter(object):
    """Responsible for evaluating expressions in a template.

    The filter replaces `Stream.EXPR` events by the result of evaluating the
    expression, and resolves expressions that are still embedded in the
    attribute values of `Stream.START` events.
    """

    def __call__(self, stream, ctxt=None):
        """Filter the stream, evaluating any expressions it contains.

        @param stream: the markup event stream to filter, an iterable of
            `(kind, data, pos)` tuples
        @param ctxt: the template context; it is passed unchanged to the
            `evaluate()` method of every expression
        @return: a generator producing the filtered events
        """
        for kind, data, pos in stream:

            if kind is Stream.START:
                # Attributes may still contain expressions in start tags at
                # this point, so do some evaluation
                tag, attrib = data
                new_attrib = []
                for name, substream in attrib:
                    if isinstance(substream, basestring):
                        value = substream
                    else:
                        value = []
                        for subkind, subdata, subpos in substream:
                            if subkind is Stream.EXPR:
                                subdata = subdata.evaluate(ctxt)
                            if subdata is None:
                                continue
                            # Expressions may evaluate to arbitrary objects
                            # (numbers, for example); joining those directly
                            # would raise a TypeError, so coerce them to
                            # text the same way top-level expression results
                            # are coerced below.
                            if not isinstance(subdata, basestring):
                                subdata = unicode(subdata)
                            value.append(subdata)
                        if not value:
                            # Every part evaluated to None: drop the
                            # attribute altogether
                            continue
                    new_attrib.append((name, ''.join(value)))
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Stream.EXPR:
                result = data.evaluate(ctxt)
                if result is None:
                    continue

                # First check for a string, otherwise the iterable test
                # below succeeds, and the string will be chopped up into
                # characters
                if isinstance(result, basestring):
                    yield Stream.TEXT, result, pos
                else:
                    # Test if the expression evaluated to an iterable, in
                    # which case we yield the individual items. Only the
                    # iter() call is guarded, so that a TypeError coming
                    # from elsewhere is never mistaken for "not iterable".
                    try:
                        substream = iter(result)
                    except TypeError:
                        # Neither a string nor an iterable, so just pass it
                        # through
                        yield Stream.TEXT, unicode(result), pos
                    else:
                        yield Stream.SUB, ([], substream), pos

            else:
                yield kind, data, pos
|
|
79
|
|
80
|
|
class IncludeFilter(object):
    """Template filter providing (very) basic XInclude support
    (see http://www.w3.org/TR/xinclude/) in templates.

    Only the `include` and `fallback` elements of the XInclude namespace are
    acted upon. Nested includes inside a fallback are not handled.
    """

    _NAMESPACE = 'http://www.w3.org/2001/XInclude'

    def __init__(self, loader):
        """Initialize the filter.

        @param loader: the `TemplateLoader` to use for resolving references to
            external template files
        """
        self.loader = loader

    def __call__(self, stream, ctxt=None):
        """Filter the stream, processing any XInclude directives it may
        contain.

        @param ctxt: the template context
        @param stream: the markup event stream to filter
        @return: a generator producing the events of the stream with every
            include element replaced by the events of the included template
            (or by the content of its fallback element when the template
            cannot be found)
        @raise TemplateError: if an include element has no `href` attribute
        @raise TemplateNotFound: if the referenced template cannot be loaded
            and the include element provides no fallback
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import between the
        # template and filter modules -- confirm.
        from markup.template import TemplateError, TemplateNotFound

        # Every pass of this outer loop consumes the stream up to and
        # including one include element. The loop replaces the former
        # recursive call on the remaining stream, which added one nested
        # generator level for every include that was processed.
        while True:
            in_fallback = False
            include_href, fallback_stream = None, None
            indent = 0

            for kind, data, pos in stream:

                if kind is Stream.START \
                        and data[0].namespace == self._NAMESPACE \
                        and not in_fallback:
                    tag, attrib = data
                    if tag.localname == 'include':
                        include_href = attrib.get('href')
                        indent = pos[1]
                    elif tag.localname == 'fallback':
                        in_fallback = True
                        fallback_stream = []

                elif kind is Stream.END and data.namespace == self._NAMESPACE:
                    if data.localname == 'include':
                        try:
                            if not include_href:
                                raise TemplateError('Include misses required '
                                                    'attribute "href"')
                            template = self.loader.load(include_href)
                            for ikind, idata, ipos in template.generate(ctxt):
                                # Fixup indentation of included markup
                                if ikind is Stream.TEXT:
                                    idata = idata.replace('\n',
                                                          '\n' + ' ' * indent)
                                yield ikind, idata, ipos

                            # If the included template defines any filters
                            # added at runtime (such as py:match templates),
                            # those need to be applied to the including
                            # template, too.
                            for filter_ in template.filters:
                                stream = filter_(stream, ctxt)

                        except TemplateNotFound:
                            if fallback_stream is None:
                                raise
                            for event in fallback_stream:
                                yield event

                        # `stream` may have been rebound to a filtered
                        # stream above, so leave this loop and continue on
                        # the (possibly wrapped) remainder with fresh state.
                        break
                    elif data.localname == 'fallback':
                        in_fallback = False

                elif in_fallback:
                    fallback_stream.append((kind, data, pos))

                elif kind is Stream.START_NS and data[1] == self._NAMESPACE:
                    # Drop the declaration of the XInclude namespace.
                    # NOTE(review): the corresponding end-of-namespace event
                    # is not filtered here -- confirm that is intended.
                    continue

                else:
                    yield kind, data, pos
            else:
                # The loop exited normally, so there are no further events
                # to process
                return
|
|
168
|
|
169
|
|
class MatchFilter(object):
    """A filter that delegates to a given handler function when the input stream
    matches some path expression.
    """

    def __init__(self, path, handler):
        """Initialize the filter.

        @param path: the path expression to match; it is passed to `Path`
        @param handler: a callable invoked as `handler(content, ctxt)`, where
            `content` is the list of events making up the matched element
        """
        self.path = Path(path)
        self.handler = handler

    def __call__(self, stream, ctxt=None):
        """Filter the stream, replacing matched elements by `Stream.SUB`
        events.

        The data of each generated `Stream.SUB` event is a tuple of a
        one-item list holding a callable that applies the handler to the
        matched events, and an empty list.

        @param stream: the markup event stream to filter
        @param ctxt: the template context
        @return: a generator producing the filtered events
        """
        # The events of a matched element are pulled from the stream by hand
        # below, which must advance the very same iterator that the `for`
        # loop uses. Calling iter() makes that work for lists and other
        # non-iterator iterables as well.
        stream = iter(stream)
        test = self.path.test()
        for kind, data, pos in stream:
            result = test(kind, data, pos)
            if result is True:
                # Buffer everything up to the end event that closes the
                # matched element
                content = [(kind, data, pos)]
                depth = 1
                while depth > 0:
                    ev = stream.next()
                    if ev[0] is Stream.START:
                        depth += 1
                    elif ev[0] is Stream.END:
                        depth -= 1
                    content.append(ev)
                    # Keep the path test informed about the events it does
                    # not get to see through the outer loop
                    test(*ev)

                yield (Stream.SUB,
                       ([self._bind_handler(content)], []),
                       pos)
            else:
                yield kind, data, pos

    def _bind_handler(self, content):
        """Return a filter callable applying the handler to `content`.

        Binding `content` through a function argument gives every match its
        own event list. A closure created directly in the loop would see
        whatever list the loop variable refers to at the time it is called.
        """
        def apply_handler(stream, ctxt):
            return self.handler(content, ctxt)
        return apply_handler
|
|
200
|
|
201
|
|
class WhitespaceFilter(object):
    """A filter that removes extraneous white space from the stream.

    Adjacent text events are merged into a single one. Spaces and tabs
    directly in front of a newline are removed, and runs of consecutive
    newlines are collapsed into a single newline.

    Todo:
     * Support for xml:space
    """

    # Spaces and tabs that are directly followed by a newline
    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    # Two or more consecutive newlines
    _LINE_COLLAPSE = re.compile('\n{2,}')

    def __call__(self, stream, ctxt=None):
        """Filter the stream, cleaning up the white space in text events.

        The merged text event carries the position of the event that
        follows the text (or of the last event seen, when the stream ends
        with text).

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used by this filter)
        @return: a generator producing the filtered events
        """
        textbuf = []
        for kind, data, pos in stream:
            if kind is Stream.TEXT:
                # Hold text back until the next non-text event, so that
                # adjacent text events get normalized as one piece
                textbuf.append(data)
                continue
            if textbuf:
                yield Stream.TEXT, self._normalize(''.join(textbuf)), pos
                del textbuf[:]
            yield kind, data, pos

        if textbuf:
            # Text at the very end of the stream gets the same treatment as
            # text in the middle of it
            yield Stream.TEXT, self._normalize(''.join(textbuf)), pos

    def _normalize(self, text):
        """Return `text` with trailing space stripped from its lines and
        consecutive newlines collapsed.
        """
        text = self._TRAILING_SPACE.sub('', text)
        return self._LINE_COLLAPSE.sub('\n', text)
|
|
231
|
|
232
|
|
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    Elements that are not in `_SAFE_TAGS` are removed together with all of
    their content. Of the remaining elements, only the attributes listed in
    `_SAFE_ATTRS` are kept; attributes holding a URI are dropped when the
    URI scheme is not in `_SAFE_SCHEMES`, and inline styles are reduced to
    the declarations that look harmless.
    """

    _SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    _SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target',
        'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
    _URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])
    # `None` stands for "no scheme at all", as returned by `_get_scheme()`
    _SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    # Matches `url(` followed by the URL, irrespective of case
    _CSS_URL = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE)

    def __call__(self, stream, ctxt=None):
        """Filter the stream, dropping unsafe elements and attributes.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used by this filter)
        @return: a generator producing the sanitized events
        """
        # Number of currently open elements inside (and including) the
        # unsafe element being skipped; zero when nothing is being skipped.
        # Counting the depth instead of waiting for an end tag of the same
        # name keeps the filter in sync when an unsafe element contains
        # another element of the same name.
        skip_depth = 0

        for kind, data, pos in stream:
            if kind is Stream.START:
                if skip_depth:
                    skip_depth += 1
                    continue
                tag, attrib = data
                if tag not in self._SAFE_TAGS:
                    skip_depth = 1
                    continue

                new_attrib = []
                for attr, value in attrib:
                    if attr not in self._SAFE_ATTRS:
                        continue
                    elif attr in self._URI_ATTRS:
                        # Don't allow URI schemes such as "javascript:"
                        if self._get_scheme(value) not in self._SAFE_SCHEMES:
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        value = self._sanitize_css(value)
                        if value is None:
                            continue
                    new_attrib.append((attr, value))

                # Wrap the attributes the same way EvalFilter does, so that
                # later filters find the type they expect
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Stream.END:
                if skip_depth:
                    skip_depth -= 1
                else:
                    yield kind, data, pos

            elif not skip_depth:
                yield kind, data, pos

    def _sanitize_css(self, text):
        """Return the harmless declarations of an inline style.

        A declaration is dropped when it contains the word `expression` or
        a `url(...)` reference whose scheme is not in `_SAFE_SCHEMES`; both
        are matched irrespective of case.

        @param text: the value of a `style` attribute
        @return: the remaining declarations joined by `'; '`, or `None` if
            no declaration is left
        """
        decls = []
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            if 'expression' in decl.lower():
                continue
            is_evil = False
            for match in self._CSS_URL.finditer(decl):
                if self._get_scheme(match.group(1)) not in self._SAFE_SCHEMES:
                    is_evil = True
                    break
            if not is_evil:
                decls.append(decl)
        if not decls:
            return None
        return '; '.join(decls)

    def _get_scheme(self, text):
        """Return the normalized scheme of a URI.

        Everything in front of the first colon is taken to be the scheme.
        Characters that are not alphanumeric are removed and the result is
        lower-cased, so that a scheme obfuscated with white space or
        control characters is still recognized.

        @param text: the URI to examine
        @return: the scheme, or `None` if `text` contains no colon
        """
        if ':' not in text:
            return None
        chars = [char for char in text.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower()
|