Mercurial > genshi > genshi-test
annotate markup/filters.py @ 13:bf9de5a4c896
Match directives should now also be applied when included indirectly.
For example, a match directive defined in `a.html`, which is included by `b.html`, which in turn is included by `c.html`, should now also be applied to `c.html`.
author | cmlenz |
---|---|
date | Mon, 05 Jun 2006 00:03:43 +0000 |
parents | 87238328a71d |
children | 76b5d4b189e6 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2006 Christopher Lenz | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://trac.edgewall.com/license.html. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://projects.edgewall.com/trac/. | |
13 | |
14 """Implementation of a number of stream filters.""" | |
15 | |
16 try: | |
17 frozenset | |
18 except NameError: | |
19 from sets import ImmutableSet as frozenset | |
20 import re | |
21 | |
22 from markup.core import Attributes, Markup, Stream | |
23 from markup.path import Path | |
24 | |
25 __all__ = ['EvalFilter', 'IncludeFilter', 'MatchFilter', 'WhitespaceFilter', | |
26 'HTMLSanitizer'] | |
27 | |
28 | |
class EvalFilter(object):
    """Responsible for evaluating expressions in a template.

    The filter is a callable that takes an event stream and yields a new
    stream in which `Template.EXPR` events (and expressions embedded in the
    attribute values of start tags) have been replaced by their evaluated
    results.
    """

    def __call__(self, stream, ctxt=None):
        """Evaluate the expressions contained in the stream.

        @param stream: the markup event stream to filter, an iterable of
            `(kind, data, pos)` tuples
        @param ctxt: the template context that is handed to the `evaluate`
            method of every expression
        @return: a generator of `(kind, data, pos)` events
        """
        # Imported locally, as in the rest of this module, rather than at
        # module level.
        from markup.template import Template

        for kind, data, pos in stream:

            if kind is Stream.START:
                # Attributes may still contain expressions in start tags at
                # this point, so do some evaluation
                tag, attrib = data
                new_attrib = []
                for name, substream in attrib:
                    if isinstance(substream, basestring):
                        value = substream
                    else:
                        parts = []
                        for subkind, subdata, subpos in substream:
                            if subkind is Template.EXPR:
                                item = subdata.evaluate(ctxt)
                            else:
                                item = subdata
                            if item is None:
                                continue
                            # Expressions may evaluate to numbers or other
                            # non-string objects; joining those directly
                            # would raise a TypeError, so coerce them first.
                            # Strings are left untouched.
                            if not isinstance(item, basestring):
                                item = unicode(item)
                            parts.append(item)
                        if not parts:
                            # Every part evaluated to None: drop the
                            # attribute entirely
                            continue
                        value = ''.join(parts)
                    new_attrib.append((name, ''.join(value)))
                yield kind, (tag, Attributes(new_attrib)), pos

            elif kind is Template.EXPR:
                result = data.evaluate(ctxt)
                if result is None:
                    continue

                # First check for a string, otherwise the iterable
                # test below succeeds, and the string will be
                # chopped up into characters
                if isinstance(result, basestring):
                    yield Stream.TEXT, result, pos
                    continue

                # Test if the expression evaluated to an iterable, in which
                # case the items are emitted as a sub-stream. Only the
                # `iter()` call is guarded, so that a TypeError coming from
                # elsewhere is not mistaken for "not iterable".
                try:
                    substream = iter(result)
                except TypeError:
                    # Neither a string nor an iterable, so just
                    # pass it through
                    yield Stream.TEXT, unicode(result), pos
                else:
                    yield Template.SUB, ([], substream), pos

            else:
                yield kind, data, pos
81 | |
82 | |
class IncludeFilter(object):
    """Template filter providing (very) basic XInclude support
    (see http://www.w3.org/TR/xinclude/) in templates.

    Elements in the XInclude namespace are consumed by the filter: an
    `include` element is replaced by the events generated by the referenced
    template, and the content of a `fallback` element is buffered and only
    emitted when the referenced template cannot be found.
    """

    # Namespace URI that identifies XInclude elements in the stream
    _NAMESPACE = 'http://www.w3.org/2001/XInclude'

    def __init__(self, loader, template):
        """Initialize the filter.

        @param loader: the `TemplateLoader` to use for resolving references to
            external template files
        @param template: the including template; filters collected from
            included templates are appended to its `_included_filters` list
        """
        self.loader = loader
        self.template = template
        # Make sure the list that receives propagated filters exists
        if not hasattr(template, '_included_filters'):
            template._included_filters = []

    def __call__(self, stream, ctxt=None, ns_prefixes=None):
        """Filter the stream, processing any XInclude directives it may
        contain.

        @param ctxt: the template context
        @param stream: the markup event stream to filter
        @param ns_prefixes: list of prefixes currently bound to the XInclude
            namespace; passed along when the filter re-enters itself after an
            include, callers normally leave it unset
        """
        from markup.template import Template, TemplateError, TemplateNotFound

        if ns_prefixes is None:
            ns_prefixes = []
        in_fallback = False
        include_href, fallback_stream = None, None
        indent = 0

        for kind, data, pos in stream:

            if kind is Stream.START and data[0].namespace == self._NAMESPACE \
                    and not in_fallback:
                # Start of an XInclude element: remember what is needed and
                # swallow the event itself
                tag, attrib = data
                if tag.localname == 'include':
                    include_href = attrib.get('href')
                    # presumably pos[1] is the column of the include element;
                    # it is used below as the indentation width -- TODO confirm
                    indent = pos[1]
                elif tag.localname == 'fallback':
                    in_fallback = True
                    fallback_stream = []

            elif kind is Stream.END and data.namespace == self._NAMESPACE:
                if data.localname == 'include':
                    try:
                        if not include_href:
                            raise TemplateError('Include misses required '
                                                'attribute "href"')
                        template = self.loader.load(include_href)
                        for ikind, idata, ipos in template.generate(ctxt):
                            # Fixup indentation of included markup
                            if ikind is Stream.TEXT:
                                idata = idata.replace('\n', '\n' + ' ' * indent)
                            yield ikind, idata, ipos

                        # If the included template defines any filters added at
                        # runtime (such as py:match templates), those need to be
                        # applied to the including template, too.
                        # NOTE(review): this assumes the loaded template has an
                        # `_included_filters` attribute -- confirm it is always
                        # set before this point.
                        filters = template.filters + template._included_filters
                        for filter_ in filters:
                            # Wraps the *remaining* events of the stream; the
                            # wrapped stream is consumed by the recursive call
                            # at the end of this method
                            stream = filter_(stream, ctxt)

                        # Runtime filters included need to be propagated up
                        # NOTE(review): this appends on every processed
                        # include, so the list looks like it can accumulate
                        # duplicates if the same template object is rendered
                        # repeatedly -- confirm.
                        self.template._included_filters += filters

                    except TemplateNotFound:
                        # Without a fallback the error is passed on to the
                        # caller, otherwise the buffered fallback content is
                        # emitted in place of the include
                        if fallback_stream is None:
                            raise
                        for event in fallback_stream:
                            yield event

                    # Reset the per-include state and leave the loop, so that
                    # the rest of the (possibly re-wrapped) stream is handled
                    # by the recursive call below
                    include_href = None
                    fallback_stream = None
                    indent = 0
                    break
                elif data.localname == 'fallback':
                    in_fallback = False

            elif in_fallback:
                # Buffer fallback content instead of emitting it right away
                fallback_stream.append((kind, data, pos))

            elif kind is Stream.START_NS and data[1] == self._NAMESPACE:
                # Suppress the declaration of the XInclude namespace, but
                # remember the prefix so the matching END_NS can be dropped
                ns_prefixes.append(data[0])
                continue

            elif kind is Stream.END_NS and data in ns_prefixes:
                # NOTE(review): pops the most recently added prefix, which is
                # not necessarily `data` if several prefixes are tracked
                ns_prefixes.pop()
                continue

            else:
                yield kind, data, pos
        else:
            # The loop exited normally, so there shouldn't be further events to
            # process
            return

        # Only reached via the `break` above: continue with the remainder of
        # the stream, keeping the namespace prefixes collected so far
        for event in self(stream, ctxt, ns_prefixes=ns_prefixes):
            yield event
184 | |
185 | |
class MatchFilter(object):
    """A filter that delegates to a given handler function when the input stream
    matches some path expression.
    """

    def __init__(self, path, handler):
        """Initialize the filter.

        @param path: the path expression, passed to `Path`
        @param handler: callable invoked as `handler(content, ctxt)`, where
            `content` is the list of events of the matched element
        """
        self.path = Path(path)
        self.handler = handler

    def __call__(self, stream, ctxt=None):
        """Replace every matched element by a `Template.SUB` event that
        defers to the handler.

        @param stream: the markup event stream to filter; any iterable of
            `(kind, data, pos)` tuples
        @param ctxt: the template context
        @return: a generator of `(kind, data, pos)` events
        """
        from markup.template import Template

        # The matched element is pulled from the same iterator the outer loop
        # is consuming, so make sure we actually hold an iterator. A plain
        # list, for example, has no `next()` method.
        stream = iter(stream)

        test = self.path.test()
        for kind, data, pos in stream:
            if test(kind, data, pos) is True:
                content = [(kind, data, pos)]
                depth = 1
                while depth > 0:
                    try:
                        ev = stream.next()
                    except StopIteration:
                        # Truncated stream: hand over what has been
                        # collected instead of leaking StopIteration out of
                        # the generator and losing the matched content
                        break
                    if ev[0] is Stream.START:
                        depth += 1
                    elif ev[0] is Stream.END:
                        depth -= 1
                    content.append(ev)
                    # Keep the path test informed about the events that
                    # are consumed here
                    test(*ev)

                yield Template.SUB, ([self._make_subfilter(content)], []), pos
            else:
                yield kind, data, pos

    def _make_subfilter(self, content):
        """Return the filter function for one match, bound to its content."""
        # Built in a separate scope so that every match keeps its own
        # `content`; a lambda created inside the loop would see whatever the
        # loop variable refers to by the time it is finally called.
        return lambda stream, ctxt: self.handler(content, ctxt)
218 | |
219 | |
class WhitespaceFilter(object):
    """A filter that removes extraneous white space from the stream.

    Adjacent text events are merged into a single text event, spaces and
    tabs directly in front of a line break are removed, and runs of line
    breaks are collapsed into one.

    Todo:
     * Support for xml:space
    """

    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    _LINE_COLLAPSE = re.compile('\n{2,}')

    def __call__(self, stream, ctxt=None):
        """Filter the stream, normalizing the white space of text events.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used)
        @return: a generator of `(kind, data, pos)` events
        """
        textbuf = []
        for kind, data, pos in stream:
            if kind is Stream.TEXT:
                # Collect the text until an event of another kind shows up
                textbuf.append(data)
                continue
            if textbuf:
                # The merged text is reported at the position of the event
                # that follows it
                yield Stream.TEXT, self._normalize(''.join(textbuf)), pos
                del textbuf[:]
            yield kind, data, pos

        if textbuf:
            # Text at the very end of the stream gets the same treatment as
            # text in the middle of it, including trailing space removal
            yield Stream.TEXT, self._normalize(''.join(textbuf)), pos

    def _normalize(self, text):
        """Strip white space in front of line breaks and collapse runs of
        line breaks in the given text.
        """
        text = self._TRAILING_SPACE.sub('', text)
        return self._LINE_COLLAPSE.sub('\n', text)
249 | |
250 | |
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    Elements that are not in the list of safe tags are dropped together with
    their content. Of the remaining elements only attributes from the list
    of safe attributes are kept; URI attributes must use a safe scheme, and
    inline styles are stripped of dangerous declarations.
    """

    _SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    _SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target',
        'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
    _URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])
    _SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    # CSS is case-insensitive, so `URL(...)` must be caught like `url(...)`
    _CSS_URL = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE)

    def __call__(self, stream, ctxt=None):
        """Filter the stream, dropping unsafe elements and attributes.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (not used)
        @return: a generator of `(kind, data, pos)` events
        """
        # Name of the unsafe element currently being skipped, and the number
        # of elements with that name that are still open. Counting is needed
        # because the first closing tag of a nested element with the same
        # name must not end the skipping too early.
        waiting_for = None
        depth = 0

        for kind, data, pos in stream:
            if kind is Stream.START:
                tag, attrib = data
                if waiting_for:
                    if tag == waiting_for:
                        depth += 1
                    continue
                if tag not in self._SAFE_TAGS:
                    waiting_for = tag
                    depth = 1
                    continue

                new_attrib = []
                for attr, value in attrib:
                    if attr not in self._SAFE_ATTRS:
                        continue
                    elif attr in self._URI_ATTRS:
                        # Don't allow URI schemes such as "javascript:"
                        if self._get_scheme(value) not in self._SAFE_SCHEMES:
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        value = self._sanitize_css(value)
                        if value is None:
                            continue
                    new_attrib.append((attr, value))

                yield kind, (tag, new_attrib), pos

            elif kind is Stream.END:
                tag = data
                if waiting_for:
                    if waiting_for == tag:
                        depth -= 1
                        if depth <= 0:
                            waiting_for = None
                            depth = 0
                else:
                    yield kind, data, pos

            else:
                # Anything else is only passed on outside of skipped elements
                if not waiting_for:
                    yield kind, data, pos

    def _sanitize_css(self, text):
        """Return the inline style with dangerous declarations removed.

        A declaration is dropped when it contains `expression` (in any
        letter case) or a `url(...)` reference with an unsafe scheme.

        @param text: the value of a `style` attribute
        @return: the remaining declarations joined by `'; '`, or `None` if
            no declaration is left
        """
        decls = []
        for decl in text.split(';'):
            if not decl:
                continue
            is_evil = 'expression' in decl.lower()
            if not is_evil:
                for m in self._CSS_URL.finditer(decl):
                    if self._get_scheme(m.group(1)) not in self._SAFE_SCHEMES:
                        is_evil = True
                        break
            if not is_evil:
                decls.append(decl.strip())
        if not decls:
            return None
        return '; '.join(decls)

    def _get_scheme(self, text):
        """Return the lower-cased URI scheme of the text, or `None` if the
        text does not contain a colon.

        Characters of the scheme that are not alphanumeric are ignored, so
        that a scheme obfuscated with white space or control characters is
        still recognized.
        """
        if ':' not in text:
            return None
        chars = [char for char in text.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower()