Mercurial > genshi > mirror
annotate genshi/filters.py @ 277:7e30bfa966ab trunk
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
author | cmlenz |
---|---|
date | Mon, 02 Oct 2006 09:45:06 +0000 |
parents | d91cbdeb75e9 |
children | 60111a041e7c |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
54
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
14 """Implementation of a number of stream filters.""" | |
15 | |
16 try: | |
17 frozenset | |
18 except NameError: | |
19 from sets import ImmutableSet as frozenset | |
20 import re | |
21 | |
230 | 22 from genshi.core import Attrs, Namespace, stripentities |
275
d91cbdeb75e9
Integrated `HTMLFormFiller` filter initially presented as a [wiki:FormFilling#Usingatemplatefilter recipe].
cmlenz
parents:
230
diff
changeset
|
23 from genshi.core import END, END_NS, START, START_NS, TEXT |
1 | 24 |
275
d91cbdeb75e9
Integrated `HTMLFormFiller` filter initially presented as a [wiki:FormFilling#Usingatemplatefilter recipe].
cmlenz
parents:
230
diff
changeset
|
25 __all__ = ['HTMLFormFiller', 'HTMLSanitizer', 'IncludeFilter'] |
d91cbdeb75e9
Integrated `HTMLFormFiller` filter initially presented as a [wiki:FormFilling#Usingatemplatefilter recipe].
cmlenz
parents:
230
diff
changeset
|
26 |
d91cbdeb75e9
Integrated `HTMLFormFiller` filter initially presented as a [wiki:FormFilling#Usingatemplatefilter recipe].
cmlenz
parents:
230
diff
changeset
|
27 |
d91cbdeb75e9
Integrated `HTMLFormFiller` filter initially presented as a [wiki:FormFilling#Usingatemplatefilter recipe].
cmlenz
parents:
230
diff
changeset
|
class HTMLFormFiller(object):
    """A stream filter that can populate HTML forms from a dictionary of values.

    >>> from genshi.input import HTML
    >>> html = HTML('''<form>
    ...   <p><input type="text" name="foo" /></p>
    ... </form>''')
    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    >>> print html | filler
    <form>
      <p><input type="text" name="foo" value="bar"/></p>
    </form>

    Form controls whose name has no corresponding item in the data dictionary
    are passed through unchanged: pre-checked boxes stay checked, pre-selected
    options stay selected, and existing textarea content is kept.
    """
    # TODO: only select the first radio button, and the first select option
    #       (if not in a multiple-select)
    # TODO: only apply to elements in the XHTML namespace (or no namespace)?

    def __init__(self, name=None, id=None, data=None):
        """Create the filter.

        @param name: The name of the form that should be populated. If this
            parameter is given, only forms where the ``name`` attribute value
            matches the parameter are processed.
        @param id: The ID of the form that should be populated. If this
            parameter is given, only forms where the ``id`` attribute value
            matches the parameter are processed.
        @param data: The dictionary of form values, where the keys are the names
            of the form fields, and the values are the values to fill in.
        """
        self.name = name
        self.id = id
        if data is None:
            data = {}
        self.data = data

    def _single(self, value):
        """Reduce a list or tuple to its first item (`None` when it is empty);
        any other value is returned as is.
        """
        if isinstance(value, (list, tuple)):
            if not value:
                # Indexing an empty sequence would raise IndexError; treat it
                # the same way as "no value supplied".
                return None
            return value[0]
        return value

    def __call__(self, stream, ctxt=None):
        """Apply the filter to the given stream.

        @param stream: the markup event stream to filter
        @param ctxt: the template context (unused)
        @return: a generator over the events of the stream, with the form
            controls for which data is available populated
        """
        in_form = in_select = in_option = in_textarea = False
        select_value = option_value = textarea_value = None
        option_start = option_text = None

        for kind, data, pos in stream:

            if kind is START:
                tag, attrib = data
                tagname = tag.localname

                # A form is processed when it matches the configured name or
                # ID, or when neither restriction was configured at all.
                if tagname == 'form' and (
                        self.name and attrib.get('name') == self.name or
                        self.id and attrib.get('id') == self.id or
                        not (self.id or self.name)):
                    in_form = True

                elif in_form:
                    if tagname == 'input':
                        input_type = attrib.get('type')
                        if input_type in ('checkbox', 'radio'):
                            name = attrib.get('name')
                            # Only touch the control when data was actually
                            # supplied for it; otherwise the "checked" state
                            # declared in the markup must be left alone.
                            if name and name in self.data:
                                value = self.data[name]
                                declval = attrib.get('value')
                                checked = False
                                if isinstance(value, (list, tuple)):
                                    if declval:
                                        checked = declval in value
                                    else:
                                        checked = bool([item for item in value
                                                        if item])
                                else:
                                    if declval:
                                        checked = declval == value
                                    elif input_type == 'checkbox':
                                        checked = bool(value)
                                if checked:
                                    attrib.set('checked', 'checked')
                                else:
                                    attrib.remove('checked')
                        elif input_type in (None, 'hidden', 'text'):
                            name = attrib.get('name')
                            if name:
                                value = self._single(self.data.get(name))
                                if value is not None:
                                    attrib.set('value', unicode(value))
                    elif tagname == 'select':
                        name = attrib.get('name')
                        # Options of a select without data keep whatever
                        # "selected" attributes they already have.
                        if name in self.data:
                            select_value = self.data[name]
                            in_select = True
                    elif tagname == 'textarea':
                        name = attrib.get('name')
                        if name in self.data:
                            textarea_value = self._single(self.data[name])
                            # The existing content is only replaced when
                            # there is a value to replace it with.
                            in_textarea = textarea_value is not None
                    elif in_select and tagname == 'option':
                        # The start tag is held back until the end tag is
                        # seen, because the text content may be needed to
                        # determine the value of the option.
                        option_start = kind, data, pos
                        option_value = attrib.get('value')
                        in_option = True
                        continue

            elif in_form and kind is TEXT:
                if in_select and in_option:
                    if option_value is None:
                        # No "value" attribute: the text is the value
                        option_value = data
                    option_text = kind, data, pos
                    continue
                elif in_textarea:
                    # Dropped here; the replacement text is emitted right
                    # before the end tag of the textarea.
                    continue

            elif in_form and kind is END:
                tagname = data.localname
                if tagname == 'form':
                    in_form = False
                elif tagname == 'select':
                    in_select = False
                    select_value = None
                elif in_select and tagname == 'option':
                    if isinstance(select_value, (tuple, list)):
                        selected = option_value in select_value
                    else:
                        selected = option_value == select_value
                    attrib = option_start[1][1]
                    if selected:
                        attrib.set('selected', 'selected')
                    else:
                        attrib.remove('selected')
                    # Flush the events that were held back for this option
                    yield option_start
                    if option_text:
                        yield option_text
                    in_option = False
                    option_start = option_text = option_value = None
                elif in_textarea and tagname == 'textarea':
                    yield TEXT, unicode(textarea_value), pos
                    # Reset so that the value cannot leak into a later
                    # textarea for which no data is available.
                    in_textarea = False
                    textarea_value = None

            yield kind, data, pos
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
169 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
170 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
171 class HTMLSanitizer(object): |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
172 """A filter that removes potentially dangerous HTML tags and attributes |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
173 from the stream. |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
174 """ |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
175 |
277
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
176 SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b', |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
177 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
178 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
179 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
180 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
181 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
182 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
183 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
184 'ul', 'var']) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
185 |
277
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
186 SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey', |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
187 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
188 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
189 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
190 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
191 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
192 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
193 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
194 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
195 'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target', |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
196 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) |
277
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
197 |
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
198 SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) |
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
199 |
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
200 URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc', |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
201 'src']) |
277
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
202 |
7e30bfa966ab
The `HTMLSanitizer` now lets you override the default set of tag and attribute names that are considered safe.
cmlenz
parents:
275
diff
changeset
|
def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
             safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
    """Set up the sanitizer with its whitelists.

    Every argument defaults to the corresponding class-level set, so
    callers only need to pass the collections they want to override.

    @param safe_tags: a set of tag names that are considered safe
    @param safe_attrs: a set of attribute names that are considered safe
    @param safe_schemes: a set of URI schemes that are considered safe
    @param uri_attrs: a set of names of attributes that contain URIs
    """
    # Element and attribute whitelists.
    self.safe_tags, self.safe_attrs = safe_tags, safe_attrs
    # Which attributes carry URIs, and which schemes those URIs may use.
    self.uri_attrs, self.safe_schemes = uri_attrs, safe_schemes
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
218 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
def __call__(self, stream, ctxt=None):
    """Apply the filter to the given stream.

    Elements whose tag is not in `self.safe_tags` are dropped together
    with everything they contain. On the remaining elements, attributes
    not in `self.safe_attrs` are removed, URI attributes with a scheme
    outside `self.safe_schemes` are removed, and `style` attributes are
    reduced to the declarations that contain neither `expression` nor a
    `url(...)` with an unsafe scheme.

    @param stream: the markup event stream to filter
    @param ctxt: the template context (unused)
    """
    # Name of the unsafe element currently being skipped, and how many
    # elements of that same name are open inside it (including itself).
    waiting_for = None
    depth = 0

    # Compiled once; matched case-insensitively because CSS function names
    # are case-insensitive (`URL(...)` is equivalent to `url(...)`).
    url_re = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE)

    def _get_scheme(href):
        # Return the lower-cased scheme of `href` with every
        # non-alphanumeric character removed (so "java\tscript:" is
        # still recognized), or None if there is no colon at all.
        if ':' not in href:
            return None
        chars = [char for char in href.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower()

    for kind, data, pos in stream:
        if kind is START:
            tag, attrib = data
            if waiting_for:
                # Inside an unsafe element: swallow the event, but keep
                # track of nested elements with the same name so that the
                # first matching END does not terminate skipping early.
                if tag == waiting_for:
                    depth += 1
                continue
            if tag not in self.safe_tags:
                waiting_for = tag
                depth = 1
                continue

            new_attrib = Attrs()
            for attr, value in attrib:
                value = stripentities(value)
                if attr not in self.safe_attrs:
                    continue
                elif attr in self.uri_attrs:
                    # Don't allow URI schemes such as "javascript:"
                    if _get_scheme(value) not in self.safe_schemes:
                        continue
                elif attr == 'style':
                    # Remove dangerous CSS declarations from inline styles
                    # NOTE(review): CSS comments and backslash escapes are
                    # not normalized before these checks.
                    decls = []
                    for decl in filter(None, value.split(';')):
                        is_evil = False
                        if 'expression' in decl.lower():
                            is_evil = True
                        for m in url_re.finditer(decl):
                            if _get_scheme(m.group(1)) not in self.safe_schemes:
                                is_evil = True
                                break
                        if not is_evil:
                            decls.append(decl.strip())
                    if not decls:
                        continue
                    value = '; '.join(decls)
                new_attrib.append((attr, value))

            yield kind, (tag, new_attrib), pos

        elif kind is END:
            tag = data
            if waiting_for:
                if waiting_for == tag:
                    depth -= 1
                    if not depth:
                        # The outermost unsafe element has been closed.
                        waiting_for = None
            else:
                yield kind, data, pos

        else:
            # Any other event kind is passed through unless it is inside
            # an unsafe element.
            if not waiting_for:
                yield kind, data, pos
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
113
diff
changeset
|
282 |
1 | 283 |
class IncludeFilter(object):
    """Stream filter offering (very) basic XInclude support
    (see http://www.w3.org/TR/xinclude/) in templates.

    Only the `include` and `fallback` elements of the XInclude namespace
    are acted upon.
    """

    NAMESPACE = Namespace('http://www.w3.org/2001/XInclude')

    def __init__(self, loader):
        """Create the filter.

        @param loader: the `TemplateLoader` to use for resolving references to
            external template files
        """
        self.loader = loader

    def __call__(self, stream, ctxt=None):
        """Process the XInclude directives found in the stream.

        Events of an included template replace the `include` element. If
        the template cannot be found, the buffered content of a `fallback`
        element is emitted instead; without a fallback the
        `TemplateNotFound` error propagates. Namespace declaration events
        for the XInclude namespace are not passed on.

        @param stream: the markup event stream to filter
        @param ctxt: the template context
        """
        from genshi.template import TemplateError, TemplateNotFound

        xinclude = self.NAMESPACE
        prefixes = []        # prefixes currently bound to the XInclude namespace
        href = None          # `href` of the include element being processed
        buffered = None      # events collected from a fallback element, if any
        collecting = False   # True while inside a fallback element

        for kind, data, pos in stream:

            if kind is START and not collecting and data[0] in xinclude:
                tag, attrib = data
                if tag.localname == 'include':
                    href = attrib.get('href')
                elif tag.localname == 'fallback':
                    collecting = True
                    buffered = []
                continue

            if kind is END and data in xinclude:
                if data.localname == 'include':
                    if not href:
                        raise TemplateError('Include misses required '
                                            'attribute "href"')
                    try:
                        # pos[0] is passed so the loader can resolve the
                        # reference relative to it (presumably the path of
                        # the including template -- confirm against loader).
                        template = self.loader.load(href, relative_to=pos[0])
                        for event in template.generate(ctxt):
                            yield event
                    except TemplateNotFound:
                        if buffered is None:
                            raise
                        for event in buffered:
                            yield event

                    # Reset per-include state for the next directive.
                    href = None
                    buffered = None

                elif data.localname == 'fallback':
                    collecting = False
                continue

            if collecting:
                # Hold fallback content back until the include is resolved.
                buffered.append((kind, data, pos))
                continue

            if kind is START_NS and data[1] == xinclude:
                prefixes.append(data[0])
                continue

            if kind is END_NS and data in prefixes:
                prefixes.pop()
                continue

            yield kind, data, pos