annotate genshi/filters/html.py @ 820:1837f39efd6f experimental-inline

Sync (old) experimental inline branch with trunk@1027.
author cmlenz
date Wed, 11 Mar 2009 17:51:06 +0000
parents 0742f421caba
children 09cc3627654c
rev   line source
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
2 #
820
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
3 # Copyright (C) 2006-2008 Edgewall Software
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
4 # All rights reserved.
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
5 #
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
8 # are also available at http://genshi.edgewall.org/wiki/License.
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
9 #
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
12 # history and logs, available at http://genshi.edgewall.org/log/.
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
13
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
14 """Implementation of a number of stream filters."""
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
15
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
16 import re
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
17
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
18 from genshi.core import Attrs, QName, stripentities
820
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
19 from genshi.core import END, START, TEXT, COMMENT
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
20
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
21 __all__ = ['HTMLFormFiller', 'HTMLSanitizer']
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
22 __docformat__ = 'restructuredtext en'
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
23
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
24
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
class HTMLFormFiller(object):
    """A stream filter that can populate HTML forms from a dictionary of values.

    >>> from genshi.input import HTML
    >>> html = HTML('''<form>
    ...   <p><input type="text" name="foo" /></p>
    ... </form>''')
    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    >>> print html | filler
    <form>
      <p><input type="text" name="foo" value="bar"/></p>
    </form>

    The filter handles ``input`` elements of type ``checkbox``, ``radio``,
    ``hidden`` and ``text`` (or without a ``type`` attribute), ``option``
    elements inside a ``select``, and ``textarea`` elements. Fields whose
    name is not a key of the data dictionary are passed through unchanged.
    """
    # TODO: only select the first radio button, and the first select option
    #       (if not in a multiple-select)
    # TODO: only apply to elements in the XHTML namespace (or no namespace)?

    def __init__(self, name=None, id=None, data=None):
        """Create the filter.

        :param name: The name of the form that should be populated. If this
                     parameter is given, only forms where the ``name``
                     attribute value matches the parameter are processed.
        :param id: The ID of the form that should be populated. If this
                   parameter is given, only forms where the ``id`` attribute
                   value matches the parameter are processed.
        :param data: The dictionary of form values, where the keys are the
                     names of the form fields, and the values are the values
                     to fill in. Defaults to an empty dictionary.
        """
        self.name = name
        self.id = id
        if data is None:
            data = {}
        self.data = data

    @staticmethod
    def _first_or_none(value):
        """Reduce a list or tuple to its first item; pass scalars through.

        An empty list or tuple yields ``None`` (meaning "no value") instead
        of raising ``IndexError``.
        """
        if isinstance(value, (list, tuple)):
            if not value:
                return None
            return value[0]
        return value

    def __call__(self, stream):
        """Apply the filter to the given stream.

        :param stream: the markup event stream to filter
        :return: a generator of ``(kind, data, pos)`` events in which the
                 fields of matching forms have been populated
        """
        in_form = in_select = in_option = in_textarea = False
        select_value = option_value = textarea_value = None
        # The START event of the current <option> is held back until its END
        # event, because the decision whether it is selected may depend on
        # the option's text content (when it has no ``value`` attribute).
        option_start = None
        option_text = []
        no_option_value = False

        for kind, data, pos in stream:

            if kind is START:
                tag, attrs = data
                tagname = tag.localname

                if tagname == 'form' and (
                        self.name and attrs.get('name') == self.name or
                        self.id and attrs.get('id') == self.id or
                        not (self.id or self.name)):
                    in_form = True

                elif in_form:
                    if tagname == 'input':
                        input_type = attrs.get('type')
                        if input_type in ('checkbox', 'radio'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                declval = attrs.get('value')
                                checked = False
                                if isinstance(value, (list, tuple)):
                                    if declval:
                                        checked = declval in [unicode(v) for v
                                                              in value]
                                    else:
                                        checked = bool(filter(None, value))
                                else:
                                    if declval:
                                        checked = declval == unicode(value)
                                    elif input_type == 'checkbox':
                                        checked = bool(value)
                                if checked:
                                    attrs |= [(QName('checked'), 'checked')]
                                elif 'checked' in attrs:
                                    attrs -= 'checked'
                        elif input_type in (None, 'hidden', 'text'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self._first_or_none(self.data[name])
                                if value is not None:
                                    attrs |= [(QName('value'), unicode(value))]
                    elif tagname == 'select':
                        name = attrs.get('name')
                        if name in self.data:
                            select_value = self.data[name]
                            in_select = True
                    elif tagname == 'textarea':
                        name = attrs.get('name')
                        if name in self.data:
                            textarea_value = self._first_or_none(
                                self.data.get(name)
                            )
                            in_textarea = True
                    elif in_select and tagname == 'option':
                        option_start = kind, data, pos
                        option_value = attrs.get('value')
                        if option_value is None:
                            # No explicit value: the text content of the
                            # option is accumulated and used as its value.
                            no_option_value = True
                            option_value = ''
                        in_option = True
                        continue
                yield kind, (tag, attrs), pos

            elif in_form and kind is TEXT:
                if in_select and in_option:
                    if no_option_value:
                        option_value += data
                    option_text.append((kind, data, pos))
                    continue
                elif in_textarea:
                    # The original content is dropped; the replacement text
                    # is emitted when the element is closed.
                    continue
                yield kind, data, pos

            elif in_form and kind is END:
                tagname = data.localname
                if tagname == 'form':
                    in_form = False
                elif tagname == 'select':
                    in_select = False
                    select_value = None
                elif in_select and tagname == 'option':
                    if isinstance(select_value, (tuple, list)):
                        selected = option_value in [unicode(v) for v
                                                    in select_value]
                    else:
                        selected = option_value == unicode(select_value)
                    okind, (tag, attrs), opos = option_start
                    if selected:
                        attrs |= [(QName('selected'), 'selected')]
                    elif 'selected' in attrs:
                        attrs -= 'selected'
                    # Flush the held-back START event and buffered text.
                    yield okind, (tag, attrs), opos
                    for event in option_text:
                        yield event
                    in_option = False
                    no_option_value = False
                    option_start = option_value = None
                    option_text = []
                elif in_textarea and tagname == 'textarea':
                    # Only textareas that are being filled reach this branch,
                    # and the value is cleared afterwards so that it cannot
                    # leak into a later textarea that is not in the data.
                    if textarea_value:
                        yield TEXT, unicode(textarea_value), pos
                    textarea_value = None
                    in_textarea = False
                yield kind, data, pos

            else:
                yield kind, data, pos
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
182
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
183
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
184 class HTMLSanitizer(object):
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
185 """A filter that removes potentially dangerous HTML tags and attributes
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
186 from the stream.
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
187
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
188 >>> from genshi import HTML
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
189 >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
190 >>> print html | HTMLSanitizer()
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
191 <div/>
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
192
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
193 The default set of safe tags and attributes can be modified when the filter
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
194 is instantiated. For example, to allow inline ``style`` attributes, the
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
195 following instantiation would work:
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
196
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
197 >>> html = HTML('<div style="background: #000"></div>')
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
198 >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
199 >>> print html | sanitizer
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
200 <div style="background: #000"/>
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
201
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
202 Note that even in this case, the filter *does* attempt to remove dangerous
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
203 constructs from style attributes:
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
204
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
205 >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
206 >>> print html | sanitizer
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
207 <div style="color: #000"/>
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
208
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
209 This handles HTML entities, unicode escapes in CSS and Javascript text, as
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
210 well as a lot of other things. However, the style tag is still excluded by
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
211 default because it is very hard for such sanitizing to be completely safe,
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
212 especially considering how much error recovery current web browsers perform.
820
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
213
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
214 :warn: Note that this special processing of CSS is currently only applied to
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
215 style attributes, **not** style elements.
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
216 """
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
217
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
218 SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
219 'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
220 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
221 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
222 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
223 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
224 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
225 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
226 'ul', 'var'])
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
227
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
228 SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
229 'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
230 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
231 'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
232 'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
233 'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
234 'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
235 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
236 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
237 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
238 'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
239
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
240 SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
241
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
242 URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
243 'src'])
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
244
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
             safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
    """Create the sanitizer.

    Each argument replaces the corresponding default set, which allows the
    exact set of allowed elements and attributes to be configured.

    :param safe_tags: a set of tag names that are considered safe
    :param safe_attrs: a set of attribute names that are considered safe
    :param safe_schemes: a set of URI schemes that are considered safe
    :param uri_attrs: a set of names of attributes that contain URIs
    """
    self.safe_tags = safe_tags
    "Tag names that are considered safe."

    self.safe_attrs = safe_attrs
    "Attribute names that are considered safe."

    self.safe_schemes = safe_schemes
    "URI schemes that are considered safe."

    self.uri_attrs = uri_attrs
    "Names of the attributes that may contain URIs."
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
264
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
def __call__(self, stream):
    """Apply the filter to the given stream.

    Elements whose tag is not in `safe_tags` are dropped together with
    everything nested inside them. On the elements that are kept, an
    attribute is removed when it is not in `safe_attrs`, when it is listed
    in `uri_attrs` and its value fails `is_safe_uri`, or when it is a
    ``style`` attribute for which `sanitize_css` leaves no declaration.
    Comments are always dropped. Any other event is passed through
    unchanged unless it lies inside a dropped element.

    :param stream: the markup event stream to filter
    :return: a generator over the sanitized events
    """
    skip_tag = None  # name of the unsafe element currently being dropped
    skip_depth = 0   # open elements named `skip_tag`; 0 means not skipping

    for kind, data, pos in stream:
        if kind is START:
            tag, attrs = data
            if skip_depth:
                # Count same-named descendants, otherwise the END of the
                # first nested one would stop the skipping too early and
                # let the rest of the unsafe element leak into the output
                if tag == skip_tag:
                    skip_depth += 1
                continue
            if tag not in self.safe_tags:
                skip_tag = tag
                skip_depth = 1
                continue

            new_attrs = []
            for attr, value in attrs:
                value = stripentities(value)
                if attr not in self.safe_attrs:
                    continue
                elif attr in self.uri_attrs:
                    # Don't allow URI schemes such as "javascript:"
                    if not self.is_safe_uri(value):
                        continue
                elif attr == 'style':
                    # Remove dangerous CSS declarations from inline styles
                    decls = self.sanitize_css(value)
                    if not decls:
                        continue
                    value = '; '.join(decls)
                new_attrs.append((attr, value))

            yield kind, (tag, Attrs(new_attrs)), pos

        elif kind is END:
            if not skip_depth:
                yield kind, data, pos
            elif data == skip_tag:
                # Skipping ends once the outermost unsafe element closes
                skip_depth -= 1

        elif kind is not COMMENT:
            if not skip_depth:
                yield kind, data, pos
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
311
820
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
def is_safe_uri(self, uri):
    """Determine whether the given URI is to be considered safe for
    inclusion in the output.

    The default implementation checks whether the scheme of the URI is in
    the set of allowed schemes (`safe_schemes`). URIs without a scheme are
    relative and always considered safe.

    >>> sanitizer = HTMLSanitizer()
    >>> sanitizer.is_safe_uri('http://example.org/')
    True
    >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
    False

    A colon inside the fragment identifier does not denote a scheme:

    >>> sanitizer.is_safe_uri('#section:1')
    True

    :param uri: the URI to check
    :return: `True` if the URI can be considered safe, `False` otherwise
    :rtype: `bool`
    :since: version 0.4.3
    """
    # The scheme always precedes the fragment, so drop the fragment first;
    # otherwise "page.html#a:b" would be read as having scheme "pagehtmla"
    if '#' in uri:
        uri = uri.split('#', 1)[0]
    if ':' not in uri:
        return True # This is a relative URI
    # Ignore everything but letters and digits in the scheme, so that
    # embedded whitespace or control characters cannot be used to disguise
    # a scheme such as "javascript"
    chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
    return ''.join(chars).lower() in self.safe_schemes
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
334
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
def sanitize_css(self, text):
    """Remove potentially dangerous property declarations from CSS code.

    In particular, properties using the CSS ``url()`` function with a scheme
    that is not considered safe are removed:

    >>> sanitizer = HTMLSanitizer()
    >>> sanitizer.sanitize_css(u'''
    ...   background: url(javascript:alert("foo"));
    ...   color: #000;
    ... ''')
    [u'color: #000']

    Also, the proprietary Internet Explorer function ``expression()`` is
    always stripped:

    >>> sanitizer.sanitize_css(u'''
    ...   background: #fff;
    ...   color: #000;
    ...   width: e/**/xpression(alert("foo"));
    ... ''')
    [u'background: #fff', u'color: #000']

    Function names are matched regardless of case:

    >>> sanitizer.sanitize_css(u'width: EXPRESSION(alert("foo"))')
    []

    :param text: the CSS text; this is expected to be `unicode` and to not
                 contain any character or numeric references
    :return: a list of declarations that are considered safe
    :rtype: `list`
    :since: version 0.4.3
    """
    decls = []
    # Undo escapes and comments first; both can be used to hide keywords
    text = self._strip_css_comments(self._replace_unicode_escapes(text))
    for decl in text.split(';'):
        decl = decl.strip()
        if not decl:
            continue
        # CSS function names are case-insensitive, so the checks for
        # ``expression`` and ``url`` must be case-insensitive as well
        is_evil = 'expression' in decl.lower()
        if not is_evil:
            for match in re.finditer(r'url\s*\(([^)]+)', decl,
                                     re.IGNORECASE):
                if not self.is_safe_uri(match.group(1)):
                    is_evil = True
                    break
        if not is_evil:
            decls.append(decl)
    return decls
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
380
500
0742f421caba Merged revisions 487-603 via svnmerge from
cmlenz
parents:
diff changeset
# Substitution function that matches every CR LF pair.
_NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
# Substitution function that matches a CSS escape: a backslash, one to six
# hexadecimal digits, and one optional whitespace character ending the escape.
_UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub

def _replace_unicode_escapes(self, text):
    """Replace hexadecimal CSS escapes by the characters they stand for.

    CR LF pairs are turned into a single newline first. Escapes that do not
    denote a usable character (zero, a surrogate, or a value beyond
    U+10FFFF) are replaced by U+FFFD instead of raising an error, so that
    hostile input cannot crash the sanitizer or inject NUL characters.

    :param text: the CSS text to process
    :return: the text with all hexadecimal escapes resolved
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    def _repl(match):
        code = int(match.group(1), 16)
        if code == 0 or code > 0x10FFFF or 0xD800 <= code <= 0xDFFF:
            return u'\ufffd'
        try:
            return to_char(code)
        except ValueError:
            # Narrow Python 2 builds cannot represent code points above
            # U+FFFF as a single character
            return u'\ufffd'
    return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))
820
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
388
1837f39efd6f Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents: 500
diff changeset
# Substitution function that matches a ``/* ... */`` CSS comment. DOTALL is
# needed because a comment may span several lines; without it a newline
# inside the comment would keep it from being removed.
_CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub

def _strip_css_comments(self, text):
    """Remove all ``/* ... */`` comments from the given CSS text.

    :param text: the CSS text to process
    :return: the text without comments
    """
    return self._CSS_COMMENTS('', text)
Copyright (C) 2012-2017 Edgewall Software