Mercurial > genshi > genshi-test
annotate genshi/filters/html.py @ 820:1837f39efd6f experimental-inline
Sync (old) experimental inline branch with trunk@1027.
author | cmlenz |
---|---|
date | Wed, 11 Mar 2009 17:51:06 +0000 |
parents | 0742f421caba |
children | 09cc3627654c |
rev | line source |
---|---|
500 | 1 # -*- coding: utf-8 -*- |
2 # | |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
3 # Copyright (C) 2006-2008 Edgewall Software |
500 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://genshi.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://genshi.edgewall.org/log/. | |
13 | |
14 """Implementation of a number of stream filters.""" | |
15 | |
16 import re | |
17 | |
18 from genshi.core import Attrs, QName, stripentities | |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
19 from genshi.core import END, START, TEXT, COMMENT |
500 | 20 |
21 __all__ = ['HTMLFormFiller', 'HTMLSanitizer'] | |
22 __docformat__ = 'restructuredtext en' | |
23 | |
24 | |
class HTMLFormFiller(object):
    """A stream filter that can populate HTML forms from a dictionary of values.
    
    >>> from genshi.input import HTML
    >>> html = HTML('''<form>
    ...   <p><input type="text" name="foo" /></p>
    ... </form>''')
    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    >>> print html | filler
    <form>
      <p><input type="text" name="foo" value="bar"/></p>
    </form>
    """
    # TODO: only select the first radio button, and the first select option
    #       (if not in a multiple-select)
    # TODO: only apply to elements in the XHTML namespace (or no namespace)?

    def __init__(self, name=None, id=None, data=None):
        """Create the filter.
        
        :param name: The name of the form that should be populated. If this
                     parameter is given, only forms where the ``name`` attribute
                     value matches the parameter are processed.
        :param id: The ID of the form that should be populated. If this
                   parameter is given, only forms where the ``id`` attribute
                   value matches the parameter are processed.
        :param data: The dictionary of form values, where the keys are the names
                     of the form fields, and the values are the values to fill
                     in.
        """
        self.name = name
        self.id = id
        if data is None:
            data = {}
        self.data = data

    def __call__(self, stream):
        """Apply the filter to the given stream.
        
        Events outside of a matching ``<form>`` element are passed through
        unchanged. Inside a matching form, ``<input>``, ``<select>``/
        ``<option>`` and ``<textarea>`` elements whose ``name`` is a key of
        `data` are rewritten to reflect the corresponding value.
        
        :param stream: the markup event stream to filter
        :return: a generator yielding the filtered ``(kind, data, pos)``
                 events
        """
        in_form = in_select = in_option = in_textarea = False
        select_value = option_value = textarea_value = None
        # The start event and text of the current <option> are held back
        # until its end tag is seen, because an option without a ``value``
        # attribute uses its text content as the value to compare against.
        option_start = None
        option_text = []
        no_option_value = False

        for kind, data, pos in stream:

            if kind is START:
                tag, attrs = data
                tagname = tag.localname

                if tagname == 'form' and (
                        self.name and attrs.get('name') == self.name or
                        self.id and attrs.get('id') == self.id or
                        not (self.id or self.name)):
                    in_form = True

                elif in_form:
                    if tagname == 'input':
                        # The type attribute is case-insensitive in HTML; a
                        # missing or empty type means a plain text field.
                        input_type = (attrs.get('type') or '').lower()
                        if input_type in ('checkbox', 'radio'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                declval = attrs.get('value')
                                checked = False
                                if isinstance(value, (list, tuple)):
                                    if declval:
                                        checked = declval in [unicode(v) for v
                                                              in value]
                                    else:
                                        # No declared value: check the box if
                                        # any of the supplied items is truthy
                                        checked = bool([v for v in value
                                                        if v])
                                else:
                                    if declval:
                                        checked = declval == unicode(value)
                                    elif input_type == 'checkbox':
                                        checked = bool(value)
                                if checked:
                                    attrs |= [(QName('checked'), 'checked')]
                                elif 'checked' in attrs:
                                    attrs -= 'checked'
                        elif input_type in ('', 'hidden', 'text'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                if isinstance(value, (list, tuple)):
                                    # Use the first item; an empty sequence
                                    # is treated like "no value supplied"
                                    if value:
                                        value = value[0]
                                    else:
                                        value = None
                                if value is not None:
                                    attrs |= [(QName('value'), unicode(value))]
                    elif tagname == 'select':
                        name = attrs.get('name')
                        if name in self.data:
                            select_value = self.data[name]
                            in_select = True
                    elif tagname == 'textarea':
                        name = attrs.get('name')
                        if name in self.data:
                            textarea_value = self.data.get(name)
                            if isinstance(textarea_value, (list, tuple)):
                                if textarea_value:
                                    textarea_value = textarea_value[0]
                                else:
                                    textarea_value = None
                            in_textarea = True
                    elif in_select and tagname == 'option':
                        option_start = kind, data, pos
                        option_value = attrs.get('value')
                        if option_value is None:
                            # The value will be accumulated from the text
                            no_option_value = True
                            option_value = ''
                        in_option = True
                        continue
                yield kind, (tag, attrs), pos

            elif in_form and kind is TEXT:
                if in_select and in_option:
                    if no_option_value:
                        option_value += data
                    option_text.append((kind, data, pos))
                    continue
                elif in_textarea:
                    # The original content is replaced by the supplied value
                    continue
                yield kind, data, pos

            elif in_form and kind is END:
                tagname = data.localname
                if tagname == 'form':
                    in_form = False
                elif tagname == 'select':
                    in_select = False
                    select_value = None
                elif in_select and tagname == 'option':
                    if isinstance(select_value, (tuple, list)):
                        selected = option_value in [unicode(v) for v
                                                    in select_value]
                    else:
                        selected = option_value == unicode(select_value)
                    okind, (tag, attrs), opos = option_start
                    if selected:
                        attrs |= [(QName('selected'), 'selected')]
                    elif 'selected' in attrs:
                        attrs -= 'selected'
                    # Flush the held-back start tag and text of the option
                    yield okind, (tag, attrs), opos
                    if option_text:
                        for event in option_text:
                            yield event
                    in_option = False
                    no_option_value = False
                    option_start = option_value = None
                    option_text = []
                elif tagname == 'textarea':
                    # Only textareas that were actually being filled get the
                    # value, and the value is reset so that it cannot leak
                    # into a later textarea that is not present in the data.
                    if in_textarea:
                        if textarea_value:
                            yield TEXT, unicode(textarea_value), pos
                        textarea_value = None
                        in_textarea = False
                yield kind, data, pos

            else:
                yield kind, data, pos
182 | |
183 | |
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.
    
    >>> from genshi import HTML
    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
    >>> print html | HTMLSanitizer()
    <div/>
    
    The default set of safe tags and attributes can be modified when the filter
    is instantiated. For example, to allow inline ``style`` attributes, the
    following instantiation would work:
    
    >>> html = HTML('<div style="background: #000"></div>')
    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    >>> print html | sanitizer
    <div style="background: #000"/>
    
    Note that even in this case, the filter *does* attempt to remove dangerous
    constructs from style attributes:
    
    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
    >>> print html | sanitizer
    <div style="color: #000"/>
    
    This handles HTML entities, unicode escapes in CSS and Javascript text, as
    well as a lot of other things. However, the style tag is still excluded by
    default because it is very hard for such sanitizing to be completely safe,
    especially considering how much error recovery current web browsers perform.
    
    :warn: Note that this special processing of CSS is currently only applied to
           style attributes, **not** style elements.
    """

    SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
        'type', 'usemap', 'valign', 'value', 'vspace', 'width'])

    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])

    def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
                 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
        """Create the sanitizer.
        
        The exact set of allowed elements and attributes can be configured.
        
        :param safe_tags: a set of tag names that are considered safe
        :param safe_attrs: a set of attribute names that are considered safe
        :param safe_schemes: a set of URI schemes that are considered safe
        :param uri_attrs: a set of names of attributes that contain URIs
        """
        self.safe_tags = safe_tags
        "The set of tag names that are considered safe."
        self.safe_attrs = safe_attrs
        "The set of attribute names that are considered safe."
        self.uri_attrs = uri_attrs
        "The set of names of attributes that may contain URIs."
        self.safe_schemes = safe_schemes
        "The set of URI schemes that are considered safe."

    def __call__(self, stream):
        """Apply the filter to the given stream.
        
        Elements whose tag is not in `safe_tags` are dropped together with
        everything they contain. Attributes that are not in `safe_attrs`,
        URI attributes with an unsafe scheme, and unsafe CSS declarations in
        ``style`` attributes are removed from the remaining elements.
        Comments are always dropped.
        
        :param stream: the markup event stream to filter
        :return: a generator yielding the sanitized ``(kind, data, pos)``
                 events
        """
        waiting_for = None
        # Nesting depth of elements named like the one being skipped, so that
        # e.g. ``<object><object/>text</object>`` is skipped up to the end tag
        # of the *outer* element rather than the first matching end tag.
        depth = 0

        for kind, data, pos in stream:
            if kind is START:
                tag, attrs = data
                if waiting_for:
                    if tag == waiting_for:
                        depth += 1
                    continue
                if tag not in self.safe_tags:
                    waiting_for = tag
                    depth = 1
                    continue

                new_attrs = []
                for attr, value in attrs:
                    value = stripentities(value)
                    if attr not in self.safe_attrs:
                        continue
                    elif attr in self.uri_attrs:
                        # Don't allow URI schemes such as "javascript:"
                        if not self.is_safe_uri(value):
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self.sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrs.append((attr, value))

                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is END:
                tag = data
                if waiting_for:
                    if waiting_for == tag:
                        depth -= 1
                        if depth == 0:
                            waiting_for = None
                else:
                    yield kind, data, pos

            elif kind is not COMMENT:
                if not waiting_for:
                    yield kind, data, pos

    def is_safe_uri(self, uri):
        """Determine whether the given URI is to be considered safe for
        inclusion in the output.
        
        The default implementation checks whether the scheme of the URI is in
        the set of allowed URIs (`safe_schemes`).
        
        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.is_safe_uri('http://example.org/')
        True
        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
        False
        
        A colon that only appears in the fragment identifier does not
        introduce a scheme:
        
        >>> sanitizer.is_safe_uri('page.html#note:1')
        True
        
        :param uri: the URI to check
        :return: `True` if the URI can be considered safe, `False` otherwise
        :rtype: `bool`
        :since: version 0.4.3
        """
        if '#' in uri:
            # Strip the fragment identifier, which may legally contain ':'
            uri = uri.split('#', 1)[0]
        if ':' not in uri:
            return True # This is a relative URI
        # Only alphanumeric characters are kept, so that schemes obfuscated
        # with whitespace or control characters are still recognized
        chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower() in self.safe_schemes

    def sanitize_css(self, text):
        """Remove potentially dangerous property declarations from CSS code.
        
        In particular, properties using the CSS ``url()`` function with a scheme
        that is not considered safe are removed:
        
        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.sanitize_css(u'''
        ...   background: url(javascript:alert("foo"));
        ...   color: #000;
        ... ''')
        [u'color: #000']
        
        Also, the proprietary Internet Explorer function ``expression()`` is
        always stripped:
        
        >>> sanitizer.sanitize_css(u'''
        ...   background: #fff;
        ...   color: #000;
        ...   width: e/**/xpression(alert("foo"));
        ... ''')
        [u'background: #fff', u'color: #000']
        
        Both checks ignore case, as CSS function names are case-insensitive.
        
        :param text: the CSS text; this is expected to be `unicode` and to not
                     contain any character or numeric references
        :return: a list of declarations that are considered safe
        :rtype: `list`
        :since: version 0.4.3
        """
        decls = []
        text = self._strip_css_comments(self._replace_unicode_escapes(text))
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            if 'expression' in decl.lower():
                continue
            for match in self._URL_FINDITER(decl):
                if not self.is_safe_uri(match.group(1)):
                    break
            else:
                # No url() reference with an unsafe scheme was found
                decls.append(decl)
        return decls

    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub
    _URL_FINDITER = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE).finditer

    def _replace_unicode_escapes(self, text):
        """Replace CSS hexadecimal escapes (such as ``\\65``) in the given
        text by the characters they denote, after normalizing ``\\r\\n``
        newlines to ``\\n``.
        
        Escapes denoting a code point that cannot be represented are replaced
        by U+FFFD instead of raising an error.
        """
        def _repl(match):
            try:
                return unichr(int(match.group(1), 16))
            except ValueError:
                return u'\ufffd'
        return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))

    # DOTALL is needed because CSS comments may span multiple lines
    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub

    def _strip_css_comments(self, text):
        """Return the given CSS text with all ``/* ... */`` comments removed."""
        return self._CSS_COMMENTS('', text)