comparison genshi/filters/tests/test_html.py @ 916:872726bac135 experimental-py3k

Add support for Python 3 to genshi.filters: * Minor changes to track the encoding=None API change in core genshi modules. * Renamed genshi/filters/tests/html.py to test_html.py to avoid clashes with the Python 3 top-level html module when running a subset of the tests. * Did not rename genshi/filters/html.py. * i18n filters: * ugettext and friends are gone in Python 3 (only gettext and friends exist, and they now handle unicode). * Some \ line continuations inside doctests confused 2to3 and so were removed. * Testing picked up a problem (already present in trunk) where Translator.__call__ could end up defining gettext as an endlessly recursive function. Noted with a TODO.
author hodgestar
date Sun, 24 Oct 2010 22:21:28 +0000
parents
children
comparison
equal deleted inserted replaced
915:9fafb35032a1 916:872726bac135
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006-2009 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://genshi.edgewall.org/wiki/License.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://genshi.edgewall.org/log/.
13
14 import doctest
15 import unittest
16
17 from genshi.input import HTML, ParseError
18 from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
19 from genshi.template import MarkupTemplate
20
class HTMLFormFillerTestCase(unittest.TestCase):
    """Tests for piping parsed markup through ``HTMLFormFiller``.

    Each test parses a small form, pipes the stream through the filter
    (optionally with a ``data`` mapping) and compares the rendered markup
    with the expected serialisation.  Values are supplied both as a single
    string and as a list to cover both shapes of form data.

    ``assertEqual`` is used rather than the ``assertEquals`` alias, which is
    deprecated and no longer available in recent Python 3 releases.
    """

    def test_fill_input_text_no_value(self):
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="text" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_text_single_value(self):
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_input_text_multi_value(self):
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_input_hidden_no_value(self):
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_hidden_single_value(self):
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_input_hidden_multi_value(self):
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_textarea_no_value(self):
        # An untouched, empty textarea is serialised as a self-closing tag.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <textarea name="foo"/>
        </p></form>""", html.render())

    def test_fill_textarea_single_value(self):
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", html.render())

    def test_fill_textarea_multi_value(self):
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", html.render())

    def test_fill_textarea_multiple(self):
        # Ensure that the subsequent textarea doesn't get the data from the
        # first
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
          <textarea name="bar"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
        self.assertEqual("""<form><p>
          <textarea name="foo">Some text</textarea>
          <textarea name="bar"/>
        </p></form>""", html.render())

    def test_fill_textarea_preserve_original(self):
        # A textarea without matching data keeps the text it already had.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
          <textarea name="bar">Original value</textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
        self.assertEqual("""<form><p>
          <textarea name="foo">Some text</textarea>
          <textarea name="bar">Original value</textarea>
        </p></form>""", html.render())

    def test_fill_input_checkbox_single_value_auto_no_value(self):
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_checkbox_single_value_auto(self):
        # Checkbox without a value attribute: an empty string leaves it
        # unchecked, a non-empty string checks it.
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())

    def test_fill_input_checkbox_single_value_defined(self):
        # This fixture is deliberately a plain (non-``u``) literal parsed
        # with an explicit encoding.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""", encoding='ascii')
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())

    def test_fill_input_checkbox_multi_value_auto(self):
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""", encoding='ascii')
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())

    def test_fill_input_checkbox_multi_value_defined(self):
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())

    def test_fill_input_radio_no_value(self):
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="radio" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_radio_single_value(self):
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())

    def test_fill_input_radio_multi_value(self):
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())

    def test_fill_input_radio_empty_string(self):
        # An empty-string value must still match a radio whose value is "".
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())

    def test_fill_input_radio_multi_empty_string(self):
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())

    def test_fill_select_no_value_auto(self):
        html = HTML(u"""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_no_value_defined(self):
        html = HTML(u"""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_single_value_auto(self):
        # Options without a value attribute are matched on their text.
        html = HTML(u"""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option selected="selected">1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_single_value_defined(self):
        html = HTML(u"""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_multi_value_auto(self):
        # The minimised ``multiple`` attribute is rendered in its expanded
        # ``multiple="multiple"`` form.
        html = HTML(u"""<form><p>
          <select name="foo" multiple>
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option selected="selected">1</option>
            <option>2</option>
            <option selected="selected">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_multi_value_defined(self):
        html = HTML(u"""<form><p>
          <select name="foo" multiple>
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3" selected="selected">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_option_segmented_text(self):
        # The option text comes from a template, so it reaches the filter
        # after ``$x`` has been substituted.
        html = MarkupTemplate(u"""<form>
          <select name="foo">
            <option value="1">foo $x</option>
          </select>
        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual(u"""<form>
          <select name="foo">
            <option value="1" selected="selected">foo 1</option>
          </select>
        </form>""", html.render())

    def test_fill_option_segmented_text_no_value(self):
        # Without a value attribute the whole substituted option text
        # ("foo 1 bar") has to match the supplied data.
        html = MarkupTemplate("""<form>
          <select name="foo">
            <option>foo $x bar</option>
          </select>
        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
        self.assertEqual("""<form>
          <select name="foo">
            <option selected="selected">foo 1 bar</option>
          </select>
        </form>""", html.render())

    def test_fill_option_unicode_value(self):
        # encoding=None makes render() return text, so the non-ASCII
        # character can be compared directly.
        html = HTML(u"""<form>
          <select name="foo">
            <option value="&ouml;">foo</option>
          </select>
        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
        self.assertEqual(u"""<form>
          <select name="foo">
            <option value="ö" selected="selected">foo</option>
          </select>
        </form>""", html.render(encoding=None))

    def test_fill_input_password_disabled(self):
        # Password inputs are left empty unless passwords=True is given.
        html = HTML(u"""<form><p>
          <input type="password" name="pass" />
        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
        self.assertEqual("""<form><p>
          <input type="password" name="pass"/>
        </p></form>""", html.render())

    def test_fill_input_password_enabled(self):
        html = HTML(u"""<form><p>
          <input type="password" name="pass" />
        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
        self.assertEqual("""<form><p>
          <input type="password" name="pass" value="1234"/>
        </p></form>""", html.render())
362
363
class HTMLSanitizerTestCase(unittest.TestCase):
    """Tests for piping parsed markup through ``HTMLSanitizer``.

    The cases check that harmless markup passes through unchanged and that
    script elements, event-handler attributes, password inputs, comments,
    unsafe inline styles and ``javascript:`` URLs (in several obfuscated
    spellings) are removed from the rendered output.

    ``assertEqual`` is used rather than the ``assertEquals`` alias, which is
    deprecated and no longer available in recent Python 3 releases.
    """

    def test_sanitize_unchanged(self):
        html = HTML(u'<a href="#">fo<br />o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         (html | HTMLSanitizer()).render())
        html = HTML(u'<a href="#with:colon">foo</a>')
        self.assertEqual('<a href="#with:colon">foo</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_escape_text(self):
        html = HTML(u'<a href="#">fo&amp;</a>')
        self.assertEqual('<a href="#">fo&amp;</a>',
                         (html | HTMLSanitizer()).render())
        html = HTML(u'<a href="#">&lt;foo&gt;</a>')
        self.assertEqual('<a href="#">&lt;foo&gt;</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_entityref_text(self):
        # encoding=None makes render() return text, so the resolved entity
        # can be compared as a character.
        html = HTML(u'<a href="#">fo&ouml;</a>')
        self.assertEqual(u'<a href="#">foö</a>',
                         (html | HTMLSanitizer()).render(encoding=None))

    def test_sanitize_escape_attr(self):
        html = HTML(u'<div title="&lt;foo&gt;"></div>')
        self.assertEqual('<div title="&lt;foo&gt;"/>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_close_empty_tag(self):
        html = HTML(u'<a href="#">fo<br>o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_invalid_entity(self):
        html = HTML(u'&junk;')
        self.assertEqual('&amp;junk;', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_script_elem(self):
        html = HTML(u'<script>alert("Foo")</script>')
        self.assertEqual('', (html | HTMLSanitizer()).render())
        html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
        self.assertEqual('', (html | HTMLSanitizer()).render())
        # Malformed tag names never reach the sanitizer: parsing them fails.
        self.assertRaises(ParseError, HTML, u'<SCR\0IPT>alert("foo")</SCR\0IPT>')
        self.assertRaises(ParseError, HTML,
                          u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')

    def test_sanitize_remove_onclick_attr(self):
        html = HTML(u'<div onclick=\'alert("foo")\' />')
        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_input_password(self):
        html = HTML(u'<form><input type="password" /></form>')
        self.assertEqual('<form/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_comments(self):
        html = HTML(u'''<div><!-- conditional comment crap --></div>''')
        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_style_scripts(self):
        # ``style`` is explicitly added to the safe attributes here so that
        # the content of the attribute is what gets checked.
        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
        # Inline style with url() using javascript: scheme
        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, using control char
        html = HTML(u'<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, in quotes
        html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # IE expressions in CSS not allowed
        html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
                    'color: #fff\'>')
        self.assertEqual('<div style="color: #fff"/>',
                         (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, using unicode
        # escapes
        html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())

    def test_sanitize_remove_style_phishing(self):
        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
        # The position property is not allowed
        html = HTML(u'<div style="position:absolute;top:0"></div>')
        self.assertEqual('<div style="top:0"/>', (html | sanitizer).render())
        # Normal margins get passed through
        html = HTML(u'<div style="margin:10px 20px"></div>')
        self.assertEqual('<div style="margin:10px 20px"/>',
                         (html | sanitizer).render())
        # But not negative margins
        html = HTML(u'<div style="margin:-1000px 0 0"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())

    def test_sanitize_remove_src_javascript(self):
        html = HTML(u'<img src=\'javascript:alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Case-insensitive protocol matching
        html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Grave accents (not parsed)
        self.assertRaises(ParseError, HTML,
                          u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
        # Protocol encoded using UTF-8 numeric entities
        html = HTML(u'<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                    '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML(u'<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                    '&#0000058alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML(u'<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                    '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Embedded tab character in protocol
        html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Embedded tab character in protocol, but encoded this time
        html = HTML(u'<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
502
503
def suite():
    """Build the test suite for the HTML filters.

    The suite combines the doctests of the module that defines
    ``HTMLFormFiller`` with the two test cases declared in this file.

    :return: a ``unittest.TestSuite`` containing all of those tests
    """
    # ``unittest.makeSuite`` is deprecated and absent from recent Python 3
    # releases; the default loader collects the same ``test*`` methods.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    tests = unittest.TestSuite()
    tests.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
    tests.addTest(load(HTMLFormFillerTestCase))
    tests.addTest(load(HTMLSanitizerTestCase))
    return tests


# Running the file directly executes everything returned by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')
Copyright (C) 2012-2017 Edgewall Software