Mercurial > genshi > mirror
changeset 933:1e8c33345e52 trunk
Merge r1141 from py3k:
add support for python 3 to genshi.filters:
* minor changes to track encoding=None API change in core genshi modules.
* renamed genshi/filters/tests/html.py to test_html.py to avoid clashes with the Python 3 top-level html module when running a subset of the tests.
* did not rename genshi/filters/html.py.
* i18n filters:
* ugettext and friends are gone in Python 3 (only gettext and friends exist, and they now handle unicode)
* Some \ line continuations inside doctests confused 2to3 and so were removed.
* Testing picked up a problem (already present in trunk) where Translator.__call__ could end up defining gettext as an endlessly recursive function. Noted with a TODO.
author | hodgestar |
---|---|
date | Fri, 18 Mar 2011 09:11:53 +0000 |
parents | 18209925c54e |
children | 31bbb6f9e87b |
files | genshi/filters/html.py genshi/filters/i18n.py genshi/filters/tests/__init__.py genshi/filters/tests/html.py genshi/filters/tests/i18n.py genshi/filters/tests/test_html.py genshi/filters/tests/transform.py genshi/filters/transform.py |
diffstat | 8 files changed, 664 insertions(+), 604 deletions(-) [+] |
line wrap: on
line diff
--- a/genshi/filters/html.py +++ b/genshi/filters/html.py @@ -32,7 +32,7 @@ >>> from genshi.input import HTML >>> html = HTML('''<form> ... <p><input type="text" name="foo" /></p> - ... </form>''') + ... </form>''', encoding='utf-8') >>> filler = HTMLFormFiller(data={'foo': 'bar'}) >>> print(html | filler) <form> @@ -199,7 +199,7 @@ from the stream. >>> from genshi import HTML - >>> html = HTML('<div><script>alert(document.cookie)</script></div>') + >>> html = HTML('<div><script>alert(document.cookie)</script></div>', encoding='utf-8') >>> print(html | HTMLSanitizer()) <div/> @@ -207,7 +207,7 @@ is instantiated. For example, to allow inline ``style`` attributes, the following instantation would work: - >>> html = HTML('<div style="background: #000"></div>') + >>> html = HTML('<div style="background: #000"></div>', encoding='utf-8') >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) >>> print(html | sanitizer) <div style="background: #000"/> @@ -215,7 +215,7 @@ Note that even in this case, the filter *does* attempt to remove dangerous constructs from style attributes: - >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>') + >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>', encoding='utf-8') >>> print(html | sanitizer) <div style="color: #000"/>
--- a/genshi/filters/i18n.py +++ b/genshi/filters/i18n.py @@ -33,6 +33,7 @@ from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives from genshi.template.directives import Directive, StripDirective from genshi.template.markup import MarkupTemplate, EXEC +from genshi.compat import IS_PYTHON2 __all__ = ['Translator', 'extract'] __docformat__ = 'restructuredtext en' @@ -288,8 +289,7 @@ also need to pass a name for those parameters. Consider the following examples: - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> ... <div i18n:choose="num; num"> ... <p i18n:singular="">There is $num coin</p> ... <p i18n:plural="">There are $num coins</p> @@ -301,8 +301,7 @@ [(2, 'ngettext', (u'There is %(num)s coin', u'There are %(num)s coins'), [])] - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> ... <div i18n:choose="num; num"> ... <p i18n:singular="">There is $num coin</p> ... <p i18n:plural="">There are $num coins</p> @@ -324,8 +323,7 @@ When used as a element and not as an attribute: - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> ... <i18n:choose numeral="num" params="num"> ... <p i18n:singular="">There is $num coin</p> ... <p i18n:plural="">There are $num coins</p> @@ -492,8 +490,7 @@ another i18n domain(catalog) to translate from. >>> from genshi.filters.tests.i18n import DummyTranslations - >>> tmpl = MarkupTemplate('''\ - <html xmlns:i18n="http://genshi.edgewall.org/i18n"> + >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> ... <p i18n:msg="">Bar</p> ... <div i18n:domain="foo"> ... 
<p i18n:msg="">FooBar</p> @@ -663,11 +660,19 @@ if ctxt: ctxt['_i18n.gettext'] = gettext else: - gettext = self.translate.ugettext - ngettext = self.translate.ungettext + if IS_PYTHON2: + gettext = self.translate.ugettext + ngettext = self.translate.ungettext + else: + gettext = self.translate.gettext + ngettext = self.translate.ngettext try: - dgettext = self.translate.dugettext - dngettext = self.translate.dungettext + if IS_PYTHON2: + dgettext = self.translate.dugettext + dngettext = self.translate.dungettext + else: + dgettext = self.translate.dgettext + dngettext = self.translate.dngettext except AttributeError: dgettext = lambda _, y: gettext(y) dngettext = lambda _, s, p, n: ngettext(s, p, n) @@ -678,6 +683,8 @@ ctxt['_i18n.dngettext'] = dngettext if ctxt and ctxt.get('_i18n.domain'): + # TODO: This can cause infinite recursion if dgettext is defined + # via the AttributeError case above! gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) for kind, data, pos in stream: @@ -1168,7 +1175,9 @@ and node.func.id in gettext_functions: strings = [] def _add(arg): - if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring): + if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode): + strings.append(arg.s) + elif isinstance(arg, _ast.Str): strings.append(unicode(arg.s, 'utf-8')) elif arg: strings.append(None)
--- a/genshi/filters/tests/__init__.py +++ b/genshi/filters/tests/__init__.py @@ -15,9 +15,9 @@ import unittest def suite(): - from genshi.filters.tests import html, i18n, transform + from genshi.filters.tests import test_html, i18n, transform suite = unittest.TestSuite() - suite.addTest(html.suite()) + suite.addTest(test_html.suite()) suite.addTest(i18n.suite()) if hasattr(doctest, 'NORMALIZE_WHITESPACE'): suite.addTest(transform.suite())
deleted file mode 100644 --- a/genshi/filters/tests/html.py +++ /dev/null @@ -1,513 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2006-2009 Edgewall Software -# All rights reserved. -# -# This software is licensed as described in the file COPYING, which -# you should have received as part of this distribution. The terms -# are also available at http://genshi.edgewall.org/wiki/License. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at http://genshi.edgewall.org/log/. - -import doctest -import unittest - -from genshi.input import HTML, ParseError -from genshi.filters.html import HTMLFormFiller, HTMLSanitizer -from genshi.template import MarkupTemplate - -class HTMLFormFillerTestCase(unittest.TestCase): - - def test_fill_input_text_no_value(self): - html = HTML("""<form><p> - <input type="text" name="foo" /> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <input type="text" name="foo"/> - </p></form>""", html.render()) - - def test_fill_input_text_single_value(self): - html = HTML("""<form><p> - <input type="text" name="foo" /> - </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) - self.assertEquals("""<form><p> - <input type="text" name="foo" value="bar"/> - </p></form>""", html.render()) - - def test_fill_input_text_multi_value(self): - html = HTML("""<form><p> - <input type="text" name="foo" /> - </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) - self.assertEquals("""<form><p> - <input type="text" name="foo" value="bar"/> - </p></form>""", html.render()) - - def test_fill_input_hidden_no_value(self): - html = HTML("""<form><p> - <input type="hidden" name="foo" /> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <input type="hidden" name="foo"/> - </p></form>""", html.render()) - - def test_fill_input_hidden_single_value(self): - html = HTML("""<form><p> - <input type="hidden" 
name="foo" /> - </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) - self.assertEquals("""<form><p> - <input type="hidden" name="foo" value="bar"/> - </p></form>""", html.render()) - - def test_fill_input_hidden_multi_value(self): - html = HTML("""<form><p> - <input type="hidden" name="foo" /> - </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) - self.assertEquals("""<form><p> - <input type="hidden" name="foo" value="bar"/> - </p></form>""", html.render()) - - def test_fill_textarea_no_value(self): - html = HTML("""<form><p> - <textarea name="foo"></textarea> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <textarea name="foo"/> - </p></form>""", html.render()) - - def test_fill_textarea_single_value(self): - html = HTML("""<form><p> - <textarea name="foo"></textarea> - </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) - self.assertEquals("""<form><p> - <textarea name="foo">bar</textarea> - </p></form>""", html.render()) - - def test_fill_textarea_multi_value(self): - html = HTML("""<form><p> - <textarea name="foo"></textarea> - </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) - self.assertEquals("""<form><p> - <textarea name="foo">bar</textarea> - </p></form>""", html.render()) - - def test_fill_textarea_multiple(self): - # Ensure that the subsequent textarea doesn't get the data from the - # first - html = HTML("""<form><p> - <textarea name="foo"></textarea> - <textarea name="bar"></textarea> - </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) - self.assertEquals("""<form><p> - <textarea name="foo">Some text</textarea> - <textarea name="bar"/> - </p></form>""", html.render()) - - def test_fill_textarea_preserve_original(self): - html = HTML("""<form><p> - <textarea name="foo"></textarea> - <textarea name="bar">Original value</textarea> - </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) - self.assertEquals("""<form><p> - <textarea name="foo">Some text</textarea> - <textarea name="bar">Original 
value</textarea> - </p></form>""", html.render()) - - def test_fill_input_checkbox_single_value_auto_no_value(self): - html = HTML("""<form><p> - <input type="checkbox" name="foo" /> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <input type="checkbox" name="foo"/> - </p></form>""", html.render()) - - def test_fill_input_checkbox_single_value_auto(self): - html = HTML("""<form><p> - <input type="checkbox" name="foo" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="checkbox" name="foo"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render()) - - def test_fill_input_checkbox_single_value_defined(self): - html = HTML("""<form><p> - <input type="checkbox" name="foo" value="1" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" value="1" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" value="1"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) - - def test_fill_input_checkbox_multi_value_auto(self): - html = HTML("""<form><p> - <input type="checkbox" name="foo" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="checkbox" name="foo"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render()) - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render()) - - def test_fill_input_checkbox_multi_value_defined(self): - html = HTML("""<form><p> - <input type="checkbox" name="foo" value="1" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" value="1" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': 
['1']})).render()) - self.assertEquals("""<form><p> - <input type="checkbox" name="foo" value="1"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) - - def test_fill_input_radio_no_value(self): - html = HTML("""<form><p> - <input type="radio" name="foo" /> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <input type="radio" name="foo"/> - </p></form>""", html.render()) - - def test_fill_input_radio_single_value(self): - html = HTML("""<form><p> - <input type="radio" name="foo" value="1" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="1" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="1"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) - - def test_fill_input_radio_multi_value(self): - html = HTML("""<form><p> - <input type="radio" name="foo" value="1" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="1" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render()) - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="1"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) - - def test_fill_input_radio_empty_string(self): - html = HTML("""<form><p> - <input type="radio" name="foo" value="" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) - - def test_fill_input_radio_multi_empty_string(self): - html = HTML("""<form><p> - <input type="radio" name="foo" value="" /> - </p></form>""") - self.assertEquals("""<form><p> - <input type="radio" name="foo" value="" checked="checked"/> - </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render()) - - def 
test_fill_select_no_value_auto(self): - html = HTML("""<form><p> - <select name="foo"> - <option>1</option> - <option>2</option> - <option>3</option> - </select> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <select name="foo"> - <option>1</option> - <option>2</option> - <option>3</option> - </select> - </p></form>""", html.render()) - - def test_fill_select_no_value_defined(self): - html = HTML("""<form><p> - <select name="foo"> - <option value="1">1</option> - <option value="2">2</option> - <option value="3">3</option> - </select> - </p></form>""") | HTMLFormFiller() - self.assertEquals("""<form><p> - <select name="foo"> - <option value="1">1</option> - <option value="2">2</option> - <option value="3">3</option> - </select> - </p></form>""", html.render()) - - def test_fill_select_single_value_auto(self): - html = HTML("""<form><p> - <select name="foo"> - <option>1</option> - <option>2</option> - <option>3</option> - </select> - </p></form>""") | HTMLFormFiller(data={'foo': '1'}) - self.assertEquals("""<form><p> - <select name="foo"> - <option selected="selected">1</option> - <option>2</option> - <option>3</option> - </select> - </p></form>""", html.render()) - - def test_fill_select_single_value_defined(self): - html = HTML("""<form><p> - <select name="foo"> - <option value="1">1</option> - <option value="2">2</option> - <option value="3">3</option> - </select> - </p></form>""") | HTMLFormFiller(data={'foo': '1'}) - self.assertEquals("""<form><p> - <select name="foo"> - <option value="1" selected="selected">1</option> - <option value="2">2</option> - <option value="3">3</option> - </select> - </p></form>""", html.render()) - - def test_fill_select_multi_value_auto(self): - html = HTML("""<form><p> - <select name="foo" multiple> - <option>1</option> - <option>2</option> - <option>3</option> - </select> - </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) - self.assertEquals("""<form><p> - <select name="foo" multiple="multiple"> - 
<option selected="selected">1</option> - <option>2</option> - <option selected="selected">3</option> - </select> - </p></form>""", html.render()) - - def test_fill_select_multi_value_defined(self): - html = HTML("""<form><p> - <select name="foo" multiple> - <option value="1">1</option> - <option value="2">2</option> - <option value="3">3</option> - </select> - </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) - self.assertEquals("""<form><p> - <select name="foo" multiple="multiple"> - <option value="1" selected="selected">1</option> - <option value="2">2</option> - <option value="3" selected="selected">3</option> - </select> - </p></form>""", html.render()) - - def test_fill_option_segmented_text(self): - html = MarkupTemplate("""<form> - <select name="foo"> - <option value="1">foo $x</option> - </select> - </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'}) - self.assertEquals("""<form> - <select name="foo"> - <option value="1" selected="selected">foo 1</option> - </select> - </form>""", html.render()) - - def test_fill_option_segmented_text_no_value(self): - html = MarkupTemplate("""<form> - <select name="foo"> - <option>foo $x bar</option> - </select> - </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'}) - self.assertEquals("""<form> - <select name="foo"> - <option selected="selected">foo 1 bar</option> - </select> - </form>""", html.render()) - - def test_fill_option_unicode_value(self): - html = HTML("""<form> - <select name="foo"> - <option value="ö">foo</option> - </select> - </form>""") | HTMLFormFiller(data={'foo': u'ö'}) - self.assertEquals(u"""<form> - <select name="foo"> - <option value="ö" selected="selected">foo</option> - </select> - </form>""", html.render(encoding=None)) - - def test_fill_input_password_disabled(self): - html = HTML("""<form><p> - <input type="password" name="pass" /> - </p></form>""") | HTMLFormFiller(data={'pass': 'bar'}) - self.assertEquals("""<form><p> - <input type="password" name="pass"/> 
- </p></form>""", html.render()) - - def test_fill_input_password_enabled(self): - html = HTML("""<form><p> - <input type="password" name="pass" /> - </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True) - self.assertEquals("""<form><p> - <input type="password" name="pass" value="1234"/> - </p></form>""", html.render()) - - -class HTMLSanitizerTestCase(unittest.TestCase): - - def test_sanitize_unchanged(self): - html = HTML('<a href="#">fo<br />o</a>') - self.assertEquals('<a href="#">fo<br/>o</a>', - (html | HTMLSanitizer()).render()) - html = HTML('<a href="#with:colon">foo</a>') - self.assertEquals('<a href="#with:colon">foo</a>', - (html | HTMLSanitizer()).render()) - - def test_sanitize_escape_text(self): - html = HTML('<a href="#">fo&</a>') - self.assertEquals('<a href="#">fo&</a>', - (html | HTMLSanitizer()).render()) - html = HTML('<a href="#"><foo></a>') - self.assertEquals('<a href="#"><foo></a>', - (html | HTMLSanitizer()).render()) - - def test_sanitize_entityref_text(self): - html = HTML('<a href="#">foö</a>') - self.assertEquals(u'<a href="#">foö</a>', - (html | HTMLSanitizer()).render(encoding=None)) - - def test_sanitize_escape_attr(self): - html = HTML('<div title="<foo>"></div>') - self.assertEquals('<div title="<foo>"/>', - (html | HTMLSanitizer()).render()) - - def test_sanitize_close_empty_tag(self): - html = HTML('<a href="#">fo<br>o</a>') - self.assertEquals('<a href="#">fo<br/>o</a>', - (html | HTMLSanitizer()).render()) - - def test_sanitize_invalid_entity(self): - html = HTML('&junk;') - self.assertEquals('&junk;', (html | HTMLSanitizer()).render()) - - def test_sanitize_remove_script_elem(self): - html = HTML('<script>alert("Foo")</script>') - self.assertEquals('', (html | HTMLSanitizer()).render()) - html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>') - self.assertEquals('', (html | HTMLSanitizer()).render()) - self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>') - 
self.assertRaises(ParseError, HTML, - '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') - - def test_sanitize_remove_onclick_attr(self): - html = HTML('<div onclick=\'alert("foo")\' />') - self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) - - def test_sanitize_remove_input_password(self): - html = HTML('<form><input type="password" /></form>') - self.assertEquals('<form/>', (html | HTMLSanitizer()).render()) - - def test_sanitize_remove_comments(self): - html = HTML('''<div><!-- conditional comment crap --></div>''') - self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) - - def test_sanitize_remove_style_scripts(self): - sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) - # Inline style with url() using javascript: scheme - html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - # Inline style with url() using javascript: scheme, using control char - html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - # Inline style with url() using javascript: scheme, in quotes - html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - # IE expressions in CSS not allowed - html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));' - 'color: #fff\'>') - self.assertEquals('<div style="color: #fff"/>', - (html | sanitizer).render()) - # Inline style with url() using javascript: scheme, using unicode - # escapes - html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | 
sanitizer).render()) - html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>') - self.assertEquals('<div/>', (html | sanitizer).render()) - - def test_sanitize_remove_style_phishing(self): - sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) - # The position property is not allowed - html = HTML('<div style="position:absolute;top:0"></div>') - self.assertEquals('<div style="top:0"/>', (html | sanitizer).render()) - # Normal margins get passed through - html = HTML('<div style="margin:10px 20px"></div>') - self.assertEquals('<div style="margin:10px 20px"/>', - (html | sanitizer).render()) - # But not negative margins - html = HTML('<div style="margin:-1000px 0 0"></div>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<div style="margin-left:-2000px 0 0"></div>') - self.assertEquals('<div/>', (html | sanitizer).render()) - html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>') - self.assertEquals('<div/>', (html | sanitizer).render()) - - def test_sanitize_remove_src_javascript(self): - html = HTML('<img src=\'javascript:alert("foo")\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Case-insensitive protocol matching - html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Grave accents (not parsed) - self.assertRaises(ParseError, HTML, - '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') - # Protocol encoded using UTF-8 numeric entities - html = HTML('<IMG 
SRC=\'javascri' - 'pt:alert("foo")\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Protocol encoded using UTF-8 numeric entities without a semicolon - # (which is allowed because the max number of digits is used) - html = HTML('<IMG SRC=\'java' - 'script' - ':alert("foo")\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Protocol encoded using UTF-8 numeric hex entities without a semicolon - # (which is allowed because the max number of digits is used) - html = HTML('<IMG SRC=\'javascri' - 'pt:alert("foo")\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Embedded tab character in protocol - html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - # Embedded tab character in protocol, but encoded this time - html = HTML('<IMG SRC=\'jav	ascript:alert("foo");\'>') - self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__)) - suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test')) - suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) - return suite - - -if __name__ == '__main__': - unittest.main(defaultTest='suite')
--- a/genshi/filters/tests/i18n.py +++ b/genshi/filters/tests/i18n.py @@ -14,13 +14,13 @@ from datetime import datetime import doctest from gettext import NullTranslations -from StringIO import StringIO import unittest from genshi.core import Attrs from genshi.template import MarkupTemplate, Context from genshi.filters.i18n import Translator, extract from genshi.input import HTML +from genshi.compat import IS_PYTHON2, StringIO class DummyTranslations(NullTranslations): @@ -39,17 +39,31 @@ def _domain_call(self, func, domain, *args, **kwargs): return getattr(self._domains.get(domain, self), func)(*args, **kwargs) - def ugettext(self, message): - missing = object() - tmsg = self._catalog.get(message, missing) - if tmsg is missing: - if self._fallback: - return self._fallback.ugettext(message) - return unicode(message) - return tmsg + if IS_PYTHON2: + def ugettext(self, message): + missing = object() + tmsg = self._catalog.get(message, missing) + if tmsg is missing: + if self._fallback: + return self._fallback.ugettext(message) + return unicode(message) + return tmsg + else: + def gettext(self, message): + missing = object() + tmsg = self._catalog.get(message, missing) + if tmsg is missing: + if self._fallback: + return self._fallback.gettext(message) + return unicode(message) + return tmsg - def dugettext(self, domain, message): - return self._domain_call('ugettext', domain, message) + if IS_PYTHON2: + def dugettext(self, domain, message): + return self._domain_call('ugettext', domain, message) + else: + def dgettext(self, domain, message): + return self._domain_call('gettext', domain, message) def ungettext(self, msgid1, msgid2, n): try: @@ -62,8 +76,16 @@ else: return msgid2 - def dungettext(self, domain, singular, plural, numeral): - return self._domain_call('ungettext', domain, singular, plural, numeral) + if not IS_PYTHON2: + ngettext = ungettext + del ungettext + + if IS_PYTHON2: + def dungettext(self, domain, singular, plural, numeral): + return 
self._domain_call('ungettext', domain, singular, plural, numeral) + else: + def dngettext(self, domain, singular, plural, numeral): + return self._domain_call('ngettext', domain, singular, plural, numeral) class TranslatorTestCase(unittest.TestCase): @@ -72,7 +94,7 @@ """ Verify that translated attributes end up in a proper `Attrs` instance. """ - html = HTML("""<html> + html = HTML(u"""<html> <span title="Foo"></span> </html>""") translator = Translator(lambda s: u"Voh") @@ -218,9 +240,9 @@ gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]." translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Für Details siehe bitte <a href="help.html">Hilfe</a>.</p> - </html>""", tmpl.generate().render()) + </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) def test_extract_i18n_msg_nonewline(self): tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" @@ -241,7 +263,7 @@ gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]" translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Für Details siehe bitte <a href="help.html">Hilfe</a></p> </html>""", tmpl.generate().render()) @@ -264,9 +286,9 @@ gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]" translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> Für Details siehe bitte <a href="help.html">Hilfe</a> - </html>""", tmpl.generate().render()) + </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) def test_extract_i18n_msg_with_attributes(self): tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" @@ -394,7 +416,7 @@ gettext = lambda s: u"Für Details siehe bitte [1:[2:Hilfeseite]]." 
translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Für Details siehe bitte <a href="help.html"><em>Hilfeseite</em></a>.</p> </html>""", tmpl.generate().render()) @@ -449,7 +471,7 @@ gettext = lambda s: u"[1:] Einträge pro Seite anzeigen." translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p><input type="text" name="num"/> Einträge pro Seite anzeigen.</p> </html>""", tmpl.generate().render()) @@ -476,7 +498,7 @@ gettext = lambda s: u"Für [2:Details] siehe bitte [1:Hilfe]." translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Für <em>Details</em> siehe bitte <a href="help.html">Hilfe</a>.</p> </html>""", tmpl.generate().render()) @@ -500,13 +522,13 @@ <p i18n:msg=""> Show me <input type="text" name="num" /> entries per page, starting at page <input type="text" name="num" />. </p> - </html>""") + </html>""", encoding='utf-8') gettext = lambda s: u"[1:] Einträge pro Seite, beginnend auf Seite [2:]." translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> - <p><input type="text" name="num"/> Eintr\xc3\xa4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p> - </html>""", tmpl.generate().render()) + self.assertEqual(u"""<html> + <p><input type="text" name="num"/> Eintr\u00E4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p> + </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) def test_extract_i18n_msg_with_param(self): tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" @@ -545,7 +567,7 @@ gettext = lambda s: u"%(name)s, sei gegrüßt!" 
translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Jim, sei gegrüßt!</p> </html>""", tmpl.generate(user=dict(name='Jim')).render()) @@ -559,7 +581,7 @@ gettext = lambda s: u"Sei gegrüßt, [1:Alter]!" translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Sei gegrüßt, <a href="#42">Alter</a>!</p> </html>""", tmpl.generate(anchor='42').render()) @@ -617,7 +639,7 @@ gettext = lambda s: u"[1:] Einträge pro Seite anzeigen." translator = Translator(gettext) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p><input type="text" name="num" value="x"/> Einträge pro Seite anzeigen.</p> </html>""", tmpl.generate().render()) @@ -676,7 +698,7 @@ })) tmpl.filters.insert(0, translator) tmpl.add_directives(Translator.NAMESPACE, translator) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p title="Voh bär">Voh</p> </html>""", tmpl.generate().render()) @@ -720,9 +742,9 @@ }) translator = Translator(translations) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> Modificado à um dia por Pedro - </html>""", tmpl.generate(date='um dia', author="Pedro").render()) + </html>""".encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8')) def test_i18n_msg_ticket_251_extract(self): @@ -749,9 +771,9 @@ }) translator = Translator(translations) translator.setup(tmpl) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p><tt><b>Trandução[ 0 ]</b>: <em>Uma moeda</em></tt></p> - </html>""", tmpl.generate().render()) + </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) def test_extract_i18n_msg_with_other_directives_nested(self): tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" @@ -811,7 +833,7 @@ self.assertEqual(1, len(messages)) ctx = Context() ctx.push({'trac': {'homepage': 'http://trac.edgewall.org/'}}) - 
self.assertEqual("""<html> + self.assertEqual(u"""<html> <p>Antes de o fazer, porém, <strong>por favor tente <a href="http://trac.edgewall.org/search?ticket=yes&noquickjump=1&q=q">procurar</a> por problemas semelhantes</strong>, uma vez que é muito provável que este problema @@ -846,11 +868,11 @@ '[2:[3:trac.ini]]\n and cannot be edited on this page.', messages[0][2] ) - self.assertEqual("""<html> + self.assertEqual(u"""<html> <p class="hint"><strong>Nota:</strong> Este repositório está definido em <code><a href="href.wiki(TracIni)">trac.ini</a></code> e não pode ser editado nesta página.</p> - </html>""", tmpl.generate(editable=False).render()) + </html>""".encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8')) def test_extract_i18n_msg_with_py_strip(self): tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" @@ -1771,6 +1793,11 @@ loader = TemplateLoader([dirname], callback=callback) tmpl = loader.load('tmpl10.html') + if IS_PYTHON2: + dgettext = translations.dugettext + else: + dgettext = translations.dgettext + self.assertEqual("""<html> <div>Included tmpl0</div> <p title="foo_Bar 0">foo_Bar 0</p> @@ -1797,7 +1824,7 @@ <p title="Voh">Voh 3</p> <p title="Voh">Voh 3</p> </html>""", tmpl.generate(idx=-1, - dg=translations.dugettext).render()) + dg=dgettext).render()) finally: shutil.rmtree(dirname)
new file mode 100644 --- /dev/null +++ b/genshi/filters/tests/test_html.py @@ -0,0 +1,513 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2009 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +import doctest +import unittest + +from genshi.input import HTML, ParseError +from genshi.filters.html import HTMLFormFiller, HTMLSanitizer +from genshi.template import MarkupTemplate + +class HTMLFormFillerTestCase(unittest.TestCase): + + def test_fill_input_text_no_value(self): + html = HTML(u"""<form><p> + <input type="text" name="foo" /> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <input type="text" name="foo"/> + </p></form>""", html.render()) + + def test_fill_input_text_single_value(self): + html = HTML(u"""<form><p> + <input type="text" name="foo" /> + </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) + self.assertEquals("""<form><p> + <input type="text" name="foo" value="bar"/> + </p></form>""", html.render()) + + def test_fill_input_text_multi_value(self): + html = HTML(u"""<form><p> + <input type="text" name="foo" /> + </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) + self.assertEquals("""<form><p> + <input type="text" name="foo" value="bar"/> + </p></form>""", html.render()) + + def test_fill_input_hidden_no_value(self): + html = HTML(u"""<form><p> + <input type="hidden" name="foo" /> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <input type="hidden" name="foo"/> + </p></form>""", html.render()) + + def test_fill_input_hidden_single_value(self): + html = HTML(u"""<form><p> + <input type="hidden" 
name="foo" /> + </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) + self.assertEquals("""<form><p> + <input type="hidden" name="foo" value="bar"/> + </p></form>""", html.render()) + + def test_fill_input_hidden_multi_value(self): + html = HTML(u"""<form><p> + <input type="hidden" name="foo" /> + </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) + self.assertEquals("""<form><p> + <input type="hidden" name="foo" value="bar"/> + </p></form>""", html.render()) + + def test_fill_textarea_no_value(self): + html = HTML(u"""<form><p> + <textarea name="foo"></textarea> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <textarea name="foo"/> + </p></form>""", html.render()) + + def test_fill_textarea_single_value(self): + html = HTML(u"""<form><p> + <textarea name="foo"></textarea> + </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) + self.assertEquals("""<form><p> + <textarea name="foo">bar</textarea> + </p></form>""", html.render()) + + def test_fill_textarea_multi_value(self): + html = HTML(u"""<form><p> + <textarea name="foo"></textarea> + </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) + self.assertEquals("""<form><p> + <textarea name="foo">bar</textarea> + </p></form>""", html.render()) + + def test_fill_textarea_multiple(self): + # Ensure that the subsequent textarea doesn't get the data from the + # first + html = HTML(u"""<form><p> + <textarea name="foo"></textarea> + <textarea name="bar"></textarea> + </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) + self.assertEquals("""<form><p> + <textarea name="foo">Some text</textarea> + <textarea name="bar"/> + </p></form>""", html.render()) + + def test_fill_textarea_preserve_original(self): + html = HTML(u"""<form><p> + <textarea name="foo"></textarea> + <textarea name="bar">Original value</textarea> + </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) + self.assertEquals("""<form><p> + <textarea name="foo">Some text</textarea> + <textarea name="bar">Original 
value</textarea> + </p></form>""", html.render()) + + def test_fill_input_checkbox_single_value_auto_no_value(self): + html = HTML(u"""<form><p> + <input type="checkbox" name="foo" /> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <input type="checkbox" name="foo"/> + </p></form>""", html.render()) + + def test_fill_input_checkbox_single_value_auto(self): + html = HTML(u"""<form><p> + <input type="checkbox" name="foo" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="checkbox" name="foo"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render()) + + def test_fill_input_checkbox_single_value_defined(self): + html = HTML("""<form><p> + <input type="checkbox" name="foo" value="1" /> + </p></form>""", encoding='ascii') + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" value="1" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" value="1"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) + + def test_fill_input_checkbox_multi_value_auto(self): + html = HTML("""<form><p> + <input type="checkbox" name="foo" /> + </p></form>""", encoding='ascii') + self.assertEquals("""<form><p> + <input type="checkbox" name="foo"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render()) + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render()) + + def test_fill_input_checkbox_multi_value_defined(self): + html = HTML(u"""<form><p> + <input type="checkbox" name="foo" value="1" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" value="1" checked="checked"/> + </p></form>""", 
(html | HTMLFormFiller(data={'foo': ['1']})).render()) + self.assertEquals("""<form><p> + <input type="checkbox" name="foo" value="1"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) + + def test_fill_input_radio_no_value(self): + html = HTML(u"""<form><p> + <input type="radio" name="foo" /> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <input type="radio" name="foo"/> + </p></form>""", html.render()) + + def test_fill_input_radio_single_value(self): + html = HTML(u"""<form><p> + <input type="radio" name="foo" value="1" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="1" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="1"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) + + def test_fill_input_radio_multi_value(self): + html = HTML(u"""<form><p> + <input type="radio" name="foo" value="1" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="1" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render()) + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="1"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) + + def test_fill_input_radio_empty_string(self): + html = HTML(u"""<form><p> + <input type="radio" name="foo" value="" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) + + def test_fill_input_radio_multi_empty_string(self): + html = HTML(u"""<form><p> + <input type="radio" name="foo" value="" /> + </p></form>""") + self.assertEquals("""<form><p> + <input type="radio" name="foo" value="" checked="checked"/> + </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render()) + + 
def test_fill_select_no_value_auto(self): + html = HTML(u"""<form><p> + <select name="foo"> + <option>1</option> + <option>2</option> + <option>3</option> + </select> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <select name="foo"> + <option>1</option> + <option>2</option> + <option>3</option> + </select> + </p></form>""", html.render()) + + def test_fill_select_no_value_defined(self): + html = HTML(u"""<form><p> + <select name="foo"> + <option value="1">1</option> + <option value="2">2</option> + <option value="3">3</option> + </select> + </p></form>""") | HTMLFormFiller() + self.assertEquals("""<form><p> + <select name="foo"> + <option value="1">1</option> + <option value="2">2</option> + <option value="3">3</option> + </select> + </p></form>""", html.render()) + + def test_fill_select_single_value_auto(self): + html = HTML(u"""<form><p> + <select name="foo"> + <option>1</option> + <option>2</option> + <option>3</option> + </select> + </p></form>""") | HTMLFormFiller(data={'foo': '1'}) + self.assertEquals("""<form><p> + <select name="foo"> + <option selected="selected">1</option> + <option>2</option> + <option>3</option> + </select> + </p></form>""", html.render()) + + def test_fill_select_single_value_defined(self): + html = HTML(u"""<form><p> + <select name="foo"> + <option value="1">1</option> + <option value="2">2</option> + <option value="3">3</option> + </select> + </p></form>""") | HTMLFormFiller(data={'foo': '1'}) + self.assertEquals("""<form><p> + <select name="foo"> + <option value="1" selected="selected">1</option> + <option value="2">2</option> + <option value="3">3</option> + </select> + </p></form>""", html.render()) + + def test_fill_select_multi_value_auto(self): + html = HTML(u"""<form><p> + <select name="foo" multiple> + <option>1</option> + <option>2</option> + <option>3</option> + </select> + </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) + self.assertEquals("""<form><p> + <select name="foo" 
multiple="multiple"> + <option selected="selected">1</option> + <option>2</option> + <option selected="selected">3</option> + </select> + </p></form>""", html.render()) + + def test_fill_select_multi_value_defined(self): + html = HTML(u"""<form><p> + <select name="foo" multiple> + <option value="1">1</option> + <option value="2">2</option> + <option value="3">3</option> + </select> + </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) + self.assertEquals("""<form><p> + <select name="foo" multiple="multiple"> + <option value="1" selected="selected">1</option> + <option value="2">2</option> + <option value="3" selected="selected">3</option> + </select> + </p></form>""", html.render()) + + def test_fill_option_segmented_text(self): + html = MarkupTemplate(u"""<form> + <select name="foo"> + <option value="1">foo $x</option> + </select> + </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'}) + self.assertEquals(u"""<form> + <select name="foo"> + <option value="1" selected="selected">foo 1</option> + </select> + </form>""", html.render()) + + def test_fill_option_segmented_text_no_value(self): + html = MarkupTemplate("""<form> + <select name="foo"> + <option>foo $x bar</option> + </select> + </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'}) + self.assertEquals("""<form> + <select name="foo"> + <option selected="selected">foo 1 bar</option> + </select> + </form>""", html.render()) + + def test_fill_option_unicode_value(self): + html = HTML(u"""<form> + <select name="foo"> + <option value="ö">foo</option> + </select> + </form>""") | HTMLFormFiller(data={'foo': u'ö'}) + self.assertEquals(u"""<form> + <select name="foo"> + <option value="ö" selected="selected">foo</option> + </select> + </form>""", html.render(encoding=None)) + + def test_fill_input_password_disabled(self): + html = HTML(u"""<form><p> + <input type="password" name="pass" /> + </p></form>""") | HTMLFormFiller(data={'pass': 'bar'}) + self.assertEquals("""<form><p> + <input 
type="password" name="pass"/> + </p></form>""", html.render()) + + def test_fill_input_password_enabled(self): + html = HTML(u"""<form><p> + <input type="password" name="pass" /> + </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True) + self.assertEquals("""<form><p> + <input type="password" name="pass" value="1234"/> + </p></form>""", html.render()) + + +class HTMLSanitizerTestCase(unittest.TestCase): + + def test_sanitize_unchanged(self): + html = HTML(u'<a href="#">fo<br />o</a>') + self.assertEquals('<a href="#">fo<br/>o</a>', + (html | HTMLSanitizer()).render()) + html = HTML(u'<a href="#with:colon">foo</a>') + self.assertEquals('<a href="#with:colon">foo</a>', + (html | HTMLSanitizer()).render()) + + def test_sanitize_escape_text(self): + html = HTML(u'<a href="#">fo&</a>') + self.assertEquals('<a href="#">fo&</a>', + (html | HTMLSanitizer()).render()) + html = HTML(u'<a href="#"><foo></a>') + self.assertEquals('<a href="#"><foo></a>', + (html | HTMLSanitizer()).render()) + + def test_sanitize_entityref_text(self): + html = HTML(u'<a href="#">foö</a>') + self.assertEquals(u'<a href="#">foö</a>', + (html | HTMLSanitizer()).render(encoding=None)) + + def test_sanitize_escape_attr(self): + html = HTML(u'<div title="<foo>"></div>') + self.assertEquals('<div title="<foo>"/>', + (html | HTMLSanitizer()).render()) + + def test_sanitize_close_empty_tag(self): + html = HTML(u'<a href="#">fo<br>o</a>') + self.assertEquals('<a href="#">fo<br/>o</a>', + (html | HTMLSanitizer()).render()) + + def test_sanitize_invalid_entity(self): + html = HTML(u'&junk;') + self.assertEquals('&junk;', (html | HTMLSanitizer()).render()) + + def test_sanitize_remove_script_elem(self): + html = HTML(u'<script>alert("Foo")</script>') + self.assertEquals('', (html | HTMLSanitizer()).render()) + html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>') + self.assertEquals('', (html | HTMLSanitizer()).render()) + self.assertRaises(ParseError, HTML, 
u'<SCR\0IPT>alert("foo")</SCR\0IPT>') + self.assertRaises(ParseError, HTML, + u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') + + def test_sanitize_remove_onclick_attr(self): + html = HTML(u'<div onclick=\'alert("foo")\' />') + self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) + + def test_sanitize_remove_input_password(self): + html = HTML(u'<form><input type="password" /></form>') + self.assertEquals('<form/>', (html | HTMLSanitizer()).render()) + + def test_sanitize_remove_comments(self): + html = HTML(u'''<div><!-- conditional comment crap --></div>''') + self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) + + def test_sanitize_remove_style_scripts(self): + sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + # Inline style with url() using javascript: scheme + html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + # Inline style with url() using javascript: scheme, using control char + html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + # Inline style with url() using javascript: scheme, in quotes + html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + # IE expressions in CSS not allowed + html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));' + 'color: #fff\'>') + self.assertEquals('<div style="color: #fff"/>', + (html | sanitizer).render()) + # Inline style with url() using javascript: scheme, using unicode + # escapes + html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>') + 
self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>') + self.assertEquals('<div/>', (html | sanitizer).render()) + + def test_sanitize_remove_style_phishing(self): + sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + # The position property is not allowed + html = HTML(u'<div style="position:absolute;top:0"></div>') + self.assertEquals('<div style="top:0"/>', (html | sanitizer).render()) + # Normal margins get passed through + html = HTML(u'<div style="margin:10px 20px"></div>') + self.assertEquals('<div style="margin:10px 20px"/>', + (html | sanitizer).render()) + # But not negative margins + html = HTML(u'<div style="margin:-1000px 0 0"></div>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<div style="margin-left:-2000px 0 0"></div>') + self.assertEquals('<div/>', (html | sanitizer).render()) + html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>') + self.assertEquals('<div/>', (html | sanitizer).render()) + + def test_sanitize_remove_src_javascript(self): + html = HTML(u'<img src=\'javascript:alert("foo")\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Case-insensitive protocol matching + html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Grave accents (not parsed) + self.assertRaises(ParseError, HTML, + u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') + # Protocol encoded using UTF-8 
numeric entities + html = HTML(u'<IMG SRC=\'javascri' + 'pt:alert("foo")\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Protocol encoded using UTF-8 numeric entities without a semicolon + # (which is allowed because the max number of digits is used) + html = HTML(u'<IMG SRC=\'java' + 'script' + ':alert("foo")\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Protocol encoded using UTF-8 numeric hex entities without a semicolon + # (which is allowed because the max number of digits is used) + html = HTML(u'<IMG SRC=\'javascri' + 'pt:alert("foo")\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Embedded tab character in protocol + html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + # Embedded tab character in protocol, but encoded this time + html = HTML(u'<IMG SRC=\'jav	ascript:alert("foo");\'>') + self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__)) + suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test')) + suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest='suite')
--- a/genshi/filters/tests/transform.py +++ b/genshi/filters/tests/transform.py @@ -48,8 +48,10 @@ def _transform(html, transformer, with_attrs=False): """Apply transformation returning simplified marked stream.""" - if isinstance(html, basestring): - html = HTML(html) + if isinstance(html, basestring) and not isinstance(html, unicode): + html = HTML(html, encoding='utf-8') + elif isinstance(html, unicode): + html = HTML(html, encoding='utf-8') stream = transformer(html, keep_marks=True) return _simplify(stream, with_attrs) @@ -57,7 +59,7 @@ class SelectTest(unittest.TestCase): """Test .select()""" def _select(self, select): - html = HTML(FOOBAR) + html = HTML(FOOBAR, encoding='utf-8') if isinstance(select, basestring): select = [select] transformer = Transformer(select[0]) @@ -138,7 +140,7 @@ def test_select_text_context(self): self.assertEqual( - list(Transformer('.')(HTML('foo'), keep_marks=True)), + list(Transformer('.')(HTML(u'foo'), keep_marks=True)), [('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))], ) @@ -205,7 +207,7 @@ def test_invert_text_context(self): self.assertEqual( - _simplify(Transformer('.').invert()(HTML('foo'), keep_marks=True)), + _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)), [(None, 'TEXT', u'foo')], ) @@ -271,7 +273,7 @@ def test_empty_text_context(self): self.assertEqual( - _simplify(Transformer('.')(HTML('foo'), keep_marks=True)), + _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)), [(OUTSIDE, TEXT, u'foo')], ) @@ -656,9 +658,11 @@ def __iter__(self): self.count += 1 - return iter(HTML('CONTENT %i' % self.count)) + return iter(HTML(u'CONTENT %i' % self.count)) - if isinstance(html, basestring): + if isinstance(html, basestring) and not isinstance(html, unicode): + html = HTML(html, encoding='utf-8') + else: html = HTML(html) if content is None: content = Injector()
--- a/genshi/filters/transform.py +++ b/genshi/filters/transform.py @@ -31,7 +31,8 @@ ... <body> ... Some <em>body</em> text. ... </body> -... </html>''') +... </html>''', +... encoding='utf-8') >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) ... .unwrap().wrap(tag.u)) <html> @@ -136,7 +137,8 @@ mark. >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') Transformations act on selected stream events matching an XPath expression. Here's an example of removing some markup (the title, in this case) @@ -215,7 +217,8 @@ ... yield mark, (kind, data.upper(), pos) ... else: ... yield mark, (kind, data, pos) - >>> short_stream = HTML('<body>Some <em>test</em> text</body>') + >>> short_stream = HTML('<body>Some <em>test</em> text</body>', + ... encoding='utf-8') >>> print(short_stream | Transformer('.//em/text()').apply(upper)) <body>Some <em>TEST</em> text</body> """ @@ -233,7 +236,7 @@ """Mark events matching the given XPath expression, within the current selection. - >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') >>> print(html | Transformer().select('.//em').trace()) (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) (None, ('TEXT', u'Some ', (None, 1, 6))) @@ -257,7 +260,7 @@ Specificaly, all marks are converted to null marks, and all null marks are converted to OUTSIDE marks. 
- >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') >>> print(html | Transformer('//em').invert().trace()) ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) @@ -277,7 +280,7 @@ Example: - >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') >>> print(html | Transformer('//em').end().trace()) ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) @@ -301,7 +304,8 @@ Example: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em').empty()) <html><head><title>Some Title</title></head><body>Some <em/> text.</body></html> @@ -316,7 +320,8 @@ Example: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em').remove()) <html><head><title>Some Title</title></head><body>Some text.</body></html> @@ -333,7 +338,8 @@ Example: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em').unwrap()) <html><head><title>Some Title</title></head><body>Some body text.</body></html> @@ -346,7 +352,8 @@ """Wrap selection in an element. >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... 
encoding='utf-8') >>> print(html | Transformer('.//em').wrap('strong')) <html><head><title>Some Title</title></head><body>Some <strong><em>body</em></strong> text.</body></html> @@ -362,7 +369,8 @@ """Replace selection with content. >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//title/text()').replace('New Title')) <html><head><title>New Title</title></head><body>Some <em>body</em> text.</body></html> @@ -380,7 +388,8 @@ tag: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em').before('emphasised ')) <html><head><title>Some Title</title></head><body>Some emphasised <em>body</em> text.</body></html> @@ -397,7 +406,8 @@ Here, we insert some text after the </em> closing tag: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em').after(' rock')) <html><head><title>Some Title</title></head><body>Some <em>body</em> rock text.</body></html> @@ -414,7 +424,8 @@ Inserting some new text at the start of the <body>: >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//body').prepend('Some new body text. ')) <html><head><title>Some Title</title></head><body>Some new body text. Some <em>body</em> text.</body></html> @@ -429,7 +440,8 @@ """Insert content before the END event of the selection. >>> html = HTML('<html><head><title>Some Title</title></head>' - ... 
'<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//body').append(' Some new body text.')) <html><head><title>Some Title</title></head><body>Some <em>body</em> text. Some new body text.</body></html> @@ -450,7 +462,7 @@ >>> html = HTML('<html><head><title>Some Title</title></head>' ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' - ... '</html>') + ... '</html>', encoding='utf-8') >>> print(html | Transformer('body/em').attr('class', None)) <html><head><title>Some Title</title></head><body>Some <em>body</em> <em>text</em>.</body></html> @@ -493,7 +505,8 @@ >>> from genshi.builder import tag >>> buffer = StreamBuffer() >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('head/title/text()').copy(buffer) ... .end().select('body').prepend(tag.h1(buffer))) <html><head><title>Some Title</title></head><body><h1>Some @@ -514,7 +527,8 @@ >>> html = HTML('<html><head><title>Some Title</title></head>' ... '<body><em>Some</em> <em class="before">body</em>' - ... '<em>text</em>.</body></html>') + ... '<em>text</em>.</body></html>', + ... encoding='utf-8') >>> buffer = StreamBuffer() >>> def apply_attr(name, entry): ... return list(buffer)[0][1][1].get('class') @@ -546,7 +560,8 @@ >>> from genshi.builder import tag >>> buffer = StreamBuffer() >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('.//em/text()').cut(buffer) ... 
.end().select('.//em').after(tag.h1(buffer))) <html><head><title>Some Title</title></head><body>Some @@ -577,7 +592,8 @@ top of the document: >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' - ... 'text <note>two</note>.</body></doc>') + ... 'text <note>two</note>.</body></doc>', + ... encoding='utf-8') >>> buffer = StreamBuffer() >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) ... .end().buffer().select('notes').prepend(buffer)) @@ -595,7 +611,8 @@ >>> from genshi.filters.html import HTMLSanitizer >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' - ... '</script> and some more text</body></html>') + ... '</script> and some more text</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) <html><body>Some text and some more text</body></html> @@ -609,7 +626,8 @@ the selection. >>> html = HTML('<html><head><title>Some Title</title></head>' - ... '<body>Some <em>body</em> text.</body></html>') + ... '<body>Some <em>body</em> text.</body></html>', + ... encoding='utf-8') >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> text.</body></html> @@ -627,7 +645,8 @@ >>> html = HTML('<html><body>Some text, some more text and ' ... '<b>some bold text</b>\\n' - ... '<i>some italicised text</i></body></html>') + ... '<i>some italicised text</i></body></html>', + ... encoding='utf-8') >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) <html><body>Some text, some more text and <b>SOME bold text</b> <i>some italicised text</i></body></html> @@ -649,7 +668,8 @@ """Rename matching elements. >>> html = HTML('<html><body>Some text, some more text and ' - ... '<b>some bold text</b></body></html>') + ... '<b>some bold text</b></body></html>', + ... 
encoding='utf-8') >>> print(html | Transformer('body/b').rename('strong')) <html><body>Some text, some more text and <strong>some bold text</strong></body></html> """ @@ -658,7 +678,7 @@ def trace(self, prefix='', fileobj=None): """Print events as they pass through the transform. - >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') >>> print(html | Transformer('em').trace()) (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) (None, ('TEXT', u'Some ', (None, 1, 6))) @@ -1024,7 +1044,7 @@ ... yield event ... for event in stream: ... yield event - >>> html = HTML('<body>Some <em>test</em> text</body>') + >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') >>> print(html | Transformer('.//em').apply(Top('Prefix '))) Prefix <body>Some <em>test</em> text</body> """