changeset 933:feba07fc925b

Merge r1141 from py3k: add support for Python 3 to genshi.filters: * minor changes to track encoding=None API change in core genshi modules. * renamed genshi/filters/tests/html.py to test_html.py to avoid clashes with the Python 3 top-level html module when running a subset of the tests. * did not rename genshi/filters/html.py. * i18n filters: * ugettext and friends are gone in Python 3 (only gettext and friends exist, and they now handle unicode). * Some \ line continuations inside doctests confused 2to3 and so were removed. * Testing picked up a problem (already present in trunk) where Translator.__call__ could end up defining gettext as an endlessly recursive function. Noted with a TODO.
author hodgestar
date Fri, 18 Mar 2011 09:11:53 +0000
parents e53161c2773c
children 7c9ec79caedc
files genshi/filters/html.py genshi/filters/i18n.py genshi/filters/tests/__init__.py genshi/filters/tests/html.py genshi/filters/tests/i18n.py genshi/filters/tests/test_html.py genshi/filters/tests/transform.py genshi/filters/transform.py
diffstat 8 files changed, 664 insertions(+), 604 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/filters/html.py
+++ b/genshi/filters/html.py
@@ -32,7 +32,7 @@
     >>> from genshi.input import HTML
     >>> html = HTML('''<form>
     ...   <p><input type="text" name="foo" /></p>
-    ... </form>''')
+    ... </form>''', encoding='utf-8')
     >>> filler = HTMLFormFiller(data={'foo': 'bar'})
     >>> print(html | filler)
     <form>
@@ -199,7 +199,7 @@
     from the stream.
     
     >>> from genshi import HTML
-    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
+    >>> html = HTML('<div><script>alert(document.cookie)</script></div>', encoding='utf-8')
     >>> print(html | HTMLSanitizer())
     <div/>
     
@@ -207,7 +207,7 @@
     is instantiated. For example, to allow inline ``style`` attributes, the
     following instantation would work:
     
-    >>> html = HTML('<div style="background: #000"></div>')
+    >>> html = HTML('<div style="background: #000"></div>', encoding='utf-8')
     >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
     >>> print(html | sanitizer)
     <div style="background: #000"/>
@@ -215,7 +215,7 @@
     Note that even in this case, the filter *does* attempt to remove dangerous
     constructs from style attributes:
 
-    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
+    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>', encoding='utf-8')
     >>> print(html | sanitizer)
     <div style="color: #000"/>
     
--- a/genshi/filters/i18n.py
+++ b/genshi/filters/i18n.py
@@ -33,6 +33,7 @@
 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
 from genshi.template.directives import Directive, StripDirective
 from genshi.template.markup import MarkupTemplate, EXEC
+from genshi.compat import IS_PYTHON2
 
 __all__ = ['Translator', 'extract']
 __docformat__ = 'restructuredtext en'
@@ -288,8 +289,7 @@
     also need to pass a name for those parameters. Consider the following
     examples:
     
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <div i18n:choose="num; num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -301,8 +301,7 @@
     [(2, 'ngettext', (u'There is %(num)s coin',
                       u'There are %(num)s coins'), [])]
 
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <div i18n:choose="num; num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -324,8 +323,7 @@
 
     When used as a element and not as an attribute:
 
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <i18n:choose numeral="num" params="num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -492,8 +490,7 @@
     another i18n domain(catalog) to translate from.
     
     >>> from genshi.filters.tests.i18n import DummyTranslations
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <p i18n:msg="">Bar</p>
     ...   <div i18n:domain="foo">
     ...     <p i18n:msg="">FooBar</p>
@@ -663,11 +660,19 @@
             if ctxt:
                 ctxt['_i18n.gettext'] = gettext
         else:
-            gettext = self.translate.ugettext
-            ngettext = self.translate.ungettext
+            if IS_PYTHON2:
+                gettext = self.translate.ugettext
+                ngettext = self.translate.ungettext
+            else:
+                gettext = self.translate.gettext
+                ngettext = self.translate.ngettext
             try:
-                dgettext = self.translate.dugettext
-                dngettext = self.translate.dungettext
+                if IS_PYTHON2:
+                    dgettext = self.translate.dugettext
+                    dngettext = self.translate.dungettext
+                else:
+                    dgettext = self.translate.dgettext
+                    dngettext = self.translate.dngettext
             except AttributeError:
                 dgettext = lambda _, y: gettext(y)
                 dngettext = lambda _, s, p, n: ngettext(s, p, n)
@@ -678,6 +683,8 @@
                 ctxt['_i18n.dngettext'] = dngettext
 
         if ctxt and ctxt.get('_i18n.domain'):
+            # TODO: This can cause infinite recursion if dgettext is defined
+            #       via the AttributeError case above!
             gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
 
         for kind, data, pos in stream:
@@ -1168,7 +1175,9 @@
                 and node.func.id in gettext_functions:
             strings = []
             def _add(arg):
-                if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
+                if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode):
+                    strings.append(arg.s)
+                elif isinstance(arg, _ast.Str):
                     strings.append(unicode(arg.s, 'utf-8'))
                 elif arg:
                     strings.append(None)
--- a/genshi/filters/tests/__init__.py
+++ b/genshi/filters/tests/__init__.py
@@ -15,9 +15,9 @@
 import unittest
 
 def suite():
-    from genshi.filters.tests import html, i18n, transform
+    from genshi.filters.tests import test_html, i18n, transform
     suite = unittest.TestSuite()
-    suite.addTest(html.suite())
+    suite.addTest(test_html.suite())
     suite.addTest(i18n.suite())
     if hasattr(doctest, 'NORMALIZE_WHITESPACE'):
         suite.addTest(transform.suite())
deleted file mode 100644
--- a/genshi/filters/tests/html.py
+++ /dev/null
@@ -1,513 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-import doctest
-import unittest
-
-from genshi.input import HTML, ParseError
-from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
-from genshi.template import MarkupTemplate
-
-class HTMLFormFillerTestCase(unittest.TestCase):
-
-    def test_fill_input_text_no_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_text_single_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_text_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_no_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_single_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_no_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <textarea name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_single_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">bar</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_multi_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">bar</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_multiple(self):
-        # Ensure that the subsequent textarea doesn't get the data from the
-        # first
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-          <textarea name="bar"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">Some text</textarea>
-          <textarea name="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_preserve_original(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-          <textarea name="bar">Original value</textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">Some text</textarea>
-          <textarea name="bar">Original value</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_input_checkbox_single_value_auto_no_value(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_checkbox_single_value_auto(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
-
-    def test_fill_input_checkbox_single_value_defined(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
-
-    def test_fill_input_checkbox_multi_value_auto(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
-
-    def test_fill_input_checkbox_multi_value_defined(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
-
-    def test_fill_input_radio_no_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_radio_single_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
-
-    def test_fill_input_radio_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
-
-    def test_fill_input_radio_empty_string(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
-
-    def test_fill_input_radio_multi_empty_string(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
-
-    def test_fill_select_no_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_no_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_single_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option selected="selected">1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_single_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option value="1" selected="selected">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_multi_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo" multiple>
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
-        self.assertEquals("""<form><p>
-          <select name="foo" multiple="multiple">
-            <option selected="selected">1</option>
-            <option>2</option>
-            <option selected="selected">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_multi_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo" multiple>
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
-        self.assertEquals("""<form><p>
-          <select name="foo" multiple="multiple">
-            <option value="1" selected="selected">1</option>
-            <option value="2">2</option>
-            <option value="3" selected="selected">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_option_segmented_text(self):
-        html = MarkupTemplate("""<form>
-          <select name="foo">
-            <option value="1">foo $x</option>
-          </select>
-        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form>
-          <select name="foo">
-            <option value="1" selected="selected">foo 1</option>
-          </select>
-        </form>""", html.render())
-
-    def test_fill_option_segmented_text_no_value(self):
-        html = MarkupTemplate("""<form>
-          <select name="foo">
-            <option>foo $x bar</option>
-          </select>
-        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
-        self.assertEquals("""<form>
-          <select name="foo">
-            <option selected="selected">foo 1 bar</option>
-          </select>
-        </form>""", html.render())
-
-    def test_fill_option_unicode_value(self):
-        html = HTML("""<form>
-          <select name="foo">
-            <option value="&ouml;">foo</option>
-          </select>
-        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
-        self.assertEquals(u"""<form>
-          <select name="foo">
-            <option value="ö" selected="selected">foo</option>
-          </select>
-        </form>""", html.render(encoding=None))
-
-    def test_fill_input_password_disabled(self):
-        html = HTML("""<form><p>
-          <input type="password" name="pass" />
-        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="password" name="pass"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_password_enabled(self):
-        html = HTML("""<form><p>
-          <input type="password" name="pass" />
-        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
-        self.assertEquals("""<form><p>
-          <input type="password" name="pass" value="1234"/>
-        </p></form>""", html.render())
-
-
-class HTMLSanitizerTestCase(unittest.TestCase):
-
-    def test_sanitize_unchanged(self):
-        html = HTML('<a href="#">fo<br />o</a>')
-        self.assertEquals('<a href="#">fo<br/>o</a>',
-                          (html | HTMLSanitizer()).render())
-        html = HTML('<a href="#with:colon">foo</a>')
-        self.assertEquals('<a href="#with:colon">foo</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_escape_text(self):
-        html = HTML('<a href="#">fo&amp;</a>')
-        self.assertEquals('<a href="#">fo&amp;</a>',
-                          (html | HTMLSanitizer()).render())
-        html = HTML('<a href="#">&lt;foo&gt;</a>')
-        self.assertEquals('<a href="#">&lt;foo&gt;</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_entityref_text(self):
-        html = HTML('<a href="#">fo&ouml;</a>')
-        self.assertEquals(u'<a href="#">foö</a>',
-                          (html | HTMLSanitizer()).render(encoding=None))
-
-    def test_sanitize_escape_attr(self):
-        html = HTML('<div title="&lt;foo&gt;"></div>')
-        self.assertEquals('<div title="&lt;foo&gt;"/>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_close_empty_tag(self):
-        html = HTML('<a href="#">fo<br>o</a>')
-        self.assertEquals('<a href="#">fo<br/>o</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_invalid_entity(self):
-        html = HTML('&junk;')
-        self.assertEquals('&amp;junk;', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_script_elem(self):
-        html = HTML('<script>alert("Foo")</script>')
-        self.assertEquals('', (html | HTMLSanitizer()).render())
-        html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
-        self.assertEquals('', (html | HTMLSanitizer()).render())
-        self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
-        self.assertRaises(ParseError, HTML,
-                          '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
-
-    def test_sanitize_remove_onclick_attr(self):
-        html = HTML('<div onclick=\'alert("foo")\' />')
-        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_input_password(self):
-        html = HTML('<form><input type="password" /></form>')
-        self.assertEquals('<form/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_comments(self):
-        html = HTML('''<div><!-- conditional comment crap --></div>''')
-        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_style_scripts(self):
-        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
-        # Inline style with url() using javascript: scheme
-        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, using control char
-        html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, in quotes
-        html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # IE expressions in CSS not allowed
-        html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
-                                 'color: #fff\'>')
-        self.assertEquals('<div style="color: #fff"/>',
-                          (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, using unicode
-        # escapes
-        html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-
-    def test_sanitize_remove_style_phishing(self):
-        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
-        # The position property is not allowed
-        html = HTML('<div style="position:absolute;top:0"></div>')
-        self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())
-        # Normal margins get passed through
-        html = HTML('<div style="margin:10px 20px"></div>')
-        self.assertEquals('<div style="margin:10px 20px"/>',
-                          (html | sanitizer).render())
-        # But not negative margins
-        html = HTML('<div style="margin:-1000px 0 0"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<div style="margin-left:-2000px 0 0"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-
-    def test_sanitize_remove_src_javascript(self):
-        html = HTML('<img src=\'javascript:alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Case-insensitive protocol matching
-        html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Grave accents (not parsed)
-        self.assertRaises(ParseError, HTML,
-                          '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
-        # Protocol encoded using UTF-8 numeric entities
-        html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
-                    '&#112;&#116;&#58;alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Protocol encoded using UTF-8 numeric entities without a semicolon
-        # (which is allowed because the max number of digits is used)
-        html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
-                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
-                    '&#0000058alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
-        # (which is allowed because the max number of digits is used)
-        html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
-                    '&#x70&#x74&#x3A;alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Embedded tab character in protocol
-        html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Embedded tab character in protocol, but encoded this time
-        html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-
-
-def suite():
-    suite = unittest.TestSuite()
-    suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
-    suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
-    suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
-    return suite
-
-
-if __name__ == '__main__':
-    unittest.main(defaultTest='suite')
--- a/genshi/filters/tests/i18n.py
+++ b/genshi/filters/tests/i18n.py
@@ -14,13 +14,13 @@
 from datetime import datetime
 import doctest
 from gettext import NullTranslations
-from StringIO import StringIO
 import unittest
 
 from genshi.core import Attrs
 from genshi.template import MarkupTemplate, Context
 from genshi.filters.i18n import Translator, extract
 from genshi.input import HTML
+from genshi.compat import IS_PYTHON2, StringIO
 
 
 class DummyTranslations(NullTranslations):
@@ -39,17 +39,31 @@
     def _domain_call(self, func, domain, *args, **kwargs):
         return getattr(self._domains.get(domain, self), func)(*args, **kwargs)
 
-    def ugettext(self, message):
-        missing = object()
-        tmsg = self._catalog.get(message, missing)
-        if tmsg is missing:
-            if self._fallback:
-                return self._fallback.ugettext(message)
-            return unicode(message)
-        return tmsg
+    if IS_PYTHON2:
+        def ugettext(self, message):
+            missing = object()
+            tmsg = self._catalog.get(message, missing)
+            if tmsg is missing:
+                if self._fallback:
+                    return self._fallback.ugettext(message)
+                return unicode(message)
+            return tmsg
+    else:
+        def gettext(self, message):
+            missing = object()
+            tmsg = self._catalog.get(message, missing)
+            if tmsg is missing:
+                if self._fallback:
+                    return self._fallback.gettext(message)
+                return unicode(message)
+            return tmsg
 
-    def dugettext(self, domain, message):
-        return self._domain_call('ugettext', domain, message)
+    if IS_PYTHON2:
+        def dugettext(self, domain, message):
+            return self._domain_call('ugettext', domain, message)
+    else:
+        def dgettext(self, domain, message):
+            return self._domain_call('gettext', domain, message)
 
     def ungettext(self, msgid1, msgid2, n):
         try:
@@ -62,8 +76,16 @@
             else:
                 return msgid2
 
-    def dungettext(self, domain, singular, plural, numeral):
-        return self._domain_call('ungettext', domain, singular, plural, numeral)
+    if not IS_PYTHON2:
+        ngettext = ungettext
+        del ungettext
+
+    if IS_PYTHON2:
+        def dungettext(self, domain, singular, plural, numeral):
+            return self._domain_call('ungettext', domain, singular, plural, numeral)
+    else:
+        def dngettext(self, domain, singular, plural, numeral):
+            return self._domain_call('ngettext', domain, singular, plural, numeral)
 
 
 class TranslatorTestCase(unittest.TestCase):
@@ -72,7 +94,7 @@
         """
         Verify that translated attributes end up in a proper `Attrs` instance.
         """
-        html = HTML("""<html>
+        html = HTML(u"""<html>
           <span title="Foo"></span>
         </html>""")
         translator = Translator(lambda s: u"Voh")
@@ -218,9 +240,9 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html">Hilfe</a>.</p>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_nonewline(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -241,7 +263,7 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html">Hilfe</a></p>
         </html>""", tmpl.generate().render())
 
@@ -264,9 +286,9 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           Für Details siehe bitte <a href="help.html">Hilfe</a>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_attributes(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -394,7 +416,7 @@
         gettext = lambda s: u"Für Details siehe bitte [1:[2:Hilfeseite]]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html"><em>Hilfeseite</em></a>.</p>
         </html>""", tmpl.generate().render())
 
@@ -449,7 +471,7 @@
         gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><input type="text" name="num"/> Einträge pro Seite anzeigen.</p>
         </html>""", tmpl.generate().render())
 
@@ -476,7 +498,7 @@
         gettext = lambda s: u"Für [2:Details] siehe bitte [1:Hilfe]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für <em>Details</em> siehe bitte <a href="help.html">Hilfe</a>.</p>
         </html>""", tmpl.generate().render())
 
@@ -500,13 +522,13 @@
           <p i18n:msg="">
             Show me <input type="text" name="num" /> entries per page, starting at page <input type="text" name="num" />.
           </p>
-        </html>""")
+        </html>""", encoding='utf-8')
         gettext = lambda s: u"[1:] Einträge pro Seite, beginnend auf Seite [2:]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
-          <p><input type="text" name="num"/> Eintr\xc3\xa4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
-        </html>""", tmpl.generate().render())
+        self.assertEqual(u"""<html>
+          <p><input type="text" name="num"/> Eintr\u00E4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_param(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -545,7 +567,7 @@
         gettext = lambda s: u"%(name)s, sei gegrüßt!"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Jim, sei gegrüßt!</p>
         </html>""", tmpl.generate(user=dict(name='Jim')).render())
 
@@ -559,7 +581,7 @@
         gettext = lambda s: u"Sei gegrüßt, [1:Alter]!"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Sei gegrüßt, <a href="#42">Alter</a>!</p>
         </html>""", tmpl.generate(anchor='42').render())
 
@@ -617,7 +639,7 @@
         gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><input type="text" name="num" value="x"/> Einträge pro Seite anzeigen.</p>
         </html>""", tmpl.generate().render())
 
@@ -676,7 +698,7 @@
         }))
         tmpl.filters.insert(0, translator)
         tmpl.add_directives(Translator.NAMESPACE, translator)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p title="Voh bär">Voh</p>
         </html>""", tmpl.generate().render())
 
@@ -720,9 +742,9 @@
         })
         translator = Translator(translations)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           Modificado à um dia por Pedro
-        </html>""", tmpl.generate(date='um dia', author="Pedro").render())
+        </html>""".encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8'))
 
 
     def test_i18n_msg_ticket_251_extract(self):
@@ -749,9 +771,9 @@
         })
         translator = Translator(translations)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><tt><b>Trandução[ 0 ]</b>: <em>Uma moeda</em></tt></p>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_other_directives_nested(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -811,7 +833,7 @@
         self.assertEqual(1, len(messages))
         ctx = Context()
         ctx.push({'trac': {'homepage': 'http://trac.edgewall.org/'}})
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Antes de o fazer, porém,
             <strong>por favor tente <a href="http://trac.edgewall.org/search?ticket=yes&amp;noquickjump=1&amp;q=q">procurar</a>
             por problemas semelhantes</strong>, uma vez que é muito provável que este problema
@@ -846,11 +868,11 @@
             '[2:[3:trac.ini]]\n            and cannot be edited on this page.',
             messages[0][2]
         )
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p class="hint"><strong>Nota:</strong> Este repositório está definido em
            <code><a href="href.wiki(TracIni)">trac.ini</a></code>
             e não pode ser editado nesta página.</p>
-        </html>""", tmpl.generate(editable=False).render())
+        </html>""".encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_py_strip(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -1771,6 +1793,11 @@
             loader = TemplateLoader([dirname], callback=callback)
             tmpl = loader.load('tmpl10.html')
 
+            if IS_PYTHON2:
+                dgettext = translations.dugettext
+            else:
+                dgettext = translations.dgettext
+
             self.assertEqual("""<html>
                         <div>Included tmpl0</div>
                         <p title="foo_Bar 0">foo_Bar 0</p>
@@ -1797,7 +1824,7 @@
                         <p title="Voh">Voh 3</p>
                         <p title="Voh">Voh 3</p>
                 </html>""", tmpl.generate(idx=-1,
-                                          dg=translations.dugettext).render())
+                                          dg=dgettext).render())
         finally:
             shutil.rmtree(dirname)
 
new file mode 100644
--- /dev/null
+++ b/genshi/filters/tests/test_html.py
@@ -0,0 +1,513 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from genshi.input import HTML, ParseError
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.template import MarkupTemplate
+
+class HTMLFormFillerTestCase(unittest.TestCase):
+    """Check how HTMLFormFiller populates each kind of form control."""
+
+    def test_fill_input_text_no_value(self):
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_text_single_value(self):
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_text_multi_value(self):
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_no_value(self):
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_single_value(self):
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_multi_value(self):
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_no_value(self):
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <textarea name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_single_value(self):
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">bar</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_multi_value(self):
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">bar</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_multiple(self):
+        # A later textarea must not pick up the first textarea's data
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+          <textarea name="bar"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">Some text</textarea>
+          <textarea name="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_preserve_original(self):
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+          <textarea name="bar">Original value</textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">Some text</textarea>
+          <textarea name="bar">Original value</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_input_checkbox_single_value_auto_no_value(self):
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_checkbox_single_value_auto(self):
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
+
+    def test_fill_input_checkbox_single_value_defined(self):
+        html = HTML("""<form><p>
+          <input type="checkbox" name="foo" value="1" />
+        </p></form>""", encoding='ascii')
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+    def test_fill_input_checkbox_multi_value_auto(self):
+        html = HTML("""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""", encoding='ascii')
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
+
+    def test_fill_input_checkbox_multi_value_defined(self):
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+    def test_fill_input_radio_no_value(self):
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_radio_single_value(self):
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+    def test_fill_input_radio_multi_value(self):
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+    def test_fill_input_radio_empty_string(self):
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
+
+    def test_fill_input_radio_multi_empty_string(self):
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
+
+    def test_fill_select_no_value_auto(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_no_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_single_value_auto(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
+        self.assertEqual("""<form><p>
+          <select name="foo">
+            <option selected="selected">1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_single_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
+        self.assertEqual("""<form><p>
+          <select name="foo">
+            <option value="1" selected="selected">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_multi_value_auto(self):
+        html = HTML(u"""<form><p>
+          <select name="foo" multiple>
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
+        self.assertEqual("""<form><p>
+          <select name="foo" multiple="multiple">
+            <option selected="selected">1</option>
+            <option>2</option>
+            <option selected="selected">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_multi_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo" multiple>
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
+        self.assertEqual("""<form><p>
+          <select name="foo" multiple="multiple">
+            <option value="1" selected="selected">1</option>
+            <option value="2">2</option>
+            <option value="3" selected="selected">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_option_segmented_text(self):
+        html = MarkupTemplate(u"""<form>
+          <select name="foo">
+            <option value="1">foo $x</option>
+          </select>
+        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
+        self.assertEqual(u"""<form>
+          <select name="foo">
+            <option value="1" selected="selected">foo 1</option>
+          </select>
+        </form>""", html.render())
+
+    def test_fill_option_segmented_text_no_value(self):
+        html = MarkupTemplate("""<form>
+          <select name="foo">
+            <option>foo $x bar</option>
+          </select>
+        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
+        self.assertEqual("""<form>
+          <select name="foo">
+            <option selected="selected">foo 1 bar</option>
+          </select>
+        </form>""", html.render())
+
+    def test_fill_option_unicode_value(self):
+        html = HTML(u"""<form>
+          <select name="foo">
+            <option value="&ouml;">foo</option>
+          </select>
+        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
+        self.assertEqual(u"""<form>
+          <select name="foo">
+            <option value="ö" selected="selected">foo</option>
+          </select>
+        </form>""", html.render(encoding=None))
+
+    def test_fill_input_password_disabled(self):
+        html = HTML(u"""<form><p>
+          <input type="password" name="pass" />
+        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
+        self.assertEqual("""<form><p>
+          <input type="password" name="pass"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_password_enabled(self):
+        html = HTML(u"""<form><p>
+          <input type="password" name="pass" />
+        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
+        self.assertEqual("""<form><p>
+          <input type="password" name="pass" value="1234"/>
+        </p></form>""", html.render())
+
+
+class HTMLSanitizerTestCase(unittest.TestCase):
+    """Check that HTMLSanitizer keeps safe markup and strips unsafe markup."""
+
+    def test_sanitize_unchanged(self):
+        html = HTML(u'<a href="#">fo<br />o</a>')
+        self.assertEqual('<a href="#">fo<br/>o</a>',
+                         (html | HTMLSanitizer()).render())
+        html = HTML(u'<a href="#with:colon">foo</a>')
+        self.assertEqual('<a href="#with:colon">foo</a>',
+                         (html | HTMLSanitizer()).render())
+
+    def test_sanitize_escape_text(self):
+        html = HTML(u'<a href="#">fo&amp;</a>')
+        self.assertEqual('<a href="#">fo&amp;</a>',
+                         (html | HTMLSanitizer()).render())
+        html = HTML(u'<a href="#">&lt;foo&gt;</a>')
+        self.assertEqual('<a href="#">&lt;foo&gt;</a>',
+                         (html | HTMLSanitizer()).render())
+
+    def test_sanitize_entityref_text(self):
+        html = HTML(u'<a href="#">fo&ouml;</a>')
+        self.assertEqual(u'<a href="#">foö</a>',
+                         (html | HTMLSanitizer()).render(encoding=None))
+
+    def test_sanitize_escape_attr(self):
+        html = HTML(u'<div title="&lt;foo&gt;"></div>')
+        self.assertEqual('<div title="&lt;foo&gt;"/>',
+                         (html | HTMLSanitizer()).render())
+
+    def test_sanitize_close_empty_tag(self):
+        html = HTML(u'<a href="#">fo<br>o</a>')
+        self.assertEqual('<a href="#">fo<br/>o</a>',
+                         (html | HTMLSanitizer()).render())
+
+    def test_sanitize_invalid_entity(self):
+        html = HTML(u'&junk;')
+        self.assertEqual('&amp;junk;', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_script_elem(self):
+        html = HTML(u'<script>alert("Foo")</script>')
+        self.assertEqual('', (html | HTMLSanitizer()).render())
+        html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
+        self.assertEqual('', (html | HTMLSanitizer()).render())
+        self.assertRaises(ParseError, HTML, u'<SCR\0IPT>alert("foo")</SCR\0IPT>')
+        self.assertRaises(ParseError, HTML,
+                          u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
+
+    def test_sanitize_remove_onclick_attr(self):
+        html = HTML(u'<div onclick=\'alert("foo")\' />')
+        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_input_password(self):
+        html = HTML(u'<form><input type="password" /></form>')
+        self.assertEqual('<form/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_comments(self):
+        html = HTML(u'''<div><!-- conditional comment crap --></div>''')
+        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_style_scripts(self):
+        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+        # Inline style with url() using javascript: scheme
+        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme, using control char
+        html = HTML(u'<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme, in quotes
+        html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        # IE expressions in CSS not allowed
+        html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
+                                 'color: #fff\'>')
+        self.assertEqual('<div style="color: #fff"/>',
+                         (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme and unicode escapes
+        html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+
+    def test_sanitize_remove_style_phishing(self):
+        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+        # The position property is not allowed
+        html = HTML(u'<div style="position:absolute;top:0"></div>')
+        self.assertEqual('<div style="top:0"/>', (html | sanitizer).render())
+        # Normal margins get passed through
+        html = HTML(u'<div style="margin:10px 20px"></div>')
+        self.assertEqual('<div style="margin:10px 20px"/>',
+                         (html | sanitizer).render())
+        # But not negative margins
+        html = HTML(u'<div style="margin:-1000px 0 0"></div>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+        html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
+        self.assertEqual('<div/>', (html | sanitizer).render())
+
+    def test_sanitize_remove_src_javascript(self):
+        html = HTML(u'<img src=\'javascript:alert("foo")\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Case-insensitive protocol matching
+        html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Grave accents (not parsed)
+        self.assertRaises(ParseError, HTML,
+                          u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
+        # Protocol encoded using UTF-8 numeric entities
+        html = HTML(u'<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
+                    '&#112;&#116;&#58;alert("foo")\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Protocol encoded using UTF-8 numeric entities without a semicolon
+        # (which is allowed because the max number of digits is used)
+        html = HTML(u'<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
+                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
+                    '&#0000058alert("foo")\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
+        # (which is allowed because the max number of digits is used)
+        html = HTML(u'<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
+                    '&#x70&#x74&#x3A;alert("foo")\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Embedded tab character in protocol
+        html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+        # Embedded tab character in protocol, but encoded this time
+        html = HTML(u'<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
+        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
+
+
+def suite():
+    """Return the HTMLFormFiller module doctests plus both test cases."""
+    load = unittest.defaultTestLoader.loadTestsFromTestCase
+    tests = [doctest.DocTestSuite(HTMLFormFiller.__module__)]
+    tests += [load(c) for c in (HTMLFormFillerTestCase, HTMLSanitizerTestCase)]
+    return unittest.TestSuite(tests)
+
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
--- a/genshi/filters/tests/transform.py
+++ b/genshi/filters/tests/transform.py
@@ -48,8 +48,10 @@
 
 def _transform(html, transformer, with_attrs=False):
     """Apply transformation returning simplified marked stream."""
-    if isinstance(html, basestring):
-        html = HTML(html)
+    if isinstance(html, basestring) and not isinstance(html, unicode):
+        html = HTML(html, encoding='utf-8')
+    elif isinstance(html, unicode):
+        html = HTML(html, encoding='utf-8')
     stream = transformer(html, keep_marks=True)
     return _simplify(stream, with_attrs)
 
@@ -57,7 +59,7 @@
 class SelectTest(unittest.TestCase):
     """Test .select()"""
     def _select(self, select):
-        html = HTML(FOOBAR)
+        html = HTML(FOOBAR, encoding='utf-8')
         if isinstance(select, basestring):
             select = [select]
         transformer = Transformer(select[0])
@@ -138,7 +140,7 @@
 
     def test_select_text_context(self):
         self.assertEqual(
-            list(Transformer('.')(HTML('foo'), keep_marks=True)),
+            list(Transformer('.')(HTML(u'foo'), keep_marks=True)),
             [('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))],
             )
 
@@ -205,7 +207,7 @@
 
     def test_invert_text_context(self):
         self.assertEqual(
-            _simplify(Transformer('.').invert()(HTML('foo'), keep_marks=True)),
+            _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)),
             [(None, 'TEXT', u'foo')],
             )
 
@@ -271,7 +273,7 @@
 
     def test_empty_text_context(self):
         self.assertEqual(
-            _simplify(Transformer('.')(HTML('foo'), keep_marks=True)),
+            _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)),
             [(OUTSIDE, TEXT, u'foo')],
             )
 
@@ -656,9 +658,11 @@
 
             def __iter__(self):
                 self.count += 1
-                return iter(HTML('CONTENT %i' % self.count))
+                return iter(HTML(u'CONTENT %i' % self.count))
 
-        if isinstance(html, basestring):
+        if isinstance(html, basestring) and not isinstance(html, unicode):
+            html = HTML(html, encoding='utf-8')
+        else:
             html = HTML(html)
         if content is None:
             content = Injector()
--- a/genshi/filters/transform.py
+++ b/genshi/filters/transform.py
@@ -31,7 +31,8 @@
 ...  <body>
 ...    Some <em>body</em> text.
 ...  </body>
-... </html>''')
+... </html>''',
+... encoding='utf-8')
 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
 ...                                    .unwrap().wrap(tag.u))
 <html>
@@ -136,7 +137,8 @@
     mark.
 
     >>> html = HTML('<html><head><title>Some Title</title></head>'
-    ...             '<body>Some <em>body</em> text.</body></html>')
+    ...             '<body>Some <em>body</em> text.</body></html>',
+    ...             encoding='utf-8')
 
     Transformations act on selected stream events matching an XPath expression.
     Here's an example of removing some markup (the title, in this case)
@@ -215,7 +217,8 @@
         ...             yield mark, (kind, data.upper(), pos)
         ...         else:
         ...             yield mark, (kind, data, pos)
-        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
+        >>> short_stream = HTML('<body>Some <em>test</em> text</body>',
+        ...                      encoding='utf-8')
         >>> print(short_stream | Transformer('.//em/text()').apply(upper))
         <body>Some <em>TEST</em> text</body>
         """
@@ -233,7 +236,7 @@
         """Mark events matching the given XPath expression, within the current
         selection.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer().select('.//em').trace())
         (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
         (None, ('TEXT', u'Some ', (None, 1, 6)))
@@ -257,7 +260,7 @@
         Specifically, all marks are converted to null marks, and all null marks
         are converted to OUTSIDE marks.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('//em').invert().trace())
         ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
         ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -277,7 +280,7 @@
 
         Example:
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('//em').end().trace())
         ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
         ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -301,7 +304,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').empty())
         <html><head><title>Some Title</title></head><body>Some <em/>
         text.</body></html>
@@ -316,7 +320,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').remove())
         <html><head><title>Some Title</title></head><body>Some
         text.</body></html>
@@ -333,7 +338,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').unwrap())
         <html><head><title>Some Title</title></head><body>Some body
         text.</body></html>
@@ -346,7 +352,8 @@
         """Wrap selection in an element.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').wrap('strong'))
         <html><head><title>Some Title</title></head><body>Some
         <strong><em>body</em></strong> text.</body></html>
@@ -362,7 +369,8 @@
         """Replace selection with content.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//title/text()').replace('New Title'))
         <html><head><title>New Title</title></head><body>Some <em>body</em>
         text.</body></html>
@@ -380,7 +388,8 @@
         tag:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').before('emphasised '))
         <html><head><title>Some Title</title></head><body>Some emphasised
         <em>body</em> text.</body></html>
@@ -397,7 +406,8 @@
         Here, we insert some text after the </em> closing tag:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').after(' rock'))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         rock text.</body></html>
@@ -414,7 +424,8 @@
         Inserting some new text at the start of the <body>:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//body').prepend('Some new body text. '))
         <html><head><title>Some Title</title></head><body>Some new body text.
         Some <em>body</em> text.</body></html>
@@ -429,7 +440,8 @@
         """Insert content before the END event of the selection.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//body').append(' Some new body text.'))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         text. Some new body text.</body></html>
@@ -450,7 +462,7 @@
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
         ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
-        ...             '</html>')
+        ...             '</html>', encoding='utf-8')
         >>> print(html | Transformer('body/em').attr('class', None))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         <em>text</em>.</body></html>
@@ -493,7 +505,8 @@
         >>> from genshi.builder import tag
         >>> buffer = StreamBuffer()
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('head/title/text()').copy(buffer)
         ...     .end().select('body').prepend(tag.h1(buffer)))
         <html><head><title>Some Title</title></head><body><h1>Some
@@ -514,7 +527,8 @@
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
         ...             '<body><em>Some</em> <em class="before">body</em>'
-        ...             '<em>text</em>.</body></html>')
+        ...             '<em>text</em>.</body></html>',
+        ...             encoding='utf-8')
         >>> buffer = StreamBuffer()
         >>> def apply_attr(name, entry):
         ...     return list(buffer)[0][1][1].get('class')
@@ -546,7 +560,8 @@
         >>> from genshi.builder import tag
         >>> buffer = StreamBuffer()
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em/text()').cut(buffer)
         ...     .end().select('.//em').after(tag.h1(buffer)))
         <html><head><title>Some Title</title></head><body>Some
@@ -577,7 +592,8 @@
         top of the document:
 
         >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
-        ...            'text <note>two</note>.</body></doc>')
+        ...            'text <note>two</note>.</body></doc>',
+        ...             encoding='utf-8')
         >>> buffer = StreamBuffer()
         >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
         ...     .end().buffer().select('notes').prepend(buffer))
@@ -595,7 +611,8 @@
 
         >>> from genshi.filters.html import HTMLSanitizer
         >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
-        ...             '</script> and some more text</body></html>')
+        ...             '</script> and some more text</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
         <html><body>Some text and some more text</body></html>
 
@@ -609,7 +626,8 @@
         the selection.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...               '<body>Some <em>body</em> text.</body></html>')
+        ...               '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
         <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
         text.</body></html>
@@ -627,7 +645,8 @@
 
         >>> html = HTML('<html><body>Some text, some more text and '
         ...             '<b>some bold text</b>\\n'
-        ...             '<i>some italicised text</i></body></html>')
+        ...             '<i>some italicised text</i></body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
         <html><body>Some text, some more text and <b>SOME bold text</b>
         <i>some italicised text</i></body></html>
@@ -649,7 +668,8 @@
         """Rename matching elements.
 
         >>> html = HTML('<html><body>Some text, some more text and '
-        ...             '<b>some bold text</b></body></html>')
+        ...             '<b>some bold text</b></body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/b').rename('strong'))
         <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
         """
@@ -658,7 +678,7 @@
     def trace(self, prefix='', fileobj=None):
         """Print events as they pass through the transform.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('em').trace())
         (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
         (None, ('TEXT', u'Some ', (None, 1, 6)))
@@ -1024,7 +1044,7 @@
     ...             yield event
     ...         for event in stream:
     ...             yield event
-    >>> html = HTML('<body>Some <em>test</em> text</body>')
+    >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
     >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
     Prefix <body>Some <em>test</em> text</body>
     """
Copyright (C) 2012-2017 Edgewall Software