')
- self.assertEquals('
', (html | sanitizer).render())
-
- def test_sanitize_remove_style_phishing(self):
- sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
- # The position property is not allowed
- html = HTML('
')
- self.assertEquals('
', (html | sanitizer).render())
- # Normal margins get passed through
- html = HTML('
')
- self.assertEquals('
',
- (html | sanitizer).render())
- # But not negative margins
- html = HTML('
')
- self.assertEquals('
', (html | sanitizer).render())
- html = HTML('
')
- self.assertEquals('
', (html | sanitizer).render())
- html = HTML('
')
- self.assertEquals('
', (html | sanitizer).render())
-
- def test_sanitize_remove_src_javascript(self):
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Case-insensitive protocol matching
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Grave accents (not parsed)
- self.assertRaises(ParseError, HTML,
- '
')
- # Protocol encoded using UTF-8 numeric entities
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Protocol encoded using UTF-8 numeric entities without a semicolon
- # (which is allowed because the max number of digits is used)
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Protocol encoded using UTF-8 numeric hex entities without a semicolon
- # (which is allowed because the max number of digits is used)
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Embedded tab character in protocol
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
- # Embedded tab character in protocol, but encoded this time
- html = HTML('
')
- self.assertEquals('
', (html | HTMLSanitizer()).render())
-
-
-def suite():
- suite = unittest.TestSuite()
- suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
- suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
- suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
- return suite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest='suite')
diff --git a/genshi/filters/tests/i18n.py b/genshi/filters/tests/i18n.py
--- a/genshi/filters/tests/i18n.py
+++ b/genshi/filters/tests/i18n.py
@@ -14,13 +14,13 @@
from datetime import datetime
import doctest
from gettext import NullTranslations
-from StringIO import StringIO
import unittest
from genshi.core import Attrs
from genshi.template import MarkupTemplate, Context
from genshi.filters.i18n import Translator, extract
from genshi.input import HTML
+from genshi.compat import IS_PYTHON2, StringIO
class DummyTranslations(NullTranslations):
@@ -39,17 +39,31 @@
def _domain_call(self, func, domain, *args, **kwargs):
return getattr(self._domains.get(domain, self), func)(*args, **kwargs)
- def ugettext(self, message):
- missing = object()
- tmsg = self._catalog.get(message, missing)
- if tmsg is missing:
- if self._fallback:
- return self._fallback.ugettext(message)
- return unicode(message)
- return tmsg
+ if IS_PYTHON2:
+ def ugettext(self, message):
+ missing = object()
+ tmsg = self._catalog.get(message, missing)
+ if tmsg is missing:
+ if self._fallback:
+ return self._fallback.ugettext(message)
+ return unicode(message)
+ return tmsg
+ else:
+ def gettext(self, message):
+ missing = object()
+ tmsg = self._catalog.get(message, missing)
+ if tmsg is missing:
+ if self._fallback:
+ return self._fallback.gettext(message)
+ return unicode(message)
+ return tmsg
- def dugettext(self, domain, message):
- return self._domain_call('ugettext', domain, message)
+ if IS_PYTHON2:
+ def dugettext(self, domain, message):
+ return self._domain_call('ugettext', domain, message)
+ else:
+ def dgettext(self, domain, message):
+ return self._domain_call('gettext', domain, message)
def ungettext(self, msgid1, msgid2, n):
try:
@@ -62,8 +76,16 @@
else:
return msgid2
- def dungettext(self, domain, singular, plural, numeral):
- return self._domain_call('ungettext', domain, singular, plural, numeral)
+ if not IS_PYTHON2:
+ ngettext = ungettext
+ del ungettext
+
+ if IS_PYTHON2:
+ def dungettext(self, domain, singular, plural, numeral):
+ return self._domain_call('ungettext', domain, singular, plural, numeral)
+ else:
+ def dngettext(self, domain, singular, plural, numeral):
+ return self._domain_call('ngettext', domain, singular, plural, numeral)
class TranslatorTestCase(unittest.TestCase):
@@ -72,7 +94,7 @@
"""
Verify that translated attributes end up in a proper `Attrs` instance.
"""
- html = HTML("""
+ html = HTML(u"""
""")
translator = Translator(lambda s: u"Voh")
@@ -218,9 +240,9 @@
gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]."
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Für Details siehe bitte Hilfe .
- """, tmpl.generate().render())
+ """.encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
def test_extract_i18n_msg_nonewline(self):
tmpl = MarkupTemplate("""
+ self.assertEqual(u"""
Für Details siehe bitte Hilfe
""", tmpl.generate().render())
@@ -264,9 +286,9 @@
gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Für Details siehe bitte
Hilfe
- """, tmpl.generate().render())
+ """.encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
def test_extract_i18n_msg_with_attributes(self):
tmpl = MarkupTemplate("""
+ self.assertEqual(u"""
Für Details siehe bitte Hilfeseite .
""", tmpl.generate().render())
@@ -449,7 +471,7 @@
gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Einträge pro Seite anzeigen.
""", tmpl.generate().render())
@@ -476,7 +498,7 @@
gettext = lambda s: u"Für [2:Details] siehe bitte [1:Hilfe]."
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Für Details siehe bitte Hilfe .
""", tmpl.generate().render())
@@ -500,13 +522,13 @@
Show me entries per page, starting at page .
- """)
+ """, encoding='utf-8')
gettext = lambda s: u"[1:] Einträge pro Seite, beginnend auf Seite [2:]."
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
-
Eintr\xc3\xa4ge pro Seite, beginnend auf Seite .
- """, tmpl.generate().render())
+ self.assertEqual(u"""
+
Eintr\u00E4ge pro Seite, beginnend auf Seite .
+ """.encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
def test_extract_i18n_msg_with_param(self):
tmpl = MarkupTemplate("""
+ self.assertEqual(u"""
Jim, sei gegrüßt!
""", tmpl.generate(user=dict(name='Jim')).render())
@@ -559,7 +581,7 @@
gettext = lambda s: u"Sei gegrüßt, [1:Alter]!"
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Sei gegrüßt, Alter !
""", tmpl.generate(anchor='42').render())
@@ -617,7 +639,7 @@
gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
translator = Translator(gettext)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Einträge pro Seite anzeigen.
""", tmpl.generate().render())
@@ -676,7 +698,7 @@
}))
tmpl.filters.insert(0, translator)
tmpl.add_directives(Translator.NAMESPACE, translator)
- self.assertEqual("""
+ self.assertEqual(u"""
Voh
""", tmpl.generate().render())
@@ -720,9 +742,9 @@
})
translator = Translator(translations)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Modificado à um dia por Pedro
- """, tmpl.generate(date='um dia', author="Pedro").render())
+ """.encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8'))
def test_i18n_msg_ticket_251_extract(self):
@@ -749,9 +771,9 @@
})
translator = Translator(translations)
translator.setup(tmpl)
- self.assertEqual("""
+ self.assertEqual(u"""
Trandução[ 0 ] : Uma moeda
- """, tmpl.generate().render())
+ """.encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
def test_extract_i18n_msg_with_other_directives_nested(self):
tmpl = MarkupTemplate("""
+ self.assertEqual(u"""
Antes de o fazer, porém,
por favor tente procurar
por problemas semelhantes , uma vez que é muito provável que este problema
@@ -846,11 +868,11 @@
'[2:[3:trac.ini]]\n and cannot be edited on this page.',
messages[0][2]
)
- self.assertEqual("""
+ self.assertEqual(u"""
Nota: Este repositório está definido em
trac.ini
e não pode ser editado nesta página.
- """, tmpl.generate(editable=False).render())
+ """.encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8'))
def test_extract_i18n_msg_with_py_strip(self):
tmpl = MarkupTemplate("""
Included tmpl0
foo_Bar 0
@@ -1797,7 +1824,7 @@
Voh 3
Voh 3
""", tmpl.generate(idx=-1,
- dg=translations.dugettext).render())
+ dg=dgettext).render())
finally:
shutil.rmtree(dirname)
diff --git a/genshi/filters/tests/test_html.py b/genshi/filters/tests/test_html.py
new file mode 100644
--- /dev/null
+++ b/genshi/filters/tests/test_html.py
@@ -0,0 +1,513 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from genshi.input import HTML, ParseError
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.template import MarkupTemplate
+
+class HTMLFormFillerTestCase(unittest.TestCase):
+
+ def test_fill_input_text_no_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller()
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_input_text_single_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller(data={'foo': 'bar'})
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_input_text_multi_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller(data={'foo': ['bar']})
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_input_hidden_no_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller()
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_input_hidden_single_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller(data={'foo': 'bar'})
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_input_hidden_multi_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller(data={'foo': ['bar']})
+ self.assertEquals("""
""", html.render())
+
+ def test_fill_textarea_no_value(self):
+ html = HTML(u"""
""") | HTMLFormFiller()
+ self.assertEquals("""
+
+
""", html.render())
+
+ def test_fill_textarea_single_value(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller(data={'foo': 'bar'})
+ self.assertEquals("""
+ bar
+
""", html.render())
+
+ def test_fill_textarea_multi_value(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller(data={'foo': ['bar']})
+ self.assertEquals("""
+ bar
+
""", html.render())
+
+ def test_fill_textarea_multiple(self):
+ # Ensure that the subsequent textarea doesn't get the data from the
+ # first
+ html = HTML(u"""
+
+
+
""") | HTMLFormFiller(data={'foo': 'Some text'})
+ self.assertEquals("""
+ Some text
+
+
""", html.render())
+
+ def test_fill_textarea_preserve_original(self):
+ html = HTML(u"""
+
+ Original value
+
""") | HTMLFormFiller(data={'foo': 'Some text'})
+ self.assertEquals("""
+ Some text
+ Original value
+
""", html.render())
+
+ def test_fill_input_checkbox_single_value_auto_no_value(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller()
+ self.assertEquals("""
+
+
""", html.render())
+
+ def test_fill_input_checkbox_single_value_auto(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ''})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
+
+ def test_fill_input_checkbox_single_value_defined(self):
+ html = HTML("""
+
+
""", encoding='ascii')
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+ def test_fill_input_checkbox_multi_value_auto(self):
+ html = HTML("""
+
+
""", encoding='ascii')
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': []})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
+
+ def test_fill_input_checkbox_multi_value_defined(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+ def test_fill_input_radio_no_value(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller()
+ self.assertEquals("""
+
+
""", html.render())
+
+ def test_fill_input_radio_single_value(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+ def test_fill_input_radio_multi_value(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+ def test_fill_input_radio_empty_string(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ''})).render())
+
+ def test_fill_input_radio_multi_empty_string(self):
+ html = HTML(u"""
+
+
""")
+ self.assertEquals("""
+
+
""", (html | HTMLFormFiller(data={'foo': ['']})).render())
+
+ def test_fill_select_no_value_auto(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller()
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_select_no_value_defined(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller()
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_select_single_value_auto(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller(data={'foo': '1'})
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_select_single_value_defined(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller(data={'foo': '1'})
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_select_multi_value_auto(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller(data={'foo': ['1', '3']})
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_select_multi_value_defined(self):
+ html = HTML(u"""
+
+ 1
+ 2
+ 3
+
+
""") | HTMLFormFiller(data={'foo': ['1', '3']})
+ self.assertEquals("""
+
+ 1
+ 2
+ 3
+
+
""", html.render())
+
+ def test_fill_option_segmented_text(self):
+ html = MarkupTemplate(u"""
+
+ foo $x
+
+ """).generate(x=1) | HTMLFormFiller(data={'foo': '1'})
+ self.assertEquals(u"""
+
+ foo 1
+
+ """, html.render())
+
+ def test_fill_option_segmented_text_no_value(self):
+ html = MarkupTemplate("""
+
+ foo $x bar
+
+ """).generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
+ self.assertEquals("""
+
+ foo 1 bar
+
+ """, html.render())
+
+ def test_fill_option_unicode_value(self):
+ html = HTML(u"""
+
+ foo
+
+ """) | HTMLFormFiller(data={'foo': u'ö'})
+ self.assertEquals(u"""
+
+ foo
+
+ """, html.render(encoding=None))
+
+ def test_fill_input_password_disabled(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller(data={'pass': 'bar'})
+ self.assertEquals("""
+
+
""", html.render())
+
+ def test_fill_input_password_enabled(self):
+ html = HTML(u"""
+
+
""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
+ self.assertEquals("""
+
+
""", html.render())
+
+
+class HTMLSanitizerTestCase(unittest.TestCase):
+
+ def test_sanitize_unchanged(self):
+ html = HTML(u'
fo o ')
+ self.assertEquals('
fo o ',
+ (html | HTMLSanitizer()).render())
+ html = HTML(u'
foo ')
+ self.assertEquals('
foo ',
+ (html | HTMLSanitizer()).render())
+
+ def test_sanitize_escape_text(self):
+ html = HTML(u'
fo& ')
+ self.assertEquals('
fo& ',
+ (html | HTMLSanitizer()).render())
+ html = HTML(u'
<foo> ')
+ self.assertEquals('
<foo> ',
+ (html | HTMLSanitizer()).render())
+
+ def test_sanitize_entityref_text(self):
+ html = HTML(u'
foö ')
+ self.assertEquals(u'
foö ',
+ (html | HTMLSanitizer()).render(encoding=None))
+
+ def test_sanitize_escape_attr(self):
+ html = HTML(u'
')
+ self.assertEquals('
',
+ (html | HTMLSanitizer()).render())
+
+ def test_sanitize_close_empty_tag(self):
+ html = HTML(u'
fo o ')
+ self.assertEquals('
fo o ',
+ (html | HTMLSanitizer()).render())
+
+ def test_sanitize_invalid_entity(self):
+ html = HTML(u'&junk;')
+ self.assertEquals('&junk;', (html | HTMLSanitizer()).render())
+
+ def test_sanitize_remove_script_elem(self):
+ html = HTML(u'')
+ self.assertEquals('', (html | HTMLSanitizer()).render())
+ html = HTML(u'')
+ self.assertEquals('', (html | HTMLSanitizer()).render())
+ self.assertRaises(ParseError, HTML, u'
alert("foo") ')
+ self.assertRaises(ParseError, HTML,
+ u'')
+
+ def test_sanitize_remove_onclick_attr(self):
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+
+ def test_sanitize_remove_input_password(self):
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+
+ def test_sanitize_remove_comments(self):
+ html = HTML(u'''
''')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+
+ def test_sanitize_remove_style_scripts(self):
+ sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+ # Inline style with url() using javascript: scheme
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ # Inline style with url() using javascript: scheme, using control char
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ # Inline style with url() using javascript: scheme, in quotes
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ # IE expressions in CSS not allowed
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
',
+ (html | sanitizer).render())
+ # Inline style with url() using javascript: scheme, using unicode
+ # escapes
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+
+ def test_sanitize_remove_style_phishing(self):
+ sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+ # The position property is not allowed
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ # Normal margins get passed through
+ html = HTML(u'
')
+ self.assertEquals('
',
+ (html | sanitizer).render())
+ # But not negative margins
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+ html = HTML(u'
')
+ self.assertEquals('
', (html | sanitizer).render())
+
+ def test_sanitize_remove_src_javascript(self):
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Case-insensitive protocol matching
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Grave accents (not parsed)
+ self.assertRaises(ParseError, HTML,
+ u'
')
+ # Protocol encoded using UTF-8 numeric entities
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Protocol encoded using UTF-8 numeric entities without a semicolon
+ # (which is allowed because the max number of digits is used)
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Protocol encoded using UTF-8 numeric hex entities without a semicolon
+ # (which is allowed because the max number of digits is used)
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Embedded tab character in protocol
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+ # Embedded tab character in protocol, but encoded this time
+ html = HTML(u'
')
+ self.assertEquals('
', (html | HTMLSanitizer()).render())
+
+
+def suite():
+ suite = unittest.TestSuite()
+ suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
+ suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
+ suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
+ return suite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')
diff --git a/genshi/filters/tests/transform.py b/genshi/filters/tests/transform.py
--- a/genshi/filters/tests/transform.py
+++ b/genshi/filters/tests/transform.py
@@ -48,8 +48,10 @@
def _transform(html, transformer, with_attrs=False):
"""Apply transformation returning simplified marked stream."""
- if isinstance(html, basestring):
- html = HTML(html)
+ if isinstance(html, basestring) and not isinstance(html, unicode):
+ html = HTML(html, encoding='utf-8')
+ elif isinstance(html, unicode):
+ html = HTML(html, encoding='utf-8')
stream = transformer(html, keep_marks=True)
return _simplify(stream, with_attrs)
@@ -57,7 +59,7 @@
class SelectTest(unittest.TestCase):
"""Test .select()"""
def _select(self, select):
- html = HTML(FOOBAR)
+ html = HTML(FOOBAR, encoding='utf-8')
if isinstance(select, basestring):
select = [select]
transformer = Transformer(select[0])
@@ -138,7 +140,7 @@
def test_select_text_context(self):
self.assertEqual(
- list(Transformer('.')(HTML('foo'), keep_marks=True)),
+ list(Transformer('.')(HTML(u'foo'), keep_marks=True)),
[('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))],
)
@@ -205,7 +207,7 @@
def test_invert_text_context(self):
self.assertEqual(
- _simplify(Transformer('.').invert()(HTML('foo'), keep_marks=True)),
+ _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)),
[(None, 'TEXT', u'foo')],
)
@@ -271,7 +273,7 @@
def test_empty_text_context(self):
self.assertEqual(
- _simplify(Transformer('.')(HTML('foo'), keep_marks=True)),
+ _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)),
[(OUTSIDE, TEXT, u'foo')],
)
@@ -656,9 +658,11 @@
def __iter__(self):
self.count += 1
- return iter(HTML('CONTENT %i' % self.count))
+ return iter(HTML(u'CONTENT %i' % self.count))
- if isinstance(html, basestring):
+ if isinstance(html, basestring) and not isinstance(html, unicode):
+ html = HTML(html, encoding='utf-8')
+ else:
html = HTML(html)
if content is None:
content = Injector()
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
--- a/genshi/filters/transform.py
+++ b/genshi/filters/transform.py
@@ -31,7 +31,8 @@
...
... Some
body text.
...
-... ''')
+... ''',
+... encoding='utf-8')
>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
... .unwrap().wrap(tag.u))
@@ -136,7 +137,8 @@
mark.
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
Transformations act on selected stream events matching an XPath expression.
Here's an example of removing some markup (the title, in this case)
@@ -215,7 +217,8 @@
... yield mark, (kind, data.upper(), pos)
... else:
... yield mark, (kind, data, pos)
- >>> short_stream = HTML('Some
test text')
+ >>> short_stream = HTML('Some
test text',
+ ... encoding='utf-8')
>>> print(short_stream | Transformer('.//em/text()').apply(upper))
Some
TEST text
"""
@@ -233,7 +236,7 @@
"""Mark events matching the given XPath expression, within the current
selection.
- >>> html = HTML('Some
test text')
+ >>> html = HTML('Some
test text', encoding='utf-8')
>>> print(html | Transformer().select('.//em').trace())
(None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
(None, ('TEXT', u'Some ', (None, 1, 6)))
@@ -257,7 +260,7 @@
Specificaly, all marks are converted to null marks, and all null marks
are converted to OUTSIDE marks.
- >>> html = HTML('Some
test text')
+ >>> html = HTML('Some
test text', encoding='utf-8')
>>> print(html | Transformer('//em').invert().trace())
('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -277,7 +280,7 @@
Example:
- >>> html = HTML('Some
test text')
+ >>> html = HTML('Some
test text', encoding='utf-8')
>>> print(html | Transformer('//em').end().trace())
('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -301,7 +304,8 @@
Example:
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').empty())
Some Title Some
text.
@@ -316,7 +320,8 @@
Example:
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').remove())
Some Title Some
text.
@@ -333,7 +338,8 @@
Example:
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').unwrap())
Some Title Some body
text.
@@ -346,7 +352,8 @@
"""Wrap selection in an element.
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').wrap('strong'))
Some Title Some
body text.
@@ -362,7 +369,8 @@
"""Replace selection with content.
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//title/text()').replace('New Title'))
New Title Some
body
text.
@@ -380,7 +388,8 @@
tag:
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').before('emphasised '))
Some Title Some emphasised
body text.
@@ -397,7 +406,8 @@
Here, we insert some text after the closing tag:
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em').after(' rock'))
Some Title Some
body
rock text.
@@ -414,7 +424,8 @@
Inserting some new text at the start of the :
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//body').prepend('Some new body text. '))
Some Title Some new body text.
Some
body text.
@@ -429,7 +440,8 @@
"""Insert content before the END event of the selection.
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//body').append(' Some new body text.'))
Some Title Some
body
text. Some new body text.
@@ -450,7 +462,7 @@
>>> html = HTML('
Some Title '
... 'Some
body text .'
- ... '')
+ ... '', encoding='utf-8')
>>> print(html | Transformer('body/em').attr('class', None))
Some Title Some
body
text .
@@ -493,7 +505,8 @@
>>> from genshi.builder import tag
>>> buffer = StreamBuffer()
>>> html = HTML('
Some Title '
- ... 'Some
body text.')
+ ... 'Some
body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('head/title/text()').copy(buffer)
... .end().select('body').prepend(tag.h1(buffer)))
Some Title Some
@@ -514,7 +527,8 @@
>>> html = HTML('Some Title '
... 'Some body '
- ... 'text .')
+ ... 'text .',
+ ... encoding='utf-8')
>>> buffer = StreamBuffer()
>>> def apply_attr(name, entry):
... return list(buffer)[0][1][1].get('class')
@@ -546,7 +560,8 @@
>>> from genshi.builder import tag
>>> buffer = StreamBuffer()
>>> html = HTML('Some Title '
- ... 'Some body text.')
+ ... 'Some body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('.//em/text()').cut(buffer)
... .end().select('.//em').after(tag.h1(buffer)))
Some Title Some
@@ -577,7 +592,8 @@
top of the document:
>>> doc = HTML(' Some one '
- ... 'text two . ')
+ ... 'text two .',
+ ... encoding='utf-8')
>>> buffer = StreamBuffer()
>>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
... .end().buffer().select('notes').prepend(buffer))
@@ -595,7 +611,8 @@
>>> from genshi.filters.html import HTMLSanitizer
>>> html = HTML('Some text and some more text')
+ ... ' and some more text',
+ ... encoding='utf-8')
>>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
Some text and some more text
@@ -609,7 +626,8 @@
the selection.
>>> html = HTML('Some Title '
- ... 'Some body text.')
+ ... 'Some body text.',
+ ... encoding='utf-8')
>>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
SOME TITLE Some body
text.
@@ -627,7 +645,8 @@
>>> html = HTML('Some text, some more text and '
... 'some bold text \\n'
- ... 'some italicised text ')
+ ... 'some italicised text ',
+ ... encoding='utf-8')
>>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
Some text, some more text and SOME bold text
some italicised text
@@ -649,7 +668,8 @@
"""Rename matching elements.
>>> html = HTML('Some text, some more text and '
- ... 'some bold text ')
+ ... 'some bold text