changeset 311:8de1ff534d22 trunk

* The `HTMLParser` class and the `HTML` function now accept an `encoding` parameter to properly deal with bytestring input (defaults to UTF-8).
* The `TemplateLoader` class can now also be initialized from a string for the search path, for cases where the search path contains only a single directory.
author cmlenz
date Sun, 22 Oct 2006 14:57:40 +0000
parents a867f4a10d12
children cb7326367f91
files genshi/input.py genshi/template.py genshi/tests/input.py genshi/tests/template.py
diffstat 4 files changed, 47 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -76,7 +76,7 @@
     _external_dtd = '\n'.join(_entitydefs)
 
     def __init__(self, source, filename=None):
-        """Initialize the parser for the given XML text.
+        """Initialize the parser for the given XML input.
         
         @param source: the XML text as a file-like object
         @param filename: the name of the file, if appropriate
@@ -250,10 +250,17 @@
                               'hr', 'img', 'input', 'isindex', 'link', 'meta',
                               'param'])
 
-    def __init__(self, source, filename=None):
+    def __init__(self, source, filename=None, encoding='utf-8'):
+        """Initialize the parser for the given HTML input.
+        
+        @param source: the HTML text as a file-like object
+        @param filename: the name of the file, if known
+        @param encoding: encoding of the file; ignored if the input is unicode
+        """
         html.HTMLParser.__init__(self)
         self.source = source
         self.filename = filename
+        self.encoding = encoding
         self._queue = []
         self._open_tags = []
 
@@ -321,6 +328,8 @@
             self._enqueue(END, QName(tag))
 
     def handle_data(self, text):
+        if not isinstance(text, unicode):
+            text = text.decode(self.encoding, 'replace')
         self._enqueue(TEXT, text)
 
     def handle_charref(self, name):
@@ -343,8 +352,8 @@
         self._enqueue(COMMENT, text)
 
 
-def HTML(text):
-    return Stream(list(HTMLParser(StringIO(text))))
+def HTML(text, encoding='utf-8'):
+    return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
 
 def _coalesce(stream):
     """Coalesces adjacent TEXT events into a single event."""
--- a/genshi/template.py
+++ b/genshi/template.py
@@ -1287,7 +1287,8 @@
         """Create the template laoder.
         
         @param search_path: a list of absolute path names that should be
-            searched for template files
+            searched for template files, or a string containing a single
+            absolute path
         @param auto_reload: whether to check the last modification time of
             template files, and reload them if they have changed
         @param max_cache_size: the maximum number of templates to keep in the
@@ -1296,6 +1297,8 @@
         self.search_path = search_path
         if self.search_path is None:
             self.search_path = []
+        elif isinstance(self.search_path, basestring):
+            self.search_path = [self.search_path]
         self.auto_reload = auto_reload
         self._cache = LRUCache(max_cache_size)
         self._mtime = {}
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -120,6 +120,13 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)
 
+    def test_input_encoding(self):
+        text = u'<div>\xf6</div>'.encode('iso-8859-1')
+        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
     def test_unicode_input(self):
         text = u'<div>\u2013</div>'
         events = list(HTMLParser(StringIO(text)))
--- a/genshi/tests/template.py
+++ b/genshi/tests/template.py
@@ -1140,6 +1140,21 @@
         self.assertEqual('', str(tmpl.generate()))
 
 
+    # FIXME
+    #def test_empty_lines(self):
+    #    tmpl = TextTemplate("""Your items:
+    #
+    #    #for item in items
+    #      * ${item}
+    #
+    #    #end""")
+    #    self.assertEqual("""Your items:
+    #      * 0
+    #      * 1
+    #      * 2
+    #    """, tmpl.generate(items=range(3)).render('text'))
+
+
 class TemplateLoaderTestCase(unittest.TestCase):
     """Tests for the template loader."""
 
@@ -1149,6 +1164,14 @@
     def tearDown(self):
         shutil.rmtree(self.dirname)
 
+    def test_search_path_empty(self):
+        loader = TemplateLoader()
+        self.assertEqual([], loader.search_path)
+
+    def test_search_path_as_string(self):
+        loader = TemplateLoader(self.dirname)
+        self.assertEqual([self.dirname], loader.search_path)
+
     def test_relative_include_samedir(self):
         file1 = open(os.path.join(self.dirname, 'tmpl1.html'), 'w')
         try:
Copyright (C) 2012-2017 Edgewall Software