changeset 316:a946edefac40 trunk

Configurable encoding of template files, closing #65. * The `XMLParser` constructor now accepts an optional encoding parameter. * The `MarkupTemplate` and `TextTemplate` classes also get an encoding parameter in their initializers. * A `default_encoding` can be specified when creating a `TemplateLoader` instance, but the encoding can also be specified explicitly when calling the `load()` method. * Encodings specified using an XML declaration in templates and XML files are now honored (as long as Expat supports them, that is, and it doesn't support many).
author cmlenz
date Tue, 24 Oct 2006 15:52:57 +0000
parents 06a25d0962af
children efa7870b63cb
files genshi/input.py genshi/template.py genshi/tests/input.py genshi/tests/template.py
diffstat 4 files changed, 86 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -75,17 +75,20 @@
                    htmlentitydefs.name2codepoint.items()]
     _external_dtd = '\n'.join(_entitydefs)
 
-    def __init__(self, source, filename=None):
+    def __init__(self, source, filename=None, encoding=None):
         """Initialize the parser for the given XML input.
         
         @param source: the XML text as a file-like object
         @param filename: the name of the file, if appropriate
+        @param encoding: the encoding of the file; if not specified, the
+            encoding is assumed to be ASCII, UTF-8, or UTF-16, or whatever the
+            encoding specified in the XML declaration (if any)
         """
         self.source = source
         self.filename = filename
 
         # Setup the Expat parser
-        parser = expat.ParserCreate('utf-8', '}')
+        parser = expat.ParserCreate(encoding, '}')
         parser.buffer_text = True
         parser.returns_unicode = True
         parser.ordered_attributes = True
--- a/genshi/template.py
+++ b/genshi/template.py
@@ -797,7 +797,8 @@
     EXPR = StreamEventKind('EXPR') # an expression
     SUB = StreamEventKind('SUB') # a "subprogram"
 
-    def __init__(self, source, basedir=None, filename=None, loader=None):
+    def __init__(self, source, basedir=None, filename=None, loader=None,
+                 encoding=None):
         """Initialize a template from either a string or a file-like object."""
         if isinstance(source, basestring):
             self.source = StringIO(source)
@@ -812,12 +813,12 @@
 
         self.filters = [self._flatten, self._eval]
 
-        self.stream = self._parse()
+        self.stream = self._parse(encoding)
 
     def __repr__(self):
         return '<%s "%s">' % (self.__class__.__name__, self.filename)
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template.
         
         The parsing stage parses the template and constructs a list of
@@ -989,24 +990,26 @@
                   ('attrs', AttrsDirective),
                   ('strip', StripDirective)]
 
-    def __init__(self, source, basedir=None, filename=None, loader=None):
+    def __init__(self, source, basedir=None, filename=None, loader=None,
+                 encoding=None):
         """Initialize a template from either a string or a file-like object."""
         Template.__init__(self, source, basedir=basedir, filename=filename,
-                          loader=loader)
+                          loader=loader, encoding=encoding)
 
         self.filters.append(self._match)
         if loader:
             from genshi.filters import IncludeFilter
             self.filters.append(IncludeFilter(loader))
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template from an XML document."""
         stream = [] # list of events of the "compiled" template
         dirmap = {} # temporary mapping of directives to elements
         ns_prefix = {}
         depth = 0
 
-        for kind, data, pos in XMLParser(self.source, filename=self.filename):
+        for kind, data, pos in XMLParser(self.source, filename=self.filename,
+                                         encoding=encoding):
 
             if kind is START_NS:
                 # Strip out the namespace declaration for template directives
@@ -1199,13 +1202,15 @@
 
     _DIRECTIVE_RE = re.compile(r'^\s*(?<!\\)#((?:\w+|#).*)\n?', re.MULTILINE)
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template from text input."""
         stream = [] # list of events of the "compiled" template
         dirmap = {} # temporary mapping of directives to elements
         depth = 0
+        if not encoding:
+            encoding = 'utf-8'
 
-        source = self.source.read()
+        source = self.source.read().decode(encoding, 'replace')
         offset = 0
         lineno = 1
 
@@ -1283,7 +1288,8 @@
     
     >>> os.remove(path)
     """
-    def __init__(self, search_path=None, auto_reload=False, max_cache_size=25):
+    def __init__(self, search_path=None, auto_reload=False, max_cache_size=25,
+                 default_encoding=None):
         """Create the template laoder.
         
         @param search_path: a list of absolute path names that should be
@@ -1293,6 +1299,8 @@
             template files, and reload them if they have changed
         @param max_cache_size: the maximum number of templates to keep in the
             cache
+        @param default_encoding: the default encoding to assume when loading
+            templates; defaults to UTF-8
         """
         self.search_path = search_path
         if self.search_path is None:
@@ -1300,11 +1308,13 @@
         elif isinstance(self.search_path, basestring):
             self.search_path = [self.search_path]
         self.auto_reload = auto_reload
+        self.default_encoding = default_encoding
         self._cache = LRUCache(max_cache_size)
         self._mtime = {}
         self._lock = threading.Lock()
 
-    def load(self, filename, relative_to=None, cls=MarkupTemplate):
+    def load(self, filename, relative_to=None, cls=MarkupTemplate,
+             encoding=None):
         """Load the template with the given name.
         
         If the `filename` parameter is relative, this method searches the search
@@ -1329,7 +1339,11 @@
             template is being loaded, or `None` if the template is being loaded
             directly
         @param cls: the class of the template object to instantiate
+        @param encoding: the encoding of the template to load; defaults to the
+            `default_encoding` of the loader instance
         """
+        if encoding is None:
+            encoding = self.default_encoding
         if relative_to and not os.path.isabs(relative_to):
             filename = os.path.join(os.path.dirname(relative_to), filename)
         filename = os.path.normpath(filename)
@@ -1379,7 +1393,7 @@
                             filename = os.path.join(dirname, filename)
                             dirname = ''
                         tmpl = cls(fileobj, basedir=dirname, filename=filename,
-                                   loader=self)
+                                   encoding=encoding, loader=self)
                     finally:
                         fileobj.close()
                     self._cache[filename] = tmpl
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -59,6 +59,22 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\u2013', data)
 
+    def test_latin1_encoded(self):
+        text = u'<div>\xf6</div>'.encode('iso-8859-1')
+        events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
+    def test_latin1_encoded_xmldecl(self):
+        text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
+        <div>\xf6</div>
+        """.encode('iso-8859-1')
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
     def test_html_entity_with_dtd(self):
         text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
--- a/genshi/tests/template.py
+++ b/genshi/tests/template.py
@@ -1128,6 +1128,23 @@
           </span>
         </div>""", str(tmpl.generate()))
 
+    def test_latin1_encoded_with_xmldecl(self):
+        tmpl = MarkupTemplate(u"""<?xml version="1.0" encoding="iso-8859-1" ?>
+        <div xmlns:py="http://genshi.edgewall.org/">
+          \xf6
+        </div>""".encode('iso-8859-1'), encoding='iso-8859-1')
+        self.assertEqual(u"""<div>
+          \xf6
+        </div>""", unicode(tmpl.generate()))
+
+    def test_latin1_encoded_explicit_encoding(self):
+        tmpl = MarkupTemplate(u"""<div xmlns:py="http://genshi.edgewall.org/">
+          \xf6
+        </div>""".encode('iso-8859-1'), encoding='iso-8859-1')
+        self.assertEqual(u"""<div>
+          \xf6
+        </div>""", unicode(tmpl.generate()))
+
 
 class TextTemplateTestCase(unittest.TestCase):
     """Tests for text template processing."""
@@ -1151,6 +1168,10 @@
         #end 'if foo'""")
         self.assertEqual('', str(tmpl.generate()))
 
+    def test_latin1_encoded(self):
+        text = u'$foo\xf6$bar'.encode('iso-8859-1')
+        tmpl = TextTemplate(text, encoding='iso-8859-1')
+        self.assertEqual(u'x\xf6y', unicode(tmpl.generate(foo='x', bar='y')))
 
     # FIXME
     #def test_empty_lines(self):
@@ -1317,6 +1338,24 @@
           <div>Included</div>
         </html>""", tmpl2.generate().render())
 
+    def test_load_with_default_encoding(self):
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        try:
+            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
+        finally:
+            f.close()
+        loader = TemplateLoader([self.dirname], default_encoding='iso-8859-1')
+        loader.load('tmpl.html')
+
+    def test_load_with_explicit_encoding(self):
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        try:
+            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
+        finally:
+            f.close()
+        loader = TemplateLoader([self.dirname], default_encoding='utf-8')
+        loader.load('tmpl.html', encoding='iso-8859-1')
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software