changeset 318:f738ffe8f1dd stable-0.3.x

Ported [389:391] to 0.3.x branch.
author cmlenz
date Tue, 24 Oct 2006 16:14:04 +0000
parents b12c045c1e81
children 2e0ca25af903
files ChangeLog genshi/input.py genshi/template.py genshi/tests/input.py genshi/tests/template.py
diffstat 5 files changed, 97 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Version 0.3.4
+http://svn.edgewall.org/repos/genshi/tags/0.3.4/
+(?, from branches/stable/0.3.x)
+
+ * The encoding of HTML and XML files, as well as markup and text templates,
+   can now be specified. Also, the encoding specified in XML declarations is
+   now respected unless an explicit encoding is requested.
+ * Expressions used as arguments for `py:with`, `py:def`, and `py:for`
+   directives can now contain non-ASCII strings.
+
+
 Version 0.3.3
 http://svn.edgewall.org/repos/genshi/tags/0.3.3/
 (Oct 16 2006, from branches/stable/0.3.x)
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -60,17 +60,20 @@
                    htmlentitydefs.name2codepoint.items()]
     _external_dtd = '\n'.join(_entitydefs)
 
-    def __init__(self, source, filename=None):
+    def __init__(self, source, filename=None, encoding=None):
         """Initialize the parser for the given XML input.
         
         @param source: the XML text as a file-like object
         @param filename: the name of the file, if appropriate
+        @param encoding: the encoding of the file; if not specified, the
+            encoding is assumed to be ASCII, UTF-8, or UTF-16, or whatever
+            encoding is specified in the XML declaration (if any)
         """
         self.source = source
         self.filename = filename
 
         # Setup the Expat parser
-        parser = expat.ParserCreate('utf-8', '}')
+        parser = expat.ParserCreate(encoding, '}')
         parser.buffer_text = True
         parser.returns_unicode = True
         parser.ordered_attributes = True
--- a/genshi/template.py
+++ b/genshi/template.py
@@ -792,7 +792,8 @@
     EXPR = StreamEventKind('EXPR') # an expression
     SUB = StreamEventKind('SUB') # a "subprogram"
 
-    def __init__(self, source, basedir=None, filename=None, loader=None):
+    def __init__(self, source, basedir=None, filename=None, loader=None,
+                 encoding=None):
         """Initialize a template from either a string or a file-like object."""
         if isinstance(source, basestring):
             self.source = StringIO(source)
@@ -807,12 +808,12 @@
 
         self.filters = [self._flatten, self._eval]
 
-        self.stream = self._parse()
+        self.stream = self._parse(encoding)
 
     def __repr__(self):
         return '<%s "%s">' % (self.__class__.__name__, self.filename)
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template.
         
         The parsing stage parses the template and constructs a list of
@@ -992,24 +993,26 @@
                   ('attrs', AttrsDirective),
                   ('strip', StripDirective)]
 
-    def __init__(self, source, basedir=None, filename=None, loader=None):
+    def __init__(self, source, basedir=None, filename=None, loader=None,
+                 encoding=None):
         """Initialize a template from either a string or a file-like object."""
         Template.__init__(self, source, basedir=basedir, filename=filename,
-                          loader=loader)
+                          loader=loader, encoding=encoding)
 
         self.filters.append(self._match)
         if loader:
             from genshi.filters import IncludeFilter
             self.filters.append(IncludeFilter(loader))
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template from an XML document."""
         stream = [] # list of events of the "compiled" template
         dirmap = {} # temporary mapping of directives to elements
         ns_prefix = {}
         depth = 0
 
-        for kind, data, pos in XMLParser(self.source, filename=self.filename):
+        for kind, data, pos in XMLParser(self.source, filename=self.filename,
+                                         encoding=encoding):
 
             if kind is START_NS:
                 # Strip out the namespace declaration for template directives
@@ -1203,13 +1206,15 @@
 
     _DIRECTIVE_RE = re.compile(r'^\s*(?<!\\)#((?:\w+|#).*)\n?', re.MULTILINE)
 
-    def _parse(self):
+    def _parse(self, encoding):
         """Parse the template from text input."""
         stream = [] # list of events of the "compiled" template
         dirmap = {} # temporary mapping of directives to elements
         depth = 0
+        if not encoding:
+            encoding = 'utf-8'
 
-        source = self.source.read()
+        source = self.source.read().decode(encoding, 'replace')
         offset = 0
         lineno = 1
 
@@ -1287,22 +1292,27 @@
     
     >>> os.remove(path)
     """
-    def __init__(self, search_path=None, auto_reload=False):
+    def __init__(self, search_path=None, auto_reload=False,
+                 default_encoding=None):
         """Create the template laoder.
         
         @param search_path: a list of absolute path names that should be
             searched for template files
         @param auto_reload: whether to check the last modification time of
             template files, and reload them if they have changed
+        @param default_encoding: the default encoding to assume when loading
+            templates; defaults to UTF-8
         """
         self.search_path = search_path
         if self.search_path is None:
             self.search_path = []
         self.auto_reload = auto_reload
+        self.default_encoding = default_encoding
         self._cache = {}
         self._mtime = {}
 
-    def load(self, filename, relative_to=None, cls=MarkupTemplate):
+    def load(self, filename, relative_to=None, cls=MarkupTemplate,
+             encoding=None):
         """Load the template with the given name.
         
         If the `filename` parameter is relative, this method searches the search
@@ -1327,7 +1337,11 @@
             template is being loaded, or `None` if the template is being loaded
             directly
         @param cls: the class of the template object to instantiate
+        @param encoding: the encoding of the template to load; defaults to the
+            `default_encoding` of the loader instance
         """
+        if encoding is None:
+            encoding = self.default_encoding
         if relative_to and not os.path.isabs(relative_to):
             filename = os.path.join(os.path.dirname(relative_to), filename)
         filename = os.path.normpath(filename)
@@ -1375,7 +1389,7 @@
                         filename = os.path.join(dirname, filename)
                         dirname = ''
                     tmpl = cls(fileobj, basedir=dirname, filename=filename,
-                               loader=self)
+                               loader=self, encoding=encoding)
                 finally:
                     fileobj.close()
                 self._cache[filename] = tmpl
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -59,6 +59,22 @@
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\u2013', data)
 
+    def test_latin1_encoded(self):
+        text = u'<div>\xf6</div>'.encode('iso-8859-1')
+        events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
+    def test_latin1_encoded_xmldecl(self):
+        text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
+        <div>\xf6</div>
+        """.encode('iso-8859-1')
+        events = list(XMLParser(StringIO(text)))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
     def test_html_entity_with_dtd(self):
         text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
--- a/genshi/tests/template.py
+++ b/genshi/tests/template.py
@@ -1067,6 +1067,23 @@
           </span>
         </div>""", str(tmpl.generate()))
 
+    def test_latin1_encoded_with_xmldecl(self):
+        tmpl = MarkupTemplate(u"""<?xml version="1.0" encoding="iso-8859-1" ?>
+        <div xmlns:py="http://genshi.edgewall.org/">
+          \xf6
+        </div>""".encode('iso-8859-1'), encoding='iso-8859-1')
+        self.assertEqual(u"""<div>
+          \xf6
+        </div>""", unicode(tmpl.generate()))
+
+    def test_latin1_encoded_explicit_encoding(self):
+        tmpl = MarkupTemplate(u"""<div xmlns:py="http://genshi.edgewall.org/">
+          \xf6
+        </div>""".encode('iso-8859-1'), encoding='iso-8859-1')
+        self.assertEqual(u"""<div>
+          \xf6
+        </div>""", unicode(tmpl.generate()))
+
 
 class TextTemplateTestCase(unittest.TestCase):
     """Tests for text template processing."""
@@ -1090,6 +1107,10 @@
         #end 'if foo'""")
         self.assertEqual('', str(tmpl.generate()))
 
+    def test_latin1_encoded(self):
+        text = u'$foo\xf6$bar'.encode('iso-8859-1')
+        tmpl = TextTemplate(text, encoding='iso-8859-1')
+        self.assertEqual(u'x\xf6y', unicode(tmpl.generate(foo='x', bar='y')))
 
 class TemplateLoaderTestCase(unittest.TestCase):
     """Tests for the template loader."""
@@ -1233,6 +1254,24 @@
           <div>Included</div>
         </html>""", tmpl2.generate().render())
 
+    def test_load_with_default_encoding(self):
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        try:
+            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
+        finally:
+            f.close()
+        loader = TemplateLoader([self.dirname], default_encoding='iso-8859-1')
+        loader.load('tmpl.html')
+
+    def test_load_with_explicit_encoding(self):
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        try:
+            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
+        finally:
+            f.close()
+        loader = TemplateLoader([self.dirname], default_encoding='utf-8')
+        loader.load('tmpl.html', encoding='iso-8859-1')
+
 
 def suite():
     suite = unittest.TestSuite()
Copyright (C) 2012-2017 Edgewall Software