# HG changeset patch # User cmlenz # Date 1161705177 0 # Node ID 4ab9edf5e83bd94a8450d6ee31bd715af259bbf4 # Parent b592333efe1b56d4a529e2c77f56402979ec4795 Configurable encoding of template files, closing #65. * The `XMLParser` constructor now accepts an optional encoding parameter. * The `MarkupTemplate` and `TextTemplate` classes also get an encoding parameter in their initializers. * A `default_encoding` can be specified when creating a `TemplateLoader` instance, but the encoding can also be specified explicitly when calling the `load()` method. * Encodings specified using an XML declaration in templates and XML files are now honored (as long as Expat supports them, that is, and it doesn't support many). diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -75,17 +75,20 @@ htmlentitydefs.name2codepoint.items()] _external_dtd = '\n'.join(_entitydefs) - def __init__(self, source, filename=None): + def __init__(self, source, filename=None, encoding=None): """Initialize the parser for the given XML input. 
@param source: the XML text as a file-like object @param filename: the name of the file, if appropriate + @param encoding: the encoding of the file; if not specified, the + encoding is assumed to be ASCII, UTF-8, or UTF-16, or whatever the + encoding specified in the XML declaration (if any) """ self.source = source self.filename = filename # Setup the Expat parser - parser = expat.ParserCreate('utf-8', '}') + parser = expat.ParserCreate(encoding, '}') parser.buffer_text = True parser.returns_unicode = True parser.ordered_attributes = True diff --git a/genshi/template.py b/genshi/template.py --- a/genshi/template.py +++ b/genshi/template.py @@ -797,7 +797,8 @@ EXPR = StreamEventKind('EXPR') # an expression SUB = StreamEventKind('SUB') # a "subprogram" - def __init__(self, source, basedir=None, filename=None, loader=None): + def __init__(self, source, basedir=None, filename=None, loader=None, + encoding=None): """Initialize a template from either a string or a file-like object.""" if isinstance(source, basestring): self.source = StringIO(source) @@ -812,12 +813,12 @@ self.filters = [self._flatten, self._eval] - self.stream = self._parse() + self.stream = self._parse(encoding) def __repr__(self): return '<%s "%s">' % (self.__class__.__name__, self.filename) - def _parse(self): + def _parse(self, encoding): """Parse the template. 
The parsing stage parses the template and constructs a list of @@ -989,24 +990,26 @@ ('attrs', AttrsDirective), ('strip', StripDirective)] - def __init__(self, source, basedir=None, filename=None, loader=None): + def __init__(self, source, basedir=None, filename=None, loader=None, + encoding=None): """Initialize a template from either a string or a file-like object.""" Template.__init__(self, source, basedir=basedir, filename=filename, - loader=loader) + loader=loader, encoding=encoding) self.filters.append(self._match) if loader: from genshi.filters import IncludeFilter self.filters.append(IncludeFilter(loader)) - def _parse(self): + def _parse(self, encoding): """Parse the template from an XML document.""" stream = [] # list of events of the "compiled" template dirmap = {} # temporary mapping of directives to elements ns_prefix = {} depth = 0 - for kind, data, pos in XMLParser(self.source, filename=self.filename): + for kind, data, pos in XMLParser(self.source, filename=self.filename, + encoding=encoding): if kind is START_NS: # Strip out the namespace declaration for template directives @@ -1199,13 +1202,15 @@ _DIRECTIVE_RE = re.compile(r'^\s*(?>> os.remove(path) """ - def __init__(self, search_path=None, auto_reload=False, max_cache_size=25): + def __init__(self, search_path=None, auto_reload=False, max_cache_size=25, + default_encoding=None): """Create the template laoder. 
@param search_path: a list of absolute path names that should be @@ -1293,6 +1299,8 @@ template files, and reload them if they have changed @param max_cache_size: the maximum number of templates to keep in the cache + @param default_encoding: the default encoding to assume when loading + templates; defaults to UTF-8 """ self.search_path = search_path if self.search_path is None: @@ -1300,11 +1308,13 @@ elif isinstance(self.search_path, basestring): self.search_path = [self.search_path] self.auto_reload = auto_reload + self.default_encoding = default_encoding self._cache = LRUCache(max_cache_size) self._mtime = {} self._lock = threading.Lock() - def load(self, filename, relative_to=None, cls=MarkupTemplate): + def load(self, filename, relative_to=None, cls=MarkupTemplate, + encoding=None): """Load the template with the given name. If the `filename` parameter is relative, this method searches the search @@ -1329,7 +1339,11 @@ template is being loaded, or `None` if the template is being loaded directly @param cls: the class of the template object to instantiate + @param encoding: the encoding of the template to load; defaults to the + `default_encoding` of the loader instance """ + if encoding is None: + encoding = self.default_encoding if relative_to and not os.path.isabs(relative_to): filename = os.path.join(os.path.dirname(relative_to), filename) filename = os.path.normpath(filename) @@ -1379,7 +1393,7 @@ filename = os.path.join(dirname, filename) dirname = '' tmpl = cls(fileobj, basedir=dirname, filename=filename, - loader=self) + encoding=encoding, loader=self) finally: fileobj.close() self._cache[filename] = tmpl diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -59,6 +59,22 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_latin1_encoded(self): + text = u'
\xf6
'.encode('iso-8859-1') + events = list(XMLParser(StringIO(text), encoding='iso-8859-1')) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xf6', data) + + def test_latin1_encoded_xmldecl(self): + text = u""" +
\xf6
+ """.encode('iso-8859-1') + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xf6', data) + def test_html_entity_with_dtd(self): text = """ diff --git a/genshi/tests/template.py b/genshi/tests/template.py --- a/genshi/tests/template.py +++ b/genshi/tests/template.py @@ -1128,6 +1128,23 @@ """, str(tmpl.generate())) + def test_latin1_encoded_with_xmldecl(self): + tmpl = MarkupTemplate(u""" +
+ \xf6 +
""".encode('iso-8859-1'), encoding='iso-8859-1') + self.assertEqual(u"""
+ \xf6 +
""", unicode(tmpl.generate())) + + def test_latin1_encoded_explicit_encoding(self): + tmpl = MarkupTemplate(u"""
+ \xf6 +
""".encode('iso-8859-1'), encoding='iso-8859-1') + self.assertEqual(u"""
+ \xf6 +
""", unicode(tmpl.generate())) + class TextTemplateTestCase(unittest.TestCase): """Tests for text template processing.""" @@ -1151,6 +1168,10 @@ #end 'if foo'""") self.assertEqual('', str(tmpl.generate())) + def test_latin1_encoded(self): + text = u'$foo\xf6$bar'.encode('iso-8859-1') + tmpl = TextTemplate(text, encoding='iso-8859-1') + self.assertEqual(u'x\xf6y', unicode(tmpl.generate(foo='x', bar='y'))) # FIXME #def test_empty_lines(self): @@ -1317,6 +1338,24 @@
Included
""", tmpl2.generate().render()) + def test_load_with_default_encoding(self): + f = open(os.path.join(self.dirname, 'tmpl.html'), 'w') + try: + f.write(u'
\xf6
'.encode('iso-8859-1')) + finally: + f.close() + loader = TemplateLoader([self.dirname], default_encoding='iso-8859-1') + loader.load('tmpl.html') + + def test_load_with_explicit_encoding(self): + f = open(os.path.join(self.dirname, 'tmpl.html'), 'w') + try: + f.write(u'
\xf6
'.encode('iso-8859-1')) + finally: + f.close() + loader = TemplateLoader([self.dirname], default_encoding='utf-8') + loader.load('tmpl.html', encoding='iso-8859-1') + def suite(): suite = unittest.TestSuite()