# HG changeset patch # User cmlenz # Date 1161706444 0 # Node ID d7d71fb4d28b92a6f12ad95d206e1fcd016746ed # Parent adfb3090afcb15aa85c6bf2dda6597a563148164 Ported [389:391] to 0.3.x branch. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Version 0.3.4 +http://svn.edgewall.org/repos/genshi/tags/0.3.4/ +(?, from branches/stable/0.3.x) + + * The encoding of HTML and XML files, as well as markup and text templates, + can now be specified. Also, the encoding specified in XML declarations is + now respected unless an explicit encoding is requested. + * Expressions used as arguments for `py:with`, `py:def`, and `py:for` + directives can now contain non-ASCII strings. + + Version 0.3.3 http://svn.edgewall.org/repos/genshi/tags/0.3.3/ (Oct 16 2006, from branches/stable/0.3.x) diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -60,17 +60,20 @@ htmlentitydefs.name2codepoint.items()] _external_dtd = '\n'.join(_entitydefs) - def __init__(self, source, filename=None): + def __init__(self, source, filename=None, encoding=None): """Initialize the parser for the given XML input. 
@param source: the XML text as a file-like object @param filename: the name of the file, if appropriate + @param encoding: the encoding of the file; if not specified, the + encoding is assumed to be ASCII, UTF-8, or UTF-16, or whatever + encoding is specified in the XML declaration (if any) """ self.source = source self.filename = filename # Setup the Expat parser - parser = expat.ParserCreate('utf-8', '}') + parser = expat.ParserCreate(encoding, '}') parser.buffer_text = True parser.returns_unicode = True parser.ordered_attributes = True diff --git a/genshi/template.py b/genshi/template.py --- a/genshi/template.py +++ b/genshi/template.py @@ -792,7 +792,8 @@ EXPR = StreamEventKind('EXPR') # an expression SUB = StreamEventKind('SUB') # a "subprogram" - def __init__(self, source, basedir=None, filename=None, loader=None): + def __init__(self, source, basedir=None, filename=None, loader=None, + encoding=None): """Initialize a template from either a string or a file-like object.""" if isinstance(source, basestring): self.source = StringIO(source) @@ -807,12 +808,12 @@ self.filters = [self._flatten, self._eval] - self.stream = self._parse() + self.stream = self._parse(encoding) def __repr__(self): return '<%s "%s">' % (self.__class__.__name__, self.filename) - def _parse(self): + def _parse(self, encoding): """Parse the template. 
The parsing stage parses the template and constructs a list of @@ -992,24 +993,26 @@ ('attrs', AttrsDirective), ('strip', StripDirective)] - def __init__(self, source, basedir=None, filename=None, loader=None): + def __init__(self, source, basedir=None, filename=None, loader=None, + encoding=None): """Initialize a template from either a string or a file-like object.""" Template.__init__(self, source, basedir=basedir, filename=filename, - loader=loader) + loader=loader, encoding=encoding) self.filters.append(self._match) if loader: from genshi.filters import IncludeFilter self.filters.append(IncludeFilter(loader)) - def _parse(self): + def _parse(self, encoding): """Parse the template from an XML document.""" stream = [] # list of events of the "compiled" template dirmap = {} # temporary mapping of directives to elements ns_prefix = {} depth = 0 - for kind, data, pos in XMLParser(self.source, filename=self.filename): + for kind, data, pos in XMLParser(self.source, filename=self.filename, + encoding=encoding): if kind is START_NS: # Strip out the namespace declaration for template directives @@ -1203,13 +1206,15 @@ _DIRECTIVE_RE = re.compile(r'^\s*(?>> os.remove(path) """ - def __init__(self, search_path=None, auto_reload=False): + def __init__(self, search_path=None, auto_reload=False, + default_encoding=None): """Create the template laoder. 
@param search_path: a list of absolute path names that should be searched for template files @param auto_reload: whether to check the last modification time of template files, and reload them if they have changed + @param default_encoding: the default encoding to assume when loading + templates; defaults to UTF-8 """ self.search_path = search_path if self.search_path is None: self.search_path = [] self.auto_reload = auto_reload + self.default_encoding = default_encoding self._cache = {} self._mtime = {} - def load(self, filename, relative_to=None, cls=MarkupTemplate): + def load(self, filename, relative_to=None, cls=MarkupTemplate, + encoding=None): """Load the template with the given name. If the `filename` parameter is relative, this method searches the search @@ -1327,7 +1337,11 @@ template is being loaded, or `None` if the template is being loaded directly @param cls: the class of the template object to instantiate + @param encoding: the encoding of the template to load; defaults to the + `default_encoding` of the loader instance """ + if encoding is None: + encoding = self.default_encoding if relative_to and not os.path.isabs(relative_to): filename = os.path.join(os.path.dirname(relative_to), filename) filename = os.path.normpath(filename) @@ -1375,7 +1389,7 @@ filename = os.path.join(dirname, filename) dirname = '' tmpl = cls(fileobj, basedir=dirname, filename=filename, - loader=self) + loader=self, encoding=encoding) finally: fileobj.close() self._cache[filename] = tmpl diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -59,6 +59,22 @@ self.assertEqual(Stream.TEXT, kind) self.assertEqual(u'\u2013', data) + def test_latin1_encoded(self): + text = u'
\xf6
'.encode('iso-8859-1') + events = list(XMLParser(StringIO(text), encoding='iso-8859-1')) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xf6', data) + + def test_latin1_encoded_xmldecl(self): + text = u""" +
\xf6
+ """.encode('iso-8859-1') + events = list(XMLParser(StringIO(text))) + kind, data, pos = events[1] + self.assertEqual(Stream.TEXT, kind) + self.assertEqual(u'\xf6', data) + def test_html_entity_with_dtd(self): text = """ diff --git a/genshi/tests/template.py b/genshi/tests/template.py --- a/genshi/tests/template.py +++ b/genshi/tests/template.py @@ -1067,6 +1067,23 @@ """, str(tmpl.generate())) + def test_latin1_encoded_with_xmldecl(self): + tmpl = MarkupTemplate(u""" +
+ \xf6 +
""".encode('iso-8859-1'), encoding='iso-8859-1') + self.assertEqual(u"""
+ \xf6 +
""", unicode(tmpl.generate())) + + def test_latin1_encoded_explicit_encoding(self): + tmpl = MarkupTemplate(u"""
+ \xf6 +
""".encode('iso-8859-1'), encoding='iso-8859-1') + self.assertEqual(u"""
+ \xf6 +
""", unicode(tmpl.generate())) + class TextTemplateTestCase(unittest.TestCase): """Tests for text template processing.""" @@ -1090,6 +1107,10 @@ #end 'if foo'""") self.assertEqual('', str(tmpl.generate())) + def test_latin1_encoded(self): + text = u'$foo\xf6$bar'.encode('iso-8859-1') + tmpl = TextTemplate(text, encoding='iso-8859-1') + self.assertEqual(u'x\xf6y', unicode(tmpl.generate(foo='x', bar='y'))) class TemplateLoaderTestCase(unittest.TestCase): """Tests for the template loader.""" @@ -1233,6 +1254,24 @@
Included
""", tmpl2.generate().render()) + def test_load_with_default_encoding(self): + f = open(os.path.join(self.dirname, 'tmpl.html'), 'w') + try: + f.write(u'
\xf6
'.encode('iso-8859-1')) + finally: + f.close() + loader = TemplateLoader([self.dirname], default_encoding='iso-8859-1') + loader.load('tmpl.html') + + def test_load_with_explicit_encoding(self): + f = open(os.path.join(self.dirname, 'tmpl.html'), 'w') + try: + f.write(u'
\xf6
'.encode('iso-8859-1')) + finally: + f.close() + loader = TemplateLoader([self.dirname], default_encoding='utf-8') + loader.load('tmpl.html', encoding='iso-8859-1') + def suite(): suite = unittest.TestSuite()