changeset 315:b12c045c1e81 stable-0.3.x

Ported [383:387] to 0.3.x stable branch.
author cmlenz
date Tue, 24 Oct 2006 15:00:27 +0000
parents f1569069b6e8
children f738ffe8f1dd
files genshi/eval.py genshi/input.py genshi/template.py genshi/tests/input.py genshi/tests/template.py
diffstat 5 files changed, 60 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/eval.py
+++ b/genshi/eval.py
@@ -67,10 +67,8 @@
     def __init__(self, source, filename=None, lineno=-1):
         if isinstance(source, basestring):
             self.source = source
-            if isinstance(source, unicode):
-                source = '\xef\xbb\xbf' + source.encode('utf-8')
-            self.code = _compile(parse(source, 'eval'), self.source,
-                                 filename=filename, lineno=lineno)
+            self.code = _compile(_parse(source), self.source, filename=filename,
+                                 lineno=lineno)
         else:
             assert isinstance(source, ast.Node)
             self.source = '?'
@@ -152,6 +150,11 @@
         raise NameError('Variable "%s" is not defined' % self.name)
 
 
+def _parse(source, mode='eval'):
+    if isinstance(source, unicode):
+        source = '\xef\xbb\xbf' + source.encode('utf-8')
+    return parse(source, mode)
+
 def _compile(node, source=None, filename=None, lineno=-1):
     tree = ExpressionASTTransformer().visit(node)
     if isinstance(filename, unicode):
--- a/genshi/input.py
+++ b/genshi/input.py
@@ -61,7 +61,7 @@
     _external_dtd = '\n'.join(_entitydefs)
 
     def __init__(self, source, filename=None):
-        """Initialize the parser for the given XML text.
+        """Initialize the parser for the given XML input.
         
         @param source: the XML text as a file-like object
         @param filename: the name of the file, if appropriate
@@ -235,10 +235,17 @@
                               'hr', 'img', 'input', 'isindex', 'link', 'meta',
                               'param'])
 
-    def __init__(self, source, filename=None):
+    def __init__(self, source, filename=None, encoding='utf-8'):
+        """Initialize the parser for the given HTML input.
+        
+        @param source: the HTML text as a file-like object
+        @param filename: the name of the file, if known
+        @param encoding: encoding of the file; ignored if the input is unicode
+        """
         html.HTMLParser.__init__(self)
         self.source = source
         self.filename = filename
+        self.encoding = encoding
         self._queue = []
         self._open_tags = []
 
@@ -287,8 +294,10 @@
         fixed_attrib = []
         for name, value in attrib: # Fixup minimized attributes
             if value is None:
-                value = name
-            fixed_attrib.append((name, unicode(stripentities(value))))
+                value = unicode(name)
+            elif not isinstance(value, unicode):
+                value = value.decode(self.encoding, 'replace')
+            fixed_attrib.append((name, stripentities(value)))
 
         self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
         if tag in self._EMPTY_ELEMS:
@@ -306,6 +315,8 @@
             self._enqueue(END, QName(tag))
 
     def handle_data(self, text):
+        if not isinstance(text, unicode):
+            text = text.decode(self.encoding, 'replace')
         self._enqueue(TEXT, text)
 
     def handle_charref(self, name):
@@ -328,8 +339,8 @@
         self._enqueue(COMMENT, text)
 
 
-def HTML(text):
-    return Stream(list(HTMLParser(StringIO(text))))
+def HTML(text, encoding='utf-8'):
+    return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
 
 def _coalesce(stream):
     """Coalesces adjacent TEXT events into a single event."""
--- a/genshi/template.py
+++ b/genshi/template.py
@@ -27,7 +27,7 @@
 
 from genshi.core import Attrs, Namespace, Stream, StreamEventKind, _ensure
 from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT
-from genshi.eval import Expression
+from genshi.eval import Expression, _parse
 from genshi.input import XMLParser
 from genshi.path import Path
 
@@ -357,7 +357,7 @@
     def __init__(self, args, namespaces=None, filename=None, lineno=-1,
                  offset=-1):
         Directive.__init__(self, None, namespaces, filename, lineno, offset)
-        ast = compiler.parse(args, 'eval').node
+        ast = _parse(args).node
         self.args = []
         self.defaults = {}
         if isinstance(ast, compiler.ast.CallFunc):
@@ -430,7 +430,7 @@
             raise TemplateSyntaxError('"in" keyword missing in "for" directive',
                                       filename, lineno, offset)
         assign, value = value.split(' in ', 1)
-        ast = compiler.parse(assign, 'exec')
+        ast = _parse(assign, 'exec')
         self.assign = _assignment(ast.node.nodes[0].expr)
         self.filename = filename
         Directive.__init__(self, value.strip(), namespaces, filename, lineno,
@@ -739,7 +739,7 @@
         self.vars = []
         value = value.strip()
         try:
-            ast = compiler.parse(value, 'exec').node
+            ast = _parse(value, 'exec').node
             for node in ast.nodes:
                 if isinstance(node, compiler.ast.Discard):
                     continue
@@ -837,15 +837,17 @@
         @param offset: the column number at which the text starts in the source
             (optional)
         """
-        def _interpolate(text, patterns, filename=filename, lineno=lineno,
-                         offset=offset):
+        filepath = filename
+        if filepath and basedir:
+            filepath = os.path.join(basedir, filepath)
+        def _interpolate(text, patterns, lineno=lineno, offset=offset):
             for idx, grp in enumerate(patterns.pop(0).split(text)):
                 if idx % 2:
                     try:
-                        yield EXPR, Expression(grp.strip(), filename, lineno), \
+                        yield EXPR, Expression(grp.strip(), filepath, lineno), \
                               (filename, lineno, offset)
                     except SyntaxError, err:
-                        raise TemplateSyntaxError(err, filename, lineno,
+                        raise TemplateSyntaxError(err, filepath, lineno,
                                                   offset + (err.offset or 0))
                 elif grp:
                     if patterns:
--- a/genshi/tests/input.py
+++ b/genshi/tests/input.py
@@ -120,6 +120,20 @@
         if sys.version_info[:2] >= (2, 4):
             self.assertEqual((None, 1, 6), pos)
 
+    def test_input_encoding_text(self):
+        text = u'<div>\xf6</div>'.encode('iso-8859-1')
+        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, data, pos = events[1]
+        self.assertEqual(Stream.TEXT, kind)
+        self.assertEqual(u'\xf6', data)
+
+    def test_input_encoding_attribute(self):
+        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
+        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        kind, (tag, attrib), pos = events[0]
+        self.assertEqual(Stream.START, kind)
+        self.assertEqual(u'\xf6', attrib.get('title'))
+
     def test_unicode_input(self):
         text = u'<div>\u2013</div>'
         events = list(HTMLParser(StringIO(text)))
--- a/genshi/tests/template.py
+++ b/genshi/tests/template.py
@@ -826,6 +826,18 @@
             here are two semicolons: ;;
         </div>""", str(tmpl.generate()))
 
+    def test_unicode_expr(self):
+        tmpl = MarkupTemplate("""<div xmlns:py="http://genshi.edgewall.org/">
+          <span py:with="weeks=(u'一', u'二', u'三', u'四', u'五', u'六', u'日')">
+            $weeks
+          </span>
+        </div>""")
+        self.assertEqual("""<div>
+          <span>
+            一二三四五六日
+          </span>
+        </div>""", str(tmpl.generate()))
+
 
 class TemplateTestCase(unittest.TestCase):
     """Tests for basic template processing, expression evaluation and error
Copyright (C) 2012-2017 Edgewall Software