# HG changeset patch # User cmlenz # Date 1161702027 0 # Node ID b12c045c1e8106cf40e447209e2eda73c2fa8c08 # Parent f1569069b6e8e9f9b20069aa611c0f5188cb4330 Ported [383:387] to 0.3.x stable branch. diff --git a/genshi/eval.py b/genshi/eval.py --- a/genshi/eval.py +++ b/genshi/eval.py @@ -67,10 +67,8 @@ def __init__(self, source, filename=None, lineno=-1): if isinstance(source, basestring): self.source = source - if isinstance(source, unicode): - source = '\xef\xbb\xbf' + source.encode('utf-8') - self.code = _compile(parse(source, 'eval'), self.source, - filename=filename, lineno=lineno) + self.code = _compile(_parse(source), self.source, filename=filename, + lineno=lineno) else: assert isinstance(source, ast.Node) self.source = '?' @@ -152,6 +150,11 @@ raise NameError('Variable "%s" is not defined' % self.name) +def _parse(source, mode='eval'): + if isinstance(source, unicode): + source = '\xef\xbb\xbf' + source.encode('utf-8') + return parse(source, mode) + def _compile(node, source=None, filename=None, lineno=-1): tree = ExpressionASTTransformer().visit(node) if isinstance(filename, unicode): diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -61,7 +61,7 @@ _external_dtd = '\n'.join(_entitydefs) def __init__(self, source, filename=None): - """Initialize the parser for the given XML text. + """Initialize the parser for the given XML input. @param source: the XML text as a file-like object @param filename: the name of the file, if appropriate @@ -235,10 +235,17 @@ 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']) - def __init__(self, source, filename=None): + def __init__(self, source, filename=None, encoding='utf-8'): + """Initialize the parser for the given HTML input. 
+ + @param source: the HTML text as a file-like object + @param filename: the name of the file, if known + @param encoding: encoding of the file; ignored if the input is unicode + """ html.HTMLParser.__init__(self) self.source = source self.filename = filename + self.encoding = encoding self._queue = [] self._open_tags = [] @@ -287,8 +294,10 @@ fixed_attrib = [] for name, value in attrib: # Fixup minimized attributes if value is None: - value = name - fixed_attrib.append((name, unicode(stripentities(value)))) + value = unicode(name) + elif not isinstance(value, unicode): + value = value.decode(self.encoding, 'replace') + fixed_attrib.append((name, stripentities(value))) self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) if tag in self._EMPTY_ELEMS: @@ -306,6 +315,8 @@ self._enqueue(END, QName(tag)) def handle_data(self, text): + if not isinstance(text, unicode): + text = text.decode(self.encoding, 'replace') self._enqueue(TEXT, text) def handle_charref(self, name): @@ -328,8 +339,8 @@ self._enqueue(COMMENT, text) -def HTML(text): - return Stream(list(HTMLParser(StringIO(text)))) +def HTML(text, encoding='utf-8'): + return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) def _coalesce(stream): """Coalesces adjacent TEXT events into a single event.""" diff --git a/genshi/template.py b/genshi/template.py --- a/genshi/template.py +++ b/genshi/template.py @@ -27,7 +27,7 @@ from genshi.core import Attrs, Namespace, Stream, StreamEventKind, _ensure from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT -from genshi.eval import Expression +from genshi.eval import Expression, _parse from genshi.input import XMLParser from genshi.path import Path @@ -357,7 +357,7 @@ def __init__(self, args, namespaces=None, filename=None, lineno=-1, offset=-1): Directive.__init__(self, None, namespaces, filename, lineno, offset) - ast = compiler.parse(args, 'eval').node + ast = _parse(args).node self.args = [] self.defaults = {} if isinstance(ast, 
compiler.ast.CallFunc): @@ -430,7 +430,7 @@ raise TemplateSyntaxError('"in" keyword missing in "for" directive', filename, lineno, offset) assign, value = value.split(' in ', 1) - ast = compiler.parse(assign, 'exec') + ast = _parse(assign, 'exec') self.assign = _assignment(ast.node.nodes[0].expr) self.filename = filename Directive.__init__(self, value.strip(), namespaces, filename, lineno, @@ -739,7 +739,7 @@ self.vars = [] value = value.strip() try: - ast = compiler.parse(value, 'exec').node + ast = _parse(value, 'exec').node for node in ast.nodes: if isinstance(node, compiler.ast.Discard): continue @@ -837,15 +837,17 @@ @param offset: the column number at which the text starts in the source (optional) """ - def _interpolate(text, patterns, filename=filename, lineno=lineno, - offset=offset): + filepath = filename + if filepath and basedir: + filepath = os.path.join(basedir, filepath) + def _interpolate(text, patterns, lineno=lineno, offset=offset): for idx, grp in enumerate(patterns.pop(0).split(text)): if idx % 2: try: - yield EXPR, Expression(grp.strip(), filename, lineno), \ + yield EXPR, Expression(grp.strip(), filepath, lineno), \ (filename, lineno, offset) except SyntaxError, err: - raise TemplateSyntaxError(err, filename, lineno, + raise TemplateSyntaxError(err, filepath, lineno, offset + (err.offset or 0)) elif grp: if patterns: diff --git a/genshi/tests/input.py b/genshi/tests/input.py --- a/genshi/tests/input.py +++ b/genshi/tests/input.py @@ -120,6 +120,20 @@ if sys.version_info[:2] >= (2, 4): self.assertEqual((None, 1, 6), pos) + def test_input_encoding_text(self): + text = u'