# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2009 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.

"""String interpolation routines, i.e. the splitting up of a given text into
some parts that are literal strings, and others that are Python expressions.
"""

from itertools import chain
import os
import re
from tokenize import PseudoToken

from genshi.core import TEXT
from genshi.template.base import TemplateSyntaxError, EXPR
from genshi.template.eval import Expression

__all__ = ['interpolate']
__docformat__ = 'restructuredtext en'

NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NAMECHARS = NAMESTART + '.0123456789'
PREFIX = '$'

# Matches either a complete triple-quoted string literal (groups 1 and 2) or
# a single Python pseudo-token; the token itself is captured by group 3, which
# is what `lex` inspects to track brace nesting.
#
# The DOTALL flag is passed explicitly rather than as a trailing inline
# ``(?s)``: global inline flags that are not at the start of the pattern are
# rejected by newer versions of the `re` module.
#
# NOTE(review): the tail of the first alternative and the `PseudoToken`
# argument were truncated in the copy this file was restored from and have
# been reconstructed -- verify against the upstream repository.
token_re = re.compile('%s|%s' % (
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken
), re.DOTALL)


# NOTE(review): the signature line and the opening of the docstring were lost
# in the copy this file was restored from; the parameter names follow the
# documented parameters below, the defaults were reconstructed -- verify
# against the upstream repository.
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print('%s %r' % (kind, data))
    TEXT 'hey '
    EXPR Expression('foo')
    TEXT 'bar'

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :return: an iterator over ``(kind, data, pos)`` events, where ``kind`` is
             `TEXT` or `EXPR` and ``pos`` is a ``(filepath, lineno, offset)``
             tuple
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Mutable on purpose: `lex` receives this very list and reads the current
    # line/column from it when it has to report an error.
    pos = [filepath, lineno, offset]

    textbuf = []
    textpos = None
    # The trailing ``(True, '')`` sentinel flushes any buffered literal text
    # after the lexer is exhausted.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, ''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                # Only the compilation of the expression is guarded, so that
                # a SyntaxError thrown into the generator by the consumer is
                # not mistaken for a template error.
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                except SyntaxError as err:
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
                yield EXPR, expr, tuple(pos)
        else:
            textbuf.append(chunk)
            if textpos is None:
                textpos = tuple(pos)

        # Advance the running position past this chunk.  Every newline moves
        # on by one line (including a newline that ends the chunk), and the
        # column restarts after the last newline.
        newlines = chunk.count('\n')
        if newlines:
            pos[1] += newlines
            pos[2] = len(chunk) - chunk.rfind('\n') - 1
        else:
            pos[2] += len(chunk)


def lex(text, textpos, filepath):
    """Split `text` into literal and expression chunks.

    This is a generator yielding ``(is_expr, chunk)`` tuples: ``is_expr`` is
    ``False`` for literal text and ``True`` for the source of an expression.
    Expressions are written either as ``${...}`` (braces may nest; the chunk
    is the text between the outer braces) or as ``$name`` (a run of
    `NAMECHARS` starting with a `NAMESTART` character).  A doubled prefix
    (``$$``) is an escape that produces a single literal ``$``.

    :param text: the text to split
    :param textpos: a ``[filepath, lineno, offset]`` sequence; its second and
                    third items are used as the position of a reported error
    :param filepath: the file name to report in errors
    :raise TemplateSyntaxError: if no token can be matched inside a ``${...}``
                                expression (for example because the closing
                                brace is missing)
    """
    offset = pos = 0
    end = len(text)
    escaped = False

    while True:
        if escaped:
            # Skip over both characters of the ``$$`` escape just handled.
            offset = text.find(PREFIX, offset + 2)
            escaped = False
        else:
            offset = text.find(PREFIX, pos)
        # No further prefix, or a prefix as the very last character: the
        # remainder is plain text.
        if offset < 0 or offset == end - 1:
            break
        next_char = text[offset + 1]

        if next_char == '{':
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 2
            level = 1
            # Tokenize the expression so that braces inside string literals
            # do not affect the nesting level.
            while level:
                match = token_re.match(text, pos)
                if match is None:
                    raise TemplateSyntaxError('invalid syntax', filepath,
                                              *textpos[1:])
                pos = match.end()
                # Group 3 is the pseudo-token; it is empty when the
                # triple-quoted string alternative matched instead.
                tstart, tend = match.regs[3]
                token = text[tstart:tend]
                if token == '{':
                    level += 1
                elif token == '}':
                    level -= 1
            # Exclude the leading ``${`` and the closing ``}``.
            yield True, text[offset + 2:pos - 1]

        elif next_char in NAMESTART:
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 1
            while pos < end:
                char = text[pos]
                if char not in NAMECHARS:
                    break
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif next_char == PREFIX:
            # ``$$`` escape: emit the text before it and leave `pos` on the
            # second ``$`` so that it ends up in the next literal chunk.
            if offset > pos:
                yield False, text[pos:offset]
            escaped = True
            pos = offset + 1

        else:
            # A prefix followed by anything else is ordinary text.
            yield False, text[pos:offset + 1]
            pos = offset + 1

    if pos < end:
        yield False, text[pos:]