cmlenz@407: # -*- coding: utf-8 -*- cmlenz@407: # aflett@724: # Copyright (C) 2007-2008 Edgewall Software cmlenz@407: # All rights reserved. cmlenz@407: # cmlenz@407: # This software is licensed as described in the file COPYING, which cmlenz@407: # you should have received as part of this distribution. The terms cmlenz@407: # are also available at http://genshi.edgewall.org/wiki/License. cmlenz@407: # cmlenz@407: # This software consists of voluntary contributions made by many cmlenz@407: # individuals. For the exact contribution history, see the revision cmlenz@407: # history and logs, available at http://genshi.edgewall.org/log/. cmlenz@407: cmlenz@407: """String interpolation routines, i.e. the splitting up a given text into some cmlenz@407: parts that are literal strings, and others that are Python expressions. cmlenz@407: """ cmlenz@407: cmlenz@407: from itertools import chain cmlenz@407: import os cmlenz@407: from tokenize import tokenprog cmlenz@407: cmlenz@407: from genshi.core import TEXT cmlenz@407: from genshi.template.base import TemplateSyntaxError, EXPR cmlenz@407: from genshi.template.eval import Expression cmlenz@407: cmlenz@407: __all__ = ['interpolate'] cmlenz@425: __docformat__ = 'restructuredtext en' cmlenz@407: cmlenz@407: NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' cmlenz@407: NAMECHARS = NAMESTART + '.0123456789' cmlenz@407: PREFIX = '$' cmlenz@407: aflett@718: def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'): cmlenz@407: """Parse the given string and extract expressions. cmlenz@407: cmlenz@425: This function is a generator that yields `TEXT` events for literal strings, cmlenz@425: and `EXPR` events for expressions, depending on the results of parsing the cmlenz@425: string. cmlenz@407: cmlenz@442: >>> for kind, data, pos in interpolate("hey ${foo}bar"): cmlenz@407: ... print kind, `data` cmlenz@442: TEXT u'hey ' cmlenz@407: EXPR Expression('foo') cmlenz@442: TEXT u'bar' cmlenz@407: cmlenz@425: :param text: the text to parse aflett@718: :param filepath: absolute path to the file in which the text was found aflett@718: (optional) cmlenz@425: :param lineno: the line number at which the text was found (optional) cmlenz@425: :param offset: the column number at which the text starts in the source cmlenz@425: (optional) cmlenz@442: :param lookup: the variable lookup mechanism; either "lenient" (the cmlenz@442: default), "strict", or a custom lookup class cmlenz@425: :return: a list of `TEXT` and `EXPR` events cmlenz@425: :raise TemplateSyntaxError: when a syntax error in an expression is cmlenz@425: encountered cmlenz@407: """ cmlenz@407: pos = [filepath, lineno, offset] cmlenz@407: cmlenz@407: textbuf = [] cmlenz@407: textpos = None cmlenz@422: for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]): cmlenz@407: if is_expr: cmlenz@407: if textbuf: cmlenz@407: yield TEXT, u''.join(textbuf), textpos cmlenz@407: del textbuf[:] cmlenz@407: textpos = None cmlenz@407: if chunk: cmlenz@407: try: cmlenz@442: expr = Expression(chunk.strip(), pos[0], pos[1], cmlenz@601: lookup=lookup) cmlenz@407: yield EXPR, expr, tuple(pos) cmlenz@407: except SyntaxError, err: cmlenz@422: raise TemplateSyntaxError(err, filepath, pos[1], cmlenz@407: pos[2] + (err.offset or 0)) cmlenz@407: else: cmlenz@407: textbuf.append(chunk) cmlenz@407: if textpos is None: cmlenz@407: textpos = tuple(pos) cmlenz@407: cmlenz@407: if '\n' in chunk: cmlenz@407: lines = chunk.splitlines() cmlenz@407: pos[1] += len(lines) - 1 cmlenz@407: pos[2] += len(lines[-1]) cmlenz@407: else: cmlenz@407: pos[2] += len(chunk) cmlenz@407: cmlenz@422: def lex(text, textpos, filepath): cmlenz@407: offset = pos = 0 cmlenz@407: end = len(text) cmlenz@407: escaped = False cmlenz@407: cmlenz@407: while 1: cmlenz@407: if escaped: cmlenz@407: offset = text.find(PREFIX, offset + 2) cmlenz@407: escaped = False cmlenz@407: else: cmlenz@407: offset = text.find(PREFIX, pos) cmlenz@407: if offset < 0 or offset == end - 1: cmlenz@407: break cmlenz@407: next = text[offset + 1] cmlenz@407: cmlenz@407: if next == '{': cmlenz@407: if offset > pos: cmlenz@407: yield False, text[pos:offset] cmlenz@407: pos = offset + 2 cmlenz@407: level = 1 cmlenz@407: while level: cmlenz@407: match = tokenprog.match(text, pos) cmlenz@407: if match is None: cmlenz@422: raise TemplateSyntaxError('invalid syntax', filepath, cmlenz@422: *textpos[1:]) cmlenz@407: pos = match.end() cmlenz@407: tstart, tend = match.regs[3] cmlenz@407: token = text[tstart:tend] cmlenz@407: if token == '{': cmlenz@407: level += 1 cmlenz@407: elif token == '}': cmlenz@407: level -= 1 cmlenz@407: yield True, text[offset + 2:pos - 1] cmlenz@407: cmlenz@407: elif next in NAMESTART: cmlenz@407: if offset > pos: cmlenz@407: yield False, text[pos:offset] cmlenz@407: pos = offset cmlenz@407: pos += 1 cmlenz@407: while pos < end: cmlenz@407: char = text[pos] cmlenz@407: if char not in NAMECHARS: cmlenz@407: break cmlenz@407: pos += 1 cmlenz@407: yield True, text[offset + 1:pos].strip() cmlenz@407: cmlenz@407: elif not escaped and next == PREFIX: cmlenz@526: if offset > pos: cmlenz@526: yield False, text[pos:offset] cmlenz@407: escaped = True cmlenz@407: pos = offset + 1 cmlenz@407: cmlenz@407: else: cmlenz@407: yield False, text[pos:offset + 1] cmlenz@407: pos = offset + 1 cmlenz@407: cmlenz@407: if pos < end: cmlenz@407: yield False, text[pos:]