cmlenz@407: # -*- coding: utf-8 -*-
cmlenz@407: #
aflett@724: # Copyright (C) 2007-2008 Edgewall Software
cmlenz@407: # All rights reserved.
cmlenz@407: #
cmlenz@407: # This software is licensed as described in the file COPYING, which
cmlenz@407: # you should have received as part of this distribution. The terms
cmlenz@407: # are also available at http://genshi.edgewall.org/wiki/License.
cmlenz@407: #
cmlenz@407: # This software consists of voluntary contributions made by many
cmlenz@407: # individuals. For the exact contribution history, see the revision
cmlenz@407: # history and logs, available at http://genshi.edgewall.org/log/.
cmlenz@407: 
cmlenz@407: """String interpolation routines, i.e. the splitting up a given text into some
cmlenz@407: parts that are literal strings, and others that are Python expressions.
cmlenz@407: """
cmlenz@407: 
cmlenz@407: from itertools import chain
cmlenz@407: import os
cmlenz@407: from tokenize import tokenprog
cmlenz@407: 
cmlenz@407: from genshi.core import TEXT
cmlenz@407: from genshi.template.base import TemplateSyntaxError, EXPR
cmlenz@407: from genshi.template.eval import Expression
cmlenz@407: 
# Names exported by ``from <this module> import *``; only `interpolate` is
# listed.
__all__ = ['interpolate']
# Declares the markup and language of the docstrings in this module
# (reStructuredText, English).
__docformat__ = 'restructuredtext en'
cmlenz@407: 
# Characters allowed as the first character of a bare ``$name`` expression.
NAMESTART = ('abcdefghijklmnopqrstuvwxyz'
             'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
             '_')
# Characters allowed after the first one; the dot lets a bare expression
# continue into dotted references such as ``$foo.bar``.
NAMECHARS = NAMESTART + '.' + '0123456789'
# Character that introduces an expression in the text being lexed.
PREFIX = '$'
cmlenz@407: 
aflett@718: def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
cmlenz@407:     """Parse the given string and extract expressions.
cmlenz@407:     
cmlenz@425:     This function is a generator that yields `TEXT` events for literal strings,
cmlenz@425:     and `EXPR` events for expressions, depending on the results of parsing the
cmlenz@425:     string.
cmlenz@407:     
cmlenz@442:     >>> for kind, data, pos in interpolate("hey ${foo}bar"):
cmlenz@407:     ...     print kind, `data`
cmlenz@442:     TEXT u'hey '
cmlenz@407:     EXPR Expression('foo')
cmlenz@442:     TEXT u'bar'
cmlenz@407:     
cmlenz@425:     :param text: the text to parse
aflett@718:     :param filepath: absolute path to the file in which the text was found
aflett@718:                      (optional)
cmlenz@425:     :param lineno: the line number at which the text was found (optional)
cmlenz@425:     :param offset: the column number at which the text starts in the source
cmlenz@425:                    (optional)
cmlenz@442:     :param lookup: the variable lookup mechanism; either "lenient" (the
cmlenz@442:                    default), "strict", or a custom lookup class
cmlenz@425:     :return: a list of `TEXT` and `EXPR` events
cmlenz@425:     :raise TemplateSyntaxError: when a syntax error in an expression is
cmlenz@425:                                 encountered
cmlenz@407:     """
cmlenz@407:     pos = [filepath, lineno, offset]
cmlenz@407: 
cmlenz@407:     textbuf = []
cmlenz@407:     textpos = None
cmlenz@422:     for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
cmlenz@407:         if is_expr:
cmlenz@407:             if textbuf:
cmlenz@407:                 yield TEXT, u''.join(textbuf), textpos
cmlenz@407:                 del textbuf[:]
cmlenz@407:                 textpos = None
cmlenz@407:             if chunk:
cmlenz@407:                 try:
cmlenz@442:                     expr = Expression(chunk.strip(), pos[0], pos[1],
cmlenz@601:                                       lookup=lookup)
cmlenz@407:                     yield EXPR, expr, tuple(pos)
cmlenz@407:                 except SyntaxError, err:
cmlenz@422:                     raise TemplateSyntaxError(err, filepath, pos[1],
cmlenz@407:                                               pos[2] + (err.offset or 0))
cmlenz@407:         else:
cmlenz@407:             textbuf.append(chunk)
cmlenz@407:             if textpos is None:
cmlenz@407:                 textpos = tuple(pos)
cmlenz@407: 
cmlenz@407:         if '\n' in chunk:
cmlenz@407:             lines = chunk.splitlines()
cmlenz@407:             pos[1] += len(lines) - 1
cmlenz@407:             pos[2] += len(lines[-1])
cmlenz@407:         else:
cmlenz@407:             pos[2] += len(chunk)
cmlenz@407: 
def lex(text, textpos, filepath):
    """Split `text` into literal chunks and expression sources.

    This is a generator yielding ``(is_expr, chunk)`` pairs.  ``is_expr`` is
    false for literal text and true for the source of an expression written
    as ``${...}`` or ``$name``.  A doubled prefix (``$$``) acts as an escape:
    the first character is dropped and the second one ends up in the literal
    text.

    :param text: the text to split
    :param textpos: a sequence whose items after the first are passed on as
                    the position arguments of the `TemplateSyntaxError`
                    raised for a malformed ``${...}`` expression
    :param filepath: the file path reported in such an error
    :raise TemplateSyntaxError: when no token can be matched before the
                                braces of a ``${...}`` expression balance
    """
    end = len(text)
    pos = 0       # start of the text that has not been emitted yet
    search = 0    # index from which to look for the next prefix character

    while True:
        offset = text.find(PREFIX, search)
        if offset < 0 or offset == end - 1:
            # Either no prefix is left, or it is the very last character and
            # therefore cannot introduce anything.
            break
        follower = text[offset + 1]

        if follower == '{':
            if pos < offset:
                yield False, text[pos:offset]
            pos = offset + 2
            # Consume tokens until the opening brace is balanced, so braces
            # that are tokens of the expression itself are skipped over.
            depth = 1
            while depth:
                match = tokenprog.match(text, pos)
                if match is None:
                    raise TemplateSyntaxError('invalid syntax', filepath,
                                              *textpos[1:])
                pos = match.end()
                tok_start, tok_end = match.regs[3]
                token = text[tok_start:tok_end]
                if token == '{':
                    depth += 1
                elif token == '}':
                    depth -= 1
            # Everything between "${" and the closing brace
            yield True, text[offset + 2:pos - 1]

        elif follower in NAMESTART:
            if pos < offset:
                yield False, text[pos:offset]
            # Skip the prefix, then extend over every name character
            pos = offset + 1
            while pos < end and text[pos] in NAMECHARS:
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif follower == PREFIX:
            if pos < offset:
                yield False, text[pos:offset]
            # Keep the second prefix character as pending literal text, but
            # resume searching after it so it is not treated as a prefix.
            pos = offset + 1
            search = offset + 2
            continue

        else:
            # A prefix followed by anything else is plain text
            yield False, text[pos:offset + 1]
            pos = offset + 1

        search = pos

    if pos < end:
        yield False, text[pos:]