500
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright (C) 2007 Edgewall Software
|
|
4 # All rights reserved.
|
|
5 #
|
|
6 # This software is licensed as described in the file COPYING, which
|
|
7 # you should have received as part of this distribution. The terms
|
|
8 # are also available at http://genshi.edgewall.org/wiki/License.
|
|
9 #
|
|
10 # This software consists of voluntary contributions made by many
|
|
11 # individuals. For the exact contribution history, see the revision
|
|
12 # history and logs, available at http://genshi.edgewall.org/log/.
|
|
13
|
|
"""String interpolation routines, i.e. the splitting up of a given text into
some parts that are literal strings, and others that are Python expressions.
"""
|
|
17
|
|
18 from itertools import chain
|
|
19 import os
|
|
20 from tokenize import tokenprog
|
|
21
|
|
22 from genshi.core import TEXT
|
|
23 from genshi.template.base import TemplateSyntaxError, EXPR
|
|
24 from genshi.template.eval import Expression
|
|
25
|
|
26 __all__ = ['interpolate']
|
|
27 __docformat__ = 'restructuredtext en'
|
|
28
|
|
29 NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
|
|
30 NAMECHARS = NAMESTART + '.0123456789'
|
|
31 PREFIX = '$'
|
|
32
|
|
def interpolate(text, basedir=None, filename=None, lineno=-1, offset=0,
                lookup='lenient'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print kind, `data`
    TEXT u'hey '
    EXPR Expression('foo')
    TEXT u'bar'

    :param text: the text to parse
    :param basedir: base directory of the file in which the text was found
                    (optional)
    :param filename: basename of the file in which the text was found (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "lenient" (the
                   default), "strict", or a custom lookup class
    :return: a generator of `TEXT` and `EXPR` events, each a
             ``(kind, data, pos)`` tuple
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Build the full path (for error reporting) from the base directory and
    # the basename, when both are available.
    filepath = filename
    if filepath and basedir:
        filepath = os.path.join(basedir, filepath)
    # Mutable [filepath, lineno, offset] triple, advanced in place below as
    # chunks are consumed; `lex` keeps a reference to it so syntax errors it
    # raises report the position reached so far.
    pos = [filepath, lineno, offset]

    textbuf = []   # pending literal chunks, joined and flushed as one TEXT event
    textpos = None  # position of the first chunk currently in textbuf
    # The trailing (True, '') sentinel forces a final flush of any buffered
    # literal text after `lex` is exhausted.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, u''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                    yield EXPR, expr, tuple(pos)
                except SyntaxError, err:
                    # Re-raise with template coordinates; err.offset is the
                    # column within the expression source, if known.
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
        else:
            textbuf.append(chunk)
            if textpos is None:
                textpos = tuple(pos)

        # Advance the (line, column) position past this chunk. Note: the
        # column is not reset to 0 on a newline here; it accumulates the
        # length of the chunk's last line -- TODO confirm intended.
        if '\n' in chunk:
            lines = chunk.splitlines()
            pos[1] += len(lines) - 1
            pos[2] += len(lines[-1])
        else:
            pos[2] += len(chunk)
|
|
92
|
|
def lex(text, textpos, filepath):
    """Split *text* around ``$``-prefixed substitutions.

    Generates ``(is_expr, chunk)`` pairs, where ``is_expr`` is True when the
    chunk is the source of a ``$name`` or ``${...}`` expression, and False
    when it is literal text.  A doubled ``$$`` escapes to a literal ``$``.

    :param text: the string to tokenize
    :param textpos: mutable [filepath, lineno, offset] triple used for the
                    position in syntax errors
    :param filepath: the path of the file being parsed, for error reporting
    """
    pos = 0          # start of the not-yet-emitted literal text
    offset = 0       # position of the most recently located PREFIX
    end = len(text)
    escaped = False  # set right after a '$$' escape is recognized

    while True:
        if escaped:
            # Resume the search beyond both characters of the '$$' escape;
            # the literal text (including the second '$') stays pending
            # from `pos`.
            offset = text.find(PREFIX, offset + 2)
            escaped = False
        else:
            offset = text.find(PREFIX, pos)
        # Stop when no prefix remains, or it is the very last character.
        if offset < 0 or offset == end - 1:
            break
        follow = text[offset + 1]

        if follow == '{':
            # ${...} expression: flush any pending literal text first.
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 2
            depth = 1
            # Scan forward one Python token at a time until the curly
            # braces balance out again.
            while depth:
                match = tokenprog.match(text, pos)
                if match is None:
                    raise TemplateSyntaxError('invalid syntax', filepath,
                                              *textpos[1:])
                pos = match.end()
                tstart, tend = match.regs[3]
                token = text[tstart:tend]
                if token == '{':
                    depth += 1
                elif token == '}':
                    depth -= 1
            yield True, text[offset + 2:pos - 1]

        elif follow in NAMESTART:
            # Bare $name expression: flush any pending literal text first.
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 1
            while pos < end and text[pos] in NAMECHARS:
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif follow == PREFIX:
            # '$$' escape: remember it; the pending literal (which will
            # contain exactly one '$') is emitted on a later flush.
            escaped = True
            pos = offset + 1

        else:
            # A '$' followed by anything else is plain literal text,
            # including the '$' itself.
            yield False, text[pos:offset + 1]
            pos = offset + 1

    # Emit whatever literal text remains after the last prefix.
    if pos < end:
        yield False, text[pos:]
|