changeset 399:f0b785d3d407 trunk

Rework parsing of expressions in template text, to be able to: * handle dict literals (as well as strings containing the character '}') inside the expression (#37), and * allow escaped dollar signs in front of full expressions (#92)
author cmlenz
date Thu, 18 Jan 2007 14:52:42 +0000
parents 31742fe6d47e
children e29a94b3ba0c
files ChangeLog genshi/template/core.py genshi/template/tests/core.py
diffstat 3 files changed, 170 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -30,7 +30,10 @@
  * `MarkupTemplate`s can now be instantiated from markup streams, in addition
    to strings and file-like objects (ticket #69).
  * Improve handling of incorrectly nested tags in the HTML parser.
- * Template includes can you be nested inside fallback content.
+ * Template includes can now be nested inside fallback content.
+ * Expressions can now contain dict literals (ticket #37).
+ * It is now possible to have one or more escaped dollar signs in front of a 
+   full expression (ticket #92).
 
 
 Version 0.3.6
--- a/genshi/template/core.py
+++ b/genshi/template/core.py
@@ -17,9 +17,11 @@
     class deque(list):
         def appendleft(self, x): self.insert(0, x)
         def popleft(self): return self.pop(0)
+from itertools import chain
 import os
 import re
 from StringIO import StringIO
+from tokenize import tokenprog
 
 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
 from genshi.template.eval import Expression
@@ -198,9 +200,6 @@
         """
         raise NotImplementedError
 
-    _FULL_EXPR_RE = re.compile(r'(?<!\$)\$\{(.+?)\}', re.DOTALL)
-    _SHORT_EXPR_RE = re.compile(r'(?<!\$)\$([a-zA-Z_][a-zA-Z0-9_\.]*)')
-
     def _interpolate(cls, text, basedir=None, filename=None, lineno=-1,
                      offset=0):
         """Parse the given string and extract expressions.
@@ -216,29 +215,94 @@
         filepath = filename
         if filepath and basedir:
             filepath = os.path.join(basedir, filepath)
-        def _interpolate(text, patterns, lineno=lineno, offset=offset):
-            for idx, grp in enumerate(patterns.pop(0).split(text)):
-                if idx % 2:
+
+        namestart = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+        namechars = namestart + '.0123456789'
+
+        def _split():
+            pos = 0
+            end = len(text)
+            escaped = False
+
+            while 1:
+                if escaped:
+                    offset = text.find('$', offset + 2)
+                    escaped = False
+                else:
+                    offset = text.find('$', pos)
+                if offset < 0 or offset == end - 1:
+                    break
+                next = text[offset + 1]
+
+                if next == '{':
+                    if offset > pos:
+                        yield False, text[pos:offset]
+                    pos = offset + 2
+                    level = 1
+                    while level:
+                        match = tokenprog.match(text, pos)
+                        if match is None:
+                            raise TemplateSyntaxError('invalid syntax',
+                                                      filename, lineno, offset)
+                        pos = match.end()
+                        tstart, tend = match.regs[3]
+                        token = text[tstart:tend]
+                        if token == '{':
+                            level += 1
+                        elif token == '}':
+                            level -= 1
+                    yield True, text[offset + 2:pos - 1]
+
+                elif next in namestart:
+                    if offset > pos:
+                        yield False, text[pos:offset]
+                        pos = offset
+                    pos += 1
+                    while pos < end:
+                        char = text[pos]
+                        if char not in namechars:
+                            break
+                        pos += 1
+                    yield True, text[offset + 1:pos].strip()
+
+                elif not escaped and next == '$':
+                    escaped = True
+                    pos = offset + 1
+
+                else:
+                    yield False, text[pos:offset + 1]
+                    pos = offset + 1
+
+            if pos < end:
+                yield False, text[pos:]
+
+        textbuf = []
+        textpos = None
+        for is_expr, chunk in chain(_split(), [(True, '')]):
+            if is_expr:
+                if textbuf:
+                    yield TEXT, u''.join(textbuf), textpos
+                    del textbuf[:]
+                    textpos = None
+                if chunk:
                     try:
-                        yield EXPR, Expression(grp.strip(), filepath, lineno), \
-                              (filename, lineno, offset)
+                        expr = Expression(chunk.strip(), filename, lineno)
+                        yield EXPR, expr, (filename, lineno, offset)
                     except SyntaxError, err:
-                        raise TemplateSyntaxError(err, filepath, lineno,
+                        raise TemplateSyntaxError(err, filename, lineno,
                                                   offset + (err.offset or 0))
-                elif grp:
-                    if patterns:
-                        for result in _interpolate(grp, patterns[:]):
-                            yield result
-                    else:
-                        yield TEXT, grp.replace('$$', '$'), \
-                              (filename, lineno, offset)
-                if '\n' in grp:
-                    lines = grp.splitlines()
-                    lineno += len(lines) - 1
-                    offset += len(lines[-1])
-                else:
-                    offset += len(grp)
-        return _interpolate(text, [cls._FULL_EXPR_RE, cls._SHORT_EXPR_RE])
+            else:
+                textbuf.append(chunk)
+                if textpos is None:
+                    textpos = (filename, lineno, offset)
+
+            if '\n' in chunk:
+                lines = chunk.splitlines()
+                lineno += len(lines) - 1
+                offset += len(lines[-1])
+            else:
+                offset += len(chunk)
+
     _interpolate = classmethod(_interpolate)
 
     def _prepare(self, stream):
--- a/genshi/template/tests/core.py
+++ b/genshi/template/tests/core.py
@@ -15,7 +15,7 @@
 import unittest
 
 from genshi.core import Stream
-from genshi.template.core import Template
+from genshi.template.core import Template, TemplateSyntaxError
 
 
 class TemplateTestCase(unittest.TestCase):
@@ -41,12 +41,34 @@
         self.assertEqual(Stream.TEXT, parts[0][0])
         self.assertEqual('${bla}', parts[0][1])
 
+    def test_interpolate_dobuleescaped(self):
+        parts = list(Template._interpolate('$$${bla}'))
+        self.assertEqual(2, len(parts))
+        self.assertEqual(Stream.TEXT, parts[0][0])
+        self.assertEqual('$', parts[0][1])
+        self.assertEqual(Template.EXPR, parts[1][0])
+        self.assertEqual('bla', parts[1][1].source)
+
     def test_interpolate_short(self):
         parts = list(Template._interpolate('$bla'))
         self.assertEqual(1, len(parts))
         self.assertEqual(Template.EXPR, parts[0][0])
         self.assertEqual('bla', parts[0][1].source)
 
+    def test_interpolate_short_escaped(self):
+        parts = list(Template._interpolate('$$bla'))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Stream.TEXT, parts[0][0])
+        self.assertEqual('$bla', parts[0][1])
+
+    def test_interpolate_short_doubleescaped(self):
+        parts = list(Template._interpolate('$$$bla'))
+        self.assertEqual(2, len(parts))
+        self.assertEqual(Stream.TEXT, parts[0][0])
+        self.assertEqual('$', parts[0][1])
+        self.assertEqual(Template.EXPR, parts[1][0])
+        self.assertEqual('bla', parts[1][1].source)
+
     def test_interpolate_short_starting_with_underscore(self):
         parts = list(Template._interpolate('$_bla'))
         self.assertEqual(1, len(parts))
@@ -83,6 +105,62 @@
         self.assertEqual(Template.EXPR, parts[0][0])
         self.assertEqual('foo0', parts[0][1].source)
 
+    def test_interpolate_short_starting_with_digit(self):
+        parts = list(Template._interpolate('$0bla'))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Stream.TEXT, parts[0][0])
+        self.assertEqual('$0bla', parts[0][1])
+
+    def test_interpolate_short_containing_digit(self):
+        parts = list(Template._interpolate('$foo0'))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual('foo0', parts[0][1].source)
+
+    def test_interpolate_full_nested_brackets(self):
+        parts = list(Template._interpolate('${{1:2}}'))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual('{1:2}', parts[0][1].source)
+
+    def test_interpolate_full_mismatched_brackets(self):
+        try:
+            list(Template._interpolate('${{1:2}'))
+        except TemplateSyntaxError, e:
+            pass
+        else:
+            self.fail('Expected TemplateSyntaxError')
+
+    def test_interpolate_quoted_brackets_1(self):
+        parts = list(Template._interpolate('${"}"}'))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual('"}"', parts[0][1].source)
+
+    def test_interpolate_quoted_brackets_2(self):
+        parts = list(Template._interpolate("${'}'}"))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual("'}'", parts[0][1].source)
+
+    def test_interpolate_quoted_brackets_3(self):
+        parts = list(Template._interpolate("${'''}'''}"))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual("'''}'''", parts[0][1].source)
+
+    def test_interpolate_quoted_brackets_4(self):
+        parts = list(Template._interpolate("${'''}\"\"\"'''}"))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual("'''}\"\"\"'''", parts[0][1].source)
+
+    def test_interpolate_quoted_brackets_5(self):
+        parts = list(Template._interpolate(r"${'\'}'}"))
+        self.assertEqual(1, len(parts))
+        self.assertEqual(Template.EXPR, parts[0][0])
+        self.assertEqual(r"'\'}'", parts[0][1].source)
+
     def test_interpolate_mixed1(self):
         parts = list(Template._interpolate('$foo bar $baz'))
         self.assertEqual(3, len(parts))
Copyright (C) 2012-2017 Edgewall Software