changeset 329:d81532240de7 experimental-compiler

Inlined generation some more, with the adapters module converting events back to Genshi-style events when needed. Added PostWhitespaceFilter as a proof of concept (it greatly slows down execution).
author zzzeek
date Sun, 05 Nov 2006 20:54:03 +0000
parents f013a84050ac
children d8fc236ca3d8
files genshi/codegen/adapters.py genshi/codegen/generator.py genshi/codegen/output.py genshi/codegen/serialize.py genshi/codegen/tests/template.py genshi/codegen/tests/test_generator.py
diffstat 6 files changed, 248 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/adapters.py
@@ -0,0 +1,69 @@
+"""a set of Inline adapters, which convert from inlined structures to Genshi core structures"""
+
+from genshi.path import Path
+
+def get_attrib(attrib, name, default=None):
+    """return the value of the attribute called `name` from a list of (name, value) tuples, or `default` if it is absent; similar to genshi.core.Attrib"""
+    for attr, value in attrib:
+        if attr == name:
+            return value
+    return default
+
+class InlineStream(object):
+    """works similarly to genshi.core.Stream"""
+    def __init__(self, generator, context):
+        self.code = generator.code
+        self.filters = generator.filters
+        self.context = context
+        self.stream = self.code.go(self.context)
+    def __iter__(self):
+        return list(self.stream)
+
+    def __or__(self, function):
+        return InlineStream(function(self))
+
+    def filter(self, *filters):
+        return reduce(operator.or_, (self,) + filters)
+
+    def render(self, method='xml', encoding='utf-8', **kwargs):
+        generator = self.serialize(method=method, **kwargs)
+        output = u''.join(list(generator))
+        if encoding is not None:
+            errors = 'replace'
+            if method != 'text':
+                errors = 'xmlcharrefreplace'
+            return output.encode(encoding, errors)
+        return output
+
+    def select(self, path, namespaces=None, variables=None):
+        return InlinedPath(path).select(self, namespaces, variables)
+
+    def serialize(self, method='xml', **kwargs):
+        stream = self.stream
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for evt in stream:
+            yield evt[3]
+
+    def __str__(self):
+        return self.render()
+
+    def __unicode__(self):
+        return self.render(encoding=None)
+
+class InlineQName(object):
+    """creates a QName-like object from a START event"""
+    def __init__(self, event):
+        self.namespace = event[1][0]
+        self.localname = event[1][1]
+        
+class InlinedPath(Path):
+    """overrides Path.test to adapt incoming events from inlined to Genshi."""
+    def test(self, ignore_context=False):
+        t = super(InlinedPath, self).test(ignore_context=ignore_context)
+        def _test(event, namespaces, variables, updateonly=False):
+            if event[0] is START:
+                return t((event[0], (InlineQName(event), event[1][1]), event[2]), namespaces, variables, updateonly=updateonly)
+            else:
+                return t(event[0:3], namespaces, variables, updateonly=updateonly)
+        return _test
--- a/genshi/codegen/generator.py
+++ b/genshi/codegen/generator.py
@@ -13,14 +13,17 @@
 
 
 from genshi import template
-from genshi.template import Template
+from genshi.template import Template, Context
+from genshi.path import Path
+from genshi.core import QName
 from genshi.codegen.printer import PythonPrinter, PYTHON_LINE, PYTHON_COMMENT, PYTHON_BLOCK
+from genshi.codegen import serialize, adapters, output
 from compiler import ast, parse, visitor
 import sets, re
 
 _directive_printers = {}
 
-def ident_from_assign(assign):
+def _ident_from_assign(assign):
     # a little trick to get the variable name from the already 
     # compiled assignment expression
     x = {}
@@ -40,15 +43,15 @@
 class ForDirectivePrinter(DirectivePrinter):
     __directive__ = template.ForDirective
     def produce_directive(self, gencontext, directive, event, substream):
-        varname = ident_from_assign(directive.assign)
+        varname = _ident_from_assign(directive.assign)
         yield (PYTHON_LINE, "for %s in %s:" % (varname, directive.expr.source))
         for evt in gencontext.gen_stream(substream):
             yield evt
         yield (PYTHON_LINE, "")
     def declared_identifiers(self, gencontext, directive, event):
-        return [ident_from_assign(directive.assign)]
+        return [_ident_from_assign(directive.assign)]
     def undeclared_identifiers(self, gencontext, directive, event):
-        s = SearchIdents(directive.expr.source)
+        s = _SearchIdents(directive.expr.source)
         return list(s.identifiers)
 ForDirectivePrinter()
 
@@ -85,33 +88,52 @@
     def undeclared_identifiers(self, gencontext, directive, event):
         result = sets.Set()
         for expr in directive.defaults.values():
-            s = SearchIdents(expr.node)
+            s = _SearchIdents(expr.node)
             result = result.union(s.identifiers)
         return iter(result)
-            
 DefDirectivePrinter()
-    
 
 class Generator(object):
     """given a Template, generates Python modules (as strings or code objects)
     optimized to a particular Serializer."""
-    def __init__(self, template):
+    def __init__(self, template, method='html', serializer=None, strip_whitespace=False, filters=None):
         self.template = template
-    def generate_stream(self, serializer):
+        self.serializer = serializer or ({
+                'xml':   serialize.XMLSerializeFilter,
+               'xhtml': serialize.XHTMLSerializeFilter,
+               'html':  serialize.HTMLSerializeFilter,
+               'text':  serialize.TextSerializeFilter}[method]())
+        self.code = self._generate_module()
+        self.filters = filters or []
+        if strip_whitespace:
+            self.filters.append(output.PostWhitespaceFilter())
+    def generate(self, *args, **kwargs):
+        if args:
+            assert len(args) == 1
+            ctxt = args[0]
+            if ctxt is None:
+                ctxt = Context(**kwargs)
+            assert isinstance(ctxt, Context)
+        else:
+            ctxt = Context(**kwargs)
+        
+        return adapters.InlineStream(self, ctxt)
+        
+    def _generate_code_events(self):
         return PythonPrinter(
             PythonGenerator(
-                self.template.stream, serializer
+                self.template.stream, self.serializer
             ).generate()
         ).generate()
-    def generate_module(self, serializer):
+    def _generate_module(self):
         import imp
         module = imp.new_module("_some_ident")
-        pycode = u''.join(self.generate_stream(serializer))
+        pycode = u''.join(self._generate_code_events())
         code = compile(pycode, '<String>', 'exec')
         exec code in module.__dict__, module.__dict__
         return module
-        
-class SearchIdents(visitor.ASTVisitor):
+            
+class _SearchIdents(visitor.ASTVisitor):
     """an ASTVisitor that can locate identifier names in a string-based code block.
 
     This is not used in this example module, but will be used to locate and pre-declare 
@@ -145,7 +167,7 @@
         """locate undeclared python identifiers in the given stream.  stack is an empty set."""
         for evt in stream:
             if evt[0] is template.EXPR:
-                s = SearchIdents(evt[1].source)
+                s = _SearchIdents(evt[1].source)
                 for ident in s.identifiers.difference(stack):
                     yield (PYTHON_LINE, "%s = context.get('%s', None)" % (ident, ident))
             elif evt[0] is template.SUB:
@@ -162,7 +184,6 @@
     def gen_stream(self, stream):
         for event in self.serializer(stream):
             (kind, data, pos, literal) = event
-            #print "INCOMING:", event
             if kind is template.SUB:
                 directives, substream = event[1]
                 for d in directives:
@@ -182,7 +203,7 @@
                     yield evt
     def produce_preamble(self):
         for line in [
-            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, QName, Stream",
+            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream",
             "from genshi.template import Context, Template",
             "from genshi.path import Path",
             "from genshi.codegen import interp",
@@ -203,8 +224,10 @@
     def end(self):
         yield (PYTHON_LINE, "")
     def produce_start_event(self, event):
-        yield (PYTHON_LINE, "yield (START, (QName(%s), %s), %s, %s)" % (
-            repr(event[1][0]), 
+        qn = QName(event[1][0])
+        yield (PYTHON_LINE, "yield (START, (%s, %s, %s), %s, %s)" % (
+            repr(qn.namespace),
+            repr(qn.localname), 
             repr(event[1][1]), 
             repr(event[2]), 
             repr(event[3]))
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/output.py
@@ -0,0 +1,83 @@
+from itertools import chain
+try:
+    frozenset
+except NameError:
+    from sets import ImmutableSet as frozenset
+import re
+
+from genshi.codegen import adapters
+from genshi.core import escape, Markup, Namespace, QName, StreamEventKind
+from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
+                        END_CDATA, PI, COMMENT, XML_NAMESPACE
+
+
+class PostWhitespaceFilter(object):
+    """A filter that removes extraneous ignorable white space from the
+    stream."""
+
+    def __init__(self, preserve=None, noescape=None):
+        """Initialize the filter.
+
+        @param preserve: a set or sequence of tag names for which white-space
+            should be preserved (i.e. left untouched by this filter).
+        @param noescape: a set or sequence of tag names for which text content
+            should not be escaped
+
+        Both the `preserve` and `noescape` sets are expected to refer to
+        elements that cannot contain further child elements.
+        """
+        if preserve is None:
+            preserve = []
+        self.preserve = frozenset(preserve)
+        if noescape is None:
+            noescape = []
+        self.noescape = frozenset(noescape)
+
+    def __call__(self, stream, space=XML_NAMESPACE['space'],
+                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
+                 collapse_lines=re.compile('\n{2,}').sub):
+        mjoin = Markup('').join
+        preserve_elems = self.preserve
+        preserve = False
+        noescape_elems = self.noescape
+        noescape = False
+
+        textbuf = []
+        push_text = textbuf.append
+        pop_text = textbuf.pop
+        for kind, data, pos, literal in chain(stream, [(None, None, None, None)]):
+            if kind is TEXT:
+                if noescape:
+                    data = Markup(data)
+                push_text(data)
+            else:
+                if textbuf:
+                    if len(textbuf) > 1:
+                        text = mjoin(textbuf, escape_quotes=False)
+                        del textbuf[:]
+                    else:
+                        text = escape(pop_text(), quotes=False)
+                    if not preserve:
+                        text = collapse_lines('\n', trim_trailing_space('', text))
+                    yield TEXT, Markup(text), pos, unicode(text)
+
+                if kind is START:
+                    namespace, localname, attrib = data
+                    tag = (namespace, localname)
+                    if not preserve and (tag in preserve_elems or
+                                         adapters.get_attrib(attrib, space) == 'preserve'):
+                        preserve = True
+                    if not noescape and tag in noescape_elems:
+                        noescape = True
+
+                elif kind is END:
+                    preserve = noescape = False
+
+                elif kind is START_CDATA:
+                    noescape = True
+
+                elif kind is END_CDATA:
+                    noescape = False
+
+                if kind:
+                    yield kind, data, pos, literal
--- a/genshi/codegen/serialize.py
+++ b/genshi/codegen/serialize.py
@@ -30,7 +30,7 @@
 from genshi.core import escape, Markup, Namespace, QName, StreamEventKind
 from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
                         END_CDATA, PI, COMMENT, XML_NAMESPACE
-from genshi.output import DocType, WhitespaceFilter
+from genshi.output import DocType
 
 __all__ = ['XMLSerializeFilter', 'XHTMLSerializeFilter', 'HTMLSerializeFilter']
 
@@ -38,10 +38,9 @@
     """Delivers the given stream with additional XML text added to outgoing events.
     
     """
-
     _PRESERVE_SPACE = frozenset()
 
-    def __init__(self, doctype=None, strip_whitespace=True):
+    def __init__(self, doctype=None):
         """Initialize the XML serialize filter.
         
         @param doctype: a `(name, pubid, sysid)` tuple that represents the
@@ -54,10 +53,6 @@
         if doctype:
             self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
         # TODO: fold empty tags ?
-        self.filters = []
-        if strip_whitespace:
-            # TODO: can we process whitespace before a template is executed with a Context ?
-            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
 
     def __call__(self, stream):
         raise "TODO"
@@ -89,7 +84,7 @@
                                  QName('style'),
                                  QName('http://www.w3.org/1999/xhtml}style')])
 
-    def __init__(self, doctype=None, strip_whitespace=True):
+    def __init__(self, doctype=None):
         """Initialize the HTML serialize filter.
         
         @param doctype: a `(name, pubid, sysid)` tuple that represents the
@@ -98,11 +93,8 @@
         @param strip_whitespace: whether extraneous whitespace should be
             stripped from the output
         """
-        super(HTMLSerializeFilter, self).__init__(doctype, False)
-        if strip_whitespace:
-            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
-                                                 self._NOESCAPE_ELEMS))
-
+        super(HTMLSerializeFilter, self).__init__(doctype)
+            
     def __call__(self, stream):
         namespace = self.NAMESPACE
         ns_mapping = {}
@@ -113,8 +105,6 @@
         noescape = False
 
         stream = chain(self.preamble, stream)
-        for filter_ in self.filters:
-            stream = filter_(stream)
         for kind, data, pos in stream:
             if kind is START:
                 tag, attrib = data
@@ -174,3 +164,6 @@
             else:
                 # all other events pass-thru
                 yield kind, data, pos, None
+
+class TextSerializeFilter(object):
+    pass    
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/tests/template.py
@@ -0,0 +1,30 @@
+from genshi.tests import template
+from genshi.template import MarkupTemplate as RealMarkupTemplate
+from genshi.codegen.generator import Generator
+from genshi.codegen.serialize import HTMLSerializeFilter
+
+
+import unittest
+
+# original template unittest does this:
+# tmpl = MarkupTemplate(text)
+# result = str(tmpl.generate(items=items)))
+
+class MarkupTemplateAdapter(object):
+    def __init__(self, text):
+        self.generator = Generator(RealMarkupTemplate(text), strip_whitespace=True)
+    def generate(self, *args, **kwargs):
+        return self.generator.generate(*args, **kwargs)
+
+        
+template.MarkupTemplate = MarkupTemplateAdapter
+
+from genshi.tests.template import *
+
+
+
+def suite():
+    return template.suite()
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
--- a/genshi/codegen/tests/test_generator.py
+++ b/genshi/codegen/tests/test_generator.py
@@ -52,30 +52,36 @@
 def items():
     return ["one", "two", "three"]
 
-data = {'lala':'hi', 'items':items, 'foo':['f1', 'f2', 'f3']}
+data = {'lala':'hi', 'items':lambda:["one", "two", "three"], 'foo':['f1', 'f2', 'f3']}
     
 t = MarkupTemplate(text)
-print u''.join(HTMLSerializer()(t.generate(**data)))
+print t.generate(**data).render()
 
 g = generator.Generator(t)
-pycode =  u''.join(g.generate_stream(HTMLSerializeFilter()))
+pycode =  u''.join(g._generate_code_events())
 print pycode
 
-g = generator.Generator(t)
-module = g.generate_module(HTMLSerializeFilter())
-print u''.join(interp.run_inlined(module, data))
+print str(g.generate(**data))
 
 print "Running MarkupTemplate.generate()/HTMLSerializer..."
 now = time.time()
 for x in range(1,1000):
     stream = t.generate(**data)
-    serializer = HTMLSerializer()
-    list(serializer(stream))
+    stream.render()
 print "MarkupTemplate.generate()/HTMLSerializer totaltime: %f" % (time.time() - now)
 
 # inline
 print "Running inlined module..."
 now = time.time()
 for x in range(1,1000):
-    list(interp.run_inlined(module, data))
+    str(g.generate(**data))
 print "Inlined module totaltime: %f" % (time.time() - now)
+
+# inline with whitespace filter
+print "Running inlined module..."
+g = generator.Generator(t, strip_whitespace=True)
+now = time.time()
+for x in range(1,1000):
+    str(g.generate(**data))
+
+print "Inlined module w/ strip_whitespace totaltime: %f" % (time.time() - now)
Copyright (C) 2012-2017 Edgewall Software