changeset 337:6cc2eabf73b8 experimental-compiler

- Using QName/Attrs now; this is faster when match templates are being used and only slightly slower when they are not. - Got XMLSerializeFilter working. Filters are also stateful for now, since they are used multiple times for the same module during generation.
author zzzeek
date Thu, 09 Nov 2006 01:11:46 +0000
parents 2fdf34945dfd
children
files genshi/codegen/adapters.py genshi/codegen/generator.py genshi/codegen/interp.py genshi/codegen/output.py genshi/codegen/serialize.py genshi/codegen/tests/template.py genshi/codegen/tests/test_generator.py
diffstat 7 files changed, 137 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/codegen/adapters.py
+++ b/genshi/codegen/adapters.py
@@ -46,37 +46,12 @@
     def __unicode__(self):
         return self.render(encoding=None)
 
-class InlineEvent(object):
-    __eventtypes__ = {}
-    def __new__(cls, event):
-        return object.__new__(InlineEvent.__eventtypes__.get(event[0], InlineEvent), event)
-    def __init__(self, event):
-        self.event = event
-    def to_genshi(self):
-        return self.event[0:3]
-
-class InlineStartEvent(InlineEvent):
-    def to_genshi(self):
-        return (self.event[0], (InlineQName(self.event), Attrs(self.event[1][2])), self.event[2])
-InlineEvent.__eventtypes__[template.START] = InlineStartEvent
-        
-class InlineQName(unicode):
-    """creates a QName-like object from a START event"""
-    def __new__(cls, event):
-        if event[1][0] is not None:
-            self = unicode.__new__(cls, u'{%s}%s' % (event[1][0], event[1][1]))
-        else:
-            self = unicode.__new__(cls, u'%s' % (event[1][1]))
-        self.namespace = event[1][0]
-        self.localname = event[1][1]
-        return self
-        
 class InlinePath(Path):
     """overrides Path.test to adapt incoming events from inlined to Genshi."""
     def test(self, ignore_context=False):
         t = super(InlinePath, self).test(ignore_context=ignore_context)
         def _test(event, namespaces, variables, updateonly=False):
-            return t(InlineEvent(event).to_genshi(), namespaces, variables, updateonly=updateonly)
+            return t(event[0:3], namespaces, variables, updateonly=updateonly)
         return _test
     def select(self, stream, namespaces=None, variables=None):
         if namespaces is None:
@@ -104,9 +79,8 @@
                 elif isinstance(result, tuple):
                     yield event
                 elif result:
-                    # in genshi.path.Path, this could be an Attrs or a 3-tupled event.
-                    # here, we only want Attrs to come out.
-                    yield result
+                    # scalar result, return an event
+                    yield template.TEXT, unicode(result), (None, -1, -1), unicode(result)
         return InlineStream(_generate())
 
     def __repr__(self):
--- a/genshi/codegen/generator.py
+++ b/genshi/codegen/generator.py
@@ -20,6 +20,8 @@
 from genshi.codegen import serialize, adapters, output, interp
 from compiler import ast, parse, visitor
 import sets, re
+from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
+                        END_CDATA, PI, COMMENT, XML_NAMESPACE
 
 _directive_printers = {}
 
@@ -112,7 +114,7 @@
 class Generator(object):
     """given a Template, generates Python modules (as strings or code objects)
     optimized to a particular Serializer."""
-    def __init__(self, template, method='html', serializer=None, strip_whitespace=False, compress_empty=False, filters=None):
+    def __init__(self, template, method='xml', serializer=None, strip_whitespace=False, compress_empty=False, filters=None):
         self.template = template
         self.serializer = serializer or ({
                 'xml':   serialize.XMLSerializeFilter,
@@ -227,16 +229,16 @@
                 for d in directives:
                     for evt in self.produce_directive(d, event, substream):
                         yield evt
-            elif kind is template.START:
+            elif kind is START:
                 for evt in self.produce_start_event(event):
                     yield evt
-            elif kind is template.END:
+            elif kind is END:
                 for evt in self.produce_end_event(event):
                     yield evt
             elif kind is template.EXPR:
                 for evt in self.produce_expr_event(event):
                     yield evt
-            elif kind is template.TEXT:
+            elif kind is TEXT:
                 for evt in self.produce_text_event(event):
                     yield evt
             elif kind is template.START_NS:
@@ -245,9 +247,12 @@
             elif kind is template.END_NS:
                 for evt in self.produce_end_ns_event(event):
                     yield evt
+            elif kind is DOCTYPE:
+                for evt in self.produce_doctype_event(event):
+                    yield evt
     def produce_preamble(self):
         for line in [
-            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream",
+            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream, QName, Attrs",
             "from genshi.template import Context, Template",
             "from genshi.path import Path",
             "from genshi.codegen import interp, adapters",
@@ -265,9 +270,8 @@
         yield (PYTHON_LINE, "")
     def produce_start_event(self, event):
         qn = QName(event[1][0])
-        yield (PYTHON_LINE, "yield (START, (%s, %s, %s), %s, %s)" % (
-            repr(qn.namespace),
-            repr(qn.localname), 
+        yield (PYTHON_LINE, "yield (START, (QName(%s), Attrs(%s)), %s, %s)" % (
+            repr(qn),
             repr(event[1][1]), 
             repr(event[2]), 
             repr(event[3]))
@@ -294,6 +298,12 @@
             repr(event[2]),
             repr(unicode(event[3]))
         ))
+    def produce_doctype_event(self, event):
+        yield (PYTHON_LINE, "yield (DOCTYPE, (%s), %s, %s)" % (
+            repr(unicode(event[1])),
+            repr(event[2]),
+            repr(unicode(event[3]))
+        ))
         
     def produce_start_ns_event(self, event):
         yield (PYTHON_LINE, "yield (START_NS, (%s), %s, %s)" % (
@@ -303,7 +313,7 @@
         ))
         yield (PYTHON_LINE, "_namespaces[%s] = %s" % (repr(event[1][0]), repr(unicode(event[1][1]))))
     def produce_end_ns_event(self, event):
-        yield (PYTHON_LINE, "del _namespaces[%s]" % (repr(event[1])))
+        #yield (PYTHON_LINE, "del _namespaces[%s]" % (repr(event[1])))
         yield (PYTHON_LINE, "yield (START_NS, (%s), %s, %s)" % (
             repr(unicode(event[1])),
             repr(event[2]),
--- a/genshi/codegen/interp.py
+++ b/genshi/codegen/interp.py
@@ -10,7 +10,6 @@
 import sets
 from itertools import chain
 
-
 # we re-implement our own _match function, based on MarkupTemplate._match.
 def _match(stream, ctxt, match_templates=None):
     """match method from MarkupTemplate, modified to handle inlined stream of events.
@@ -41,8 +40,6 @@
                                    event[0] is not END):
             yield event
             continue
-        # no need for a sub-list of directives (nor _apply_directives function) since inlined code 
-        # expands all nesting explicitly (TODO: is this really true ?)
         for idx, (test, path, template, namespaces) in \
                 enumerate(match_templates):
 
@@ -52,9 +49,7 @@
                     
                 content = chain([event], _match(_strip(stream), ctxt),
                                 tail)
-                # TODO: not sure if extra list of filters is needed
-                #for filter_ in self.filters[3:]:
-                #    content = filter_(content, ctxt)
+
                 content = list(content)
 
                 for test in [mt[0] for mt in match_templates]:
@@ -63,31 +58,18 @@
                 def select(path):
                     return adapters.InlinePath(path).select(Stream(content), namespaces, ctxt)
                 
-                # similarly, no need for _eval (eval is inlined) as well as _flatten (inlined code already "flattened")
                 for event in _match(template(select), ctxt, match_templates[:idx] + match_templates[idx + 1:]):
                     yield event
                 break
         else:
             yield event
 
-# TODO: this adds too much overhead
-def _ensure(stream):
-    """Ensure that every item on the stream is actually an inline event."""
-    for event in stream:
-        if type(event) is not tuple:
-            if hasattr(event, 'totuple'):
-                event = event.totuple()
-            else:
-                event = TEXT, unicode(event), (None, -1, -1), unicode(event)
-        yield event
-
 def evaluate(result, pos):
     if result is not None:
         if isinstance(result, basestring):
             yield TEXT, result, pos, result
         elif hasattr(result, '__iter__'):
-            substream = _ensure(result)
-            for event in substream:
+            for event in result:
                 yield event
         else:
             yield TEXT, unicode(result), pos, result
--- a/genshi/codegen/output.py
+++ b/genshi/codegen/output.py
@@ -84,8 +84,8 @@
                     yield TEXT, Markup(text), pos, unicode(text)
 
                 if kind is START:
-                    namespace, localname, attrib = data
-                    tag = (namespace, localname)
+                    qname, attrib = data
+                    tag = (qname.namespace, qname.localname)
                     if not preserve and (tag in preserve_elems or
                                          adapters.get_attrib(attrib, space) == 'preserve'):
                         preserve = True
--- a/genshi/codegen/serialize.py
+++ b/genshi/codegen/serialize.py
@@ -18,6 +18,8 @@
 
 While this module is a severe transgression of DRY, reusing the output-specific logic
 from the genshi.output module would require de-optimizing the base genshi.output implementations.
+
+The Filters are also stateful and must be created per-generator.
 """
 
 from itertools import chain
@@ -52,10 +54,94 @@
         self.preamble = []
         if doctype:
             self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
-        # TODO: fold empty tags ?
+        self.ns_attrib = []
+        self.ns_mapping = {XML_NAMESPACE.uri: 'xml'}
+        self.have_doctype = False
+        self.in_cdata = False
 
     def __call__(self, stream):
-        raise "TODO"
+        stream = chain(self.preamble, stream)
+        for kind, data, pos in stream:
+            if kind is START:
+                tag, attrib = data
+                tagname = tag.localname
+                namespace = tag.namespace
+                if namespace:
+                    if namespace in self.ns_mapping:
+                        prefix = self.ns_mapping[namespace]
+                        if prefix:
+                            tagname = '%s:%s' % (prefix, tagname)
+                    else:
+                        self.ns_attrib.append((QName('xmlns'), namespace))
+                buf = ['<', tagname]
+
+                for attr, value in attrib + self.ns_attrib:
+                    attrname = attr.localname
+                    if attr.namespace:
+                        prefix = ns_mapping.get(attr.namespace)
+                        if prefix:
+                            attrname = '%s:%s' % (prefix, attrname)
+                    buf += [' ', attrname, '="', escape(value), '"']
+                self.ns_attrib = []
+
+                buf += ['>']
+                yield kind, data, pos, u''.join(buf)
+
+            elif kind is END:
+                tag = data
+                tagname = tag.localname
+                if tag.namespace:
+                    prefix = self.ns_mapping.get(tag.namespace)
+                    if prefix:
+                        tagname = '%s:%s' % (prefix, tag.localname)
+                yield kind, data, pos, u'</%s>' % tagname
+
+            elif kind is TEXT:
+                if self.in_cdata:
+                    yield kind, data, pos, data
+                else:
+                    yield kind, data, pos, escape(data, quotes=False)
+
+            elif kind is COMMENT:
+                yield kind, data, pos, u'<!--%s-->' % data
+
+            elif kind is DOCTYPE and not self.have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf += [' PUBLIC "%s"']
+                elif sysid:
+                    buf += [' SYSTEM']
+                if sysid:
+                    buf += [' "%s"']
+                buf += ['>\n']
+                yield kind, data, pos, unicode(Markup(''.join(buf), *filter(None, data)))
+                self.have_doctype = True
+
+            elif kind is START_NS:
+                prefix, uri = data
+                if uri not in self.ns_mapping:
+                    self.ns_mapping[uri] = prefix
+                    if not prefix:
+                        self.ns_attrib.append((QName('xmlns'), uri))
+                    else:
+                        self.ns_attrib.append((QName('xmlns:%s' % prefix), uri))
+                    yield kind, data, pos, None
+
+            elif kind is START_CDATA:
+                yield kind, data, pos, u'<![CDATA['
+                self.in_cdata = True
+
+            elif kind is END_CDATA:
+                yield kind, data, pos, u']]>'
+                self.in_cdata = False
+
+            elif kind is PI:
+                yield kind, data, pos, u'<?%s %s?>' % data
+            else:
+                # all other events pass-thru
+                yield kind, data, pos, None
+
 
 class XHTMLSerializeFilter(XMLSerializeFilter):
     """Delivers the given stream with additional XHTML text added to outgoing events.
@@ -108,30 +194,32 @@
         for kind, data, pos in stream:
             if kind is START:
                 tag, attrib = data
-                if not tag.namespace or tag in namespace:
-                    tagname = tag.localname
-                    buf = ['<', tagname]
+                tagname = tag.localname
+                buf = ['<', tagname]
 
-                    for attr, value in attrib:
-                        attrname = attr.localname
-                        if not attr.namespace or attr in namespace:
-                            if attrname in boolean_attrs:
-                                if value:
-                                    buf += [' ', attrname]
-                            else:
-                                buf += [' ', attrname, '="', escape(value), '"']
+                for attr, value in attrib:
+                    attrname = attr.localname
+                    if not attr.namespace or attr in namespace:
+                        if attrname in boolean_attrs:
+                            if value:
+                                buf += [' ', attrname]
+                        else:
+                            buf += [' ', attrname, '="', escape(value), '"']
 
-                    buf += ['>']
+                buf += ['>']
 
+                if tag.namespace and tag not in namespace:
+                    yield kind, data, pos, u''
+                else:
                     yield kind, data, pos, u''.join(buf)
-
                     if tagname in noescape_elems:
                         noescape = True
 
             elif kind is END:
                 if not data.namespace or data in namespace:
                     yield kind, data, pos, u'</%s>' % data.localname
-
+                else:
+                    yield kind, data, pos, u''
                 noescape = False
 
             elif kind is TEXT:
--- a/genshi/codegen/tests/template.py
+++ b/genshi/codegen/tests/template.py
@@ -13,7 +13,7 @@
 class MarkupTemplateAdapter(object):
     def __init__(self, text):
         self.generator = Generator(RealMarkupTemplate(text), strip_whitespace=True, compress_empty=True)
-        #print u''.join(self.generator._generate_code_events())
+        print u''.join(self.generator._generate_code_events())
     def generate(self, *args, **kwargs):
         return self.generator.generate(*args, **kwargs)
 
--- a/genshi/codegen/tests/test_generator.py
+++ b/genshi/codegen/tests/test_generator.py
@@ -12,9 +12,8 @@
 # history and logs, available at http://genshi.edgewall.org/log/.
 
 from genshi.template import MarkupTemplate, Template, Context
-from genshi.output import HTMLSerializer
+from genshi.output import XMLSerializer
 from genshi.codegen import generator, interp
-from genshi.codegen.serialize import HTMLSerializeFilter
 import time, sys
 
 text = """<!DOCTYPE html
@@ -25,10 +24,10 @@
       xmlns:xi="http://www.w3.org/2001/XInclude"
       lang="en">
  <body>
-    <!-- remove this match to get much faster performance -->
-    <py:match path='*[@class="message"]'>
-        matched the message, which was ${select('*|text()')}
-        </py:match>
+ <py:match path='*[@class="message"]'>
+     matched the message, which was ${select('*|text()')}
+     </py:match>
+
         
     <div py:for="item in items()">
         ${lala + 'hi'}
@@ -69,6 +68,7 @@
 
 print str(g.generate(**data))
 
+#sys.exit()
 
 print "Running MarkupTemplate.generate()/HTMLSerializer..."
 now = time.time()
Copyright (C) 2012-2017 Edgewall Software