changeset 781:93e0f2c89576 experimental-soc2008

Optimization work first commit (many changes)
author mkurczych
date Fri, 11 Jul 2008 21:10:46 +0000
parents 7320190e10c5
children
files genshi/core.py genshi/optimization.py genshi/output.py genshi/path.py genshi/template/base.py genshi/template/directives.py genshi/template/markup.py genshi/template/tests/__init__.py genshi/template/tests/optimization.py genshi/tests/__init__.py genshi/tests/optimization.py
diffstat 11 files changed, 692 insertions(+), 301 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -235,9 +235,11 @@
         :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
         """
         from genshi.output import get_serializer
+        from genshi.optimization import optimized_flatten
         if method is None:
             method = self.serializer or 'xml'
-        return get_serializer(method, **kwargs)(_ensure(self))
+        return optimized_flatten(get_serializer(method, **kwargs)
+                                                (_ensure(self)))
 
     def __str__(self):
         return self.render()
new file mode 100644
--- /dev/null
+++ b/genshi/optimization.py
@@ -0,0 +1,83 @@
+from genshi.core import StreamEventKind
+from genshi.util import LRUCache
+
+OPTIMIZATION_POSSIBILITY = StreamEventKind("OPTIMIZATION_POSSIBILITY")
+OPTIMIZED_FRAGMENT = StreamEventKind("OPTIMIZED_FRAGMENT")
+
+class Optimizer(object):
+    """Manages optimized tags with variables bound and filter trees"""
+    def __init__(self, size=100):
+        self._filtersCount = 1
+        self._filters = {0:{}}
+        self._fragmentsCount = 0
+        self._fragments = {}
+        self._data = LRUCache(size)
+
+    def get_filters_child_id(self, fid, filter):
+        """Find the filter tree node for (fid, filter), creating it if missing"""
+        try:
+            return self._filters[fid][filter]
+        except KeyError:
+            id_ = self._filtersCount
+            self._filters[fid][filter] = id_
+            self._filters[id_] = {}
+            self._filtersCount += 1
+            return id_
+    def get_fragment_id(self, *args):
+        try:
+            return self._fragments[args]
+        except KeyError:
+            id_ = self._fragmentsCount
+            self._fragments[args] = id_
+            self._fragmentsCount += 1
+            return id_
+
+    @property
+    def root_id(self):
+        return 0
+
+    def get_cache_for(self, fragmentId, filtersId):
+        try:
+            return self._data[(filtersId, fragmentId)]
+        except KeyError:
+            return None
+
+    def set_cache_for(self, fragmentId, filtersId, stream):
+        self._data[(filtersId, fragmentId,)] = stream
+
+
+class OptimizedFragment(object):
+    def __init__(self, stream, optimizer, fragmentId, filtersId=None):
+        self._stream = stream
+        self.fragmentId = fragmentId
+        if filtersId is None:
+            self.filtersId = optimizer.root_id
+        else:
+            self.filtersId = filtersId
+        self.optimizer = optimizer
+    def get_stream(self):
+        """Return the raw stream. Only for embedding in generators; if
+        anything more is needed, use process_stream"""
+        return self._stream
+    def process_stream(self):
+        """Render the stream in place and store the result in the cache"""
+        s = self.optimizer.get_cache_for(self.fragmentId, self.filtersId)
+        if s is None:
+            s = list(self._stream)
+            self.optimizer.set_cache_for(self.fragmentId, self.filtersId, s)
+        self._stream = s
+        return self._stream
+    def create_child(self, filter, stream):
+        """Create child fragment (representing fragment after applying filter)"""
+        filtersId = self.optimizer.get_filters_child_id(self.filtersId, filter)
+        #print "Creating child", filter
+        return OptimizedFragment(stream, self.optimizer, self.fragmentId,
+                                 filtersId)
+
+def optimized_flatten(stream):
+    for event in stream:
+        if event[0] is OPTIMIZED_FRAGMENT:
+            for e in optimized_flatten(event[1].process_stream()):
+                yield e
+        else:
+            yield event
--- a/genshi/output.py
+++ b/genshi/output.py
@@ -25,6 +25,7 @@
 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
                         START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
+from genshi.optimization import OPTIMIZED_FRAGMENT
 
 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
            'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
@@ -61,6 +62,7 @@
     for chunk in iterator:
         out.write(_encode(chunk))
 
+
 def get_serializer(method='xml', **kwargs):
     """Return a serializer object for the given method.
     
@@ -73,7 +75,10 @@
     :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
     :since: version 0.4.1
     """
-    if isinstance(method, basestring):
+    serializers = XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+    if isinstance(method, serializers):
+        return method
+    elif isinstance(method, basestring):
         method = {'xml':   XMLSerializer,
                   'xhtml': XHTMLSerializer,
                   'html':  HTMLSerializer,
@@ -202,68 +207,75 @@
             self.filters.append(DocTypeInserter(doctype))
 
     def __call__(self, stream):
-        have_decl = have_doctype = False
-        in_cdata = False
 
         for filter_ in self.filters:
             stream = filter_(stream)
-        for kind, data, pos in stream:
-
-            if kind is START or kind is EMPTY:
-                tag, attrib = data
-                buf = ['<', tag]
-                for attr, value in attrib:
-                    buf += [' ', attr, '="', escape(value), '"']
-                buf.append(kind is EMPTY and '/>' or '>')
-                yield Markup(u''.join(buf))
-
-            elif kind is END:
-                yield Markup('</%s>' % data)
-
-            elif kind is TEXT:
-                if in_cdata:
-                    yield data
-                else:
-                    yield escape(data, quotes=False)
-
-            elif kind is COMMENT:
-                yield Markup('<!--%s-->' % data)
+        def _process(stream):
+            #TODO SOC: verify have_decl/have_doctype handling for nested fragments
+            have_decl = have_doctype = False
+            in_cdata = False
+            for kind, data, pos in stream:
 
-            elif kind is XML_DECL and not have_decl:
-                version, encoding, standalone = data
-                buf = ['<?xml version="%s"' % version]
-                if encoding:
-                    buf.append(' encoding="%s"' % encoding)
-                if standalone != -1:
-                    standalone = standalone and 'yes' or 'no'
-                    buf.append(' standalone="%s"' % standalone)
-                buf.append('?>\n')
-                yield Markup(u''.join(buf))
-                have_decl = True
+                if kind is OPTIMIZED_FRAGMENT:
+                    frag = data.create_child(self, 
+                                            _process(data.get_stream()))
+                    yield (kind, frag, pos,)
+                elif kind is START or kind is EMPTY:
+                    tag, attrib = data
+                    buf = ['<', tag]
+                    for attr, value in attrib:
+                        buf += [' ', attr, '="', escape(value), '"']
+                    buf.append(kind is EMPTY and '/>' or '>')
+                    yield Markup(u''.join(buf))
 
-            elif kind is DOCTYPE and not have_doctype:
-                name, pubid, sysid = data
-                buf = ['<!DOCTYPE %s']
-                if pubid:
-                    buf.append(' PUBLIC "%s"')
-                elif sysid:
-                    buf.append(' SYSTEM')
-                if sysid:
-                    buf.append(' "%s"')
-                buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
-                have_doctype = True
+                elif kind is END:
+                    yield Markup('</%s>' % data)
 
-            elif kind is START_CDATA:
-                yield Markup('<![CDATA[')
-                in_cdata = True
+                elif kind is TEXT:
+                    if in_cdata:
+                        yield data
+                    else:
+                        yield escape(data, quotes=False)
 
-            elif kind is END_CDATA:
-                yield Markup(']]>')
-                in_cdata = False
+                elif kind is COMMENT:
+                    yield Markup('<!--%s-->' % data)
 
-            elif kind is PI:
-                yield Markup('<?%s %s?>' % data)
+                elif kind is XML_DECL and not have_decl:
+                    version, encoding, standalone = data
+                    buf = ['<?xml version="%s"' % version]
+                    if encoding:
+                        buf.append(' encoding="%s"' % encoding)
+                    if standalone != -1:
+                        standalone = standalone and 'yes' or 'no'
+                        buf.append(' standalone="%s"' % standalone)
+                    buf.append('?>\n')
+                    yield Markup(u''.join(buf))
+                    have_decl = True
+
+                elif kind is DOCTYPE and not have_doctype:
+                    name, pubid, sysid = data
+                    buf = ['<!DOCTYPE %s']
+                    if pubid:
+                        buf.append(' PUBLIC "%s"')
+                    elif sysid:
+                        buf.append(' SYSTEM')
+                    if sysid:
+                        buf.append(' "%s"')
+                    buf.append('>\n')
+                    yield Markup(u''.join(buf)) % filter(None, data)
+                    have_doctype = True
+
+                elif kind is START_CDATA:
+                    yield Markup('<![CDATA[')
+                    in_cdata = True
+
+                elif kind is END_CDATA:
+                    yield Markup(']]>')
+                    in_cdata = False
+
+                elif kind is PI:
+                    yield Markup('<?%s %s?>' % data)
+        return _process(stream)
 
 
 class XHTMLSerializer(XMLSerializer):
@@ -303,80 +315,85 @@
         boolean_attrs = self._BOOLEAN_ATTRS
         empty_elems = self._EMPTY_ELEMS
         drop_xml_decl = self.drop_xml_decl
-        have_decl = have_doctype = False
-        in_cdata = False
 
         for filter_ in self.filters:
             stream = filter_(stream)
-        for kind, data, pos in stream:
-
-            if kind is START or kind is EMPTY:
-                tag, attrib = data
-                buf = ['<', tag]
-                for attr, value in attrib:
-                    if attr in boolean_attrs:
-                        value = attr
-                    elif attr == u'xml:lang' and u'lang' not in attrib:
-                        buf += [' lang="', escape(value), '"']
-                    elif attr == u'xml:space':
-                        continue
-                    buf += [' ', attr, '="', escape(value), '"']
-                if kind is EMPTY:
-                    if tag in empty_elems:
-                        buf.append(' />')
-                    else:
-                        buf.append('></%s>' % tag)
-                else:
-                    buf.append('>')
-                yield Markup(u''.join(buf))
-
-            elif kind is END:
-                yield Markup('</%s>' % data)
-
-            elif kind is TEXT:
-                if in_cdata:
-                    yield data
-                else:
-                    yield escape(data, quotes=False)
-
-            elif kind is COMMENT:
-                yield Markup('<!--%s-->' % data)
+        def _process(stream, have_decl=True, have_doctype=True):
+            in_cdata = False
+            for kind, data, pos in stream:
 
-            elif kind is DOCTYPE and not have_doctype:
-                name, pubid, sysid = data
-                buf = ['<!DOCTYPE %s']
-                if pubid:
-                    buf.append(' PUBLIC "%s"')
-                elif sysid:
-                    buf.append(' SYSTEM')
-                if sysid:
-                    buf.append(' "%s"')
-                buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
-                have_doctype = True
+                if kind is OPTIMIZED_FRAGMENT:
+                    frag = data.create_child(self, 
+                                            _process(data.get_stream()))
+                    yield (kind, frag, pos,)
+                elif kind is START or kind is EMPTY:
+                    tag, attrib = data
+                    buf = ['<', tag]
+                    for attr, value in attrib:
+                        if attr in boolean_attrs:
+                            value = attr
+                        elif attr == u'xml:lang' and u'lang' not in attrib:
+                            buf += [' lang="', escape(value), '"']
+                        elif attr == u'xml:space':
+                            continue
+                        buf += [' ', attr, '="', escape(value), '"']
+                    if kind is EMPTY:
+                        if tag in empty_elems:
+                            buf.append(' />')
+                        else:
+                            buf.append('></%s>' % tag)
+                    else:
+                        buf.append('>')
+                    yield Markup(u''.join(buf))
 
-            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
-                version, encoding, standalone = data
-                buf = ['<?xml version="%s"' % version]
-                if encoding:
-                    buf.append(' encoding="%s"' % encoding)
-                if standalone != -1:
-                    standalone = standalone and 'yes' or 'no'
-                    buf.append(' standalone="%s"' % standalone)
-                buf.append('?>\n')
-                yield Markup(u''.join(buf))
-                have_decl = True
+                elif kind is END:
+                    yield Markup('</%s>' % data)
 
-            elif kind is START_CDATA:
-                yield Markup('<![CDATA[')
-                in_cdata = True
+                elif kind is TEXT:
+                    if in_cdata:
+                        yield data
+                    else:
+                        yield escape(data, quotes=False)
 
-            elif kind is END_CDATA:
-                yield Markup(']]>')
-                in_cdata = False
+                elif kind is COMMENT:
+                    yield Markup('<!--%s-->' % data)
 
-            elif kind is PI:
-                yield Markup('<?%s %s?>' % data)
+                elif kind is DOCTYPE and not have_doctype:
+                    name, pubid, sysid = data
+                    buf = ['<!DOCTYPE %s']
+                    if pubid:
+                        buf.append(' PUBLIC "%s"')
+                    elif sysid:
+                        buf.append(' SYSTEM')
+                    if sysid:
+                        buf.append(' "%s"')
+                    buf.append('>\n')
+                    yield Markup(u''.join(buf)) % filter(None, data)
+                    have_doctype = True
+
+                elif kind is XML_DECL and not have_decl and not drop_xml_decl:
+                    version, encoding, standalone = data
+                    buf = ['<?xml version="%s"' % version]
+                    if encoding:
+                        buf.append(' encoding="%s"' % encoding)
+                    if standalone != -1:
+                        standalone = standalone and 'yes' or 'no'
+                        buf.append(' standalone="%s"' % standalone)
+                    buf.append('?>\n')
+                    yield Markup(u''.join(buf))
+                    have_decl = True
+
+                elif kind is START_CDATA:
+                    yield Markup('<![CDATA[')
+                    in_cdata = True
+
+                elif kind is END_CDATA:
+                    yield Markup(']]>')
+                    in_cdata = False
+
+                elif kind is PI:
+                    yield Markup('<?%s %s?>' % data)
+        return _process(stream, False, False)
 
 
 class HTMLSerializer(XHTMLSerializer):
@@ -417,61 +434,65 @@
         boolean_attrs = self._BOOLEAN_ATTRS
         empty_elems = self._EMPTY_ELEMS
         noescape_elems = self._NOESCAPE_ELEMS
-        have_doctype = False
-        noescape = False
 
         for filter_ in self.filters:
             stream = filter_(stream)
-        for kind, data, pos in stream:
-
-            if kind is START or kind is EMPTY:
-                tag, attrib = data
-                buf = ['<', tag]
-                for attr, value in attrib:
-                    if attr in boolean_attrs:
-                        if value:
-                            buf += [' ', attr]
-                    elif ':' in attr:
-                        if attr == 'xml:lang' and u'lang' not in attrib:
-                            buf += [' lang="', escape(value), '"']
-                    elif attr != 'xmlns':
-                        buf += [' ', attr, '="', escape(value), '"']
-                buf.append('>')
-                if kind is EMPTY:
-                    if tag not in empty_elems:
-                        buf.append('</%s>' % tag)
-                yield Markup(u''.join(buf))
-                if tag in noescape_elems:
-                    noescape = True
+        def _process(stream, noescape, have_doctype=True):
+            for kind, data, pos in stream:
 
-            elif kind is END:
-                yield Markup('</%s>' % data)
-                noescape = False
-
-            elif kind is TEXT:
-                if noescape:
-                    yield data
-                else:
-                    yield escape(data, quotes=False)
-
-            elif kind is COMMENT:
-                yield Markup('<!--%s-->' % data)
+                if kind is OPTIMIZED_FRAGMENT:
+                    frag = data.create_child((self, noescape), 
+                                        _process(data.get_stream(), noescape))
+                    yield (kind, frag, pos,)
+                elif kind is START or kind is EMPTY:
+                    tag, attrib = data
+                    buf = ['<', tag]
+                    for attr, value in attrib:
+                        if attr in boolean_attrs:
+                            if value:
+                                buf += [' ', attr]
+                        elif ':' in attr:
+                            if attr == 'xml:lang' and u'lang' not in attrib:
+                                buf += [' lang="', escape(value), '"']
+                        elif attr != 'xmlns':
+                            buf += [' ', attr, '="', escape(value), '"']
+                    buf.append('>')
+                    if kind is EMPTY:
+                        if tag not in empty_elems:
+                            buf.append('</%s>' % tag)
+                    yield Markup(u''.join(buf))
+                    if tag in noescape_elems:
+                        noescape = True
 
-            elif kind is DOCTYPE and not have_doctype:
-                name, pubid, sysid = data
-                buf = ['<!DOCTYPE %s']
-                if pubid:
-                    buf.append(' PUBLIC "%s"')
-                elif sysid:
-                    buf.append(' SYSTEM')
-                if sysid:
-                    buf.append(' "%s"')
-                buf.append('>\n')
-                yield Markup(u''.join(buf)) % filter(None, data)
-                have_doctype = True
+                elif kind is END:
+                    yield Markup('</%s>' % data)
+                    noescape = False
 
-            elif kind is PI:
-                yield Markup('<?%s %s?>' % data)
+                elif kind is TEXT:
+                    if noescape:
+                        yield data
+                    else:
+                        yield escape(data, quotes=False)
+
+                elif kind is COMMENT:
+                    yield Markup('<!--%s-->' % data)
+
+                elif kind is DOCTYPE and not have_doctype:
+                    name, pubid, sysid = data
+                    buf = ['<!DOCTYPE %s']
+                    if pubid:
+                        buf.append(' PUBLIC "%s"')
+                    elif sysid:
+                        buf.append(' SYSTEM')
+                    if sysid:
+                        buf.append(' "%s"')
+                    buf.append('>\n')
+                    yield Markup(u''.join(buf)) % filter(None, data)
+                    have_doctype = True
+
+                elif kind is PI:
+                    yield Markup('<?%s %s?>' % data)
+        return _process(stream, False, False)
 
 
 class TextSerializer(object):
@@ -512,12 +533,18 @@
 
     def __call__(self, stream):
         strip_markup = self.strip_markup
-        for event in stream:
-            if event[0] is TEXT:
-                data = event[1]
-                if strip_markup and type(data) is Markup:
-                    data = data.striptags().stripentities()
-                yield unicode(data)
+        def _process(stream):
+            for event in stream:
+                if event[0] is OPTIMIZED_FRAGMENT:
+                    frag = event[1].create_child((self, noescape), 
+                                            _process(event[1].get_stream()))
+                    yield (event[0], frag, event[2],)
+                elif event[0] is TEXT:
+                    data = event[1]
+                    if strip_markup and type(data) is Markup:
+                        data = data.striptags().stripentities()
+                    yield unicode(data)
+        return _process(stream)
 
 
 class EmptyTagFilter(object):
@@ -528,23 +555,31 @@
     EMPTY = StreamEventKind('EMPTY')
 
     def __call__(self, stream):
-        prev = (None, None, None)
-        for ev in stream:
-            if prev[0] is START:
-                if ev[0] is END:
-                    prev = EMPTY, prev[1], prev[2]
-                    yield prev
-                    continue
+        def _process(stream):
+            prev = (None, None, None)
+            for ev in stream:
+                if ev[0] is OPTIMIZED_FRAGMENT:
+                    frag = ev[1].create_child(EmptyTagFilter, 
+                                            _process(ev[1].get_stream()))
+                    yield (ev[0], frag, ev[2],)
                 else:
-                    yield prev
-            if ev[0] is not START:
-                yield ev
-            prev = ev
+                    if prev[0] is START:
+                        if ev[0] is END:
+                            prev = EMPTY, prev[1], prev[2]
+                            yield prev
+                            continue
+                        else:
+                            yield prev
+                    if ev[0] is not START:
+                        yield ev
+                    prev = ev
+        return _process(stream)
 
 
 EMPTY = EmptyTagFilter.EMPTY
 
 
+#TODO SOC
 class NamespaceFlattener(object):
     r"""Output stream filter that removes namespace information from the stream,
     instead adding namespace attributes and prefixes as needed.
@@ -589,76 +624,84 @@
                 yield 'ns%d' % val
         _gen_prefix = _gen_prefix().next
 
-        for kind, data, pos in stream:
+        # TODO SOC: this does not work correctly yet...
+        def _process(stream):
+            for kind, data, pos in stream:
 
-            if kind is START or kind is EMPTY:
-                tag, attrs = data
+                if kind is OPTIMIZED_FRAGMENT:
+                    sstream = _process(data.get_stream())
+                    frag = data.create_child(self, sstream)
+                    yield (kind, frag, pos,)
+                elif kind is START or kind is EMPTY:
+                    tag, attrs = data
 
-                tagname = tag.localname
-                tagns = tag.namespace
-                if tagns:
-                    if tagns in namespaces:
+                    tagname = tag.localname
+                    tagns = tag.namespace
+                    if tagns:
+                        if tagns in namespaces:
+                            prefix = namespaces[tagns][-1]
+                            if prefix:
+                                tagname = u'%s:%s' % (prefix, tagname)
+                        else:
+                            _push_ns_attr((u'xmlns', tagns))
+                            _push_ns('', tagns)
+
+                    new_attrs = []
+                    for attr, value in attrs:
+                        attrname = attr.localname
+                        attrns = attr.namespace
+                        if attrns:
+                            if attrns not in namespaces:
+                                prefix = _gen_prefix()
+                                _push_ns(prefix, attrns)
+                                _push_ns_attr(('xmlns:%s' % prefix, attrns))
+                            else:
+                                prefix = namespaces[attrns][-1]
+                            if prefix:
+                                attrname = u'%s:%s' % (prefix, attrname)
+                        new_attrs.append((attrname, value))
+
+                    yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
+                    del ns_attrs[:]
+
+                elif kind is END:
+                    tagname = data.localname
+                    tagns = data.namespace
+                    if tagns:
                         prefix = namespaces[tagns][-1]
                         if prefix:
                             tagname = u'%s:%s' % (prefix, tagname)
-                    else:
-                        _push_ns_attr((u'xmlns', tagns))
-                        _push_ns('', tagns)
-
-                new_attrs = []
-                for attr, value in attrs:
-                    attrname = attr.localname
-                    attrns = attr.namespace
-                    if attrns:
-                        if attrns not in namespaces:
-                            prefix = _gen_prefix()
-                            _push_ns(prefix, attrns)
-                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
-                        else:
-                            prefix = namespaces[attrns][-1]
-                        if prefix:
-                            attrname = u'%s:%s' % (prefix, attrname)
-                    new_attrs.append((attrname, value))
-
-                yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
-                del ns_attrs[:]
+                    yield kind, tagname, pos
 
-            elif kind is END:
-                tagname = data.localname
-                tagns = data.namespace
-                if tagns:
-                    prefix = namespaces[tagns][-1]
-                    if prefix:
-                        tagname = u'%s:%s' % (prefix, tagname)
-                yield kind, tagname, pos
-
-            elif kind is START_NS:
-                prefix, uri = data
-                if uri not in namespaces:
-                    prefix = prefixes.get(uri, [prefix])[-1]
-                    _push_ns_attr(_make_ns_attr(prefix, uri))
-                _push_ns(prefix, uri)
+                elif kind is START_NS:
+                    prefix, uri = data
+                    if uri not in namespaces:
+                        prefix = prefixes.get(uri, [prefix])[-1]
+                        _push_ns_attr(_make_ns_attr(prefix, uri))
+                    _push_ns(prefix, uri)
 
-            elif kind is END_NS:
-                if data in prefixes:
-                    uris = prefixes.get(data)
-                    uri = uris.pop()
-                    if not uris:
-                        del prefixes[data]
-                    if uri not in uris or uri != uris[-1]:
-                        uri_prefixes = namespaces[uri]
-                        uri_prefixes.pop()
-                        if not uri_prefixes:
-                            del namespaces[uri]
-                    if ns_attrs:
-                        attr = _make_ns_attr(data, uri)
-                        if attr in ns_attrs:
-                            ns_attrs.remove(attr)
+                elif kind is END_NS:
+                    if data in prefixes:
+                        uris = prefixes.get(data)
+                        uri = uris.pop()
+                        if not uris:
+                            del prefixes[data]
+                        if uri not in uris or uri != uris[-1]:
+                            uri_prefixes = namespaces[uri]
+                            uri_prefixes.pop()
+                            if not uri_prefixes:
+                                del namespaces[uri]
+                        if ns_attrs:
+                            attr = _make_ns_attr(data, uri)
+                            if attr in ns_attrs:
+                                ns_attrs.remove(attr)
 
-            else:
-                yield kind, data, pos
+                else:
+                    yield kind, data, pos
+        return _process(stream)
 
 
+#TODO SOC
 class WhitespaceFilter(object):
     """A filter that removes extraneous ignorable white space from the
     stream.
@@ -688,51 +731,60 @@
                  collapse_lines=re.compile('\n{2,}').sub):
         mjoin = Markup('').join
         preserve_elems = self.preserve
-        preserve = 0
         noescape_elems = self.noescape
-        noescape = False
-
-        textbuf = []
-        push_text = textbuf.append
-        pop_text = textbuf.pop
-        for kind, data, pos in chain(stream, [(None, None, None)]):
 
-            if kind is TEXT:
-                if noescape:
-                    data = Markup(data)
-                push_text(data)
-            else:
-                if textbuf:
-                    if len(textbuf) > 1:
-                        text = mjoin(textbuf, escape_quotes=False)
-                        del textbuf[:]
-                    else:
-                        text = escape(pop_text(), quotes=False)
-                    if not preserve:
-                        text = collapse_lines('\n', trim_trailing_space('', text))
-                    yield TEXT, Markup(text), pos
+        def _process(stream, preserve=0, noescape=False):
+            textbuf = []
+            push_text = textbuf.append
+            pop_text = textbuf.pop
+            for kind, data, pos in chain(stream, [(None, None, None)]):
 
-                if kind is START:
-                    tag, attrs = data
-                    if preserve or (tag in preserve_elems or
-                                    attrs.get(space) == 'preserve'):
-                        preserve += 1
-                    if not noescape and tag in noescape_elems:
+                if kind is TEXT:
+                    if noescape:
+                        data = Markup(data)
+                    push_text(data)
+                else:
+                    if textbuf:
+                        if len(textbuf) > 1:
+                            text = mjoin(textbuf, escape_quotes=False)
+                            del textbuf[:]
+                        else:
+                            text = escape(pop_text(), quotes=False)
+                        if not preserve:
+                            text = collapse_lines('\n',
+                                       trim_trailing_space('', text))
+                        yield TEXT, Markup(text), pos
+
+                    if kind is OPTIMIZED_FRAGMENT:
+                        # there's exactly the same number of STARTs as ENDs
+                        # in an optimized fragment, so no need for a higher preserve count
+                        pres = preserve and 1 or 0
+                        sstream = _process(data.get_stream(), pres, noescape)
+                        frag = data.create_child((self, pres, noescape), 
+                                                 sstream)
+                        yield (kind, frag, pos,)
+                    elif kind is START:
+                        tag, attrs = data
+                        if preserve or (tag in preserve_elems or
+                                        attrs.get(space) == 'preserve'):
+                            preserve += 1
+                        if not noescape and tag in noescape_elems:
+                            noescape = True
+
+                    elif kind is END:
+                        noescape = False
+                        if preserve:
+                            preserve -= 1
+
+                    elif kind is START_CDATA:
                         noescape = True
 
-                elif kind is END:
-                    noescape = False
-                    if preserve:
-                        preserve -= 1
+                    elif kind is END_CDATA:
+                        noescape = False
 
-                elif kind is START_CDATA:
-                    noescape = True
-
-                elif kind is END_CDATA:
-                    noescape = False
-
-                if kind:
-                    yield kind, data, pos
+                    if kind:
+                        yield kind, data, pos
+        return _process(stream)
 
 
 class DocTypeInserter(object):
@@ -748,8 +800,7 @@
             doctype = DocType.get(doctype)
         self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))
 
-    def __call__(self, stream):
-        doctype_inserted = False
+    def __call__(self, stream, doctype_inserted = False):
         for kind, data, pos in stream:
             if not doctype_inserted:
                 doctype_inserted = True
@@ -759,7 +810,13 @@
                     continue
                 yield self.doctype_event
 
-            yield (kind, data, pos)
+            # There cannot be an XML declaration inside optimized fragments
+            if kind is OPTIMIZED_FRAGMENT:
+                sstream = self(data.get_stream(), True)
+                frag = data.create_child(DocTypeInserter, sstream)
+                yield (kind, frag, pos,)
+            else:
+                yield (kind, data, pos)
 
         if not doctype_inserted:
             yield self.doctype_event
--- a/genshi/path.py
+++ b/genshi/path.py
@@ -45,6 +45,7 @@
 from genshi.core import Stream, Attrs, Namespace, QName
 from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \
                         START_CDATA, END_CDATA
+from genshi.optimization import OPTIMIZED_FRAGMENT
 
 __all__ = ['Path', 'PathSyntaxError']
 __docformat__ = 'restructuredtext en'
@@ -128,24 +129,43 @@
             namespaces = {}
         if variables is None:
             variables = {}
-        stream = iter(stream)
+        #stream stack
+        ss = [iter(stream)]
         def _generate():
             test = self.test()
-            for event in stream:
+            while ss:
+                try:
+                    event = ss[-1].next()
+                except StopIteration:
+                    ss.pop()
+                    continue
+                if event[0] is OPTIMIZED_FRAGMENT:
+                    ss.append(iter(event[1].process_stream()))
+                    continue
                 result = test(event, namespaces, variables)
                 if result is True:
                     yield event
+                    if event[0] is OPTIMIZED_FRAGMENT:
+                        for e in optimized_flatten(event[1]):
+                            test(e, namespaces, variables,
+                                 updateonly=True)
+
                     if event[0] is START:
                         depth = 1
                         while depth > 0:
-                            subevent = stream.next()
+                            subevent = ss[-1].next()
                             if subevent[0] is START:
                                 depth += 1
                             elif subevent[0] is END:
                                 depth -= 1
                             yield subevent
-                            test(subevent, namespaces, variables,
-                                 updateonly=True)
+                            if event[0] is OPTIMIZED_FRAGMENT:
+                                for e in optimized_flatten(event[1]):
+                                    test(e, namespaces, variables,
+                                         updateonly=True)
+                            else:
+                                test(subevent, namespaces, variables,
+                                     updateonly=True)
                 elif result:
                     yield result
         return Stream(_generate(),
--- a/genshi/template/base.py
+++ b/genshi/template/base.py
@@ -25,6 +25,7 @@
 
 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
 from genshi.input import ParseError
+from genshi.optimization import OPTIMIZATION_POSSIBILITY, OPTIMIZED_FRAGMENT, Optimizer, OptimizedFragment
 
 __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError',
            'TemplateSyntaxError', 'BadDirectiveError']
@@ -343,6 +344,7 @@
     _number_conv = unicode # function used to convert numbers to event data
 
     def __init__(self, source, filepath=None, filename=None, loader=None,
+                 optimizer=None, serializer = None,
                  encoding=None, lookup='strict', allow_exec=True):
         """Initialize a template from either a string, a file-like object, or
         an already parsed markup stream.
@@ -378,6 +380,14 @@
         except ParseError, e:
             raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset)
 
+        self.optimizer = optimizer
+        from genshi.output import get_serializer
+        if serializer is None:
+            self.serializerObject = get_serializer(self.serializer)
+        else:
+            self.serializerObject = get_serializer(serializer)
+
+
     def __getstate__(self):
         state = self.__dict__.copy()
         state['filters'] = []
@@ -391,7 +401,7 @@
         return '<%s "%s">' % (self.__class__.__name__, self.filename)
 
     def _init_filters(self):
-        self.filters = [self._flatten, self._eval, self._exec]
+        self.filters = [self._flatten, self._optimize, self._eval, self._exec]
         if self.loader:
             self.filters.append(self._include)
 
@@ -484,7 +494,7 @@
         stream = self.stream
         for filter_ in self.filters:
             stream = filter_(iter(stream), ctxt, **vars)
-        return Stream(stream, self.serializer)
+        return Stream(stream, self.serializerObject)
 
     def _eval(self, stream, ctxt, **vars):
         """Internal stream filter that evaluates any expressions in `START` and
@@ -591,6 +601,32 @@
             else:
                 yield event
 
+    def _optimize(self, stream, ctx, **vars):
+        """Changes optimization possibilities into optimized fragments"""
+        for event in stream:
+            if event[0] is OPTIMIZATION_POSSIBILITY:
+                substream, subvars, subid = event[1]
+
+                # don't want match here, match should work after all optimizations
+                filters = (self._flatten, self._optimize,
+                           self._eval, self._exec)
+                for filter_ in filters:
+                    substream = filter_(substream, ctx, **vars)
+                if self.optimizer is None:
+                    for subevent in substream:
+                        yield subevent
+                elif len(subvars) == 0:
+                    fragmentId = self.optimizer.get_fragment_id(subid, 
+                                                                *subvars)
+                    ret = OptimizedFragment(substream, self.optimizer,
+                                            fragmentId)
+                    yield (OPTIMIZED_FRAGMENT, ret, event[2],)
+                else:                        
+                    for subevent in self._optimize(substream, ctx, **vars):
+                        yield subevent
+            else:
+                yield event
+
 
 EXEC = Template.EXEC
 EXPR = Template.EXPR
--- a/genshi/template/directives.py
+++ b/genshi/template/directives.py
@@ -25,9 +25,12 @@
                                  EXPR, _apply_directives, _eval_expr, \
                                  _exec_suite
 from genshi.template.eval import Expression, ExpressionASTTransformer, _parse
+from genshi.optimization import OPTIMIZATION_POSSIBILITY
 
+#TODO SOC: once the directive's final name is chosen, fix the line breaks below
 __all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
            'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
+           'OptimizeDirective',
            'OtherwiseDirective', 'ReplaceDirective', 'StripDirective',
            'WhenDirective', 'WithDirective']
 __docformat__ = 'restructuredtext en'
@@ -463,6 +466,29 @@
     def __repr__(self):
         return '<%s "%s">' % (self.__class__.__name__, self.path.source)
 
+class OptimizeDirective(Directive):
+    """Implementation of the ``py:optimize`` template directive.
+
+    This directive marks a subtree of the stream as safe to optimize,
+    depending only on the variables whose names are given in the argument.
+    """
+
+    def __init__(self, value, template=None, namespaces=None, lineno=-1,
+                 offset=-1):
+        self.vars = filter(None, (x.strip() for x in value.split(",")))
+
+    def attach(cls, template, stream, value, namespaces, pos):
+        if type(value) is dict:
+            value = value.get('vars')
+        return super(OptimizeDirective, cls).attach(template, stream, value,
+                                              namespaces, pos)
+    attach = classmethod(attach)
+
+    def __call__(self, stream, directives, ctxt, **vars):
+        stream = _apply_directives(stream, directives, ctxt, **vars)
+        #TODO SOC: Should we really use id(self) as the fragment identifier?
+        return ((OPTIMIZATION_POSSIBILITY, (stream, self.vars, id(self)),
+                                           (None, -1, -1),),)
 
 class ReplaceDirective(Directive):
     """Implementation of the ``py:replace`` template directive.
--- a/genshi/template/markup.py
+++ b/genshi/template/markup.py
@@ -25,6 +25,7 @@
 from genshi.template.interpolation import interpolate
 from genshi.template.directives import *
 from genshi.template.text import NewTextTemplate
+from genshi.optimization import OPTIMIZED_FRAGMENT
 
 __all__ = ['MarkupTemplate']
 __docformat__ = 'restructuredtext en'
@@ -53,6 +54,7 @@
                   ('if', IfDirective),
                   ('choose', ChooseDirective),
                   ('with', WithDirective),
+                  ('optimize', OptimizeDirective),
                   ('replace', ReplaceDirective),
                   ('content', ContentDirective),
                   ('attrs', AttrsDirective),
@@ -221,7 +223,7 @@
         assert len(streams) == 1
         return streams[0]
 
-    def _match(self, stream, ctxt, match_templates=None, **vars):
+    def _match(self, stream, ctxt, match_templates=None, _matched=None, **vars):
         """Internal stream filter that applies any defined match templates
         to the stream.
         """
@@ -245,6 +247,18 @@
 
         for event in stream:
 
+            if event[0] is OPTIMIZED_FRAGMENT:
+                substream = iter(event[1].process_stream())
+                matched = [False]
+                substream = self._match(substream, ctxt, match_templates, matched)
+                substream = list(substream)
+                if not matched[0]:
+                    yield event
+                else:
+                    for subevent in substream:
+                        yield subevent
+                continue
+
             # We (currently) only care about start and end events for matching
             # We might care about namespace events in the future, though
             if not match_templates or (event[0] is not START and
@@ -256,6 +270,8 @@
                     in enumerate(match_templates):
 
                 if test(event, namespaces, ctxt) is True:
+                    if _matched is not None:
+                        _matched[0] = True
                     if 'match_once' in hints:
                         del match_templates[idx]
                         idx -= 1
@@ -293,7 +309,9 @@
                     for event in self._match(
                             self._exec(
                                 self._eval(
-                                    self._flatten(template, ctxt, **vars),
+                                    self._optimize(
+                                        self._flatten(template, ctxt, **vars),
+                                        ctxt, **vars),
                                     ctxt, **vars),
                                 ctxt, **vars),
                             ctxt, match_templates[idx + 1:], **vars):
--- a/genshi/template/tests/__init__.py
+++ b/genshi/template/tests/__init__.py
@@ -16,13 +16,15 @@
 
 def suite():
     from genshi.template.tests import base, directives, eval, interpolation, \
-                                      loader, markup, plugin, text
+                                      loader, optimization, markup, plugin, \
+                                      text
     suite = unittest.TestSuite()
     suite.addTest(base.suite())
     suite.addTest(directives.suite())
     suite.addTest(eval.suite())
     suite.addTest(interpolation.suite())
     suite.addTest(loader.suite())
+    suite.addTest(optimization.suite())
     suite.addTest(markup.suite())
     suite.addTest(plugin.suite())
     suite.addTest(text.suite())
new file mode 100644
--- /dev/null
+++ b/genshi/template/tests/optimization.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import doctest
+import unittest
+import sys
+
+from genshi.core import Attrs, Stream, QName
+from genshi.input import HTML, XML
+from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \
+                          HTMLSerializer, EmptyTagFilter
+from genshi.optimization import Optimizer, OptimizedFragment, OPTIMIZED_FRAGMENT
+
+from genshi.template.base import BadDirectiveError, TemplateSyntaxError
+from genshi.template.markup import MarkupTemplate
+
+
+def _unopt(code):
+    return code.replace(' py:optimize=""', '')
+
+class OptimizedTemplatesTestCase(unittest.TestCase):
+
+    def _test_doc(self, doc, serializer='xml'):
+        unopt = MarkupTemplate(XML(_unopt(doc)), serializer=serializer)
+        optimizer = Optimizer(10)
+        opt = MarkupTemplate(XML(doc), serializer=serializer,
+                                optimizer=optimizer)
+        result = unopt.generate().render()
+        #non-cached one
+        self.assertEqual(opt.generate().render(), result)
+        #cached one
+        self.assertEqual(opt.generate().render(), result)
+    def test_double_match(self):
+        code = """\
+<root xmlns:py="http://genshi.edgewall.org/">
+    <py:match path="tag/test">
+        <other>
+            ${select('.')}
+        </other>
+        <foo py:optimize="">
+            Some text that could <b>be</b> optimized.
+        </foo>
+    </py:match>
+    <py:match path="tag/other/test">
+        <other>
+            ${select('.')}
+        </other>
+    </py:match>
+    <tag>
+        <test py:optimize="">
+            Foo bar <i>bar</i>
+        </test>
+    </tag>
+</root>
+"""
+        self._test_doc(code)
+
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(OptimizedTemplatesTestCase, 'test'))
+    return suite
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
--- a/genshi/tests/__init__.py
+++ b/genshi/tests/__init__.py
@@ -15,7 +15,8 @@
 
 def suite():
     import genshi
-    from genshi.tests import builder, core, input, output, path, util
+    from genshi.tests import builder, core, input, optimization, \
+                             output, path, util
     from genshi.filters import tests as filters
     from genshi.template import tests as template
 
@@ -25,6 +26,7 @@
     suite.addTest(filters.suite())
     suite.addTest(input.suite())
     suite.addTest(output.suite())
+    suite.addTest(optimization.suite())
     suite.addTest(path.suite())
     suite.addTest(template.suite())
     suite.addTest(util.suite())
new file mode 100644
--- /dev/null
+++ b/genshi/tests/optimization.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import doctest
+import unittest
+import sys
+
+from genshi.core import Attrs, Stream, QName
+from genshi.input import HTML, XML
+from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \
+                          HTMLSerializer, EmptyTagFilter
+from genshi.optimization import Optimizer, OptimizedFragment, OPTIMIZED_FRAGMENT
+
+class FiltersOptimizationTestCase(unittest.TestCase):
+
+    def _inner_helper(self, istream):
+        optimizer = Optimizer(10)
+        of = OptimizedFragment(istream, optimizer, 1, 0)
+        stream = Stream([(OPTIMIZED_FRAGMENT, of, (None, -1, -1))])
+        return stream
+
+    def _test_doc(self, doc, serializer):
+        istream = XML(doc)
+        istream = Stream(list(istream), serializer)
+        stream = self._inner_helper(istream)
+        #non-cached one
+        self.assertEqual(stream.render(), istream.render())
+        #cached one
+        self.assertEqual(stream.render(), istream.render())
+
+    test_doc = """\
+<div>
+  <head>
+    <title>Hello world</title>
+    <style type="text/css">@import(style.css)</style>
+  </head>
+  <div>
+    Hello everyone!
+  </div>
+  <span class="greeting">
+        And you too!
+  </span>
+</div>
+"""
+    def test_xml_serializer(self):
+        self._test_doc(self.test_doc, XMLSerializer())
+    def test_xhtml_serializer(self):
+        self._test_doc(self.test_doc, XHTMLSerializer())
+    def test_html_serializer(self):
+        self._test_doc(self.test_doc, HTMLSerializer())
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(FiltersOptimizationTestCase, 'test'))
+    return suite
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
Copyright (C) 2012-2017 Edgewall Software