changeset 734:ea2566b2f226 trunk

Fixed some unintuitive behaviour in `Transformer.{cut,copy}`. The old behaviour was to copy all selected events in the stream before passing to the next transform. The new behaviour is to copy only a single contiguous block before yielding to the next transform. The previous behaviour can be preserved with the following construct: `...copy(buffer, accumulate=True).buffer()...`
author athomas
date Wed, 04 Jun 2008 13:02:00 +0000
parents 43147cbc9ea3
children 7e428b22dbaa
files genshi/filters/transform.py
diffstat 1 files changed, 79 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/genshi/filters/transform.py
+++ b/genshi/filters/transform.py
@@ -440,9 +440,13 @@
 
     #{ Buffer operations
 
-    def copy(self, buffer):
+    def copy(self, buffer, accumulate=False):
         """Copy selection into buffer.
 
+        The buffer is replaced by each contiguous selection before being passed
+        to the next transformation. If accumulate=True, further selections will
+        be appended to the buffer rather than replacing it.
+
         >>> from genshi.builder import tag
         >>> buffer = StreamBuffer()
         >>> html = HTML('<html><head><title>Some Title</title></head>'
@@ -452,17 +456,14 @@
         <html><head><title>Some Title</title></head><body><h1>Some
         Title</h1>Some <em>body</em> text.</body></html>
 
-        To ensure that a transformation can be reused deterministically, the
-        contents of ``buffer`` is replaced by the ``copy()`` operation:
+        This example illustrates that only a single contiguous selection will
+        be buffered:
 
-        >>> print buffer
-        Some Title
         >>> print html | Transformer('head/title/text()').copy(buffer) \\
         ...     .end().select('body/em').copy(buffer).end().select('body') \\
         ...     .prepend(tag.h1(buffer))
-        <html><head><title>Some
-        Title</title></head><body><h1><em>body</em></h1>Some <em>body</em>
-        text.</body></html>
+        <html><head><title>Some Title</title></head><body><h1>Some
+        Title</h1>Some <em>body</em> text.</body></html>
         >>> print buffer
         <em>body</em>
 
@@ -475,7 +476,8 @@
         >>> def apply_attr(name, entry):
         ...     return list(buffer)[0][1][1].get('class')
         >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\
-        ...     .end().select('body/em[not(@class)]').attr('class', apply_attr)
+        ...     .end().buffer().select('body/em[not(@class)]') \\
+        ...     .attr('class', apply_attr)
         <html><head><title>Some Title</title></head><body><em
         class="before">Some</em> <em class="before">body</em><em
         class="before">text</em>.</body></html>
@@ -486,9 +488,9 @@
         :rtype: `Transformer`
         :note: this transformation will buffer the entire input stream
         """
-        return self.apply(CopyTransformation(buffer))
+        return self.apply(CopyTransformation(buffer, accumulate))
 
-    def cut(self, buffer):
+    def cut(self, buffer, accumulate=False):
         """Copy selection into buffer and remove the selection from the stream.
 
         >>> from genshi.builder import tag
@@ -500,12 +502,40 @@
         <html><head><title>Some Title</title></head><body>Some
         <em/><h1>body</h1> text.</body></html>
 
+        Specifying accumulate=True appends all selected intervals onto the
+        buffer. Combining this with the .buffer() operation allows us to
+        operate on all copied events rather than per-segment. See the
+        documentation on buffer() for more information.
+
         :param buffer: the `StreamBuffer` in which the selection should be
                        stored
         :rtype: `Transformer`
         :note: this transformation will buffer the entire input stream
         """
-        return self.apply(CutTransformation(buffer))
+        return self.apply(CutTransformation(buffer, accumulate))
+
+    def buffer(self):
+        """Buffer the entire stream (can consume a considerable amount of
+        memory).
+
+        Useful in conjunction with copy(accumulate=True) and
+        cut(accumulate=True) to ensure that all marked events in the entire
+        stream are copied to the buffer before further transformations are
+        applied.
+
+        For example, to move all <note> elements inside a <notes> tag at the
+        top of the document:
+
+        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
+        ...            'text <note>two</note>.</body></doc>')
+        >>> buffer = StreamBuffer()
+        >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\
+        ...     .end().buffer().select('notes').prepend(buffer)
+        <doc><notes><note>one</note><note>two</note></notes><body>Some  text
+        .</body></doc>
+
+        """
+        return self.apply(list)
 
     #{ Miscellaneous operations
 
@@ -1087,25 +1117,35 @@
 class CopyTransformation(object):
     """Copy selected events into a buffer for later insertion."""
 
-    def __init__(self, buffer):
+    def __init__(self, buffer, accumulate=False):
         """Create the copy transformation.
 
         :param buffer: the `StreamBuffer` in which the selection should be
                        stored
         """
+        if not accumulate:
+            buffer.reset()
         self.buffer = buffer
+        self.accumulate = accumulate
 
     def __call__(self, stream):
         """Apply the transformation to the marked stream.
 
         :param stream: the marked event stream to filter
         """
-        self.buffer.reset()
-        stream = list(stream)
+        stream = iter(stream)
         for mark, event in stream:
             if mark:
-                self.buffer.append(event)
-        return stream
+                if not self.accumulate:
+                    self.buffer.reset()
+                events = []
+                while mark:
+                    events.append((mark, event))
+                    self.buffer.append(event)
+                    mark, event = stream.next()
+                for i in events:
+                    yield i
+            yield mark, event
 
 
 class CutTransformation(object):
@@ -1113,36 +1153,39 @@
     selection.
     """
 
-    def __init__(self, buffer):
+    def __init__(self, buffer, accumulate=False):
         """Create the cut transformation.
 
         :param buffer: the `StreamBuffer` in which the selection should be
                        stored
         """
+        if not accumulate:
+            buffer.reset()
         self.buffer = buffer
+        self.accumulate = accumulate
+
 
     def __call__(self, stream):
         """Apply the transform filter to the marked stream.
 
         :param stream: the marked event stream to filter
         """
-        out_stream = []
         attributes = None
-        for mark, (kind, data, pos) in stream:
-            if attributes:
-                assert kind is START
-                data = (data[0], data[1] - attributes)
-                attributes = None
+        stream = iter(stream)
+        for mark, event in stream:
             if mark:
-                # There is some magic here. ATTR marked events are pushed into
-                # the stream *before* the START event they originated from.
-                # This allows cut() to strip out the attributes from START
-                # event as would be expected.
-                if mark is ATTR:
-                    self.buffer.append((kind, data, pos))
-                    attributes = [name for name, _ in data[1]]
-                else:
-                    self.buffer.append((kind, data, pos))
-            else:
-                out_stream.append((mark, (kind, data, pos)))
-        return out_stream
+                if not self.accumulate:
+                    self.buffer.reset()
+                while mark:
+                    if mark is ATTR:
+                        attributes = [name for name, _ in data[1]]
+                    self.buffer.append(event)
+                    mark, event = stream.next()
+                # If we've cut attributes, the associated element should START
+                # immediately after.
+                if attributes:
+                    assert kind is START
+                    data = (data[0], data[1] - attributes)
+                    attributes = None
+
+            yield mark, event
Copyright (C) 2012-2017 Edgewall Software