# HG changeset patch
# User athomas
# Date 1212584520 0
# Node ID ea2566b2f226fdf0c0844a0bc28fa283c2e42582
# Parent 43147cbc9ea34c5e8a63a4c49917e57521963d89
Fixed some unintuitive behaviour in `Transformer.{cut,copy}`.
The old behaviour was to copy all selected events in the stream before passing
to the next transform. The new behaviour is to copy only a single contiguous
block before yielding to the next transform.
The previous behaviour can be preserved with the following construct:
`...copy(buffer, accumulate=True).buffer()...`
diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py
--- a/genshi/filters/transform.py
+++ b/genshi/filters/transform.py
@@ -440,9 +440,13 @@
#{ Buffer operations
- def copy(self, buffer):
+ def copy(self, buffer, accumulate=False):
"""Copy selection into buffer.
+ The buffer is replaced by each contiguous selection before being passed
+ to the next transformation. If accumulate=True, further selections will
+ be appended to the buffer rather than replacing it.
+
>>> from genshi.builder import tag
>>> buffer = StreamBuffer()
>>> html = HTML('
Some Title'
@@ -452,17 +456,14 @@
Some TitleSome
Title
Some body text.
- To ensure that a transformation can be reused deterministically, the
- contents of ``buffer`` is replaced by the ``copy()`` operation:
+ This example illustrates that only a single contiguous selection will
+ be buffered:
- >>> print buffer
- Some Title
>>> print html | Transformer('head/title/text()').copy(buffer) \\
... .end().select('body/em').copy(buffer).end().select('body') \\
... .prepend(tag.h1(buffer))
- Some
- Titlebody
Some body
- text.
+ Some TitleSome
+ Title
Some body text.
>>> print buffer
body
@@ -475,7 +476,8 @@
>>> def apply_attr(name, entry):
... return list(buffer)[0][1][1].get('class')
>>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\
- ... .end().select('body/em[not(@class)]').attr('class', apply_attr)
+ ... .end().buffer().select('body/em[not(@class)]') \\
+ ... .attr('class', apply_attr)
Some TitleSome bodytext.
@@ -486,9 +488,9 @@
:rtype: `Transformer`
:note: this transformation will buffer the entire input stream
"""
- return self.apply(CopyTransformation(buffer))
+ return self.apply(CopyTransformation(buffer, accumulate))
- def cut(self, buffer):
+ def cut(self, buffer, accumulate=False):
"""Copy selection into buffer and remove the selection from the stream.
>>> from genshi.builder import tag
@@ -500,12 +502,40 @@
Some TitleSome
body
text.
+ Specifying accumulate=True appends all selected intervals onto the
+ buffer. Combining this with the .buffer() operation allows us to operate
+ on all copied events rather than per-segment. See the documentation on
+ buffer() for more information.
+
:param buffer: the `StreamBuffer` in which the selection should be
stored
:rtype: `Transformer`
:note: this transformation will buffer the entire input stream
"""
- return self.apply(CutTransformation(buffer))
+ return self.apply(CutTransformation(buffer, accumulate))
+
+ def buffer(self):
+ """Buffer the entire stream (can consume a considerable amount of
+ memory).
+
+ Useful in conjunction with copy(accumulate=True) and
+ cut(accumulate=True) to ensure that all marked events in the entire
+ stream are copied to the buffer before further transformations are
+ applied.
+
+ For example, to move all <note> elements inside a <notes> tag at the
+ top of the document:
+
+ >>> doc = HTML('Some one '
+ ... 'text two.')
+ >>> buffer = StreamBuffer()
+ >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\
+ ... .end().buffer().select('notes').prepend(buffer)
+ onetwoSome text
+ .
+
+ """
+ return self.apply(list)
#{ Miscellaneous operations
@@ -1087,25 +1117,35 @@
class CopyTransformation(object):
"""Copy selected events into a buffer for later insertion."""
- def __init__(self, buffer):
+ def __init__(self, buffer, accumulate=False):
"""Create the copy transformation.
:param buffer: the `StreamBuffer` in which the selection should be
stored
"""
+ if not accumulate:
+ buffer.reset()
self.buffer = buffer
+ self.accumulate = accumulate
def __call__(self, stream):
"""Apply the transformation to the marked stream.
:param stream: the marked event stream to filter
"""
- self.buffer.reset()
- stream = list(stream)
+ stream = iter(stream)
for mark, event in stream:
if mark:
- self.buffer.append(event)
- return stream
+ if not self.accumulate:
+ self.buffer.reset()
+ events = []
+ while mark:
+ events.append((mark, event))
+ self.buffer.append(event)
+ mark, event = stream.next()
+ for i in events:
+ yield i
+ yield mark, event
class CutTransformation(object):
@@ -1113,36 +1153,39 @@
selection.
"""
- def __init__(self, buffer):
+ def __init__(self, buffer, accumulate=False):
"""Create the cut transformation.
:param buffer: the `StreamBuffer` in which the selection should be
stored
"""
+ if not accumulate:
+ buffer.reset()
self.buffer = buffer
+ self.accumulate = accumulate
+
def __call__(self, stream):
"""Apply the transform filter to the marked stream.
:param stream: the marked event stream to filter
"""
- out_stream = []
attributes = None
- for mark, (kind, data, pos) in stream:
- if attributes:
- assert kind is START
- data = (data[0], data[1] - attributes)
- attributes = None
+ stream = iter(stream)
+ for mark, event in stream:
if mark:
- # There is some magic here. ATTR marked events are pushed into
- # the stream *before* the START event they originated from.
- # This allows cut() to strip out the attributes from START
- # event as would be expected.
- if mark is ATTR:
- self.buffer.append((kind, data, pos))
- attributes = [name for name, _ in data[1]]
- else:
- self.buffer.append((kind, data, pos))
- else:
- out_stream.append((mark, (kind, data, pos)))
- return out_stream
+ if not self.accumulate:
+ self.buffer.reset()
+ while mark:
+ if mark is ATTR:
+ attributes = [name for name, _ in data[1]]
+ self.buffer.append(event)
+ mark, event = stream.next()
+ # If we've cut attributes, the associated element should START
+ # immediately after.
+ if attributes:
+ assert kind is START
+ data = (data[0], data[1] - attributes)
+ attributes = None
+
+ yield mark, event