# HG changeset patch # User athomas # Date 1212584520 0 # Node ID ea2566b2f226fdf0c0844a0bc28fa283c2e42582 # Parent 43147cbc9ea34c5e8a63a4c49917e57521963d89 Fixed some unintuitive behaviour in `Transformer.{cut,copy}`. The old behaviour was to copy all selected events in the stream before passing to the next transform. The new behaviour is to copy only a single contiguous block before yielding to the next transform. The previous behaviour can be preserved with the following construct: `...copy(buffer, accumulate=True).buffer()...` diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py --- a/genshi/filters/transform.py +++ b/genshi/filters/transform.py @@ -440,9 +440,13 @@ #{ Buffer operations - def copy(self, buffer): + def copy(self, buffer, accumulate=False): """Copy selection into buffer. + The buffer is replaced by each contiguous selection before being passed + to the next transformation. If accumulate=True, further selections will + be appended to the buffer rather than replacing it. + >>> from genshi.builder import tag >>> buffer = StreamBuffer() >>> html = HTML('Some Title' @@ -452,17 +456,14 @@ Some Title

Some Title

Some body text. - To ensure that a transformation can be reused deterministically, the - contents of ``buffer`` is replaced by the ``copy()`` operation: + This example illustrates that only a single contiguous selection will + be buffered: - >>> print buffer - Some Title >>> print html | Transformer('head/title/text()').copy(buffer) \\ ... .end().select('body/em').copy(buffer).end().select('body') \\ ... .prepend(tag.h1(buffer)) - Some - Title

body

Some body - text. + Some Title

Some + Title

Some body text. >>> print buffer body @@ -475,7 +476,8 @@ >>> def apply_attr(name, entry): ... return list(buffer)[0][1][1].get('class') >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\ - ... .end().select('body/em[not(@class)]').attr('class', apply_attr) + ... .end().buffer().select('body/em[not(@class)]') \\ + ... .attr('class', apply_attr) Some TitleSome bodytext. @@ -486,9 +488,9 @@ :rtype: `Transformer` :note: this transformation will buffer the entire input stream """ - return self.apply(CopyTransformation(buffer)) + return self.apply(CopyTransformation(buffer, accumulate)) - def cut(self, buffer): + def cut(self, buffer, accumulate=False): """Copy selection into buffer and remove the selection from the stream. >>> from genshi.builder import tag @@ -500,12 +502,40 @@ Some TitleSome

body

text. + Specifying accumulate=True, appends all selected intervals onto the + buffer. Combining this with the .buffer() operation allows us operate + on all copied events rather than per-segment. See the documentation on + buffer() for more information. + :param buffer: the `StreamBuffer` in which the selection should be stored :rtype: `Transformer` :note: this transformation will buffer the entire input stream """ - return self.apply(CutTransformation(buffer)) + return self.apply(CutTransformation(buffer, accumulate)) + + def buffer(self): + """Buffer the entire stream (can consume a considerable amount of + memory). + + Useful in conjunction with copy(accumulate=True) and + cut(accumulate=True) to ensure that all marked events in the entire + stream are copied to the buffer before further transformations are + applied. + + For example, to move all elements inside a tag at the + top of the document: + + >>> doc = HTML('Some one ' + ... 'text two.') + >>> buffer = StreamBuffer() + >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\ + ... .end().buffer().select('notes').prepend(buffer) + onetwoSome text + . + + """ + return self.apply(list) #{ Miscellaneous operations @@ -1087,25 +1117,35 @@ class CopyTransformation(object): """Copy selected events into a buffer for later insertion.""" - def __init__(self, buffer): + def __init__(self, buffer, accumulate=False): """Create the copy transformation. :param buffer: the `StreamBuffer` in which the selection should be stored """ + if not accumulate: + buffer.reset() self.buffer = buffer + self.accumulate = accumulate def __call__(self, stream): """Apply the transformation to the marked stream. 
:param stream: the marked event stream to filter """ - self.buffer.reset() - stream = list(stream) + stream = iter(stream) for mark, event in stream: if mark: - self.buffer.append(event) - return stream + if not self.accumulate: + self.buffer.reset() + events = [] + while mark: + events.append((mark, event)) + self.buffer.append(event) + mark, event = stream.next() + for i in events: + yield i + yield mark, event class CutTransformation(object): @@ -1113,36 +1153,39 @@ selection. """ - def __init__(self, buffer): + def __init__(self, buffer, accumulate=False): """Create the cut transformation. :param buffer: the `StreamBuffer` in which the selection should be stored """ + if not accumulate: + buffer.reset() self.buffer = buffer + self.accumulate = accumulate + def __call__(self, stream): """Apply the transform filter to the marked stream. :param stream: the marked event stream to filter """ - out_stream = [] attributes = None - for mark, (kind, data, pos) in stream: - if attributes: - assert kind is START - data = (data[0], data[1] - attributes) - attributes = None + stream = iter(stream) + for mark, event in stream: if mark: - # There is some magic here. ATTR marked events are pushed into - # the stream *before* the START event they originated from. - # This allows cut() to strip out the attributes from START - # event as would be expected. - if mark is ATTR: - self.buffer.append((kind, data, pos)) - attributes = [name for name, _ in data[1]] - else: - self.buffer.append((kind, data, pos)) - else: - out_stream.append((mark, (kind, data, pos))) - return out_stream + if not self.accumulate: + self.buffer.reset() + while mark: + if mark is ATTR: + attributes = [name for name, _ in data[1]] + self.buffer.append(event) + mark, event = stream.next() + # If we've cut attributes, the associated element should START + # immediately after. + if attributes: + assert kind is START + data = (data[0], data[1] - attributes) + attributes = None + + yield mark, event