genshi/genshi-test: genshi/filters/transform.py comparison

comparison genshi/filters/transform.py @ 784:67d324a62cc0 experimental-match-fastpaths

Update to the 0.5.x branch, up through r907. I don't know how this fits in with the SoC work, but I wanted to do due diligence and keep this branch working in case it someday gets considered for trunk.

author	aflett
date	Mon, 21 Jul 2008 23:17:52 +0000
parents	b57681255af9
children

comparison

equal deleted inserted replaced

-:8f2c7023af94
+:67d324a62cc0
 from genshi.builder import Element
 from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup
 from genshi.path import Path
 __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER',
-'EXIT', 'INSIDE', 'OUTSIDE']
+'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK']
 class TransformMark(str):
 """A mark on a transformation stream."""
 __slots__ = []
 """Stream augmentation mark indicating a selected element attribute."""
 EXIT = TransformMark('EXIT')
 """Stream augmentation mark indicating that a selected element is being
 exited."""
+BREAK = TransformMark('BREAK')
+"""Stream augmentation mark indicating a break between two otherwise contiguous
+blocks of marked events.
+This is used primarily by the cut() transform to provide later transforms with
+an opportunity to operate on the cut buffer.
+"""
+class PushBackStream(object):
+"""Allows a single event to be pushed back onto the stream and re-consumed.
+"""
+def __init__(self, stream):
+self.stream = iter(stream)
+self.peek = None
+def push(self, event):
+assert self.peek is None
+self.peek = event
+def __iter__(self):
+while True:
+if self.peek is not None:
+peek = self.peek
+self.peek = None
+yield peek
+else:
+try:
+event = self.stream.next()
+yield event
+except StopIteration:
+if self.peek is None:
+raise
 class Transformer(object):
 """Stream filter that can apply a variety of different transformations to
 a stream.
 :param path: an XPath expression (as string) or a `Path` instance
 """
 self.transforms = [SelectTransformation(path)]
-def __call__(self, stream):
+def __call__(self, stream, keep_marks=False):
 """Apply the transform filter to the marked stream.
 :param stream: the marked event stream to filter
+:param keep_marks: Do not strip transformer selection marks from the
+stream. Useful for testing.
 :return: the transformed stream
 :rtype: `Stream`
 """
 transforms = self._mark(stream)
 for link in self.transforms:
 transforms = link(transforms)
-return Stream(self._unmark(transforms),
+if not keep_marks:
+transforms = self._unmark(transforms)
+return Stream(transforms,
 serializer=getattr(stream, 'serializer', None))
 def apply(self, function):
 """Apply a transformation to the stream.
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('.//title/text()').replace('New Title')
 <html><head><title>New Title</title></head><body>Some <em>body</em>
 text.</body></html>
-:param content: Either an iterable of events or a string to insert.
+:param content: Either a callable, an iterable of events, or a string
+to insert.
 :rtype: `Transformer`
 """
 return self.apply(ReplaceTransformation(content))
 def before(self, content):
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('.//em').before('emphasised ')
 <html><head><title>Some Title</title></head><body>Some emphasised
 <em>body</em> text.</body></html>
-:param content: Either an iterable of events or a string to insert.
+:param content: Either a callable, an iterable of events, or a string
+to insert.
 :rtype: `Transformer`
 """
 return self.apply(BeforeTransformation(content))
 def after(self, content):
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('.//em').after(' rock')
 <html><head><title>Some Title</title></head><body>Some <em>body</em>
 rock text.</body></html>
-:param content: Either an iterable of events or a string to insert.
+:param content: Either a callable, an iterable of events, or a string
+to insert.
 :rtype: `Transformer`
 """
 return self.apply(AfterTransformation(content))
 def prepend(self, content):
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('.//body').prepend('Some new body text. ')
 <html><head><title>Some Title</title></head><body>Some new body text.
 Some <em>body</em> text.</body></html>
-:param content: Either an iterable of events or a string to insert.
+:param content: Either a callable, an iterable of events, or a string
+to insert.
 :rtype: `Transformer`
 """
 return self.apply(PrependTransformation(content))
 def append(self, content):
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('.//body').append(' Some new body text.')
 <html><head><title>Some Title</title></head><body>Some <em>body</em>
 text. Some new body text.</body></html>
-:param content: Either an iterable of events or a string to insert.
+:param content: Either a callable, an iterable of events, or a string
+to insert.
 :rtype: `Transformer`
 """
 return self.apply(AppendTransformation(content))
 #{ Attribute manipulation
 """
 return self.apply(AttrTransformation(name, value))
 #{ Buffer operations
-def copy(self, buffer):
+def copy(self, buffer, accumulate=False):
 """Copy selection into buffer.
+The buffer is replaced by each *contiguous* selection before being passed
+to the next transformation. If accumulate=True, further selections will
+be appended to the buffer rather than replacing it.
 >>> from genshi.builder import tag
 >>> buffer = StreamBuffer()
 >>> html = HTML('<html><head><title>Some Title</title></head>'
 ...             '<body>Some <em>body</em> text.</body></html>')
 >>> print html | Transformer('title/text()').copy(buffer) \\
 ...     .end().select('body').prepend(tag.h1(buffer))
 <html><head><title>Some Title</title></head><body><h1>Some
 Title</h1>Some <em>body</em> text.</body></html>
-To ensure that a transformation can be reused deterministically, the
+This example illustrates that only a single contiguous selection will
-contents of ``buffer`` is replaced by the ``copy()`` operation:
+be buffered:
->>> print buffer
-Some Title
 >>> print html | Transformer('head/title/text()').copy(buffer) \\
 ...     .end().select('body/em').copy(buffer).end().select('body') \\
 ...     .prepend(tag.h1(buffer))
-<html><head><title>Some
+<html><head><title>Some Title</title></head><body><h1>Some
-Title</title></head><body><h1><em>body</em></h1>Some <em>body</em>
+Title</h1>Some <em>body</em> text.</body></html>
-text.</body></html>
 >>> print buffer
 <em>body</em>
 Element attributes can also be copied for later use:
 ...             '<em>text</em>.</body></html>')
 >>> buffer = StreamBuffer()
 >>> def apply_attr(name, entry):
 ...     return list(buffer)[0][1][1].get('class')
 >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\
-...     .end().select('body/em[not(@class)]').attr('class', apply_attr)
+...     .end().buffer().select('body/em[not(@class)]') \\
+...     .attr('class', apply_attr)
 <html><head><title>Some Title</title></head><body><em
 class="before">Some</em> <em class="before">body</em><em
 class="before">text</em>.</body></html>
 :param buffer: the `StreamBuffer` in which the selection should be
 stored
 :rtype: `Transformer`
-:note: this transformation will buffer the entire input stream
+:note: Copy (and cut) copy each individual selected object into the
-"""
+buffer before passing to the next transform. For example, the
-return self.apply(CopyTransformation(buffer))
+XPath ``*|text()`` will select all elements and text, each
+instance of which will be copied to the buffer individually
-def cut(self, buffer):
+before passing to the next transform. This has implications for
+how ``StreamBuffer`` objects can be used, so some
+experimentation may be required.
+"""
+return self.apply(CopyTransformation(buffer, accumulate))
+def cut(self, buffer, accumulate=False):
 """Copy selection into buffer and remove the selection from the stream.
 >>> from genshi.builder import tag
 >>> buffer = StreamBuffer()
 >>> html = HTML('<html><head><title>Some Title</title></head>'
 >>> print html | Transformer('.//em/text()').cut(buffer) \\
 ...     .end().select('.//em').after(tag.h1(buffer))
 <html><head><title>Some Title</title></head><body>Some
 <em/><h1>body</h1> text.</body></html>
+Specifying accumulate=True appends all selected intervals onto the
+buffer. Combining this with the .buffer() operation allows us to operate
+on all copied events rather than per-segment. See the documentation on
+buffer() for more information.
 :param buffer: the `StreamBuffer` in which the selection should be
 stored
 :rtype: `Transformer`
 :note: this transformation will buffer the entire input stream
 """
-return self.apply(CutTransformation(buffer))
+return self.apply(CutTransformation(buffer, accumulate))
+def buffer(self):
+"""Buffer the entire stream (can consume a considerable amount of
+memory).
+Useful in conjunction with copy(accumulate=True) and
+cut(accumulate=True) to ensure that all marked events in the entire
+stream are copied to the buffer before further transformations are
+applied.
+For example, to move all <note> elements inside a <notes> tag at the
+top of the document:
+>>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
+...            'text <note>two</note>.</body></doc>')
+>>> buffer = StreamBuffer()
+>>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\
+...     .end().buffer().select('notes').prepend(buffer)
+<doc><notes><note>one</note><note>two</note></notes><body>Some  text
+.</body></doc>
+"""
+return self.apply(list)
 #{ Miscellaneous operations
 def filter(self, filter):
 """Apply a normal stream filter to the selection. The filter is called
 """Replace text matching a regular expression.
 Refer to the documentation for ``re.sub()`` for details.
 >>> html = HTML('<html><body>Some text, some more text and '
-...             '<b>some bold text</b></body></html>')
+...             '<b>some bold text</b>\\n'
->>> print html | Transformer('body').substitute('(?i)some', 'SOME')
+...             '<i>some italicised text</i></body></html>')
-<html><body>SOME text, some more text and <b>SOME bold text</b></body></html>
+>>> print html | Transformer('body/b').substitute('(?i)some', 'SOME')
->>> tags = tag.html(tag.body('Some text, some more text and ',
+<html><body>Some text, some more text and <b>SOME bold text</b>
+<i>some italicised text</i></body></html>
+>>> tags = tag.html(tag.body('Some text, some more text and\\n',
 ...      Markup('<b>some bold text</b>')))
->>> print tags.generate() | Transformer('body').substitute('(?i)some', 'SOME')
+>>> print tags.generate() | Transformer('body').substitute(
-<html><body>SOME text, some more text and <b>SOME bold text</b></body></html>
+...     '(?i)some', 'SOME')
+<html><body>SOME text, some more text and
+<b>SOME bold text</b></body></html>
 :param pattern: A regular expression object or string.
 :param replace: Replacement pattern.
 :param count: Number of replacements to make in each text fragment.
 :rtype: `Transformer`
 for event in stream:
 yield OUTSIDE, event
 def _unmark(self, stream):
 for mark, event in stream:
-if event[0] is not None:
+kind = event[0]
+if not (kind is None or kind is ATTR or kind is BREAK):
 yield event
 class SelectTransformation(object):
 """Select and mark events that match an XPath expression."""
 else:
 yield OUTSIDE, event
 elif isinstance(result, Attrs):
 # XXX  Selected *attributes* are given a "kind" of None to
 # indicate they are not really part of the stream.
-yield ATTR, (None, (QName(event[1][0] + '@*'), result), event[2])
+yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2])
 yield None, event
+elif isinstance(result, tuple):
+yield OUTSIDE, result
 elif result:
+# XXX Assume everything else is "text"?
 yield None, (TEXT, unicode(result), (None, -1, -1))
 else:
 yield None, event
 """Apply the transform filter to the marked stream.
 :param stream: the marked event stream to filter
 """
 for mark, event in stream:
-if mark not in (INSIDE, OUTSIDE):
+yield mark, event
-yield mark, event
+if mark is ENTER:
+for mark, event in stream:
+if mark is EXIT:
+yield mark, event
+break
 class RemoveTransformation(object):
 """Remove selection from the stream."""
 if mark:
 element = list(self.element.generate())
 for prefix in element[:-1]:
 yield None, prefix
 yield mark, event
-while True:
+start = mark
-try:
+stopped = False
-mark, event = stream.next()
+for mark, event in stream:
-except StopIteration:
+if start is ENTER and mark is EXIT:
-yield None, element[-1]
+yield mark, event
+stopped = True
+break
 if not mark:
 break
 yield mark, event
+else:
+stopped = True
 yield None, element[-1]
-yield mark, event
+if not stopped:
+yield mark, event
 else:
 yield mark, event
 class TraceTransformation(object):
 yield event
 class FilterTransformation(object):
 """Apply a normal stream filter to the selection. The filter is called once
-for each contiguous block of marked events."""
+for each selection."""
 def __init__(self, filter):
 """Create the transform.
 :param filter: The stream filter to apply.
 yield OUTSIDE, event
 del queue[:]
 queue = []
 for mark, event in stream:
-if mark:
+if mark is ENTER:
 queue.append(event)
-else:
+for mark, event in stream:
+queue.append(event)
+if mark is EXIT:
+break
 for queue_event in flush(queue):
 yield queue_event
-yield None, event
+elif mark is OUTSIDE:
-for event in flush(queue):
+stopped = True
-yield event
+queue.append(event)
+for mark, event in stream:
+if mark is not OUTSIDE:
+break
+queue.append(event)
+else:
+stopped = True
+for queue_event in flush(queue):
+yield queue_event
+if not stopped:
+yield None, event
+else:
+yield mark, event
+for queue_event in flush(queue):
+yield queue_event
 class MapTransformation(object):
 """Apply a function to the `data` element of events of ``kind`` in the
 selection.
 class SubstituteTransformation(object):
 """Replace text matching a regular expression.
 Refer to the documentation for ``re.sub()`` for details.
 """
-def __init__(self, pattern, replace, count=1):
+def __init__(self, pattern, replace, count=0):
 """Create the transform.
 :param pattern: A regular expression object, or string.
 :param replace: Replacement pattern.
 :param count: Number of replacements to make in each text fragment.
 """Apply the transform filter to the marked stream.
 :param stream: The marked event stream to filter
 """
 for mark, (kind, data, pos) in stream:
-if kind is TEXT:
+if mark is not None and kind is TEXT:
 new_data = self.pattern.sub(self.replace, data, self.count)
 if isinstance(data, Markup):
 data = Markup(new_data)
 else:
 data = new_data
 injected.
 """
 self.content = content
 def _inject(self):
-for event in _ensure(self.content):
+content = self.content
+if callable(content):
+content = content()
+for event in _ensure(content):
 yield None, event
 class ReplaceTransformation(InjectorTransformation):
 """Replace selection with content."""
 def __call__(self, stream):
 """Apply the transform filter to the marked stream.
 :param stream: The marked event stream to filter
 """
+stream = PushBackStream(stream)
 for mark, event in stream:
 if mark is not None:
+start = mark
 for subevent in self._inject():
 yield subevent
-while True:
+for mark, event in stream:
-mark, event = stream.next()
+if start is ENTER:
-if mark is None:
+if mark is EXIT:
-yield mark, event
+break
+elif mark != start:
+stream.push((mark, event))
 break
 else:
 yield mark, event
 def __call__(self, stream):
 """Apply the transform filter to the marked stream.
 :param stream: The marked event stream to filter
 """
+stream = PushBackStream(stream)
 for mark, event in stream:
 if mark is not None:
+start = mark
 for subevent in self._inject():
 yield subevent
 yield mark, event
-while True:
+for mark, event in stream:
-mark, event = stream.next()
+if mark != start and start is not ENTER:
-if not mark:
+stream.push((mark, event))
 break
 yield mark, event
-yield mark, event
+if start is ENTER and mark is EXIT:
+break
+else:
+yield mark, event
 class AfterTransformation(InjectorTransformation):
 """Insert content after selection."""
 def __call__(self, stream):
 """Apply the transform filter to the marked stream.
 :param stream: The marked event stream to filter
 """
+stream = PushBackStream(stream)
 for mark, event in stream:
 yield mark, event
 if mark:
-while True:
+start = mark
-try:
+for mark, event in stream:
-mark, event = stream.next()
+if start is not ENTER and mark != start:
-except StopIteration:
+stream.push((mark, event))
-break
-if not mark:
 break
 yield mark, event
+if start is ENTER and mark is EXIT:
+break
 for subevent in self._inject():
 yield subevent
-yield mark, event
 class PrependTransformation(InjectorTransformation):
 """Prepend content to the inside of selected elements."""
 :param stream: The marked event stream to filter
 """
 for mark, event in stream:
 yield mark, event
-if mark in (ENTER, OUTSIDE):
+if mark is ENTER:
 for subevent in self._inject():
 yield subevent
 class AppendTransformation(InjectorTransformation):
 :param stream: The marked event stream to filter
 """
 for mark, event in stream:
 yield mark, event
 if mark is ENTER:
-while True:
+for mark, event in stream:
-mark, event = stream.next()
 if mark is EXIT:
 break
 yield mark, event
 for subevent in self._inject():
 yield subevent
 :param event: the markup event to add
 """
 self.events.append(event)
 def reset(self):
-"""Reset the buffer so that it's empty."""
+"""Empty the buffer of events."""
 del self.events[:]
 class CopyTransformation(object):
 """Copy selected events into a buffer for later insertion."""
-def __init__(self, buffer):
+def __init__(self, buffer, accumulate=False):
 """Create the copy transformation.
 :param buffer: the `StreamBuffer` in which the selection should be
 stored
 """
+if not accumulate:
+buffer.reset()
 self.buffer = buffer
+self.accumulate = accumulate
 def __call__(self, stream):
 """Apply the transformation to the marked stream.
 :param stream: the marked event stream to filter
 """
-self.buffer.reset()
+stream = PushBackStream(stream)
-stream = list(stream)
 for mark, event in stream:
 if mark:
+if not self.accumulate:
+self.buffer.reset()
+events = [(mark, event)]
 self.buffer.append(event)
-return stream
+start = mark
+for mark, event in stream:
+if start is not ENTER and mark != start:
+stream.push((mark, event))
+break
+events.append((mark, event))
+self.buffer.append(event)
+if start is ENTER and mark is EXIT:
+break
+for i in events:
+yield i
+else:
+yield mark, event
 class CutTransformation(object):
 """Cut selected events into a buffer for later insertion and remove the
 selection.
 """
-def __init__(self, buffer):
+def __init__(self, buffer, accumulate=False):
 """Create the cut transformation.
 :param buffer: the `StreamBuffer` in which the selection should be
 stored
 """
 self.buffer = buffer
+self.accumulate = accumulate
 def __call__(self, stream):
 """Apply the transform filter to the marked stream.
 :param stream: the marked event stream to filter
 """
-out_stream = []
+attributes = []
-attributes = None
+stream = PushBackStream(stream)
-for mark, (kind, data, pos) in stream:
+broken = False
-if attributes:
+if not self.accumulate:
-assert kind is START
+self.buffer.reset()
-data = (data[0], data[1] - attributes)
+for mark, event in stream:
-attributes = None
 if mark:
-# There is some magic here. ATTR marked events are pushed into
+# Send a BREAK event if there was no other event sent between this selection and the previous one
-# the stream *before* the START event they originated from.
+if not self.accumulate:
-# This allows cut() to strip out the attributes from START
+if not broken and self.buffer:
-# event as would be expected.
+yield BREAK, (BREAK, None, None)
+self.buffer.reset()
+self.buffer.append(event)
+start = mark
 if mark is ATTR:
-self.buffer.append((kind, data, pos))
+attributes.extend([name for name, _ in event[1][1]])
-attributes = [name for name, _ in data[1]]
+for mark, event in stream:
-else:
+if start is mark is ATTR:
-self.buffer.append((kind, data, pos))
+attributes.extend([name for name, _ in event[1][1]])
+# Handle non-element contiguous selection
+if start is not ENTER and mark != start:
+# Operating on the attributes of a START event
+if start is ATTR:
+kind, data, pos = event
+assert kind is START
+data = (data[0], data[1] - attributes)
+attributes = None
+stream.push((mark, (kind, data, pos)))
+else:
+stream.push((mark, event))
+break
+self.buffer.append(event)
+if start is ENTER and mark is EXIT:
+break
+broken = False
 else:
-out_stream.append((mark, (kind, data, pos)))
+broken = True
-return out_stream
+yield mark, event
+if not broken and self.buffer:
+yield BREAK, (BREAK, None, None)

Mercurial > genshi > genshi-test

comparison genshi/filters/transform.py @ 784:67d324a62cc0 experimental-match-fastpaths