Mercurial > genshi > mirror
changeset 533:4d486f15c986 trunk
Thanks to Dave Abrahams for pointing out some deficiencies in the transformer
filter:
- `apply()` has been renamed to `map()`.
- `filter()` applies a normal stream filter to the selection.
- To reduce confusion with normal stream filter pipelines the `__or__`
operator, which previously applied a custom transform, has been renamed to
`apply()`.
Also added a `substitute()` transformation that applies regex replacement to
`TEXT` events.
author | athomas |
---|---|
date | Fri, 22 Jun 2007 16:42:38 +0000 |
parents | 9ce91447fc6b |
children | 57b5d5138f1a |
files | doc/filters.txt genshi/filters/transform.py |
diffstat | 2 files changed, 129 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/filters.txt +++ b/doc/filters.txt @@ -164,7 +164,7 @@ ... </body> ... </html>''') - >>> print html | Transformer('body/em').apply(unicode.upper, TEXT) \ + >>> print html | Transformer('body/em').map(unicode.upper, TEXT) \ ... .unwrap().wrap(tag.u).end() \ ... .select('body/u') \ ... .prepend('underlined ') @@ -181,7 +181,7 @@ 2. uppercases any text nodes in the element, 3. strips off the `<em>` start and close tags, 4. wraps the content in a `<u>` tag, and - 5. inserts the text `underlind` inside the `<u>` tag. + 5. inserts the text `underlined` inside the `<u>` tag. A number of commonly useful transformations are available for this filter. Please consult the API documentation a complete list. @@ -209,13 +209,13 @@ stream. In this case we define a class, so that we can initialize it with the tag name. -Custom transformations can be applied using the `|` operator on the transformer -instance: +Custom transformations can be applied using the `apply()` method of a +transformer instance: .. code-block:: pycon - >>> xform = Transformer('body//em').apply(unicode.upper, TEXT) - >>> xform |= RenameTransformation('u') + >>> xform = Transformer('body//em').map(unicode.upper, TEXT) \ + >>> xform = xform.apply(RenameTransformation('u')) >>> print html | xform <html> <head><title>Some Title</title></head>
--- a/genshi/filters/transform.py +++ b/genshi/filters/transform.py @@ -32,7 +32,7 @@ ... Some <em>body</em> text. ... </body> ... </html>''') ->>> print html | Transformer('body/em').apply(unicode.upper, TEXT) \\ +>>> print html | Transformer('body/em').map(unicode.upper, TEXT) \\ ... .unwrap().wrap(tag.u) <html> <head><title>Some Title</title></head> @@ -45,6 +45,7 @@ box, but custom transformations can be added easily. """ +import re import sys from genshi.builder import Element @@ -161,8 +162,8 @@ transforms = link(transforms) return Stream(self._unmark(transforms)) - def __or__(self, function): - """Combine transformations. + def apply(self, function): + """Apply a transformation to the stream. Transformations can be chained, similar to stream filters. Any callable accepting a marked stream can be used as a transform. @@ -176,7 +177,7 @@ ... else: ... yield mark, (kind, data, pos) >>> short_stream = HTML('<body>Some <em>test</em> text</body>') - >>> print short_stream | (Transformer('.//em/text()') | upper) + >>> print short_stream | Transformer('.//em/text()').apply(upper) <body>Some <em>TEST</em> text</body> """ transformer = Transformer() @@ -208,7 +209,7 @@ :return: the stream augmented by transformation marks :rtype: `Transformer` """ - return self | SelectTransformation(path) + return self.apply(SelectTransformation(path)) def invert(self): """Invert selection so that marked events become unmarked, and vice @@ -230,7 +231,7 @@ :rtype: `Transformer` """ - return self | InvertTransformation() + return self.apply(InvertTransformation()) def end(self): """End current selection, allowing all events to be selected. @@ -251,7 +252,7 @@ :return: the stream augmented by transformation marks :rtype: `Transformer` """ - return self | EndTransformation() + return self.apply(EndTransformation()) #{ Deletion operations @@ -268,7 +269,7 @@ :rtype: `Transformer` """ - return self | EmptyTransformation() + return self.apply(EmptyTransformation()) def remove(self): """Remove selection from the stream. @@ -283,7 +284,7 @@ :rtype: `Transformer` """ - return self | RemoveTransformation() + return self.apply(RemoveTransformation()) #{ Direct element operations @@ -300,7 +301,7 @@ :rtype: `Transformer` """ - return self | UnwrapTransformation() + return self.apply(UnwrapTransformation()) def wrap(self, element): """Wrap selection in an element. @@ -314,7 +315,7 @@ :param element: either a tag name (as string) or an `Element` object :rtype: `Transformer` """ - return self | WrapTransformation(element) + return self.apply(WrapTransformation(element)) #{ Content insertion operations @@ -330,7 +331,7 @@ :param content: Either an iterable of events or a string to insert. :rtype: `Transformer` """ - return self | ReplaceTransformation(content) + return self.apply(ReplaceTransformation(content)) def before(self, content): """Insert content before selection. @@ -347,7 +348,7 @@ :param content: Either an iterable of events or a string to insert. :rtype: `Transformer` """ - return self | BeforeTransformation(content) + return self.apply(BeforeTransformation(content)) def after(self, content): """Insert content after selection. @@ -363,7 +364,7 @@ :param content: Either an iterable of events or a string to insert. :rtype: `Transformer` """ - return self | AfterTransformation(content) + return self.apply(AfterTransformation(content)) def prepend(self, content): """Insert content after the ENTER event of the selection. @@ -379,7 +380,7 @@ :param content: Either an iterable of events or a string to insert. :rtype: `Transformer` """ - return self | PrependTransformation(content) + return self.apply(PrependTransformation(content)) def append(self, content): """Insert content before the END event of the selection. @@ -393,7 +394,7 @@ :param content: Either an iterable of events or a string to insert. :rtype: `Transformer` """ - return self | AppendTransformation(content) + return self.apply(AppendTransformation(content)) #{ Attribute manipulation @@ -434,7 +435,7 @@ :param value: the value that should be set for the attribute. :rtype: `Transformer` """ - return self | AttrTransformation(name, value) + return self.apply(AttrTransformation(name, value)) #{ Buffer operations @@ -484,7 +485,7 @@ :rtype: `Transformer` :note: this transformation will buffer the entire input stream """ - return self | CopyTransformation(buffer) + return self.apply(CopyTransformation(buffer)) def cut(self, buffer): """Copy selection into buffer and remove the selection from the stream. @@ -503,17 +504,32 @@ :rtype: `Transformer` :note: this transformation will buffer the entire input stream """ - return self | CutTransformation(buffer) + return self.apply(CutTransformation(buffer)) #{ Miscellaneous operations - def apply(self, function, kind): - """Apply a function to the ``data`` element of events of ``kind`` in + def filter(self, filter): + """Apply a normal stream filter to the selection. The filter is called + once for each contiguous block of marked events. + + >>> from genshi.filters.html import HTMLSanitizer + >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' + ... '</script> and some more text</body></html>') + >>> print html | Transformer('body/*').filter(HTMLSanitizer()) + <html><body>Some text and some more text</body></html> + + :param filter: The stream filter to apply. + :rtype: `Transformer` + """ + return self.apply(FilterTransformation(filter)) + + def map(self, function, kind): + """Applies a function to the ``data`` element of events of ``kind`` in the selection. >>> html = HTML('<html><head><title>Some Title</title></head>' ... '<body>Some <em>body</em> text.</body></html>') - >>> print html | Transformer('head/title').apply(unicode.upper, TEXT) + >>> print html | Transformer('head/title').map(unicode.upper, TEXT) <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> text.</body></html> @@ -521,7 +537,24 @@ :param kind: the kind of event the function should be applied to :rtype: `Transformer` """ - return self | ApplyTransformation(function, kind) + return self.apply(MapTransformation(function, kind)) + + def substitute(self, pattern, replace, count=1): + """Replace text matching a regular expression. + + Refer to the documentation for ``re.sub()`` for details. + + >>> html = HTML('<html><body>Some text, some more text and ' + ... '<b>some bold text</b></body></html>') + >>> print html | Transformer('body').substitute('(?i)some', 'SOME') + <html><body>SOME text, some more text and <b>SOME bold text</b></body></html> + + :param pattern: A regular expression object or string. + :param replace: Replacement pattern. + :param count: Number of replacements to make in each text fragment. + :rtype: `Transformer` + """ + return self.apply(SubstituteTransformation(pattern, replace, count)) def trace(self, prefix='', fileobj=None): """Print events as they pass through the transform. @@ -542,7 +575,7 @@ the standard output stream :rtype: `Transformer` """ - return self | TraceTransformation(prefix, fileobj=fileobj) + return self.apply(TraceTransformation(prefix, fileobj=fileobj)) # Internal methods @@ -731,7 +764,41 @@ yield event -class ApplyTransformation(object): +class FilterTransformation(object): + """Apply a normal stream filter to the selection. The filter is called once + for each contiguous block of marked events.""" + + def __init__(self, filter): + """Create the transform. + + :param filter: The stream filter to apply. + """ + self.filter = filter + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + def flush(queue): + if queue: + for event in self.filter(queue): + yield OUTSIDE, event + del queue[:] + + queue = [] + for mark, event in stream: + if mark: + queue.append(event) + else: + for event in flush(queue): + yield event + yield None, event + for event in flush(queue): + yield event + + +class MapTransformation(object): """Apply a function to the `data` element of events of ``kind`` in the selection. """ @@ -758,6 +825,36 @@ yield mark, (kind, data, pos) +class SubstituteTransformation(object): + """Replace text matching a regular expression. + + Refer to the documentation for ``re.sub()`` for details. + """ + def __init__(self, pattern, replace, count=1): + """Create the transform. + + :param pattern: A regular expression object, or string. + :param replace: Replacement pattern. + :param count: Number of replacements to make in each text fragment. + """ + if isinstance(pattern, basestring): + self.pattern = re.compile(pattern) + else: + self.pattern = pattern + self.count = count + self.replace = replace + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, (kind, data, pos) in stream: + if kind is TEXT: + data = self.pattern.sub(self.replace, data, self.count) + yield mark, (kind, data, pos) + + class InjectorTransformation(object): """Abstract base class for transformations that inject content into a stream. @@ -769,7 +866,7 @@ ... for event in stream: ... yield event >>> html = HTML('<body>Some <em>test</em> text</body>') - >>> print html | (Transformer('.//em') | Top('Prefix ')) + >>> print html | Transformer('.//em').apply(Top('Prefix ')) Prefix <body>Some <em>test</em> text</body> """ def __init__(self, content):