# HG changeset patch # User cmlenz # Date 1180977190 0 # Node ID 3073ac6886512e7010ebc585fdd459c456be9891 # Parent 5b42b341185a447c39db26da05dcac53a0d46c6f Added new markup transformation filter contributed by Alec Thomas (#122). This provides gorgeous jQuery-inspired stream transformation capabilities based on XPath expressions. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ (?, from branches/stable/0.5.x) * Added #include directive for text templates (ticket #115). + * Added new markup transformation filter contributed by Alec Thomas. This + provides gorgeous jQuery-inspired stream transformation capabilities based + on XPath expressions. Version 0.4.2 diff --git a/genshi/builder.py b/genshi/builder.py --- a/genshi/builder.py +++ b/genshi/builder.py @@ -121,7 +121,7 @@ self.children.extend(node.children) elif node is not None: try: - map(self.append, iter(node)) + self.append(Stream(iter(node))) except TypeError: self.children.append(node) diff --git a/genshi/filters/i18n.py b/genshi/filters/i18n.py --- a/genshi/filters/i18n.py +++ b/genshi/filters/i18n.py @@ -12,6 +12,9 @@ from genshi.template.base import Template, EXPR, SUB from genshi.template.markup import EXEC +__all__ = ['Translator'] +__docformat__ = 'restructuredtext en' + _LOAD_NAME = chr(opmap['LOAD_NAME']) _LOAD_CONST = chr(opmap['LOAD_CONST']) _CALL_FUNCTION = chr(opmap['CALL_FUNCTION']) diff --git a/genshi/filters/tests/__init__.py b/genshi/filters/tests/__init__.py --- a/genshi/filters/tests/__init__.py +++ b/genshi/filters/tests/__init__.py @@ -15,10 +15,11 @@ import unittest def suite(): - from genshi.filters.tests import html, i18n + from genshi.filters.tests import html, i18n, transform suite = unittest.TestSuite() suite.addTest(html.suite()) suite.addTest(i18n.suite()) + suite.addTest(transform.suite()) return suite if __name__ == '__main__': diff --git a/genshi/filters/tests/transform.py b/genshi/filters/tests/transform.py new file mode 100644 --- /dev/null +++ b/genshi/filters/tests/transform.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +import doctest +import unittest + +import genshi.filters.transform + + +def suite(): + from genshi.input import HTML + suite = doctest.DocTestSuite(genshi.filters.transform, + optionflags=doctest.NORMALIZE_WHITESPACE, + extraglobs={'HTML': HTML}) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/genshi/filters/transform.py b/genshi/filters/transform.py new file mode 100644 --- /dev/null +++ b/genshi/filters/transform.py @@ -0,0 +1,814 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +"""A filter for generalised functional-style transformations of markup streams, +inspired by JQuery. +""" + +import sys + +from genshi.path import Path +from genshi.builder import Element +from genshi.core import Stream, Attrs, QName, TEXT, START, END + +__all__ = ['Transformer', 'Injector', 'BEGIN', 'FINISH', 'INSIDE', 'OUTSIDE'] + + +class TransformMark(str): + """A mark on a transformation stream.""" + __slots__ = [] + _instances = {} + + def __new__(cls, val): + return cls._instances.setdefault(val, str.__new__(cls, val)) + + +BEGIN = TransformMark('BEGIN') +INSIDE = TransformMark('INSIDE') +OUTSIDE = TransformMark('OUTSIDE') +FINISH = TransformMark('FINISH') + + +class Transformer(object): + """Stream filter that can apply a variety of different transformations to + a stream. + + This is achieved by selecting the events to be transformed using XPath, + then applying the transformations to the events matched by the path + expression. Each marked event is in the form (mark, (kind, data, pos)), + where mark can be any of `BEGIN`, `FINISH`, `INSIDE`, `OUTSIDE` or None. + + The first three marks match `START` and `END` events, and any events + contained `INSIDE` any selected XML/HTML element. A non-element match + outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE` + mark. + + >>> stream = HTML('Some Title' + ... 'Some body text.') + >>> short_stream = HTML('Some test text') + + Transformations act on selected stream events matching an XPath. Here's an + example of removing some markup (title) selected by an expression: + + >>> print stream | Transformer('.//title').remove() + Some body text. + + Inserted content can be passed in the form of a string, or a Genshi event + Stream, which includes ``genshi.builder.tag``: + + >>> from genshi.builder import tag + >>> print stream | Transformer('.//body').prepend(tag.h1('Document Title')) + Some Title

Document + Title

Some body text. + + Each XPath expression determines the set of tags that will be acted upon by + subsequent transformations. In this example we select the text, copy + it into a buffer, then select the <body> element and paste the copied text + into the body as <h1> enclosed text: + + >>> buffer = [] + >>> print stream | Transformer('.//title/text()').copy(buffer) \ + .select('.//body').prepend(tag.h1(buffer)) + <html><head><title>Some Title

Some Title

Some + body text. + + Transformations can also be assigned and reused, although care must be + taken when using buffers, to ensure that buffers are cleared between + transforms: + + >>> emphasis = Transformer('.//em').setattr('class', 'emphasis') + >>> print stream | emphasis + Some TitleSome body text. + >>> print HTML('Emphasis') | emphasis + Emphasis + """ + + __slots__ = ('transforms',) + + def __init__(self, path=None): + """Construct a new transformation filter. + + :param path: the XPath expression + """ + self.transforms = [] + if path: + self.transforms.append(Select(path)) + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + :return: the transformed stream + :rtype: `Stream` + """ + transforms = self._mark(stream) + for link in self.transforms: + transforms = link(transforms) + return Stream(self._unmark(transforms)) + + def __or__(self, function): + """Combine transformations. + + Transformations can be chained, similar to stream filters. Any callable + accepting a marked stream can be used as a transform. + + As an example, here is a simple `TEXT` event upper-casing transform: + + >>> def upper(stream): + ... for mark, (kind, data, pos) in stream: + ... if mark and kind is TEXT: + ... yield mark, (kind, data.upper(), pos) + ... else: + ... yield mark, (kind, data, pos) + >>> short_stream = HTML('Some test text') + >>> print short_stream | (Transformer('.//em/text()') | upper) + Some TEST text + """ + transform = Transformer() + transform.transforms = self.transforms[:] + if isinstance(function, Transformer): + transform.transforms.extend(function.transforms) + else: + transform.transforms.append(function) + return transform + + #{ Selection operations + + def select(self, path): + """Mark events matching the given XPath expression. + + >>> html = HTML('Some test text') + >>> print html | Transformer().select('.//em').trace() + (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) + (None, ('TEXT', u'Some ', (None, 1, 6))) + ('BEGIN', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) + ('INSIDE', ('TEXT', u'test', (None, 1, 15))) + ('FINISH', ('END', QName(u'em'), (None, 1, 19))) + (None, ('TEXT', u' text', (None, 1, 24))) + (None, ('END', QName(u'body'), (None, 1, 29))) + Some test text + + :return: the stream augmented by transformation marks + :rtype: `Transformer` + """ + return self | Select(path) + + def invert(self): + """Invert selection so that marked events become unmarked, and vice + versa. + + Specificaly, all marks are converted to null marks, and all null marks + are converted to OUTSIDE marks. + + >>> html = HTML('Some test text') + >>> print html | Transformer('//em').invert().trace() + ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0))) + ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) + (None, ('START', (QName(u'em'), Attrs()), (None, 1, 11))) + (None, ('TEXT', u'test', (None, 1, 15))) + (None, ('END', QName(u'em'), (None, 1, 19))) + ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) + ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29))) + Some test text + + :rtype: `Transformer` + """ + return self | invert + + #{ Deletion operations + + def empty(self): + """Empty selected elements of all content. + + Example: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').empty() + Some TitleSome + text. + + :rtype: `Transformer` + """ + return self | empty + + def remove(self): + """Remove selection from the stream. + + Example: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').remove() + Some TitleSome + text. + + :rtype: `Transformer` + """ + return self | remove + + #{ Direct element operations + + def unwrap(self): + """Remove outtermost enclosing elements from selection. + + Example: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').unwrap() + Some TitleSome body + text. + + :rtype: `Transformer` + """ + return self | unwrap + + + def wrap(self, element): + """Wrap selection in an element. + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').wrap('strong') + Some TitleSome + body text. + + :param element: Either a string tag name or a Genshi builder element. + :rtype: `Transformer` + """ + return self | Wrap(element) + + #{ Content insertion operations + + def replace(self, content): + """Replace selection with content. + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//title/text()').replace('New Title') + New TitleSome body + text. + + :param content: Either an iterable of events or a string to insert. + :rtype: `Transformer` + """ + return self | Replace(content) + + def before(self, content): + """Insert content before selection. + + In this example we insert the word 'emphasised' before the opening + tag: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').before('emphasised ') + Some TitleSome emphasised + body text. + + :param content: Either an iterable of events or a string to insert. + :rtype: `Transformer` + """ + return self | Before(content) + + def after(self, content): + """Insert content after selection. + + Here, we insert some text after the closing tag: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').after(' rock') + Some TitleSome body + rock text. + + :param content: Either an iterable of events or a string to insert. + :rtype: `Transformer` + """ + return self | After(content) + + def prepend(self, content): + """Insert content after the BEGIN event of the selection. + + Inserting some new text at the start of the : + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//body').prepend('Some new body text. ') + Some TitleSome new body text. + Some body text. + + :param content: Either an iterable of events or a string to insert. + :rtype: `Transformer` + """ + return self | Prepend(content) + + def append(self, content): + """Insert content before the END event of the selection. + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//body').append(' Some new body text.') + Some TitleSome body + text. Some new body text. + + :param content: Either an iterable of events or a string to insert. + :rtype: `Transformer` + """ + return self | Append(content) + + #{ Attribute manipulation + + def setattr(self, key, value): + """Add or replace an attribute to selected elements. + + Example: + + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em').setattr('class', 'emphasis') + Some TitleSome body text. + + :rtype: `Transformer` + """ + return self | SetAttr(key, value) + + def delattr(self, key): + """Delete an attribute from selected elements. + + Example: + + >>> html = HTML('Some Title' + ... 'Some body ' + ... 'text.') + >>> print html | Transformer('.//*[@class="emphasis"]').delattr('class') + Some TitleSome body + text. + + :rtype: `Transformer` + """ + return self | DelAttr(key) + + #{ Buffer operations + + def copy(self, buffer): + """Copy selection into buffer. + + >>> from genshi.builder import tag + >>> buffer = [] + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//title/text()').copy(buffer) \\ + ... .select('.//body').prepend(tag.h1(buffer)) + Some Title

Some Title

Some + body text. + + :param buffer: a list-like object (must support .append() and be + iterable) where the selection will be buffered. + :rtype: `Transformer` + :note: this transformation will buffer the entire input stream + """ + return self | Copy(buffer) + + def cut(self, buffer): + """Copy selection into buffer and remove the selection from the stream. + + >>> from genshi.builder import tag + >>> buffer = [] + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('.//em/text()').cut(buffer) \\ + ... .select('.//em').after(tag.h1(buffer)) + Some TitleSome +

body

text. + + :param buffer: a list-like object (must support .append() and be + iterable) where the selection will be buffered. + :rtype: `Transformer` + :note: this transformation will buffer the entire input stream + """ + return self | Cut(buffer) + + #{ Miscellaneous operations + + def apply(self, function, kind): + """Apply a function to the ``data`` element of events of ``kind`` in + the selection. + + >>> import string + >>> html = HTML('Some Title' + ... 'Some body text.') + >>> print html | Transformer('head/title').apply(string.upper, TEXT) + SOME TITLESome body + text. + + :param function: the function to apply + :param kind: the kind of event the function should be applied to + :rtype: `Transformer` + """ + return self | Apply(function, kind) + + def trace(self, prefix='', fileobj=None): + """Print events as they pass through the transform. + + >>> html = HTML('Some test text') + >>> print html | Transformer('em').trace() + (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) + (None, ('TEXT', u'Some ', (None, 1, 6))) + ('BEGIN', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) + ('INSIDE', ('TEXT', u'test', (None, 1, 15))) + ('FINISH', ('END', QName(u'em'), (None, 1, 19))) + (None, ('TEXT', u' text', (None, 1, 24))) + (None, ('END', QName(u'body'), (None, 1, 29))) + Some test text + + :param prefix: a string to prefix each event with in the output + :param fileobj: the writable file-like object to write to; defaults to + the standard output stream + :rtype: `Transformer` + """ + return self | Trace(prefix, fileobj=fileobj) + + # Internal methods + + def _mark(self, stream): + for event in stream: + yield None, event + + def _unmark(self, stream): + for mark, event in stream: + yield event + + +class Select(object): + """Select and mark events that match an XPath expression.""" + def __init__(self, path): + """Create selection. + + :param path: XPath expression. + """ + self.path = Path(path) + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + namespaces = {} + variables = {} + test = self.path.test() + stream = iter(stream) + for mark, event in stream: + result = test(event, {}, {}) + if result is True: + if event[0] is START: + yield BEGIN, event + depth = 1 + while depth > 0: + mark, subevent = stream.next() + if subevent[0] is START: + depth += 1 + elif subevent[0] is END: + depth -= 1 + if depth == 0: + yield FINISH, subevent + else: + yield INSIDE, subevent + test(subevent, {}, {}, updateonly=True) + else: + yield OUTSIDE, event + elif result: + yield BEGIN, result + else: + yield None, event + + +def invert(stream): + """Invert selection so that marked events become unmarked, and vice versa. + + Specificaly, all input marks are converted to null marks, and all input + null marks are converted to OUTSIDE marks. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark: + yield None, event + else: + yield OUTSIDE, event + +def empty(stream): + """Empty selected elements of all content. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark not in (INSIDE, OUTSIDE): + yield mark, event + +def remove(stream): + """Remove selection from the stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark is None: + yield mark, event + +def unwrap(stream): + """Remove outtermost enclosing elements from selection. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark not in (BEGIN, FINISH): + yield mark, event + + +class Wrap(object): + """Wrap selection in an element.""" + + def __init__(self, element): + if isinstance(element, Element): + self.element = element + else: + self.element = Element(element) + + def __call__(self, stream): + for mark, event in stream: + if mark: + element = list(self.element.generate()) + for prefix in element[:-1]: + yield None, prefix + yield mark, event + while True: + mark, event = stream.next() + if not mark: + break + yield mark, event + yield None, element[-1] + yield mark, event + else: + yield mark, event + + +class Trace(object): + """Print events as they pass through the transform.""" + + def __init__(self, prefix='', fileobj=None): + """Trace constructor. + + :param prefix: text to prefix each traced line with. + :param fileobj: the writable file-like object to write to + """ + self.prefix = prefix + self.fileobj = fileobj or sys.stdout + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for event in stream: + print>>self.fileobj, self.prefix + str(event) + yield event + + +class Apply(object): + """Apply a function to the `data` element of events of ``kind`` in the + selection. + """ + + def __init__(self, function, kind): + """Create the transform. + + :param function: The function to apply. The function must take one + argument, the `data` element of each selected event. + :param kind: The Genshi event `kind` to apply ``function`` to. + """ + self.function = function + self.kind = kind + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, (kind, data, pos) in stream: + if mark and kind == self.kind: + yield mark, (kind, self.function(data), pos) + else: + yield mark, (kind, data, pos) + + +class Injector(object): + """Abstract base class for transformations that inject content into a + stream. + + >>> class Top(Injector): + ... def __call__(self, stream): + ... for event in self._inject(): + ... yield event + ... for event in stream: + ... yield event + >>> html = HTML('Some test text') + >>> print html | (Transformer('.//em') | Top('Prefix ')) + Prefix Some test text + """ + def __init__(self, content): + """Create a new injector. + + :param content: An iterable of Genshi stream events, or a string to be + injected. + """ + self.content = content + + def _inject(self): + if isinstance(self.content, basestring): + yield None, (TEXT, self.content, (None, -1, -1)) + else: + for event in self.content: + yield None, event + + +class Replace(Injector): + """Replace selection with content.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark is not None: + for subevent in self._inject(): + yield subevent + while True: + mark, event = stream.next() + if mark is None: + yield mark, event + break + else: + yield mark, event + + +class Before(Injector): + """Insert content before selection.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + if mark in (BEGIN, OUTSIDE): + for subevent in self._inject(): + yield subevent + yield mark, event + + +class After(Injector): + """Insert content after selection.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + yield mark, event + if mark: + while True: + mark, event = stream.next() + if not mark: + break + yield mark, event + for subevent in self._inject(): + yield subevent + yield mark, event + + +class Prepend(Injector): + """Prepend content to the inside of selected elements.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + yield mark, event + if mark in (BEGIN, OUTSIDE): + for subevent in self._inject(): + yield subevent + + +class Append(Injector): + """Append content after the content of selected elements.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, event in stream: + yield mark, event + if mark is BEGIN: + while True: + mark, event = stream.next() + if mark is FINISH: + break + yield mark, event + for subevent in self._inject(): + yield subevent + yield mark, event + + +class SetAttr(object): + """Set an attribute on selected elements.""" + def __init__(self, key, value): + """Construct transform. + + :param key: Attribute to set. + :param value: Value of attribute. + """ + self.key = key + self.value = value + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, (kind, data, pos) in stream: + if mark is BEGIN: + data = (data[0], data[1] | [(QName(self.key), self.value)]) + yield mark, (kind, data, pos) + + +class DelAttr(object): + """Delete an attribute of selected elements.""" + def __init__(self, key): + """Construct transform. + + :param key: The attribute to remove.""" + self.key = key + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + for mark, (kind, data, pos) in stream: + if mark is BEGIN: + data = (data[0], data[1] - self.key) + yield mark, (kind, data, pos) + + +class Copy(object): + """Copy selected events into a buffer for later insertion.""" + def __init__(self, buffer): + """Create a Copy transform filter. + + :param buffer: A list-like object (must support .append() and be + iterable) where the buffered events will be stored. + """ + self.buffer = buffer + + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + stream = list(stream) + for mark, event in stream: + if mark: + self.buffer.append(event) + return stream + + +class Cut(Copy): + """Cut selected events into a buffer for later insertion and remove the + selection.""" + def __call__(self, stream): + """Apply the transform filter to the marked stream. + + :param stream: The marked event stream to filter + """ + stream = Copy.__call__(self, stream) + return remove(stream) + + +if __name__ == '__main__': + import doctest + from genshi.input import HTML + doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE, + extraglobs={'HTML': HTML})