Mercurial > genshi > mirror
changeset 438:2c38ec4e2dff trunk
Added documentation page on the builtin stream filters.
author | cmlenz |
---|---|
date | Mon, 02 Apr 2007 18:21:03 +0000 |
parents | 821fc97d3c0a |
children | 9f11c745fac9 |
files | MANIFEST.in doc/epydoc.conf doc/filters.txt doc/index.txt doc/streams.txt doc/style/apidoc.css doc/style/edgewall.css doc/style/epydoc.css genshi/core.py genshi/template/base.py genshi/template/eval.py |
diffstat | 10 files changed, 209 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,4 +2,5 @@ exclude doc/docutils.conf recursive-exclude doc/logo.lineform * exclude doc/Makefile +include doc/api/*.* include doc/*.html
--- a/doc/epydoc.conf +++ b/doc/epydoc.conf @@ -18,7 +18,7 @@ # HTML output output: html target: doc/api/ -css: doc/style/apidoc.css +css: doc/style/epydoc.css top: genshi frames: no sourcecode: no
new file mode 100644 --- /dev/null +++ b/doc/filters.txt @@ -0,0 +1,132 @@ +.. -*- mode: rst; encoding: utf-8 -*- + +============== +Stream Filters +============== + +`Markup Streams`_ showed how to write filters and how they are applied to +markup streams. This page describes the features of the various filters that +come with Genshi itself. + +.. _`Markup Streams`: streams.html + +.. contents:: Contents + :depth: 1 +.. sectnum:: + + +HTML Form Filler +================ + +The filter ``genshi.filters.HTMLFormFiller`` can automatically populate an HTML +form from values provided as a simple dictionary. When using this filter, you can +basically omit any ``value``, ``selected``, or ``checked`` attributes from form +controls in your templates, and let the filter do all that work for you. + +``HTMLFormFiller`` takes a dictionary of data to populate the form with, where +the keys should match the names of form elements, and the values determine the +values of those controls. For example:: + + >>> from genshi.filters import HTMLFormFiller + >>> from genshi.template import MarkupTemplate + >>> template = MarkupTemplate("""<form> + ... <p> + ... <label>User name: + ... <input type="text" name="username" /> + ... </label><br /> + ... <label>Password: + ... <input type="password" name="password" /> + ... </label><br /> + ... <label> + ... <input type="checkbox" name="remember" /> Remember me + ... </label> + ... </p> + ... </form>""") + >>> filler = HTMLFormFiller(data=dict(username='john', remember=True)) + >>> print template.generate() | filler + <form> + <p> + <label>User name: + <input type="text" name="username" value="john"/> + </label><br/> + <label>Password: + <input type="password" name="password"/> + </label><br/> + <label> + <input type="checkbox" name="remember" checked="checked"/> Remember me + </label> + </p> + </form> + +.. note:: This processing is done without in any way reparsing the template + output. 
Like any stream filter, it operates after the template output is + generated but *before* that output is actually serialized. + +The filter will of course also handle radio buttons as well as ``<select>`` and +``<textarea>`` elements. For radio buttons to be marked as checked, the value in +the data dictionary needs to match the ``value`` attribute of the ``<input>`` +element, or evaluate to a truth value if the element has no such attribute. For +options in a ``<select>`` box to be marked as selected, the value in the data +dictionary needs to match the ``value`` attribute of the ``<option>`` element, +or the text content of the option if it has no ``value`` attribute. Password and +file input fields are not populated, as most browsers would ignore that anyway +for security reasons. + +You'll want to make sure that the values in the data dictionary have already +been converted to strings. While the filter may be able to deal with non-string +data in some cases (such as check boxes), in most cases it will either not +attempt any conversion or not produce the desired results. + +You can restrict the form filler to operate only on a specific ``<form>`` by +passing either the ``id`` or the ``name`` keyword argument to the initializer. +If either of those is specified, the filter will only apply to form tags with +an attribute matching the specified value. + + +HTML Sanitizer +============== + +The ``genshi.filters.HTMLSanitizer`` filter can be used to clean up +user-submitted HTML markup, removing potentially dangerous constructs that could +be used for various kinds of abuse, such as cross-site scripting (XSS) attacks:: + + >>> from genshi.filters import HTMLSanitizer + >>> from genshi.input import HTML + >>> html = HTML("""<div> + ... <p>Innocent looking text.</p> + ... <script>alert("Danger: " + document.cookie)</script> + ... 
</div>""") + >>> sanitize = HTMLSanitizer() + >>> print html | sanitize + <div> + <p>Innocent looking text.</p> + </div> + +In this example, the ``<script>`` tag was removed from the output. + +You can determine which tags and attributes should be allowed by initializing +the filter with corresponding sets. See the API documentation for more +information. + +Inline ``style`` attributes are forbidden by default. If you allow them, the +filter will still perform sanitization on the contents of any encountered inline +styles: the proprietary ``expression()`` function (supported only by Internet +Explorer) is removed, and any property using an ``url()`` with a potentially +dangerous URL scheme (such as ``javascript:``) is also stripped out:: + + >>> from genshi.filters import HTMLSanitizer + >>> from genshi.input import HTML + >>> html = HTML("""<div> + ... <br style="background: url(javascript:alert(document.cookie); color: #000" /> + ... </div>""") + >>> sanitize = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) + >>> print html | sanitize + <div> + <br style="color: #000"/> + </div> + +.. warning:: You should probably not rely on the ``style`` filtering, as + sanitizing mixed HTML, CSS, and Javascript is very complicated and + subject to various browser bugs. If you can somehow get away with + not allowing inline styles in user-submitted content, that would + definitely be the safer route to follow.
--- a/doc/index.txt +++ b/doc/index.txt @@ -23,5 +23,6 @@ * `Markup Streams <streams.html>`_ * `Genshi XML Template Language <xml-templates.html>`_ * `Genshi Text Template Language <text-templates.html>`_ +* `Using Stream Filters <filters.html>`_ * `Using XPath in Genshi <xpath.html>`_ * `Generated API Documentation <api/index.html>`_
--- a/doc/streams.txt +++ b/doc/streams.txt @@ -18,9 +18,8 @@ A stream can be attained in a number of ways. It can be: * the result of parsing XML or HTML text, or -* programmatically generated, or -* the result of selecting a subset of another stream filtered by an XPath - expression. +* the result of selecting a subset of another stream using XPath, or +* programmatically generated. For example, the functions ``XML()`` and ``HTML()`` can be used to convert literal XML or HTML text to a markup stream:: @@ -91,7 +90,9 @@ ``genshi.filters``. It processes a stream of HTML markup, and strips out any potentially dangerous constructs, such as Javascript event handlers. ``HTMLSanitizer`` is not a function, but rather a class that implements -``__call__``, which means instances of the class are callable. +``__call__``, which means instances of the class are callable:: + + stream = stream | HTMLSanitizer() Both the ``filter()`` method and the pipe operator allow easy chaining of filters:: @@ -103,15 +104,22 @@ stream = stream | noop | HTMLSanitizer() +For more information about the built-in filters, see `Stream Filters`_. + +.. _`Stream Filters`: filters.html + Serialization ============= -The ``Stream`` class provides two methods for serializing this list of events: -``serialize()`` and ``render()``. The former is a generator that yields chunks -of ``Markup`` objects (which are basically unicode strings that are considered -safe for output on the web). The latter returns a single string, by default -UTF-8 encoded. +Serialization means producing some kind of textual output from a stream of +events, which you'll need when you want to transmit or store the results of +generating or otherwise processing markup. + +The ``Stream`` class provides two methods for serialization: ``serialize()`` and +``render()``. The former is a generator that yields chunks of ``Markup`` objects +(which are basically unicode strings that are considered safe for output on the +web). 
The latter returns a single string, by default UTF-8 encoded. Here's the output from ``serialize()``:: @@ -159,6 +167,35 @@ Some text and a link. +Serialization Options +--------------------- + +Both ``serialize()`` and ``render()`` support additional keyword arguments that +are passed through to the initializer of the serializer class. The following +options are supported by the built-in serializers: + +``strip_whitespace`` + Whether the serializer should remove trailing spaces and empty lines. Defaults + to ``True``. + + (This option is not available for serialization to plain text.) + +``doctype`` + A ``(name, pubid, sysid)`` tuple defining the name, public identifier, and + system identifier of a ``DOCTYPE`` declaration to prepend to the generated + output. If provided, this declaration will override any ``DOCTYPE`` + declaration in the stream. + + (This option is not available for serialization to plain text.) + +``namespace_prefixes`` + The namespace prefixes to use for namespaces that are not bound to a prefix + in the stream itself. + + (This option is not available for serialization to HTML or plain text.) + + + Using XPath ===========
--- a/doc/style/edgewall.css +++ b/doc/style/edgewall.css @@ -54,4 +54,6 @@ } p.admonition-title { font-weight: bold; margin-bottom: 0; } -div.note { font-style: italic; margin-left: 2em; margin-right: 2em; } +div.note, div.warning { font-style: italic; margin-left: 2em; + margin-right: 2em; +}
--- a/genshi/core.py +++ b/genshi/core.py @@ -144,6 +144,9 @@ Any additional keyword arguments are passed to the serializer, and thus depend on the `method` parameter value. + + :see: XMLSerializer.__init__, XHTMLSerializer.__init__, + HTMLSerializer.__init__, TextSerializer.__init__ """ generator = self.serialize(method=method, **kwargs) output = u''.join(list(generator)) @@ -179,9 +182,12 @@ :param method: determines how the stream is serialized; can be either "xml", "xhtml", "html", "text", or a custom serializer class - + Any additional keyword arguments are passed to the serializer, and thus depend on the `method` parameter value. + + :see: XMLSerializer.__init__, XHTMLSerializer.__init__, + HTMLSerializer.__init__, TextSerializer.__init__ """ from genshi import output cls = method
--- a/genshi/template/base.py +++ b/genshi/template/base.py @@ -33,14 +33,8 @@ class TemplateError(Exception): """Base exception class for errors related to template processing.""" - -class TemplateRuntimeError(TemplateError): - """Exception raised when an the evaluation of a Python expression in a - template causes an error. - """ - def __init__(self, message, filename='<string>', lineno=-1, offset=-1): - """Create the exception + """Create the exception. :param message: the error message :param filename: the filename of the template @@ -48,18 +42,19 @@ occurred :param offset: the column number at which the error occurred """ - self.msg = message + self.msg = message #: the error message string if filename != '<string>' or lineno >= 0: message = '%s (%s, line %d)' % (self.msg, filename, lineno) - TemplateError.__init__(self, message) - self.filename = filename - self.lineno = lineno - self.offset = offset + Exception.__init__(self, message) + self.filename = filename #: the name of the template file + self.lineno = lineno #: the number of the line containing the error + self.offset = offset #: the offset on the line class TemplateSyntaxError(TemplateError): """Exception raised when an expression in a template causes a Python syntax - error.""" + error, or the template is not well-formed. 
+ """ def __init__(self, message, filename='<string>', lineno=-1, offset=-1): """Create the exception @@ -72,12 +67,7 @@ """ if isinstance(message, SyntaxError) and message.lineno is not None: message = str(message).replace(' (line %d)' % message.lineno, '') - self.msg = message - message = '%s (%s, line %d)' % (self.msg, filename, lineno) - TemplateError.__init__(self, message) - self.filename = filename - self.lineno = lineno - self.offset = offset + TemplateError.__init__(self, message, filename, lineno) class BadDirectiveError(TemplateSyntaxError): @@ -96,8 +86,14 @@ :param lineno: the number of line in the template at which the error occurred """ - message = 'bad directive "%s"' % name - TemplateSyntaxError.__init__(self, message, filename, lineno) + TemplateSyntaxError.__init__(self, 'bad directive "%s"' % name, + filename, lineno) + + +class TemplateRuntimeError(TemplateError): + """Exception raised when an the evaluation of a Python expression in a + template causes an error. + """ class Context(object):