changeset 438:2c38ec4e2dff trunk

Added documentation page on the builtin stream filters.
author cmlenz
date Mon, 02 Apr 2007 18:21:03 +0000
parents 821fc97d3c0a
children 9f11c745fac9
files MANIFEST.in doc/epydoc.conf doc/filters.txt doc/index.txt doc/streams.txt doc/style/apidoc.css doc/style/edgewall.css doc/style/epydoc.css genshi/core.py genshi/template/base.py genshi/template/eval.py
diffstat 10 files changed, 209 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,4 +2,5 @@
 exclude doc/docutils.conf
 recursive-exclude doc/logo.lineform *
 exclude doc/Makefile
+include doc/api/*.*
 include doc/*.html
--- a/doc/epydoc.conf
+++ b/doc/epydoc.conf
@@ -18,7 +18,7 @@
 # HTML output
 output: html
 target: doc/api/
-css: doc/style/apidoc.css
+css: doc/style/epydoc.css
 top: genshi
 frames: no
 sourcecode: no
new file mode 100644
--- /dev/null
+++ b/doc/filters.txt
@@ -0,0 +1,132 @@
+.. -*- mode: rst; encoding: utf-8 -*-
+
+==============
+Stream Filters
+==============
+
+`Markup Streams`_ showed how to write filters and how they are applied to
+markup streams. This page describes the features of the various filters that
+come with Genshi itself.
+
+.. _`Markup Streams`: streams.html
+
+.. contents:: Contents
+   :depth: 1
+.. sectnum::
+
+
+HTML Form Filler
+================
+
+The filter ``genshi.filters.HTMLFormFiller`` can automatically populate an HTML
+form from values provided as a simple dictionary. When using this filter, you can
+basically omit any ``value``, ``selected``, or ``checked`` attributes from form
+controls in your templates, and let the filter do all that work for you.
+
+``HTMLFormFiller`` takes a dictionary of data to populate the form with, where
+the keys should match the names of form elements, and the values determine the
+values of those controls. For example::
+
+  >>> from genshi.filters import HTMLFormFiller
+  >>> from genshi.template import MarkupTemplate
+  >>> template = MarkupTemplate("""<form>
+  ...   <p>
+  ...     <label>User name:
+  ...       <input type="text" name="username" />
+  ...     </label><br />
+  ...     <label>Password:
+  ...       <input type="password" name="password" />
+  ...     </label><br />
+  ...     <label>
+  ...       <input type="checkbox" name="remember" /> Remember me
+  ...     </label>
+  ...   </p>
+  ... </form>""")
+  >>> filler = HTMLFormFiller(data=dict(username='john', remember=True))
+  >>> print template.generate() | filler
+  <form>
+    <p>
+      <label>User name:
+        <input type="text" name="username" value="john"/>
+      </label><br/>
+      <label>Password:
+        <input type="password" name="password"/>
+      </label><br/>
+      <label>
+        <input type="checkbox" name="remember" checked="checked"/> Remember me
+      </label>
+    </p>
+  </form>
+
+.. note:: This processing is done without in any way reparsing the template
+          output. Like any stream filter, it operates after the template output is
+          generated but *before* that output is actually serialized.
+
+The filter will of course also handle radio buttons as well as ``<select>`` and
+``<textarea>`` elements. For radio buttons to be marked as checked, the value in
+the data dictionary needs to match the ``value`` attribute of the ``<input>``
+element, or evaluate to a truth value if the element has no such attribute. For
+options in a ``<select>`` box to be marked as selected, the value in the data
+dictionary needs to match the ``value`` attribute of the ``<option>`` element,
+or the text content of the option if it has no ``value`` attribute. Password and
+file input fields are not populated, as most browsers would ignore that anyway
+for security reasons.
+
+You'll want to make sure that the values in the data dictionary have already
+been converted to strings. While the filter may be able to deal with non-string
+data in some cases (such as check boxes), in most cases it will either not
+attempt any conversion or not produce the desired results.
+
+You can restrict the form filler to operate only on a specific ``<form>`` by
+passing either the ``id`` or the ``name`` keyword argument to the initializer.
+If either of those is specified, the filter will only apply to form tags with
+an attribute matching the specified value.
+
+
+HTML Sanitizer
+==============
+
+The ``genshi.filters.HTMLSanitizer`` filter can be used to clean up
+user-submitted HTML markup, removing potentially dangerous constructs that could
+be used for various kinds of abuse, such as cross-site scripting (XSS) attacks::
+
+  >>> from genshi.filters import HTMLSanitizer
+  >>> from genshi.input import HTML
+  >>> html = HTML("""<div>
+  ...   <p>Innocent looking text.</p>
+  ...   <script>alert("Danger: " + document.cookie)</script>
+  ... </div>""")
+  >>> sanitize = HTMLSanitizer()
+  >>> print html | sanitize
+  <div>
+    <p>Innocent looking text.</p>
+  </div>
+
+In this example, the ``<script>`` tag was removed from the output.
+
+You can determine which tags and attributes should be allowed by initializing
+the filter with corresponding sets. See the API documentation for more
+information.
+
+Inline ``style`` attributes are forbidden by default. If you allow them, the
+filter will still perform sanitization on the contents of any encountered inline
+styles: the proprietary ``expression()`` function (supported only by Internet
+Explorer) is removed, and any property using a ``url()`` with a potentially
+dangerous URL scheme (such as ``javascript:``) is also stripped out::
+
+  >>> from genshi.filters import HTMLSanitizer
+  >>> from genshi.input import HTML
+  >>> html = HTML("""<div>
+  ...   <br style="background: url(javascript:alert(document.cookie); color: #000" />
+  ... </div>""")
+  >>> sanitize = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+  >>> print html | sanitize
+  <div>
+    <br style="color: #000"/>
+  </div>
+
+.. warning:: You should probably not rely on the ``style`` filtering, as
+             sanitizing mixed HTML, CSS, and Javascript is very complicated and
+             subject to various browser bugs. If you can somehow get away with
+             not allowing inline styles in user-submitted content, that would
+             definitely be the safer route to follow.
--- a/doc/index.txt
+++ b/doc/index.txt
@@ -23,5 +23,6 @@
 * `Markup Streams <streams.html>`_
 * `Genshi XML Template Language <xml-templates.html>`_
 * `Genshi Text Template Language <text-templates.html>`_
+* `Using Stream Filters <filters.html>`_
 * `Using XPath in Genshi <xpath.html>`_
 * `Generated API Documentation <api/index.html>`_
--- a/doc/streams.txt
+++ b/doc/streams.txt
@@ -18,9 +18,8 @@
 A stream can be attained in a number of ways. It can be:
 
 * the result of parsing XML or HTML text, or
-* programmatically generated, or
-* the result of selecting a subset of another stream filtered by an XPath
-  expression.
+* the result of selecting a subset of another stream using XPath, or
+* programmatically generated.
 
 For example, the functions ``XML()`` and ``HTML()`` can be used to convert
 literal XML or HTML text to a markup stream::
@@ -91,7 +90,9 @@
 ``genshi.filters``. It processes a stream of HTML markup, and strips out any
 potentially dangerous constructs, such as Javascript event handlers.
 ``HTMLSanitizer`` is not a function, but rather a class that implements
-``__call__``, which means instances of the class are callable.
+``__call__``, which means instances of the class are callable::
+
+  stream = stream | HTMLSanitizer()
 
 Both the ``filter()`` method and the pipe operator allow easy chaining of
 filters::
@@ -103,15 +104,22 @@
 
   stream = stream | noop | HTMLSanitizer()
 
+For more information about the built-in filters, see `Stream Filters`_.
+
+.. _`Stream Filters`: filters.html
+
 
 Serialization
 =============
 
-The ``Stream`` class provides two methods for serializing this list of events:
-``serialize()`` and ``render()``. The former is a generator that yields chunks
-of ``Markup`` objects (which are basically unicode strings that are considered
-safe for output on the web). The latter returns a single string, by default
-UTF-8 encoded.
+Serialization means producing some kind of textual output from a stream of
+events, which you'll need when you want to transmit or store the results of
+generating or otherwise processing markup.
+
+The ``Stream`` class provides two methods for serialization: ``serialize()`` and
+``render()``. The former is a generator that yields chunks of ``Markup`` objects
+(which are basically unicode strings that are considered safe for output on the
+web). The latter returns a single string, by default UTF-8 encoded.
 
 Here's the output from ``serialize()``::
 
@@ -159,6 +167,35 @@
   Some text and a link.
 
 
+Serialization Options
+---------------------
+
+Both ``serialize()`` and ``render()`` support additional keyword arguments that
+are passed through to the initializer of the serializer class. The following
+options are supported by the built-in serializers:
+
+``strip_whitespace``
+  Whether the serializer should remove trailing spaces and empty lines. Defaults
+  to ``True``.
+
+  (This option is not available for serialization to plain text.)
+
+``doctype``
+  A ``(name, pubid, sysid)`` tuple defining the name, public identifier, and
+  system identifier of a ``DOCTYPE`` declaration to prepend to the generated
+  output. If provided, this declaration will override any ``DOCTYPE``
+  declaration in the stream.
+
+  (This option is not available for serialization to plain text.)
+
+``namespace_prefixes``
+  The namespace prefixes to use for namespaces that are not bound to a prefix
+  in the stream itself.
+
+  (This option is not available for serialization to HTML or plain text.)
+
+
+
 Using XPath
 ===========
 
--- a/doc/style/edgewall.css
+++ b/doc/style/edgewall.css
@@ -54,4 +54,6 @@
 }
 
 p.admonition-title { font-weight: bold; margin-bottom: 0; }
-div.note { font-style: italic; margin-left: 2em; margin-right: 2em; }
+div.note, div.warning { font-style: italic; margin-left: 2em;
+  margin-right: 2em;
+}
rename from doc/style/apidoc.css
rename to doc/style/epydoc.css
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -144,6 +144,9 @@
 
         Any additional keyword arguments are passed to the serializer, and thus
         depend on the `method` parameter value.
+        
+        :see: XMLSerializer.__init__, XHTMLSerializer.__init__,
+              HTMLSerializer.__init__, TextSerializer.__init__
         """
         generator = self.serialize(method=method, **kwargs)
         output = u''.join(list(generator))
@@ -179,9 +182,12 @@
         :param method: determines how the stream is serialized; can be either
                        "xml", "xhtml", "html", "text", or a custom serializer
                        class
-
+        
         Any additional keyword arguments are passed to the serializer, and thus
         depend on the `method` parameter value.
+        
+        :see: XMLSerializer.__init__, XHTMLSerializer.__init__,
+              HTMLSerializer.__init__, TextSerializer.__init__
         """
         from genshi import output
         cls = method
--- a/genshi/template/base.py
+++ b/genshi/template/base.py
@@ -33,14 +33,8 @@
 class TemplateError(Exception):
     """Base exception class for errors related to template processing."""
 
-
-class TemplateRuntimeError(TemplateError):
-    """Exception raised when an the evaluation of a Python expression in a
-    template causes an error.
-    """
-
     def __init__(self, message, filename='<string>', lineno=-1, offset=-1):
-        """Create the exception
+        """Create the exception.
         
         :param message: the error message
         :param filename: the filename of the template
@@ -48,18 +42,19 @@
                        occurred
         :param offset: the column number at which the error occurred
         """
-        self.msg = message
+        self.msg = message #: the error message string
         if filename != '<string>' or lineno >= 0:
             message = '%s (%s, line %d)' % (self.msg, filename, lineno)
-        TemplateError.__init__(self, message)
-        self.filename = filename
-        self.lineno = lineno
-        self.offset = offset
+        Exception.__init__(self, message)
+        self.filename = filename #: the name of the template file
+        self.lineno = lineno #: the number of the line containing the error
+        self.offset = offset #: the offset on the line
 
 
 class TemplateSyntaxError(TemplateError):
     """Exception raised when an expression in a template causes a Python syntax
-    error."""
+    error, or the template is not well-formed.
+    """
 
     def __init__(self, message, filename='<string>', lineno=-1, offset=-1):
         """Create the exception
@@ -72,12 +67,7 @@
         """
         if isinstance(message, SyntaxError) and message.lineno is not None:
             message = str(message).replace(' (line %d)' % message.lineno, '')
-        self.msg = message
-        message = '%s (%s, line %d)' % (self.msg, filename, lineno)
-        TemplateError.__init__(self, message)
-        self.filename = filename
-        self.lineno = lineno
-        self.offset = offset
+        TemplateError.__init__(self, message, filename, lineno)
 
 
 class BadDirectiveError(TemplateSyntaxError):
@@ -96,8 +86,14 @@
         :param lineno: the number of line in the template at which the error
                        occurred
         """
-        message = 'bad directive "%s"' % name
-        TemplateSyntaxError.__init__(self, message, filename, lineno)
+        TemplateSyntaxError.__init__(self, 'bad directive "%s"' % name,
+                                     filename, lineno)
+
+
+class TemplateRuntimeError(TemplateError):
+    """Exception raised when the evaluation of a Python expression in a
+    template causes an error.
+    """
 
 
 class Context(object):
--- a/genshi/template/eval.py
+++ b/genshi/template/eval.py
@@ -27,7 +27,7 @@
 from genshi.template.base import TemplateRuntimeError
 from genshi.util import flatten
 
-__all__ = ['Expression', 'Suite']
+__all__ = ['Code', 'Expression', 'Suite', 'UndefinedError']
 __docformat__ = 'restructuredtext en'
 
 
Copyright (C) 2012-2017 Edgewall Software