diff genshi/core.py @ 820:1837f39efd6f experimental-inline

Sync (old) experimental inline branch with trunk@1027.
author cmlenz
date Wed, 11 Mar 2009 17:51:06 +0000
parents 0742f421caba
children de82830f8816
line wrap: on
line diff
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2006-2007 Edgewall Software
+# Copyright (C) 2006-2008 Edgewall Software
 # All rights reserved.
 #
 # This software is licensed as described in the file COPYING, which
@@ -13,6 +13,11 @@
 
 """Core classes for markup processing."""
 
+try:
+    from functools import reduce
+except ImportError:
+    pass # builtin in Python <= 2.5
+from itertools import chain
 import operator
 
 from genshi.util import plaintext, stripentities, striptags
@@ -51,7 +56,7 @@
     returns the complete generated text at once. Both accept various parameters
     that impact the way the stream is serialized.
     """
-    __slots__ = ['events']
+    __slots__ = ['events', 'serializer']
 
     START = StreamEventKind('START') #: a start tag
     END = StreamEventKind('END') #: an end tag
@@ -65,12 +70,17 @@
     PI = StreamEventKind('PI') #: processing instruction
     COMMENT = StreamEventKind('COMMENT') #: comment
 
-    def __init__(self, events):
+    def __init__(self, events, serializer=None):
         """Initialize the stream with a sequence of markup events.
         
         :param events: a sequence or iterable providing the events
+        :param serializer: the default serialization method to use for this
+                           stream
+
+        :note: Changed in 0.5: added the `serializer` argument
         """
         self.events = events #: The underlying iterable producing the events
+        self.serializer = serializer #: The default serializion method
 
     def __iter__(self):
         return iter(self.events)
@@ -119,7 +129,7 @@
         :return: the filtered stream
         :rtype: `Stream`
         """
-        return Stream(_ensure(function(self)))
+        return Stream(_ensure(function(self)), serializer=self.serializer)
 
     def filter(self, *filters):
         """Apply filters to the stream.
@@ -143,7 +153,7 @@
         """
         return reduce(operator.or_, (self,) + filters)
 
-    def render(self, method='xml', encoding='utf-8', **kwargs):
+    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
         """Return a string representation of the stream.
         
         Any additional keyword arguments are passed to the serializer, and thus
@@ -151,21 +161,52 @@
         
         :param method: determines how the stream is serialized; can be either
                        "xml", "xhtml", "html", "text", or a custom serializer
-                       class
+                       class; if `None`, the default serialization method of
+                       the stream is used
         :param encoding: how the output string should be encoded; if set to
                          `None`, this method returns a `unicode` object
-        :return: a `str` or `unicode` object
+        :param out: a file-like object that the output should be written to
+                    instead of being returned as one big string; note that if
+                    this is a file or socket (or similar), the `encoding` must
+                    not be `None` (that is, the output must be encoded)
+        :return: a `str` or `unicode` object (depending on the `encoding`
+                 parameter), or `None` if the `out` parameter is provided
         :rtype: `basestring`
+        
         :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
+        :note: Changed in 0.5: added the `out` parameter
         """
         from genshi.output import encode
+        if method is None:
+            method = self.serializer or 'xml'
         generator = self.serialize(method=method, **kwargs)
-        return encode(generator, method=method, encoding=encoding)
+        return encode(generator, method=method, encoding=encoding, out=out)
 
     def select(self, path, namespaces=None, variables=None):
         """Return a new stream that contains the events matching the given
         XPath expression.
         
+        >>> from genshi import HTML
+        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
+        >>> print stream.select('elem')
+        <elem>foo</elem><elem>bar</elem>
+        >>> print stream.select('elem/text()')
+        foobar
+        
+        Note that the outermost element of the stream becomes the *context
+        node* for the XPath test. That means that the expression "doc" would
+        not match anything in the example above, because it only tests against
+        child elements of the outermost element:
+        
+        >>> print stream.select('doc')
+        <BLANKLINE>
+        
+        You can use the "." expression to match the context node itself
+        (although that usually makes little sense):
+        
+        >>> print stream.select('.')
+        <doc><elem>foo</elem><elem>bar</elem></doc>
+        
         :param path: a string containing the XPath expression
         :param namespaces: mapping of namespace prefixes used in the path
         :param variables: mapping of variable names to values
@@ -190,13 +231,16 @@
         
         :param method: determines how the stream is serialized; can be either
                        "xml", "xhtml", "html", "text", or a custom serializer
-                       class
+                       class; if `None`, the default serialization method of
+                       the stream is used
         :return: an iterator over the serialization results (`Markup` or
                  `unicode` objects, depending on the serialization method)
         :rtype: ``iterator``
         :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
         """
         from genshi.output import get_serializer
+        if method is None:
+            method = self.serializer or 'xml'
         return get_serializer(method, **kwargs)(_ensure(self))
 
     def __str__(self):
@@ -205,6 +249,9 @@
     def __unicode__(self):
         return self.render(encoding=None)
 
+    def __html__(self):
+        return self
+
 
 START = Stream.START
 END = Stream.END
@@ -220,12 +267,24 @@
 
 def _ensure(stream):
     """Ensure that every item on the stream is actually a markup event."""
-    for event in stream:
-        if type(event) is not tuple:
+    stream = iter(stream)
+    event = stream.next()
+
+    # Check whether the iterable is a real markup event stream by examining the
+    # first item it yields; if it's not we'll need to do some conversion
+    if type(event) is not tuple or len(event) != 3:
+        for event in chain([event], stream):
             if hasattr(event, 'totuple'):
                 event = event.totuple()
             else:
                 event = TEXT, unicode(event), (None, -1, -1)
+            yield event
+        return
+
+    # This looks like a markup event stream, so we'll just pass it through
+    # unchanged
+    yield event
+    for event in stream:
         yield event
 
 
@@ -295,6 +354,12 @@
                 return True
 
     def __getslice__(self, i, j):
+        """Return a slice of the attributes list.
+        
+        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
+        >>> attrs[1:]
+        Attrs([('title', 'Foo')])
+        """
         return Attrs(tuple.__getslice__(self, i, j))
 
     def __or__(self, attrs):
@@ -361,11 +426,6 @@
     """
     __slots__ = []
 
-    def __new__(cls, text='', *args):
-        if args:
-            text %= tuple(map(escape, args))
-        return unicode.__new__(cls, text)
-
     def __add__(self, other):
         return Markup(unicode(self) + unicode(escape(other)))
 
@@ -373,9 +433,13 @@
         return Markup(unicode(escape(other)) + unicode(self))
 
     def __mod__(self, args):
-        if not isinstance(args, (list, tuple)):
-            args = [args]
-        return Markup(unicode.__mod__(self, tuple(map(escape, args))))
+        if isinstance(args, dict):
+            args = dict(zip(args.keys(), map(escape, args.values())))
+        elif isinstance(args, (list, tuple)):
+            args = tuple(map(escape, args))
+        else:
+            args = escape(args)
+        return Markup(unicode.__mod__(self, args))
 
     def __mul__(self, num):
         return Markup(unicode(self) * num)
@@ -428,6 +492,9 @@
             return cls()
         if type(text) is cls:
             return text
+        if hasattr(text, '__html__'):
+            return Markup(text.__html__())
+
         text = unicode(text).replace('&', '&amp;') \
                             .replace('<', '&lt;') \
                             .replace('>', '&gt;')
@@ -477,6 +544,11 @@
         return Markup(striptags(self))
 
 
+try:
+    from genshi._speedups import Markup
+except ImportError:
+    pass # just use the Python implementation
+
 escape = Markup.escape
 
 def unescape(text):
@@ -543,7 +615,7 @@
     def __new__(cls, uri):
         if type(uri) is cls:
             return uri
-        return object.__new__(cls, uri)
+        return object.__new__(cls)
 
     def __getnewargs__(self):
         return (self.uri,)
@@ -572,6 +644,9 @@
         return QName(self.uri + u'}' + name)
     __getattr__ = __getitem__
 
+    def __hash__(self):
+        return hash(self.uri)
+
     def __repr__(self):
         return '<Namespace "%s">' % self.uri
 
@@ -590,9 +665,9 @@
     """A qualified element or attribute name.
     
     The unicode value of instances of this class contains the qualified name of
-    the element or attribute, in the form ``{namespace}localname``. The namespace
-    URI can be obtained through the additional `namespace` attribute, while the
-    local name can be accessed through the `localname` attribute.
+    the element or attribute, in the form ``{namespace-uri}local-name``. The
+    namespace URI can be obtained through the additional `namespace` attribute,
+    while the local name can be accessed through the `localname` attribute.
     
     >>> qname = QName('foo')
     >>> qname
@@ -612,10 +687,16 @@
     __slots__ = ['namespace', 'localname']
 
     def __new__(cls, qname):
+        """Create the `QName` instance.
+        
+        :param qname: the qualified name as a string of the form
+                      ``{namespace-uri}local-name``, where the leading curly
+                      brace is optional
+        """
         if type(qname) is cls:
             return qname
 
-        parts = qname.split(u'}', 1)
+        parts = qname.lstrip(u'{').split(u'}', 1)
         if len(parts) > 1:
             self = unicode.__new__(cls, u'{%s' % qname)
             self.namespace, self.localname = map(unicode, parts)
Copyright (C) 2012-2017 Edgewall Software