annotate markup/output.py @ 123:10279d2eeec9 trunk

Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
author cmlenz
date Thu, 03 Aug 2006 14:49:22 +0000
parents 230ee6a2c6b2
children b86f496f6035
rev   line source
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
2 #
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
3 # Copyright (C) 2006 Edgewall Software
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
5 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
8 # are also available at http://markup.edgewall.org/wiki/License.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
9 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
12 # history and logs, available at http://markup.edgewall.org/log/.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
13
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
14 """This module provides different kinds of serialization methods for XML event
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
15 streams.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
16 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
17
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
18 from itertools import chain
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
19 try:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
20 frozenset
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
21 except NameError:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
22 from sets import ImmutableSet as frozenset
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
23 import re
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
24
73
1da51d718391 Some more performance tweaks.
cmlenz
parents: 69
diff changeset
25 from markup.core import escape, Markup, Namespace, QName
105
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
27
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
29
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
30
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a three-item tuple holding the root element name, the
    public identifier and the system identifier (DTD URL) -- the same
    `(name, pubid, sysid)` layout that the serializers in this module accept
    for their `doctype` parameter.
    """

    # HTML 4.01
    HTML_STRICT = (
        'html',
        '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd',
    )
    HTML_TRANSITIONAL = (
        'html',
        '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd',
    )
    # The unqualified name is an alias for the strict variant.
    HTML = HTML_STRICT

    # XHTML 1.0
    XHTML_STRICT = (
        'html',
        '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
    )
    XHTML_TRANSITIONAL = (
        'html',
        '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
    )
    # The unqualified name is an alias for the strict variant.
    XHTML = XHTML_STRICT
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
45
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
46
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Handed to `WhitespaceFilter` when whitespace stripping is enabled;
    # presumably the set of element names whose whitespace must be left
    # untouched. Empty here; subclasses may override it.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the XML serializer.

        @param doctype: a `(name, pubid, sysid)` tuple that represents the
            DOCTYPE declaration that should be included at the top of the
            generated output
        @param strip_whitespace: whether extraneous whitespace should be
            stripped from the output
        """
        # Events that are injected in front of every serialized stream.
        self.preamble = []
        if doctype:
            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
        # Stream filters applied, in order, before serialization.
        self.filters = []
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))

    def __call__(self, stream):
        """Serialize the given event stream, yielding chunks of XML text.

        This is a generator: one string is produced per emitted construct
        (start tag, end tag, text node, comment, DOCTYPE or processing
        instruction).

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator of markup strings
        """
        have_doctype = False
        # Namespace declarations waiting to be written onto the next start
        # tag, as `(QName, uri)` pairs.
        ns_attrib = []
        # Maps namespace URI -> prefix; the first declaration of a URI wins.
        ns_mapping = {}

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        # One event of lookahead is needed to detect empty elements.
        stream = _PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is START:
                tag, attrib = data

                tagname = tag.localname
                namespace = tag.namespace
                if namespace:
                    if namespace in ns_mapping:
                        prefix = ns_mapping[namespace]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # Namespace never announced through START_NS:
                        # declare it as the default namespace on this
                        # element.
                        ns_attrib.append((QName('xmlns'), namespace))
                buf = ['<%s' % tagname]

                for attr, value in attrib + ns_attrib:
                    attrname = attr.localname
                    if attr.namespace:
                        prefix = ns_mapping.get(attr.namespace)
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    buf.append(' %s="%s"' % (attrname, escape(value)))
                # The pending declarations have been written out.
                ns_attrib = []

                # Peek at the next event: an immediately following END means
                # the element is empty and can be written as `<tag/>`.
                try:
                    lookahead = stream.next()
                except StopIteration:
                    # Truncated stream (START was the very last event).
                    # Still emit the start tag instead of letting the
                    # StopIteration abort the generator and lose it.
                    buf.append('>')
                else:
                    if lookahead[0] is END:
                        buf.append('/>')
                    else:
                        buf.append('>')
                        stream.pushback(lookahead)

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = '%s:%s' % (prefix, tag.localname)
                yield Markup('</%s>' % tagname)

            elif kind is TEXT:
                yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE:
                # Only the first DOCTYPE event is serialized.
                if not have_doctype:
                    name, pubid, sysid = data
                    # Build a format template; the non-empty items of `data`
                    # are handed to `Markup` as arguments (presumably
                    # interpolated with escaping -- see markup.core).
                    buf = ['<!DOCTYPE %s']
                    if pubid:
                        buf.append(' PUBLIC "%s"')
                    elif sysid:
                        buf.append(' SYSTEM')
                    if sysid:
                        buf.append(' "%s"')
                    buf.append('>\n')
                    yield Markup(''.join(buf), *filter(None, data))
                    have_doctype = True

            elif kind is START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    # Queue the declaration for the next start tag.
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is PI:
                # `data` is interpolated as a two-item tuple (presumably
                # the PI target and its content).
                yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
157
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
158
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
159 class XHTMLSerializer(XMLSerializer):
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
160 """Produces XHTML text from an event stream.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
161
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
162 >>> from markup.builder import tag
20
cc92d74ce9e5 Fix tests broken in [20].
cmlenz
parents: 19
diff changeset
163 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
164 >>> print ''.join(XHTMLSerializer()(elem.generate()))
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
165 <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
166 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
167
18
5420cfe42d36 Actually make use of the `markup.core.Namespace` class, and add a couple of doctests.
cmlenz
parents: 1
diff changeset
168 NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
169
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
170 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
171 'hr', 'img', 'input', 'isindex', 'link', 'meta',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
172 'param'])
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
173 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
174 'defer', 'disabled', 'ismap', 'multiple',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
175 'nohref', 'noresize', 'noshade', 'nowrap'])
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
177
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
178 def __call__(self, stream):
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
179 have_doctype = False
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
180 ns_mapping = {}
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
181
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
182 stream = chain(self.preamble, stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
183 for filter_ in self.filters:
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
184 stream = filter_(stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
185 stream = _PushbackIterator(stream)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
186 for kind, data, pos in stream:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
187
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
188 if kind is START:
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
189 tag, attrib = data
18
5420cfe42d36 Actually make use of the `markup.core.Namespace` class, and add a couple of doctests.
cmlenz
parents: 1
diff changeset
190 if tag.namespace and tag not in self.NAMESPACE:
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
191 continue # not in the HTML namespace, so don't emit
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
192 buf = ['<', tag.localname]
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
193
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
194 for attr, value in attrib:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
195 if attr.namespace and attr not in self.NAMESPACE:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
196 continue # not in the HTML namespace, so don't emit
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
197 if attr.localname in self._BOOLEAN_ATTRS:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
198 if value:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
199 buf.append(' %s="%s"' % (attr.localname, attr.localname))
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
200 else:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
201 buf.append(' %s="%s"' % (attr.localname, escape(value)))
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
202
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
203 if tag.localname in self._EMPTY_ELEMS:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
204 kind, data, pos = stream.next()
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
205 if kind is END:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
206 buf.append(' />')
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
207 else:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
208 buf.append('>')
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
209 stream.pushback((kind, data, pos))
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
210 else:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
211 buf.append('>')
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
212
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
213 yield Markup(''.join(buf))
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
214
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
215 elif kind is END:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
216 tag = data
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
217 if tag.namespace and tag not in self.NAMESPACE:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
218 continue # not in the HTML namespace, so don't emit
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
219 yield Markup('</%s>' % tag.localname)
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
220
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
221 elif kind is TEXT:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
222 yield escape(data, quotes=False)
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
223
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
224 elif kind is COMMENT:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
225 yield Markup('<!--%s-->' % data)
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
226
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
227 elif kind is DOCTYPE:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
228 if not have_doctype:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
229 name, pubid, sysid = data
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
230 buf = ['<!DOCTYPE %s']
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
231 if pubid:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
232 buf.append(' PUBLIC "%s"')
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
233 elif sysid:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
234 buf.append(' SYSTEM')
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
235 if sysid:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
236 buf.append(' "%s"')
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
237 buf.append('>\n')
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
238 yield Markup(''.join(buf), *filter(None, data))
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
239 have_doctype = True
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
240
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
241 elif kind is START_NS:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
242 prefix, uri = data
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
243 if uri not in ns_mapping:
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
244 ns_mapping[uri] = prefix
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
245
105
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
246 elif kind is PI:
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
247 yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
248
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
249
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.
    
    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    def __call__(self, stream):
        """Serialize the given event stream, yielding chunks of HTML text.

        The events in `self.preamble` are prepended to the stream and every
        filter in `self.filters` is applied before serialization starts.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator producing the serialized output piece by piece
        """
        have_doctype = False
        ns_mapping = {}

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        # The pushback wrapper lets us peek at the event following the START
        # of an empty element.
        stream = _PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is START:
                tag, attrib = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                buf = ['<', tag.localname]

                for attr, value in attrib:
                    if attr.namespace and attr not in self.NAMESPACE \
                            or attr.localname.startswith('xml:'):
                        continue # not in the HTML namespace, so don't emit
                    if attr.localname in self._BOOLEAN_ATTRS:
                        # Boolean attributes are minimized: only the name is
                        # written, and only if the value is true.
                        if value:
                            buf.append(' %s' % attr.localname)
                    else:
                        buf.append(' %s="%s"' % (attr.localname, escape(value)))

                if tag.localname in self._EMPTY_ELEMS:
                    # Empty elements get no end tag, so swallow an END event
                    # that immediately follows; anything else is put back.
                    try:
                        following = stream.next()
                    except StopIteration:
                        # The stream ended right after this START event. Do
                        # not let the exception escape the generator, or the
                        # start tag collected in `buf` would never be emitted.
                        pass
                    else:
                        if following[0] is not END:
                            stream.pushback(following)

                yield Markup(''.join(buf + ['>']))

            elif kind is END:
                tag = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                yield Markup('</%s>' % tag.localname)

            elif kind is TEXT:
                yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE:
                # Only the first DOCTYPE event is serialized.
                if not have_doctype:
                    name, pubid, sysid = data
                    buf = ['<!DOCTYPE %s']
                    if pubid:
                        buf.append(' PUBLIC "%s"')
                    elif sysid:
                        buf.append(' SYSTEM')
                    if sysid:
                        buf.append(' "%s"')
                    buf.append('>\n')
                    # The number of "%s" placeholders matches the number of
                    # non-empty items in `data`.
                    yield Markup(''.join(buf), *filter(None, data))
                    have_doctype = True

            elif kind is START_NS:
                # Namespace declarations are recorded but never written out.
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
325
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
326
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class WhitespaceFilter(object):
    """A filter that removes extraneous ignorable white space from the
    stream.

    Consecutive TEXT events are merged into a single TEXT event. Unless the
    text is located inside a white-space preserving element, spaces and tabs
    directly in front of a newline are removed and runs of newlines are
    collapsed into a single newline.
    """

    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    _LINE_COLLAPSE = re.compile('\n{2,}')

    def __init__(self, preserve=None):
        """Initialize the filter.

        @param preserve: a sequence of tag names for which white-space should
            be preserved.
        """
        if preserve is None:
            preserve = []
        self.preserve = frozenset(preserve)

    def __call__(self, stream, ctxt=None):
        """Apply the filter to the given event stream.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @param ctxt: not used by this filter
        @return: a generator producing the filtered events
        """
        trim_trailing_space = self._TRAILING_SPACE.sub
        collapse_lines = self._LINE_COLLAPSE.sub
        mjoin = Markup('').join
        # Stack of flags, one per open element; the top entry tells whether
        # white-space must be preserved at the current nesting level.
        preserve = [False]

        textbuf = []
        # The trailing sentinel event forces buffered text at the very end of
        # the stream to be flushed; the sentinel itself is never emitted.
        for kind, data, pos in chain(stream, [(None, None, None)]):
            if kind is TEXT:
                textbuf.append(data)
                continue

            # Flush pending text *before* the preserve stack is touched, so
            # that the text is judged by the element that actually contains
            # it and not by the element that is about to be opened.
            if textbuf:
                if len(textbuf) > 1:
                    text = mjoin(textbuf, escape_quotes=False)
                    del textbuf[:]
                else:
                    text = escape(textbuf.pop(), quotes=False)
                if not preserve[-1]:
                    text = collapse_lines('\n', trim_trailing_space('', text))
                # Note: the merged text carries the position of the event
                # that follows it.
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrib = data
                # Preservation is inherited: everything nested inside a
                # white-space preserving element is preserved as well.
                preserve.append(preserve[-1] or tag in self.preserve or
                                attrib.get('xml:space') == 'preserve')
            elif kind is END:
                preserve.pop()

            if kind is not None:
                yield kind, data, pos
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
371
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
372
26
3c1a022be04c * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 20
diff changeset
class _PushbackIterator(object):
    """Iterator wrapper that keeps a queue of items that were handed back.

    Items given to `pushback()` are returned by `next()`, oldest first,
    before any further item is pulled from the wrapped iterator. This makes
    it possible to look at the upcoming item and then put it back.
    """
    __slots__ = ['iterable', 'buf']

    def __init__(self, iterable):
        """Wrap the given iterable; the pushback queue starts out empty."""
        self.iterable = iter(iterable)
        self.buf = []

    def __iter__(self):
        """Return the wrapper itself."""
        return self

    def next(self):
        """Return the oldest pushed-back item, or else the next item of the
        wrapped iterator."""
        pending = self.buf
        if not pending:
            return self.iterable.next()
        return pending.pop(0)

    def pushback(self, item):
        """Queue `item` so that a later call to `next()` returns it again."""
        self.buf.extend((item,))
Copyright (C) 2012-2017 Edgewall Software