annotate markup/output.py @ 201:c5e0a1c86173 trunk

The `TextSerializer` should produce `unicode` objects, not `Markup` objects.
author cmlenz
date Fri, 25 Aug 2006 12:39:55 +0000
parents 5861f4446c26
children 0141f45c18e1
rev   line source
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
2 #
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
3 # Copyright (C) 2006 Edgewall Software
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
5 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
8 # are also available at http://markup.edgewall.org/wiki/License.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
9 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
12 # history and logs, available at http://markup.edgewall.org/log/.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
13
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
14 """This module provides different kinds of serialization methods for XML event
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
15 streams.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
16 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
17
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
18 from itertools import chain
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
19 try:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
20 frozenset
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
21 except NameError:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
22 from sets import ImmutableSet as frozenset
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
23 import re
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
24
145
47bbd9d2a5af * Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents: 143
diff changeset
25 from markup.core import escape, Markup, Namespace, QName
47bbd9d2a5af * Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents: 143
diff changeset
26 from markup.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
47bbd9d2a5af * Fix error in expression evaluation when the expression evaluates to an iterable that does not produce event tuples.
cmlenz
parents: 143
diff changeset
27 END_CDATA, PI, COMMENT, XML_NAMESPACE
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
28
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
29 __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer',
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
30 'TextSerializer']
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
31
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
32
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
class DocType(object):
    """Namespace for commonly used DOCTYPE declarations.

    Every constant is a `(name, pubid, sysid)` tuple, which is the shape the
    serializers in this module accept for their `doctype` argument.
    """

    # HTML 4.01
    HTML_STRICT = (
        'html',
        '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd',
    )
    HTML_TRANSITIONAL = (
        'html',
        '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd',
    )
    # The plain name is an alias for the strict variant.
    HTML = HTML_STRICT

    # XHTML 1.0
    XHTML_STRICT = (
        'html',
        '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
    )
    XHTML_TRANSITIONAL = (
        'html',
        '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
    )
    # The plain name is an alias for the strict variant.
    XHTML = XHTML_STRICT
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
47
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
48
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Elements whose whitespace must not be touched by the whitespace filter.
    # Empty for generic XML; subclasses override it.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the XML serializer.

        @param doctype: a `(name, pubid, sysid)` tuple that represents the
            DOCTYPE declaration that should be included at the top of the
            generated output
        @param strip_whitespace: whether extraneous whitespace should be
            stripped from the output
        """
        # Events that are prepended to every stream passed to `__call__`.
        self.preamble = []
        if doctype:
            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
        # Stream filters applied, in order, before serialization.
        self.filters = []
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))

    def __call__(self, stream):
        """Serialize the given event stream, yielding chunks of XML text.

        The preamble events are prepended to the stream and the configured
        filters are applied before any output is generated.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @return: a generator yielding the serialized output piece by piece;
            join the pieces to obtain the complete document
        """
        ns_attrib = []  # pending xmlns declarations for the next start tag
        ns_mapping = {XML_NAMESPACE.uri: 'xml'}  # namespace URI -> prefix
        have_doctype = False
        in_cdata = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        stream = _PushbackIterator(stream)
        pushback = stream.pushback
        for kind, data, pos in stream:

            if kind is START:
                tag, attrib = data

                tagname = tag.localname
                namespace = tag.namespace
                if namespace:
                    if namespace in ns_mapping:
                        prefix = ns_mapping[namespace]
                        if prefix:
                            tagname = '%s:%s' % (prefix, tagname)
                    else:
                        # Namespace was never announced through START_NS:
                        # declare it as the default namespace on this tag.
                        ns_attrib.append((QName('xmlns'), namespace))
                buf = ['<', tagname]

                for attr, value in attrib + ns_attrib:
                    attrname = attr.localname
                    if attr.namespace:
                        prefix = ns_mapping.get(attr.namespace)
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    buf += [' ', attrname, '="', escape(value), '"']
                ns_attrib = []

                # Peek at the next event: a START immediately followed by an
                # END is collapsed into a self-closing tag.
                try:
                    kind, data, pos = stream.next()
                except StopIteration:
                    # The stream ended right after the START event. Emit the
                    # tag as an open tag instead of letting StopIteration
                    # escape from the generator, which would discard the
                    # markup that has already been built up in `buf`.
                    buf += ['>']
                else:
                    if kind is END:
                        buf += ['/>']
                    else:
                        buf += ['>']
                        pushback((kind, data, pos))

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = '%s:%s' % (prefix, tag.localname)
                yield Markup('</%s>' % tagname)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through unescaped.
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is serialized; any later ones
                # are ignored.
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf += [' PUBLIC "%s"']
                elif sysid:
                    buf += [' SYSTEM']
                if sysid:
                    buf += [' "%s"']
                buf += ['>\n']
                # Empty parts are dropped so that the remaining values line
                # up with the placeholders added above.
                # NOTE(review): presumably Markup interpolates the extra
                # arguments into the format string -- confirm in markup.core.
                yield Markup(''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    # Queue the declaration for the next start tag.
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
171
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
172
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
173 class XHTMLSerializer(XMLSerializer):
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
174 """Produces XHTML text from an event stream.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
175
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
176 >>> from markup.builder import tag
20
cc92d74ce9e5 Fix tests broken in [20].
cmlenz
parents: 19
diff changeset
177 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
178 >>> print ''.join(XHTMLSerializer()(elem.generate()))
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
179 <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
180 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
181
18
5420cfe42d36 Actually make use of the `markup.core.Namespace` class, and add a couple of doctests.
cmlenz
parents: 1
diff changeset
182 NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
183
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
184 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
185 'hr', 'img', 'input', 'isindex', 'link', 'meta',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
186 'param'])
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
187 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
188 'defer', 'disabled', 'ismap', 'multiple',
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
189 'nohref', 'noresize', 'noshade', 'nowrap'])
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
190 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
191
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
192 def __call__(self, stream):
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
193 namespace = self.NAMESPACE
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
194 ns_attrib = []
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
195 ns_mapping = {XML_NAMESPACE.uri: 'xml'}
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
196 boolean_attrs = self._BOOLEAN_ATTRS
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
197 empty_elems = self._EMPTY_ELEMS
85
4938c310d904 Improve handling of DOCTYPE declarations.
cmlenz
parents: 73
diff changeset
198 have_doctype = False
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
199 in_cdata = False
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
200
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
201 stream = chain(self.preamble, stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
202 for filter_ in self.filters:
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
203 stream = filter_(stream)
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
204 stream = _PushbackIterator(stream)
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
205 pushback = stream.pushback
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
206 for kind, data, pos in stream:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
207
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
208 if kind is START:
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
209 tag, attrib = data
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
210
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
211 tagname = tag.localname
177
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
212 tagns = tag.namespace
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
213 if tagns:
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
214 if tagns in ns_mapping:
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
215 prefix = ns_mapping[tagns]
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
216 if prefix:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
217 tagname = '%s:%s' % (prefix, tagname)
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
218 else:
177
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
219 ns_attrib.append((QName('xmlns'), tagns))
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
220 buf = ['<', tagname]
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
221
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
222 for attr, value in attrib + ns_attrib:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
223 attrname = attr.localname
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
224 if attr.namespace:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
225 prefix = ns_mapping.get(attr.namespace)
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
226 if prefix:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
227 attrname = '%s:%s' % (prefix, attrname)
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
228 if attrname in boolean_attrs:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
229 if value:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
230 buf += [' ', attrname, '="', attrname, '"']
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
231 else:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
232 buf += [' ', attrname, '="', escape(value), '"']
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
233 ns_attrib = []
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
234
178
ba7556e3a835 Fix bug in XHTML serialization: all elements were allowed to be written out as empty if the namespace was set.
cmlenz
parents: 177
diff changeset
235 if (tagns and tagns != namespace.uri) or tagname in empty_elems:
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
236 kind, data, pos = stream.next()
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
237 if kind is END:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
238 buf += [' />']
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
239 else:
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
240 buf += ['>']
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
241 pushback((kind, data, pos))
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
242 else:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
243 buf += ['>']
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
244
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
245 yield Markup(''.join(buf))
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
246
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
247 elif kind is END:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
248 tag = data
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
249 tagname = tag.localname
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
250 if tag.namespace:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
251 prefix = ns_mapping.get(tag.namespace)
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
252 if prefix:
177
553866249cb0 * Minor fix for the XHTML serializer (the local namespace var got clobbered)
cmlenz
parents: 158
diff changeset
253 tagname = '%s:%s' % (prefix, tagname)
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
254 yield Markup('</%s>' % tagname)
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
255
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
256 elif kind is TEXT:
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
257 if in_cdata:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
258 yield data
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
259 else:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
260 yield escape(data, quotes=False)
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
261
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
262 elif kind is COMMENT:
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
263 yield Markup('<!--%s-->' % data)
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
264
136
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
265 elif kind is DOCTYPE and not have_doctype:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
266 name, pubid, sysid = data
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
267 buf = ['<!DOCTYPE %s']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
268 if pubid:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
269 buf += [' PUBLIC "%s"']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
270 elif sysid:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
271 buf += [' SYSTEM']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
272 if sysid:
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
273 buf += [' "%s"']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
274 buf += ['>\n']
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
275 yield Markup(''.join(buf), *filter(None, data))
b86f496f6035 Minor performance improvements in serialization.
cmlenz
parents: 123
diff changeset
276 have_doctype = True
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
277
141
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
278 elif kind is START_NS:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
279 prefix, uri = data
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
280 if uri not in ns_mapping:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
281 ns_mapping[uri] = prefix
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
282 if not prefix:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
283 ns_attrib.append((QName('xmlns'), uri))
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
284 else:
520a5b7dd6d2 * No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents: 140
diff changeset
285 ns_attrib.append((QName('xmlns:%s' % prefix), uri))
109
230ee6a2c6b2 Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents: 105
diff changeset
286
143
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
287 elif kind is START_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
288 yield Markup('<![CDATA[')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
289 in_cdata = True
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
290
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
291 elif kind is END_CDATA:
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
292 yield Markup(']]>')
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
293 in_cdata = False
3d4c214c979a CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents: 141
diff changeset
294
105
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
295 elif kind is PI:
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
296 yield Markup('<?%s %s?>' % data)
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
297
96
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
298
fa08aef181a2 Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents: 89
diff changeset
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.
    
    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>

    Compared to the XHTML output, empty elements are written without a
    closing tag or trailing slash, boolean attributes are minimized to just
    their name, and text inside `<script>` and `<style>` elements is written
    out without escaping.
    """

    # Elements whose text content is emitted verbatim (no entity escaping).
    _NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')])

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the HTML serializer.
        
        @param doctype: a `(name, pubid, sysid)` tuple that represents the
            DOCTYPE declaration that should be included at the top of the
            generated output
        @param strip_whitespace: whether extraneous whitespace should be
            stripped from the output
        """
        # NOTE(review): the second positional argument is presumably the base
        # class' own `strip_whitespace` flag; it is forced to False here so
        # that only the filter configured below is installed -- confirm
        # against `XHTMLSerializer.__init__`.
        super(HTMLSerializer, self).__init__(doctype, False)
        if strip_whitespace:
            # NOTE(review): the meaning of the trailing `True` is defined by
            # `WhitespaceFilter`, which is not visible from this class.
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS, True))

    def __call__(self, stream):
        """Serialize the given event stream as HTML.

        The stream is prefixed with `self.preamble` and passed through every
        filter in `self.filters` before being serialized.

        @param stream: an iterable of `(kind, data, pos)` events
        @return: a generator yielding the pieces of the serialized output;
            joining them produces the complete document
        """
        namespace = self.NAMESPACE
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        stream = _PushbackIterator(stream)
        pushback = stream.pushback
        for kind, data, pos in stream:

            if kind is START:
                tag, attrib = data
                # Elements that carry a namespace not covered by `namespace`
                # are left out; only their content is serialized.
                if not tag.namespace or tag in namespace:
                    tagname = tag.localname
                    buf = ['<', tagname]

                    for attr, value in attrib:
                        attrname = attr.localname
                        if not attr.namespace or attr in namespace:
                            if attrname in boolean_attrs:
                                # Minimized form: just the name when the value
                                # is true, nothing at all otherwise.
                                if value:
                                    buf += [' ', attrname]
                            else:
                                buf += [' ', attrname, '="', escape(value), '"']

                    if tagname in empty_elems:
                        # Swallow the END event directly following an empty
                        # element so that no closing tag is written for it.
                        # Separate names are used so the loop variables are
                        # not clobbered, and exhaustion of the stream is
                        # handled explicitly: letting StopIteration escape
                        # here would end the generator before the start tag
                        # buffered above has been emitted.
                        try:
                            next_kind, next_data, next_pos = stream.next()
                        except StopIteration:
                            pass
                        else:
                            if next_kind is not END:
                                pushback((next_kind, next_data, next_pos))

                    buf += ['>']
                    yield Markup(''.join(buf))

                    if tagname in noescape_elems:
                        noescape = True

            elif kind is END:
                tag = data
                if not tag.namespace or tag in namespace:
                    yield Markup('</%s>' % tag.localname)

                # Any end tag terminates the unescaped script/style content.
                noescape = False

            elif kind is TEXT:
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written; later ones fall
                # through all branches and are dropped.
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf += [' PUBLIC "%s"']
                elif sysid:
                    buf += [' SYSTEM']
                if sysid:
                    buf += [' "%s"']
                buf += ['>\n']
                # The placeholders collected above correspond one-to-one to
                # the non-empty items of `data`.
                yield Markup(''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)

            # All other events, including namespace declarations (START_NS),
            # produce no HTML output and are deliberately ignored.
71f3db26eecb Include processing instructions in serialized streams.
cmlenz
parents: 96
diff changeset
400
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
401
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
class TextSerializer(object):
    """Serializes an event stream to plain text.

    Only the text events of the stream contribute to the output; every other
    kind of event is dropped. Unlike the other serializers, special XML
    characters are not escaped:

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print elem
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    <Hello!>

    When a text event carries literal markup (an instance of the `Markup`
    class), its tags and entities are stripped before it is emitted:

    >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>'))
    >>> print elem
    <div><a href="foo">Hello!</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    Hello!
    """

    def __call__(self, stream):
        """Generate one `unicode` string per text event in `stream`.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        """
        for kind, data, pos in stream:
            if kind is not TEXT:
                continue
            # Exact type check: only genuine `Markup` instances have their
            # tags and entities stripped.
            if type(data) is Markup:
                data = data.striptags().stripentities()
            # Always hand out plain `unicode` objects, never `Markup`.
            yield unicode(data)
200
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
431
5861f4446c26 Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents: 178
diff changeset
432
123
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
class WhitespaceFilter(object):
    """A stream filter that strips extraneous ignorable white space.

    Runs of adjacent text events are merged into a single escaped `Markup`
    text event. Unless white space is being preserved, blanks and tabs
    directly before a newline are removed and runs of newlines are collapsed
    into one.
    """

    # Spaces/tabs immediately followed by a newline.
    _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
    # Two or more consecutive newlines.
    _LINE_COLLAPSE = re.compile('\n{2,}')
    # Qualified name of the `xml:space` attribute.
    _XML_SPACE = XML_NAMESPACE['space']

    def __init__(self, preserve=None, noescape=None, escape_cdata=False):
        """Initialize the filter.

        @param preserve: a set or sequence of tag names inside which white
            space is left untouched (no trimming or collapsing)
        @param noescape: a set or sequence of tag names for which text
            content should not be escaped
        @param escape_cdata: if false (the default), text between
            `START_CDATA` and `END_CDATA` events is passed through without
            escaping; if true, it is escaped like any other text

        Both the `preserve` and `noescape` sets are expected to refer to
        elements that cannot contain further child elements, because any
        end tag switches both modes off again.
        """
        self.escape_cdata = escape_cdata
        if noescape is None:
            noescape = ()
        self.noescape = frozenset(noescape)
        if preserve is None:
            preserve = ()
        self.preserve = frozenset(preserve)

    def __call__(self, stream, ctxt=None):
        """Filter `stream`, yielding `(kind, data, pos)` event tuples.

        @param stream: an iterable of `(kind, data, pos)` event tuples
        @param ctxt: not used by this filter
        """
        strip_trailing = self._TRAILING_SPACE.sub
        squeeze_newlines = self._LINE_COLLAPSE.sub
        space_attr = self._XML_SPACE
        join_markup = Markup('').join
        keep_space_tags = self.preserve
        raw_text_tags = self.noescape
        escape_cdata = self.escape_cdata

        keep_space = False  # currently inside a white-space preserving element
        raw_text = False    # currently inside unescaped content
        pending = []        # text collected since the last non-text event

        # The trailing all-None sentinel forces a final flush of `pending`.
        for kind, data, pos in chain(stream, [(None, None, None)]):
            if kind is TEXT:
                if raw_text:
                    # Wrapping in Markup exempts the text from escaping below.
                    data = Markup(data)
                pending.append(data)
                continue

            if pending:
                if len(pending) == 1:
                    text = escape(pending.pop(), quotes=False)
                else:
                    text = join_markup(pending, escape_quotes=False)
                    del pending[:]
                if not keep_space:
                    text = squeeze_newlines('\n', strip_trailing('', text))
                # The merged text is reported at the position of the event
                # that triggered the flush.
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrib = data
                if (tag.localname in keep_space_tags
                        or attrib.get(space_attr) == 'preserve'):
                    keep_space = True
                if tag.localname in raw_text_tags:
                    raw_text = True
            elif kind is END:
                # Any end tag resets both modes (hence the "no child
                # elements" expectation documented on the constructor).
                keep_space = raw_text = False
            elif kind is START_CDATA and not escape_cdata:
                raw_text = True
            elif kind is END_CDATA and not escape_cdata:
                raw_text = False

            # Pass the event through; the sentinel (kind None) is swallowed.
            if kind:
                yield kind, data, pos
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
510
10279d2eeec9 Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents: 109
diff changeset
511
26
3c1a022be04c * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 20
diff changeset
512 class _PushbackIterator(object):
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
513 """A simple wrapper for iterators that allows pushing items back on the
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
514 queue via the `pushback()` method.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
515
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
516 That can effectively be used to peek at the next item."""
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
517 __slots__ = ['iterable', 'buf']
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
518
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
519 def __init__(self, iterable):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
520 self.iterable = iter(iterable)
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
521 self.buf = []
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
522
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
523 def __iter__(self):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
524 return self
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
525
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
526 def next(self):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
527 if self.buf:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
528 return self.buf.pop(0)
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
529 return self.iterable.next()
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
530
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
531 def pushback(self, item):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
532 self.buf.append(item)
Copyright (C) 2012-2017 Edgewall Software