Mercurial > genshi > mirror
annotate markup/output.py @ 142:349b3ff5367d trunk
Minor cleanup in XInclude filter.
author | cmlenz |
---|---|
date | Fri, 11 Aug 2006 08:31:58 +0000 |
parents | 520a5b7dd6d2 |
children | 3d4c214c979a |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
18 from itertools import chain |
1 | 19 try: |
20 frozenset | |
21 except NameError: | |
22 from sets import ImmutableSet as frozenset | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
23 import re |
1 | 24 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
25 from markup.core import escape, Markup, Namespace, QName, XML_NAMESPACE |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
26 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, COMMENT, PI |
1 | 27 |
28 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] | |
29 | |
30 | |
85 | 31 class DocType(object): |
32 """Defines a number of commonly used DOCTYPE declarations as constants.""" | |
33 | |
34 HTML_STRICT = ('html', '-//W3C//DTD HTML 4.01//EN', | |
35 'http://www.w3.org/TR/html4/strict.dtd') | |
36 HTML_TRANSITIONAL = ('html', '-//W3C//DTD HTML 4.01 Transitional//EN', | |
37 'http://www.w3.org/TR/html4/loose.dtd') | |
38 HTML = HTML_STRICT | |
39 | |
40 XHTML_STRICT = ('html', '-//W3C//DTD XHTML 1.0 Strict//EN', | |
41 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd') | |
42 XHTML_TRANSITIONAL = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', | |
43 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') | |
44 XHTML = XHTML_STRICT | |
45 | |
46 | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
47 class XMLSerializer(object): |
1 | 48 """Produces XML text from an event stream. |
49 | |
50 >>> from markup.builder import tag | |
20 | 51 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
52 >>> print ''.join(XMLSerializer()(elem.generate())) |
1 | 53 <div><a href="foo"/><br/><hr noshade="True"/></div> |
54 """ | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
55 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
56 _PRESERVE_SPACE = frozenset() |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
57 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
58 def __init__(self, doctype=None, strip_whitespace=True): |
85 | 59 """Initialize the XML serializer. |
60 | |
61 @param doctype: a `(name, pubid, sysid)` tuple that represents the | |
62 DOCTYPE declaration that should be included at the top of the | |
63 generated output | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
64 @param strip_whitespace: whether extraneous whitespace should be |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
65 stripped from the output |
85 | 66 """ |
67 self.preamble = [] | |
68 if doctype: | |
69 self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
70 self.filters = [] |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
71 if strip_whitespace: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
72 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
1 | 73 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
74 def __call__(self, stream): |
85 | 75 have_doctype = False |
1 | 76 ns_attrib = [] |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
77 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
1 | 78 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
79 stream = chain(self.preamble, stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
80 for filter_ in self.filters: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
81 stream = filter_(stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
82 stream = _PushbackIterator(stream) |
136 | 83 pushback = stream.pushback |
1 | 84 for kind, data, pos in stream: |
85 | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
86 if kind is START: |
1 | 87 tag, attrib = data |
88 | |
89 tagname = tag.localname | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
90 namespace = tag.namespace |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
91 if namespace: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
92 if namespace in ns_mapping: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
93 prefix = ns_mapping[namespace] |
1 | 94 if prefix: |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
95 tagname = '%s:%s' % (prefix, tagname) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
96 else: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
97 ns_attrib.append((QName('xmlns'), namespace)) |
136 | 98 buf = ['<', tagname] |
1 | 99 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
100 for attr, value in attrib + ns_attrib: |
1 | 101 attrname = attr.localname |
102 if attr.namespace: | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
20
diff
changeset
|
103 prefix = ns_mapping.get(attr.namespace) |
1 | 104 if prefix: |
69 | 105 attrname = '%s:%s' % (prefix, attrname) |
136 | 106 buf += [' ', attrname, '="', escape(value), '"'] |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
107 ns_attrib = [] |
1 | 108 |
109 kind, data, pos = stream.next() | |
69 | 110 if kind is END: |
136 | 111 buf += ['/>'] |
1 | 112 else: |
136 | 113 buf += ['>'] |
114 pushback((kind, data, pos)) | |
1 | 115 |
116 yield Markup(''.join(buf)) | |
117 | |
69 | 118 elif kind is END: |
1 | 119 tag = data |
120 tagname = tag.localname | |
121 if tag.namespace: | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
20
diff
changeset
|
122 prefix = ns_mapping.get(tag.namespace) |
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
20
diff
changeset
|
123 if prefix: |
69 | 124 tagname = '%s:%s' % (prefix, tag.localname) |
1 | 125 yield Markup('</%s>' % tagname) |
126 | |
69 | 127 elif kind is TEXT: |
73 | 128 yield escape(data, quotes=False) |
1 | 129 |
89
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
130 elif kind is COMMENT: |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
131 yield Markup('<!--%s-->' % data) |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
132 |
136 | 133 elif kind is DOCTYPE and not have_doctype: |
134 name, pubid, sysid = data | |
135 buf = ['<!DOCTYPE %s'] | |
136 if pubid: | |
137 buf += [' PUBLIC "%s"'] | |
138 elif sysid: | |
139 buf += [' SYSTEM'] | |
140 if sysid: | |
141 buf += [' "%s"'] | |
142 buf += ['>\n'] | |
143 yield Markup(''.join(buf), *filter(None, data)) | |
144 have_doctype = True | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
145 |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
146 elif kind is START_NS: |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
147 prefix, uri = data |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
148 if uri not in ns_mapping: |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
149 ns_mapping[uri] = prefix |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
150 if not prefix: |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
151 ns_attrib.append((QName('xmlns'), uri)) |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
152 else: |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
153 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) |
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
154 |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
155 elif kind is PI: |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
156 yield Markup('<?%s %s?>' % data) |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
157 |
1 | 158 |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
159 class XHTMLSerializer(XMLSerializer): |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
160 """Produces XHTML text from an event stream. |
1 | 161 |
162 >>> from markup.builder import tag | |
20 | 163 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
164 >>> print ''.join(XHTMLSerializer()(elem.generate())) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
165 <div><a href="foo"></a><br /><hr noshade="noshade" /></div> |
1 | 166 """ |
167 | |
18
5420cfe42d36
Actually make use of the `markup.core.Namespace` class, and add a couple of doctests.
cmlenz
parents:
1
diff
changeset
|
168 NAMESPACE = Namespace('http://www.w3.org/1999/xhtml') |
1 | 169 |
170 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', | |
171 'hr', 'img', 'input', 'isindex', 'link', 'meta', | |
172 'param']) | |
173 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare', | |
174 'defer', 'disabled', 'ismap', 'multiple', | |
175 'nohref', 'noresize', 'noshade', 'nowrap']) | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
176 _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')]) |
1 | 177 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
178 def __call__(self, stream): |
136 | 179 namespace = self.NAMESPACE |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
180 ns_attrib = [] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
181 ns_mapping = {XML_NAMESPACE.uri: 'xml'} |
136 | 182 boolean_attrs = self._BOOLEAN_ATTRS |
183 empty_elems = self._EMPTY_ELEMS | |
85 | 184 have_doctype = False |
1 | 185 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
186 stream = chain(self.preamble, stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
187 for filter_ in self.filters: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
188 stream = filter_(stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
189 stream = _PushbackIterator(stream) |
136 | 190 pushback = stream.pushback |
1 | 191 for kind, data, pos in stream: |
192 | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
193 if kind is START: |
1 | 194 tag, attrib = data |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
195 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
196 tagname = tag.localname |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
197 namespace = tag.namespace |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
198 if namespace: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
199 if namespace in ns_mapping: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
200 prefix = ns_mapping[namespace] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
201 if prefix: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
202 tagname = '%s:%s' % (prefix, tagname) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
203 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
204 ns_attrib.append((QName('xmlns'), namespace)) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
205 buf = ['<', tagname] |
136 | 206 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
207 for attr, value in attrib + ns_attrib: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
208 attrname = attr.localname |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
209 if attr.namespace: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
210 prefix = ns_mapping.get(attr.namespace) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
211 if prefix: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
212 attrname = '%s:%s' % (prefix, attrname) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
213 if attrname in boolean_attrs: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
214 if value: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
215 buf += [' ', attrname, '="', attrname, '"'] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
216 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
217 buf += [' ', attrname, '="', escape(value), '"'] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
218 ns_attrib = [] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
219 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
220 if (not tag.namespace or tag in namespace) and \ |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
221 tagname in empty_elems: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
222 kind, data, pos = stream.next() |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
223 if kind is END: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
224 buf += [' />'] |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
225 else: |
136 | 226 buf += ['>'] |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
227 pushback((kind, data, pos)) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
228 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
229 buf += ['>'] |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
230 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
231 yield Markup(''.join(buf)) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
232 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
233 elif kind is END: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
234 tag = data |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
235 tagname = tag.localname |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
236 if tag.namespace: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
237 prefix = ns_mapping.get(tag.namespace) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
238 if prefix: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
239 tagname = '%s:%s' % (prefix, tag.localname) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
240 yield Markup('</%s>' % tagname) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
241 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
242 elif kind is TEXT: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
243 yield escape(data, quotes=False) |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
244 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
245 elif kind is COMMENT: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
246 yield Markup('<!--%s-->' % data) |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
247 |
136 | 248 elif kind is DOCTYPE and not have_doctype: |
249 name, pubid, sysid = data | |
250 buf = ['<!DOCTYPE %s'] | |
251 if pubid: | |
252 buf += [' PUBLIC "%s"'] | |
253 elif sysid: | |
254 buf += [' SYSTEM'] | |
255 if sysid: | |
256 buf += [' "%s"'] | |
257 buf += ['>\n'] | |
258 yield Markup(''.join(buf), *filter(None, data)) | |
259 have_doctype = True | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
260 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
261 elif kind is START_NS: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
262 prefix, uri = data |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
263 if uri not in ns_mapping: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
264 ns_mapping[uri] = prefix |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
265 if not prefix: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
266 ns_attrib.append((QName('xmlns'), uri)) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
267 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
268 ns_attrib.append((QName('xmlns:%s' % prefix), uri)) |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
269 |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
270 elif kind is PI: |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
271 yield Markup('<?%s %s?>' % data) |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
272 |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
273 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
274 class HTMLSerializer(XHTMLSerializer): |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
275 """Produces HTML text from an event stream. |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
276 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
277 >>> from markup.builder import tag |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
278 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
279 >>> print ''.join(HTMLSerializer()(elem.generate())) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
280 <div><a href="foo"></a><br><hr noshade></div> |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
281 """ |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
282 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
283 _NOESCAPE_ELEMS = frozenset([QName('script'), QName('style')]) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
284 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
285 def __init__(self, doctype=None, strip_whitespace=True): |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
286 """Initialize the HTML serializer. |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
287 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
288 @param doctype: a `(name, pubid, sysid)` tuple that represents the |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
289 DOCTYPE declaration that should be included at the top of the |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
290 generated output |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
291 @param strip_whitespace: whether extraneous whitespace should be |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
292 stripped from the output |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
293 """ |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
294 super(HTMLSerializer, self).__init__(doctype, False) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
295 if strip_whitespace: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
296 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
297 self._NOESCAPE_ELEMS)) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
298 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
299 def __call__(self, stream): |
136 | 300 namespace = self.NAMESPACE |
301 ns_mapping = {} | |
302 boolean_attrs = self._BOOLEAN_ATTRS | |
303 empty_elems = self._EMPTY_ELEMS | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
304 noescape_elems = self._NOESCAPE_ELEMS |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
305 have_doctype = False |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
306 noescape = False |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
307 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
308 stream = chain(self.preamble, stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
309 for filter_ in self.filters: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
310 stream = filter_(stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
311 stream = _PushbackIterator(stream) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
312 pushback = stream.pushback |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
313 for kind, data, pos in stream: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
314 |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
315 if kind is START: |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
316 tag, attrib = data |
136 | 317 if not tag.namespace or tag in namespace: |
318 tagname = tag.localname | |
319 buf = ['<', tagname] | |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
320 |
136 | 321 for attr, value in attrib: |
322 attrname = attr.localname | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
323 if not attr.namespace or attr in namespace: |
136 | 324 if attrname in boolean_attrs: |
325 if value: | |
326 buf += [' ', attrname] | |
327 else: | |
328 buf += [' ', attrname, '="', escape(value), '"'] | |
1 | 329 |
136 | 330 if tagname in empty_elems: |
331 kind, data, pos = stream.next() | |
332 if kind is not END: | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
333 pushback((kind, data, pos)) |
1 | 334 |
140
c1f4390d50f8
Fix bug in HTML serializer, plus some other minor tweaks.
cmlenz
parents:
136
diff
changeset
|
335 buf += ['>'] |
c1f4390d50f8
Fix bug in HTML serializer, plus some other minor tweaks.
cmlenz
parents:
136
diff
changeset
|
336 yield Markup(''.join(buf)) |
1 | 337 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
338 if tagname in noescape_elems: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
339 noescape = True |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
340 |
69 | 341 elif kind is END: |
1 | 342 tag = data |
136 | 343 if not tag.namespace or tag in namespace: |
344 yield Markup('</%s>' % tag.localname) | |
1 | 345 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
346 noescape = False |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
347 |
69 | 348 elif kind is TEXT: |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
349 if noescape: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
350 yield data |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
351 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
352 yield escape(data, quotes=False) |
1 | 353 |
89
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
354 elif kind is COMMENT: |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
355 yield Markup('<!--%s-->' % data) |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
356 |
136 | 357 elif kind is DOCTYPE and not have_doctype: |
358 name, pubid, sysid = data | |
359 buf = ['<!DOCTYPE %s'] | |
360 if pubid: | |
361 buf += [' PUBLIC "%s"'] | |
362 elif sysid: | |
363 buf += [' SYSTEM'] | |
364 if sysid: | |
365 buf += [' "%s"'] | |
366 buf += ['>\n'] | |
367 yield Markup(''.join(buf), *filter(None, data)) | |
368 have_doctype = True | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
369 |
136 | 370 elif kind is START_NS and data[1] not in ns_mapping: |
371 ns_mapping[data[1]] = data[0] | |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
372 |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
373 elif kind is PI: |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
374 yield Markup('<?%s %s?>' % data) |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
375 |
1 | 376 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
377 class WhitespaceFilter(object): |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
378 """A filter that removes extraneous ignorable white space from the |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
379 stream.""" |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
380 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
381 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
382 _LINE_COLLAPSE = re.compile('\n{2,}') |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
383 _XML_SPACE = XML_NAMESPACE['space'] |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
384 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
385 def __init__(self, preserve=None, noescape=None): |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
386 """Initialize the filter. |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
387 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
388 @param preserve: a set or sequence of tag names for which white-space |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
389 should be ignored. |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
390 @param noescape: a set or sequence of tag names for which text content |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
391 should not be escaped |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
392 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
393 Both the `preserve` and `noescape` sets are expected to refer to |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
394 elements that cannot contain further child elements. |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
395 """ |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
396 if preserve is None: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
397 preserve = [] |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
398 self.preserve = frozenset(preserve) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
399 if noescape is None: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
400 noescape = [] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
401 self.noescape = frozenset(noescape) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
402 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
403 def __call__(self, stream, ctxt=None): |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
404 trim_trailing_space = self._TRAILING_SPACE.sub |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
405 collapse_lines = self._LINE_COLLAPSE.sub |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
406 xml_space = self._XML_SPACE |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
407 mjoin = Markup('').join |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
408 preserve_elems = self.preserve |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
409 preserve = False |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
410 noescape_elems = self.noescape |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
411 noescape = False |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
412 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
413 textbuf = [] |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
414 push_text = textbuf.append |
136 | 415 pop_text = textbuf.pop |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
416 for kind, data, pos in chain(stream, [(None, None, None)]): |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
417 if kind is TEXT: |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
418 if noescape: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
419 data = Markup(data) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
420 push_text(data) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
421 else: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
422 if textbuf: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
423 if len(textbuf) > 1: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
424 text = mjoin(textbuf, escape_quotes=False) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
425 del textbuf[:] |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
426 else: |
136 | 427 text = escape(pop_text(), quotes=False) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
428 if not preserve: |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
429 text = collapse_lines('\n', trim_trailing_space('', text)) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
430 yield TEXT, Markup(text), pos |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
431 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
432 if kind is START: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
433 tag, attrib = data |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
434 if tag.localname in preserve_elems or \ |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
435 data[1].get(xml_space) == 'preserve': |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
436 preserve = True |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
437 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
438 if tag.localname in noescape_elems: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
439 noescape = True |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
440 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
441 elif kind is END: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
442 preserve = noescape = False |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
443 |
136 | 444 if kind: |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
445 yield kind, data, pos |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
446 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
447 |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
20
diff
changeset
|
448 class _PushbackIterator(object): |
1 | 449 """A simple wrapper for iterators that allows pushing items back on the |
450 queue via the `pushback()` method. | |
451 | |
452 That can effectively be used to peek at the next item.""" | |
453 __slots__ = ['iterable', 'buf'] | |
454 | |
455 def __init__(self, iterable): | |
456 self.iterable = iter(iterable) | |
457 self.buf = [] | |
458 | |
459 def __iter__(self): | |
460 return self | |
461 | |
462 def next(self): | |
463 if self.buf: | |
464 return self.buf.pop(0) | |
465 return self.iterable.next() | |
466 | |
467 def pushback(self, item): | |
468 self.buf.append(item) |