Mercurial > genshi > mirror
annotate genshi/output.py @ 464:2f13c5fc4a4d trunk
Move the mapping of doctype names to tuples out of the plugin into the `DocType` class.
author | cmlenz |
---|---|
date | Thu, 26 Apr 2007 09:46:36 +0000 |
parents | d5e2a7b58116 |
children | 942d73ba938c |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
408 | 3 # Copyright (C) 2006-2007 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
18 from itertools import chain |
1 | 19 try: |
20 frozenset | |
21 except NameError: | |
22 from sets import ImmutableSet as frozenset | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
23 import re |
1 | 24 |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
25 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind |
460
75425671b437
Apply patch by Alec Thomas for processing XML declarations (#111). Thanks!
cmlenz
parents:
448
diff
changeset
|
26 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ |
402
c199e9b95884
Fix output of namespace declarations for namespace URLs appearing more than once in a stream. Thanks to Jeff Cutsinger for reporting the problem.
cmlenz
parents:
397
diff
changeset
|
27 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE |
1 | 28 |
462 | 29 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', |
30 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] | |
425
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
31 __docformat__ = 'restructuredtext en' |
1 | 32 |
def encode(iterator, method='xml', encoding='utf-8'):
    """Encode serializer output into a string.

    The chunks produced by `iterator` are concatenated and, unless `encoding`
    is `None`, encoded. Characters that cannot be represented in the target
    encoding are replaced with ``?`` for text output, and with XML character
    references for every other serialization method.

    :param iterator: the iterator returned from serializing a stream (basically
                     any iterator that yields unicode objects)
    :param method: the serialization method; determines how characters not
                   representable in the specified encoding are treated
    :param encoding: how the output string should be encoded; if set to `None`,
                     this method returns a `unicode` object
    :return: a string or unicode object (depending on the `encoding` parameter)
    :since: version 0.4.1
    """
    # ``join`` consumes any iterable directly; no intermediate list is needed.
    output = u''.join(iterator)
    if encoding is None:
        return output

    errors = 'replace'
    # NOTE(review): `get_serializer` also accepts a serializer *class* as the
    # method. A class is not an instance of `TextSerializer`, so it takes the
    # 'xmlcharrefreplace' path below -- confirm that this is intended.
    if method != 'text' and not isinstance(method, TextSerializer):
        errors = 'xmlcharrefreplace'
    return output.encode(encoding, errors)
52 | |
def get_serializer(method='xml', **kwargs):
    """Instantiate the serializer that corresponds to `method`.

    :param method: the serialization method; can be either "xml", "xhtml",
                   "html", "text" (matched case-insensitively), or a custom
                   serializer class
    :return: the object obtained by calling the selected serializer class
             with the given keyword arguments
    :raise KeyError: if `method` is a string that is not one of the names
                     listed above

    Any additional keyword arguments are passed to the serializer, and thus
    depend on the `method` parameter value.

    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
    :since: version 0.4.1
    """
    if not isinstance(method, basestring):
        # Not a name: treat the value itself as the serializer factory.
        return method(**kwargs)

    serializers_by_name = {
        'xml': XMLSerializer,
        'xhtml': XHTMLSerializer,
        'html': HTMLSerializer,
        'text': TextSerializer,
    }
    serializer_class = serializers_by_name[method.lower()]
    return serializer_class(**kwargs)
71 | |
1 | 72 |
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple. `HTML` and `XHTML`
    are aliases for the respective strict variants.
    """

    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    HTML_FRAMESET = (
        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
        'http://www.w3.org/TR/html4/frameset.dtd'
    )
    HTML = HTML_STRICT

    HTML5 = ('html', None, None)

    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    XHTML_FRAMESET = (
        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
    )
    XHTML = XHTML_STRICT

    def get(cls, name):
        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
        declaration for the specified name.

        The following names are recognized in this version:
         * "html" or "html-strict" for the HTML 4.01 strict DTD
         * "html-transitional" for the HTML 4.01 transitional DTD
         * "html-frameset" for the HTML 4.01 frameset DTD
         * "html5" for the ``DOCTYPE`` proposed for HTML5
         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
         * "xhtml-frameset" for the XHTML 1.0 frameset DTD

        Names are matched case-insensitively.

        :param name: the name of the ``DOCTYPE``
        :return: the ``(name, pubid, sysid)`` tuple for the requested
                 ``DOCTYPE``, or ``None`` if the name is not recognized
        :since: version 0.4.1
        """
        # Every entry is looked up through `cls` so that a subclass which
        # overrides one of the constants is honoured for all names alike.
        return {
            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
            'html-transitional': cls.HTML_TRANSITIONAL,
            'html-frameset': cls.HTML_FRAMESET,
            'html5': cls.HTML5,
            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
            'xhtml-frameset': cls.XHTML_FRAMESET,
        }.get(name.lower())
    get = classmethod(get)
448 | 134 |
85 | 135 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Elements inside which whitespace must be kept; empty for generic XML.
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: handed unchanged to `NamespaceFlattener`
                                   as its ``prefixes`` argument (presumably a
                                   mapping of namespace URIs to the prefixes
                                   to use -- confirm against that class)
        """
        # Events injected in front of every stream that gets serialized.
        if doctype:
            self.preamble = [(DOCTYPE, doctype, (None, -1, -1))]
        else:
            self.preamble = []

        # Filters run in list order: empty-tag collapsing first, then the
        # optional whitespace stripping, and namespace flattening last.
        self.filters = pipeline = [EmptyTagFilter()]
        if strip_whitespace:
            pipeline.append(WhitespaceFilter(self._PRESERVE_SPACE))
        pipeline.append(NamespaceFlattener(prefixes=namespace_prefixes))

    def __call__(self, stream):
        """Serialize `stream`, yielding the XML output chunk by chunk.

        The preamble events are put in front of the stream and the result is
        passed through all configured filters before being rendered. Only the
        first XML declaration and the first DOCTYPE event produce output;
        event kinds without a branch below are dropped silently.

        :param stream: an iterable of ``(kind, data, pos)`` event tuples
        :return: a generator over the serialized text fragments
        """
        seen_xml_decl = False
        seen_doctype = False
        inside_cdata = False

        events = chain(self.preamble, stream)
        for apply_filter in self.filters:
            events = apply_filter(events)

        for kind, data, pos in events:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                parts = ['<', tag]
                for attr, value in attrib:
                    parts.extend([' ', attr, '="', escape(value), '"'])
                # EMPTY events are rendered as self-closing tags.
                if kind is EMPTY:
                    parts.append('/>')
                else:
                    parts.append('>')
                yield Markup(u''.join(parts))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through unescaped.
                if not inside_cdata:
                    yield escape(data, quotes=False)
                else:
                    yield data

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is XML_DECL and not seen_xml_decl:
                version, encoding, standalone = data
                parts = ['<?xml version="%s"' % version]
                if encoding:
                    parts.append(' encoding="%s"' % encoding)
                # -1 means the declaration carries no standalone attribute.
                if standalone != -1:
                    if standalone:
                        flag = 'yes'
                    else:
                        flag = 'no'
                    parts.append(' standalone="%s"' % flag)
                parts.append('?>\n')
                yield Markup(u''.join(parts))
                seen_xml_decl = True

            elif kind is DOCTYPE and not seen_doctype:
                name, pubid, sysid = data
                # Build a format string with one ``%s`` per non-empty item;
                # `Markup` fills the placeholders in from the remaining
                # positional arguments.
                if pubid and sysid:
                    external_id = ' PUBLIC "%s" "%s"'
                elif pubid:
                    external_id = ' PUBLIC "%s"'
                elif sysid:
                    external_id = ' SYSTEM "%s"'
                else:
                    external_id = ''
                template = u''.join(['<!DOCTYPE %s', external_id, '>\n'])
                yield Markup(template, *filter(None, data))
                seen_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                inside_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                inside_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
229 |
1 | 230 |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
231 class XHTMLSerializer(XMLSerializer): |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
232 """Produces XHTML text from an event stream. |
1 | 233 |
230 | 234 >>> from genshi.builder import tag |
20 | 235 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
236 >>> print ''.join(XHTMLSerializer()(elem.generate())) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
237 <div><a href="foo"></a><br /><hr noshade="noshade" /></div> |
1 | 238 """ |
239 | |
240 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', | |
241 'hr', 'img', 'input', 'isindex', 'link', 'meta', | |
242 'param']) | |
243 _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare', | |
244 'defer', 'disabled', 'ismap', 'multiple', | |
245 'nohref', 'noresize', 'noshade', 'nowrap']) | |
346
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
246 _PRESERVE_SPACE = frozenset([ |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
247 QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
248 QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
249 ]) |
1 | 250 |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
251 def __init__(self, doctype=None, strip_whitespace=True, |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
252 namespace_prefixes=None): |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
253 super(XHTMLSerializer, self).__init__(doctype, False) |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
254 self.filters = [EmptyTagFilter()] |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
255 if strip_whitespace: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
256 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
257 namespace_prefixes = namespace_prefixes or {} |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
258 namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
259 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
260 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
261 def __call__(self, stream): |
136 | 262 boolean_attrs = self._BOOLEAN_ATTRS |
263 empty_elems = self._EMPTY_ELEMS | |
85 | 264 have_doctype = False |
143
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
265 in_cdata = False |
1 | 266 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
267 stream = chain(self.preamble, stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
268 for filter_ in self.filters: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
269 stream = filter_(stream) |
1 | 270 for kind, data, pos in stream: |
271 | |
212
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
272 if kind is START or kind is EMPTY: |
1 | 273 tag, attrib = data |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
274 buf = ['<', tag] |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
275 for attr, value in attrib: |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
276 if attr in boolean_attrs: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
277 value = attr |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
278 buf += [' ', attr, '="', escape(value), '"'] |
212
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
279 if kind is EMPTY: |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
280 if tag in empty_elems: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
281 buf.append(' />') |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
282 else: |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
283 buf.append('></%s>' % tag) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
284 else: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
285 buf.append('>') |
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
286 yield Markup(u''.join(buf)) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
287 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
288 elif kind is END: |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
289 yield Markup('</%s>' % data) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
290 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
291 elif kind is TEXT: |
143
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
292 if in_cdata: |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
293 yield data |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
294 else: |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
295 yield escape(data, quotes=False) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
296 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
297 elif kind is COMMENT: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
298 yield Markup('<!--%s-->' % data) |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
299 |
136 | 300 elif kind is DOCTYPE and not have_doctype: |
301 name, pubid, sysid = data | |
302 buf = ['<!DOCTYPE %s'] | |
303 if pubid: | |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
304 buf.append(' PUBLIC "%s"') |
136 | 305 elif sysid: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
306 buf.append(' SYSTEM') |
136 | 307 if sysid: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
308 buf.append(' "%s"') |
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
309 buf.append('>\n') |
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
310 yield Markup(u''.join(buf), *filter(None, data)) |
136 | 311 have_doctype = True |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
312 |
143
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
313 elif kind is START_CDATA: |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
314 yield Markup('<![CDATA[') |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
315 in_cdata = True |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
316 |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
317 elif kind is END_CDATA: |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
318 yield Markup(']]>') |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
319 in_cdata = False |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
320 |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
321 elif kind is PI: |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
322 yield Markup('<?%s %s?>' % data) |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
323 |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
324 |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
325 class HTMLSerializer(XHTMLSerializer): |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
326 """Produces HTML text from an event stream. |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
327 |
230 | 328 >>> from genshi.builder import tag |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
329 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
330 >>> print ''.join(HTMLSerializer()(elem.generate())) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
331 <div><a href="foo"></a><br><hr noshade></div> |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
332 """ |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
333 |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
334 _NOESCAPE_ELEMS = frozenset([ |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
335 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
336 QName('style'), QName('http://www.w3.org/1999/xhtml}style') |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
337 ]) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
338 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
339 def __init__(self, doctype=None, strip_whitespace=True): |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
340 """Initialize the HTML serializer. |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
341 |
425
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
342 :param doctype: a ``(name, pubid, sysid)`` tuple that represents the |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
343 DOCTYPE declaration that should be included at the top |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
344 of the generated output |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
345 :param strip_whitespace: whether extraneous whitespace should be |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
346 stripped from the output |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
347 """ |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
348 super(HTMLSerializer, self).__init__(doctype, False) |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
349 self.filters = [EmptyTagFilter()] |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
350 if strip_whitespace: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
351 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, |
305 | 352 self._NOESCAPE_ELEMS)) |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
353 self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml')) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
354 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
355 def __call__(self, stream): |
136 | 356 boolean_attrs = self._BOOLEAN_ATTRS |
357 empty_elems = self._EMPTY_ELEMS | |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
358 noescape_elems = self._NOESCAPE_ELEMS |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
359 have_doctype = False |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
360 noescape = False |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
361 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
362 stream = chain(self.preamble, stream) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
363 for filter_ in self.filters: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
364 stream = filter_(stream) |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
365 for kind, data, pos in stream: |
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
366 |
212
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
367 if kind is START or kind is EMPTY: |
96
fa08aef181a2
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
368 tag, attrib = data |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
369 buf = ['<', tag] |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
370 for attr, value in attrib: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
371 if attr in boolean_attrs: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
372 if value: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
373 buf += [' ', attr] |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
374 else: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
375 buf += [' ', attr, '="', escape(value), '"'] |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
376 buf.append('>') |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
377 if kind is EMPTY: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
378 if tag not in empty_elems: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
379 buf.append('</%s>' % tag) |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
380 yield Markup(u''.join(buf)) |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
381 if tag in noescape_elems: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
382 noescape = True |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
383 |
69 | 384 elif kind is END: |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
385 yield Markup('</%s>' % data) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
386 noescape = False |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
387 |
69 | 388 elif kind is TEXT: |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
389 if noescape: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
390 yield data |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
391 else: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
392 yield escape(data, quotes=False) |
1 | 393 |
89
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
394 elif kind is COMMENT: |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
395 yield Markup('<!--%s-->' % data) |
80386d62814f
Support comments in templates that are not included in the output, in the same way Kid does: if the comment text starts with a `!` character, it is stripped from the output.
cmlenz
parents:
85
diff
changeset
|
396 |
136 | 397 elif kind is DOCTYPE and not have_doctype: |
398 name, pubid, sysid = data | |
399 buf = ['<!DOCTYPE %s'] | |
400 if pubid: | |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
401 buf.append(' PUBLIC "%s"') |
136 | 402 elif sysid: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
403 buf.append(' SYSTEM') |
136 | 404 if sysid: |
397
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
405 buf.append(' "%s"') |
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
406 buf.append('>\n') |
31742fe6d47e
* Moved some utility functions from `genshi.core` to `genshi.util` (backwards compatibility preserved via imports)
cmlenz
parents:
346
diff
changeset
|
407 yield Markup(u''.join(buf), *filter(None, data)) |
136 | 408 have_doctype = True |
109
230ee6a2c6b2
Reorder the conditional branches in the serializers so that the more common event kinds are on top.
cmlenz
parents:
105
diff
changeset
|
409 |
105
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
410 elif kind is PI: |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
411 yield Markup('<?%s %s?>' % data) |
71f3db26eecb
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
412 |
1 | 413 |
200
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
414 class TextSerializer(object): |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
415 """Produces plain text from an event stream. |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
416 |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
417 Only text events are included in the output. Unlike the other serializers, |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
418 special XML characters are not escaped: |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
419 |
230 | 420 >>> from genshi.builder import tag |
200
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
421 >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br) |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
422 >>> print elem |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
423 <div><a href="foo"><Hello!></a><br/></div> |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
424 >>> print ''.join(TextSerializer()(elem.generate())) |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
425 <Hello!> |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
426 |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
427 If text events contain literal markup (instances of the `Markup` class), |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
428 tags or entities are stripped from the output: |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
429 |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
430 >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>')) |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
431 >>> print elem |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
432 <div><a href="foo">Hello!</a><br/></div> |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
433 >>> print ''.join(TextSerializer()(elem.generate())) |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
434 Hello! |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
435 """ |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
436 |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
437 def __call__(self, stream): |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
438 for event in stream: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
439 if event[0] is TEXT: |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
440 data = event[1] |
200
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
441 if type(data) is Markup: |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
442 data = data.striptags().stripentities() |
201
c5e0a1c86173
The `TextSerializer` should produce `unicode` objects, not `Markup` objects.
cmlenz
parents:
200
diff
changeset
|
443 yield unicode(data) |
200
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
444 |
5861f4446c26
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
445 |
212
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
446 class EmptyTagFilter(object): |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
447 """Combines `START` and `END` events into `EMPTY` events for elements that |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
448 have no contents. |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
449 """ |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
450 |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
451 EMPTY = StreamEventKind('EMPTY') |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
452 |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
def __call__(self, stream):
    """Collapse every ``START`` event that is immediately followed by an
    ``END`` event into a single ``EMPTY`` event.

    All other events are passed through unchanged and in their original
    order.

    :param stream: an iterable of ``(kind, data, pos)`` event tuples
    :return: a generator over the filtered events
    """
    # A START event is held back until the next event shows whether the
    # element has any content.
    pending = None
    for event in stream:
        kind = event[0]
        if pending is not None:
            if kind is END:
                # Nothing came between START and END: emit one EMPTY event
                # carrying the data and position of the START, and drop the
                # END.  ``EMPTY`` is resolved as a module-level name.
                yield EMPTY, pending[1], pending[2]
                pending = None
                continue
            # The element has content after all; release the START first.
            yield pending
            pending = None
        if kind is START:
            pending = event
        else:
            yield event
    # A START that is the very last event of the stream must not be lost.
    if pending is not None:
        yield pending
212
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
466 |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
467 |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
# Module-level alias for the event kind defined on ``EmptyTagFilter``, so that
# code in this module can test ``kind is EMPTY`` by its bare name.
EMPTY = EmptyTagFilter.EMPTY
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
469 |
0141f45c18e1
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
470 |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    Element and attribute names are emitted as plain ``prefix:localname``
    strings, and ``START_NS``/``END_NS`` events are consumed and turned into
    ``xmlns`` attributes on the next element.

    :param prefixes: optional mapping of namespace URIs to prefixes

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None):
        # The ``xml`` prefix is always registered; caller-supplied entries are
        # layered on top of it.
        self.prefixes = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            self.prefixes.update(prefixes)

    def __call__(self, stream):
        """Apply the filter to the given stream.

        :param stream: an iterable of ``(kind, data, pos)`` event tuples
        :return: a generator over the flattened events; ``START_NS`` and
                 ``END_NS`` events are not passed on
        """
        # prefix -> stack of URIs bound to that prefix (innermost last).
        prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
        # URI -> stack of prefixes bound to that URI (innermost last).
        namespaces = {XML_NAMESPACE.uri: ['xml']}

        def _push_ns(prefix, uri):
            namespaces.setdefault(uri, []).append(prefix)
            prefixes.setdefault(prefix, []).append(uri)

        # ``xmlns`` attributes collected since the last element was emitted;
        # they are attached to the next START/EMPTY event.
        ns_attrs = []
        _push_ns_attr = ns_attrs.append

        def _make_ns_attr(prefix, uri):
            if prefix:
                return u'xmlns:%s' % prefix, uri
            return u'xmlns', uri

        # A one-element list serves as a mutable counter cell that the
        # closure below can update.
        counter = [0]

        def _gen_prefix():
            # Skip candidates that are currently bound (or preconfigured), so
            # a generated declaration never clashes with one the document
            # itself uses, e.g. an explicit ``xmlns:ns1``.
            while True:
                counter[0] += 1
                candidate = 'ns%d' % counter[0]
                if candidate not in prefixes:
                    return candidate

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns in namespaces:
                        prefix = namespaces[tagns][-1]
                        if prefix:
                            tagname = u'%s:%s' % (prefix, tagname)
                    else:
                        # Namespace never declared in the stream: declare it
                        # as the default namespace on this element.
                        _push_ns_attr((u'xmlns', tagns))
                        _push_ns('', tagns)

                new_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns not in namespaces:
                            # Undeclared attribute namespace: invent a prefix
                            # and declare it on this element.
                            prefix = _gen_prefix()
                            _push_ns(prefix, attrns)
                            _push_ns_attr(('xmlns:%s' % prefix, attrns))
                        else:
                            prefix = namespaces[attrns][-1]
                        if prefix:
                            attrname = u'%s:%s' % (prefix, attrname)
                    new_attrs.append((attrname, value))

                # Namespace declarations come first, then the real attributes.
                yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
                del ns_attrs[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = namespaces[tagns][-1]
                    if prefix:
                        tagname = u'%s:%s' % (prefix, tagname)
                yield kind, tagname, pos

            elif kind is START_NS:
                prefix, uri = data
                if uri not in namespaces:
                    # NOTE(review): ``prefixes`` is keyed by prefix, but this
                    # lookup uses the URI, so a configured preferred prefix
                    # does not appear to be picked up here -- confirm the
                    # intent before changing; END_NS relies on the prefix
                    # that arrives in the stream.
                    prefix = prefixes.get(uri, [prefix])[-1]
                    _push_ns_attr(_make_ns_attr(prefix, uri))
                _push_ns(prefix, uri)

            elif kind is END_NS:
                if data in prefixes:
                    uris = prefixes.get(data)
                    uri = uris.pop()
                    if not uris:
                        del prefixes[data]
                    # Only unwind the URI's prefix stack when the prefix is
                    # not still bound to that same URI by an outer scope.
                    if uri not in uris or uri != uris[-1]:
                        uri_prefixes = namespaces[uri]
                        uri_prefixes.pop()
                        if not uri_prefixes:
                            del namespaces[uri]
                    if ns_attrs:
                        # The scope closed before any element was emitted:
                        # withdraw the declaration that is still pending.
                        attr = _make_ns_attr(data, uri)
                        if attr in ns_attrs:
                            ns_attrs.remove(attr)

            else:
                yield kind, data, pos
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
583 |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
584 |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
class NamespaceStripper(object):
    r"""Stream filter that removes all namespace information from a stream, and
    optionally strips out all tags not in a given namespace.

    :param namespace: the URI of the namespace that should not be stripped. If
                      not set, only elements with no namespace are included in
                      the output.

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs())
    TEXT u'\n  '
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, namespace=None):
        if namespace is not None:
            self.namespace = Namespace(namespace)
        else:
            # An empty container makes every ``name in self.namespace`` test
            # false, so only names without a namespace are kept.
            self.namespace = {}

    def __call__(self, stream):
        """Apply the filter to the given stream.

        :param stream: an iterable of ``(kind, data, pos)`` event tuples
        :return: a generator over the remaining events, with element and
                 attribute names reduced to their local names
        """
        namespace = self.namespace

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data
                if tag.namespace and tag not in namespace:
                    # Element from a foreign namespace: drop the tag itself.
                    continue

                # Keep attributes that are unqualified or in the retained
                # namespace, and emit them under their local name so that no
                # namespace information is left in the output.
                new_attrs = [(attr.localname, value) for attr, value in attrs
                             if not attr.namespace or attr in namespace]

                data = tag.localname, Attrs(new_attrs)

            elif kind is END:
                if data.namespace and data not in namespace:
                    continue
                data = data.localname

            elif kind is START_NS or kind is END_NS:
                # Namespace scope events carry nothing but namespace info.
                continue

            yield kind, data, pos
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
637 |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
638 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
639 class WhitespaceFilter(object): |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
640 """A filter that removes extraneous ignorable white space from the |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
641 stream. |
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
642 """ |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
643 |
305 | 644 def __init__(self, preserve=None, noescape=None): |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
645 """Initialize the filter. |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
646 |
425
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
647 :param preserve: a set or sequence of tag names for which white-space |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
648 should be preserved |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
649 :param noescape: a set or sequence of tag names for which text content |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
650 should not be escaped |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
651 |
346
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
652 The `noescape` set is expected to refer to elements that cannot contain |
425
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
653 further child elements (such as ``<style>`` or ``<script>`` in HTML |
073640758a42
Try to use proper reStructuredText for docstrings throughout.
cmlenz
parents:
412
diff
changeset
|
654 documents). |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
655 """ |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
656 if preserve is None: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
657 preserve = [] |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
658 self.preserve = frozenset(preserve) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
659 if noescape is None: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
660 noescape = [] |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
661 self.noescape = frozenset(noescape) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
662 |
219 | 663 def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'], |
664 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub, | |
665 collapse_lines=re.compile('\n{2,}').sub): | |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
666 mjoin = Markup('').join |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
667 preserve_elems = self.preserve |
346
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
668 preserve = 0 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
669 noescape_elems = self.noescape |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
670 noescape = False |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
671 |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
672 textbuf = [] |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
673 push_text = textbuf.append |
136 | 674 pop_text = textbuf.pop |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
675 for kind, data, pos in chain(stream, [(None, None, None)]): |
410
d14d89995c29
Improve the handling of namespaces in serialization.
cmlenz
parents:
408
diff
changeset
|
676 |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
677 if kind is TEXT: |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
678 if noescape: |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
679 data = Markup(data) |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
680 push_text(data) |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
681 else: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
682 if textbuf: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
683 if len(textbuf) > 1: |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
684 text = mjoin(textbuf, escape_quotes=False) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
685 del textbuf[:] |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
686 else: |
136 | 687 text = escape(pop_text(), quotes=False) |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
688 if not preserve: |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
689 text = collapse_lines('\n', trim_trailing_space('', text)) |
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
690 yield TEXT, Markup(text), pos |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
691 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
692 if kind is START: |
346
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
693 tag, attrs = data |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
694 if preserve or (tag in preserve_elems or |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
695 attrs.get(space) == 'preserve'): |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
696 preserve += 1 |
219 | 697 if not noescape and tag in noescape_elems: |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
698 noescape = True |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
699 |
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
700 elif kind is END: |
346
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
701 noescape = False |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
702 if preserve: |
96882a191686
Whitespace was not getting preserved in HTML `<pre>` elements that contained other HTML elements.
cmlenz
parents:
345
diff
changeset
|
703 preserve -= 1 |
141
520a5b7dd6d2
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
704 |
305 | 705 elif kind is START_CDATA: |
143
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
706 noescape = True |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
707 |
305 | 708 elif kind is END_CDATA: |
143
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
709 noescape = False |
3d4c214c979a
CDATA sections in XML input now appear as CDATA sections in the output. This should address the problem with escaping the contents of `<style>` and `<script>` elements, which would only get interpreted correctly if the output was served as `application/xhtml+xml`. Closes #24.
cmlenz
parents:
141
diff
changeset
|
710 |
136 | 711 if kind: |
123
10279d2eeec9
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
712 yield kind, data, pos |