Mercurial > genshi > genshi-test
annotate genshi/output.py @ 820:1837f39efd6f experimental-inline
Sync (old) experimental inline branch with trunk@1027.
author | cmlenz |
---|---|
date | Wed, 11 Mar 2009 17:51:06 +0000 |
parents | 0742f421caba |
children | de82830f8816 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
3 # Copyright (C) 2006-2008 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
230 | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
230 | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
18 from itertools import chain |
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
19 import re |
1 | 20 |
500 | 21 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind |
22 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ | |
23 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE | |
1 | 24 |
500 | 25 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', |
26 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] | |
27 __docformat__ = 'restructuredtext en' | |
28 | |
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
def encode(iterator, method='xml', encoding='utf-8', out=None):
    """Encode serializer output into a string.

    :param iterator: the iterator returned from serializing a stream (basically
                     any iterator that yields unicode objects)
    :param method: the serialization method; determines how characters not
                   representable in the specified encoding are treated
    :param encoding: how the output string should be encoded; if set to `None`,
                     this method returns a `unicode` object
    :param out: a file-like object that the output should be written to
                instead of being returned as one big string; note that if
                this is a file or socket (or similar), the `encoding` must
                not be `None` (that is, the output must be encoded)
    :return: a `str` or `unicode` object (depending on the `encoding`
             parameter), or `None` if the `out` parameter is provided

    :since: version 0.4.1
    :note: Changed in 0.5: added the `out` parameter
    """
    if encoding is None:
        # No encoding requested: chunks are passed through untouched.
        def _encode(string):
            return string
    else:
        # Text output falls back to the codec's plain 'replace' handler;
        # every other method substitutes XML character references for
        # characters the target encoding cannot represent.
        if method == 'text' or isinstance(method, TextSerializer):
            errors = 'replace'
        else:
            errors = 'xmlcharrefreplace'

        def _encode(string):
            return string.encode(encoding, errors)

    if out is None:
        # `join` consumes the iterator directly; no intermediate list needed.
        return _encode(u''.join(iterator))

    # Streaming mode: encode and write chunk by chunk, returning `None`.
    for chunk in iterator:
        out.write(_encode(chunk))
500 | 59 |
def get_serializer(method='xml', **kwargs):
    """Return a serializer object for the given method.

    :param method: the serialization method; can be either "xml", "xhtml",
                   "html", "text", or a custom serializer class

    Any additional keyword arguments are passed to the serializer, and thus
    depend on the `method` parameter value.

    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
    :since: version 0.4.1
    """
    factory = method
    if isinstance(factory, basestring):
        # Names are matched case-insensitively; an unknown name raises
        # `KeyError` from the lookup below.
        known_serializers = {
            'xml': XMLSerializer,
            'xhtml': XHTMLSerializer,
            'html': HTMLSerializer,
            'text': TextSerializer,
        }
        factory = known_serializers[factory.lower()]
    # Anything that is not a string is called as-is with the keyword args.
    return factory(**kwargs)
1 | 78 |
79 | |
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple.
    """

    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    HTML_FRAMESET = (
        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
        'http://www.w3.org/TR/html4/frameset.dtd'
    )
    HTML = HTML_STRICT

    HTML5 = ('html', None, None)

    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    XHTML_FRAMESET = (
        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
    )
    XHTML = XHTML_STRICT

    XHTML11 = (
        'html', '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    )

    SVG_FULL = (
        'svg', '-//W3C//DTD SVG 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
    )
    SVG_BASIC = (
        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
    )
    SVG_TINY = (
        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
    )
    SVG = SVG_FULL

    @classmethod
    def get(cls, name):
        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
        declaration for the specified name.

        The following names are recognized in this version:
         * "html" or "html-strict" for the HTML 4.01 strict DTD
         * "html-transitional" for the HTML 4.01 transitional DTD
         * "html-frameset" for the HTML 4.01 frameset DTD
         * "html5" for the ``DOCTYPE`` proposed for HTML5
         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
         * "xhtml11" for the XHTML 1.1 DTD
         * "svg" or "svg-full" for the SVG 1.1 DTD
         * "svg-basic" for the SVG Basic 1.1 DTD
         * "svg-tiny" for the SVG Tiny 1.1 DTD

        The lookup is case-insensitive.

        :param name: the name of the ``DOCTYPE``
        :return: the ``(name, pubid, sysid)`` tuple for the requested
                 ``DOCTYPE``, or ``None`` if the name is not recognized
        :since: version 0.4.1
        """
        # Every entry is resolved through `cls` so that constants overridden
        # in a subclass are honoured uniformly.
        return {
            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
            'html-transitional': cls.HTML_TRANSITIONAL,
            'html-frameset': cls.HTML_FRAMESET,
            'html5': cls.HTML5,
            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
            'xhtml-frameset': cls.XHTML_FRAMESET,
            'xhtml11': cls.XHTML11,
            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
            'svg-basic': cls.SVG_BASIC,
            'svg-tiny': cls.SVG_TINY
        }.get(name.lower())
168 | |
85 | 169 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: handed to `NamespaceFlattener` as its
                                   ``prefixes`` argument
        :note: Changed in 0.4.2: The `doctype` parameter can now be a string.
        """
        self.filters = [EmptyTagFilter()]
        add_filter = self.filters.append
        if strip_whitespace:
            add_filter(WhitespaceFilter(self._PRESERVE_SPACE))
        add_filter(NamespaceFlattener(prefixes=namespace_prefixes))
        if doctype:
            add_filter(DocTypeInserter(doctype))

    def __call__(self, stream):
        """Run `stream` through the configured filters and yield the
        serialized output piece by piece.
        """
        # Only the first XML declaration / DOCTYPE event is written out.
        seen_decl = seen_doctype = False
        # Text inside a CDATA section is emitted without escaping.
        inside_cdata = False

        for apply_filter in self.filters:
            stream = apply_filter(stream)

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                parts = ['<', tag]
                for attr, value in attrib:
                    parts.extend([' ', attr, '="', escape(value), '"'])
                if kind is EMPTY:
                    parts.append('/>')
                else:
                    parts.append('>')
                yield Markup(u''.join(parts))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                if not in_cdata_section(inside_cdata):
                    yield escape(data, quotes=False)
                else:
                    yield data

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is XML_DECL and not seen_decl:
                version, encoding, standalone = data
                parts = ['<?xml version="%s"' % version]
                if encoding:
                    parts.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    if standalone:
                        flag = 'yes'
                    else:
                        flag = 'no'
                    parts.append(' standalone="%s"' % flag)
                parts.append('?>\n')
                yield Markup(u''.join(parts))
                seen_decl = True

            elif kind is DOCTYPE and not seen_doctype:
                name, pubid, sysid = data
                # Build a template with one placeholder per non-empty item
                # of `data`, then fill it with exactly those items.
                pieces = ['<!DOCTYPE %s']
                if pubid:
                    pieces.append(' PUBLIC "%s"')
                elif sysid:
                    pieces.append(' SYSTEM')
                if sysid:
                    pieces.append(' "%s"')
                pieces.append('>\n')
                yield Markup(u''.join(pieces)) % filter(None, data)
                seen_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                inside_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                inside_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)


def in_cdata_section(flag):
    """Return `flag` unchanged; names the CDATA state test in the TEXT branch."""
    return flag
263 | |
264 | |
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XHTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])
    _PRESERVE_SPACE = frozenset([
        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, drop_xml_decl=True):
        """Initialize the XHTML serializer.

        :param doctype: the DOCTYPE to insert into the output; when truthy it
                        is handed to `DocTypeInserter`
        :param strip_whitespace: whether a `WhitespaceFilter` should be added
                                 to the filter chain
        :param namespace_prefixes: optional mapping handed to
                                   `NamespaceFlattener`; the XHTML namespace
                                   is always mapped to the empty prefix. The
                                   mapping passed in is not modified.
        :param drop_xml_decl: whether XML declaration events should be left
                              out of the output
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so that the caller's mapping is never mutated when
        # the XHTML default-namespace entry is added.
        prefixes = dict(namespace_prefixes or {})
        prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=prefixes))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.drop_xml_decl = drop_xml_decl

    def __call__(self, stream):
        """Run `stream` through the configured filters and yield the
        serialized XHTML output piece by piece.
        """
        # Bind attributes to locals once; they are read for every event.
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        drop_xml_decl = self.drop_xml_decl
        # Only the first XML declaration / DOCTYPE event is written out.
        have_decl = have_doctype = False
        # Text inside a CDATA section is emitted without escaping.
        in_cdata = False

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Boolean attributes are written as attr="attr".
                        value = attr
                    elif attr == u'xml:lang' and u'lang' not in attrib:
                        # Emit an additional plain `lang` attribute; the
                        # `xml:lang` attribute itself is still written below.
                        buf += [' lang="', escape(value), '"']
                    elif attr == u'xml:space':
                        # `xml:space` is dropped from the output.
                        continue
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        # Elements outside `_EMPTY_ELEMS` always get an
                        # explicit end tag.
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield Markup(u''.join(buf))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                # Build a template with one placeholder per non-empty item
                # of `data`, then fill it with exactly those items.
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(u''.join(buf)) % filter(None, data)
                have_doctype = True

            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(u''.join(buf))
                have_decl = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
376 |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
377 |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
378 class HTMLSerializer(XHTMLSerializer): |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
379 """Produces HTML text from an event stream. |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
380 |
230 | 381 >>> from genshi.builder import tag |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
382 >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
383 >>> print ''.join(HTMLSerializer()(elem.generate())) |
96
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
384 <div><a href="foo"></a><br><hr noshade></div> |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
385 """ |
35d681a94763
Add an XHTML serialization method. Now really need to get rid of some code duplication in the `markup.output` module.
cmlenz
parents:
89
diff
changeset
|
386 |
500 | 387 _NOESCAPE_ELEMS = frozenset([ |
388 QName('script'), QName('http://www.w3.org/1999/xhtml}script'), | |
389 QName('style'), QName('http://www.w3.org/1999/xhtml}style') | |
390 ]) | |
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
391 |
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
def __init__(self, doctype=None, strip_whitespace=True):
    """Set up the HTML serializer and its chain of stream filters.

    :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                    DOCTYPE declaration that should be included at the top
                    of the generated output
    :param strip_whitespace: whether extraneous whitespace should be
                             stripped from the output
    """
    # NOTE(review): the base initializer is always given ``False`` as its
    # second argument; presumably that disables its own whitespace handling
    # so it can be configured below instead -- confirm against the base.
    super(HTMLSerializer, self).__init__(doctype, False)

    # Adjacent START/END pairs are merged into EMPTY events first.
    pipeline = self.filters = [EmptyTagFilter()]

    if strip_whitespace:
        whitespace_filter = WhitespaceFilter(self._PRESERVE_SPACE,
                                             self._NOESCAPE_ELEMS)
        pipeline.append(whitespace_filter)

    # The XHTML namespace is mapped to the empty prefix.
    xhtml_prefixes = {'http://www.w3.org/1999/xhtml': ''}
    pipeline.append(NamespaceFlattener(prefixes=xhtml_prefixes))

    if doctype:
        pipeline.append(DocTypeInserter(doctype))
141
b3ceaa35fb6b
* No escaping of `<script>` or `<style>` tags in HTML output (see #24)
cmlenz
parents:
140
diff
changeset
|
411 |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
def __call__(self, stream):
    """Serialize an event stream to HTML, yielding the output piece by piece.

    The stream is first passed through every filter in ``self.filters``, in
    order, and the resulting events are then turned into text.

    :param stream: the event stream, an iterable of ``(kind, data, pos)``
                   tuples
    :return: a generator that yields one text fragment per serialized event
    """
    boolean_attrs = self._BOOLEAN_ATTRS
    empty_elems = self._EMPTY_ELEMS
    noescape_elems = self._NOESCAPE_ELEMS
    have_doctype = False
    noescape = False

    for filter_ in self.filters:
        stream = filter_(stream)

    for kind, data, pos in stream:

        if kind is START or kind is EMPTY:
            tag, attrib = data
            buf = ['<', tag]
            for attr, value in attrib:
                if attr in boolean_attrs:
                    # Boolean attributes are minimized to the bare name and
                    # left out entirely when their value is false
                    if value:
                        buf += [' ', attr]
                elif ':' in attr:
                    # Prefixed attributes are dropped; only ``xml:lang`` is
                    # kept, rewritten as ``lang``, and only when the element
                    # has no ``lang`` attribute of its own
                    if attr == 'xml:lang' and u'lang' not in attrib:
                        buf += [' lang="', escape(value), '"']
                elif attr != 'xmlns':
                    buf += [' ', attr, '="', escape(value), '"']
            buf.append('>')
            if kind is EMPTY:
                # Elements outside ``_EMPTY_ELEMS`` get an explicit end tag
                if tag not in empty_elems:
                    buf.append('</%s>' % tag)
            yield Markup(u''.join(buf))
            # Only a START opens content that must not be escaped. An EMPTY
            # element has no content and no END event to reset the flag, so
            # setting it there would leave the text that follows unescaped.
            if kind is START and tag in noescape_elems:
                noescape = True

        elif kind is END:
            yield Markup('</%s>' % data)
            noescape = False

        elif kind is TEXT:
            if noescape:
                yield data
            else:
                yield escape(data, quotes=False)

        elif kind is COMMENT:
            yield Markup('<!--%s-->' % data)

        elif kind is DOCTYPE and not have_doctype:
            # Only the first DOCTYPE event in the stream is serialized
            name, pubid, sysid = data
            buf = ['<!DOCTYPE %s']
            if pubid:
                buf.append(' PUBLIC "%s"')
            elif sysid:
                buf.append(' SYSTEM')
            if sysid:
                buf.append(' "%s"')
            buf.append('>\n')
            # The format string has one placeholder per non-empty part; the
            # explicit tuple() keeps the ``%`` operand a tuple whatever kind
            # of object ``filter`` returns
            yield Markup(u''.join(buf)) % tuple(filter(None, data))
            have_doctype = True

        elif kind is PI:
            yield Markup('<?%s %s?>' % data)
334a338847af
Include processing instructions in serialized streams.
cmlenz
parents:
96
diff
changeset
|
471 |
1 | 472 |
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
class TextSerializer(object):
    """Produces plain text from an event stream.

    Only text events are included in the output. Unlike the other serializers,
    special XML characters are not escaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print elem
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    <Hello!>

    If text events contain literal markup (instances of the `Markup` class),
    that markup is by default passed through unchanged:

    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
    >>> print elem.generate().render(TextSerializer)
    <a href="foo">Hello &amp; Bye!</a><br/>

    You can use the ``strip_markup`` option to change this behavior, so that
    tags and entities are stripped from the output (or in the case of
    entities, replaced with the equivalent character):

    >>> print elem.generate().render(TextSerializer, strip_markup=True)
    Hello & Bye!
    """

    def __init__(self, strip_markup=False):
        """Create the serializer.

        :param strip_markup: whether markup (tags and encoded characters) found
                             in the text should be removed
        """
        self.strip_markup = strip_markup

    def __call__(self, stream):
        """Yield the content of every text event in the stream as `unicode`.

        :param stream: the event stream; only the first two items of each
                       event (kind and data) are looked at
        """
        strip = self.strip_markup
        for event in stream:
            # Everything that is not a text event is ignored
            if event[0] is not TEXT:
                continue
            text = event[1]
            # The exact type check means only plain `Markup` instances are
            # stripped, not instances of subclasses
            if strip and type(text) is Markup:
                text = text.striptags().stripentities()
            yield unicode(text)
200
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
517 |
50eab0469148
Add serialization to plain text, based on cboos' patch. Closes #41.
cmlenz
parents:
178
diff
changeset
|
518 |
212
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
class EmptyTagFilter(object):
    """Combines adjacent `START` and `END` events into a single `EMPTY` event
    for elements that have no contents.
    """

    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        """Yield the events of the stream, merging empty elements.

        A `START` event is held back until the following event is seen: when
        that event is an `END`, both are replaced by one `EMPTY` event that
        carries the data and position of the `START`; otherwise the `START`
        is passed on unchanged ahead of the following event.

        :param stream: the event stream, an iterable of ``(kind, data, pos)``
                       tuples
        """
        prev = (None, None, None)
        for ev in stream:
            if prev[0] is START:
                if ev[0] is END:
                    # START directly followed by END: emit EMPTY, drop the END
                    prev = EMPTY, prev[1], prev[2]
                    yield prev
                    continue
                yield prev
            # A START is not emitted right away, it is remembered in `prev`
            if ev[0] is not START:
                yield ev
            prev = ev

        # A START that is the very last event has no successor that would
        # trigger its emission above, so flush it here instead of losing it
        if prev[0] is START:
            yield prev


# Module-level alias for the event kind, used as ``kind is EMPTY``
EMPTY = EmptyTagFilter.EMPTY
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
542 |
e8c43127d9a9
Refactored the handling of empty tags in the serializer: use an `EmptyTagFilter` that combines adjacent start/end events, instead of the generic pushback-iterator.
cmlenz
parents:
201
diff
changeset
|
543 |
class NamespaceFlattener(object):
    r"""Output stream filter that removes namespace information from the stream,
    instead adding namespace attributes and prefixes as needed.

    :param prefixes: optional mapping of namespace URIs to prefixes

    >>> from genshi.input import XML
    >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
    ...   <two:item/>
    ... </doc>''')
    >>> for kind, data, pos in NamespaceFlattener()(xml):
    ...     print kind, repr(data)
    START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
    TEXT u'\n  '
    START (u'two:item', Attrs())
    END u'two:item'
    TEXT u'\n'
    END u'doc'
    """

    def __init__(self, prefixes=None):
        """Initialize the filter.

        :param prefixes: optional mapping of namespace URIs to prefixes; it is
                         merged over a built-in entry that maps the XML
                         namespace to the ``xml`` prefix
        """
        mapping = {XML_NAMESPACE.uri: 'xml'}
        if prefixes is not None:
            mapping.update(prefixes)
        self.prefixes = mapping

    def __call__(self, stream):
        """Yield the events of the stream with all names flattened to strings.

        `START_NS` and `END_NS` events are consumed; the declarations they
        carry show up as ``xmlns`` attributes on the next element instead.

        :param stream: the event stream, an iterable of ``(kind, data, pos)``
                       tuples
        """
        # prefix -> stack of namespace URIs bound to that prefix
        uris_by_prefix = {}
        for known_uri, known_prefix in self.prefixes.items():
            uris_by_prefix[known_prefix] = [known_uri]
        # namespace URI -> stack of prefixes bound to that URI
        prefixes_by_uri = {XML_NAMESPACE.uri: ['xml']}

        def _bind(prefix, uri):
            prefixes_by_uri.setdefault(uri, []).append(prefix)
            uris_by_prefix.setdefault(prefix, []).append(uri)

        # Namespace declarations waiting to be attached to the next element
        pending = []

        def _declaration(prefix, uri):
            if prefix:
                suffix = ':%s' % prefix
            else:
                suffix = ''
            return u'xmlns%s' % suffix, uri

        # Source of generated prefixes: ns1, ns2, ...
        counter = [0]

        def _new_prefix():
            counter[0] += 1
            return 'ns%d' % counter[0]

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrs = data

                tagname = tag.localname
                tagns = tag.namespace
                if tagns:
                    if tagns not in prefixes_by_uri:
                        # Namespace never declared: make it the default one
                        pending.append((u'xmlns', tagns))
                        _bind('', tagns)
                    else:
                        prefix = prefixes_by_uri[tagns][-1]
                        if prefix:
                            tagname = u'%s:%s' % (prefix, tagname)

                flat_attrs = []
                for attr, value in attrs:
                    attrname = attr.localname
                    attrns = attr.namespace
                    if attrns:
                        if attrns in prefixes_by_uri:
                            prefix = prefixes_by_uri[attrns][-1]
                        else:
                            # Undeclared attribute namespace: invent a prefix
                            prefix = _new_prefix()
                            _bind(prefix, attrns)
                            pending.append(('xmlns:%s' % prefix, attrns))
                        if prefix:
                            attrname = u'%s:%s' % (prefix, attrname)
                    flat_attrs.append((attrname, value))

                yield kind, (tagname, Attrs(pending + flat_attrs)), pos
                del pending[:]

            elif kind is END:
                tagname = data.localname
                tagns = data.namespace
                if tagns:
                    prefix = prefixes_by_uri[tagns][-1]
                    if prefix:
                        tagname = u'%s:%s' % (prefix, tagname)
                yield kind, tagname, pos

            elif kind is START_NS:
                prefix, uri = data
                if uri not in prefixes_by_uri:
                    # NOTE(review): this dict is keyed by prefix, yet it is
                    # queried with a URI here -- confirm this is intended
                    prefix = uris_by_prefix.get(uri, [prefix])[-1]
                    pending.append(_declaration(prefix, uri))
                _bind(prefix, uri)

            elif kind is END_NS:
                if data in uris_by_prefix:
                    bound_uris = uris_by_prefix[data]
                    uri = bound_uris.pop()
                    if not bound_uris:
                        del uris_by_prefix[data]
                    if uri not in bound_uris or uri != bound_uris[-1]:
                        bound_prefixes = prefixes_by_uri[uri]
                        bound_prefixes.pop()
                        if not bound_prefixes:
                            del prefixes_by_uri[uri]
                    if pending:
                        # The scope closed before any element used the
                        # declaration, so withdraw it again
                        stale = _declaration(data, uri)
                        if stale in pending:
                            pending.remove(stale)

            else:
                yield kind, data, pos
656 | |
657 | |
123
93bbdcf9428b
Fix for #18: whitespace in space-sensitive elements such as `<pre>` and `<textarea>` is now preserved.
cmlenz
parents:
109
diff
changeset
|
class WhitespaceFilter(object):
    """A filter that removes extraneous ignorable white space from the
    stream.
    """

    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as ``<style>`` or ``<script>`` in HTML
        documents).
        """
        self.preserve = frozenset(preserve is not None and preserve or [])
        self.noescape = frozenset(noescape is not None and noescape or [])

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        """Yield the events of the stream with runs of text events merged and
        their ignorable white space removed.

        :param stream: the event stream, an iterable of ``(kind, data, pos)``
                       tuples
        :param ctxt: not used by this filter
        :param space: the name of the attribute whose value ``preserve``
                      switches white-space preservation on for an element
        :param trim_trailing_space: substitution function applied to strip
                                    spaces and tabs in front of a newline
        :param collapse_lines: substitution function applied to turn runs of
                               newlines into a single newline
        """
        mjoin = Markup('').join
        preserve_elems = self.preserve
        noescape_elems = self.noescape
        preserve = 0      # nesting depth inside white-space preserving content
        noescape = False  # whether incoming text is taken as literal markup

        textbuf = []
        # The trailing sentinel event flushes text left over at the very end
        for kind, data, pos in chain(stream, [(None, None, None)]):

            if kind is TEXT:
                if noescape:
                    data = Markup(data)
                textbuf.append(data)
                continue

            if textbuf:
                # Emit the collected text as one event; it carries the
                # position of the event that ended the run of text
                if len(textbuf) > 1:
                    text = mjoin(textbuf, escape_quotes=False)
                else:
                    text = escape(textbuf[0], quotes=False)
                del textbuf[:]
                if not preserve:
                    text = collapse_lines('\n', trim_trailing_space('', text))
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                # Once inside preserved content every nested element adds to
                # the depth, so that the matching END events balance it out
                if preserve or (tag in preserve_elems or
                                attrs.get(space) == 'preserve'):
                    preserve += 1
                if not noescape and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                noescape = False
                if preserve:
                    preserve -= 1

            elif kind is START_CDATA:
                noescape = True

            elif kind is END_CDATA:
                noescape = False

            # The sentinel has no kind and is not passed on
            if kind:
                yield kind, data, pos
820
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
732 |
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
733 |
1837f39efd6f
Sync (old) experimental inline branch with trunk@1027.
cmlenz
parents:
500
diff
changeset
|
class DocTypeInserter(object):
    """Stream filter that adds a DOCTYPE declaration to the event stream.

    The DOCTYPE event is placed directly after the XML declaration when the
    stream starts with one, and at the very beginning of the stream otherwise.
    """

    def __init__(self, doctype):
        """Set up the filter with the DOCTYPE that should be inserted.

        :param doctype: the DOCTYPE, given either as a string (which is
                        looked up through `DocType.get`) or as a DocType
                        object that is used as-is.
        """
        if isinstance(doctype, basestring):
            resolved = DocType.get(doctype)
        else:
            resolved = doctype
        # The event is built once and reused for every stream processed.
        self.doctype_event = (DOCTYPE, resolved, (None, -1, -1))

    def __call__(self, stream):
        """Yield the events of `stream` with the DOCTYPE event inserted.

        :param stream: an iterable of ``(kind, data, pos)`` events
        :return: a generator over the events, including the DOCTYPE event
        """
        needs_doctype = True
        for kind, data, pos in stream:
            event = (kind, data, pos)
            if needs_doctype:
                # Only the first event decides where the DOCTYPE goes.
                needs_doctype = False
                if kind is not XML_DECL:
                    yield self.doctype_event
                    yield event
                else:
                    # Keep the XML declaration ahead of the DOCTYPE.
                    yield event
                    yield self.doctype_event
            else:
                yield event

        # An empty stream still produces the DOCTYPE on its own.
        if needs_doctype:
            yield self.doctype_event