Mercurial > genshi > genshi-test
comparison genshi/output.py @ 820:1837f39efd6f experimental-inline
Sync (old) experimental inline branch with trunk@1027.
author | cmlenz |
---|---|
date | Wed, 11 Mar 2009 17:51:06 +0000 |
parents | 0742f421caba |
children | de82830f8816 |
comparison
equal
deleted
inserted
replaced
500:0742f421caba | 820:1837f39efd6f |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # | 2 # |
3 # Copyright (C) 2006-2007 Edgewall Software | 3 # Copyright (C) 2006-2008 Edgewall Software |
4 # All rights reserved. | 4 # All rights reserved. |
5 # | 5 # |
6 # This software is licensed as described in the file COPYING, which | 6 # This software is licensed as described in the file COPYING, which |
7 # you should have received as part of this distribution. The terms | 7 # you should have received as part of this distribution. The terms |
8 # are also available at http://genshi.edgewall.org/wiki/License. | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
14 """This module provides different kinds of serialization methods for XML event | 14 """This module provides different kinds of serialization methods for XML event |
15 streams. | 15 streams. |
16 """ | 16 """ |
17 | 17 |
18 from itertools import chain | 18 from itertools import chain |
19 try: | |
20 frozenset | |
21 except NameError: | |
22 from sets import ImmutableSet as frozenset | |
23 import re | 19 import re |
24 | 20 |
25 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind | 21 from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind |
26 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ | 22 from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ |
27 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE | 23 START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE |
28 | 24 |
29 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', | 25 __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', |
30 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] | 26 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] |
31 __docformat__ = 'restructuredtext en' | 27 __docformat__ = 'restructuredtext en' |
32 | 28 |
33 def encode(iterator, method='xml', encoding='utf-8'): | 29 def encode(iterator, method='xml', encoding='utf-8', out=None): |
34 """Encode serializer output into a string. | 30 """Encode serializer output into a string. |
35 | 31 |
36 :param iterator: the iterator returned from serializing a stream (basically | 32 :param iterator: the iterator returned from serializing a stream (basically |
37 any iterator that yields unicode objects) | 33 any iterator that yields unicode objects) |
38 :param method: the serialization method; determines how characters not | 34 :param method: the serialization method; determines how characters not |
39 representable in the specified encoding are treated | 35 representable in the specified encoding are treated |
40 :param encoding: how the output string should be encoded; if set to `None`, | 36 :param encoding: how the output string should be encoded; if set to `None`, |
41 this method returns a `unicode` object | 37 this method returns a `unicode` object |
42 :return: a string or unicode object (depending on the `encoding` parameter) | 38 :param out: a file-like object that the output should be written to |
39 instead of being returned as one big string; note that if | |
40 this is a file or socket (or similar), the `encoding` must | |
41 not be `None` (that is, the output must be encoded) | |
42 :return: a `str` or `unicode` object (depending on the `encoding` | |
43 parameter), or `None` if the `out` parameter is provided | |
44 | |
43 :since: version 0.4.1 | 45 :since: version 0.4.1 |
44 """ | 46 :note: Changed in 0.5: added the `out` parameter |
45 output = u''.join(list(iterator)) | 47 """ |
46 if encoding is not None: | 48 if encoding is not None: |
47 errors = 'replace' | 49 errors = 'replace' |
48 if method != 'text' and not isinstance(method, TextSerializer): | 50 if method != 'text' and not isinstance(method, TextSerializer): |
49 errors = 'xmlcharrefreplace' | 51 errors = 'xmlcharrefreplace' |
50 return output.encode(encoding, errors) | 52 _encode = lambda string: string.encode(encoding, errors) |
51 return output | 53 else: |
54 _encode = lambda string: string | |
55 if out is None: | |
56 return _encode(u''.join(list(iterator))) | |
57 for chunk in iterator: | |
58 out.write(_encode(chunk)) | |
52 | 59 |
53 def get_serializer(method='xml', **kwargs): | 60 def get_serializer(method='xml', **kwargs): |
54 """Return a serializer object for the given method. | 61 """Return a serializer object for the given method. |
55 | 62 |
56 :param method: the serialization method; can be either "xml", "xhtml", | 63 :param method: the serialization method; can be either "xml", "xhtml", |
100 XHTML_FRAMESET = ( | 107 XHTML_FRAMESET = ( |
101 'html', '-//W3C//DTD XHTML 1.0 Frameset//EN', | 108 'html', '-//W3C//DTD XHTML 1.0 Frameset//EN', |
102 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd' | 109 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd' |
103 ) | 110 ) |
104 XHTML = XHTML_STRICT | 111 XHTML = XHTML_STRICT |
112 | |
113 XHTML11 = ( | |
114 'html', '-//W3C//DTD XHTML 1.1//EN', | |
115 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' | |
116 ) | |
117 | |
118 SVG_FULL = ( | |
119 'svg', '-//W3C//DTD SVG 1.1//EN', | |
120 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd' | |
121 ) | |
122 SVG_BASIC = ( | |
123 'svg', '-//W3C//DTD SVG Basic 1.1//EN', | |
124 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd' | |
125 ) | |
126 SVG_TINY = ( | |
127 'svg', '-//W3C//DTD SVG Tiny 1.1//EN', | |
128 'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd' | |
129 ) | |
130 SVG = SVG_FULL | |
105 | 131 |
106 def get(cls, name): | 132 def get(cls, name): |
107 """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE`` | 133 """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE`` |
108 declaration for the specified name. | 134 declaration for the specified name. |
109 | 135 |
110 The following names are recognized in this version: | 136 The following names are recognized in this version: |
111 * "html" or "html-strict" for the HTML 4.01 strict DTD | 137 * "html" or "html-strict" for the HTML 4.01 strict DTD |
112 * "html-transitional" for the HTML 4.01 transitional DTD | 138 * "html-transitional" for the HTML 4.01 transitional DTD |
113 * "html-transitional" for the HTML 4.01 frameset DTD | 139 * "html-frameset" for the HTML 4.01 frameset DTD |
114 * "html5" for the ``DOCTYPE`` proposed for HTML5 | 140 * "html5" for the ``DOCTYPE`` proposed for HTML5 |
115 * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD | 141 * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD |
116 * "xhtml-transitional" for the XHTML 1.0 transitional DTD | 142 * "xhtml-transitional" for the XHTML 1.0 transitional DTD |
117 * "xhtml-frameset" for the XHTML 1.0 frameset DTD | 143 * "xhtml-frameset" for the XHTML 1.0 frameset DTD |
144 * "xhtml11" for the XHTML 1.1 DTD | |
145 * "svg" or "svg-full" for the SVG 1.1 DTD | |
146 * "svg-basic" for the SVG Basic 1.1 DTD | |
147 * "svg-tiny" for the SVG Tiny 1.1 DTD | |
118 | 148 |
119 :param name: the name of the ``DOCTYPE`` | 149 :param name: the name of the ``DOCTYPE`` |
120 :return: the ``(name, pubid, sysid)`` tuple for the requested | 150 :return: the ``(name, pubid, sysid)`` tuple for the requested |
121 ``DOCTYPE``, or ``None`` if the name is not recognized | 151 ``DOCTYPE``, or ``None`` if the name is not recognized |
122 :since: version 0.4.1 | 152 :since: version 0.4.1 |
127 'html-frameset': DocType.HTML_FRAMESET, | 157 'html-frameset': DocType.HTML_FRAMESET, |
128 'html5': cls.HTML5, | 158 'html5': cls.HTML5, |
129 'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT, | 159 'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT, |
130 'xhtml-transitional': cls.XHTML_TRANSITIONAL, | 160 'xhtml-transitional': cls.XHTML_TRANSITIONAL, |
131 'xhtml-frameset': cls.XHTML_FRAMESET, | 161 'xhtml-frameset': cls.XHTML_FRAMESET, |
162 'xhtml11': cls.XHTML11, | |
163 'svg': cls.SVG, 'svg-full': cls.SVG_FULL, | |
164 'svg-basic': cls.SVG_BASIC, | |
165 'svg-tiny': cls.SVG_TINY | |
132 }.get(name.lower()) | 166 }.get(name.lower()) |
133 get = classmethod(get) | 167 get = classmethod(get) |
134 | 168 |
135 | 169 |
136 class XMLSerializer(object): | 170 class XMLSerializer(object): |
154 defined in `DocType.get` | 188 defined in `DocType.get` |
155 :param strip_whitespace: whether extraneous whitespace should be | 189 :param strip_whitespace: whether extraneous whitespace should be |
156 stripped from the output | 190 stripped from the output |
157 :note: Changed in 0.4.2: The `doctype` parameter can now be a string. | 191 :note: Changed in 0.4.2: The `doctype` parameter can now be a string. |
158 """ | 192 """ |
159 self.preamble = [] | |
160 if doctype: | |
161 if isinstance(doctype, basestring): | |
162 doctype = DocType.get(doctype) | |
163 self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) | |
164 self.filters = [EmptyTagFilter()] | 193 self.filters = [EmptyTagFilter()] |
165 if strip_whitespace: | 194 if strip_whitespace: |
166 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) | 195 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
167 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) | 196 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) |
197 if doctype: | |
198 self.filters.append(DocTypeInserter(doctype)) | |
168 | 199 |
169 def __call__(self, stream): | 200 def __call__(self, stream): |
170 have_decl = have_doctype = False | 201 have_decl = have_doctype = False |
171 in_cdata = False | 202 in_cdata = False |
172 | 203 |
173 stream = chain(self.preamble, stream) | |
174 for filter_ in self.filters: | 204 for filter_ in self.filters: |
175 stream = filter_(stream) | 205 stream = filter_(stream) |
176 for kind, data, pos in stream: | 206 for kind, data, pos in stream: |
177 | 207 |
178 if kind is START or kind is EMPTY: | 208 if kind is START or kind is EMPTY: |
215 elif sysid: | 245 elif sysid: |
216 buf.append(' SYSTEM') | 246 buf.append(' SYSTEM') |
217 if sysid: | 247 if sysid: |
218 buf.append(' "%s"') | 248 buf.append(' "%s"') |
219 buf.append('>\n') | 249 buf.append('>\n') |
220 yield Markup(u''.join(buf), *filter(None, data)) | 250 yield Markup(u''.join(buf)) % filter(None, data) |
221 have_doctype = True | 251 have_doctype = True |
222 | 252 |
223 elif kind is START_CDATA: | 253 elif kind is START_CDATA: |
224 yield Markup('<![CDATA[') | 254 yield Markup('<![CDATA[') |
225 in_cdata = True | 255 in_cdata = True |
251 QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), | 281 QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), |
252 QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') | 282 QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') |
253 ]) | 283 ]) |
254 | 284 |
255 def __init__(self, doctype=None, strip_whitespace=True, | 285 def __init__(self, doctype=None, strip_whitespace=True, |
256 namespace_prefixes=None): | 286 namespace_prefixes=None, drop_xml_decl=True): |
257 super(XHTMLSerializer, self).__init__(doctype, False) | 287 super(XHTMLSerializer, self).__init__(doctype, False) |
258 self.filters = [EmptyTagFilter()] | 288 self.filters = [EmptyTagFilter()] |
259 if strip_whitespace: | 289 if strip_whitespace: |
260 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) | 290 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) |
261 namespace_prefixes = namespace_prefixes or {} | 291 namespace_prefixes = namespace_prefixes or {} |
262 namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' | 292 namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' |
263 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) | 293 self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) |
294 if doctype: | |
295 self.filters.append(DocTypeInserter(doctype)) | |
296 self.drop_xml_decl = drop_xml_decl | |
264 | 297 |
265 def __call__(self, stream): | 298 def __call__(self, stream): |
266 boolean_attrs = self._BOOLEAN_ATTRS | 299 boolean_attrs = self._BOOLEAN_ATTRS |
267 empty_elems = self._EMPTY_ELEMS | 300 empty_elems = self._EMPTY_ELEMS |
268 have_doctype = False | 301 drop_xml_decl = self.drop_xml_decl |
302 have_decl = have_doctype = False | |
269 in_cdata = False | 303 in_cdata = False |
270 | 304 |
271 stream = chain(self.preamble, stream) | |
272 for filter_ in self.filters: | 305 for filter_ in self.filters: |
273 stream = filter_(stream) | 306 stream = filter_(stream) |
274 for kind, data, pos in stream: | 307 for kind, data, pos in stream: |
275 | 308 |
276 if kind is START or kind is EMPTY: | 309 if kind is START or kind is EMPTY: |
277 tag, attrib = data | 310 tag, attrib = data |
278 buf = ['<', tag] | 311 buf = ['<', tag] |
279 for attr, value in attrib: | 312 for attr, value in attrib: |
280 if attr in boolean_attrs: | 313 if attr in boolean_attrs: |
281 value = attr | 314 value = attr |
315 elif attr == u'xml:lang' and u'lang' not in attrib: | |
316 buf += [' lang="', escape(value), '"'] | |
317 elif attr == u'xml:space': | |
318 continue | |
282 buf += [' ', attr, '="', escape(value), '"'] | 319 buf += [' ', attr, '="', escape(value), '"'] |
283 if kind is EMPTY: | 320 if kind is EMPTY: |
284 if tag in empty_elems: | 321 if tag in empty_elems: |
285 buf.append(' />') | 322 buf.append(' />') |
286 else: | 323 else: |
309 elif sysid: | 346 elif sysid: |
310 buf.append(' SYSTEM') | 347 buf.append(' SYSTEM') |
311 if sysid: | 348 if sysid: |
312 buf.append(' "%s"') | 349 buf.append(' "%s"') |
313 buf.append('>\n') | 350 buf.append('>\n') |
314 yield Markup(u''.join(buf), *filter(None, data)) | 351 yield Markup(u''.join(buf)) % filter(None, data) |
315 have_doctype = True | 352 have_doctype = True |
353 | |
354 elif kind is XML_DECL and not have_decl and not drop_xml_decl: | |
355 version, encoding, standalone = data | |
356 buf = ['<?xml version="%s"' % version] | |
357 if encoding: | |
358 buf.append(' encoding="%s"' % encoding) | |
359 if standalone != -1: | |
360 standalone = standalone and 'yes' or 'no' | |
361 buf.append(' standalone="%s"' % standalone) | |
362 buf.append('?>\n') | |
363 yield Markup(u''.join(buf)) | |
364 have_decl = True | |
316 | 365 |
317 elif kind is START_CDATA: | 366 elif kind is START_CDATA: |
318 yield Markup('<![CDATA[') | 367 yield Markup('<![CDATA[') |
319 in_cdata = True | 368 in_cdata = True |
320 | 369 |
352 super(HTMLSerializer, self).__init__(doctype, False) | 401 super(HTMLSerializer, self).__init__(doctype, False) |
353 self.filters = [EmptyTagFilter()] | 402 self.filters = [EmptyTagFilter()] |
354 if strip_whitespace: | 403 if strip_whitespace: |
355 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, | 404 self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, |
356 self._NOESCAPE_ELEMS)) | 405 self._NOESCAPE_ELEMS)) |
357 self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml')) | 406 self.filters.append(NamespaceFlattener(prefixes={ |
407 'http://www.w3.org/1999/xhtml': '' | |
408 })) | |
409 if doctype: | |
410 self.filters.append(DocTypeInserter(doctype)) | |
358 | 411 |
359 def __call__(self, stream): | 412 def __call__(self, stream): |
360 boolean_attrs = self._BOOLEAN_ATTRS | 413 boolean_attrs = self._BOOLEAN_ATTRS |
361 empty_elems = self._EMPTY_ELEMS | 414 empty_elems = self._EMPTY_ELEMS |
362 noescape_elems = self._NOESCAPE_ELEMS | 415 noescape_elems = self._NOESCAPE_ELEMS |
363 have_doctype = False | 416 have_doctype = False |
364 noescape = False | 417 noescape = False |
365 | 418 |
366 stream = chain(self.preamble, stream) | |
367 for filter_ in self.filters: | 419 for filter_ in self.filters: |
368 stream = filter_(stream) | 420 stream = filter_(stream) |
369 for kind, data, pos in stream: | 421 for kind, data, pos in stream: |
370 | 422 |
371 if kind is START or kind is EMPTY: | 423 if kind is START or kind is EMPTY: |
373 buf = ['<', tag] | 425 buf = ['<', tag] |
374 for attr, value in attrib: | 426 for attr, value in attrib: |
375 if attr in boolean_attrs: | 427 if attr in boolean_attrs: |
376 if value: | 428 if value: |
377 buf += [' ', attr] | 429 buf += [' ', attr] |
378 else: | 430 elif ':' in attr: |
431 if attr == 'xml:lang' and u'lang' not in attrib: | |
432 buf += [' lang="', escape(value), '"'] | |
433 elif attr != 'xmlns': | |
379 buf += [' ', attr, '="', escape(value), '"'] | 434 buf += [' ', attr, '="', escape(value), '"'] |
380 buf.append('>') | 435 buf.append('>') |
381 if kind is EMPTY: | 436 if kind is EMPTY: |
382 if tag not in empty_elems: | 437 if tag not in empty_elems: |
383 buf.append('</%s>' % tag) | 438 buf.append('</%s>' % tag) |
406 elif sysid: | 461 elif sysid: |
407 buf.append(' SYSTEM') | 462 buf.append(' SYSTEM') |
408 if sysid: | 463 if sysid: |
409 buf.append(' "%s"') | 464 buf.append(' "%s"') |
410 buf.append('>\n') | 465 buf.append('>\n') |
411 yield Markup(u''.join(buf), *filter(None, data)) | 466 yield Markup(u''.join(buf)) % filter(None, data) |
412 have_doctype = True | 467 have_doctype = True |
413 | 468 |
414 elif kind is PI: | 469 elif kind is PI: |
415 yield Markup('<?%s %s?>' % data) | 470 yield Markup('<?%s %s?>' % data) |
416 | 471 |
427 <div><a href="foo">&lt;Hello!&gt;</a><br/></div> | 482 <div><a href="foo">&lt;Hello!&gt;</a><br/></div> |
428 >>> print ''.join(TextSerializer()(elem.generate())) | 483 >>> print ''.join(TextSerializer()(elem.generate())) |
429 <Hello!> | 484 <Hello!> |
430 | 485 |
431 If text events contain literal markup (instances of the `Markup` class), | 486 If text events contain literal markup (instances of the `Markup` class), |
432 tags or entities are stripped from the output: | 487 that markup is by default passed through unchanged: |
433 | 488 |
434 >>> elem = tag.div(Markup('<a href="foo">Hello!</a><br/>')) | 489 >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>')) |
435 >>> print elem | 490 >>> print elem.generate().render(TextSerializer) |
436 <div><a href="foo">Hello!</a><br/></div> | 491 <a href="foo">Hello &amp; Bye!</a><br/> |
437 >>> print ''.join(TextSerializer()(elem.generate())) | 492 |
438 Hello! | 493 You can use the ``strip_markup`` to change this behavior, so that tags and |
439 """ | 494 entities are stripped from the output (or in the case of entities, |
495 replaced with the equivalent character): | |
496 | |
497 >>> print elem.generate().render(TextSerializer, strip_markup=True) | |
498 Hello & Bye! | |
499 """ | |
500 | |
501 def __init__(self, strip_markup=False): | |
502 """Create the serializer. | |
503 | |
504 :param strip_markup: whether markup (tags and encoded characters) found | |
505 in the text should be removed | |
506 """ | |
507 self.strip_markup = strip_markup | |
440 | 508 |
441 def __call__(self, stream): | 509 def __call__(self, stream): |
510 strip_markup = self.strip_markup | |
442 for event in stream: | 511 for event in stream: |
443 if event[0] is TEXT: | 512 if event[0] is TEXT: |
444 data = event[1] | 513 data = event[1] |
445 if type(data) is Markup: | 514 if strip_markup and type(data) is Markup: |
446 data = data.striptags().stripentities() | 515 data = data.striptags().stripentities() |
447 yield unicode(data) | 516 yield unicode(data) |
448 | 517 |
449 | 518 |
450 class EmptyTagFilter(object): | 519 class EmptyTagFilter(object): |
582 if attr in ns_attrs: | 651 if attr in ns_attrs: |
583 ns_attrs.remove(attr) | 652 ns_attrs.remove(attr) |
584 | 653 |
585 else: | 654 else: |
586 yield kind, data, pos | 655 yield kind, data, pos |
587 | |
588 | |
589 class NamespaceStripper(object): | |
590 r"""Stream filter that removes all namespace information from a stream, and | |
591 optionally strips out all tags not in a given namespace. | |
592 | |
593 :param namespace: the URI of the namespace that should not be stripped. If | |
594 not set, only elements with no namespace are included in | |
595 the output. | |
596 | |
597 >>> from genshi.input import XML | |
598 >>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2"> | |
599 ... <two:item/> | |
600 ... </doc>''') | |
601 >>> for kind, data, pos in NamespaceStripper(Namespace('NS1'))(xml): | |
602 ... print kind, repr(data) | |
603 START (u'doc', Attrs()) | |
604 TEXT u'\n ' | |
605 TEXT u'\n' | |
606 END u'doc' | |
607 """ | |
608 | |
609 def __init__(self, namespace=None): | |
610 if namespace is not None: | |
611 self.namespace = Namespace(namespace) | |
612 else: | |
613 self.namespace = {} | |
614 | |
615 def __call__(self, stream): | |
616 namespace = self.namespace | |
617 | |
618 for kind, data, pos in stream: | |
619 | |
620 if kind is START or kind is EMPTY: | |
621 tag, attrs = data | |
622 if tag.namespace and tag not in namespace: | |
623 continue | |
624 | |
625 new_attrs = [] | |
626 for attr, value in attrs: | |
627 if not attr.namespace or attr in namespace: | |
628 new_attrs.append((attr, value)) | |
629 | |
630 data = tag.localname, Attrs(new_attrs) | |
631 | |
632 elif kind is END: | |
633 if data.namespace and data not in namespace: | |
634 continue | |
635 data = data.localname | |
636 | |
637 elif kind is START_NS or kind is END_NS: | |
638 continue | |
639 | |
640 yield kind, data, pos | |
641 | 656 |
642 | 657 |
643 class WhitespaceFilter(object): | 658 class WhitespaceFilter(object): |
644 """A filter that removes extraneous ignorable white space from the | 659 """A filter that removes extraneous ignorable white space from the |
645 stream. | 660 stream. |
712 elif kind is END_CDATA: | 727 elif kind is END_CDATA: |
713 noescape = False | 728 noescape = False |
714 | 729 |
715 if kind: | 730 if kind: |
716 yield kind, data, pos | 731 yield kind, data, pos |
732 | |
733 | |
734 class DocTypeInserter(object): | |
735 """A filter that inserts the DOCTYPE declaration in the correct location, | |
736 after the XML declaration. | |
737 """ | |
738 def __init__(self, doctype): | |
739 """Initialize the filter. | |
740 | |
741 :param doctype: DOCTYPE as a string or DocType object. | |
742 """ | |
743 if isinstance(doctype, basestring): | |
744 doctype = DocType.get(doctype) | |
745 self.doctype_event = (DOCTYPE, doctype, (None, -1, -1)) | |
746 | |
747 def __call__(self, stream): | |
748 doctype_inserted = False | |
749 for kind, data, pos in stream: | |
750 if not doctype_inserted: | |
751 doctype_inserted = True | |
752 if kind is XML_DECL: | |
753 yield (kind, data, pos) | |
754 yield self.doctype_event | |
755 continue | |
756 yield self.doctype_event | |
757 | |
758 yield (kind, data, pos) | |
759 | |
760 if not doctype_inserted: | |
761 yield self.doctype_event |