Mercurial > genshi > mirror
annotate markup/output.py @ 73:1da51d718391 trunk
Some more performance tweaks.
author | cmlenz |
---|---|
date | Wed, 12 Jul 2006 18:47:39 +0000 |
parents | c40a5dcd2b55 |
children | 4938c310d904 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
59eb24184e9c
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
14 """This module provides different kinds of serialization methods for XML event | |
15 streams. | |
16 """ | |
17 | |
18 try: | |
19 frozenset | |
20 except NameError: | |
21 from sets import ImmutableSet as frozenset | |
22 | |
73 | 23 from markup.core import escape, Markup, Namespace, QName |
69 | 24 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT |
1 | 25 |
26 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer'] | |
27 | |
28 | |
class Serializer(object):
    """Abstract base for all event-stream serializers."""

    def serialize(self, stream):
        """Turn the given event stream into serialized text.

        Concrete subclasses have to override this method, and the override
        must be written as a generator that yields the serialized output
        piece by piece as unicode strings.
        """
        raise NotImplementedError
40 | |
41 | |
class XMLSerializer(Serializer):
    """Produces XML text from an event stream.

    Namespace declarations announced by `START_NS` events are collected and
    written as `xmlns` / `xmlns:prefix` attributes on the next start tag.
    When the event directly following a `START` event is an `END` event, the
    element is written in the short `<tag/>` form.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer().serialize(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    def _doctype(self, data):
        """Return the `<!DOCTYPE ...>` line for the data of a DOCTYPE event.

        An ID that is empty or `None` is left out instead of being written
        as the literal text "None", and the `PUBLIC` / `SYSTEM` keyword
        required in front of the external ID is emitted.
        """
        # NOTE(review): the tuple order (name, public ID, system ID) is
        # assumed from the order a DOCTYPE declaration lists them in --
        # confirm against the code that produces DOCTYPE events.
        name, pubid, sysid = data
        parts = ['<!DOCTYPE %s' % name]
        if pubid:
            parts.append(' PUBLIC "%s"' % pubid)
        elif sysid:
            parts.append(' SYSTEM')
        if sysid:
            parts.append(' "%s"' % sysid)
        parts.append('>\n')
        return ''.join(parts)

    def serialize(self, stream):
        """Generate the XML serialization of `stream`.

        `stream` is iterated as `(kind, data, pos)` events. Markup (tags
        and the DOCTYPE) is yielded as `Markup` instances, text is yielded
        escaped with `quotes=False`. Events of any other kind are ignored.
        """
        ns_attrib = []   # xmlns declarations waiting for the next start tag
        ns_mapping = {}  # namespace URI -> prefix

        stream = _PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is DOCTYPE:
                yield Markup(self._doctype(data))

            elif kind is START_NS:
                prefix, uri = data
                # Only the first declaration of a namespace URI is recorded
                # and emitted.
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is START:
                tag, attrib = data

                tagname = tag.localname
                if tag.namespace:
                    try:
                        prefix = ns_mapping[tag.namespace]
                    except KeyError:
                        # Namespace was never announced via START_NS:
                        # declare it as the default namespace on this tag.
                        ns_attrib.append((QName('xmlns'), tag.namespace))
                    else:
                        if prefix:
                            tagname = '%s:%s' % (prefix, tag.localname)
                buf = ['<%s' % tagname]

                # Work on a copy: extending `attrib` in place would modify
                # the attribute list owned by the event, so the xmlns
                # attributes would pile up if the same events were
                # serialized a second time.
                attrs = list(attrib)
                if ns_attrib:
                    attrs.extend(ns_attrib)
                    ns_attrib = []
                for attr, value in attrs:
                    attrname = attr.localname
                    if attr.namespace:
                        prefix = ns_mapping.get(attr.namespace)
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    buf.append(' %s="%s"' % (attrname, escape(value)))

                # Peek at the next event: an END right away means the
                # element is empty and can be closed in the start tag. The
                # END event is consumed in that case, so no separate end
                # tag is written.
                kind, data, pos = stream.next()
                if kind is END:
                    buf.append('/>')
                else:
                    buf.append('>')
                    stream.pushback((kind, data, pos))

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = '%s:%s' % (prefix, tag.localname)
                yield Markup('</%s>' % tagname)

            elif kind is TEXT:
                yield escape(data, quotes=False)
1 | 115 |
116 | |
class HTMLSerializer(Serializer):
    """Produces HTML text from an event stream.

    Start and end tags of elements that carry a namespace other than
    `NAMESPACE` are not written, and neither are attributes carrying such a
    namespace. Elements listed in `_EMPTY_ELEMS` get no end tag when their
    `END` event directly follows the `START` event, and attributes listed
    in `_BOOLEAN_ATTRS` are written in minimized form (name only) when
    their value is true and dropped otherwise.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')

    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])

    def _doctype(self, data):
        """Return the `<!DOCTYPE ...>` line for the data of a DOCTYPE event.

        An ID that is empty or `None` is left out instead of being written
        as the literal text "None", and the `PUBLIC` / `SYSTEM` keyword
        required in front of the external ID is emitted.
        """
        # NOTE(review): the tuple order (name, public ID, system ID) is
        # assumed from the order a DOCTYPE declaration lists them in --
        # confirm against the code that produces DOCTYPE events.
        name, pubid, sysid = data
        parts = ['<!DOCTYPE %s' % name]
        if pubid:
            parts.append(' PUBLIC "%s"' % pubid)
        elif sysid:
            parts.append(' SYSTEM')
        if sysid:
            parts.append(' "%s"' % sysid)
        parts.append('>\n')
        return ''.join(parts)

    def serialize(self, stream):
        """Generate the HTML serialization of `stream`.

        `stream` is iterated as `(kind, data, pos)` events. Markup (tags
        and the DOCTYPE) is yielded as `Markup` instances, text is yielded
        escaped with `quotes=False`. Events of any other kind -- including
        namespace declarations, which have no effect on the HTML output --
        are ignored.
        """
        stream = _PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is DOCTYPE:
                yield Markup(self._doctype(data))

            elif kind is START:
                tag, attrib = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                buf = ['<', tag.localname]
                for attr, value in attrib:
                    if attr.namespace and attr not in self.NAMESPACE:
                        continue # not in the HTML namespace, so don't emit
                    if attr.localname in self._BOOLEAN_ATTRS:
                        # Minimized form; a false value omits the attribute.
                        if value:
                            buf.append(' %s' % attr.localname)
                    else:
                        buf.append(' %s="%s"' % (attr.localname,
                                                 escape(value)))

                if tag.localname in self._EMPTY_ELEMS:
                    # Swallow the END event that directly follows an empty
                    # element so that no end tag is written for it; any
                    # other event is put back for normal processing.
                    kind, data, pos = stream.next()
                    if kind is not END:
                        stream.pushback((kind, data, pos))

                yield Markup(''.join(buf + ['>']))

            elif kind is END:
                tag = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                yield Markup('</%s>' % tag.localname)

            elif kind is TEXT:
                yield escape(data, quotes=False)
1 | 178 |
179 | |
26
3c1a022be04c
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
20
diff
changeset
|
180 class _PushbackIterator(object): |
1 | 181 """A simple wrapper for iterators that allows pushing items back on the |
182 queue via the `pushback()` method. | |
183 | |
184 That can effectively be used to peek at the next item.""" | |
185 __slots__ = ['iterable', 'buf'] | |
186 | |
187 def __init__(self, iterable): | |
188 self.iterable = iter(iterable) | |
189 self.buf = [] | |
190 | |
191 def __iter__(self): | |
192 return self | |
193 | |
194 def next(self): | |
195 if self.buf: | |
196 return self.buf.pop(0) | |
197 return self.iterable.next() | |
198 | |
199 def pushback(self, item): | |
200 self.buf.append(item) |