annotate markup/output.py @ 73:1da51d718391 trunk

Some more performance tweaks.
author cmlenz
date Wed, 12 Jul 2006 18:47:39 +0000
parents c40a5dcd2b55
children 4938c310d904
rev   line source
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
2 #
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
3 # Copyright (C) 2006 Edgewall Software
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
5 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
8 # are also available at http://markup.edgewall.org/wiki/License.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
9 #
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
66
59eb24184e9c Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents: 27
diff changeset
12 # history and logs, available at http://markup.edgewall.org/log/.
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
13
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
14 """This module provides different kinds of serialization methods for XML event
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
15 streams.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
16 """
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
17
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
18 try:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
19 frozenset
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
20 except NameError:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
21 from sets import ImmutableSet as frozenset
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
22
73
1da51d718391 Some more performance tweaks.
cmlenz
parents: 69
diff changeset
23 from markup.core import escape, Markup, Namespace, QName
69
c40a5dcd2b55 A couple of minor performance improvements.
cmlenz
parents: 66
diff changeset
24 from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
25
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
26 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
27
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
28
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
class Serializer(object):
    """Base class for serializers.

    Concrete subclasses turn a stream of events into text by overriding
    `serialize()`.
    """

    def serialize(self, stream):
        """Must be implemented by concrete subclasses to serialize the given
        stream.

        This method must be implemented as a generator, producing the
        serialized output incrementally as unicode strings.

        Raises `NotImplementedError` when called on the base class.
        """
        raise NotImplementedError
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
40
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
41
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
class XMLSerializer(Serializer):
    """Produces XML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer().serialize(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    def serialize(self, stream):
        """Generate the XML serialization of `stream`, one piece at a time.

        `stream` is an iterable of ``(kind, data, pos)`` event tuples. Only
        `DOCTYPE`, `START_NS`, `START`, `END` and `TEXT` events produce
        output or change state; every other kind is silently skipped.

        An element whose `START` event is directly followed by an `END`
        event is written in the short ``<name/>`` form.

        The attribute list carried by a `START` event is never modified, so
        the same (buffered) stream can be serialized more than once.
        """
        # Namespace declarations collected from START_NS events (or from
        # elements using an undeclared namespace) that still have to be
        # written onto the next start tag.
        ns_attrib = []
        # Namespace URI -> prefix. Entries are never removed, END_NS events
        # are not handled here.
        ns_mapping = {}

        stream = _PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is DOCTYPE:
                # FIXME: what if there's no system or public ID in the input?
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)

            elif kind is START_NS:
                prefix, uri = data
                # Only the first binding seen for a URI is recorded/declared.
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is START:
                tag, attrib = data

                tagname = tag.localname
                if tag.namespace:
                    try:
                        prefix = ns_mapping[tag.namespace]
                    except KeyError:
                        # Namespace was never announced by a START_NS event:
                        # declare it as the default namespace on this tag.
                        ns_attrib.append((QName('xmlns'), tag.namespace))
                    else:
                        if prefix:
                            tagname = '%s:%s' % (prefix, tag.localname)
                buf = ['<%s' % tagname]

                if ns_attrib:
                    # Build a new list instead of extending `attrib` in
                    # place: the event data belongs to the stream, and
                    # mutating it would duplicate the xmlns attributes if
                    # the same events are serialized again.
                    attrib = list(attrib) + ns_attrib
                    ns_attrib = []
                for attr, value in attrib:
                    attrname = attr.localname
                    if attr.namespace:
                        prefix = ns_mapping.get(attr.namespace)
                        if prefix:
                            attrname = '%s:%s' % (prefix, attrname)
                    buf.append(' %s="%s"' % (attrname, escape(value)))

                # Peek at the next event to decide between `<x/>` and `<x>`.
                try:
                    kind, data, pos = stream.next()
                except StopIteration:
                    # Truncated stream: still emit the start tag rather than
                    # letting StopIteration abort the generator.
                    buf.append('>')
                else:
                    if kind is END:
                        buf.append('/>')
                    else:
                        buf.append('>')
                        stream.pushback((kind, data, pos))

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = '%s:%s' % (prefix, tag.localname)
                yield Markup('</%s>' % tagname)

            elif kind is TEXT:
                yield escape(data, quotes=False)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
115
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
116
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
class HTMLSerializer(Serializer):
    """Produces HTML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements and attributes that carry a namespace other than this one are
    # left out of the output.
    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')

    # Elements that are written without an end tag.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes written in minimized form (name only) when their value is
    # true, and omitted altogether otherwise.
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])

    def serialize(self, stream):
        """Generate the HTML serialization of `stream`, one piece at a time.

        `stream` is an iterable of ``(kind, data, pos)`` event tuples. Only
        `DOCTYPE`, `START`, `END` and `TEXT` events produce output; every
        other kind (including namespace events) is silently skipped.

        Start and end tags of elements in a foreign namespace are dropped,
        as are foreign-namespace attributes; text is always emitted.
        Elements listed in `_EMPTY_ELEMS` never get an end tag, regardless
        of what follows their `START` event in the stream.
        """
        for kind, data, pos in stream:

            if kind is DOCTYPE:
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)

            elif kind is START:
                tag, attrib = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                buf = ['<', tag.localname]
                for attr, value in attrib:
                    if attr.namespace and attr not in self.NAMESPACE:
                        continue # not in the HTML namespace, so don't emit
                    if attr.localname in self._BOOLEAN_ATTRS:
                        if value:
                            buf.append(' %s' % attr.localname)
                    else:
                        buf.append(' %s="%s"' % (attr.localname, escape(value)))
                buf.append('>')

                yield Markup(''.join(buf))

            elif kind is END:
                tag = data
                if tag.namespace and tag not in self.NAMESPACE:
                    continue # not in the HTML namespace, so don't emit
                if tag.localname in self._EMPTY_ELEMS:
                    # Suppress the end tag here instead of peeking ahead
                    # after START: this also covers an empty element that
                    # (wrongly) has content, and cannot run off the end of
                    # the stream.
                    continue
                yield Markup('</%s>' % tag.localname)

            elif kind is TEXT:
                yield escape(data, quotes=False)
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
178
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
179
26
3c1a022be04c * Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents: 20
diff changeset
180 class _PushbackIterator(object):
1
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
181 """A simple wrapper for iterators that allows pushing items back on the
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
182 queue via the `pushback()` method.
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
183
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
184 That can effectively be used to peek at the next item."""
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
185 __slots__ = ['iterable', 'buf']
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
186
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
187 def __init__(self, iterable):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
188 self.iterable = iter(iterable)
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
189 self.buf = []
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
190
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
191 def __iter__(self):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
192 return self
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
193
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
194 def next(self):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
195 if self.buf:
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
196 return self.buf.pop(0)
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
197 return self.iterable.next()
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
198
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
199 def pushback(self, item):
5479aae32f5a Initial import.
cmlenz
parents:
diff changeset
200 self.buf.append(item)
Copyright (C) 2012-2017 Edgewall Software