1
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright (C) 2006 Christopher Lenz
|
|
4 # All rights reserved.
|
|
5 #
|
|
6 # This software is licensed as described in the file COPYING, which
|
|
7 # you should have received as part of this distribution. The terms
|
|
8 # are also available at http://trac.edgewall.com/license.html.
|
|
9 #
|
|
10 # This software consists of voluntary contributions made by many
|
|
11 # individuals. For the exact contribution history, see the revision
|
|
12 # history and logs, available at http://projects.edgewall.com/trac/.
|
|
13
|
|
14 """This module provides different kinds of serialization methods for XML event
|
|
15 streams.
|
|
16 """
|
|
17
|
|
18 try:
|
|
19 frozenset
|
|
20 except NameError:
|
|
21 from sets import ImmutableSet as frozenset
|
|
22
|
|
23 from markup.core import Markup, QName, Stream
|
|
24 from markup.filters import WhitespaceFilter
|
|
25
|
|
26 __all__ = ['Serializer', 'XMLSerializer', 'HTMLSerializer']
|
|
27
|
|
28
|
|
class Serializer(object):
    """Abstract base for all event stream serializers.

    Subclasses are expected to override `serialize()`.
    """

    def serialize(self, stream):
        """Turn the given event stream into output.

        The base implementation does nothing useful and always raises
        `NotImplementedError`.
        """
        raise NotImplementedError()
|
|
34
|
|
35
|
|
class XMLSerializer(Serializer):
    """Produces XML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.DIV(tag.A(href='foo'), tag.BR, tag.HR(noshade=True))
    >>> print ''.join(XMLSerializer().serialize(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    def serialize(self, stream):
        """Generate the pieces of XML text for the events in `stream`.

        `stream` is iterated as `(kind, data, pos)` triples. DOCTYPE,
        START_NS, START, END and TEXT events are handled; events of any
        other kind are ignored. Namespace declarations seen via START_NS
        are collected and written as `xmlns` attributes on the next start
        tag. A START event that is directly followed by an END event is
        written as a self-closing tag (`<foo/>`).

        The attribute list carried by a START event is never modified.
        """
        # Pending xmlns declarations, flushed onto the next start tag.
        ns_attrib = []
        # Maps namespace URI -> prefix (empty/None for the default namespace).
        ns_mapping = {}

        stream = PushbackIterator(stream)
        for kind, data, pos in stream:

            if kind is Stream.DOCTYPE:
                # FIXME: what if there's no system or public ID in the input?
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)

            elif kind is Stream.START_NS:
                prefix, uri = data
                if uri not in ns_mapping:
                    ns_mapping[uri] = prefix
                    if not prefix:
                        ns_attrib.append((QName('xmlns'), uri))
                    else:
                        ns_attrib.append((QName('xmlns:%s' % prefix), uri))

            elif kind is Stream.START:
                tag, attrib = data

                tagname = tag.localname
                if tag.namespace:
                    try:
                        prefix = ns_mapping[tag.namespace]
                    except KeyError:
                        # Namespace was never announced through START_NS:
                        # declare it as the default namespace on this tag.
                        ns_attrib.append((QName('xmlns'), tag.namespace))
                    else:
                        if prefix:
                            tagname = prefix + ':' + tag.localname
                buf = ['<', tagname]

                # Work on a copy so that the pending xmlns declarations are
                # not written into the attribute list owned by the event;
                # otherwise re-serializing the same events would see them.
                attrs = list(attrib)
                if ns_attrib:
                    attrs.extend(ns_attrib)
                    ns_attrib = []
                for attr, value in attrs:
                    attrname = attr.localname
                    if attr.namespace:
                        try:
                            prefix = ns_mapping[attr.namespace]
                        except KeyError:
                            # FIXME: synthesize a prefix for the attribute?
                            prefix = ''
                        if prefix:
                            attrname = prefix + ':' + attrname
                    buf.append(' %s="%s"' % (attrname, Markup.escape(value)))

                # Peek at the next event to decide between <foo/> and <foo>.
                kind, data, pos = stream.next()
                if kind is Stream.END:
                    buf.append('/>')
                else:
                    buf.append('>')
                    stream.pushback((kind, data, pos))

                yield Markup(''.join(buf))

            elif kind is Stream.END:
                tag = data
                tagname = tag.localname
                if tag.namespace:
                    # The start tag tolerates an undeclared namespace by
                    # falling back to the unprefixed name; do the same here
                    # instead of failing with a KeyError.
                    prefix = ns_mapping.get(tag.namespace)
                    if prefix:
                        tagname = prefix + ':' + tag.localname
                yield Markup('</%s>' % tagname)

            elif kind is Stream.TEXT:
                yield Markup.escape(data, quotes=False)
|
|
113
|
|
114
|
|
class HTMLSerializer(Serializer):
    """Produces HTML text from an event stream.

    >>> from markup.builder import tag
    >>> elem = tag.DIV(tag.A(href='foo'), tag.BR, tag.HR(noshade=True))
    >>> print ''.join(HTMLSerializer().serialize(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements and attributes in this namespace (or in no namespace at all)
    # are written out; everything else is dropped.
    NAMESPACE = 'http://www.w3.org/1999/xhtml'

    # Elements written without an end tag when they have no content.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes written as a bare name when their value is true, and
    # omitted entirely otherwise.
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])

    def _is_foreign(self, qname):
        """Tell whether `qname` has a namespace other than `NAMESPACE`."""
        namespace = qname.namespace
        return bool(namespace) and namespace != self.NAMESPACE

    def serialize(self, stream):
        """Generate the pieces of HTML text for the events in `stream`.

        `stream` is iterated as `(kind, data, pos)` triples. Tags and
        attributes from a namespace other than `NAMESPACE` are skipped, but
        text events are always written.
        """
        # Namespace URI -> first prefix seen. Recorded for every START_NS
        # event, although nothing in this method reads it back.
        declared = {}

        events = PushbackIterator(stream)
        for kind, data, pos in events:

            if kind is Stream.DOCTYPE:
                yield Markup('<!DOCTYPE %s "%s" "%s">\n' % data)
                continue

            if kind is Stream.START_NS:
                prefix, uri = data
                declared.setdefault(uri, prefix)
                continue

            if kind is Stream.TEXT:
                yield Markup.escape(data, quotes=False)
                continue

            if kind is Stream.END:
                if not self._is_foreign(data):
                    yield Markup('</%s>' % data.localname)
                continue

            if kind is not Stream.START:
                # Any other kind of event produces no output.
                continue

            tag, attrib = data
            if self._is_foreign(tag):
                continue  # not in the HTML namespace, so don't emit

            parts = ['<', tag.localname]
            for attr, value in attrib:
                if self._is_foreign(attr):
                    continue  # not in the HTML namespace, so don't emit
                name = attr.localname
                if name not in self._BOOLEAN_ATTRS:
                    parts.append(' %s="%s"' % (name, Markup.escape(value)))
                elif value:
                    parts.append(' %s' % name)

            if tag.localname in self._EMPTY_ELEMS:
                # Swallow an immediately following END event so that no end
                # tag is written; anything else goes back onto the stream.
                next_kind, next_data, next_pos = events.next()
                if next_kind is not Stream.END:
                    events.pushback((next_kind, next_data, next_pos))

            parts.append('>')
            yield Markup(''.join(parts))
|
|
177
|
|
178
|
|
179 class PushbackIterator(object):
|
|
180 """A simple wrapper for iterators that allows pushing items back on the
|
|
181 queue via the `pushback()` method.
|
|
182
|
|
183 That can effectively be used to peek at the next item."""
|
|
184 __slots__ = ['iterable', 'buf']
|
|
185
|
|
186 def __init__(self, iterable):
|
|
187 self.iterable = iter(iterable)
|
|
188 self.buf = []
|
|
189
|
|
190 def __iter__(self):
|
|
191 return self
|
|
192
|
|
193 def next(self):
|
|
194 if self.buf:
|
|
195 return self.buf.pop(0)
|
|
196 return self.iterable.next()
|
|
197
|
|
198 def pushback(self, item):
|
|
199 self.buf.append(item)
|