annotate markup/core.py @ 8:ea47069a901c

`Stream.render()` was masking `TypeError`s (fix based on suggestion by Matt Good).
author cmlenz
date Sun, 04 Jun 2006 10:59:25 +0000
parents 5da45906dda7
children 3dc28e165273
rev   line source
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
1 # -*- coding: utf-8 -*-
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
2 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
3 # Copyright (C) 2006 Christopher Lenz
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
4 # All rights reserved.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
5 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
6 # This software is licensed as described in the file COPYING, which
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
7 # you should have received as part of this distribution. The terms
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
8 # are also available at http://trac.edgewall.com/license.html.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
9 #
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
10 # This software consists of voluntary contributions made by many
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
11 # individuals. For the exact contribution history, see the revision
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
12 # history and logs, available at http://projects.edgewall.com/trac/.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
13
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
14 """Core classes for markup processing."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
15
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
16 import htmlentitydefs
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
17 import re
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
18 from StringIO import StringIO
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
19
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
20 __all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Namespace', 'QName']
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
21
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
22
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
23 class StreamEventKind(object):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
24 """A kind of event on an XML stream."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
25
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
26 __slots__ = ['name']
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
27
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
28 def __init__(self, name):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
29 self.name = name
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
30
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
31 def __repr__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
32 return self.name
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
33
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
34
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
35 class Stream(object):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
36 """Represents a stream of markup events.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
37
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
38 This class is basically an iterator over the events.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
39
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
40 Also provided are ways to serialize the stream to text. The `serialize()`
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
41 method will return an iterator over generated strings, while `render()`
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
42 returns the complete generated text at once. Both accept various parameters
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
43 that impact the way the stream is serialized.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
44
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
45 Stream events are tuples of the form:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
46
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
47 (kind, data, position)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
48
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
49 where `kind` is the event kind (such as `START`, `END`, `TEXT`, etc), `data`
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
50 depends on the kind of event, and `position` is a `(line, offset)` tuple
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
51 that contains the location of the original element or text in the input.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
52 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
53 __slots__ = ['events']
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
54
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
55 START = StreamEventKind('start') # a start tag
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
56 END = StreamEventKind('end') # an end tag
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
57 TEXT = StreamEventKind('text') # literal text
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
58 EXPR = StreamEventKind('expr') # an expression
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
59 SUB = StreamEventKind('sub') # a "subprogram"
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
60 PROLOG = StreamEventKind('prolog') # XML prolog
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
61 DOCTYPE = StreamEventKind('doctype') # doctype declaration
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
62 START_NS = StreamEventKind('start-ns') # start namespace mapping
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
63 END_NS = StreamEventKind('end-ns') # end namespace mapping
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
64 PI = StreamEventKind('pi') # processing instruction
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
65 COMMENT = StreamEventKind('comment') # comment
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
66
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
67 def __init__(self, events):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
68 """Initialize the stream with a sequence of markup events.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
69
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
70 @param events: a sequence or iterable providing the events
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
71 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
72 self.events = events
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
73
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
74 def __iter__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
75 return iter(self.events)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
76
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
77 def render(self, method='xml', encoding='utf-8', **kwargs):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
78 """Return a string representation of the stream.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
79
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
80 @param method: determines how the stream is serialized; can be either
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
81 'xml' or 'html', or a custom `Serializer` subclass
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
82 @param encoding: how the output string should be encoded; if set to
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
83 `None`, this method returns a `unicode` object
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
84
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
85 Any additional keyword arguments are passed to the serializer, and thus
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
86 depend on the `method` parameter value.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
87 """
8
ea47069a901c `Stream.render()` was masking `TypeError`s (fix based on suggestion by Matt Good).
cmlenz
parents: 6
diff changeset
88 output = u''.join(list(self.serialize(method=method, **kwargs)))
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
89 if encoding is not None:
8
ea47069a901c `Stream.render()` was masking `TypeError`s (fix based on suggestion by Matt Good).
cmlenz
parents: 6
diff changeset
90 return output.encode('utf-8')
ea47069a901c `Stream.render()` was masking `TypeError`s (fix based on suggestion by Matt Good).
cmlenz
parents: 6
diff changeset
91 return output
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
92
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
93 def select(self, path):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
94 """Return a new stream that contains the events matching the given
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
95 XPath expression.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
96
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
97 @param path: a string containing the XPath expression
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
98 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
99 from markup.path import Path
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
100 path = Path(path)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
101 return path.select(self)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
102
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
103 def serialize(self, method='xml', **kwargs):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
104 """Generate strings corresponding to a specific serialization of the
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
105 stream.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
106
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
107 Unlike the `render()` method, this method is a generator that returns
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
108 the serialized output incrementally, as opposed to returning a single
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
109 string.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
110
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
111 @param method: determines how the stream is serialized; can be either
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
112 'xml' or 'html', or a custom `Serializer` subclass
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
113 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
114 from markup import output
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
115 cls = method
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
116 if isinstance(method, basestring):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
117 cls = {'xml': output.XMLSerializer,
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
118 'html': output.HTMLSerializer}[method]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
119 else:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
120 assert issubclass(cls, serializers.Serializer)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
121 serializer = cls(**kwargs)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
122 return serializer.serialize(self)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
123
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
124 def __str__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
125 return self.render()
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
126
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
127 def __unicode__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
128 return self.render(encoding=None)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
129
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
130
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
131 class Attributes(list):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
132
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
133 def __init__(self, attrib=None):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
134 list.__init__(self, map(lambda (k, v): (QName(k), v), attrib or []))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
135
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
136 def __contains__(self, name):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
137 return name in [attr for attr, value in self]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
138
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
139 def get(self, name, default=None):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
140 for attr, value in self:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
141 if attr == name:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
142 return value
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
143 return default
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
144
5
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
145 def remove(self, name):
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
146 for idx, (attr, _) in enumerate(self):
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
147 if attr == name:
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
148 del self[idx]
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
149 break
1add946decb8 Improved `py:attrs` directive so that it removes existing attributes if they evaluate to `None` (AFAICT matching Kid behavior).
cmlenz
parents: 1
diff changeset
150
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
151 def set(self, name, value):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
152 for idx, (attr, _) in enumerate(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
153 if attr == name:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
154 self[idx] = (attr, value)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
155 break
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
156 else:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
157 self.append((QName(name), value))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
158
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
159
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
160 class Markup(unicode):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
161 """Marks a string as being safe for inclusion in HTML/XML output without
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
162 needing to be escaped.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
163 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
164 def __new__(self, text='', *args):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
165 if args:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
166 text %= tuple([escape(arg) for arg in args])
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
167 return unicode.__new__(self, text)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
168
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
169 def __add__(self, other):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
170 return Markup(unicode(self) + Markup.escape(other))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
171
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
172 def __mod__(self, args):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
173 if not isinstance(args, (list, tuple)):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
174 args = [args]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
175 return Markup(unicode.__mod__(self,
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
176 tuple([escape(arg) for arg in args])))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
177
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
178 def __mul__(self, num):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
179 return Markup(unicode(self) * num)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
180
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
181 def join(self, seq):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
182 return Markup(unicode(self).join([Markup.escape(item) for item in seq]))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
183
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
184 def stripentities(self, keepxmlentities=False):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
185 """Return a copy of the text with any character or numeric entities
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
186 replaced by the equivalent Unicode characters.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
187
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
188 If the `keepxmlentities` parameter is provided and evaluates to `True`,
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
189 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
190 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
191 def _replace_entity(match):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
192 if match.group(1): # numeric entity
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
193 ref = match.group(1)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
194 if ref.startswith('x'):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
195 ref = int(ref[1:], 16)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
196 else:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
197 ref = int(ref, 10)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
198 return unichr(ref)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
199 else: # character entity
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
200 ref = match.group(2)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
201 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
202 return '&%s;' % ref
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
203 try:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
204 codepoint = htmlentitydefs.name2codepoint[ref]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
205 return unichr(codepoint)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
206 except KeyError:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
207 if keepxmlentities:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
208 return '&%s;' % ref
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
209 else:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
210 return ref
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
211 return Markup(re.sub(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)',
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
212 _replace_entity, self))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
213
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
214 def striptags(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
215 """Return a copy of the text with all XML/HTML tags removed."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
216 return Markup(re.sub(r'<[^>]*?>', '', self))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
217
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
218 def escape(cls, text, quotes=True):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
219 """Create a Markup instance from a string and escape special characters
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
220 it may contain (<, >, & and \").
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
221
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
222 If the `quotes` parameter is set to `False`, the \" character is left
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
223 as is. Escaping quotes is generally only required for strings that are
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
224 to be used in attribute values.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
225 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
226 if isinstance(text, cls):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
227 return text
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
228 text = unicode(text)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
229 if not text:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
230 return cls()
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
231 text = text.replace('&', '&amp;') \
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
232 .replace('<', '&lt;') \
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
233 .replace('>', '&gt;')
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
234 if quotes:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
235 text = text.replace('"', '&#34;')
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
236 return cls(text)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
237 escape = classmethod(escape)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
238
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
239 def unescape(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
240 """Reverse-escapes &, <, > and \" and returns a `unicode` object."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
241 if not self:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
242 return ''
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
243 return unicode(self).replace('&#34;', '"') \
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
244 .replace('&gt;', '>') \
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
245 .replace('&lt;', '<') \
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
246 .replace('&amp;', '&')
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
247
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
248 def plaintext(self, keeplinebreaks=True):
6
5da45906dda7 Simplified implementation of `py:content` directive.
cmlenz
parents: 5
diff changeset
249 """Returns the text as a `unicode` string with all entities and tags
5da45906dda7 Simplified implementation of `py:content` directive.
cmlenz
parents: 5
diff changeset
250 removed.
5da45906dda7 Simplified implementation of `py:content` directive.
cmlenz
parents: 5
diff changeset
251 """
1
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
252 text = unicode(self.striptags().stripentities())
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
253 if not keeplinebreaks:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
254 text = text.replace('\n', ' ')
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
255 return text
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
256
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
257 def sanitize(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
258 from markup.filters import HTMLSanitizer
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
259 from markup.input import HTMLParser
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
260 sanitize = HTMLSanitizer()
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
261 text = self.stripentities(keepxmlentities=True)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
262 return Stream(sanitize(HTMLParser(StringIO(text)), None))
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
263
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
264
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
265 escape = Markup.escape
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
266
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
267 def unescape(text):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
268 """Reverse-escapes &, <, > and \" and returns a `unicode` object."""
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
269 if not isinstance(text, Markup):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
270 return text
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
271 return text.unescape()
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
272
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
273
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
274 class Namespace(object):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
275
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
276 def __init__(self, uri):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
277 self.uri = uri
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
278
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
279 def __getitem__(self, name):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
280 return QName(self.uri + '}' + name)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
281
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
282 __getattr__ = __getitem__
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
283
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
284 def __repr__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
285 return '<Namespace "%s">' % self.uri
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
286
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
287 def __str__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
288 return self.uri
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
289
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
290 def __unicode__(self):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
291 return unicode(self.uri)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
292
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
293
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
294 class QName(unicode):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
295 """A qualified element or attribute name.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
296
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
297 The unicode value of instances of this class contains the qualified name of
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
298 the element or attribute, in the form `{namespace}localname`. The namespace
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
299 URI can be obtained through the additional `namespace` attribute, while the
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
300 local name can be accessed through the `localname` attribute.
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
301 """
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
302 __slots__ = ['namespace', 'localname']
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
303
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
304 def __new__(cls, qname):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
305 if isinstance(qname, QName):
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
306 return qname
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
307
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
308 parts = qname.split('}', 1)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
309 if qname.find('}') > 0:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
310 self = unicode.__new__(cls, '{' + qname)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
311 self.namespace = parts[0]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
312 self.localname = parts[1]
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
313 else:
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
314 self = unicode.__new__(cls, qname)
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
315 self.namespace = None
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
316 self.localname = qname
821114ec4f69 Initial import.
cmlenz
parents:
diff changeset
317 return self
Copyright (C) 2012-2017 Edgewall Software