comparison markup/filters.py @ 69:c40a5dcd2b55 trunk

A couple of minor performance improvements.
author cmlenz
date Mon, 10 Jul 2006 17:37:01 +0000
parents 59eb24184e9c
children 01d36818bb3d
comparison
legend: equal | deleted | inserted | replaced
68:e7f91e75b0e1 69:c40a5dcd2b55
17 frozenset 17 frozenset
18 except NameError: 18 except NameError:
19 from sets import ImmutableSet as frozenset 19 from sets import ImmutableSet as frozenset
20 import re 20 import re
21 21
22 from markup.core import Attributes, Markup, Namespace, Stream 22 from markup.core import Attributes, Markup, Namespace
23 from markup.core import END, END_NS, START, START_NS, TEXT
23 from markup.path import Path 24 from markup.path import Path
24 25
25 __all__ = ['IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer'] 26 __all__ = ['IncludeFilter', 'WhitespaceFilter', 'HTMLSanitizer']
26 27
27 28
51 52
52 if ns_prefixes is None: 53 if ns_prefixes is None:
53 ns_prefixes = [] 54 ns_prefixes = []
54 in_fallback = False 55 in_fallback = False
55 include_href, fallback_stream = None, None 56 include_href, fallback_stream = None, None
57 namespace = self.NAMESPACE
56 58
57 for kind, data, pos in stream: 59 for kind, data, pos in stream:
58 60
59 if kind is Stream.START and data[0] in self.NAMESPACE \ 61 if kind is START and not in_fallback and data[0] in namespace:
60 and not in_fallback:
61 tag, attrib = data 62 tag, attrib = data
62 if tag.localname == 'include': 63 if tag.localname == 'include':
63 include_href = attrib.get('href') 64 include_href = attrib.get('href')
64 elif tag.localname == 'fallback': 65 elif tag.localname == 'fallback':
65 in_fallback = True 66 in_fallback = True
66 fallback_stream = [] 67 fallback_stream = []
67 68
68 elif kind is Stream.END and data in self.NAMESPACE: 69 elif kind is END and data in namespace:
69 if data.localname == 'include': 70 if data.localname == 'include':
70 try: 71 try:
71 if not include_href: 72 if not include_href:
72 raise TemplateError('Include misses required ' 73 raise TemplateError('Include misses required '
73 'attribute "href"') 74 'attribute "href"')
89 in_fallback = False 90 in_fallback = False
90 91
91 elif in_fallback: 92 elif in_fallback:
92 fallback_stream.append((kind, data, pos)) 93 fallback_stream.append((kind, data, pos))
93 94
94 elif kind is Stream.START_NS and data[1] == self.NAMESPACE: 95 elif kind is START_NS and data[1] == namespace:
95 ns_prefixes.append(data[0]) 96 ns_prefixes.append(data[0])
96 97
97 elif kind is Stream.END_NS and data in ns_prefixes: 98 elif kind is END_NS and data in ns_prefixes:
98 ns_prefixes.pop() 99 ns_prefixes.pop()
99 100
100 else: 101 else:
101 yield kind, data, pos 102 yield kind, data, pos
102 103
103 104
104 class WhitespaceFilter(object): 105 class WhitespaceFilter(object):
105 """A filter that removes extraneous white space from the stream. 106 """A filter that removes extraneous white space from the stream.
106 107
107 Todo: 108 TODO:
108 * Support for xml:space 109 * Support for xml:space
109 """ 110 """
110
111 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)') 111 _TRAILING_SPACE = re.compile('[ \t]+(?=\n)')
112 _LINE_COLLAPSE = re.compile('\n{2,}') 112 _LINE_COLLAPSE = re.compile('\n{2,}')
113 113
114 def __call__(self, stream, ctxt=None): 114 def __call__(self, stream, ctxt=None):
115 trim_trailing_space = self._TRAILING_SPACE.sub
116 collapse_lines = self._LINE_COLLAPSE.sub
117 mjoin = Markup('').join
118
115 textbuf = [] 119 textbuf = []
116 prev_kind = None
117 for kind, data, pos in stream: 120 for kind, data, pos in stream:
118 if kind is Stream.TEXT: 121 if kind is TEXT:
119 textbuf.append(data) 122 textbuf.append(data)
120 elif prev_kind is Stream.TEXT: 123 else:
121 text = Markup('').join(textbuf, escape_quotes=False) 124 if textbuf:
122 text = self._TRAILING_SPACE.sub('', text) 125 text = mjoin(textbuf, escape_quotes=False)
123 text = self._LINE_COLLAPSE.sub('\n', text) 126 text = trim_trailing_space('', text)
124 yield Stream.TEXT, Markup(text), pos 127 text = collapse_lines('\n', text)
125 del textbuf[:] 128 yield TEXT, Markup(text), pos
126 prev_kind = kind 129 del textbuf[:]
127 if kind is not Stream.TEXT:
128 yield kind, data, pos 130 yield kind, data, pos
129 131 else:
130 if textbuf: 132 if textbuf:
131 text = Markup('').join(textbuf, escape_quotes=False) 133 text = mjoin(textbuf, escape_quotes=False)
132 text = self._TRAILING_SPACE.sub('', text) 134 text = trim_trailing_space('', text)
133 text = self._LINE_COLLAPSE.sub('\n', text) 135 text = collapse_lines('\n', text)
134 yield Stream.TEXT, Markup(text), pos 136 yield TEXT, Markup(text), pos
135 137
136 138
137 class HTMLSanitizer(object): 139 class HTMLSanitizer(object):
138 """A filter that removes potentially dangerous HTML tags and attributes 140 """A filter that removes potentially dangerous HTML tags and attributes
139 from the stream. 141 from the stream.
166 168
167 def __call__(self, stream, ctxt=None): 169 def __call__(self, stream, ctxt=None):
168 waiting_for = None 170 waiting_for = None
169 171
170 for kind, data, pos in stream: 172 for kind, data, pos in stream:
171 if kind is Stream.START: 173 if kind is START:
172 if waiting_for: 174 if waiting_for:
173 continue 175 continue
174 tag, attrib = data 176 tag, attrib = data
175 if tag not in self._SAFE_TAGS: 177 if tag not in self._SAFE_TAGS:
176 waiting_for = tag 178 waiting_for = tag
202 value = '; '.join(decls) 204 value = '; '.join(decls)
203 new_attrib.append((attr, value)) 205 new_attrib.append((attr, value))
204 206
205 yield kind, (tag, new_attrib), pos 207 yield kind, (tag, new_attrib), pos
206 208
207 elif kind is Stream.END: 209 elif kind is END:
208 tag = data 210 tag = data
209 if waiting_for: 211 if waiting_for:
210 if waiting_for == tag: 212 if waiting_for == tag:
211 waiting_for = None 213 waiting_for = None
212 else: 214 else:
Copyright (C) 2012-2017 Edgewall Software