Mercurial > genshi > mirror
comparison genshi/core.py @ 958:6fc92535c888 experimental-performance-improvement-exploration
Be more careful about what is passed into streams as events and remove many uses of _ensure as a result. An ATTRS event is added for handling Attributes returned by genshi.path.select().
author | hodgestar |
---|---|
date | Tue, 13 Mar 2012 03:03:02 +0000 |
parents | 417787b9b9a7 |
children |
comparison
equal
deleted
inserted
replaced
957:51ab60299647 | 958:6fc92535c888 |
---|---|
105 >>> print(html | sanitizer) | 105 >>> print(html | sanitizer) |
106 <p>Hello, world!</p> | 106 <p>Hello, world!</p> |
107 | 107 |
108 Filters can be any function that accepts and produces a stream (where | 108 Filters can be any function that accepts and produces a stream (where |
109 a stream is anything that iterates over events): | 109 a stream is anything that iterates over events): |
110 | 110 |
111 >>> def uppercase(stream): | 111 >>> def uppercase(stream): |
112 ... for kind, data, pos in stream: | 112 ... for kind, data, pos in stream: |
113 ... if kind is TEXT: | 113 ... if kind is TEXT: |
114 ... data = data.upper() | 114 ... data = data.upper() |
115 ... yield kind, data, pos | 115 ... yield kind, data, pos |
128 | 128 |
129 :param function: the callable object that should be applied as a filter | 129 :param function: the callable object that should be applied as a filter |
130 :return: the filtered stream | 130 :return: the filtered stream |
131 :rtype: `Stream` | 131 :rtype: `Stream` |
132 """ | 132 """ |
133 return Stream(_ensure(function(self)), serializer=self.serializer) | 133 # TODO: this is horribly slow because it has to guess whether |
134 # the function passed in is something that produces stream | |
135 # events or something that produces a sequence of strings. | |
136 # Sequences of strings are converted back to a sequence of | |
137 # stream events (and then back to text when rendered). | |
138 events = _possible_text_iterator_to_stream(function(self)) | |
139 return Stream(events, serializer=self.serializer) | |
134 | 140 |
135 def filter(self, *filters): | 141 def filter(self, *filters): |
136 """Apply filters to the stream. | 142 """Apply filters to the stream. |
137 | 143 |
138 This method returns a new stream with the given filters applied. The | 144 This method returns a new stream with the given filters applied. The |
240 :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer | 246 :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer |
241 """ | 247 """ |
242 from genshi.output import get_serializer | 248 from genshi.output import get_serializer |
243 if method is None: | 249 if method is None: |
244 method = self.serializer or 'xml' | 250 method = self.serializer or 'xml' |
245 return get_serializer(method, **kwargs)(_ensure(self)) | 251 return get_serializer(method, **kwargs)(self) |
246 | 252 |
247 def __str__(self): | 253 def __str__(self): |
248 return self.render() | 254 return self.render() |
249 | 255 |
250 def __unicode__(self): | 256 def __unicode__(self): |
265 END_CDATA = Stream.END_CDATA | 271 END_CDATA = Stream.END_CDATA |
266 PI = Stream.PI | 272 PI = Stream.PI |
267 COMMENT = Stream.COMMENT | 273 COMMENT = Stream.COMMENT |
268 | 274 |
269 | 275 |
270 def _ensure(stream): | 276 def _text_event(text): |
271 """Ensure that every item on the stream is actually a markup event.""" | 277 return (TEXT, unicode(text), (None, -1, -1)) |
272 stream = iter(stream) | 278 |
273 event = stream.next() | 279 |
280 def _text_to_stream(text): | |
281 yield _text_event(text) | |
282 | |
283 | |
284 def _possible_text_iterator_to_stream(textiter_or_stream): | |
285 it = iter(textiter_or_stream) | |
286 event = it.next() | |
274 | 287 |
275 # Check whether the iterable is a real markup event stream by examining the | 288 # Check whether the iterable is a real markup event stream by examining the |
276 # first item it yields; if it's not we'll need to do some conversion | 289 # first item it yields; if it's not we'll need to do some conversion |
277 if type(event) is not tuple or len(event) != 3: | 290 if type(event) is not tuple: |
278 for event in chain([event], stream): | 291 yield TEXT, unicode(event), (None, -1, -1) |
279 if hasattr(event, 'totuple'): | 292 for event in it: |
280 event = event.totuple() | 293 yield TEXT, unicode(event), (None, -1, -1) |
281 else: | |
282 event = TEXT, unicode(event), (None, -1, -1) | |
283 yield event | |
284 return | 294 return |
285 | 295 |
286 # This looks like a markup event stream, so we'll just pass it through | 296 # This looks like a markup event stream, so we'll just pass it through |
287 # unchanged | 297 # unchanged |
288 yield event | 298 yield event |
289 for event in stream: | 299 for event in it: |
290 yield event | 300 yield event |
291 | 301 |
292 | 302 |
293 class Attrs(tuple): | 303 class Attrs(tuple): |
294 """Immutable sequence type that stores the attributes of an element. | 304 """Immutable sequence type that stores the attributes of an element. |
341 | 351 |
342 >>> attrs | [('href', 'http://example.org/')] | 352 >>> attrs | [('href', 'http://example.org/')] |
343 Attrs([('href', 'http://example.org/')]) | 353 Attrs([('href', 'http://example.org/')]) |
344 """ | 354 """ |
345 __slots__ = [] | 355 __slots__ = [] |
356 | |
357 ATTRS = StreamEventKind('ATTRS') | |
346 | 358 |
347 def __contains__(self, name): | 359 def __contains__(self, name): |
348 """Return whether the list includes an attribute with the specified | 360 """Return whether the list includes an attribute with the specified |
349 name. | 361 name. |
350 | 362 |
425 for attr, value in self: | 437 for attr, value in self: |
426 if attr == name: | 438 if attr == name: |
427 return value | 439 return value |
428 return default | 440 return default |
429 | 441 |
430 def totuple(self): | 442 def toevent(self): |
431 """Return the attributes as a markup event. | 443 """Return the attributes as a markup event. |
432 | 444 |
433 The returned event is a `TEXT` event, the data is the value of all | 445 The returned event is an `ATTRS` event, the data is the Attrs object. |
434 attributes joined together. | 446 |
435 | 447 >>> a = Attrs([('href', '#'), ('title', 'Foo')]) |
436 >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple() | 448 >>> a.toevent() |
437 ('TEXT', '#Foo', (None, -1, -1)) | 449 ('ATTRS', Attrs([('href', '#'), ('title', 'Foo')]), (None, -1, -1)) |
438 | 450 |
439 :return: a `TEXT` event | 451 :return: an `ATTRS` event |
440 :rtype: `tuple` | 452 :rtype: `tuple` |
441 """ | 453 """ |
442 return TEXT, ''.join([x[1] for x in self]), (None, -1, -1) | 454 return self.ATTRS, self, (None, -1, -1) |
455 | |
456 def concatenate_values(self): | |
457 """Return the values of the attributes concatenated into a string. | |
458 | |
459 >>> a = Attrs([('href', '#'), ('title', 'Foo')]) | |
460 >>> a.concatenate_values() | |
461 '#Foo' | |
462 | |
463 :return: the concatenated attribute values | |
464 :rtype: `str` | |
465 """ | |
466 return ''.join([x[1] for x in self]) | |
467 | |
468 ATTRS = Attrs.ATTRS | |
443 | 469 |
444 | 470 |
445 class Markup(unicode): | 471 class Markup(unicode): |
446 """Marks a string as being safe for inclusion in HTML/XML output without | 472 """Marks a string as being safe for inclusion in HTML/XML output without |
447 needing to be escaped. | 473 needing to be escaped. |