Mercurial > genshi > genshi-test
comparison genshi/filters/transform.py @ 784:67d324a62cc0 experimental-match-fastpaths
update to 0.5.x branch, up through r907
don't know how this fits in with SoC work, but I wanted to do due diligence and keep this branch working in case it someday gets considered for trunk
author | aflett |
---|---|
date | Mon, 21 Jul 2008 23:17:52 +0000 |
parents | b57681255af9 |
children |
comparison
equal
deleted
inserted
replaced
724:8f2c7023af94 | 784:67d324a62cc0 |
---|---|
53 from genshi.builder import Element | 53 from genshi.builder import Element |
54 from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup | 54 from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup |
55 from genshi.path import Path | 55 from genshi.path import Path |
56 | 56 |
57 __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', | 57 __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', |
58 'EXIT', 'INSIDE', 'OUTSIDE'] | 58 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK'] |
59 | 59 |
60 | 60 |
61 class TransformMark(str): | 61 class TransformMark(str): |
62 """A mark on a transformation stream.""" | 62 """A mark on a transformation stream.""" |
63 __slots__ = [] | 63 __slots__ = [] |
83 """Stream augmentation mark indicating a selected element attribute.""" | 83 """Stream augmentation mark indicating a selected element attribute.""" |
84 | 84 |
85 EXIT = TransformMark('EXIT') | 85 EXIT = TransformMark('EXIT') |
86 """Stream augmentation mark indicating that a selected element is being | 86 """Stream augmentation mark indicating that a selected element is being |
87 exited.""" | 87 exited.""" |
88 | |
89 BREAK = TransformMark('BREAK') | |
90 """Stream augmentation mark indicating a break between two otherwise contiguous | |
91 blocks of marked events. | |
92 | |
93 This is used primarily by the cut() transform to provide later transforms with | |
94 an opportunity to operate on the cut buffer. | |
95 """ | |
96 | |
97 | |
98 class PushBackStream(object): | |
99 """Allows a single event to be pushed back onto the stream and re-consumed. | |
100 """ | |
101 def __init__(self, stream): | |
102 self.stream = iter(stream) | |
103 self.peek = None | |
104 | |
105 def push(self, event): | |
106 assert self.peek is None | |
107 self.peek = event | |
108 | |
109 def __iter__(self): | |
110 while True: | |
111 if self.peek is not None: | |
112 peek = self.peek | |
113 self.peek = None | |
114 yield peek | |
115 else: | |
116 try: | |
117 event = self.stream.next() | |
118 yield event | |
119 except StopIteration: | |
120 if self.peek is None: | |
121 raise | |
88 | 122 |
89 | 123 |
90 class Transformer(object): | 124 class Transformer(object): |
91 """Stream filter that can apply a variety of different transformations to | 125 """Stream filter that can apply a variety of different transformations to |
92 a stream. | 126 a stream. |
148 | 182 |
149 :param path: an XPath expression (as string) or a `Path` instance | 183 :param path: an XPath expression (as string) or a `Path` instance |
150 """ | 184 """ |
151 self.transforms = [SelectTransformation(path)] | 185 self.transforms = [SelectTransformation(path)] |
152 | 186 |
153 def __call__(self, stream): | 187 def __call__(self, stream, keep_marks=False): |
154 """Apply the transform filter to the marked stream. | 188 """Apply the transform filter to the marked stream. |
155 | 189 |
156 :param stream: the marked event stream to filter | 190 :param stream: the marked event stream to filter |
191 :param keep_marks: Do not strip transformer selection marks from the | |
192 stream. Useful for testing. | |
157 :return: the transformed stream | 193 :return: the transformed stream |
158 :rtype: `Stream` | 194 :rtype: `Stream` |
159 """ | 195 """ |
160 transforms = self._mark(stream) | 196 transforms = self._mark(stream) |
161 for link in self.transforms: | 197 for link in self.transforms: |
162 transforms = link(transforms) | 198 transforms = link(transforms) |
163 return Stream(self._unmark(transforms), | 199 if not keep_marks: |
200 transforms = self._unmark(transforms) | |
201 return Stream(transforms, | |
164 serializer=getattr(stream, 'serializer', None)) | 202 serializer=getattr(stream, 'serializer', None)) |
165 | 203 |
166 def apply(self, function): | 204 def apply(self, function): |
167 """Apply a transformation to the stream. | 205 """Apply a transformation to the stream. |
168 | 206 |
327 ... '<body>Some <em>body</em> text.</body></html>') | 365 ... '<body>Some <em>body</em> text.</body></html>') |
328 >>> print html | Transformer('.//title/text()').replace('New Title') | 366 >>> print html | Transformer('.//title/text()').replace('New Title') |
329 <html><head><title>New Title</title></head><body>Some <em>body</em> | 367 <html><head><title>New Title</title></head><body>Some <em>body</em> |
330 text.</body></html> | 368 text.</body></html> |
331 | 369 |
332 :param content: Either an iterable of events or a string to insert. | 370 :param content: Either a callable, an iterable of events, or a string |
371 to insert. | |
333 :rtype: `Transformer` | 372 :rtype: `Transformer` |
334 """ | 373 """ |
335 return self.apply(ReplaceTransformation(content)) | 374 return self.apply(ReplaceTransformation(content)) |
336 | 375 |
337 def before(self, content): | 376 def before(self, content): |
344 ... '<body>Some <em>body</em> text.</body></html>') | 383 ... '<body>Some <em>body</em> text.</body></html>') |
345 >>> print html | Transformer('.//em').before('emphasised ') | 384 >>> print html | Transformer('.//em').before('emphasised ') |
346 <html><head><title>Some Title</title></head><body>Some emphasised | 385 <html><head><title>Some Title</title></head><body>Some emphasised |
347 <em>body</em> text.</body></html> | 386 <em>body</em> text.</body></html> |
348 | 387 |
349 :param content: Either an iterable of events or a string to insert. | 388 :param content: Either a callable, an iterable of events, or a string |
389 to insert. | |
350 :rtype: `Transformer` | 390 :rtype: `Transformer` |
351 """ | 391 """ |
352 return self.apply(BeforeTransformation(content)) | 392 return self.apply(BeforeTransformation(content)) |
353 | 393 |
354 def after(self, content): | 394 def after(self, content): |
360 ... '<body>Some <em>body</em> text.</body></html>') | 400 ... '<body>Some <em>body</em> text.</body></html>') |
361 >>> print html | Transformer('.//em').after(' rock') | 401 >>> print html | Transformer('.//em').after(' rock') |
362 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 402 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
363 rock text.</body></html> | 403 rock text.</body></html> |
364 | 404 |
365 :param content: Either an iterable of events or a string to insert. | 405 :param content: Either a callable, an iterable of events, or a string |
406 to insert. | |
366 :rtype: `Transformer` | 407 :rtype: `Transformer` |
367 """ | 408 """ |
368 return self.apply(AfterTransformation(content)) | 409 return self.apply(AfterTransformation(content)) |
369 | 410 |
370 def prepend(self, content): | 411 def prepend(self, content): |
376 ... '<body>Some <em>body</em> text.</body></html>') | 417 ... '<body>Some <em>body</em> text.</body></html>') |
377 >>> print html | Transformer('.//body').prepend('Some new body text. ') | 418 >>> print html | Transformer('.//body').prepend('Some new body text. ') |
378 <html><head><title>Some Title</title></head><body>Some new body text. | 419 <html><head><title>Some Title</title></head><body>Some new body text. |
379 Some <em>body</em> text.</body></html> | 420 Some <em>body</em> text.</body></html> |
380 | 421 |
381 :param content: Either an iterable of events or a string to insert. | 422 :param content: Either a callable, an iterable of events, or a string |
423 to insert. | |
382 :rtype: `Transformer` | 424 :rtype: `Transformer` |
383 """ | 425 """ |
384 return self.apply(PrependTransformation(content)) | 426 return self.apply(PrependTransformation(content)) |
385 | 427 |
386 def append(self, content): | 428 def append(self, content): |
390 ... '<body>Some <em>body</em> text.</body></html>') | 432 ... '<body>Some <em>body</em> text.</body></html>') |
391 >>> print html | Transformer('.//body').append(' Some new body text.') | 433 >>> print html | Transformer('.//body').append(' Some new body text.') |
392 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 434 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
393 text. Some new body text.</body></html> | 435 text. Some new body text.</body></html> |
394 | 436 |
395 :param content: Either an iterable of events or a string to insert. | 437 :param content: Either a callable, an iterable of events, or a string |
438 to insert. | |
396 :rtype: `Transformer` | 439 :rtype: `Transformer` |
397 """ | 440 """ |
398 return self.apply(AppendTransformation(content)) | 441 return self.apply(AppendTransformation(content)) |
399 | 442 |
400 #{ Attribute manipulation | 443 #{ Attribute manipulation |
438 """ | 481 """ |
439 return self.apply(AttrTransformation(name, value)) | 482 return self.apply(AttrTransformation(name, value)) |
440 | 483 |
441 #{ Buffer operations | 484 #{ Buffer operations |
442 | 485 |
443 def copy(self, buffer): | 486 def copy(self, buffer, accumulate=False): |
444 """Copy selection into buffer. | 487 """Copy selection into buffer. |
488 | |
489 The buffer is replaced by each *contiguous* selection before being passed | |
490 to the next transformation. If accumulate=True, further selections will | |
491 be appended to the buffer rather than replacing it. | |
445 | 492 |
446 >>> from genshi.builder import tag | 493 >>> from genshi.builder import tag |
447 >>> buffer = StreamBuffer() | 494 >>> buffer = StreamBuffer() |
448 >>> html = HTML('<html><head><title>Some Title</title></head>' | 495 >>> html = HTML('<html><head><title>Some Title</title></head>' |
449 ... '<body>Some <em>body</em> text.</body></html>') | 496 ... '<body>Some <em>body</em> text.</body></html>') |
450 >>> print html | Transformer('title/text()').copy(buffer) \\ | 497 >>> print html | Transformer('title/text()').copy(buffer) \\ |
451 ... .end().select('body').prepend(tag.h1(buffer)) | 498 ... .end().select('body').prepend(tag.h1(buffer)) |
452 <html><head><title>Some Title</title></head><body><h1>Some | 499 <html><head><title>Some Title</title></head><body><h1>Some |
453 Title</h1>Some <em>body</em> text.</body></html> | 500 Title</h1>Some <em>body</em> text.</body></html> |
454 | 501 |
455 To ensure that a transformation can be reused deterministically, the | 502 This example illustrates that only a single contiguous selection will |
456 contents of ``buffer`` is replaced by the ``copy()`` operation: | 503 be buffered: |
457 | 504 |
458 >>> print buffer | |
459 Some Title | |
460 >>> print html | Transformer('head/title/text()').copy(buffer) \\ | 505 >>> print html | Transformer('head/title/text()').copy(buffer) \\ |
461 ... .end().select('body/em').copy(buffer).end().select('body') \\ | 506 ... .end().select('body/em').copy(buffer).end().select('body') \\ |
462 ... .prepend(tag.h1(buffer)) | 507 ... .prepend(tag.h1(buffer)) |
463 <html><head><title>Some | 508 <html><head><title>Some Title</title></head><body><h1>Some |
464 Title</title></head><body><h1><em>body</em></h1>Some <em>body</em> | 509 Title</h1>Some <em>body</em> text.</body></html> |
465 text.</body></html> | |
466 >>> print buffer | 510 >>> print buffer |
467 <em>body</em> | 511 <em>body</em> |
468 | 512 |
469 Element attributes can also be copied for later use: | 513 Element attributes can also be copied for later use: |
470 | 514 |
473 ... '<em>text</em>.</body></html>') | 517 ... '<em>text</em>.</body></html>') |
474 >>> buffer = StreamBuffer() | 518 >>> buffer = StreamBuffer() |
475 >>> def apply_attr(name, entry): | 519 >>> def apply_attr(name, entry): |
476 ... return list(buffer)[0][1][1].get('class') | 520 ... return list(buffer)[0][1][1].get('class') |
477 >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\ | 521 >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\ |
478 ... .end().select('body/em[not(@class)]').attr('class', apply_attr) | 522 ... .end().buffer().select('body/em[not(@class)]') \\ |
523 ... .attr('class', apply_attr) | |
479 <html><head><title>Some Title</title></head><body><em | 524 <html><head><title>Some Title</title></head><body><em |
480 class="before">Some</em> <em class="before">body</em><em | 525 class="before">Some</em> <em class="before">body</em><em |
481 class="before">text</em>.</body></html> | 526 class="before">text</em>.</body></html> |
482 | 527 |
483 | 528 |
484 :param buffer: the `StreamBuffer` in which the selection should be | 529 :param buffer: the `StreamBuffer` in which the selection should be |
485 stored | 530 stored |
486 :rtype: `Transformer` | 531 :rtype: `Transformer` |
487 :note: this transformation will buffer the entire input stream | 532 :note: Copy (and cut) copy each individual selected object into the |
488 """ | 533 buffer before passing to the next transform. For example, the |
489 return self.apply(CopyTransformation(buffer)) | 534 XPath ``*|text()`` will select all elements and text, each |
490 | 535 instance of which will be copied to the buffer individually |
491 def cut(self, buffer): | 536 before passing to the next transform. This has implications for |
537 how ``StreamBuffer`` objects can be used, so some | |
538 experimentation may be required. | |
539 | |
540 """ | |
541 return self.apply(CopyTransformation(buffer, accumulate)) | |
542 | |
543 def cut(self, buffer, accumulate=False): | |
492 """Copy selection into buffer and remove the selection from the stream. | 544 """Copy selection into buffer and remove the selection from the stream. |
493 | 545 |
494 >>> from genshi.builder import tag | 546 >>> from genshi.builder import tag |
495 >>> buffer = StreamBuffer() | 547 >>> buffer = StreamBuffer() |
496 >>> html = HTML('<html><head><title>Some Title</title></head>' | 548 >>> html = HTML('<html><head><title>Some Title</title></head>' |
498 >>> print html | Transformer('.//em/text()').cut(buffer) \\ | 550 >>> print html | Transformer('.//em/text()').cut(buffer) \\ |
499 ... .end().select('.//em').after(tag.h1(buffer)) | 551 ... .end().select('.//em').after(tag.h1(buffer)) |
500 <html><head><title>Some Title</title></head><body>Some | 552 <html><head><title>Some Title</title></head><body>Some |
501 <em/><h1>body</h1> text.</body></html> | 553 <em/><h1>body</h1> text.</body></html> |
502 | 554 |
555 Specifying accumulate=True appends all selected intervals onto the | |
556 buffer. Combining this with the .buffer() operation allows us to operate | |
557 on all copied events rather than per-segment. See the documentation on | |
558 buffer() for more information. | |
559 | |
503 :param buffer: the `StreamBuffer` in which the selection should be | 560 :param buffer: the `StreamBuffer` in which the selection should be |
504 stored | 561 stored |
505 :rtype: `Transformer` | 562 :rtype: `Transformer` |
506 :note: this transformation will buffer the entire input stream | 563 :note: this transformation will buffer the entire input stream |
507 """ | 564 """ |
508 return self.apply(CutTransformation(buffer)) | 565 return self.apply(CutTransformation(buffer, accumulate)) |
566 | |
567 def buffer(self): | |
568 """Buffer the entire stream (can consume a considerable amount of | |
569 memory). | |
570 | |
571 Useful in conjunction with copy(accumulate=True) and | |
572 cut(accumulate=True) to ensure that all marked events in the entire | |
573 stream are copied to the buffer before further transformations are | |
574 applied. | |
575 | |
576 For example, to move all <note> elements inside a <notes> tag at the | |
577 top of the document: | |
578 | |
579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' | |
580 ... 'text <note>two</note>.</body></doc>') | |
581 >>> buffer = StreamBuffer() | |
582 >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\ | |
583 ... .end().buffer().select('notes').prepend(buffer) | |
584 <doc><notes><note>one</note><note>two</note></notes><body>Some text | |
585 .</body></doc> | |
586 | |
587 """ | |
588 return self.apply(list) | |
509 | 589 |
510 #{ Miscellaneous operations | 590 #{ Miscellaneous operations |
511 | 591 |
512 def filter(self, filter): | 592 def filter(self, filter): |
513 """Apply a normal stream filter to the selection. The filter is called | 593 """Apply a normal stream filter to the selection. The filter is called |
544 """Replace text matching a regular expression. | 624 """Replace text matching a regular expression. |
545 | 625 |
546 Refer to the documentation for ``re.sub()`` for details. | 626 Refer to the documentation for ``re.sub()`` for details. |
547 | 627 |
548 >>> html = HTML('<html><body>Some text, some more text and ' | 628 >>> html = HTML('<html><body>Some text, some more text and ' |
549 ... '<b>some bold text</b></body></html>') | 629 ... '<b>some bold text</b>\\n' |
550 >>> print html | Transformer('body').substitute('(?i)some', 'SOME') | 630 ... '<i>some italicised text</i></body></html>') |
551 <html><body>SOME text, some more text and <b>SOME bold text</b></body></html> | 631 >>> print html | Transformer('body/b').substitute('(?i)some', 'SOME') |
552 >>> tags = tag.html(tag.body('Some text, some more text and ', | 632 <html><body>Some text, some more text and <b>SOME bold text</b> |
633 <i>some italicised text</i></body></html> | |
634 >>> tags = tag.html(tag.body('Some text, some more text and\\n', | |
553 ... Markup('<b>some bold text</b>'))) | 635 ... Markup('<b>some bold text</b>'))) |
554 >>> print tags.generate() | Transformer('body').substitute('(?i)some', 'SOME') | 636 >>> print tags.generate() | Transformer('body').substitute( |
555 <html><body>SOME text, some more text and <b>SOME bold text</b></body></html> | 637 ... '(?i)some', 'SOME') |
638 <html><body>SOME text, some more text and | |
639 <b>SOME bold text</b></body></html> | |
556 | 640 |
557 :param pattern: A regular expression object or string. | 641 :param pattern: A regular expression object or string. |
558 :param replace: Replacement pattern. | 642 :param replace: Replacement pattern. |
559 :param count: Number of replacements to make in each text fragment. | 643 :param count: Number of replacements to make in each text fragment. |
560 :rtype: `Transformer` | 644 :rtype: `Transformer` |
598 for event in stream: | 682 for event in stream: |
599 yield OUTSIDE, event | 683 yield OUTSIDE, event |
600 | 684 |
601 def _unmark(self, stream): | 685 def _unmark(self, stream): |
602 for mark, event in stream: | 686 for mark, event in stream: |
603 if event[0] is not None: | 687 kind = event[0] |
688 if not (kind is None or kind is ATTR or kind is BREAK): | |
604 yield event | 689 yield event |
605 | 690 |
606 | 691 |
607 class SelectTransformation(object): | 692 class SelectTransformation(object): |
608 """Select and mark events that match an XPath expression.""" | 693 """Select and mark events that match an XPath expression.""" |
650 else: | 735 else: |
651 yield OUTSIDE, event | 736 yield OUTSIDE, event |
652 elif isinstance(result, Attrs): | 737 elif isinstance(result, Attrs): |
653 # XXX Selected *attributes* are given a "kind" of None to | 738 # XXX Selected *attributes* are given a "kind" of None to |
654 # indicate they are not really part of the stream. | 739 # indicate they are not really part of the stream. |
655 yield ATTR, (None, (QName(event[1][0] + '@*'), result), event[2]) | 740 yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2]) |
656 yield None, event | 741 yield None, event |
742 elif isinstance(result, tuple): | |
743 yield OUTSIDE, result | |
657 elif result: | 744 elif result: |
745 # XXX Assume everything else is "text"? | |
658 yield None, (TEXT, unicode(result), (None, -1, -1)) | 746 yield None, (TEXT, unicode(result), (None, -1, -1)) |
659 else: | 747 else: |
660 yield None, event | 748 yield None, event |
661 | 749 |
662 | 750 |
698 """Apply the transform filter to the marked stream. | 786 """Apply the transform filter to the marked stream. |
699 | 787 |
700 :param stream: the marked event stream to filter | 788 :param stream: the marked event stream to filter |
701 """ | 789 """ |
702 for mark, event in stream: | 790 for mark, event in stream: |
703 if mark not in (INSIDE, OUTSIDE): | 791 yield mark, event |
704 yield mark, event | 792 if mark is ENTER: |
793 for mark, event in stream: | |
794 if mark is EXIT: | |
795 yield mark, event | |
796 break | |
705 | 797 |
706 | 798 |
707 class RemoveTransformation(object): | 799 class RemoveTransformation(object): |
708 """Remove selection from the stream.""" | 800 """Remove selection from the stream.""" |
709 | 801 |
744 if mark: | 836 if mark: |
745 element = list(self.element.generate()) | 837 element = list(self.element.generate()) |
746 for prefix in element[:-1]: | 838 for prefix in element[:-1]: |
747 yield None, prefix | 839 yield None, prefix |
748 yield mark, event | 840 yield mark, event |
749 while True: | 841 start = mark |
750 try: | 842 stopped = False |
751 mark, event = stream.next() | 843 for mark, event in stream: |
752 except StopIteration: | 844 if start is ENTER and mark is EXIT: |
753 yield None, element[-1] | 845 yield mark, event |
846 stopped = True | |
847 break | |
754 if not mark: | 848 if not mark: |
755 break | 849 break |
756 yield mark, event | 850 yield mark, event |
851 else: | |
852 stopped = True | |
757 yield None, element[-1] | 853 yield None, element[-1] |
758 yield mark, event | 854 if not stopped: |
855 yield mark, event | |
759 else: | 856 else: |
760 yield mark, event | 857 yield mark, event |
761 | 858 |
762 | 859 |
763 class TraceTransformation(object): | 860 class TraceTransformation(object): |
782 yield event | 879 yield event |
783 | 880 |
784 | 881 |
785 class FilterTransformation(object): | 882 class FilterTransformation(object): |
786 """Apply a normal stream filter to the selection. The filter is called once | 883 """Apply a normal stream filter to the selection. The filter is called once |
787 for each contiguous block of marked events.""" | 884 for each selection.""" |
788 | 885 |
789 def __init__(self, filter): | 886 def __init__(self, filter): |
790 """Create the transform. | 887 """Create the transform. |
791 | 888 |
792 :param filter: The stream filter to apply. | 889 :param filter: The stream filter to apply. |
804 yield OUTSIDE, event | 901 yield OUTSIDE, event |
805 del queue[:] | 902 del queue[:] |
806 | 903 |
807 queue = [] | 904 queue = [] |
808 for mark, event in stream: | 905 for mark, event in stream: |
809 if mark: | 906 if mark is ENTER: |
810 queue.append(event) | 907 queue.append(event) |
811 else: | 908 for mark, event in stream: |
909 queue.append(event) | |
910 if mark is EXIT: | |
911 break | |
812 for queue_event in flush(queue): | 912 for queue_event in flush(queue): |
813 yield queue_event | 913 yield queue_event |
814 yield None, event | 914 elif mark is OUTSIDE: |
815 for event in flush(queue): | 915 stopped = True |
816 yield event | 916 queue.append(event) |
917 for mark, event in stream: | |
918 if mark is not OUTSIDE: | |
919 break | |
920 queue.append(event) | |
921 else: | |
922 stopped = True | |
923 for queue_event in flush(queue): | |
924 yield queue_event | |
925 if not stopped: | |
926 yield None, event | |
927 else: | |
928 yield mark, event | |
929 for queue_event in flush(queue): | |
930 yield queue_event | |
817 | 931 |
818 | 932 |
819 class MapTransformation(object): | 933 class MapTransformation(object): |
820 """Apply a function to the `data` element of events of ``kind`` in the | 934 """Apply a function to the `data` element of events of ``kind`` in the |
821 selection. | 935 selection. |
846 class SubstituteTransformation(object): | 960 class SubstituteTransformation(object): |
847 """Replace text matching a regular expression. | 961 """Replace text matching a regular expression. |
848 | 962 |
849 Refer to the documentation for ``re.sub()`` for details. | 963 Refer to the documentation for ``re.sub()`` for details. |
850 """ | 964 """ |
851 def __init__(self, pattern, replace, count=1): | 965 def __init__(self, pattern, replace, count=0): |
852 """Create the transform. | 966 """Create the transform. |
853 | 967 |
854 :param pattern: A regular expression object, or string. | 968 :param pattern: A regular expression object, or string. |
855 :param replace: Replacement pattern. | 969 :param replace: Replacement pattern. |
856 :param count: Number of replacements to make in each text fragment. | 970 :param count: Number of replacements to make in each text fragment. |
866 """Apply the transform filter to the marked stream. | 980 """Apply the transform filter to the marked stream. |
867 | 981 |
868 :param stream: The marked event stream to filter | 982 :param stream: The marked event stream to filter |
869 """ | 983 """ |
870 for mark, (kind, data, pos) in stream: | 984 for mark, (kind, data, pos) in stream: |
871 if kind is TEXT: | 985 if mark is not None and kind is TEXT: |
872 new_data = self.pattern.sub(self.replace, data, self.count) | 986 new_data = self.pattern.sub(self.replace, data, self.count) |
873 if isinstance(data, Markup): | 987 if isinstance(data, Markup): |
874 data = Markup(new_data) | 988 data = Markup(new_data) |
875 else: | 989 else: |
876 data = new_data | 990 data = new_data |
920 injected. | 1034 injected. |
921 """ | 1035 """ |
922 self.content = content | 1036 self.content = content |
923 | 1037 |
924 def _inject(self): | 1038 def _inject(self): |
925 for event in _ensure(self.content): | 1039 content = self.content |
1040 if callable(content): | |
1041 content = content() | |
1042 for event in _ensure(content): | |
926 yield None, event | 1043 yield None, event |
927 | 1044 |
928 | 1045 |
929 class ReplaceTransformation(InjectorTransformation): | 1046 class ReplaceTransformation(InjectorTransformation): |
930 """Replace selection with content.""" | 1047 """Replace selection with content.""" |
932 def __call__(self, stream): | 1049 def __call__(self, stream): |
933 """Apply the transform filter to the marked stream. | 1050 """Apply the transform filter to the marked stream. |
934 | 1051 |
935 :param stream: The marked event stream to filter | 1052 :param stream: The marked event stream to filter |
936 """ | 1053 """ |
1054 stream = PushBackStream(stream) | |
937 for mark, event in stream: | 1055 for mark, event in stream: |
938 if mark is not None: | 1056 if mark is not None: |
1057 start = mark | |
939 for subevent in self._inject(): | 1058 for subevent in self._inject(): |
940 yield subevent | 1059 yield subevent |
941 while True: | 1060 for mark, event in stream: |
942 mark, event = stream.next() | 1061 if start is ENTER: |
943 if mark is None: | 1062 if mark is EXIT: |
944 yield mark, event | 1063 break |
1064 elif mark != start: | |
1065 stream.push((mark, event)) | |
945 break | 1066 break |
946 else: | 1067 else: |
947 yield mark, event | 1068 yield mark, event |
948 | 1069 |
949 | 1070 |
953 def __call__(self, stream): | 1074 def __call__(self, stream): |
954 """Apply the transform filter to the marked stream. | 1075 """Apply the transform filter to the marked stream. |
955 | 1076 |
956 :param stream: The marked event stream to filter | 1077 :param stream: The marked event stream to filter |
957 """ | 1078 """ |
1079 stream = PushBackStream(stream) | |
958 for mark, event in stream: | 1080 for mark, event in stream: |
959 if mark is not None: | 1081 if mark is not None: |
1082 start = mark | |
960 for subevent in self._inject(): | 1083 for subevent in self._inject(): |
961 yield subevent | 1084 yield subevent |
962 yield mark, event | 1085 yield mark, event |
963 while True: | 1086 for mark, event in stream: |
964 mark, event = stream.next() | 1087 if mark != start and start is not ENTER: |
965 if not mark: | 1088 stream.push((mark, event)) |
966 break | 1089 break |
967 yield mark, event | 1090 yield mark, event |
968 yield mark, event | 1091 if start is ENTER and mark is EXIT: |
1092 break | |
1093 else: | |
1094 yield mark, event | |
969 | 1095 |
970 | 1096 |
971 class AfterTransformation(InjectorTransformation): | 1097 class AfterTransformation(InjectorTransformation): |
972 """Insert content after selection.""" | 1098 """Insert content after selection.""" |
973 | 1099 |
974 def __call__(self, stream): | 1100 def __call__(self, stream): |
975 """Apply the transform filter to the marked stream. | 1101 """Apply the transform filter to the marked stream. |
976 | 1102 |
977 :param stream: The marked event stream to filter | 1103 :param stream: The marked event stream to filter |
978 """ | 1104 """ |
1105 stream = PushBackStream(stream) | |
979 for mark, event in stream: | 1106 for mark, event in stream: |
980 yield mark, event | 1107 yield mark, event |
981 if mark: | 1108 if mark: |
982 while True: | 1109 start = mark |
983 try: | 1110 for mark, event in stream: |
984 mark, event = stream.next() | 1111 if start is not ENTER and mark != start: |
985 except StopIteration: | 1112 stream.push((mark, event)) |
986 break | |
987 if not mark: | |
988 break | 1113 break |
989 yield mark, event | 1114 yield mark, event |
1115 if start is ENTER and mark is EXIT: | |
1116 break | |
990 for subevent in self._inject(): | 1117 for subevent in self._inject(): |
991 yield subevent | 1118 yield subevent |
992 yield mark, event | |
993 | 1119 |
994 | 1120 |
995 class PrependTransformation(InjectorTransformation): | 1121 class PrependTransformation(InjectorTransformation): |
996 """Prepend content to the inside of selected elements.""" | 1122 """Prepend content to the inside of selected elements.""" |
997 | 1123 |
1000 | 1126 |
1001 :param stream: The marked event stream to filter | 1127 :param stream: The marked event stream to filter |
1002 """ | 1128 """ |
1003 for mark, event in stream: | 1129 for mark, event in stream: |
1004 yield mark, event | 1130 yield mark, event |
1005 if mark in (ENTER, OUTSIDE): | 1131 if mark is ENTER: |
1006 for subevent in self._inject(): | 1132 for subevent in self._inject(): |
1007 yield subevent | 1133 yield subevent |
1008 | 1134 |
1009 | 1135 |
1010 class AppendTransformation(InjectorTransformation): | 1136 class AppendTransformation(InjectorTransformation): |
1016 :param stream: The marked event stream to filter | 1142 :param stream: The marked event stream to filter |
1017 """ | 1143 """ |
1018 for mark, event in stream: | 1144 for mark, event in stream: |
1019 yield mark, event | 1145 yield mark, event |
1020 if mark is ENTER: | 1146 if mark is ENTER: |
1021 while True: | 1147 for mark, event in stream: |
1022 mark, event = stream.next() | |
1023 if mark is EXIT: | 1148 if mark is EXIT: |
1024 break | 1149 break |
1025 yield mark, event | 1150 yield mark, event |
1026 for subevent in self._inject(): | 1151 for subevent in self._inject(): |
1027 yield subevent | 1152 yield subevent |
1074 :param event: the markup event to add | 1199 :param event: the markup event to add |
1075 """ | 1200 """ |
1076 self.events.append(event) | 1201 self.events.append(event) |
1077 | 1202 |
1078 def reset(self): | 1203 def reset(self): |
1079 """Reset the buffer so that it's empty.""" | 1204 """Empty the buffer of events.""" |
1080 del self.events[:] | 1205 del self.events[:] |
1081 | 1206 |
1082 | 1207 |
1083 class CopyTransformation(object): | 1208 class CopyTransformation(object): |
1084 """Copy selected events into a buffer for later insertion.""" | 1209 """Copy selected events into a buffer for later insertion.""" |
1085 | 1210 |
1086 def __init__(self, buffer): | 1211 def __init__(self, buffer, accumulate=False): |
1087 """Create the copy transformation. | 1212 """Create the copy transformation. |
1088 | 1213 |
1089 :param buffer: the `StreamBuffer` in which the selection should be | 1214 :param buffer: the `StreamBuffer` in which the selection should be |
1090 stored | 1215 stored |
1091 """ | 1216 """ |
1217 if not accumulate: | |
1218 buffer.reset() | |
1092 self.buffer = buffer | 1219 self.buffer = buffer |
1220 self.accumulate = accumulate | |
1093 | 1221 |
1094 def __call__(self, stream): | 1222 def __call__(self, stream): |
1095 """Apply the transformation to the marked stream. | 1223 """Apply the transformation to the marked stream. |
1096 | 1224 |
1097 :param stream: the marked event stream to filter | 1225 :param stream: the marked event stream to filter |
1098 """ | 1226 """ |
1099 self.buffer.reset() | 1227 stream = PushBackStream(stream) |
1100 stream = list(stream) | 1228 |
1101 for mark, event in stream: | 1229 for mark, event in stream: |
1102 if mark: | 1230 if mark: |
1231 if not self.accumulate: | |
1232 self.buffer.reset() | |
1233 events = [(mark, event)] | |
1103 self.buffer.append(event) | 1234 self.buffer.append(event) |
1104 return stream | 1235 start = mark |
1236 for mark, event in stream: | |
1237 if start is not ENTER and mark != start: | |
1238 stream.push((mark, event)) | |
1239 break | |
1240 events.append((mark, event)) | |
1241 self.buffer.append(event) | |
1242 if start is ENTER and mark is EXIT: | |
1243 break | |
1244 for i in events: | |
1245 yield i | |
1246 else: | |
1247 yield mark, event | |
1105 | 1248 |
1106 | 1249 |
1107 class CutTransformation(object): | 1250 class CutTransformation(object): |
1108 """Cut selected events into a buffer for later insertion and remove the | 1251 """Cut selected events into a buffer for later insertion and remove the |
1109 selection. | 1252 selection. |
1110 """ | 1253 """ |
1111 | 1254 |
1112 def __init__(self, buffer): | 1255 def __init__(self, buffer, accumulate=False): |
1113 """Create the cut transformation. | 1256 """Create the cut transformation. |
1114 | 1257 |
1115 :param buffer: the `StreamBuffer` in which the selection should be | 1258 :param buffer: the `StreamBuffer` in which the selection should be |
1116 stored | 1259 stored |
1117 """ | 1260 """ |
1118 self.buffer = buffer | 1261 self.buffer = buffer |
1262 self.accumulate = accumulate | |
1263 | |
1119 | 1264 |
1120 def __call__(self, stream): | 1265 def __call__(self, stream): |
1121 """Apply the transform filter to the marked stream. | 1266 """Apply the transform filter to the marked stream. |
1122 | 1267 |
1123 :param stream: the marked event stream to filter | 1268 :param stream: the marked event stream to filter |
1124 """ | 1269 """ |
1125 out_stream = [] | 1270 attributes = [] |
1126 attributes = None | 1271 stream = PushBackStream(stream) |
1127 for mark, (kind, data, pos) in stream: | 1272 broken = False |
1128 if attributes: | 1273 if not self.accumulate: |
1129 assert kind is START | 1274 self.buffer.reset() |
1130 data = (data[0], data[1] - attributes) | 1275 for mark, event in stream: |
1131 attributes = None | |
1132 if mark: | 1276 if mark: |
1133 # There is some magic here. ATTR marked events are pushed into | 1277 # Send a BREAK event if there was no other event sent between |
1134 # the stream *before* the START event they originated from. | 1278 if not self.accumulate: |
1135 # This allows cut() to strip out the attributes from START | 1279 if not broken and self.buffer: |
1136 # event as would be expected. | 1280 yield BREAK, (BREAK, None, None) |
1281 self.buffer.reset() | |
1282 self.buffer.append(event) | |
1283 start = mark | |
1137 if mark is ATTR: | 1284 if mark is ATTR: |
1138 self.buffer.append((kind, data, pos)) | 1285 attributes.extend([name for name, _ in event[1][1]]) |
1139 attributes = [name for name, _ in data[1]] | 1286 for mark, event in stream: |
1140 else: | 1287 if start is mark is ATTR: |
1141 self.buffer.append((kind, data, pos)) | 1288 attributes.extend([name for name, _ in event[1][1]]) |
1289 # Handle non-element contiguous selection | |
1290 if start is not ENTER and mark != start: | |
1291 # Operating on the attributes of a START event | |
1292 if start is ATTR: | |
1293 kind, data, pos = event | |
1294 assert kind is START | |
1295 data = (data[0], data[1] - attributes) | |
1296 attributes = None | |
1297 stream.push((mark, (kind, data, pos))) | |
1298 else: | |
1299 stream.push((mark, event)) | |
1300 break | |
1301 self.buffer.append(event) | |
1302 if start is ENTER and mark is EXIT: | |
1303 break | |
1304 broken = False | |
1142 else: | 1305 else: |
1143 out_stream.append((mark, (kind, data, pos))) | 1306 broken = True |
1144 return out_stream | 1307 yield mark, event |
1308 if not broken and self.buffer: | |
1309 yield BREAK, (BREAK, None, None) |