Mercurial > genshi > mirror
comparison genshi/filters/transform.py @ 744:cd6624cf2f7c trunk
Lots of `Transformer` cleanup:
- Content-insertion transformations (before, after, etc.) now accept a callable.
- `.prepend()` now ''only'' operates on elements. Previously it also operated on `OUTSIDE` marked events.
- Where it makes sense, transformations are now ''consistently'' applied to individually selected objects in the document, rather than on any contiguous selection. This means that adjacent selected elements will be treated individually rather than as a whole.
- Transformations should now consistently work on the context node.
- `.substitute()` now defaults to a count of 0 (i.e. all) rather than 1. This is to be consistent with Python's regex substitution.
- `ATTR` events now have a `kind` of `ATTR` in addition to having this as their `mark`.
- Added the `BREAK` `mark`. This allows cuts of otherwise seamlessly joined objects to be operated on.
- Added a full test suite.
author | athomas |
---|---|
date | Mon, 09 Jun 2008 06:39:46 +0000 |
parents | ea2566b2f226 |
children | 8bb31ed1072e |
comparison
equal
deleted
inserted
replaced
741:f4f8ffefbd49 | 744:cd6624cf2f7c |
---|---|
53 from genshi.builder import Element | 53 from genshi.builder import Element |
54 from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup | 54 from genshi.core import Stream, Attrs, QName, TEXT, START, END, _ensure, Markup |
55 from genshi.path import Path | 55 from genshi.path import Path |
56 | 56 |
57 __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', | 57 __all__ = ['Transformer', 'StreamBuffer', 'InjectorTransformation', 'ENTER', |
58 'EXIT', 'INSIDE', 'OUTSIDE'] | 58 'EXIT', 'INSIDE', 'OUTSIDE', 'BREAK'] |
59 | 59 |
60 | 60 |
61 class TransformMark(str): | 61 class TransformMark(str): |
62 """A mark on a transformation stream.""" | 62 """A mark on a transformation stream.""" |
63 __slots__ = [] | 63 __slots__ = [] |
83 """Stream augmentation mark indicating a selected element attribute.""" | 83 """Stream augmentation mark indicating a selected element attribute.""" |
84 | 84 |
85 EXIT = TransformMark('EXIT') | 85 EXIT = TransformMark('EXIT') |
86 """Stream augmentation mark indicating that a selected element is being | 86 """Stream augmentation mark indicating that a selected element is being |
87 exited.""" | 87 exited.""" |
88 | |
89 BREAK = TransformMark('BREAK') | |
90 """Stream augmentation mark indicating a break between two otherwise contiguous | |
91 blocks of marked events. | |
92 | |
93 This is used primarily by the cut() transform to provide later transforms with | |
94 an opportunity to operate on the cut buffer. | |
95 """ | |
96 | |
97 | |
98 class PushBackStream(object): | |
99 """Allows a single event to be pushed back onto the stream and re-consumed. | |
100 """ | |
101 def __init__(self, stream): | |
102 self.stream = iter(stream) | |
103 self.peek = None | |
104 | |
105 def push(self, event): | |
106 assert self.peek is None | |
107 self.peek = event | |
108 | |
109 def __iter__(self): | |
110 while True: | |
111 if self.peek is not None: | |
112 peek = self.peek | |
113 self.peek = None | |
114 yield peek | |
115 else: | |
116 try: | |
117 event = self.stream.next() | |
118 yield event | |
119 except StopIteration: | |
120 if self.peek is None: | |
121 raise | |
88 | 122 |
89 | 123 |
90 class Transformer(object): | 124 class Transformer(object): |
91 """Stream filter that can apply a variety of different transformations to | 125 """Stream filter that can apply a variety of different transformations to |
92 a stream. | 126 a stream. |
148 | 182 |
149 :param path: an XPath expression (as string) or a `Path` instance | 183 :param path: an XPath expression (as string) or a `Path` instance |
150 """ | 184 """ |
151 self.transforms = [SelectTransformation(path)] | 185 self.transforms = [SelectTransformation(path)] |
152 | 186 |
153 def __call__(self, stream): | 187 def __call__(self, stream, keep_marks=False): |
154 """Apply the transform filter to the marked stream. | 188 """Apply the transform filter to the marked stream. |
155 | 189 |
156 :param stream: the marked event stream to filter | 190 :param stream: the marked event stream to filter |
191 :param keep_marks: Do not strip transformer selection marks from the | |
192 stream. Useful for testing. | |
157 :return: the transformed stream | 193 :return: the transformed stream |
158 :rtype: `Stream` | 194 :rtype: `Stream` |
159 """ | 195 """ |
160 transforms = self._mark(stream) | 196 transforms = self._mark(stream) |
161 for link in self.transforms: | 197 for link in self.transforms: |
162 transforms = link(transforms) | 198 transforms = link(transforms) |
163 return Stream(self._unmark(transforms), | 199 if not keep_marks: |
200 transforms = self._unmark(transforms) | |
201 return Stream(transforms, | |
164 serializer=getattr(stream, 'serializer', None)) | 202 serializer=getattr(stream, 'serializer', None)) |
165 | 203 |
166 def apply(self, function): | 204 def apply(self, function): |
167 """Apply a transformation to the stream. | 205 """Apply a transformation to the stream. |
168 | 206 |
327 ... '<body>Some <em>body</em> text.</body></html>') | 365 ... '<body>Some <em>body</em> text.</body></html>') |
328 >>> print html | Transformer('.//title/text()').replace('New Title') | 366 >>> print html | Transformer('.//title/text()').replace('New Title') |
329 <html><head><title>New Title</title></head><body>Some <em>body</em> | 367 <html><head><title>New Title</title></head><body>Some <em>body</em> |
330 text.</body></html> | 368 text.</body></html> |
331 | 369 |
332 :param content: Either an iterable of events or a string to insert. | 370 :param content: Either a callable, an iterable of events, or a string |
371 to insert. | |
333 :rtype: `Transformer` | 372 :rtype: `Transformer` |
334 """ | 373 """ |
335 return self.apply(ReplaceTransformation(content)) | 374 return self.apply(ReplaceTransformation(content)) |
336 | 375 |
337 def before(self, content): | 376 def before(self, content): |
344 ... '<body>Some <em>body</em> text.</body></html>') | 383 ... '<body>Some <em>body</em> text.</body></html>') |
345 >>> print html | Transformer('.//em').before('emphasised ') | 384 >>> print html | Transformer('.//em').before('emphasised ') |
346 <html><head><title>Some Title</title></head><body>Some emphasised | 385 <html><head><title>Some Title</title></head><body>Some emphasised |
347 <em>body</em> text.</body></html> | 386 <em>body</em> text.</body></html> |
348 | 387 |
349 :param content: Either an iterable of events or a string to insert. | 388 :param content: Either a callable, an iterable of events, or a string |
389 to insert. | |
350 :rtype: `Transformer` | 390 :rtype: `Transformer` |
351 """ | 391 """ |
352 return self.apply(BeforeTransformation(content)) | 392 return self.apply(BeforeTransformation(content)) |
353 | 393 |
354 def after(self, content): | 394 def after(self, content): |
360 ... '<body>Some <em>body</em> text.</body></html>') | 400 ... '<body>Some <em>body</em> text.</body></html>') |
361 >>> print html | Transformer('.//em').after(' rock') | 401 >>> print html | Transformer('.//em').after(' rock') |
362 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 402 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
363 rock text.</body></html> | 403 rock text.</body></html> |
364 | 404 |
365 :param content: Either an iterable of events or a string to insert. | 405 :param content: Either a callable, an iterable of events, or a string |
406 to insert. | |
366 :rtype: `Transformer` | 407 :rtype: `Transformer` |
367 """ | 408 """ |
368 return self.apply(AfterTransformation(content)) | 409 return self.apply(AfterTransformation(content)) |
369 | 410 |
370 def prepend(self, content): | 411 def prepend(self, content): |
376 ... '<body>Some <em>body</em> text.</body></html>') | 417 ... '<body>Some <em>body</em> text.</body></html>') |
377 >>> print html | Transformer('.//body').prepend('Some new body text. ') | 418 >>> print html | Transformer('.//body').prepend('Some new body text. ') |
378 <html><head><title>Some Title</title></head><body>Some new body text. | 419 <html><head><title>Some Title</title></head><body>Some new body text. |
379 Some <em>body</em> text.</body></html> | 420 Some <em>body</em> text.</body></html> |
380 | 421 |
381 :param content: Either an iterable of events or a string to insert. | 422 :param content: Either a callable, an iterable of events, or a string |
423 to insert. | |
382 :rtype: `Transformer` | 424 :rtype: `Transformer` |
383 """ | 425 """ |
384 return self.apply(PrependTransformation(content)) | 426 return self.apply(PrependTransformation(content)) |
385 | 427 |
386 def append(self, content): | 428 def append(self, content): |
390 ... '<body>Some <em>body</em> text.</body></html>') | 432 ... '<body>Some <em>body</em> text.</body></html>') |
391 >>> print html | Transformer('.//body').append(' Some new body text.') | 433 >>> print html | Transformer('.//body').append(' Some new body text.') |
392 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 434 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
393 text. Some new body text.</body></html> | 435 text. Some new body text.</body></html> |
394 | 436 |
395 :param content: Either an iterable of events or a string to insert. | 437 :param content: Either a callable, an iterable of events, or a string |
438 to insert. | |
396 :rtype: `Transformer` | 439 :rtype: `Transformer` |
397 """ | 440 """ |
398 return self.apply(AppendTransformation(content)) | 441 return self.apply(AppendTransformation(content)) |
399 | 442 |
400 #{ Attribute manipulation | 443 #{ Attribute manipulation |
441 #{ Buffer operations | 484 #{ Buffer operations |
442 | 485 |
443 def copy(self, buffer, accumulate=False): | 486 def copy(self, buffer, accumulate=False): |
444 """Copy selection into buffer. | 487 """Copy selection into buffer. |
445 | 488 |
446 The buffer is replaced by each contiguous selection before being passed | 489 The buffer is replaced by each *contiguous* selection before being passed |
447 to the next transformation. If accumulate=True, further selections will | 490 to the next transformation. If accumulate=True, further selections will |
448 be appended to the buffer rather than replacing it. | 491 be appended to the buffer rather than replacing it. |
449 | 492 |
450 >>> from genshi.builder import tag | 493 >>> from genshi.builder import tag |
451 >>> buffer = StreamBuffer() | 494 >>> buffer = StreamBuffer() |
484 | 527 |
485 | 528 |
486 :param buffer: the `StreamBuffer` in which the selection should be | 529 :param buffer: the `StreamBuffer` in which the selection should be |
487 stored | 530 stored |
488 :rtype: `Transformer` | 531 :rtype: `Transformer` |
489 :note: this transformation will buffer the entire input stream | 532 note: Copy (and cut) copy each individual selected object into the |
533 buffer before passing to the next transform. For example, the | |
534 XPath ``*|text()`` will select all elements and text, each | |
535 instance of which will be copied to the buffer individually | |
536 before passing to the next transform. This has implications for | |
537 how ``StreamBuffer`` objects can be used, so some | |
538 experimentation may be required. | |
539 | |
490 """ | 540 """ |
491 return self.apply(CopyTransformation(buffer, accumulate)) | 541 return self.apply(CopyTransformation(buffer, accumulate)) |
492 | 542 |
493 def cut(self, buffer, accumulate=False): | 543 def cut(self, buffer, accumulate=False): |
494 """Copy selection into buffer and remove the selection from the stream. | 544 """Copy selection into buffer and remove the selection from the stream. |
632 for event in stream: | 682 for event in stream: |
633 yield OUTSIDE, event | 683 yield OUTSIDE, event |
634 | 684 |
635 def _unmark(self, stream): | 685 def _unmark(self, stream): |
636 for mark, event in stream: | 686 for mark, event in stream: |
637 if event[0] is not None: | 687 kind = event[0] |
688 if not (kind is None or kind is ATTR or kind is BREAK): | |
638 yield event | 689 yield event |
639 | 690 |
640 | 691 |
641 class SelectTransformation(object): | 692 class SelectTransformation(object): |
642 """Select and mark events that match an XPath expression.""" | 693 """Select and mark events that match an XPath expression.""" |
684 else: | 735 else: |
685 yield OUTSIDE, event | 736 yield OUTSIDE, event |
686 elif isinstance(result, Attrs): | 737 elif isinstance(result, Attrs): |
687 # XXX Selected *attributes* are given a "kind" of None to | 738 # XXX Selected *attributes* are given a "kind" of None to |
688 # indicate they are not really part of the stream. | 739 # indicate they are not really part of the stream. |
689 yield ATTR, (None, (QName(event[1][0] + '@*'), result), event[2]) | 740 yield ATTR, (ATTR, (QName(event[1][0] + '@*'), result), event[2]) |
690 yield None, event | 741 yield None, event |
742 elif isinstance(result, tuple): | |
743 yield OUTSIDE, result | |
691 elif result: | 744 elif result: |
745 # XXX Assume everything else is "text"? | |
692 yield None, (TEXT, unicode(result), (None, -1, -1)) | 746 yield None, (TEXT, unicode(result), (None, -1, -1)) |
693 else: | 747 else: |
694 yield None, event | 748 yield None, event |
695 | 749 |
696 | 750 |
732 """Apply the transform filter to the marked stream. | 786 """Apply the transform filter to the marked stream. |
733 | 787 |
734 :param stream: the marked event stream to filter | 788 :param stream: the marked event stream to filter |
735 """ | 789 """ |
736 for mark, event in stream: | 790 for mark, event in stream: |
737 if mark not in (INSIDE, OUTSIDE): | 791 yield mark, event |
738 yield mark, event | 792 if mark is ENTER: |
793 for mark, event in stream: | |
794 if mark is EXIT: | |
795 yield mark, event | |
796 break | |
739 | 797 |
740 | 798 |
741 class RemoveTransformation(object): | 799 class RemoveTransformation(object): |
742 """Remove selection from the stream.""" | 800 """Remove selection from the stream.""" |
743 | 801 |
778 if mark: | 836 if mark: |
779 element = list(self.element.generate()) | 837 element = list(self.element.generate()) |
780 for prefix in element[:-1]: | 838 for prefix in element[:-1]: |
781 yield None, prefix | 839 yield None, prefix |
782 yield mark, event | 840 yield mark, event |
783 while True: | 841 start = mark |
784 try: | 842 stopped = False |
785 mark, event = stream.next() | 843 for mark, event in stream: |
786 except StopIteration: | 844 if start is ENTER and mark is EXIT: |
787 yield None, element[-1] | 845 yield mark, event |
846 stopped = True | |
847 break | |
788 if not mark: | 848 if not mark: |
789 break | 849 break |
790 yield mark, event | 850 yield mark, event |
851 else: | |
852 stopped = True | |
791 yield None, element[-1] | 853 yield None, element[-1] |
792 yield mark, event | 854 if not stopped: |
855 yield mark, event | |
793 else: | 856 else: |
794 yield mark, event | 857 yield mark, event |
795 | 858 |
796 | 859 |
797 class TraceTransformation(object): | 860 class TraceTransformation(object): |
816 yield event | 879 yield event |
817 | 880 |
818 | 881 |
819 class FilterTransformation(object): | 882 class FilterTransformation(object): |
820 """Apply a normal stream filter to the selection. The filter is called once | 883 """Apply a normal stream filter to the selection. The filter is called once |
821 for each contiguous block of marked events.""" | 884 for each selection.""" |
822 | 885 |
823 def __init__(self, filter): | 886 def __init__(self, filter): |
824 """Create the transform. | 887 """Create the transform. |
825 | 888 |
826 :param filter: The stream filter to apply. | 889 :param filter: The stream filter to apply. |
838 yield OUTSIDE, event | 901 yield OUTSIDE, event |
839 del queue[:] | 902 del queue[:] |
840 | 903 |
841 queue = [] | 904 queue = [] |
842 for mark, event in stream: | 905 for mark, event in stream: |
843 if mark: | 906 if mark is ENTER: |
844 queue.append(event) | 907 queue.append(event) |
845 else: | 908 for mark, event in stream: |
909 queue.append(event) | |
910 if mark is EXIT: | |
911 break | |
846 for queue_event in flush(queue): | 912 for queue_event in flush(queue): |
847 yield queue_event | 913 yield queue_event |
848 yield None, event | 914 elif mark is OUTSIDE: |
849 for event in flush(queue): | 915 stopped = True |
850 yield event | 916 queue.append(event) |
917 for mark, event in stream: | |
918 if mark is not OUTSIDE: | |
919 break | |
920 queue.append(event) | |
921 else: | |
922 stopped = True | |
923 for queue_event in flush(queue): | |
924 yield queue_event | |
925 if not stopped: | |
926 yield None, event | |
927 else: | |
928 yield mark, event | |
929 for queue_event in flush(queue): | |
930 yield queue_event | |
851 | 931 |
852 | 932 |
853 class MapTransformation(object): | 933 class MapTransformation(object): |
854 """Apply a function to the `data` element of events of ``kind`` in the | 934 """Apply a function to the `data` element of events of ``kind`` in the |
855 selection. | 935 selection. |
880 class SubstituteTransformation(object): | 960 class SubstituteTransformation(object): |
881 """Replace text matching a regular expression. | 961 """Replace text matching a regular expression. |
882 | 962 |
883 Refer to the documentation for ``re.sub()`` for details. | 963 Refer to the documentation for ``re.sub()`` for details. |
884 """ | 964 """ |
885 def __init__(self, pattern, replace, count=1): | 965 def __init__(self, pattern, replace, count=0): |
886 """Create the transform. | 966 """Create the transform. |
887 | 967 |
888 :param pattern: A regular expression object, or string. | 968 :param pattern: A regular expression object, or string. |
889 :param replace: Replacement pattern. | 969 :param replace: Replacement pattern. |
890 :param count: Number of replacements to make in each text fragment. | 970 :param count: Number of replacements to make in each text fragment. |
954 injected. | 1034 injected. |
955 """ | 1035 """ |
956 self.content = content | 1036 self.content = content |
957 | 1037 |
958 def _inject(self): | 1038 def _inject(self): |
959 for event in _ensure(self.content): | 1039 content = self.content |
1040 if callable(content): | |
1041 content = content() | |
1042 for event in _ensure(content): | |
960 yield None, event | 1043 yield None, event |
961 | 1044 |
962 | 1045 |
963 class ReplaceTransformation(InjectorTransformation): | 1046 class ReplaceTransformation(InjectorTransformation): |
964 """Replace selection with content.""" | 1047 """Replace selection with content.""" |
966 def __call__(self, stream): | 1049 def __call__(self, stream): |
967 """Apply the transform filter to the marked stream. | 1050 """Apply the transform filter to the marked stream. |
968 | 1051 |
969 :param stream: The marked event stream to filter | 1052 :param stream: The marked event stream to filter |
970 """ | 1053 """ |
1054 stream = PushBackStream(stream) | |
971 for mark, event in stream: | 1055 for mark, event in stream: |
972 if mark is not None: | 1056 if mark is not None: |
1057 start = mark | |
973 for subevent in self._inject(): | 1058 for subevent in self._inject(): |
974 yield subevent | 1059 yield subevent |
975 while True: | 1060 for mark, event in stream: |
976 mark, event = stream.next() | 1061 if start is ENTER: |
977 if mark is None: | 1062 if mark is EXIT: |
978 yield mark, event | 1063 break |
1064 elif mark != start: | |
1065 stream.push((mark, event)) | |
979 break | 1066 break |
980 else: | 1067 else: |
981 yield mark, event | 1068 yield mark, event |
982 | 1069 |
983 | 1070 |
987 def __call__(self, stream): | 1074 def __call__(self, stream): |
988 """Apply the transform filter to the marked stream. | 1075 """Apply the transform filter to the marked stream. |
989 | 1076 |
990 :param stream: The marked event stream to filter | 1077 :param stream: The marked event stream to filter |
991 """ | 1078 """ |
1079 stream = PushBackStream(stream) | |
992 for mark, event in stream: | 1080 for mark, event in stream: |
993 if mark is not None: | 1081 if mark is not None: |
1082 start = mark | |
994 for subevent in self._inject(): | 1083 for subevent in self._inject(): |
995 yield subevent | 1084 yield subevent |
996 yield mark, event | 1085 yield mark, event |
997 while True: | 1086 for mark, event in stream: |
998 mark, event = stream.next() | 1087 if mark != start and start is not ENTER: |
999 if not mark: | 1088 stream.push((mark, event)) |
1000 break | 1089 break |
1001 yield mark, event | 1090 yield mark, event |
1002 yield mark, event | 1091 if start is ENTER and mark is EXIT: |
1092 break | |
1093 else: | |
1094 yield mark, event | |
1003 | 1095 |
1004 | 1096 |
1005 class AfterTransformation(InjectorTransformation): | 1097 class AfterTransformation(InjectorTransformation): |
1006 """Insert content after selection.""" | 1098 """Insert content after selection.""" |
1007 | 1099 |
1008 def __call__(self, stream): | 1100 def __call__(self, stream): |
1009 """Apply the transform filter to the marked stream. | 1101 """Apply the transform filter to the marked stream. |
1010 | 1102 |
1011 :param stream: The marked event stream to filter | 1103 :param stream: The marked event stream to filter |
1012 """ | 1104 """ |
1105 stream = PushBackStream(stream) | |
1013 for mark, event in stream: | 1106 for mark, event in stream: |
1014 yield mark, event | 1107 yield mark, event |
1015 if mark: | 1108 if mark: |
1016 while True: | 1109 start = mark |
1017 try: | 1110 for mark, event in stream: |
1018 mark, event = stream.next() | 1111 if start is not ENTER and mark != start: |
1019 except StopIteration: | 1112 stream.push((mark, event)) |
1020 break | |
1021 if not mark: | |
1022 break | 1113 break |
1023 yield mark, event | 1114 yield mark, event |
1115 if start is ENTER and mark is EXIT: | |
1116 break | |
1024 for subevent in self._inject(): | 1117 for subevent in self._inject(): |
1025 yield subevent | 1118 yield subevent |
1026 yield mark, event | |
1027 | 1119 |
1028 | 1120 |
1029 class PrependTransformation(InjectorTransformation): | 1121 class PrependTransformation(InjectorTransformation): |
1030 """Prepend content to the inside of selected elements.""" | 1122 """Prepend content to the inside of selected elements.""" |
1031 | 1123 |
1034 | 1126 |
1035 :param stream: The marked event stream to filter | 1127 :param stream: The marked event stream to filter |
1036 """ | 1128 """ |
1037 for mark, event in stream: | 1129 for mark, event in stream: |
1038 yield mark, event | 1130 yield mark, event |
1039 if mark in (ENTER, OUTSIDE): | 1131 if mark is ENTER: |
1040 for subevent in self._inject(): | 1132 for subevent in self._inject(): |
1041 yield subevent | 1133 yield subevent |
1042 | 1134 |
1043 | 1135 |
1044 class AppendTransformation(InjectorTransformation): | 1136 class AppendTransformation(InjectorTransformation): |
1050 :param stream: The marked event stream to filter | 1142 :param stream: The marked event stream to filter |
1051 """ | 1143 """ |
1052 for mark, event in stream: | 1144 for mark, event in stream: |
1053 yield mark, event | 1145 yield mark, event |
1054 if mark is ENTER: | 1146 if mark is ENTER: |
1055 while True: | 1147 for mark, event in stream: |
1056 mark, event = stream.next() | |
1057 if mark is EXIT: | 1148 if mark is EXIT: |
1058 break | 1149 break |
1059 yield mark, event | 1150 yield mark, event |
1060 for subevent in self._inject(): | 1151 for subevent in self._inject(): |
1061 yield subevent | 1152 yield subevent |
1108 :param event: the markup event to add | 1199 :param event: the markup event to add |
1109 """ | 1200 """ |
1110 self.events.append(event) | 1201 self.events.append(event) |
1111 | 1202 |
1112 def reset(self): | 1203 def reset(self): |
1113 """Reset the buffer so that it's empty.""" | 1204 """Empty the buffer of events.""" |
1114 del self.events[:] | 1205 del self.events[:] |
1115 | 1206 |
1116 | 1207 |
1117 class CopyTransformation(object): | 1208 class CopyTransformation(object): |
1118 """Copy selected events into a buffer for later insertion.""" | 1209 """Copy selected events into a buffer for later insertion.""" |
1131 def __call__(self, stream): | 1222 def __call__(self, stream): |
1132 """Apply the transformation to the marked stream. | 1223 """Apply the transformation to the marked stream. |
1133 | 1224 |
1134 :param stream: the marked event stream to filter | 1225 :param stream: the marked event stream to filter |
1135 """ | 1226 """ |
1136 stream = iter(stream) | 1227 stream = PushBackStream(stream) |
1228 | |
1137 for mark, event in stream: | 1229 for mark, event in stream: |
1138 if mark: | 1230 if mark: |
1139 if not self.accumulate: | 1231 if not self.accumulate: |
1140 self.buffer.reset() | 1232 self.buffer.reset() |
1141 events = [] | 1233 events = [(mark, event)] |
1142 while mark: | 1234 self.buffer.append(event) |
1235 start = mark | |
1236 for mark, event in stream: | |
1237 if start is not ENTER and mark != start: | |
1238 stream.push((mark, event)) | |
1239 break | |
1143 events.append((mark, event)) | 1240 events.append((mark, event)) |
1144 self.buffer.append(event) | 1241 self.buffer.append(event) |
1145 mark, event = stream.next() | 1242 if start is ENTER and mark is EXIT: |
1243 break | |
1146 for i in events: | 1244 for i in events: |
1147 yield i | 1245 yield i |
1148 yield mark, event | 1246 else: |
1247 yield mark, event | |
1149 | 1248 |
1150 | 1249 |
1151 class CutTransformation(object): | 1250 class CutTransformation(object): |
1152 """Cut selected events into a buffer for later insertion and remove the | 1251 """Cut selected events into a buffer for later insertion and remove the |
1153 selection. | 1252 selection. |
1157 """Create the cut transformation. | 1256 """Create the cut transformation. |
1158 | 1257 |
1159 :param buffer: the `StreamBuffer` in which the selection should be | 1258 :param buffer: the `StreamBuffer` in which the selection should be |
1160 stored | 1259 stored |
1161 """ | 1260 """ |
1162 if not accumulate: | |
1163 buffer.reset() | |
1164 self.buffer = buffer | 1261 self.buffer = buffer |
1165 self.accumulate = accumulate | 1262 self.accumulate = accumulate |
1166 | 1263 |
1167 | 1264 |
1168 def __call__(self, stream): | 1265 def __call__(self, stream): |
1169 """Apply the transform filter to the marked stream. | 1266 """Apply the transform filter to the marked stream. |
1170 | 1267 |
1171 :param stream: the marked event stream to filter | 1268 :param stream: the marked event stream to filter |
1172 """ | 1269 """ |
1173 attributes = None | 1270 attributes = [] |
1174 stream = iter(stream) | 1271 stream = PushBackStream(stream) |
1272 broken = False | |
1273 if not self.accumulate: | |
1274 self.buffer.reset() | |
1175 for mark, event in stream: | 1275 for mark, event in stream: |
1176 if mark: | 1276 if mark: |
1277 # Send a BREAK event if there was no other event sent between | |
1177 if not self.accumulate: | 1278 if not self.accumulate: |
1279 if not broken and self.buffer: | |
1280 yield BREAK, (BREAK, None, None) | |
1178 self.buffer.reset() | 1281 self.buffer.reset() |
1179 while mark: | 1282 self.buffer.append(event) |
1180 if mark is ATTR: | 1283 start = mark |
1181 attributes = [name for name, _ in data[1]] | 1284 if mark is ATTR: |
1285 attributes.extend([name for name, _ in event[1][1]]) | |
1286 for mark, event in stream: | |
1287 if start is mark is ATTR: | |
1288 attributes.extend([name for name, _ in event[1][1]]) | |
1289 # Handle non-element contiguous selection | |
1290 if start is not ENTER and mark != start: | |
1291 # Operating on the attributes of a START event | |
1292 if start is ATTR: | |
1293 kind, data, pos = event | |
1294 assert kind is START | |
1295 data = (data[0], data[1] - attributes) | |
1296 attributes = None | |
1297 stream.push((mark, (kind, data, pos))) | |
1298 else: | |
1299 stream.push((mark, event)) | |
1300 break | |
1182 self.buffer.append(event) | 1301 self.buffer.append(event) |
1183 mark, event = stream.next() | 1302 if start is ENTER and mark is EXIT: |
1184 # If we've cut attributes, the associated element should START | 1303 break |
1185 # immediately after. | 1304 broken = False |
1186 if attributes: | 1305 else: |
1187 assert kind is START | 1306 broken = True |
1188 data = (data[0], data[1] - attributes) | 1307 yield mark, event |
1189 attributes = None | 1308 if not broken and self.buffer: |
1190 | 1309 yield BREAK, (BREAK, None, None) |
1191 yield mark, event |