Mercurial > genshi > genshi-test
comparison genshi/filters/transform.py @ 902:09cc3627654c experimental-inline
Sync `experimental/inline` branch with [source:trunk@1126].
author | cmlenz |
---|---|
date | Fri, 23 Apr 2010 21:08:26 +0000 |
parents | 1837f39efd6f |
children |
comparison
equal
deleted
inserted
replaced
830:de82830f8816 | 902:09cc3627654c |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # | 2 # |
3 # Copyright (C) 2007 Edgewall Software | 3 # Copyright (C) 2007-2009 Edgewall Software |
4 # All rights reserved. | 4 # All rights reserved. |
5 # | 5 # |
6 # This software is licensed as described in the file COPYING, which | 6 # This software is licensed as described in the file COPYING, which |
7 # you should have received as part of this distribution. The terms | 7 # you should have received as part of this distribution. The terms |
8 # are also available at http://genshi.edgewall.org/wiki/License. | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
30 ... <head><title>Some Title</title></head> | 30 ... <head><title>Some Title</title></head> |
31 ... <body> | 31 ... <body> |
32 ... Some <em>body</em> text. | 32 ... Some <em>body</em> text. |
33 ... </body> | 33 ... </body> |
34 ... </html>''') | 34 ... </html>''') |
35 >>> print html | Transformer('body/em').map(unicode.upper, TEXT) \\ | 35 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) |
36 ... .unwrap().wrap(tag.u) | 36 ... .unwrap().wrap(tag.u)) |
37 <html> | 37 <html> |
38 <head><title>Some Title</title></head> | 38 <head><title>Some Title</title></head> |
39 <body> | 39 <body> |
40 Some <u>BODY</u> text. | 40 Some <u>BODY</u> text. |
41 </body> | 41 </body> |
140 | 140 |
141 Transformations act on selected stream events matching an XPath expression. | 141 Transformations act on selected stream events matching an XPath expression. |
142 Here's an example of removing some markup (the title, in this case) | 142 Here's an example of removing some markup (the title, in this case) |
143 selected by an expression: | 143 selected by an expression: |
144 | 144 |
145 >>> print html | Transformer('head/title').remove() | 145 >>> print(html | Transformer('head/title').remove()) |
146 <html><head/><body>Some <em>body</em> text.</body></html> | 146 <html><head/><body>Some <em>body</em> text.</body></html> |
147 | 147 |
148 Inserted content can be passed in the form of a string, or a markup event | 148 Inserted content can be passed in the form of a string, or a markup event |
149 stream, which includes streams generated programmatically via the | 149 stream, which includes streams generated programmatically via the |
150 `builder` module: | 150 `builder` module: |
151 | 151 |
152 >>> from genshi.builder import tag | 152 >>> from genshi.builder import tag |
153 >>> print html | Transformer('body').prepend(tag.h1('Document Title')) | 153 >>> print(html | Transformer('body').prepend(tag.h1('Document Title'))) |
154 <html><head><title>Some Title</title></head><body><h1>Document | 154 <html><head><title>Some Title</title></head><body><h1>Document |
155 Title</h1>Some <em>body</em> text.</body></html> | 155 Title</h1>Some <em>body</em> text.</body></html> |
156 | 156 |
157 Each XPath expression determines the set of tags that will be acted upon by | 157 Each XPath expression determines the set of tags that will be acted upon by |
158 subsequent transformations. In this example we select the ``<title>`` text, | 158 subsequent transformations. In this example we select the ``<title>`` text, |
159 copy it into a buffer, then select the ``<body>`` element and paste the | 159 copy it into a buffer, then select the ``<body>`` element and paste the |
160 copied text into the body as ``<h1>`` enclosed text: | 160 copied text into the body as ``<h1>`` enclosed text: |
161 | 161 |
162 >>> buffer = StreamBuffer() | 162 >>> buffer = StreamBuffer() |
163 >>> print html | Transformer('head/title/text()').copy(buffer) \\ | 163 >>> print(html | Transformer('head/title/text()').copy(buffer) |
164 ... .end().select('body').prepend(tag.h1(buffer)) | 164 ... .end().select('body').prepend(tag.h1(buffer))) |
165 <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some | 165 <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some |
166 <em>body</em> text.</body></html> | 166 <em>body</em> text.</body></html> |
167 | 167 |
168 Transformations can also be assigned and reused, although care must be | 168 Transformations can also be assigned and reused, although care must be |
169 taken when using buffers, to ensure that buffers are cleared between | 169 taken when using buffers, to ensure that buffers are cleared between |
170 transforms: | 170 transforms: |
171 | 171 |
172 >>> emphasis = Transformer('body//em').attr('class', 'emphasis') | 172 >>> emphasis = Transformer('body//em').attr('class', 'emphasis') |
173 >>> print html | emphasis | 173 >>> print(html | emphasis) |
174 <html><head><title>Some Title</title></head><body>Some <em | 174 <html><head><title>Some Title</title></head><body>Some <em |
175 class="emphasis">body</em> text.</body></html> | 175 class="emphasis">body</em> text.</body></html> |
176 """ | 176 """ |
177 | 177 |
178 __slots__ = ['transforms'] | 178 __slots__ = ['transforms'] |
214 ... if mark and kind is TEXT: | 214 ... if mark and kind is TEXT: |
215 ... yield mark, (kind, data.upper(), pos) | 215 ... yield mark, (kind, data.upper(), pos) |
216 ... else: | 216 ... else: |
217 ... yield mark, (kind, data, pos) | 217 ... yield mark, (kind, data, pos) |
218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>') | 218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>') |
219 >>> print short_stream | Transformer('.//em/text()').apply(upper) | 219 >>> print(short_stream | Transformer('.//em/text()').apply(upper)) |
220 <body>Some <em>TEST</em> text</body> | 220 <body>Some <em>TEST</em> text</body> |
221 """ | 221 """ |
222 transformer = Transformer() | 222 transformer = Transformer() |
223 transformer.transforms = self.transforms[:] | 223 transformer.transforms = self.transforms[:] |
224 if isinstance(function, Transformer): | 224 if isinstance(function, Transformer): |
232 def select(self, path): | 232 def select(self, path): |
233 """Mark events matching the given XPath expression, within the current | 233 """Mark events matching the given XPath expression, within the current |
234 selection. | 234 selection. |
235 | 235 |
236 >>> html = HTML('<body>Some <em>test</em> text</body>') | 236 >>> html = HTML('<body>Some <em>test</em> text</body>') |
237 >>> print html | Transformer().select('.//em').trace() | 237 >>> print(html | Transformer().select('.//em').trace()) |
238 (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) | 238 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) |
239 (None, ('TEXT', u'Some ', (None, 1, 6))) | 239 (None, ('TEXT', u'Some ', (None, 1, 6))) |
240 ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) | 240 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
241 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) | 241 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) |
242 ('EXIT', ('END', QName(u'em'), (None, 1, 19))) | 242 ('EXIT', ('END', QName('em'), (None, 1, 19))) |
243 (None, ('TEXT', u' text', (None, 1, 24))) | 243 (None, ('TEXT', u' text', (None, 1, 24))) |
244 (None, ('END', QName(u'body'), (None, 1, 29))) | 244 (None, ('END', QName('body'), (None, 1, 29))) |
245 <body>Some <em>test</em> text</body> | 245 <body>Some <em>test</em> text</body> |
246 | 246 |
247 :param path: an XPath expression (as string) or a `Path` instance | 247 :param path: an XPath expression (as string) or a `Path` instance |
248 :return: the stream augmented by transformation marks | 248 :return: the stream augmented by transformation marks |
249 :rtype: `Transformer` | 249 :rtype: `Transformer` |
256 | 256 |
257 Specifically, all marks are converted to null marks, and all null marks | 257 Specifically, all marks are converted to null marks, and all null marks |
258 are converted to OUTSIDE marks. | 258 are converted to OUTSIDE marks. |
259 | 259 |
260 >>> html = HTML('<body>Some <em>test</em> text</body>') | 260 >>> html = HTML('<body>Some <em>test</em> text</body>') |
261 >>> print html | Transformer('//em').invert().trace() | 261 >>> print(html | Transformer('//em').invert().trace()) |
262 ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0))) | 262 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) |
263 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) | 263 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) |
264 (None, ('START', (QName(u'em'), Attrs()), (None, 1, 11))) | 264 (None, ('START', (QName('em'), Attrs()), (None, 1, 11))) |
265 (None, ('TEXT', u'test', (None, 1, 15))) | 265 (None, ('TEXT', u'test', (None, 1, 15))) |
266 (None, ('END', QName(u'em'), (None, 1, 19))) | 266 (None, ('END', QName('em'), (None, 1, 19))) |
267 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) | 267 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) |
268 ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29))) | 268 ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) |
269 <body>Some <em>test</em> text</body> | 269 <body>Some <em>test</em> text</body> |
270 | 270 |
271 :rtype: `Transformer` | 271 :rtype: `Transformer` |
272 """ | 272 """ |
273 return self.apply(InvertTransformation()) | 273 return self.apply(InvertTransformation()) |
276 """End current selection, allowing all events to be selected. | 276 """End current selection, allowing all events to be selected. |
277 | 277 |
278 Example: | 278 Example: |
279 | 279 |
280 >>> html = HTML('<body>Some <em>test</em> text</body>') | 280 >>> html = HTML('<body>Some <em>test</em> text</body>') |
281 >>> print html | Transformer('//em').end().trace() | 281 >>> print(html | Transformer('//em').end().trace()) |
282 ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0))) | 282 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) |
283 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) | 283 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) |
284 ('OUTSIDE', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) | 284 ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
285 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) | 285 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) |
286 ('OUTSIDE', ('END', QName(u'em'), (None, 1, 19))) | 286 ('OUTSIDE', ('END', QName('em'), (None, 1, 19))) |
287 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) | 287 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) |
288 ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29))) | 288 ('OUTSIDE', ('END', QName('body'), (None, 1, 29))) |
289 <body>Some <em>test</em> text</body> | 289 <body>Some <em>test</em> text</body> |
290 | 290 |
291 :return: the stream augmented by transformation marks | 291 :return: the stream augmented by transformation marks |
292 :rtype: `Transformer` | 292 :rtype: `Transformer` |
293 """ | 293 """ |
300 | 300 |
301 Example: | 301 Example: |
302 | 302 |
303 >>> html = HTML('<html><head><title>Some Title</title></head>' | 303 >>> html = HTML('<html><head><title>Some Title</title></head>' |
304 ... '<body>Some <em>body</em> text.</body></html>') | 304 ... '<body>Some <em>body</em> text.</body></html>') |
305 >>> print html | Transformer('.//em').empty() | 305 >>> print(html | Transformer('.//em').empty()) |
306 <html><head><title>Some Title</title></head><body>Some <em/> | 306 <html><head><title>Some Title</title></head><body>Some <em/> |
307 text.</body></html> | 307 text.</body></html> |
308 | 308 |
309 :rtype: `Transformer` | 309 :rtype: `Transformer` |
310 """ | 310 """ |
315 | 315 |
316 Example: | 316 Example: |
317 | 317 |
318 >>> html = HTML('<html><head><title>Some Title</title></head>' | 318 >>> html = HTML('<html><head><title>Some Title</title></head>' |
319 ... '<body>Some <em>body</em> text.</body></html>') | 319 ... '<body>Some <em>body</em> text.</body></html>') |
320 >>> print html | Transformer('.//em').remove() | 320 >>> print(html | Transformer('.//em').remove()) |
321 <html><head><title>Some Title</title></head><body>Some | 321 <html><head><title>Some Title</title></head><body>Some |
322 text.</body></html> | 322 text.</body></html> |
323 | 323 |
324 :rtype: `Transformer` | 324 :rtype: `Transformer` |
325 """ | 325 """ |
332 | 332 |
333 Example: | 333 Example: |
334 | 334 |
335 >>> html = HTML('<html><head><title>Some Title</title></head>' | 335 >>> html = HTML('<html><head><title>Some Title</title></head>' |
336 ... '<body>Some <em>body</em> text.</body></html>') | 336 ... '<body>Some <em>body</em> text.</body></html>') |
337 >>> print html | Transformer('.//em').unwrap() | 337 >>> print(html | Transformer('.//em').unwrap()) |
338 <html><head><title>Some Title</title></head><body>Some body | 338 <html><head><title>Some Title</title></head><body>Some body |
339 text.</body></html> | 339 text.</body></html> |
340 | 340 |
341 :rtype: `Transformer` | 341 :rtype: `Transformer` |
342 """ | 342 """ |
345 def wrap(self, element): | 345 def wrap(self, element): |
346 """Wrap selection in an element. | 346 """Wrap selection in an element. |
347 | 347 |
348 >>> html = HTML('<html><head><title>Some Title</title></head>' | 348 >>> html = HTML('<html><head><title>Some Title</title></head>' |
349 ... '<body>Some <em>body</em> text.</body></html>') | 349 ... '<body>Some <em>body</em> text.</body></html>') |
350 >>> print html | Transformer('.//em').wrap('strong') | 350 >>> print(html | Transformer('.//em').wrap('strong')) |
351 <html><head><title>Some Title</title></head><body>Some | 351 <html><head><title>Some Title</title></head><body>Some |
352 <strong><em>body</em></strong> text.</body></html> | 352 <strong><em>body</em></strong> text.</body></html> |
353 | 353 |
354 :param element: either a tag name (as string) or an `Element` object | 354 :param element: either a tag name (as string) or an `Element` object |
355 :rtype: `Transformer` | 355 :rtype: `Transformer` |
361 def replace(self, content): | 361 def replace(self, content): |
362 """Replace selection with content. | 362 """Replace selection with content. |
363 | 363 |
364 >>> html = HTML('<html><head><title>Some Title</title></head>' | 364 >>> html = HTML('<html><head><title>Some Title</title></head>' |
365 ... '<body>Some <em>body</em> text.</body></html>') | 365 ... '<body>Some <em>body</em> text.</body></html>') |
366 >>> print html | Transformer('.//title/text()').replace('New Title') | 366 >>> print(html | Transformer('.//title/text()').replace('New Title')) |
367 <html><head><title>New Title</title></head><body>Some <em>body</em> | 367 <html><head><title>New Title</title></head><body>Some <em>body</em> |
368 text.</body></html> | 368 text.</body></html> |
369 | 369 |
370 :param content: Either a callable, an iterable of events, or a string | 370 :param content: Either a callable, an iterable of events, or a string |
371 to insert. | 371 to insert. |
379 In this example we insert the word 'emphasised' before the <em> opening | 379 In this example we insert the word 'emphasised' before the <em> opening |
380 tag: | 380 tag: |
381 | 381 |
382 >>> html = HTML('<html><head><title>Some Title</title></head>' | 382 >>> html = HTML('<html><head><title>Some Title</title></head>' |
383 ... '<body>Some <em>body</em> text.</body></html>') | 383 ... '<body>Some <em>body</em> text.</body></html>') |
384 >>> print html | Transformer('.//em').before('emphasised ') | 384 >>> print(html | Transformer('.//em').before('emphasised ')) |
385 <html><head><title>Some Title</title></head><body>Some emphasised | 385 <html><head><title>Some Title</title></head><body>Some emphasised |
386 <em>body</em> text.</body></html> | 386 <em>body</em> text.</body></html> |
387 | 387 |
388 :param content: Either a callable, an iterable of events, or a string | 388 :param content: Either a callable, an iterable of events, or a string |
389 to insert. | 389 to insert. |
396 | 396 |
397 Here, we insert some text after the </em> closing tag: | 397 Here, we insert some text after the </em> closing tag: |
398 | 398 |
399 >>> html = HTML('<html><head><title>Some Title</title></head>' | 399 >>> html = HTML('<html><head><title>Some Title</title></head>' |
400 ... '<body>Some <em>body</em> text.</body></html>') | 400 ... '<body>Some <em>body</em> text.</body></html>') |
401 >>> print html | Transformer('.//em').after(' rock') | 401 >>> print(html | Transformer('.//em').after(' rock')) |
402 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 402 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
403 rock text.</body></html> | 403 rock text.</body></html> |
404 | 404 |
405 :param content: Either a callable, an iterable of events, or a string | 405 :param content: Either a callable, an iterable of events, or a string |
406 to insert. | 406 to insert. |
413 | 413 |
414 Inserting some new text at the start of the <body>: | 414 Inserting some new text at the start of the <body>: |
415 | 415 |
416 >>> html = HTML('<html><head><title>Some Title</title></head>' | 416 >>> html = HTML('<html><head><title>Some Title</title></head>' |
417 ... '<body>Some <em>body</em> text.</body></html>') | 417 ... '<body>Some <em>body</em> text.</body></html>') |
418 >>> print html | Transformer('.//body').prepend('Some new body text. ') | 418 >>> print(html | Transformer('.//body').prepend('Some new body text. ')) |
419 <html><head><title>Some Title</title></head><body>Some new body text. | 419 <html><head><title>Some Title</title></head><body>Some new body text. |
420 Some <em>body</em> text.</body></html> | 420 Some <em>body</em> text.</body></html> |
421 | 421 |
422 :param content: Either a callable, an iterable of events, or a string | 422 :param content: Either a callable, an iterable of events, or a string |
423 to insert. | 423 to insert. |
428 def append(self, content): | 428 def append(self, content): |
429 """Insert content before the END event of the selection. | 429 """Insert content before the END event of the selection. |
430 | 430 |
431 >>> html = HTML('<html><head><title>Some Title</title></head>' | 431 >>> html = HTML('<html><head><title>Some Title</title></head>' |
432 ... '<body>Some <em>body</em> text.</body></html>') | 432 ... '<body>Some <em>body</em> text.</body></html>') |
433 >>> print html | Transformer('.//body').append(' Some new body text.') | 433 >>> print(html | Transformer('.//body').append(' Some new body text.')) |
434 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 434 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
435 text. Some new body text.</body></html> | 435 text. Some new body text.</body></html> |
436 | 436 |
437 :param content: Either a callable, an iterable of events, or a string | 437 :param content: Either a callable, an iterable of events, or a string |
438 to insert. | 438 to insert. |
449 element: | 449 element: |
450 | 450 |
451 >>> html = HTML('<html><head><title>Some Title</title></head>' | 451 >>> html = HTML('<html><head><title>Some Title</title></head>' |
452 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' | 452 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' |
453 ... '</html>') | 453 ... '</html>') |
454 >>> print html | Transformer('body/em').attr('class', None) | 454 >>> print(html | Transformer('body/em').attr('class', None)) |
455 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 455 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
456 <em>text</em>.</body></html> | 456 <em>text</em>.</body></html> |
457 | 457 |
458 Otherwise the attribute will be set to `value`: | 458 Otherwise the attribute will be set to `value`: |
459 | 459 |
460 >>> print html | Transformer('body/em').attr('class', 'emphasis') | 460 >>> print(html | Transformer('body/em').attr('class', 'emphasis')) |
461 <html><head><title>Some Title</title></head><body>Some <em | 461 <html><head><title>Some Title</title></head><body>Some <em |
462 class="emphasis">body</em> <em class="emphasis">text</em>.</body></html> | 462 class="emphasis">body</em> <em class="emphasis">text</em>.</body></html> |
463 | 463 |
464 If `value` is a callable it will be called with the attribute name and | 464 If `value` is a callable it will be called with the attribute name and |
465 the `START` event for the matching element. Its return value will then | 465 the `START` event for the matching element. Its return value will then |
466 be used to set the attribute: | 466 be used to set the attribute: |
467 | 467 |
468 >>> def print_attr(name, event): | 468 >>> def print_attr(name, event): |
469 ... attrs = event[1][1] | 469 ... attrs = event[1][1] |
470 ... print attrs | 470 ... print(attrs) |
471 ... return attrs.get(name) | 471 ... return attrs.get(name) |
472 >>> print html | Transformer('body/em').attr('class', print_attr) | 472 >>> print(html | Transformer('body/em').attr('class', print_attr)) |
473 Attrs([(QName(u'class'), u'before')]) | 473 Attrs([(QName('class'), u'before')]) |
474 Attrs() | 474 Attrs() |
475 <html><head><title>Some Title</title></head><body>Some <em | 475 <html><head><title>Some Title</title></head><body>Some <em |
476 class="before">body</em> <em>text</em>.</body></html> | 476 class="before">body</em> <em>text</em>.</body></html> |
477 | 477 |
478 :param name: the name of the attribute | 478 :param name: the name of the attribute |
492 | 492 |
493 >>> from genshi.builder import tag | 493 >>> from genshi.builder import tag |
494 >>> buffer = StreamBuffer() | 494 >>> buffer = StreamBuffer() |
495 >>> html = HTML('<html><head><title>Some Title</title></head>' | 495 >>> html = HTML('<html><head><title>Some Title</title></head>' |
496 ... '<body>Some <em>body</em> text.</body></html>') | 496 ... '<body>Some <em>body</em> text.</body></html>') |
497 >>> print html | Transformer('head/title/text()').copy(buffer) \\ | 497 >>> print(html | Transformer('head/title/text()').copy(buffer) |
498 ... .end().select('body').prepend(tag.h1(buffer)) | 498 ... .end().select('body').prepend(tag.h1(buffer))) |
499 <html><head><title>Some Title</title></head><body><h1>Some | 499 <html><head><title>Some Title</title></head><body><h1>Some |
500 Title</h1>Some <em>body</em> text.</body></html> | 500 Title</h1>Some <em>body</em> text.</body></html> |
501 | 501 |
502 This example illustrates that only a single contiguous selection will | 502 This example illustrates that only a single contiguous selection will |
503 be buffered: | 503 be buffered: |
504 | 504 |
505 >>> print html | Transformer('head/title/text()').copy(buffer) \\ | 505 >>> print(html | Transformer('head/title/text()').copy(buffer) |
506 ... .end().select('body/em').copy(buffer).end().select('body') \\ | 506 ... .end().select('body/em').copy(buffer).end().select('body') |
507 ... .prepend(tag.h1(buffer)) | 507 ... .prepend(tag.h1(buffer))) |
508 <html><head><title>Some Title</title></head><body><h1>Some | 508 <html><head><title>Some Title</title></head><body><h1>Some |
509 Title</h1>Some <em>body</em> text.</body></html> | 509 Title</h1>Some <em>body</em> text.</body></html> |
510 >>> print buffer | 510 >>> print(buffer) |
511 <em>body</em> | 511 <em>body</em> |
512 | 512 |
513 Element attributes can also be copied for later use: | 513 Element attributes can also be copied for later use: |
514 | 514 |
515 >>> html = HTML('<html><head><title>Some Title</title></head>' | 515 >>> html = HTML('<html><head><title>Some Title</title></head>' |
516 ... '<body><em>Some</em> <em class="before">body</em>' | 516 ... '<body><em>Some</em> <em class="before">body</em>' |
517 ... '<em>text</em>.</body></html>') | 517 ... '<em>text</em>.</body></html>') |
518 >>> buffer = StreamBuffer() | 518 >>> buffer = StreamBuffer() |
519 >>> def apply_attr(name, entry): | 519 >>> def apply_attr(name, entry): |
520 ... return list(buffer)[0][1][1].get('class') | 520 ... return list(buffer)[0][1][1].get('class') |
521 >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\ | 521 >>> print(html | Transformer('body/em[@class]/@class').copy(buffer) |
522 ... .end().buffer().select('body/em[not(@class)]') \\ | 522 ... .end().buffer().select('body/em[not(@class)]') |
523 ... .attr('class', apply_attr) | 523 ... .attr('class', apply_attr)) |
524 <html><head><title>Some Title</title></head><body><em | 524 <html><head><title>Some Title</title></head><body><em |
525 class="before">Some</em> <em class="before">body</em><em | 525 class="before">Some</em> <em class="before">body</em><em |
526 class="before">text</em>.</body></html> | 526 class="before">text</em>.</body></html> |
527 | 527 |
528 | 528 |
545 | 545 |
546 >>> from genshi.builder import tag | 546 >>> from genshi.builder import tag |
547 >>> buffer = StreamBuffer() | 547 >>> buffer = StreamBuffer() |
548 >>> html = HTML('<html><head><title>Some Title</title></head>' | 548 >>> html = HTML('<html><head><title>Some Title</title></head>' |
549 ... '<body>Some <em>body</em> text.</body></html>') | 549 ... '<body>Some <em>body</em> text.</body></html>') |
550 >>> print html | Transformer('.//em/text()').cut(buffer) \\ | 550 >>> print(html | Transformer('.//em/text()').cut(buffer) |
551 ... .end().select('.//em').after(tag.h1(buffer)) | 551 ... .end().select('.//em').after(tag.h1(buffer))) |
552 <html><head><title>Some Title</title></head><body>Some | 552 <html><head><title>Some Title</title></head><body>Some |
553 <em/><h1>body</h1> text.</body></html> | 553 <em/><h1>body</h1> text.</body></html> |
554 | 554 |
555 Specifying accumulate=True appends all selected intervals onto the | 555 Specifying accumulate=True appends all selected intervals onto the |
556 buffer. Combining this with the .buffer() operation allows us to operate | 556 buffer. Combining this with the .buffer() operation allows us to operate |
577 top of the document: | 577 top of the document: |
578 | 578 |
579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' | 579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' |
580 ... 'text <note>two</note>.</body></doc>') | 580 ... 'text <note>two</note>.</body></doc>') |
581 >>> buffer = StreamBuffer() | 581 >>> buffer = StreamBuffer() |
582 >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\ | 582 >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) |
583 ... .end().buffer().select('notes').prepend(buffer) | 583 ... .end().buffer().select('notes').prepend(buffer)) |
584 <doc><notes><note>one</note><note>two</note></notes><body>Some text | 584 <doc><notes><note>one</note><note>two</note></notes><body>Some text |
585 .</body></doc> | 585 .</body></doc> |
586 | 586 |
587 """ | 587 """ |
588 return self.apply(list) | 588 return self.apply(list) |
594 once for each contiguous block of marked events. | 594 once for each contiguous block of marked events. |
595 | 595 |
596 >>> from genshi.filters.html import HTMLSanitizer | 596 >>> from genshi.filters.html import HTMLSanitizer |
597 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' | 597 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' |
598 ... '</script> and some more text</body></html>') | 598 ... '</script> and some more text</body></html>') |
599 >>> print html | Transformer('body/*').filter(HTMLSanitizer()) | 599 >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) |
600 <html><body>Some text and some more text</body></html> | 600 <html><body>Some text and some more text</body></html> |
601 | 601 |
602 :param filter: The stream filter to apply. | 602 :param filter: The stream filter to apply. |
603 :rtype: `Transformer` | 603 :rtype: `Transformer` |
604 """ | 604 """ |
608 """Applies a function to the ``data`` element of events of ``kind`` in | 608 """Applies a function to the ``data`` element of events of ``kind`` in |
609 the selection. | 609 the selection. |
610 | 610 |
611 >>> html = HTML('<html><head><title>Some Title</title></head>' | 611 >>> html = HTML('<html><head><title>Some Title</title></head>' |
612 ... '<body>Some <em>body</em> text.</body></html>') | 612 ... '<body>Some <em>body</em> text.</body></html>') |
613 >>> print html | Transformer('head/title').map(unicode.upper, TEXT) | 613 >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) |
614 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> | 614 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> |
615 text.</body></html> | 615 text.</body></html> |
616 | 616 |
617 :param function: the function to apply | 617 :param function: the function to apply |
618 :param kind: the kind of event the function should be applied to | 618 :param kind: the kind of event the function should be applied to |
626 Refer to the documentation for ``re.sub()`` for details. | 626 Refer to the documentation for ``re.sub()`` for details. |
627 | 627 |
628 >>> html = HTML('<html><body>Some text, some more text and ' | 628 >>> html = HTML('<html><body>Some text, some more text and ' |
629 ... '<b>some bold text</b>\\n' | 629 ... '<b>some bold text</b>\\n' |
630 ... '<i>some italicised text</i></body></html>') | 630 ... '<i>some italicised text</i></body></html>') |
631 >>> print html | Transformer('body/b').substitute('(?i)some', 'SOME') | 631 >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) |
632 <html><body>Some text, some more text and <b>SOME bold text</b> | 632 <html><body>Some text, some more text and <b>SOME bold text</b> |
633 <i>some italicised text</i></body></html> | 633 <i>some italicised text</i></body></html> |
634 >>> tags = tag.html(tag.body('Some text, some more text and\\n', | 634 >>> tags = tag.html(tag.body('Some text, some more text and\\n', |
635 ... Markup('<b>some bold text</b>'))) | 635 ... Markup('<b>some bold text</b>'))) |
636 >>> print tags.generate() | Transformer('body').substitute( | 636 >>> print(tags.generate() | Transformer('body').substitute( |
637 ... '(?i)some', 'SOME') | 637 ... '(?i)some', 'SOME')) |
638 <html><body>SOME text, some more text and | 638 <html><body>SOME text, some more text and |
639 <b>SOME bold text</b></body></html> | 639 <b>SOME bold text</b></body></html> |
640 | 640 |
641 :param pattern: A regular expression object or string. | 641 :param pattern: A regular expression object or string. |
642 :param replace: Replacement pattern. | 642 :param replace: Replacement pattern. |
648 def rename(self, name): | 648 def rename(self, name): |
649 """Rename matching elements. | 649 """Rename matching elements. |
650 | 650 |
651 >>> html = HTML('<html><body>Some text, some more text and ' | 651 >>> html = HTML('<html><body>Some text, some more text and ' |
652 ... '<b>some bold text</b></body></html>') | 652 ... '<b>some bold text</b></body></html>') |
653 >>> print html | Transformer('body/b').rename('strong') | 653 >>> print(html | Transformer('body/b').rename('strong')) |
654 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> | 654 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> |
655 """ | 655 """ |
656 return self.apply(RenameTransformation(name)) | 656 return self.apply(RenameTransformation(name)) |
657 | 657 |
658 def trace(self, prefix='', fileobj=None): | 658 def trace(self, prefix='', fileobj=None): |
659 """Print events as they pass through the transform. | 659 """Print events as they pass through the transform. |
660 | 660 |
661 >>> html = HTML('<body>Some <em>test</em> text</body>') | 661 >>> html = HTML('<body>Some <em>test</em> text</body>') |
662 >>> print html | Transformer('em').trace() | 662 >>> print(html | Transformer('em').trace()) |
663 (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) | 663 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) |
664 (None, ('TEXT', u'Some ', (None, 1, 6))) | 664 (None, ('TEXT', u'Some ', (None, 1, 6))) |
665 ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) | 665 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
666 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) | 666 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) |
667 ('EXIT', ('END', QName(u'em'), (None, 1, 19))) | 667 ('EXIT', ('END', QName('em'), (None, 1, 19))) |
668 (None, ('TEXT', u' text', (None, 1, 24))) | 668 (None, ('TEXT', u' text', (None, 1, 24))) |
669 (None, ('END', QName(u'body'), (None, 1, 29))) | 669 (None, ('END', QName('body'), (None, 1, 29))) |
670 <body>Some <em>test</em> text</body> | 670 <body>Some <em>test</em> text</body> |
671 | 671 |
672 :param prefix: a string to prefix each event with in the output | 672 :param prefix: a string to prefix each event with in the output |
673 :param fileobj: the writable file-like object to write to; defaults to | 673 :param fileobj: the writable file-like object to write to; defaults to |
674 the standard output stream | 674 the standard output stream |
874 """Apply the transform filter to the marked stream. | 874 """Apply the transform filter to the marked stream. |
875 | 875 |
876 :param stream: the marked event stream to filter | 876 :param stream: the marked event stream to filter |
877 """ | 877 """ |
878 for event in stream: | 878 for event in stream: |
879 print>>self.fileobj, self.prefix + str(event) | 879 self.fileobj.write('%s%s\n' % (self.prefix, event)) |
880 yield event | 880 yield event |
881 | 881 |
882 | 882 |
883 class FilterTransformation(object): | 883 class FilterTransformation(object): |
884 """Apply a normal stream filter to the selection. The filter is called once | 884 """Apply a normal stream filter to the selection. The filter is called once |
1023 ... for event in self._inject(): | 1023 ... for event in self._inject(): |
1024 ... yield event | 1024 ... yield event |
1025 ... for event in stream: | 1025 ... for event in stream: |
1026 ... yield event | 1026 ... yield event |
1027 >>> html = HTML('<body>Some <em>test</em> text</body>') | 1027 >>> html = HTML('<body>Some <em>test</em> text</body>') |
1028 >>> print html | Transformer('.//em').apply(Top('Prefix ')) | 1028 >>> print(html | Transformer('.//em').apply(Top('Prefix '))) |
1029 Prefix <body>Some <em>test</em> text</body> | 1029 Prefix <body>Some <em>test</em> text</body> |
1030 """ | 1030 """ |
1031 def __init__(self, content): | 1031 def __init__(self, content): |
1032 """Create a new injector. | 1032 """Create a new injector. |
1033 | 1033 |