Mercurial > genshi > genshi-test
comparison genshi/filters/transform.py @ 933:feba07fc925b
Merge r1141 from py3k:
add support for python 3 to genshi.filters:
* minor changes to track encoding=None API change in core genshi modules.
* renamed genshi/filters/tests/html.py to test_html.py to avoid clashes with Python 3 top-level html module when running tests subset.
* did not rename genshi/filters/html.py.
* i18n filters:
* ugettext and friends are gone in Python 3 (and only gettext and friends exist and they now handle unicode)
* Some \ line continuations inside doctests confused 2to3 and so were removed.
* Testing picked up a problem (already present in trunk) where Translator.__call__ could end up defining gettext as an endlessly recursive function. Noted with a TODO.
author | hodgestar |
---|---|
date | Fri, 18 Mar 2011 09:11:53 +0000 |
parents | 24733a5854d9 |
children |
comparison
equal
deleted
inserted
replaced
932:e53161c2773c | 933:feba07fc925b |
---|---|
29 >>> html = HTML('''<html> | 29 >>> html = HTML('''<html> |
30 ... <head><title>Some Title</title></head> | 30 ... <head><title>Some Title</title></head> |
31 ... <body> | 31 ... <body> |
32 ... Some <em>body</em> text. | 32 ... Some <em>body</em> text. |
33 ... </body> | 33 ... </body> |
34 ... </html>''') | 34 ... </html>''', |
35 ... encoding='utf-8') | |
35 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) | 36 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) |
36 ... .unwrap().wrap(tag.u)) | 37 ... .unwrap().wrap(tag.u)) |
37 <html> | 38 <html> |
38 <head><title>Some Title</title></head> | 39 <head><title>Some Title</title></head> |
39 <body> | 40 <body> |
134 contained `INSIDE` any selected XML/HTML element. A non-element match | 135 contained `INSIDE` any selected XML/HTML element. A non-element match |
135 outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE` | 136 outside a `START`/`END` container (e.g. ``text()``) will yield an `OUTSIDE` |
136 mark. | 137 mark. |
137 | 138 |
138 >>> html = HTML('<html><head><title>Some Title</title></head>' | 139 >>> html = HTML('<html><head><title>Some Title</title></head>' |
139 ... '<body>Some <em>body</em> text.</body></html>') | 140 ... '<body>Some <em>body</em> text.</body></html>', |
141 ... encoding='utf-8') | |
140 | 142 |
141 Transformations act on selected stream events matching an XPath expression. | 143 Transformations act on selected stream events matching an XPath expression. |
142 Here's an example of removing some markup (the title, in this case) | 144 Here's an example of removing some markup (the title, in this case) |
143 selected by an expression: | 145 selected by an expression: |
144 | 146 |
213 ... for mark, (kind, data, pos) in stream: | 215 ... for mark, (kind, data, pos) in stream: |
214 ... if mark and kind is TEXT: | 216 ... if mark and kind is TEXT: |
215 ... yield mark, (kind, data.upper(), pos) | 217 ... yield mark, (kind, data.upper(), pos) |
216 ... else: | 218 ... else: |
217 ... yield mark, (kind, data, pos) | 219 ... yield mark, (kind, data, pos) |
218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>') | 220 >>> short_stream = HTML('<body>Some <em>test</em> text</body>', |
221 ... encoding='utf-8') | |
219 >>> print(short_stream | Transformer('.//em/text()').apply(upper)) | 222 >>> print(short_stream | Transformer('.//em/text()').apply(upper)) |
220 <body>Some <em>TEST</em> text</body> | 223 <body>Some <em>TEST</em> text</body> |
221 """ | 224 """ |
222 transformer = Transformer() | 225 transformer = Transformer() |
223 transformer.transforms = self.transforms[:] | 226 transformer.transforms = self.transforms[:] |
231 | 234 |
232 def select(self, path): | 235 def select(self, path): |
233 """Mark events matching the given XPath expression, within the current | 236 """Mark events matching the given XPath expression, within the current |
234 selection. | 237 selection. |
235 | 238 |
236 >>> html = HTML('<body>Some <em>test</em> text</body>') | 239 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') |
237 >>> print(html | Transformer().select('.//em').trace()) | 240 >>> print(html | Transformer().select('.//em').trace()) |
238 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) | 241 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) |
239 (None, ('TEXT', u'Some ', (None, 1, 6))) | 242 (None, ('TEXT', u'Some ', (None, 1, 6))) |
240 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) | 243 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
241 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) | 244 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) |
255 versa. | 258 versa. |
256 | 259 |
257 Specifically, all marks are converted to null marks, and all null marks | 260 Specifically, all marks are converted to null marks, and all null marks |
258 are converted to OUTSIDE marks. | 261 are converted to OUTSIDE marks. |
259 | 262 |
260 >>> html = HTML('<body>Some <em>test</em> text</body>') | 263 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') |
261 >>> print(html | Transformer('//em').invert().trace()) | 264 >>> print(html | Transformer('//em').invert().trace()) |
262 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) | 265 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) |
263 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) | 266 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) |
264 (None, ('START', (QName('em'), Attrs()), (None, 1, 11))) | 267 (None, ('START', (QName('em'), Attrs()), (None, 1, 11))) |
265 (None, ('TEXT', u'test', (None, 1, 15))) | 268 (None, ('TEXT', u'test', (None, 1, 15))) |
275 def end(self): | 278 def end(self): |
276 """End current selection, allowing all events to be selected. | 279 """End current selection, allowing all events to be selected. |
277 | 280 |
278 Example: | 281 Example: |
279 | 282 |
280 >>> html = HTML('<body>Some <em>test</em> text</body>') | 283 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') |
281 >>> print(html | Transformer('//em').end().trace()) | 284 >>> print(html | Transformer('//em').end().trace()) |
282 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) | 285 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) |
283 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) | 286 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) |
284 ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11))) | 287 ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
285 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) | 288 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) |
299 """Empty selected elements of all content. | 302 """Empty selected elements of all content. |
300 | 303 |
301 Example: | 304 Example: |
302 | 305 |
303 >>> html = HTML('<html><head><title>Some Title</title></head>' | 306 >>> html = HTML('<html><head><title>Some Title</title></head>' |
304 ... '<body>Some <em>body</em> text.</body></html>') | 307 ... '<body>Some <em>body</em> text.</body></html>', |
308 ... encoding='utf-8') | |
305 >>> print(html | Transformer('.//em').empty()) | 309 >>> print(html | Transformer('.//em').empty()) |
306 <html><head><title>Some Title</title></head><body>Some <em/> | 310 <html><head><title>Some Title</title></head><body>Some <em/> |
307 text.</body></html> | 311 text.</body></html> |
308 | 312 |
309 :rtype: `Transformer` | 313 :rtype: `Transformer` |
314 """Remove selection from the stream. | 318 """Remove selection from the stream. |
315 | 319 |
316 Example: | 320 Example: |
317 | 321 |
318 >>> html = HTML('<html><head><title>Some Title</title></head>' | 322 >>> html = HTML('<html><head><title>Some Title</title></head>' |
319 ... '<body>Some <em>body</em> text.</body></html>') | 323 ... '<body>Some <em>body</em> text.</body></html>', |
324 ... encoding='utf-8') | |
320 >>> print(html | Transformer('.//em').remove()) | 325 >>> print(html | Transformer('.//em').remove()) |
321 <html><head><title>Some Title</title></head><body>Some | 326 <html><head><title>Some Title</title></head><body>Some |
322 text.</body></html> | 327 text.</body></html> |
323 | 328 |
324 :rtype: `Transformer` | 329 :rtype: `Transformer` |
331 """Remove outermost enclosing elements from selection. | 336 """Remove outermost enclosing elements from selection. |
332 | 337 |
333 Example: | 338 Example: |
334 | 339 |
335 >>> html = HTML('<html><head><title>Some Title</title></head>' | 340 >>> html = HTML('<html><head><title>Some Title</title></head>' |
336 ... '<body>Some <em>body</em> text.</body></html>') | 341 ... '<body>Some <em>body</em> text.</body></html>', |
342 ... encoding='utf-8') | |
337 >>> print(html | Transformer('.//em').unwrap()) | 343 >>> print(html | Transformer('.//em').unwrap()) |
338 <html><head><title>Some Title</title></head><body>Some body | 344 <html><head><title>Some Title</title></head><body>Some body |
339 text.</body></html> | 345 text.</body></html> |
340 | 346 |
341 :rtype: `Transformer` | 347 :rtype: `Transformer` |
344 | 350 |
345 def wrap(self, element): | 351 def wrap(self, element): |
346 """Wrap selection in an element. | 352 """Wrap selection in an element. |
347 | 353 |
348 >>> html = HTML('<html><head><title>Some Title</title></head>' | 354 >>> html = HTML('<html><head><title>Some Title</title></head>' |
349 ... '<body>Some <em>body</em> text.</body></html>') | 355 ... '<body>Some <em>body</em> text.</body></html>', |
356 ... encoding='utf-8') | |
350 >>> print(html | Transformer('.//em').wrap('strong')) | 357 >>> print(html | Transformer('.//em').wrap('strong')) |
351 <html><head><title>Some Title</title></head><body>Some | 358 <html><head><title>Some Title</title></head><body>Some |
352 <strong><em>body</em></strong> text.</body></html> | 359 <strong><em>body</em></strong> text.</body></html> |
353 | 360 |
354 :param element: either a tag name (as string) or an `Element` object | 361 :param element: either a tag name (as string) or an `Element` object |
360 | 367 |
361 def replace(self, content): | 368 def replace(self, content): |
362 """Replace selection with content. | 369 """Replace selection with content. |
363 | 370 |
364 >>> html = HTML('<html><head><title>Some Title</title></head>' | 371 >>> html = HTML('<html><head><title>Some Title</title></head>' |
365 ... '<body>Some <em>body</em> text.</body></html>') | 372 ... '<body>Some <em>body</em> text.</body></html>', |
373 ... encoding='utf-8') | |
366 >>> print(html | Transformer('.//title/text()').replace('New Title')) | 374 >>> print(html | Transformer('.//title/text()').replace('New Title')) |
367 <html><head><title>New Title</title></head><body>Some <em>body</em> | 375 <html><head><title>New Title</title></head><body>Some <em>body</em> |
368 text.</body></html> | 376 text.</body></html> |
369 | 377 |
370 :param content: Either a callable, an iterable of events, or a string | 378 :param content: Either a callable, an iterable of events, or a string |
378 | 386 |
379 In this example we insert the word 'emphasised' before the <em> opening | 387 In this example we insert the word 'emphasised' before the <em> opening |
380 tag: | 388 tag: |
381 | 389 |
382 >>> html = HTML('<html><head><title>Some Title</title></head>' | 390 >>> html = HTML('<html><head><title>Some Title</title></head>' |
383 ... '<body>Some <em>body</em> text.</body></html>') | 391 ... '<body>Some <em>body</em> text.</body></html>', |
392 ... encoding='utf-8') | |
384 >>> print(html | Transformer('.//em').before('emphasised ')) | 393 >>> print(html | Transformer('.//em').before('emphasised ')) |
385 <html><head><title>Some Title</title></head><body>Some emphasised | 394 <html><head><title>Some Title</title></head><body>Some emphasised |
386 <em>body</em> text.</body></html> | 395 <em>body</em> text.</body></html> |
387 | 396 |
388 :param content: Either a callable, an iterable of events, or a string | 397 :param content: Either a callable, an iterable of events, or a string |
395 """Insert content after selection. | 404 """Insert content after selection. |
396 | 405 |
397 Here, we insert some text after the </em> closing tag: | 406 Here, we insert some text after the </em> closing tag: |
398 | 407 |
399 >>> html = HTML('<html><head><title>Some Title</title></head>' | 408 >>> html = HTML('<html><head><title>Some Title</title></head>' |
400 ... '<body>Some <em>body</em> text.</body></html>') | 409 ... '<body>Some <em>body</em> text.</body></html>', |
410 ... encoding='utf-8') | |
401 >>> print(html | Transformer('.//em').after(' rock')) | 411 >>> print(html | Transformer('.//em').after(' rock')) |
402 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 412 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
403 rock text.</body></html> | 413 rock text.</body></html> |
404 | 414 |
405 :param content: Either a callable, an iterable of events, or a string | 415 :param content: Either a callable, an iterable of events, or a string |
412 """Insert content after the ENTER event of the selection. | 422 """Insert content after the ENTER event of the selection. |
413 | 423 |
414 Inserting some new text at the start of the <body>: | 424 Inserting some new text at the start of the <body>: |
415 | 425 |
416 >>> html = HTML('<html><head><title>Some Title</title></head>' | 426 >>> html = HTML('<html><head><title>Some Title</title></head>' |
417 ... '<body>Some <em>body</em> text.</body></html>') | 427 ... '<body>Some <em>body</em> text.</body></html>', |
428 ... encoding='utf-8') | |
418 >>> print(html | Transformer('.//body').prepend('Some new body text. ')) | 429 >>> print(html | Transformer('.//body').prepend('Some new body text. ')) |
419 <html><head><title>Some Title</title></head><body>Some new body text. | 430 <html><head><title>Some Title</title></head><body>Some new body text. |
420 Some <em>body</em> text.</body></html> | 431 Some <em>body</em> text.</body></html> |
421 | 432 |
422 :param content: Either a callable, an iterable of events, or a string | 433 :param content: Either a callable, an iterable of events, or a string |
427 | 438 |
428 def append(self, content): | 439 def append(self, content): |
429 """Insert content before the END event of the selection. | 440 """Insert content before the END event of the selection. |
430 | 441 |
431 >>> html = HTML('<html><head><title>Some Title</title></head>' | 442 >>> html = HTML('<html><head><title>Some Title</title></head>' |
432 ... '<body>Some <em>body</em> text.</body></html>') | 443 ... '<body>Some <em>body</em> text.</body></html>', |
444 ... encoding='utf-8') | |
433 >>> print(html | Transformer('.//body').append(' Some new body text.')) | 445 >>> print(html | Transformer('.//body').append(' Some new body text.')) |
434 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 446 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
435 text. Some new body text.</body></html> | 447 text. Some new body text.</body></html> |
436 | 448 |
437 :param content: Either a callable, an iterable of events, or a string | 449 :param content: Either a callable, an iterable of events, or a string |
448 If `value` evaluates to `None` the attribute will be deleted from the | 460 If `value` evaluates to `None` the attribute will be deleted from the |
449 element: | 461 element: |
450 | 462 |
451 >>> html = HTML('<html><head><title>Some Title</title></head>' | 463 >>> html = HTML('<html><head><title>Some Title</title></head>' |
452 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' | 464 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' |
453 ... '</html>') | 465 ... '</html>', encoding='utf-8') |
454 >>> print(html | Transformer('body/em').attr('class', None)) | 466 >>> print(html | Transformer('body/em').attr('class', None)) |
455 <html><head><title>Some Title</title></head><body>Some <em>body</em> | 467 <html><head><title>Some Title</title></head><body>Some <em>body</em> |
456 <em>text</em>.</body></html> | 468 <em>text</em>.</body></html> |
457 | 469 |
458 Otherwise the attribute will be set to `value`: | 470 Otherwise the attribute will be set to `value`: |
491 be appended to the buffer rather than replacing it. | 503 be appended to the buffer rather than replacing it. |
492 | 504 |
493 >>> from genshi.builder import tag | 505 >>> from genshi.builder import tag |
494 >>> buffer = StreamBuffer() | 506 >>> buffer = StreamBuffer() |
495 >>> html = HTML('<html><head><title>Some Title</title></head>' | 507 >>> html = HTML('<html><head><title>Some Title</title></head>' |
496 ... '<body>Some <em>body</em> text.</body></html>') | 508 ... '<body>Some <em>body</em> text.</body></html>', |
509 ... encoding='utf-8') | |
497 >>> print(html | Transformer('head/title/text()').copy(buffer) | 510 >>> print(html | Transformer('head/title/text()').copy(buffer) |
498 ... .end().select('body').prepend(tag.h1(buffer))) | 511 ... .end().select('body').prepend(tag.h1(buffer))) |
499 <html><head><title>Some Title</title></head><body><h1>Some | 512 <html><head><title>Some Title</title></head><body><h1>Some |
500 Title</h1>Some <em>body</em> text.</body></html> | 513 Title</h1>Some <em>body</em> text.</body></html> |
501 | 514 |
512 | 525 |
513 Element attributes can also be copied for later use: | 526 Element attributes can also be copied for later use: |
514 | 527 |
515 >>> html = HTML('<html><head><title>Some Title</title></head>' | 528 >>> html = HTML('<html><head><title>Some Title</title></head>' |
516 ... '<body><em>Some</em> <em class="before">body</em>' | 529 ... '<body><em>Some</em> <em class="before">body</em>' |
517 ... '<em>text</em>.</body></html>') | 530 ... '<em>text</em>.</body></html>', |
531 ... encoding='utf-8') | |
518 >>> buffer = StreamBuffer() | 532 >>> buffer = StreamBuffer() |
519 >>> def apply_attr(name, entry): | 533 >>> def apply_attr(name, entry): |
520 ... return list(buffer)[0][1][1].get('class') | 534 ... return list(buffer)[0][1][1].get('class') |
521 >>> print(html | Transformer('body/em[@class]/@class').copy(buffer) | 535 >>> print(html | Transformer('body/em[@class]/@class').copy(buffer) |
522 ... .end().buffer().select('body/em[not(@class)]') | 536 ... .end().buffer().select('body/em[not(@class)]') |
544 """Copy selection into buffer and remove the selection from the stream. | 558 """Copy selection into buffer and remove the selection from the stream. |
545 | 559 |
546 >>> from genshi.builder import tag | 560 >>> from genshi.builder import tag |
547 >>> buffer = StreamBuffer() | 561 >>> buffer = StreamBuffer() |
548 >>> html = HTML('<html><head><title>Some Title</title></head>' | 562 >>> html = HTML('<html><head><title>Some Title</title></head>' |
549 ... '<body>Some <em>body</em> text.</body></html>') | 563 ... '<body>Some <em>body</em> text.</body></html>', |
564 ... encoding='utf-8') | |
550 >>> print(html | Transformer('.//em/text()').cut(buffer) | 565 >>> print(html | Transformer('.//em/text()').cut(buffer) |
551 ... .end().select('.//em').after(tag.h1(buffer))) | 566 ... .end().select('.//em').after(tag.h1(buffer))) |
552 <html><head><title>Some Title</title></head><body>Some | 567 <html><head><title>Some Title</title></head><body>Some |
553 <em/><h1>body</h1> text.</body></html> | 568 <em/><h1>body</h1> text.</body></html> |
554 | 569 |
575 | 590 |
576 For example, to move all <note> elements inside a <notes> tag at the | 591 For example, to move all <note> elements inside a <notes> tag at the |
577 top of the document: | 592 top of the document: |
578 | 593 |
579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' | 594 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' |
580 ... 'text <note>two</note>.</body></doc>') | 595 ... 'text <note>two</note>.</body></doc>', |
596 ... encoding='utf-8') | |
581 >>> buffer = StreamBuffer() | 597 >>> buffer = StreamBuffer() |
582 >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) | 598 >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) |
583 ... .end().buffer().select('notes').prepend(buffer)) | 599 ... .end().buffer().select('notes').prepend(buffer)) |
584 <doc><notes><note>one</note><note>two</note></notes><body>Some text | 600 <doc><notes><note>one</note><note>two</note></notes><body>Some text |
585 .</body></doc> | 601 .</body></doc> |
593 """Apply a normal stream filter to the selection. The filter is called | 609 """Apply a normal stream filter to the selection. The filter is called |
594 once for each contiguous block of marked events. | 610 once for each contiguous block of marked events. |
595 | 611 |
596 >>> from genshi.filters.html import HTMLSanitizer | 612 >>> from genshi.filters.html import HTMLSanitizer |
597 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' | 613 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' |
598 ... '</script> and some more text</body></html>') | 614 ... '</script> and some more text</body></html>', |
615 ... encoding='utf-8') | |
599 >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) | 616 >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) |
600 <html><body>Some text and some more text</body></html> | 617 <html><body>Some text and some more text</body></html> |
601 | 618 |
602 :param filter: The stream filter to apply. | 619 :param filter: The stream filter to apply. |
603 :rtype: `Transformer` | 620 :rtype: `Transformer` |
607 def map(self, function, kind): | 624 def map(self, function, kind): |
608 """Applies a function to the ``data`` element of events of ``kind`` in | 625 """Applies a function to the ``data`` element of events of ``kind`` in |
609 the selection. | 626 the selection. |
610 | 627 |
611 >>> html = HTML('<html><head><title>Some Title</title></head>' | 628 >>> html = HTML('<html><head><title>Some Title</title></head>' |
612 ... '<body>Some <em>body</em> text.</body></html>') | 629 ... '<body>Some <em>body</em> text.</body></html>', |
630 ... encoding='utf-8') | |
613 >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) | 631 >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) |
614 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> | 632 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> |
615 text.</body></html> | 633 text.</body></html> |
616 | 634 |
617 :param function: the function to apply | 635 :param function: the function to apply |
625 | 643 |
626 Refer to the documentation for ``re.sub()`` for details. | 644 Refer to the documentation for ``re.sub()`` for details. |
627 | 645 |
628 >>> html = HTML('<html><body>Some text, some more text and ' | 646 >>> html = HTML('<html><body>Some text, some more text and ' |
629 ... '<b>some bold text</b>\\n' | 647 ... '<b>some bold text</b>\\n' |
630 ... '<i>some italicised text</i></body></html>') | 648 ... '<i>some italicised text</i></body></html>', |
649 ... encoding='utf-8') | |
631 >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) | 650 >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) |
632 <html><body>Some text, some more text and <b>SOME bold text</b> | 651 <html><body>Some text, some more text and <b>SOME bold text</b> |
633 <i>some italicised text</i></body></html> | 652 <i>some italicised text</i></body></html> |
634 >>> tags = tag.html(tag.body('Some text, some more text and\\n', | 653 >>> tags = tag.html(tag.body('Some text, some more text and\\n', |
635 ... Markup('<b>some bold text</b>'))) | 654 ... Markup('<b>some bold text</b>'))) |
647 | 666 |
648 def rename(self, name): | 667 def rename(self, name): |
649 """Rename matching elements. | 668 """Rename matching elements. |
650 | 669 |
651 >>> html = HTML('<html><body>Some text, some more text and ' | 670 >>> html = HTML('<html><body>Some text, some more text and ' |
652 ... '<b>some bold text</b></body></html>') | 671 ... '<b>some bold text</b></body></html>', |
672 ... encoding='utf-8') | |
653 >>> print(html | Transformer('body/b').rename('strong')) | 673 >>> print(html | Transformer('body/b').rename('strong')) |
654 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> | 674 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> |
655 """ | 675 """ |
656 return self.apply(RenameTransformation(name)) | 676 return self.apply(RenameTransformation(name)) |
657 | 677 |
658 def trace(self, prefix='', fileobj=None): | 678 def trace(self, prefix='', fileobj=None): |
659 """Print events as they pass through the transform. | 679 """Print events as they pass through the transform. |
660 | 680 |
661 >>> html = HTML('<body>Some <em>test</em> text</body>') | 681 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') |
662 >>> print(html | Transformer('em').trace()) | 682 >>> print(html | Transformer('em').trace()) |
663 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) | 683 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) |
664 (None, ('TEXT', u'Some ', (None, 1, 6))) | 684 (None, ('TEXT', u'Some ', (None, 1, 6))) |
665 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) | 685 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11))) |
666 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) | 686 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) |
1022 ... def __call__(self, stream): | 1042 ... def __call__(self, stream): |
1023 ... for event in self._inject(): | 1043 ... for event in self._inject(): |
1024 ... yield event | 1044 ... yield event |
1025 ... for event in stream: | 1045 ... for event in stream: |
1026 ... yield event | 1046 ... yield event |
1027 >>> html = HTML('<body>Some <em>test</em> text</body>') | 1047 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') |
1028 >>> print(html | Transformer('.//em').apply(Top('Prefix '))) | 1048 >>> print(html | Transformer('.//em').apply(Top('Prefix '))) |
1029 Prefix <body>Some <em>test</em> text</body> | 1049 Prefix <body>Some <em>test</em> text</body> |
1030 """ | 1050 """ |
1031 def __init__(self, content): | 1051 def __init__(self, content): |
1032 """Create a new injector. | 1052 """Create a new injector. |