comparison genshi/filters/transform.py @ 902:09cc3627654c experimental-inline

Sync `experimental/inline` branch with [source:trunk@1126].
author cmlenz
date Fri, 23 Apr 2010 21:08:26 +0000
parents 1837f39efd6f
children
comparison
equal deleted inserted replaced
830:de82830f8816 902:09cc3627654c
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 # 2 #
3 # Copyright (C) 2007 Edgewall Software 3 # Copyright (C) 2007-2009 Edgewall Software
4 # All rights reserved. 4 # All rights reserved.
5 # 5 #
6 # This software is licensed as described in the file COPYING, which 6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms 7 # you should have received as part of this distribution. The terms
8 # are also available at http://genshi.edgewall.org/wiki/License. 8 # are also available at http://genshi.edgewall.org/wiki/License.
30 ... <head><title>Some Title</title></head> 30 ... <head><title>Some Title</title></head>
31 ... <body> 31 ... <body>
32 ... Some <em>body</em> text. 32 ... Some <em>body</em> text.
33 ... </body> 33 ... </body>
34 ... </html>''') 34 ... </html>''')
35 >>> print html | Transformer('body/em').map(unicode.upper, TEXT) \\ 35 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
36 ... .unwrap().wrap(tag.u) 36 ... .unwrap().wrap(tag.u))
37 <html> 37 <html>
38 <head><title>Some Title</title></head> 38 <head><title>Some Title</title></head>
39 <body> 39 <body>
40 Some <u>BODY</u> text. 40 Some <u>BODY</u> text.
41 </body> 41 </body>
140 140
141 Transformations act on selected stream events matching an XPath expression. 141 Transformations act on selected stream events matching an XPath expression.
142 Here's an example of removing some markup (the title, in this case) 142 Here's an example of removing some markup (the title, in this case)
143 selected by an expression: 143 selected by an expression:
144 144
145 >>> print html | Transformer('head/title').remove() 145 >>> print(html | Transformer('head/title').remove())
146 <html><head/><body>Some <em>body</em> text.</body></html> 146 <html><head/><body>Some <em>body</em> text.</body></html>
147 147
148 Inserted content can be passed in the form of a string, or a markup event 148 Inserted content can be passed in the form of a string, or a markup event
149 stream, which includes streams generated programmatically via the 149 stream, which includes streams generated programmatically via the
150 `builder` module: 150 `builder` module:
151 151
152 >>> from genshi.builder import tag 152 >>> from genshi.builder import tag
153 >>> print html | Transformer('body').prepend(tag.h1('Document Title')) 153 >>> print(html | Transformer('body').prepend(tag.h1('Document Title')))
154 <html><head><title>Some Title</title></head><body><h1>Document 154 <html><head><title>Some Title</title></head><body><h1>Document
155 Title</h1>Some <em>body</em> text.</body></html> 155 Title</h1>Some <em>body</em> text.</body></html>
156 156
157 Each XPath expression determines the set of tags that will be acted upon by 157 Each XPath expression determines the set of tags that will be acted upon by
158 subsequent transformations. In this example we select the ``<title>`` text, 158 subsequent transformations. In this example we select the ``<title>`` text,
159 copy it into a buffer, then select the ``<body>`` element and paste the 159 copy it into a buffer, then select the ``<body>`` element and paste the
160 copied text into the body as ``<h1>`` enclosed text: 160 copied text into the body as ``<h1>`` enclosed text:
161 161
162 >>> buffer = StreamBuffer() 162 >>> buffer = StreamBuffer()
163 >>> print html | Transformer('head/title/text()').copy(buffer) \\ 163 >>> print(html | Transformer('head/title/text()').copy(buffer)
164 ... .end().select('body').prepend(tag.h1(buffer)) 164 ... .end().select('body').prepend(tag.h1(buffer)))
165 <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some 165 <html><head><title>Some Title</title></head><body><h1>Some Title</h1>Some
166 <em>body</em> text.</body></html> 166 <em>body</em> text.</body></html>
167 167
168 Transformations can also be assigned and reused, although care must be 168 Transformations can also be assigned and reused, although care must be
169 taken when using buffers, to ensure that buffers are cleared between 169 taken when using buffers, to ensure that buffers are cleared between
170 transforms: 170 transforms:
171 171
172 >>> emphasis = Transformer('body//em').attr('class', 'emphasis') 172 >>> emphasis = Transformer('body//em').attr('class', 'emphasis')
173 >>> print html | emphasis 173 >>> print(html | emphasis)
174 <html><head><title>Some Title</title></head><body>Some <em 174 <html><head><title>Some Title</title></head><body>Some <em
175 class="emphasis">body</em> text.</body></html> 175 class="emphasis">body</em> text.</body></html>
176 """ 176 """
177 177
178 __slots__ = ['transforms'] 178 __slots__ = ['transforms']
214 ... if mark and kind is TEXT: 214 ... if mark and kind is TEXT:
215 ... yield mark, (kind, data.upper(), pos) 215 ... yield mark, (kind, data.upper(), pos)
216 ... else: 216 ... else:
217 ... yield mark, (kind, data, pos) 217 ... yield mark, (kind, data, pos)
218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>') 218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
219 >>> print short_stream | Transformer('.//em/text()').apply(upper) 219 >>> print(short_stream | Transformer('.//em/text()').apply(upper))
220 <body>Some <em>TEST</em> text</body> 220 <body>Some <em>TEST</em> text</body>
221 """ 221 """
222 transformer = Transformer() 222 transformer = Transformer()
223 transformer.transforms = self.transforms[:] 223 transformer.transforms = self.transforms[:]
224 if isinstance(function, Transformer): 224 if isinstance(function, Transformer):
232 def select(self, path): 232 def select(self, path):
233 """Mark events matching the given XPath expression, within the current 233 """Mark events matching the given XPath expression, within the current
234 selection. 234 selection.
235 235
236 >>> html = HTML('<body>Some <em>test</em> text</body>') 236 >>> html = HTML('<body>Some <em>test</em> text</body>')
237 >>> print html | Transformer().select('.//em').trace() 237 >>> print(html | Transformer().select('.//em').trace())
238 (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) 238 (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
239 (None, ('TEXT', u'Some ', (None, 1, 6))) 239 (None, ('TEXT', u'Some ', (None, 1, 6)))
240 ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) 240 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
241 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) 241 ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
242 ('EXIT', ('END', QName(u'em'), (None, 1, 19))) 242 ('EXIT', ('END', QName('em'), (None, 1, 19)))
243 (None, ('TEXT', u' text', (None, 1, 24))) 243 (None, ('TEXT', u' text', (None, 1, 24)))
244 (None, ('END', QName(u'body'), (None, 1, 29))) 244 (None, ('END', QName('body'), (None, 1, 29)))
245 <body>Some <em>test</em> text</body> 245 <body>Some <em>test</em> text</body>
246 246
247 :param path: an XPath expression (as string) or a `Path` instance 247 :param path: an XPath expression (as string) or a `Path` instance
248 :return: the stream augmented by transformation marks 248 :return: the stream augmented by transformation marks
249 :rtype: `Transformer` 249 :rtype: `Transformer`
256 256
257 Specifically, all marks are converted to null marks, and all null marks 257 Specifically, all marks are converted to null marks, and all null marks
258 are converted to OUTSIDE marks. 258 are converted to OUTSIDE marks.
259 259
260 >>> html = HTML('<body>Some <em>test</em> text</body>') 260 >>> html = HTML('<body>Some <em>test</em> text</body>')
261 >>> print html | Transformer('//em').invert().trace() 261 >>> print(html | Transformer('//em').invert().trace())
262 ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0))) 262 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
263 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) 263 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
264 (None, ('START', (QName(u'em'), Attrs()), (None, 1, 11))) 264 (None, ('START', (QName('em'), Attrs()), (None, 1, 11)))
265 (None, ('TEXT', u'test', (None, 1, 15))) 265 (None, ('TEXT', u'test', (None, 1, 15)))
266 (None, ('END', QName(u'em'), (None, 1, 19))) 266 (None, ('END', QName('em'), (None, 1, 19)))
267 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) 267 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
268 ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29))) 268 ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
269 <body>Some <em>test</em> text</body> 269 <body>Some <em>test</em> text</body>
270 270
271 :rtype: `Transformer` 271 :rtype: `Transformer`
272 """ 272 """
273 return self.apply(InvertTransformation()) 273 return self.apply(InvertTransformation())
276 """End current selection, allowing all events to be selected. 276 """End current selection, allowing all events to be selected.
277 277
278 Example: 278 Example:
279 279
280 >>> html = HTML('<body>Some <em>test</em> text</body>') 280 >>> html = HTML('<body>Some <em>test</em> text</body>')
281 >>> print html | Transformer('//em').end().trace() 281 >>> print(html | Transformer('//em').end().trace())
282 ('OUTSIDE', ('START', (QName(u'body'), Attrs()), (None, 1, 0))) 282 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
283 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) 283 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
284 ('OUTSIDE', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) 284 ('OUTSIDE', ('START', (QName('em'), Attrs()), (None, 1, 11)))
285 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15))) 285 ('OUTSIDE', ('TEXT', u'test', (None, 1, 15)))
286 ('OUTSIDE', ('END', QName(u'em'), (None, 1, 19))) 286 ('OUTSIDE', ('END', QName('em'), (None, 1, 19)))
287 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24))) 287 ('OUTSIDE', ('TEXT', u' text', (None, 1, 24)))
288 ('OUTSIDE', ('END', QName(u'body'), (None, 1, 29))) 288 ('OUTSIDE', ('END', QName('body'), (None, 1, 29)))
289 <body>Some <em>test</em> text</body> 289 <body>Some <em>test</em> text</body>
290 290
291 :return: the stream augmented by transformation marks 291 :return: the stream augmented by transformation marks
292 :rtype: `Transformer` 292 :rtype: `Transformer`
293 """ 293 """
300 300
301 Example: 301 Example:
302 302
303 >>> html = HTML('<html><head><title>Some Title</title></head>' 303 >>> html = HTML('<html><head><title>Some Title</title></head>'
304 ... '<body>Some <em>body</em> text.</body></html>') 304 ... '<body>Some <em>body</em> text.</body></html>')
305 >>> print html | Transformer('.//em').empty() 305 >>> print(html | Transformer('.//em').empty())
306 <html><head><title>Some Title</title></head><body>Some <em/> 306 <html><head><title>Some Title</title></head><body>Some <em/>
307 text.</body></html> 307 text.</body></html>
308 308
309 :rtype: `Transformer` 309 :rtype: `Transformer`
310 """ 310 """
315 315
316 Example: 316 Example:
317 317
318 >>> html = HTML('<html><head><title>Some Title</title></head>' 318 >>> html = HTML('<html><head><title>Some Title</title></head>'
319 ... '<body>Some <em>body</em> text.</body></html>') 319 ... '<body>Some <em>body</em> text.</body></html>')
320 >>> print html | Transformer('.//em').remove() 320 >>> print(html | Transformer('.//em').remove())
321 <html><head><title>Some Title</title></head><body>Some 321 <html><head><title>Some Title</title></head><body>Some
322 text.</body></html> 322 text.</body></html>
323 323
324 :rtype: `Transformer` 324 :rtype: `Transformer`
325 """ 325 """
332 332
333 Example: 333 Example:
334 334
335 >>> html = HTML('<html><head><title>Some Title</title></head>' 335 >>> html = HTML('<html><head><title>Some Title</title></head>'
336 ... '<body>Some <em>body</em> text.</body></html>') 336 ... '<body>Some <em>body</em> text.</body></html>')
337 >>> print html | Transformer('.//em').unwrap() 337 >>> print(html | Transformer('.//em').unwrap())
338 <html><head><title>Some Title</title></head><body>Some body 338 <html><head><title>Some Title</title></head><body>Some body
339 text.</body></html> 339 text.</body></html>
340 340
341 :rtype: `Transformer` 341 :rtype: `Transformer`
342 """ 342 """
345 def wrap(self, element): 345 def wrap(self, element):
346 """Wrap selection in an element. 346 """Wrap selection in an element.
347 347
348 >>> html = HTML('<html><head><title>Some Title</title></head>' 348 >>> html = HTML('<html><head><title>Some Title</title></head>'
349 ... '<body>Some <em>body</em> text.</body></html>') 349 ... '<body>Some <em>body</em> text.</body></html>')
350 >>> print html | Transformer('.//em').wrap('strong') 350 >>> print(html | Transformer('.//em').wrap('strong'))
351 <html><head><title>Some Title</title></head><body>Some 351 <html><head><title>Some Title</title></head><body>Some
352 <strong><em>body</em></strong> text.</body></html> 352 <strong><em>body</em></strong> text.</body></html>
353 353
354 :param element: either a tag name (as string) or an `Element` object 354 :param element: either a tag name (as string) or an `Element` object
355 :rtype: `Transformer` 355 :rtype: `Transformer`
361 def replace(self, content): 361 def replace(self, content):
362 """Replace selection with content. 362 """Replace selection with content.
363 363
364 >>> html = HTML('<html><head><title>Some Title</title></head>' 364 >>> html = HTML('<html><head><title>Some Title</title></head>'
365 ... '<body>Some <em>body</em> text.</body></html>') 365 ... '<body>Some <em>body</em> text.</body></html>')
366 >>> print html | Transformer('.//title/text()').replace('New Title') 366 >>> print(html | Transformer('.//title/text()').replace('New Title'))
367 <html><head><title>New Title</title></head><body>Some <em>body</em> 367 <html><head><title>New Title</title></head><body>Some <em>body</em>
368 text.</body></html> 368 text.</body></html>
369 369
370 :param content: Either a callable, an iterable of events, or a string 370 :param content: Either a callable, an iterable of events, or a string
371 to insert. 371 to insert.
379 In this example we insert the word 'emphasised' before the <em> opening 379 In this example we insert the word 'emphasised' before the <em> opening
380 tag: 380 tag:
381 381
382 >>> html = HTML('<html><head><title>Some Title</title></head>' 382 >>> html = HTML('<html><head><title>Some Title</title></head>'
383 ... '<body>Some <em>body</em> text.</body></html>') 383 ... '<body>Some <em>body</em> text.</body></html>')
384 >>> print html | Transformer('.//em').before('emphasised ') 384 >>> print(html | Transformer('.//em').before('emphasised '))
385 <html><head><title>Some Title</title></head><body>Some emphasised 385 <html><head><title>Some Title</title></head><body>Some emphasised
386 <em>body</em> text.</body></html> 386 <em>body</em> text.</body></html>
387 387
388 :param content: Either a callable, an iterable of events, or a string 388 :param content: Either a callable, an iterable of events, or a string
389 to insert. 389 to insert.
396 396
397 Here, we insert some text after the </em> closing tag: 397 Here, we insert some text after the </em> closing tag:
398 398
399 >>> html = HTML('<html><head><title>Some Title</title></head>' 399 >>> html = HTML('<html><head><title>Some Title</title></head>'
400 ... '<body>Some <em>body</em> text.</body></html>') 400 ... '<body>Some <em>body</em> text.</body></html>')
401 >>> print html | Transformer('.//em').after(' rock') 401 >>> print(html | Transformer('.//em').after(' rock'))
402 <html><head><title>Some Title</title></head><body>Some <em>body</em> 402 <html><head><title>Some Title</title></head><body>Some <em>body</em>
403 rock text.</body></html> 403 rock text.</body></html>
404 404
405 :param content: Either a callable, an iterable of events, or a string 405 :param content: Either a callable, an iterable of events, or a string
406 to insert. 406 to insert.
413 413
414 Inserting some new text at the start of the <body>: 414 Inserting some new text at the start of the <body>:
415 415
416 >>> html = HTML('<html><head><title>Some Title</title></head>' 416 >>> html = HTML('<html><head><title>Some Title</title></head>'
417 ... '<body>Some <em>body</em> text.</body></html>') 417 ... '<body>Some <em>body</em> text.</body></html>')
418 >>> print html | Transformer('.//body').prepend('Some new body text. ') 418 >>> print(html | Transformer('.//body').prepend('Some new body text. '))
419 <html><head><title>Some Title</title></head><body>Some new body text. 419 <html><head><title>Some Title</title></head><body>Some new body text.
420 Some <em>body</em> text.</body></html> 420 Some <em>body</em> text.</body></html>
421 421
422 :param content: Either a callable, an iterable of events, or a string 422 :param content: Either a callable, an iterable of events, or a string
423 to insert. 423 to insert.
428 def append(self, content): 428 def append(self, content):
429 """Insert content before the END event of the selection. 429 """Insert content before the END event of the selection.
430 430
431 >>> html = HTML('<html><head><title>Some Title</title></head>' 431 >>> html = HTML('<html><head><title>Some Title</title></head>'
432 ... '<body>Some <em>body</em> text.</body></html>') 432 ... '<body>Some <em>body</em> text.</body></html>')
433 >>> print html | Transformer('.//body').append(' Some new body text.') 433 >>> print(html | Transformer('.//body').append(' Some new body text.'))
434 <html><head><title>Some Title</title></head><body>Some <em>body</em> 434 <html><head><title>Some Title</title></head><body>Some <em>body</em>
435 text. Some new body text.</body></html> 435 text. Some new body text.</body></html>
436 436
437 :param content: Either a callable, an iterable of events, or a string 437 :param content: Either a callable, an iterable of events, or a string
438 to insert. 438 to insert.
449 element: 449 element:
450 450
451 >>> html = HTML('<html><head><title>Some Title</title></head>' 451 >>> html = HTML('<html><head><title>Some Title</title></head>'
452 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' 452 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>'
453 ... '</html>') 453 ... '</html>')
454 >>> print html | Transformer('body/em').attr('class', None) 454 >>> print(html | Transformer('body/em').attr('class', None))
455 <html><head><title>Some Title</title></head><body>Some <em>body</em> 455 <html><head><title>Some Title</title></head><body>Some <em>body</em>
456 <em>text</em>.</body></html> 456 <em>text</em>.</body></html>
457 457
458 Otherwise the attribute will be set to `value`: 458 Otherwise the attribute will be set to `value`:
459 459
460 >>> print html | Transformer('body/em').attr('class', 'emphasis') 460 >>> print(html | Transformer('body/em').attr('class', 'emphasis'))
461 <html><head><title>Some Title</title></head><body>Some <em 461 <html><head><title>Some Title</title></head><body>Some <em
462 class="emphasis">body</em> <em class="emphasis">text</em>.</body></html> 462 class="emphasis">body</em> <em class="emphasis">text</em>.</body></html>
463 463
464 If `value` is a callable it will be called with the attribute name and 464 If `value` is a callable it will be called with the attribute name and
465 the `START` event for the matching element. Its return value will then 465 the `START` event for the matching element. Its return value will then
466 be used to set the attribute: 466 be used to set the attribute:
467 467
468 >>> def print_attr(name, event): 468 >>> def print_attr(name, event):
469 ... attrs = event[1][1] 469 ... attrs = event[1][1]
470 ... print attrs 470 ... print(attrs)
471 ... return attrs.get(name) 471 ... return attrs.get(name)
472 >>> print html | Transformer('body/em').attr('class', print_attr) 472 >>> print(html | Transformer('body/em').attr('class', print_attr))
473 Attrs([(QName(u'class'), u'before')]) 473 Attrs([(QName('class'), u'before')])
474 Attrs() 474 Attrs()
475 <html><head><title>Some Title</title></head><body>Some <em 475 <html><head><title>Some Title</title></head><body>Some <em
476 class="before">body</em> <em>text</em>.</body></html> 476 class="before">body</em> <em>text</em>.</body></html>
477 477
478 :param name: the name of the attribute 478 :param name: the name of the attribute
492 492
493 >>> from genshi.builder import tag 493 >>> from genshi.builder import tag
494 >>> buffer = StreamBuffer() 494 >>> buffer = StreamBuffer()
495 >>> html = HTML('<html><head><title>Some Title</title></head>' 495 >>> html = HTML('<html><head><title>Some Title</title></head>'
496 ... '<body>Some <em>body</em> text.</body></html>') 496 ... '<body>Some <em>body</em> text.</body></html>')
497 >>> print html | Transformer('head/title/text()').copy(buffer) \\ 497 >>> print(html | Transformer('head/title/text()').copy(buffer)
498 ... .end().select('body').prepend(tag.h1(buffer)) 498 ... .end().select('body').prepend(tag.h1(buffer)))
499 <html><head><title>Some Title</title></head><body><h1>Some 499 <html><head><title>Some Title</title></head><body><h1>Some
500 Title</h1>Some <em>body</em> text.</body></html> 500 Title</h1>Some <em>body</em> text.</body></html>
501 501
502 This example illustrates that only a single contiguous selection will 502 This example illustrates that only a single contiguous selection will
503 be buffered: 503 be buffered:
504 504
505 >>> print html | Transformer('head/title/text()').copy(buffer) \\ 505 >>> print(html | Transformer('head/title/text()').copy(buffer)
506 ... .end().select('body/em').copy(buffer).end().select('body') \\ 506 ... .end().select('body/em').copy(buffer).end().select('body')
507 ... .prepend(tag.h1(buffer)) 507 ... .prepend(tag.h1(buffer)))
508 <html><head><title>Some Title</title></head><body><h1>Some 508 <html><head><title>Some Title</title></head><body><h1>Some
509 Title</h1>Some <em>body</em> text.</body></html> 509 Title</h1>Some <em>body</em> text.</body></html>
510 >>> print buffer 510 >>> print(buffer)
511 <em>body</em> 511 <em>body</em>
512 512
513 Element attributes can also be copied for later use: 513 Element attributes can also be copied for later use:
514 514
515 >>> html = HTML('<html><head><title>Some Title</title></head>' 515 >>> html = HTML('<html><head><title>Some Title</title></head>'
516 ... '<body><em>Some</em> <em class="before">body</em>' 516 ... '<body><em>Some</em> <em class="before">body</em>'
517 ... '<em>text</em>.</body></html>') 517 ... '<em>text</em>.</body></html>')
518 >>> buffer = StreamBuffer() 518 >>> buffer = StreamBuffer()
519 >>> def apply_attr(name, entry): 519 >>> def apply_attr(name, entry):
520 ... return list(buffer)[0][1][1].get('class') 520 ... return list(buffer)[0][1][1].get('class')
521 >>> print html | Transformer('body/em[@class]/@class').copy(buffer) \\ 521 >>> print(html | Transformer('body/em[@class]/@class').copy(buffer)
522 ... .end().buffer().select('body/em[not(@class)]') \\ 522 ... .end().buffer().select('body/em[not(@class)]')
523 ... .attr('class', apply_attr) 523 ... .attr('class', apply_attr))
524 <html><head><title>Some Title</title></head><body><em 524 <html><head><title>Some Title</title></head><body><em
525 class="before">Some</em> <em class="before">body</em><em 525 class="before">Some</em> <em class="before">body</em><em
526 class="before">text</em>.</body></html> 526 class="before">text</em>.</body></html>
527 527
528 528
545 545
546 >>> from genshi.builder import tag 546 >>> from genshi.builder import tag
547 >>> buffer = StreamBuffer() 547 >>> buffer = StreamBuffer()
548 >>> html = HTML('<html><head><title>Some Title</title></head>' 548 >>> html = HTML('<html><head><title>Some Title</title></head>'
549 ... '<body>Some <em>body</em> text.</body></html>') 549 ... '<body>Some <em>body</em> text.</body></html>')
550 >>> print html | Transformer('.//em/text()').cut(buffer) \\ 550 >>> print(html | Transformer('.//em/text()').cut(buffer)
551 ... .end().select('.//em').after(tag.h1(buffer)) 551 ... .end().select('.//em').after(tag.h1(buffer)))
552 <html><head><title>Some Title</title></head><body>Some 552 <html><head><title>Some Title</title></head><body>Some
553 <em/><h1>body</h1> text.</body></html> 553 <em/><h1>body</h1> text.</body></html>
554 554
555 Specifying accumulate=True appends all selected intervals onto the 555 Specifying accumulate=True appends all selected intervals onto the
556 buffer. Combining this with the .buffer() operation allows us to operate 556 buffer. Combining this with the .buffer() operation allows us to operate
577 top of the document: 577 top of the document:
578 578
579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' 579 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
580 ... 'text <note>two</note>.</body></doc>') 580 ... 'text <note>two</note>.</body></doc>')
581 >>> buffer = StreamBuffer() 581 >>> buffer = StreamBuffer()
582 >>> print doc | Transformer('body/note').cut(buffer, accumulate=True) \\ 582 >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
583 ... .end().buffer().select('notes').prepend(buffer) 583 ... .end().buffer().select('notes').prepend(buffer))
584 <doc><notes><note>one</note><note>two</note></notes><body>Some text 584 <doc><notes><note>one</note><note>two</note></notes><body>Some text
585 .</body></doc> 585 .</body></doc>
586 586
587 """ 587 """
588 return self.apply(list) 588 return self.apply(list)
594 once for each contiguous block of marked events. 594 once for each contiguous block of marked events.
595 595
596 >>> from genshi.filters.html import HTMLSanitizer 596 >>> from genshi.filters.html import HTMLSanitizer
597 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' 597 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
598 ... '</script> and some more text</body></html>') 598 ... '</script> and some more text</body></html>')
599 >>> print html | Transformer('body/*').filter(HTMLSanitizer()) 599 >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
600 <html><body>Some text and some more text</body></html> 600 <html><body>Some text and some more text</body></html>
601 601
602 :param filter: The stream filter to apply. 602 :param filter: The stream filter to apply.
603 :rtype: `Transformer` 603 :rtype: `Transformer`
604 """ 604 """
608 """Applies a function to the ``data`` element of events of ``kind`` in 608 """Applies a function to the ``data`` element of events of ``kind`` in
609 the selection. 609 the selection.
610 610
611 >>> html = HTML('<html><head><title>Some Title</title></head>' 611 >>> html = HTML('<html><head><title>Some Title</title></head>'
612 ... '<body>Some <em>body</em> text.</body></html>') 612 ... '<body>Some <em>body</em> text.</body></html>')
613 >>> print html | Transformer('head/title').map(unicode.upper, TEXT) 613 >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
614 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> 614 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
615 text.</body></html> 615 text.</body></html>
616 616
617 :param function: the function to apply 617 :param function: the function to apply
618 :param kind: the kind of event the function should be applied to 618 :param kind: the kind of event the function should be applied to
626 Refer to the documentation for ``re.sub()`` for details. 626 Refer to the documentation for ``re.sub()`` for details.
627 627
628 >>> html = HTML('<html><body>Some text, some more text and ' 628 >>> html = HTML('<html><body>Some text, some more text and '
629 ... '<b>some bold text</b>\\n' 629 ... '<b>some bold text</b>\\n'
630 ... '<i>some italicised text</i></body></html>') 630 ... '<i>some italicised text</i></body></html>')
631 >>> print html | Transformer('body/b').substitute('(?i)some', 'SOME') 631 >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
632 <html><body>Some text, some more text and <b>SOME bold text</b> 632 <html><body>Some text, some more text and <b>SOME bold text</b>
633 <i>some italicised text</i></body></html> 633 <i>some italicised text</i></body></html>
634 >>> tags = tag.html(tag.body('Some text, some more text and\\n', 634 >>> tags = tag.html(tag.body('Some text, some more text and\\n',
635 ... Markup('<b>some bold text</b>'))) 635 ... Markup('<b>some bold text</b>')))
636 >>> print tags.generate() | Transformer('body').substitute( 636 >>> print(tags.generate() | Transformer('body').substitute(
637 ... '(?i)some', 'SOME') 637 ... '(?i)some', 'SOME'))
638 <html><body>SOME text, some more text and 638 <html><body>SOME text, some more text and
639 <b>SOME bold text</b></body></html> 639 <b>SOME bold text</b></body></html>
640 640
641 :param pattern: A regular expression object or string. 641 :param pattern: A regular expression object or string.
642 :param replace: Replacement pattern. 642 :param replace: Replacement pattern.
648 def rename(self, name): 648 def rename(self, name):
649 """Rename matching elements. 649 """Rename matching elements.
650 650
651 >>> html = HTML('<html><body>Some text, some more text and ' 651 >>> html = HTML('<html><body>Some text, some more text and '
652 ... '<b>some bold text</b></body></html>') 652 ... '<b>some bold text</b></body></html>')
653 >>> print html | Transformer('body/b').rename('strong') 653 >>> print(html | Transformer('body/b').rename('strong'))
654 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> 654 <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
655 """ 655 """
656 return self.apply(RenameTransformation(name)) 656 return self.apply(RenameTransformation(name))
657 657
658 def trace(self, prefix='', fileobj=None): 658 def trace(self, prefix='', fileobj=None):
659 """Print events as they pass through the transform. 659 """Print events as they pass through the transform.
660 660
661 >>> html = HTML('<body>Some <em>test</em> text</body>') 661 >>> html = HTML('<body>Some <em>test</em> text</body>')
662 >>> print html | Transformer('em').trace() 662 >>> print(html | Transformer('em').trace())
663 (None, ('START', (QName(u'body'), Attrs()), (None, 1, 0))) 663 (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
664 (None, ('TEXT', u'Some ', (None, 1, 6))) 664 (None, ('TEXT', u'Some ', (None, 1, 6)))
665 ('ENTER', ('START', (QName(u'em'), Attrs()), (None, 1, 11))) 665 ('ENTER', ('START', (QName('em'), Attrs()), (None, 1, 11)))
666 ('INSIDE', ('TEXT', u'test', (None, 1, 15))) 666 ('INSIDE', ('TEXT', u'test', (None, 1, 15)))
667 ('EXIT', ('END', QName(u'em'), (None, 1, 19))) 667 ('EXIT', ('END', QName('em'), (None, 1, 19)))
668 (None, ('TEXT', u' text', (None, 1, 24))) 668 (None, ('TEXT', u' text', (None, 1, 24)))
669 (None, ('END', QName(u'body'), (None, 1, 29))) 669 (None, ('END', QName('body'), (None, 1, 29)))
670 <body>Some <em>test</em> text</body> 670 <body>Some <em>test</em> text</body>
671 671
672 :param prefix: a string to prefix each event with in the output 672 :param prefix: a string to prefix each event with in the output
673 :param fileobj: the writable file-like object to write to; defaults to 673 :param fileobj: the writable file-like object to write to; defaults to
674 the standard output stream 674 the standard output stream
874 """Apply the transform filter to the marked stream. 874 """Apply the transform filter to the marked stream.
875 875
876 :param stream: the marked event stream to filter 876 :param stream: the marked event stream to filter
877 """ 877 """
878 for event in stream: 878 for event in stream:
879 print>>self.fileobj, self.prefix + str(event) 879 self.fileobj.write('%s%s\n' % (self.prefix, event))
880 yield event 880 yield event
881 881
882 882
883 class FilterTransformation(object): 883 class FilterTransformation(object):
884 """Apply a normal stream filter to the selection. The filter is called once 884 """Apply a normal stream filter to the selection. The filter is called once
1023 ... for event in self._inject(): 1023 ... for event in self._inject():
1024 ... yield event 1024 ... yield event
1025 ... for event in stream: 1025 ... for event in stream:
1026 ... yield event 1026 ... yield event
1027 >>> html = HTML('<body>Some <em>test</em> text</body>') 1027 >>> html = HTML('<body>Some <em>test</em> text</body>')
1028 >>> print html | Transformer('.//em').apply(Top('Prefix ')) 1028 >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
1029 Prefix <body>Some <em>test</em> text</body> 1029 Prefix <body>Some <em>test</em> text</body>
1030 """ 1030 """
1031 def __init__(self, content): 1031 def __init__(self, content):
1032 """Create a new injector. 1032 """Create a new injector.
1033 1033
Copyright (C) 2012-2017 Edgewall Software