Mercurial > genshi > genshi-test
comparison genshi/input.py @ 856:1e2be9fb3348
Add a couple of fallback imports for Python 3.0.
author | cmlenz |
---|---|
date | Thu, 12 Nov 2009 11:36:14 +0000 |
parents | 0d9e87c6cf6e |
children | 24733a5854d9 |
comparison
equal
deleted
inserted
replaced
855:9598353ed630 | 856:1e2be9fb3348 |
---|---|
14 """Support for constructing markup streams from files, strings, or other | 14 """Support for constructing markup streams from files, strings, or other |
15 sources. | 15 sources. |
16 """ | 16 """ |
17 | 17 |
18 from itertools import chain | 18 from itertools import chain |
19 try: | |
20 import htmlentitydefs as entities | |
21 import HTMLParser as html | |
22 except ImportError: | |
23 from html import entities | |
24 from html import parser as html | |
25 try: | |
26 from StringIO import StringIO | |
27 BytesIO = StringIO | |
28 except ImportError: | |
29 from io import BytesIO, StringIO | |
19 from xml.parsers import expat | 30 from xml.parsers import expat |
20 import HTMLParser as html | |
21 import htmlentitydefs | |
22 from StringIO import StringIO | |
23 | 31 |
24 from genshi.core import Attrs, QName, Stream, stripentities | 32 from genshi.core import Attrs, QName, Stream, stripentities |
25 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \ | 33 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \ |
26 START_CDATA, END_CDATA, PI, COMMENT | 34 START_CDATA, END_CDATA, PI, COMMENT |
27 | 35 |
86 END child | 94 END child |
87 END root | 95 END root |
88 """ | 96 """ |
89 | 97 |
90 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in | 98 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in |
91 htmlentitydefs.name2codepoint.items()] | 99 entities.name2codepoint.items()] |
92 _external_dtd = '\n'.join(_entitydefs) | 100 _external_dtd = '\n'.join(_entitydefs) |
93 | 101 |
94 def __init__(self, source, filename=None, encoding=None): | 102 def __init__(self, source, filename=None, encoding=None): |
95 """Initialize the parser for the given XML input. | 103 """Initialize the parser for the given XML input. |
96 | 104 |
167 def __iter__(self): | 175 def __iter__(self): |
168 return iter(self.parse()) | 176 return iter(self.parse()) |
169 | 177 |
170 def _build_foreign(self, context, base, sysid, pubid): | 178 def _build_foreign(self, context, base, sysid, pubid): |
171 parser = self.expat.ExternalEntityParserCreate(context) | 179 parser = self.expat.ExternalEntityParserCreate(context) |
172 parser.ParseFile(StringIO(self._external_dtd)) | 180 parser.ParseFile(BytesIO(self._external_dtd)) |
173 return 1 | 181 return 1 |
174 | 182 |
175 def _enqueue(self, kind, data=None, pos=None): | 183 def _enqueue(self, kind, data=None, pos=None): |
176 if pos is None: | 184 if pos is None: |
177 pos = self._getpos() | 185 pos = self._getpos() |
235 | 243 |
236 def _handle_other(self, text): | 244 def _handle_other(self, text): |
237 if text.startswith('&'): | 245 if text.startswith('&'): |
238 # deal with undefined entities | 246 # deal with undefined entities |
239 try: | 247 try: |
240 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) | 248 text = unichr(entities.name2codepoint[text[1:-1]]) |
241 self._enqueue(TEXT, text) | 249 self._enqueue(TEXT, text) |
242 except KeyError: | 250 except KeyError: |
243 filename, lineno, offset = self._getpos() | 251 filename, lineno, offset = self._getpos() |
244 error = expat.error('undefined entity "%s": line %d, column %d' | 252 error = expat.error('undefined entity "%s": line %d, column %d' |
245 % (text, lineno, offset)) | 253 % (text, lineno, offset)) |
265 | 273 |
266 :param text: the XML source | 274 :param text: the XML source |
267 :return: the parsed XML event stream | 275 :return: the parsed XML event stream |
268 :raises ParseError: if the XML text is not well-formed | 276 :raises ParseError: if the XML text is not well-formed |
269 """ | 277 """ |
270 return Stream(list(XMLParser(StringIO(text)))) | 278 return Stream(list(XMLParser(BytesIO(text)))) |
271 | 279 |
272 | 280 |
273 class HTMLParser(html.HTMLParser, object): | 281 class HTMLParser(html.HTMLParser, object): |
274 """Parser for HTML input based on the Python `HTMLParser` module. | 282 """Parser for HTML input based on the Python `HTMLParser` module. |
275 | 283 |
385 text = unichr(int(name)) | 393 text = unichr(int(name)) |
386 self._enqueue(TEXT, text) | 394 self._enqueue(TEXT, text) |
387 | 395 |
388 def handle_entityref(self, name): | 396 def handle_entityref(self, name): |
389 try: | 397 try: |
390 text = unichr(htmlentitydefs.name2codepoint[name]) | 398 text = unichr(entities.name2codepoint[name]) |
391 except KeyError: | 399 except KeyError: |
392 text = '&%s;' % name | 400 text = '&%s;' % name |
393 self._enqueue(TEXT, text) | 401 self._enqueue(TEXT, text) |
394 | 402 |
395 def handle_pi(self, data): | 403 def handle_pi(self, data): |
419 :param text: the HTML source | 427 :param text: the HTML source |
420 :return: the parsed XML event stream | 428 :return: the parsed XML event stream |
421 :raises ParseError: if the HTML text is not well-formed, and error recovery | 429 :raises ParseError: if the HTML text is not well-formed, and error recovery |
422 fails | 430 fails |
423 """ | 431 """ |
424 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) | 432 return Stream(list(HTMLParser(BytesIO(text), encoding=encoding))) |
425 | 433 |
426 def _coalesce(stream): | 434 def _coalesce(stream): |
427 """Coalesces adjacent TEXT events into a single event.""" | 435 """Coalesces adjacent TEXT events into a single event.""" |
428 textbuf = [] | 436 textbuf = [] |
429 textpos = None | 437 textpos = None |