comparison genshi/input.py @ 856:1e2be9fb3348

Add a couple of fallback imports for Python 3.0.
author cmlenz
date Thu, 12 Nov 2009 11:36:14 +0000
parents 0d9e87c6cf6e
children 24733a5854d9
Side-by-side comparison (old revision in the left column, new revision in the right column)
Legend: equal, deleted, inserted, replaced
855:9598353ed630 856:1e2be9fb3348
14 """Support for constructing markup streams from files, strings, or other 14 """Support for constructing markup streams from files, strings, or other
15 sources. 15 sources.
16 """ 16 """
17 17
18 from itertools import chain 18 from itertools import chain
19 try:
20 import htmlentitydefs as entities
21 import HTMLParser as html
22 except ImportError:
23 from html import entities
24 from html import parser as html
25 try:
26 from StringIO import StringIO
27 BytesIO = StringIO
28 except ImportError:
29 from io import BytesIO, StringIO
19 from xml.parsers import expat 30 from xml.parsers import expat
20 import HTMLParser as html
21 import htmlentitydefs
22 from StringIO import StringIO
23 31
24 from genshi.core import Attrs, QName, Stream, stripentities 32 from genshi.core import Attrs, QName, Stream, stripentities
25 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \ 33 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \
26 START_CDATA, END_CDATA, PI, COMMENT 34 START_CDATA, END_CDATA, PI, COMMENT
27 35
86 END child 94 END child
87 END root 95 END root
88 """ 96 """
89 97
90 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in 98 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
91 htmlentitydefs.name2codepoint.items()] 99 entities.name2codepoint.items()]
92 _external_dtd = '\n'.join(_entitydefs) 100 _external_dtd = '\n'.join(_entitydefs)
93 101
94 def __init__(self, source, filename=None, encoding=None): 102 def __init__(self, source, filename=None, encoding=None):
95 """Initialize the parser for the given XML input. 103 """Initialize the parser for the given XML input.
96 104
167 def __iter__(self): 175 def __iter__(self):
168 return iter(self.parse()) 176 return iter(self.parse())
169 177
170 def _build_foreign(self, context, base, sysid, pubid): 178 def _build_foreign(self, context, base, sysid, pubid):
171 parser = self.expat.ExternalEntityParserCreate(context) 179 parser = self.expat.ExternalEntityParserCreate(context)
172 parser.ParseFile(StringIO(self._external_dtd)) 180 parser.ParseFile(BytesIO(self._external_dtd))
173 return 1 181 return 1
174 182
175 def _enqueue(self, kind, data=None, pos=None): 183 def _enqueue(self, kind, data=None, pos=None):
176 if pos is None: 184 if pos is None:
177 pos = self._getpos() 185 pos = self._getpos()
235 243
236 def _handle_other(self, text): 244 def _handle_other(self, text):
237 if text.startswith('&'): 245 if text.startswith('&'):
238 # deal with undefined entities 246 # deal with undefined entities
239 try: 247 try:
240 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) 248 text = unichr(entities.name2codepoint[text[1:-1]])
241 self._enqueue(TEXT, text) 249 self._enqueue(TEXT, text)
242 except KeyError: 250 except KeyError:
243 filename, lineno, offset = self._getpos() 251 filename, lineno, offset = self._getpos()
244 error = expat.error('undefined entity "%s": line %d, column %d' 252 error = expat.error('undefined entity "%s": line %d, column %d'
245 % (text, lineno, offset)) 253 % (text, lineno, offset))
265 273
266 :param text: the XML source 274 :param text: the XML source
267 :return: the parsed XML event stream 275 :return: the parsed XML event stream
268 :raises ParseError: if the XML text is not well-formed 276 :raises ParseError: if the XML text is not well-formed
269 """ 277 """
270 return Stream(list(XMLParser(StringIO(text)))) 278 return Stream(list(XMLParser(BytesIO(text))))
271 279
272 280
273 class HTMLParser(html.HTMLParser, object): 281 class HTMLParser(html.HTMLParser, object):
274 """Parser for HTML input based on the Python `HTMLParser` module. 282 """Parser for HTML input based on the Python `HTMLParser` module.
275 283
385 text = unichr(int(name)) 393 text = unichr(int(name))
386 self._enqueue(TEXT, text) 394 self._enqueue(TEXT, text)
387 395
388 def handle_entityref(self, name): 396 def handle_entityref(self, name):
389 try: 397 try:
390 text = unichr(htmlentitydefs.name2codepoint[name]) 398 text = unichr(entities.name2codepoint[name])
391 except KeyError: 399 except KeyError:
392 text = '&%s;' % name 400 text = '&%s;' % name
393 self._enqueue(TEXT, text) 401 self._enqueue(TEXT, text)
394 402
395 def handle_pi(self, data): 403 def handle_pi(self, data):
419 :param text: the HTML source 427 :param text: the HTML source
420 :return: the parsed XML event stream 428 :return: the parsed XML event stream
421 :raises ParseError: if the HTML text is not well-formed, and error recovery 429 :raises ParseError: if the HTML text is not well-formed, and error recovery
422 fails 430 fails
423 """ 431 """
424 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) 432 return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
425 433
426 def _coalesce(stream): 434 def _coalesce(stream):
427 """Coalesces adjacent TEXT events into a single event.""" 435 """Coalesces adjacent TEXT events into a single event."""
428 textbuf = [] 436 textbuf = []
429 textpos = None 437 textpos = None
Copyright (C) 2012-2017 Edgewall Software