Mercurial > genshi > mirror
diff markup/input.py @ 21:b4d17897d053 trunk
* Include paths are now interpreted relative to the path of the including template. Closes #3.
* The filename is now included as first item in the `pos` tuple of stream events.
* Simplified the "basic" example so that it actually ''is'' basic.
* Added a more complex example using nested relative includes in [source:/trunk/examples/includes/ examples/includes].
author | cmlenz |
---|---|
date | Tue, 20 Jun 2006 13:05:37 +0000 |
parents | 5479aae32f5a |
children | 3c1a022be04c |
line wrap: on
line diff
--- a/markup/input.py +++ b/markup/input.py @@ -24,12 +24,24 @@ from markup.core import Attributes, Markup, QName, Stream +class ParseError(Exception): + """Exception raised when fatal syntax errors are found in the input being + parsed.""" + + def __init__(self, message, filename='<string>', lineno=-1, offset=-1): + Exception.__init__(self, message) + self.filename = filename + self.lineno = lineno + self.offset = offset + + class XMLParser(object): """Generator-based XML parser based on roughly equivalent code in Kid/ElementTree.""" - def __init__(self, source): + def __init__(self, source, filename=None): self.source = source + self.filename = filename # Setup the Expat parser parser = expat.ParserCreate('utf-8', '}') @@ -48,73 +60,80 @@ # Location reporting is only support in Python >= 2.4 if not hasattr(parser, 'CurrentLineNumber'): - self.getpos = self._getpos_unknown + self._getpos = self._getpos_unknown self.expat = parser - self.queue = [] + self._queue = [] def __iter__(self): - bufsize = 4 * 1024 # 4K - done = False - while True: - while not done and len(self.queue) == 0: - data = self.source.read(bufsize) - if data == '': # end of data - if hasattr(self, 'expat'): - self.expat.Parse('', True) - del self.expat # get rid of circular references - done = True - else: - self.expat.Parse(data, False) - for event in self.queue: - yield event - self.queue = [] - if done: - break + try: + bufsize = 4 * 1024 # 4K + done = False + while True: + while not done and len(self._queue) == 0: + data = self.source.read(bufsize) + if data == '': # end of data + if hasattr(self, 'expat'): + self.expat.Parse('', True) + del self.expat # get rid of circular references + done = True + else: + self.expat.Parse(data, False) + for event in self._queue: + yield event + self._queue = [] + if done: + break + except expat.ExpatError, e: + msg = str(e) + if self.filename: + msg += ', in ' + self.filename + raise ParseError(msg, self.filename, e.lineno, e.offset) def _getpos_unknown(self): - return (-1, -1) + return (self.filename or '<string>', -1, -1) - def getpos(self): - return self.expat.CurrentLineNumber, self.expat.CurrentColumnNumber + def _getpos(self): + return (self.filename or '<string>', self.expat.CurrentLineNumber, + self.expat.CurrentColumnNumber) def _handle_start(self, tag, attrib): - self.queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), - self.getpos())) + self._queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), + self._getpos())) def _handle_end(self, tag): - self.queue.append((Stream.END, QName(tag), self.getpos())) + self._queue.append((Stream.END, QName(tag), self._getpos())) def _handle_data(self, text): - self.queue.append((Stream.TEXT, text, self.getpos())) + self._queue.append((Stream.TEXT, text, self._getpos())) def _handle_prolog(self, version, encoding, standalone): - self.queue.append((Stream.PROLOG, (version, encoding, standalone), - self.getpos())) + self._queue.append((Stream.PROLOG, (version, encoding, standalone), + self._getpos())) def _handle_doctype(self, name, sysid, pubid, has_internal_subset): - self.queue.append((Stream.DOCTYPE, (name, pubid, sysid), self.getpos())) + self._queue.append((Stream.DOCTYPE, (name, pubid, sysid), self._getpos())) def _handle_start_ns(self, prefix, uri): - self.queue.append((Stream.START_NS, (prefix or '', uri), self.getpos())) + self._queue.append((Stream.START_NS, (prefix or '', uri), self._getpos())) def _handle_end_ns(self, prefix): - self.queue.append((Stream.END_NS, prefix or '', self.getpos())) + self._queue.append((Stream.END_NS, prefix or '', self._getpos())) def _handle_pi(self, target, data): - self.queue.append((Stream.PI, (target, data), self.getpos())) + self._queue.append((Stream.PI, (target, data), self._getpos())) def _handle_comment(self, text): - self.queue.append((Stream.COMMENT, text, self.getpos())) + self._queue.append((Stream.COMMENT, text, self._getpos())) def _handle_other(self, text): if text.startswith('&'): # deal with undefined entities try: text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - self.queue.append((Stream.TEXT, text, self.getpos())) + self._queue.append((Stream.TEXT, text, self._getpos())) except KeyError: - lineno, offset = self.getpos() + lineno, offset = self._getpos() raise expat.error("undefined entity %s: line %d, column %d" % (text, lineno, offset)) @@ -123,7 +142,7 @@ return Stream(list(XMLParser(StringIO(text)))) -class HTMLParser(html.HTMLParser): +class HTMLParser(html.HTMLParser, object): """Parser for HTML input based on the Python `HTMLParser` module. This class provides the same interface for generating stream events as @@ -134,68 +153,79 @@ 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']) - def __init__(self, source): + def __init__(self, source, filename=None): html.HTMLParser.__init__(self) self.source = source - self.queue = [] + self.filename = filename + self._queue = [] self._open_tags = [] def __iter__(self): - bufsize = 4 * 1024 # 4K - done = False - while True: - while not done and len(self.queue) == 0: - data = self.source.read(bufsize) - if data == '': # end of data - self.close() - done = True - else: - self.feed(data) - for kind, data, pos in self.queue: - yield kind, data, pos - self.queue = [] - if done: - open_tags = self._open_tags - open_tags.reverse() - for tag in open_tags: - yield Stream.END, QName(tag), pos - break + try: + bufsize = 4 * 1024 # 4K + done = False + while True: + while not done and len(self._queue) == 0: + data = self.source.read(bufsize) + if data == '': # end of data + self.close() + done = True + else: + self.feed(data) + for kind, data, pos in self._queue: + yield kind, data, pos + self._queue = [] + if done: + open_tags = self._open_tags + open_tags.reverse() + for tag in open_tags: + yield Stream.END, QName(tag), pos + break + except html.HTMLParseError, e: + msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) + if self.filename: + msg += ', in %s' % self.filename + raise ParseError(msg, self.filename, e.lineno, e.offset) + + def _getpos(self): + lineno, column = self.getpos() + return (self.filename, lineno, column) def handle_starttag(self, tag, attrib): - pos = self.getpos() - self.queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) + pos = self._getpos() + self._queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) if tag in self._EMPTY_ELEMS: - self.queue.append((Stream.END, QName(tag), pos)) + self._queue.append((Stream.END, QName(tag), pos)) else: self._open_tags.append(tag) def handle_endtag(self, tag): if tag not in self._EMPTY_ELEMS: - pos = self.getpos() + pos = self._getpos() while self._open_tags: open_tag = self._open_tags.pop() if open_tag.lower() == tag.lower(): break - self.queue.append((Stream.END, QName(open_tag), pos)) - self.queue.append((Stream.END, QName(tag), pos)) + self._queue.append((Stream.END, QName(open_tag), pos)) + self._queue.append((Stream.END, QName(tag), pos)) def handle_data(self, text): - self.queue.append((Stream.TEXT, text, self.getpos())) + self._queue.append((Stream.TEXT, text, self._getpos())) def handle_charref(self, name): - self.queue.append((Stream.TEXT, Markup('&#%s;' % name), self.getpos())) + self._queue.append((Stream.TEXT, Markup('&#%s;' % name), self._getpos())) def handle_entityref(self, name): - self.queue.append((Stream.TEXT, Markup('&%s;' % name), self.getpos())) + self._queue.append((Stream.TEXT, Markup('&%s;' % name), self._getpos())) def handle_pi(self, data): target, data = data.split(maxsplit=1) data = data.rstrip('?') - self.queue.append((Stream.PI, (target.strip(), data.strip()), - self.getpos())) + self._queue.append((Stream.PI, (target.strip(), data.strip()), + self._getpos())) def handle_comment(self, text): - self.queue.append((Stream.COMMENT, text, self.getpos())) + self._queue.append((Stream.COMMENT, text, self._getpos())) def HTML(text):