Mercurial > genshi > genshi-test
annotate markup/input.py @ 134:df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
* Evaluation errors in expressions now include the original expression code in the traceback.
author | cmlenz |
---|---|
date | Sun, 06 Aug 2006 18:07:21 +0000 |
parents | e9a3930f8823 |
children | a2edde90ad24 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
3 # Copyright (C) 2006 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
8 # are also available at http://markup.edgewall.org/wiki/License. |
1 | 9 # |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
66
822089ae65ce
Switch copyright to Edgewall and URLs to markup.edgewall.org.
cmlenz
parents:
27
diff
changeset
|
12 # history and logs, available at http://markup.edgewall.org/log/. |
1 | 13 |
14 from xml.parsers import expat | |
15 try: | |
16 frozenset | |
17 except NameError: | |
18 from sets import ImmutableSet as frozenset | |
19 import HTMLParser as html | |
20 import htmlentitydefs | |
21 from StringIO import StringIO | |
22 | |
23 from markup.core import Attributes, Markup, QName, Stream | |
24 | |
25 | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
26 class ParseError(Exception): |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
27 """Exception raised when fatal syntax errors are found in the input being |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
28 parsed.""" |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
29 |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
30 def __init__(self, message, filename='<string>', lineno=-1, offset=-1): |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
31 Exception.__init__(self, message) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
32 self.filename = filename |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
33 self.lineno = lineno |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
34 self.offset = offset |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
35 |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
36 |
1 | 37 class XMLParser(object): |
38 """Generator-based XML parser based on roughly equivalent code in | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
39 Kid/ElementTree. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
40 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
41 The parsing is initiated by iterating over the parser object: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
42 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
43 >>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>')) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
44 >>> for kind, data, pos in parser: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
45 ... print kind, data |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
46 START (u'root', [(u'id', u'2')]) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
47 START (u'child', []) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
48 TEXT Foo |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
49 END child |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
50 END root |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
51 """ |
1 | 52 |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
53 def __init__(self, source, filename=None): |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
54 """Initialize the parser for the given XML text. |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
55 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
56 @param source: the XML text as a file-like object |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
57 @param filename: the name of the file, if appropriate |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
58 """ |
1 | 59 self.source = source |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
60 self.filename = filename |
1 | 61 |
62 # Setup the Expat parser | |
63 parser = expat.ParserCreate('utf-8', '}') | |
64 parser.buffer_text = True | |
65 parser.returns_unicode = True | |
66 parser.StartElementHandler = self._handle_start | |
67 parser.EndElementHandler = self._handle_end | |
68 parser.CharacterDataHandler = self._handle_data | |
69 parser.XmlDeclHandler = self._handle_prolog | |
70 parser.StartDoctypeDeclHandler = self._handle_doctype | |
71 parser.StartNamespaceDeclHandler = self._handle_start_ns | |
72 parser.EndNamespaceDeclHandler = self._handle_end_ns | |
73 parser.ProcessingInstructionHandler = self._handle_pi | |
74 parser.CommentHandler = self._handle_comment | |
75 parser.DefaultHandler = self._handle_other | |
76 | |
77 # Location reporting is only support in Python >= 2.4 | |
78 if not hasattr(parser, 'CurrentLineNumber'): | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
79 self._getpos = self._getpos_unknown |
1 | 80 |
81 self.expat = parser | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
82 self._queue = [] |
1 | 83 |
84 def __iter__(self): | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
85 try: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
86 bufsize = 4 * 1024 # 4K |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
87 done = False |
69 | 88 while 1: |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
89 while not done and len(self._queue) == 0: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
90 data = self.source.read(bufsize) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
91 if data == '': # end of data |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
92 if hasattr(self, 'expat'): |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
93 self.expat.Parse('', True) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
94 del self.expat # get rid of circular references |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
95 done = True |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
96 else: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
97 self.expat.Parse(data, False) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
98 for event in self._queue: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
99 yield event |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
100 self._queue = [] |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
101 if done: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
102 break |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
103 except expat.ExpatError, e: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
104 msg = str(e) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
105 if self.filename: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
106 msg += ', in ' + self.filename |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
107 raise ParseError(msg, self.filename, e.lineno, e.offset) |
1 | 108 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
109 def _enqueue(self, kind, data, pos=None): |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
110 if pos is None: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
111 pos = self._getpos() |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
112 if kind is Stream.TEXT: |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
113 # Expat reports the *end* of the text event as current position. We |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
114 # try to fix that up here as much as possible. Unfortunately, the |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
115 # offset is only valid for single-line text. For multi-line text, |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
116 # it is apparently not possible to determine at what offset it |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
117 # started |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
118 if '\n' in data: |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
119 lines = data.splitlines() |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
120 lineno = pos[1] - len(lines) + 1 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
121 offset = -1 |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
122 else: |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
123 lineno = pos[1] |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
124 offset = pos[2] - len(data) |
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
125 pos = (pos[0], lineno, offset) |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
126 self._queue.append((kind, data, pos)) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
127 |
1 | 128 def _getpos_unknown(self): |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
129 return (self.filename, -1, -1) |
1 | 130 |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
131 def _getpos(self): |
134
df44110ca91d
* Improve the accuracy of line numbers for text nodes, so that reported errors about syntax or evaluation errors in expressions point to the right line (not quite perfect yet, though).
cmlenz
parents:
69
diff
changeset
|
132 return (self.filename, self.expat.CurrentLineNumber, |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
133 self.expat.CurrentColumnNumber) |
1 | 134 |
135 def _handle_start(self, tag, attrib): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
136 self._enqueue(Stream.START, (QName(tag), Attributes(attrib.items()))) |
1 | 137 |
138 def _handle_end(self, tag): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
139 self._enqueue(Stream.END, QName(tag)) |
1 | 140 |
141 def _handle_data(self, text): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
142 self._enqueue(Stream.TEXT, text) |
1 | 143 |
144 def _handle_prolog(self, version, encoding, standalone): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
145 self._enqueue(Stream.PROLOG, (version, encoding, standalone)) |
1 | 146 |
147 def _handle_doctype(self, name, sysid, pubid, has_internal_subset): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
148 self._enqueue(Stream.DOCTYPE, (name, pubid, sysid)) |
1 | 149 |
150 def _handle_start_ns(self, prefix, uri): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
151 self._enqueue(Stream.START_NS, (prefix or '', uri)) |
1 | 152 |
153 def _handle_end_ns(self, prefix): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
154 self._enqueue(Stream.END_NS, prefix or '') |
1 | 155 |
156 def _handle_pi(self, target, data): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
157 self._enqueue(Stream.PI, (target, data)) |
1 | 158 |
159 def _handle_comment(self, text): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
160 self._enqueue(Stream.COMMENT, text) |
1 | 161 |
162 def _handle_other(self, text): | |
163 if text.startswith('&'): | |
164 # deal with undefined entities | |
165 try: | |
166 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
167 self._enqueue(Stream.TEXT, text) |
1 | 168 except KeyError: |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
169 lineno, offset = self._getpos() |
1 | 170 raise expat.error("undefined entity %s: line %d, column %d" % |
171 (text, lineno, offset)) | |
172 | |
173 | |
174 def XML(text): | |
175 return Stream(list(XMLParser(StringIO(text)))) | |
176 | |
177 | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
178 class HTMLParser(html.HTMLParser, object): |
1 | 179 """Parser for HTML input based on the Python `HTMLParser` module. |
180 | |
181 This class provides the same interface for generating stream events as | |
182 `XMLParser`, and attempts to automatically balance tags. | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
183 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
184 The parsing is initiated by iterating over the parser object: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
185 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
186 >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>')) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
187 >>> for kind, data, pos in parser: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
188 ... print kind, data |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
189 START (u'ul', [(u'compact', u'compact')]) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
190 START (u'li', []) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
191 TEXT Foo |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
192 END li |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
193 END ul |
1 | 194 """ |
195 | |
196 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', | |
197 'hr', 'img', 'input', 'isindex', 'link', 'meta', | |
198 'param']) | |
199 | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
200 def __init__(self, source, filename=None): |
1 | 201 html.HTMLParser.__init__(self) |
202 self.source = source | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
203 self.filename = filename |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
204 self._queue = [] |
1 | 205 self._open_tags = [] |
206 | |
207 def __iter__(self): | |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
208 try: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
209 bufsize = 4 * 1024 # 4K |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
210 done = False |
69 | 211 while 1: |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
212 while not done and len(self._queue) == 0: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
213 data = self.source.read(bufsize) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
214 if data == '': # end of data |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
215 self.close() |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
216 done = True |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
217 else: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
218 self.feed(data) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
219 for kind, data, pos in self._queue: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
220 yield kind, data, pos |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
221 self._queue = [] |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
222 if done: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
223 open_tags = self._open_tags |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
224 open_tags.reverse() |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
225 for tag in open_tags: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
226 yield Stream.END, QName(tag), pos |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
227 break |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
228 except html.HTMLParseError, e: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
229 msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
230 if self.filename: |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
231 msg += ', in %s' % self.filename |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
232 raise ParseError(msg, self.filename, e.lineno, e.offset) |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
233 |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
234 def _enqueue(self, kind, data, pos=None): |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
235 if pos is None: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
236 pos = self._getpos() |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
237 self._queue.append((kind, data, pos)) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
238 |
21
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
239 def _getpos(self): |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
240 lineno, column = self.getpos() |
eca77129518a
* Include paths are now interpreted relative to the path of the including template. Closes #3.
cmlenz
parents:
1
diff
changeset
|
241 return (self.filename, lineno, column) |
1 | 242 |
243 def handle_starttag(self, tag, attrib): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
244 fixed_attrib = [] |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
245 for name, value in attrib: # Fixup minimized attributes |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
246 if value is None: |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
247 value = name |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
248 fixed_attrib.append((name, unicode(value))) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
249 |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
250 self._enqueue(Stream.START, (QName(tag), Attributes(fixed_attrib))) |
1 | 251 if tag in self._EMPTY_ELEMS: |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
252 self._enqueue(Stream.END, QName(tag)) |
1 | 253 else: |
254 self._open_tags.append(tag) | |
255 | |
256 def handle_endtag(self, tag): | |
257 if tag not in self._EMPTY_ELEMS: | |
258 while self._open_tags: | |
259 open_tag = self._open_tags.pop() | |
260 if open_tag.lower() == tag.lower(): | |
261 break | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
262 self._enqueue(Stream.END, QName(open_tag)) |
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
263 self._enqueue(Stream.END, QName(tag)) |
1 | 264 |
265 def handle_data(self, text): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
266 self._enqueue(Stream.TEXT, text) |
1 | 267 |
268 def handle_charref(self, name): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
269 self._enqueue(Stream.TEXT, Markup('&#%s;' % name)) |
1 | 270 |
271 def handle_entityref(self, name): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
272 self._enqueue(Stream.TEXT, Markup('&%s;' % name)) |
1 | 273 |
274 def handle_pi(self, data): | |
275 target, data = data.split(maxsplit=1) | |
276 data = data.rstrip('?') | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
277 self._enqueue(Stream.PI, (target.strip(), data.strip())) |
1 | 278 |
279 def handle_comment(self, text): | |
26
039fc5b87405
* Split out the XPath tests into a separate `unittest`-based file.
cmlenz
parents:
21
diff
changeset
|
280 self._enqueue(Stream.COMMENT, text) |
1 | 281 |
282 | |
283 def HTML(text): | |
284 return Stream(list(HTMLParser(StringIO(text)))) |