Mercurial > genshi > mirror
comparison markup/input.py @ 21:b4d17897d053 trunk
* Include paths are now interpreted relative to the path of the including template. Closes #3.
* The filename is now included as first item in the `pos` tuple of stream events.
* Simplified the "basic" example so that it actually *is* basic.
* Added a more complex example using nested relative includes in `examples/includes` (source:/trunk/examples/includes/).
author | cmlenz |
---|---|
date | Tue, 20 Jun 2006 13:05:37 +0000 |
parents | 5479aae32f5a |
children | 3c1a022be04c |
comparison
equal
deleted
inserted
replaced
20:cc92d74ce9e5 | 21:b4d17897d053 |
---|---|
22 from StringIO import StringIO | 22 from StringIO import StringIO |
23 | 23 |
24 from markup.core import Attributes, Markup, QName, Stream | 24 from markup.core import Attributes, Markup, QName, Stream |
25 | 25 |
26 | 26 |
27 class ParseError(Exception): | |
28 """Exception raised when fatal syntax errors are found in the input being | |
29 parsed.""" | |
30 | |
31 def __init__(self, message, filename='<string>', lineno=-1, offset=-1): | |
32 Exception.__init__(self, message) | |
33 self.filename = filename | |
34 self.lineno = lineno | |
35 self.offset = offset | |
36 | |
37 | |
27 class XMLParser(object): | 38 class XMLParser(object): |
28 """Generator-based XML parser based on roughly equivalent code in | 39 """Generator-based XML parser based on roughly equivalent code in |
29 Kid/ElementTree.""" | 40 Kid/ElementTree.""" |
30 | 41 |
31 def __init__(self, source): | 42 def __init__(self, source, filename=None): |
32 self.source = source | 43 self.source = source |
44 self.filename = filename | |
33 | 45 |
34 # Setup the Expat parser | 46 # Setup the Expat parser |
35 parser = expat.ParserCreate('utf-8', '}') | 47 parser = expat.ParserCreate('utf-8', '}') |
36 parser.buffer_text = True | 48 parser.buffer_text = True |
37 parser.returns_unicode = True | 49 parser.returns_unicode = True |
46 parser.CommentHandler = self._handle_comment | 58 parser.CommentHandler = self._handle_comment |
47 parser.DefaultHandler = self._handle_other | 59 parser.DefaultHandler = self._handle_other |
48 | 60 |
49 # Location reporting is only supported in Python >= 2.4 | 61 # Location reporting is only supported in Python >= 2.4 |
50 if not hasattr(parser, 'CurrentLineNumber'): | 62 if not hasattr(parser, 'CurrentLineNumber'): |
51 self.getpos = self._getpos_unknown | 63 self._getpos = self._getpos_unknown |
52 | 64 |
53 self.expat = parser | 65 self.expat = parser |
54 self.queue = [] | 66 self._queue = [] |
55 | 67 |
56 def __iter__(self): | 68 def __iter__(self): |
57 bufsize = 4 * 1024 # 4K | 69 try: |
58 done = False | 70 bufsize = 4 * 1024 # 4K |
59 while True: | 71 done = False |
60 while not done and len(self.queue) == 0: | 72 while True: |
61 data = self.source.read(bufsize) | 73 while not done and len(self._queue) == 0: |
62 if data == '': # end of data | 74 data = self.source.read(bufsize) |
63 if hasattr(self, 'expat'): | 75 if data == '': # end of data |
64 self.expat.Parse('', True) | 76 if hasattr(self, 'expat'): |
65 del self.expat # get rid of circular references | 77 self.expat.Parse('', True) |
66 done = True | 78 del self.expat # get rid of circular references |
67 else: | 79 done = True |
68 self.expat.Parse(data, False) | 80 else: |
69 for event in self.queue: | 81 self.expat.Parse(data, False) |
70 yield event | 82 for event in self._queue: |
71 self.queue = [] | 83 yield event |
72 if done: | 84 self._queue = [] |
73 break | 85 if done: |
86 break | |
87 except expat.ExpatError, e: | |
88 msg = str(e) | |
89 if self.filename: | |
90 msg += ', in ' + self.filename | |
91 raise ParseError(msg, self.filename, e.lineno, e.offset) | |
74 | 92 |
75 def _getpos_unknown(self): | 93 def _getpos_unknown(self): |
76 return (-1, -1) | 94 return (self.filename or '<string>', -1, -1) |
77 | 95 |
78 def getpos(self): | 96 def _getpos(self): |
79 return self.expat.CurrentLineNumber, self.expat.CurrentColumnNumber | 97 return (self.filename or '<string>', self.expat.CurrentLineNumber, |
98 self.expat.CurrentColumnNumber) | |
80 | 99 |
81 def _handle_start(self, tag, attrib): | 100 def _handle_start(self, tag, attrib): |
82 self.queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), | 101 self._queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), |
83 self.getpos())) | 102 self._getpos())) |
84 | 103 |
85 def _handle_end(self, tag): | 104 def _handle_end(self, tag): |
86 self.queue.append((Stream.END, QName(tag), self.getpos())) | 105 self._queue.append((Stream.END, QName(tag), self._getpos())) |
87 | 106 |
88 def _handle_data(self, text): | 107 def _handle_data(self, text): |
89 self.queue.append((Stream.TEXT, text, self.getpos())) | 108 self._queue.append((Stream.TEXT, text, self._getpos())) |
90 | 109 |
91 def _handle_prolog(self, version, encoding, standalone): | 110 def _handle_prolog(self, version, encoding, standalone): |
92 self.queue.append((Stream.PROLOG, (version, encoding, standalone), | 111 self._queue.append((Stream.PROLOG, (version, encoding, standalone), |
93 self.getpos())) | 112 self._getpos())) |
94 | 113 |
95 def _handle_doctype(self, name, sysid, pubid, has_internal_subset): | 114 def _handle_doctype(self, name, sysid, pubid, has_internal_subset): |
96 self.queue.append((Stream.DOCTYPE, (name, pubid, sysid), self.getpos())) | 115 self._queue.append((Stream.DOCTYPE, (name, pubid, sysid), self._getpos())) |
97 | 116 |
98 def _handle_start_ns(self, prefix, uri): | 117 def _handle_start_ns(self, prefix, uri): |
99 self.queue.append((Stream.START_NS, (prefix or '', uri), self.getpos())) | 118 self._queue.append((Stream.START_NS, (prefix or '', uri), self._getpos())) |
100 | 119 |
101 def _handle_end_ns(self, prefix): | 120 def _handle_end_ns(self, prefix): |
102 self.queue.append((Stream.END_NS, prefix or '', self.getpos())) | 121 self._queue.append((Stream.END_NS, prefix or '', self._getpos())) |
103 | 122 |
104 def _handle_pi(self, target, data): | 123 def _handle_pi(self, target, data): |
105 self.queue.append((Stream.PI, (target, data), self.getpos())) | 124 self._queue.append((Stream.PI, (target, data), self._getpos())) |
106 | 125 |
107 def _handle_comment(self, text): | 126 def _handle_comment(self, text): |
108 self.queue.append((Stream.COMMENT, text, self.getpos())) | 127 self._queue.append((Stream.COMMENT, text, self._getpos())) |
109 | 128 |
110 def _handle_other(self, text): | 129 def _handle_other(self, text): |
111 if text.startswith('&'): | 130 if text.startswith('&'): |
112 # deal with undefined entities | 131 # deal with undefined entities |
113 try: | 132 try: |
114 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) | 133 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) |
115 self.queue.append((Stream.TEXT, text, self.getpos())) | 134 self._queue.append((Stream.TEXT, text, self._getpos())) |
116 except KeyError: | 135 except KeyError: |
117 lineno, offset = self.getpos() | 136 lineno, offset = self._getpos() |
118 raise expat.error("undefined entity %s: line %d, column %d" % | 137 raise expat.error("undefined entity %s: line %d, column %d" % |
119 (text, lineno, offset)) | 138 (text, lineno, offset)) |
120 | 139 |
121 | 140 |
122 def XML(text): | 141 def XML(text): |
123 return Stream(list(XMLParser(StringIO(text)))) | 142 return Stream(list(XMLParser(StringIO(text)))) |
124 | 143 |
125 | 144 |
126 class HTMLParser(html.HTMLParser): | 145 class HTMLParser(html.HTMLParser, object): |
127 """Parser for HTML input based on the Python `HTMLParser` module. | 146 """Parser for HTML input based on the Python `HTMLParser` module. |
128 | 147 |
129 This class provides the same interface for generating stream events as | 148 This class provides the same interface for generating stream events as |
130 `XMLParser`, and attempts to automatically balance tags. | 149 `XMLParser`, and attempts to automatically balance tags. |
131 """ | 150 """ |
132 | 151 |
133 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', | 152 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', |
134 'hr', 'img', 'input', 'isindex', 'link', 'meta', | 153 'hr', 'img', 'input', 'isindex', 'link', 'meta', |
135 'param']) | 154 'param']) |
136 | 155 |
137 def __init__(self, source): | 156 def __init__(self, source, filename=None): |
138 html.HTMLParser.__init__(self) | 157 html.HTMLParser.__init__(self) |
139 self.source = source | 158 self.source = source |
140 self.queue = [] | 159 self.filename = filename |
160 self._queue = [] | |
141 self._open_tags = [] | 161 self._open_tags = [] |
142 | 162 |
143 def __iter__(self): | 163 def __iter__(self): |
144 bufsize = 4 * 1024 # 4K | 164 try: |
145 done = False | 165 bufsize = 4 * 1024 # 4K |
146 while True: | 166 done = False |
147 while not done and len(self.queue) == 0: | 167 while True: |
148 data = self.source.read(bufsize) | 168 while not done and len(self._queue) == 0: |
149 if data == '': # end of data | 169 data = self.source.read(bufsize) |
150 self.close() | 170 if data == '': # end of data |
151 done = True | 171 self.close() |
152 else: | 172 done = True |
153 self.feed(data) | 173 else: |
154 for kind, data, pos in self.queue: | 174 self.feed(data) |
155 yield kind, data, pos | 175 for kind, data, pos in self._queue: |
156 self.queue = [] | 176 yield kind, data, pos |
157 if done: | 177 self._queue = [] |
158 open_tags = self._open_tags | 178 if done: |
159 open_tags.reverse() | 179 open_tags = self._open_tags |
160 for tag in open_tags: | 180 open_tags.reverse() |
161 yield Stream.END, QName(tag), pos | 181 for tag in open_tags: |
162 break | 182 yield Stream.END, QName(tag), pos |
183 break | |
184 except html.HTMLParseError, e: | |
185 msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) | |
186 if self.filename: | |
187 msg += ', in %s' % self.filename | |
188 raise ParseError(msg, self.filename, e.lineno, e.offset) | |
189 | |
190 def _getpos(self): | |
191 lineno, column = self.getpos() | |
192 return (self.filename, lineno, column) | |
163 | 193 |
164 def handle_starttag(self, tag, attrib): | 194 def handle_starttag(self, tag, attrib): |
165 pos = self.getpos() | 195 pos = self._getpos() |
166 self.queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) | 196 self._queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) |
167 if tag in self._EMPTY_ELEMS: | 197 if tag in self._EMPTY_ELEMS: |
168 self.queue.append((Stream.END, QName(tag), pos)) | 198 self._queue.append((Stream.END, QName(tag), pos)) |
169 else: | 199 else: |
170 self._open_tags.append(tag) | 200 self._open_tags.append(tag) |
171 | 201 |
172 def handle_endtag(self, tag): | 202 def handle_endtag(self, tag): |
173 if tag not in self._EMPTY_ELEMS: | 203 if tag not in self._EMPTY_ELEMS: |
174 pos = self.getpos() | 204 pos = self._getpos() |
175 while self._open_tags: | 205 while self._open_tags: |
176 open_tag = self._open_tags.pop() | 206 open_tag = self._open_tags.pop() |
177 if open_tag.lower() == tag.lower(): | 207 if open_tag.lower() == tag.lower(): |
178 break | 208 break |
179 self.queue.append((Stream.END, QName(open_tag), pos)) | 209 self._queue.append((Stream.END, QName(open_tag), pos)) |
180 self.queue.append((Stream.END, QName(tag), pos)) | 210 self._queue.append((Stream.END, QName(tag), pos)) |
181 | 211 |
182 def handle_data(self, text): | 212 def handle_data(self, text): |
183 self.queue.append((Stream.TEXT, text, self.getpos())) | 213 self._queue.append((Stream.TEXT, text, self._getpos())) |
184 | 214 |
185 def handle_charref(self, name): | 215 def handle_charref(self, name): |
186 self.queue.append((Stream.TEXT, Markup('&#%s;' % name), self.getpos())) | 216 self._queue.append((Stream.TEXT, Markup('&#%s;' % name), self._getpos())) |
187 | 217 |
188 def handle_entityref(self, name): | 218 def handle_entityref(self, name): |
189 self.queue.append((Stream.TEXT, Markup('&%s;' % name), self.getpos())) | 219 self._queue.append((Stream.TEXT, Markup('&%s;' % name), self._getpos())) |
190 | 220 |
191 def handle_pi(self, data): | 221 def handle_pi(self, data): |
192 target, data = data.split(maxsplit=1) | 222 target, data = data.split(maxsplit=1) |
193 data = data.rstrip('?') | 223 data = data.rstrip('?') |
194 self.queue.append((Stream.PI, (target.strip(), data.strip()), | 224 self._queue.append((Stream.PI, (target.strip(), data.strip()), |
195 self.getpos())) | 225 self._getpos())) |
196 | 226 |
197 def handle_comment(self, text): | 227 def handle_comment(self, text): |
198 self.queue.append((Stream.COMMENT, text, self.getpos())) | 228 self._queue.append((Stream.COMMENT, text, self._getpos())) |
199 | 229 |
200 | 230 |
201 def HTML(text): | 231 def HTML(text): |
202 return Stream(list(HTMLParser(StringIO(text)))) | 232 return Stream(list(HTMLParser(StringIO(text)))) |