comparison markup/input.py @ 21:b4d17897d053 trunk

* Include paths are now interpreted relative to the path of the including template. Closes #3.
* The filename is now included as first item in the `pos` tuple of stream events.
* Simplified the "basic" example so that it actually ''is'' basic.
* Added a more complex example using nested relative includes in [source:/trunk/examples/includes/ examples/includes].
author cmlenz
date Tue, 20 Jun 2006 13:05:37 +0000
parents 5479aae32f5a
children 3c1a022be04c
comparison
equal deleted inserted replaced
20:cc92d74ce9e5 21:b4d17897d053
22 from StringIO import StringIO 22 from StringIO import StringIO
23 23
24 from markup.core import Attributes, Markup, QName, Stream 24 from markup.core import Attributes, Markup, QName, Stream
25 25
26 26
27 class ParseError(Exception):
28 """Exception raised when fatal syntax errors are found in the input being
29 parsed."""
30
31 def __init__(self, message, filename='<string>', lineno=-1, offset=-1):
32 Exception.__init__(self, message)
33 self.filename = filename
34 self.lineno = lineno
35 self.offset = offset
36
37
27 class XMLParser(object): 38 class XMLParser(object):
28 """Generator-based XML parser based on roughly equivalent code in 39 """Generator-based XML parser based on roughly equivalent code in
29 Kid/ElementTree.""" 40 Kid/ElementTree."""
30 41
31 def __init__(self, source): 42 def __init__(self, source, filename=None):
32 self.source = source 43 self.source = source
44 self.filename = filename
33 45
34 # Setup the Expat parser 46 # Setup the Expat parser
35 parser = expat.ParserCreate('utf-8', '}') 47 parser = expat.ParserCreate('utf-8', '}')
36 parser.buffer_text = True 48 parser.buffer_text = True
37 parser.returns_unicode = True 49 parser.returns_unicode = True
46 parser.CommentHandler = self._handle_comment 58 parser.CommentHandler = self._handle_comment
47 parser.DefaultHandler = self._handle_other 59 parser.DefaultHandler = self._handle_other
48 60
49 # Location reporting is only supported in Python >= 2.4 61 # Location reporting is only supported in Python >= 2.4
50 if not hasattr(parser, 'CurrentLineNumber'): 62 if not hasattr(parser, 'CurrentLineNumber'):
51 self.getpos = self._getpos_unknown 63 self._getpos = self._getpos_unknown
52 64
53 self.expat = parser 65 self.expat = parser
54 self.queue = [] 66 self._queue = []
55 67
56 def __iter__(self): 68 def __iter__(self):
57 bufsize = 4 * 1024 # 4K 69 try:
58 done = False 70 bufsize = 4 * 1024 # 4K
59 while True: 71 done = False
60 while not done and len(self.queue) == 0: 72 while True:
61 data = self.source.read(bufsize) 73 while not done and len(self._queue) == 0:
62 if data == '': # end of data 74 data = self.source.read(bufsize)
63 if hasattr(self, 'expat'): 75 if data == '': # end of data
64 self.expat.Parse('', True) 76 if hasattr(self, 'expat'):
65 del self.expat # get rid of circular references 77 self.expat.Parse('', True)
66 done = True 78 del self.expat # get rid of circular references
67 else: 79 done = True
68 self.expat.Parse(data, False) 80 else:
69 for event in self.queue: 81 self.expat.Parse(data, False)
70 yield event 82 for event in self._queue:
71 self.queue = [] 83 yield event
72 if done: 84 self._queue = []
73 break 85 if done:
86 break
87 except expat.ExpatError, e:
88 msg = str(e)
89 if self.filename:
90 msg += ', in ' + self.filename
91 raise ParseError(msg, self.filename, e.lineno, e.offset)
74 92
75 def _getpos_unknown(self): 93 def _getpos_unknown(self):
76 return (-1, -1) 94 return (self.filename or '<string>', -1, -1)
77 95
78 def getpos(self): 96 def _getpos(self):
79 return self.expat.CurrentLineNumber, self.expat.CurrentColumnNumber 97 return (self.filename or '<string>', self.expat.CurrentLineNumber,
98 self.expat.CurrentColumnNumber)
80 99
81 def _handle_start(self, tag, attrib): 100 def _handle_start(self, tag, attrib):
82 self.queue.append((Stream.START, (QName(tag), Attributes(attrib.items())), 101 self._queue.append((Stream.START, (QName(tag), Attributes(attrib.items())),
83 self.getpos())) 102 self._getpos()))
84 103
85 def _handle_end(self, tag): 104 def _handle_end(self, tag):
86 self.queue.append((Stream.END, QName(tag), self.getpos())) 105 self._queue.append((Stream.END, QName(tag), self._getpos()))
87 106
88 def _handle_data(self, text): 107 def _handle_data(self, text):
89 self.queue.append((Stream.TEXT, text, self.getpos())) 108 self._queue.append((Stream.TEXT, text, self._getpos()))
90 109
91 def _handle_prolog(self, version, encoding, standalone): 110 def _handle_prolog(self, version, encoding, standalone):
92 self.queue.append((Stream.PROLOG, (version, encoding, standalone), 111 self._queue.append((Stream.PROLOG, (version, encoding, standalone),
93 self.getpos())) 112 self._getpos()))
94 113
95 def _handle_doctype(self, name, sysid, pubid, has_internal_subset): 114 def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
96 self.queue.append((Stream.DOCTYPE, (name, pubid, sysid), self.getpos())) 115 self._queue.append((Stream.DOCTYPE, (name, pubid, sysid), self._getpos()))
97 116
98 def _handle_start_ns(self, prefix, uri): 117 def _handle_start_ns(self, prefix, uri):
99 self.queue.append((Stream.START_NS, (prefix or '', uri), self.getpos())) 118 self._queue.append((Stream.START_NS, (prefix or '', uri), self._getpos()))
100 119
101 def _handle_end_ns(self, prefix): 120 def _handle_end_ns(self, prefix):
102 self.queue.append((Stream.END_NS, prefix or '', self.getpos())) 121 self._queue.append((Stream.END_NS, prefix or '', self._getpos()))
103 122
104 def _handle_pi(self, target, data): 123 def _handle_pi(self, target, data):
105 self.queue.append((Stream.PI, (target, data), self.getpos())) 124 self._queue.append((Stream.PI, (target, data), self._getpos()))
106 125
107 def _handle_comment(self, text): 126 def _handle_comment(self, text):
108 self.queue.append((Stream.COMMENT, text, self.getpos())) 127 self._queue.append((Stream.COMMENT, text, self._getpos()))
109 128
110 def _handle_other(self, text): 129 def _handle_other(self, text):
111 if text.startswith('&'): 130 if text.startswith('&'):
112 # deal with undefined entities 131 # deal with undefined entities
113 try: 132 try:
114 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) 133 text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
115 self.queue.append((Stream.TEXT, text, self.getpos())) 134 self._queue.append((Stream.TEXT, text, self._getpos()))
116 except KeyError: 135 except KeyError:
117 lineno, offset = self.getpos() 136 lineno, offset = self._getpos()
118 raise expat.error("undefined entity %s: line %d, column %d" % 137 raise expat.error("undefined entity %s: line %d, column %d" %
119 (text, lineno, offset)) 138 (text, lineno, offset))
120 139
121 140
122 def XML(text): 141 def XML(text):
123 return Stream(list(XMLParser(StringIO(text)))) 142 return Stream(list(XMLParser(StringIO(text))))
124 143
125 144
126 class HTMLParser(html.HTMLParser): 145 class HTMLParser(html.HTMLParser, object):
127 """Parser for HTML input based on the Python `HTMLParser` module. 146 """Parser for HTML input based on the Python `HTMLParser` module.
128 147
129 This class provides the same interface for generating stream events as 148 This class provides the same interface for generating stream events as
130 `XMLParser`, and attempts to automatically balance tags. 149 `XMLParser`, and attempts to automatically balance tags.
131 """ 150 """
132 151
133 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame', 152 _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
134 'hr', 'img', 'input', 'isindex', 'link', 'meta', 153 'hr', 'img', 'input', 'isindex', 'link', 'meta',
135 'param']) 154 'param'])
136 155
137 def __init__(self, source): 156 def __init__(self, source, filename=None):
138 html.HTMLParser.__init__(self) 157 html.HTMLParser.__init__(self)
139 self.source = source 158 self.source = source
140 self.queue = [] 159 self.filename = filename
160 self._queue = []
141 self._open_tags = [] 161 self._open_tags = []
142 162
143 def __iter__(self): 163 def __iter__(self):
144 bufsize = 4 * 1024 # 4K 164 try:
145 done = False 165 bufsize = 4 * 1024 # 4K
146 while True: 166 done = False
147 while not done and len(self.queue) == 0: 167 while True:
148 data = self.source.read(bufsize) 168 while not done and len(self._queue) == 0:
149 if data == '': # end of data 169 data = self.source.read(bufsize)
150 self.close() 170 if data == '': # end of data
151 done = True 171 self.close()
152 else: 172 done = True
153 self.feed(data) 173 else:
154 for kind, data, pos in self.queue: 174 self.feed(data)
155 yield kind, data, pos 175 for kind, data, pos in self._queue:
156 self.queue = [] 176 yield kind, data, pos
157 if done: 177 self._queue = []
158 open_tags = self._open_tags 178 if done:
159 open_tags.reverse() 179 open_tags = self._open_tags
160 for tag in open_tags: 180 open_tags.reverse()
161 yield Stream.END, QName(tag), pos 181 for tag in open_tags:
162 break 182 yield Stream.END, QName(tag), pos
183 break
184 except html.HTMLParseError, e:
185 msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
186 if self.filename:
187 msg += ', in %s' % self.filename
188 raise ParseError(msg, self.filename, e.lineno, e.offset)
189
190 def _getpos(self):
191 lineno, column = self.getpos()
192 return (self.filename, lineno, column)
163 193
164 def handle_starttag(self, tag, attrib): 194 def handle_starttag(self, tag, attrib):
165 pos = self.getpos() 195 pos = self._getpos()
166 self.queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos)) 196 self._queue.append((Stream.START, (QName(tag), Attributes(attrib)), pos))
167 if tag in self._EMPTY_ELEMS: 197 if tag in self._EMPTY_ELEMS:
168 self.queue.append((Stream.END, QName(tag), pos)) 198 self._queue.append((Stream.END, QName(tag), pos))
169 else: 199 else:
170 self._open_tags.append(tag) 200 self._open_tags.append(tag)
171 201
172 def handle_endtag(self, tag): 202 def handle_endtag(self, tag):
173 if tag not in self._EMPTY_ELEMS: 203 if tag not in self._EMPTY_ELEMS:
174 pos = self.getpos() 204 pos = self._getpos()
175 while self._open_tags: 205 while self._open_tags:
176 open_tag = self._open_tags.pop() 206 open_tag = self._open_tags.pop()
177 if open_tag.lower() == tag.lower(): 207 if open_tag.lower() == tag.lower():
178 break 208 break
179 self.queue.append((Stream.END, QName(open_tag), pos)) 209 self._queue.append((Stream.END, QName(open_tag), pos))
180 self.queue.append((Stream.END, QName(tag), pos)) 210 self._queue.append((Stream.END, QName(tag), pos))
181 211
182 def handle_data(self, text): 212 def handle_data(self, text):
183 self.queue.append((Stream.TEXT, text, self.getpos())) 213 self._queue.append((Stream.TEXT, text, self._getpos()))
184 214
185 def handle_charref(self, name): 215 def handle_charref(self, name):
186 self.queue.append((Stream.TEXT, Markup('&#%s;' % name), self.getpos())) 216 self._queue.append((Stream.TEXT, Markup('&#%s;' % name), self._getpos()))
187 217
188 def handle_entityref(self, name): 218 def handle_entityref(self, name):
189 self.queue.append((Stream.TEXT, Markup('&%s;' % name), self.getpos())) 219 self._queue.append((Stream.TEXT, Markup('&%s;' % name), self._getpos()))
190 220
191 def handle_pi(self, data): 221 def handle_pi(self, data):
192 target, data = data.split(maxsplit=1) 222 target, data = data.split(maxsplit=1)
193 data = data.rstrip('?') 223 data = data.rstrip('?')
194 self.queue.append((Stream.PI, (target.strip(), data.strip()), 224 self._queue.append((Stream.PI, (target.strip(), data.strip()),
195 self.getpos())) 225 self._getpos()))
196 226
197 def handle_comment(self, text): 227 def handle_comment(self, text):
198 self.queue.append((Stream.COMMENT, text, self.getpos())) 228 self._queue.append((Stream.COMMENT, text, self._getpos()))
199 229
200 230
201 def HTML(text): 231 def HTML(text):
202 return Stream(list(HTMLParser(StringIO(text)))) 232 return Stream(list(HTMLParser(StringIO(text))))
Copyright (C) 2012-2017 Edgewall Software