Mercurial > genshi > genshi-test
comparison genshi/util.py @ 902:09cc3627654c experimental-inline
Sync `experimental/inline` branch with [source:trunk@1126].
author | cmlenz |
---|---|
date | Fri, 23 Apr 2010 21:08:26 +0000 |
parents | 1837f39efd6f |
children |
comparison legend: equal | deleted | inserted | replaced
830:de82830f8816 | 902:09cc3627654c |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # | 2 # |
3 # Copyright (C) 2006-2007 Edgewall Software | 3 # Copyright (C) 2006-2009 Edgewall Software |
4 # All rights reserved. | 4 # All rights reserved. |
5 # | 5 # |
6 # This software is licensed as described in the file COPYING, which | 6 # This software is licensed as described in the file COPYING, which |
7 # you should have received as part of this distribution. The terms | 7 # you should have received as part of this distribution. The terms |
8 # are also available at http://genshi.edgewall.org/wiki/License. | 8 # are also available at http://genshi.edgewall.org/wiki/License. |
11 # individuals. For the exact contribution history, see the revision | 11 # individuals. For the exact contribution history, see the revision |
12 # history and logs, available at http://genshi.edgewall.org/log/. | 12 # history and logs, available at http://genshi.edgewall.org/log/. |
13 | 13 |
14 """Various utility classes and functions.""" | 14 """Various utility classes and functions.""" |
15 | 15 |
16 import htmlentitydefs | 16 import htmlentitydefs as entities |
17 import re | 17 import re |
18 | 18 |
19 __docformat__ = 'restructuredtext en' | 19 __docformat__ = 'restructuredtext en' |
20 | 20 |
21 | 21 |
44 | 44 |
45 Iterating over the cache returns the keys, starting with the most recently | 45 Iterating over the cache returns the keys, starting with the most recently |
46 used: | 46 used: |
47 | 47 |
48 >>> for key in cache: | 48 >>> for key in cache: |
49 ... print key | 49 ... print(key) |
50 D | 50 D |
51 A | 51 A |
52 C | 52 C |
53 | 53 |
54 This code is based on the LRUCache class from ``myghtyutils.util``, written | 54 This code is based on the LRUCache class from ``myghtyutils.util``, written |
57 http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py | 57 http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py |
58 """ | 58 """ |
59 | 59 |
60 class _Item(object): | 60 class _Item(object): |
61 def __init__(self, key, value): | 61 def __init__(self, key, value): |
62 self.previous = self.next = None | 62 self.prv = self.nxt = None |
63 self.key = key | 63 self.key = key |
64 self.value = value | 64 self.value = value |
65 def __repr__(self): | 65 def __repr__(self): |
66 return repr(self.value) | 66 return repr(self.value) |
67 | 67 |
76 | 76 |
77 def __iter__(self): | 77 def __iter__(self): |
78 cur = self.head | 78 cur = self.head |
79 while cur: | 79 while cur: |
80 yield cur.key | 80 yield cur.key |
81 cur = cur.next | 81 cur = cur.nxt |
82 | 82 |
83 def __len__(self): | 83 def __len__(self): |
84 return len(self._dict) | 84 return len(self._dict) |
85 | 85 |
86 def __getitem__(self, key): | 86 def __getitem__(self, key): |
101 | 101 |
102 def __repr__(self): | 102 def __repr__(self): |
103 return repr(self._dict) | 103 return repr(self._dict) |
104 | 104 |
105 def _insert_item(self, item): | 105 def _insert_item(self, item): |
106 item.previous = None | 106 item.prv = None |
107 item.next = self.head | 107 item.nxt = self.head |
108 if self.head is not None: | 108 if self.head is not None: |
109 self.head.previous = item | 109 self.head.prv = item |
110 else: | 110 else: |
111 self.tail = item | 111 self.tail = item |
112 self.head = item | 112 self.head = item |
113 self._manage_size() | 113 self._manage_size() |
114 | 114 |
115 def _manage_size(self): | 115 def _manage_size(self): |
116 while len(self._dict) > self.capacity: | 116 while len(self._dict) > self.capacity: |
117 olditem = self._dict[self.tail.key] | 117 olditem = self._dict[self.tail.key] |
118 del self._dict[self.tail.key] | 118 del self._dict[self.tail.key] |
119 if self.tail != self.head: | 119 if self.tail != self.head: |
120 self.tail = self.tail.previous | 120 self.tail = self.tail.prv |
121 self.tail.next = None | 121 self.tail.nxt = None |
122 else: | 122 else: |
123 self.head = self.tail = None | 123 self.head = self.tail = None |
124 | 124 |
125 def _update_item(self, item): | 125 def _update_item(self, item): |
126 if self.head == item: | 126 if self.head == item: |
127 return | 127 return |
128 | 128 |
129 previous = item.previous | 129 prv = item.prv |
130 previous.next = item.next | 130 prv.nxt = item.nxt |
131 if item.next is not None: | 131 if item.nxt is not None: |
132 item.next.previous = previous | 132 item.nxt.prv = prv |
133 else: | 133 else: |
134 self.tail = previous | 134 self.tail = prv |
135 | 135 |
136 item.previous = None | 136 item.prv = None |
137 item.next = self.head | 137 item.nxt = self.head |
138 self.head.previous = self.head = item | 138 self.head.prv = self.head = item |
139 | 139 |
140 | 140 |
141 def flatten(items): | 141 def flatten(items): |
142 """Flattens a potentially nested sequence into a flat list. | 142 """Flattens a potentially nested sequence into a flat list. |
143 | 143 |
156 retval += flatten(item) | 156 retval += flatten(item) |
157 else: | 157 else: |
158 retval.append(item) | 158 retval.append(item) |
159 return retval | 159 return retval |
160 | 160 |
161 | |
161 def plaintext(text, keeplinebreaks=True): | 162 def plaintext(text, keeplinebreaks=True): |
162 """Returns the text as a `unicode` string with all entities and tags | 163 """Return the text with all entities and tags removed. |
163 removed. | |
164 | 164 |
165 >>> plaintext('<b>1 &lt; 2</b>') | 165 >>> plaintext('<b>1 &lt; 2</b>') |
166 u'1 < 2' | 166 u'1 < 2' |
167 | 167 |
168 The `keeplinebreaks` parameter can be set to ``False`` to replace any line | 168 The `keeplinebreaks` parameter can be set to ``False`` to replace any line |
177 :param keeplinebreaks: whether line breaks in the text should be kept intact | 177 :param keeplinebreaks: whether line breaks in the text should be kept intact |
178 :return: the text with tags and entities removed | 178 :return: the text with tags and entities removed |
179 """ | 179 """ |
180 text = stripentities(striptags(text)) | 180 text = stripentities(striptags(text)) |
181 if not keeplinebreaks: | 181 if not keeplinebreaks: |
182 text = text.replace(u'\n', u' ') | 182 text = text.replace('\n', ' ') |
183 return text | 183 return text |
184 | |
184 | 185 |
185 _STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)') | 186 _STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)') |
186 def stripentities(text, keepxmlentities=False): | 187 def stripentities(text, keepxmlentities=False): |
187 """Return a copy of the given text with any character or numeric entities | 188 """Return a copy of the given text with any character or numeric entities |
188 replaced by the equivalent UTF-8 characters. | 189 replaced by the equivalent UTF-8 characters. |
211 ref = int(ref, 10) | 212 ref = int(ref, 10) |
212 return unichr(ref) | 213 return unichr(ref) |
213 else: # character entity | 214 else: # character entity |
214 ref = match.group(2) | 215 ref = match.group(2) |
215 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): | 216 if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'): |
216 return u'&%s;' % ref | 217 return '&%s;' % ref |
217 try: | 218 try: |
218 return unichr(htmlentitydefs.name2codepoint[ref]) | 219 return unichr(entities.name2codepoint[ref]) |
219 except KeyError: | 220 except KeyError: |
220 if keepxmlentities: | 221 if keepxmlentities: |
221 return u'&%s;' % ref | 222 return '&%s;' % ref |
222 else: | 223 else: |
223 return ref | 224 return ref |
224 return _STRIPENTITIES_RE.sub(_replace_entity, text) | 225 return _STRIPENTITIES_RE.sub(_replace_entity, text) |
226 | |
225 | 227 |
226 _STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)') | 228 _STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)') |
227 def striptags(text): | 229 def striptags(text): |
228 """Return a copy of the text with any XML/HTML tags removed. | 230 """Return a copy of the text with any XML/HTML tags removed. |
229 | 231 |
241 | 243 |
242 :param text: the string to remove tags from | 244 :param text: the string to remove tags from |
243 :return: the text with tags removed | 245 :return: the text with tags removed |
244 """ | 246 """ |
245 return _STRIPTAGS_RE.sub('', text) | 247 return _STRIPTAGS_RE.sub('', text) |
248 | |
249 | |
250 def stringrepr(string): | |
251 ascii = string.encode('ascii', 'backslashreplace') | |
252 quoted = "'" + ascii.replace("'", "\\'") + "'" | |
253 if len(ascii) > len(string): | |
254 return 'u' + quoted | |
255 return quoted | |
256 | |
257 | |
258 # Compatibility fallback implementations for older Python versions | |
259 | |
260 try: | |
261 all = all | |
262 any = any | |
263 except NameError: | |
264 def any(S): | |
265 for x in S: | |
266 if x: | |
267 return True | |
268 return False | |
269 | |
270 def all(S): | |
271 for x in S: | |
272 if not x: | |
273 return False | |
274 return True |