Mercurial > genshi > mirror
comparison examples/trac/trac/util/text.py @ 39:93b4dcbafd7b trunk
Copy Trac to main branch.
author | cmlenz |
---|---|
date | Mon, 03 Jul 2006 18:53:27 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
38:ee669cb9cccc | 39:93b4dcbafd7b |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2003-2006 Edgewall Software | |
4 # Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com> | |
5 # Copyright (C) 2006 Matthew Good <trac@matt-good.net> | |
6 # Copyright (C) 2005-2006 Christian Boos <cboos@neuf.fr> | |
7 # All rights reserved. | |
8 # | |
9 # This software is licensed as described in the file COPYING, which | |
10 # you should have received as part of this distribution. The terms | |
11 # are also available at http://trac.edgewall.com/license.html. | |
12 # | |
13 # This software consists of voluntary contributions made by many | |
14 # individuals. For exact contribution history, see the revision | |
15 # history and logs, available at http://projects.edgewall.com/trac/. | |
16 # | |
17 # Author: Jonas Borgström <jonas@edgewall.com> | |
18 # Matthew Good <trac@matt-good.net> | |
19 # Christian Boos <cboos@neuf.fr> | |
20 | |
21 import locale | |
22 import os | |
23 import sys | |
24 from urllib import quote, unquote, urlencode | |
25 | |
26 | |
# Carriage return + line feed pair, exposed as a module-level constant.
CRLF = '\r\n'
28 | |
29 # -- Unicode | |
30 | |
def to_unicode(text, charset=None, lossy=True):
    """Return a `unicode` object built from `text`.

    `str` input is decoded as follows:

     * if `charset` is given, it is used for decoding;
     * otherwise UTF-8 is tried first (strictly), then the locale
       preferred encoding (the `unicode` builtin alone would not try
       UTF-8 first).

    `lossy` selects the error mode used with `charset` or with the
    locale preferred encoding: 'replace' when true (the default),
    'strict' otherwise.  If decoding still fails with a `UnicodeError`,
    the text is decoded as 'iso-8859-15'.

    Exceptions are converted with `unicode(text)`; if that raises a
    `UnicodeError`, their `args` are converted one by one and joined
    with single spaces.  Any other object is passed to `unicode()`.
    """
    if isinstance(text, str):
        if lossy:
            error_mode = 'replace'
        else:
            error_mode = 'strict'
        try:
            if charset:
                return unicode(text, charset, error_mode)
            try:
                # Strict UTF-8 first, whatever the value of `lossy`.
                return unicode(text, 'utf-8')
            except UnicodeError:
                return unicode(text, locale.getpreferredencoding(),
                               error_mode)
        except UnicodeError:
            # Last resort when the decoding above failed.
            return unicode(text, 'iso-8859-15')

    if not isinstance(text, Exception):
        return unicode(text)

    # Two possibilities for storing unicode strings in exception data:
    try:
        # a custom __str__ method on the exception (e.g. PermissionError)
        return unicode(text)
    except UnicodeError:
        # unicode arguments given to the exception (e.g. parse_date)
        return ' '.join([to_unicode(item) for item in text.args])
69 | |
def unicode_quote(value):
    """Unicode-aware counterpart of `urllib.quote`.

    `value` is encoded to UTF-8 before being handed to `quote`.
    """
    utf8_value = value.encode('utf-8')
    return quote(utf8_value)
73 | |
def unicode_unquote(value):
    """Unicode-aware counterpart of `urllib.unquote`.

    Takes a `str` value as produced by `unicode_quote`: the value is
    unquoted, then the result is decoded from UTF-8.
    """
    raw = unquote(value)
    return raw.decode('utf-8')
80 | |
def unicode_urlencode(params):
    """Unicode-aware counterpart of `urllib.urlencode`.

    `params` is either a dict or a sequence of `(key, value)` pairs.
    `unicode` values are encoded to UTF-8 before being passed to
    `urlencode`; other values are passed through unchanged.
    """
    if isinstance(params, dict):
        params = params.items()
    pairs = []
    for key, val in params:
        if isinstance(val, unicode):
            # An empty encoded result falls back to the original value.
            val = val.encode('utf-8') or val
        pairs.append((key, val))
    return urlencode(pairs)
87 | |
def to_utf8(text, charset='iso-8859-15'):
    """Return `text` as a UTF-8 encoded string.

    The input is assumed to be either UTF-8 already, encoded with the
    optional `charset`, or, failing that, 'iso-8859-15'.

    ''Deprecated in 0.10. You should use `unicode` strings only.''
    """
    try:
        unicode(text, 'utf-8')
    except UnicodeError:
        pass
    else:
        # Valid UTF-8 already: hand the input back untouched.
        return text

    try:
        # Use the user supplied charset if possible
        decoded = unicode(text, charset)
    except UnicodeError:
        # Fallback used when the supplied charset cannot decode the text
        decoded = unicode(text, 'iso-8859-15')
    return decoded.encode('utf-8')
106 | |
107 | |
108 # -- Plain text formatting | |
109 | |
def shorten_line(text, maxlen=75):
    """Truncate `text` so that the result is at most `maxlen` characters.

    Text that already fits (including text that is exactly `maxlen`
    characters long, and empty or `None` input) is returned unchanged.

    Otherwise the text is cut at the last space or newline, whichever
    comes later, that leaves room for the ' ...' suffix.  The whitespace
    character itself is dropped.  If no such character exists, the text
    is cut hard at the available width.

    When `maxlen` is smaller than the suffix itself, the result is just
    the suffix and is therefore longer than `maxlen`.
    """
    if len(text or '') <= maxlen:
        return text
    suffix = ' ...'
    # Reserve room for the suffix so the result never exceeds `maxlen`.
    room = max(maxlen - len(suffix), 0)
    # Prefer the latest word boundary, be it a space or a newline.
    cut = max(text.rfind(' ', 0, room), text.rfind('\n', 0, room))
    if cut < 0:
        # No whitespace to break on: hard cut.
        cut = room
    return text[:cut] + suffix
117 | |
def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep):
    """Wrap the text `t` to `cols` columns using `textwrap.TextWrapper`.

    The text is stripped and its line endings are normalized to '\\n'.
    Each input line is then wrapped on its own, without replacing
    whitespace and without breaking long words; `initial_indent` and
    `subsequent_indent` are passed to the wrapper.  Empty lines are kept.
    The resulting lines are joined with `linesep`.

    If an `ImportError` is raised (`textwrap` unavailable), `t` is
    returned as is.
    """
    try:
        import textwrap
        # Normalize CRLF and lone CR line endings to LF.
        t = t.strip().replace('\r\n', '\n').replace('\r', '\n')
        formatter = textwrap.TextWrapper(cols, replace_whitespace=0,
                                         break_long_words=0,
                                         initial_indent=initial_indent,
                                         subsequent_indent=subsequent_indent)
        pieces = []
        for paragraph in t.split('\n'):
            wrapped = formatter.wrap(paragraph.rstrip())
            if not wrapped:
                # The wrapper yields nothing for a blank line; keep it.
                wrapped = ['']
            pieces.extend(wrapped)
        return linesep.join(pieces)
    except ImportError:
        return t
134 | |
135 | |
136 # -- Conversion | |
137 | |
def pretty_size(size):
    """Format a byte count as a short human-readable string.

    Returns '' for `None`, '<n> bytes' for sizes below 512, and
    otherwise the size divided by 1024 as many times as needed (up to
    TB), formatted with one decimal and the matching unit.
    """
    if size is None:
        return ''

    threshold = 512
    if size < threshold:
        return '%d bytes' % size

    units = ['kB', 'MB', 'GB', 'TB']
    index = -1
    # Move to the next unit while the value is still at least half of it.
    while size >= threshold and index + 1 < len(units):
        index += 1
        size = size / 1024.

    return '%.1f %s' % (size, units[index])