comparison examples/trac/trac/util/text.py @ 39:93b4dcbafd7b trunk

Copy Trac to main branch.
author cmlenz
date Mon, 03 Jul 2006 18:53:27 +0000
parents
children
comparison
equal deleted inserted replaced
38:ee669cb9cccc 39:93b4dcbafd7b
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2003-2006 Edgewall Software
4 # Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com>
5 # Copyright (C) 2006 Matthew Good <trac@matt-good.net>
6 # Copyright (C) 2005-2006 Christian Boos <cboos@neuf.fr>
7 # All rights reserved.
8 #
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://trac.edgewall.com/license.html.
12 #
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://projects.edgewall.com/trac/.
16 #
17 # Author: Jonas Borgström <jonas@edgewall.com>
18 # Matthew Good <trac@matt-good.net>
19 # Christian Boos <cboos@neuf.fr>
20
21 import locale
22 import os
23 import sys
24 from urllib import quote, unquote, urlencode
25
26
27 CRLF = '\r\n'
28
29 # -- Unicode
30
def to_unicode(text, charset=None, lossy=True):
    """Convert `text` to a `unicode` object.

    Objects that are not `str` instances are passed to `unicode()`.
    Exceptions get one extra step: if `unicode(exception)` raises a
    `UnicodeError`, the exception arguments are converted one by one with
    `to_unicode` and joined with single spaces.

    For `str` input the bytes are decoded as follows:

     * if `charset` is given, it is used with the error mode selected by
       `lossy` ('replace' when `lossy` is true, 'strict' otherwise);
     * if no `charset` is given, a strict UTF-8 decoding is tried first,
       then the locale preferred encoding with the selected error mode
       (the `unicode` builtin, by contrast, makes no such guesses);
     * if a `UnicodeError` is still raised, the text is decoded using
       'iso-8859-15', which is expected to always succeed as there is
       one Unicode character for each input byte.
    """
    if isinstance(text, str):
        if lossy:
            mode = 'replace'
        else:
            mode = 'strict'
        try:
            if charset:
                return unicode(text, charset, mode)
            try:
                # Strict on purpose: a failure here means "not UTF-8".
                return unicode(text, 'utf-8')
            except UnicodeError:
                return unicode(text, locale.getpreferredencoding(), mode)
        except UnicodeError:
            # Last resort when a strict decoding failed.
            return unicode(text, 'iso-8859-15')

    if not isinstance(text, Exception):
        return unicode(text)

    # Two possibilities for storing unicode strings in exception data:
    try:
        # a custom __str__ method on the exception (e.g. PermissionError)
        return unicode(text)
    except UnicodeError:
        # unicode arguments given to the exception (e.g. parse_date)
        return ' '.join([to_unicode(arg) for arg in text.args])
69
def unicode_quote(value, safe='/'):
    """A unicode aware version of urllib.quote.

    `unicode` values are encoded to UTF-8 before being quoted. Byte
    strings are quoted as they are: calling `.encode('utf-8')` on a `str`
    containing non-ASCII bytes would first decode it as ASCII and fail
    with a `UnicodeDecodeError`.

    `safe` is handed over to `urllib.quote` and lists the characters that
    must not be quoted (default: '/', the same default as `urllib.quote`).
    """
    if isinstance(value, unicode):
        value = value.encode('utf-8')
    return quote(value, safe)
73
def unicode_unquote(value):
    """A unicode aware version of urllib.unquote.

    `value` is a `str` as produced by `unicode_quote`; the unquoted
    bytes are decoded as UTF-8 and returned.
    """
    utf8_bytes = unquote(value)
    return utf8_bytes.decode('utf-8')
80
def unicode_urlencode(params):
    """A unicode aware version of urllib.urlencode.

    `params` is either a dict or a sequence of `(key, value)` pairs.
    Every `unicode` key or value is encoded to UTF-8 before the pairs
    are given to `urllib.urlencode`; other objects are passed through
    unchanged.

    Keys are encoded as well as values, since `urllib.urlencode` applies
    `str()` to them, which fails for `unicode` objects containing
    non-ASCII characters.
    """
    if isinstance(params, dict):
        params = params.items()
    encoded = []
    for key, val in params:
        if isinstance(key, unicode):
            key = key.encode('utf-8')
        if isinstance(val, unicode):
            val = val.encode('utf-8')
        encoded.append((key, val))
    return urlencode(encoded)
87
def to_utf8(text, charset='iso-8859-15'):
    """Convert a byte string to UTF-8.

    The input is assumed to be encoded either in UTF-8 already, in the
    encoding given by the optional `charset` parameter (ISO-8859-15 by
    default), or, as a last resort, in ISO-8859-15.

    If `text` is valid UTF-8 it is returned unchanged. Otherwise it is
    decoded using `charset`; should that fail, either because the bytes
    are invalid for that charset or because the charset name is unknown,
    'iso-8859-15' is used instead. The result is then encoded to UTF-8.

    ''Deprecated in 0.10. You should use `unicode` strings only.''
    """
    try:
        # Only used as a validity check: nothing to do for valid UTF-8.
        unicode(text, 'utf-8')
    except UnicodeError:
        pass
    else:
        return text

    try:
        # Use the user supplied charset if possible
        decoded = unicode(text, charset)
    except (UnicodeError, LookupError):
        # LookupError: `charset` is not a known codec name.
        # Decoding with iso-8859-15 should always work.
        decoded = unicode(text, 'iso-8859-15')
    return decoded.encode('utf-8')
106
107
108 # -- Plain text formatting
109
def shorten_line(text, maxlen=75):
    """Shorten `text` to about `maxlen` characters, appending ' ...'.

    `text` is returned unchanged when it is no longer than `maxlen`
    characters; a false value such as `None` or '' counts as having
    length 0. A text of exactly `maxlen` characters fits and is
    therefore not shortened.

    Otherwise the first `maxlen` characters are cut just after their
    last space or, when they contain no space, just after their last
    newline. When there is neither, the cut happens at `maxlen`. The
    marker ' ...' is appended to the kept part.
    """
    if len(text or '') <= maxlen:
        return text
    head = text[:maxlen]
    # rfind() gives -1 when nothing is found, so "+ 1" yields 0 (false)
    # and the `or` chain moves on to the next candidate position.
    cut = head.rfind(' ') + 1 or head.rfind('\n') + 1 or maxlen
    return text[:cut] + ' ...'
117
def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep):
    """Wrap the text `t` to lines of at most `cols` columns.

    Surrounding whitespace is stripped and all line endings ('\\r\\n' and
    '\\r') are normalized to '\\n'. Each resulting line is then wrapped on
    its own with a `textwrap.TextWrapper`, which neither replaces
    whitespace nor breaks long words; `initial_indent` and
    `subsequent_indent` are handed over to that wrapper. Empty lines are
    kept. The wrapped lines are joined using `linesep`.

    If the `textwrap` module can't be imported, `t` is returned as is.
    """
    try:
        import textwrap
    except ImportError:
        return t

    normalized = t.strip().replace('\r\n', '\n').replace('\r', '\n')
    wrapper = textwrap.TextWrapper(cols, replace_whitespace=0,
                                   break_long_words=0,
                                   initial_indent=initial_indent,
                                   subsequent_indent=subsequent_indent)
    wrapped = []
    for line in normalized.split('\n'):
        # wrapper.wrap() gives an empty list for an empty line, which
        # must nevertheless show up in the output.
        wrapped.extend(wrapper.wrap(line.rstrip()) or [''])
    return linesep.join(wrapped)
134
135
136 # -- Conversion
137
def pretty_size(size):
    """Return a human readable representation of `size`, a byte count.

    `None` gives an empty string and sizes below 512 are reported in
    bytes. Larger sizes are divided by 1024 until the value drops below
    512 or the largest unit (TB) is reached, and are shown with one
    decimal, e.g. '1.0 kB'.
    """
    if size is None:
        return ''

    threshold = 512
    if size < threshold:
        return '%d bytes' % size

    for unit in ('kB', 'MB', 'GB', 'TB'):
        size /= 1024.
        if size < threshold:
            break
    return '%.1f %s' % (size, unit)
Copyright (C) 2012-2017 Edgewall Software