Mercurial > genshi > mirror

diff examples/trac/trac/util/text.py @ 39:93b4dcbafd7b trunk
Copy Trac to main branch.
author: cmlenz
date: Mon, 03 Jul 2006 18:53:27 +0000
new file mode 100644
--- /dev/null
+++ b/examples/trac/trac/util/text.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2003-2006 Edgewall Software
+# Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com>
+# Copyright (C) 2006 Matthew Good <trac@matt-good.net>
+# Copyright (C) 2005-2006 Christian Boos <cboos@neuf.fr>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://trac.edgewall.com/license.html.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For exact contribution history, see the revision
+# history and logs, available at http://projects.edgewall.com/trac/.
+#
+# Author: Jonas Borgström <jonas@edgewall.com>
+#         Matthew Good <trac@matt-good.net>
+#         Christian Boos <cboos@neuf.fr>
+
+import locale
+import os
+import sys
+from urllib import quote, unquote, urlencode
+
+
+# Carriage return + line feed character pair.
+CRLF = '\r\n'
+
+# -- Unicode
+
+def to_unicode(text, charset=None, lossy=True):
+    """Convert a `str` object to an `unicode` object.
+
+    Objects that are not `str` instances are handed to `unicode()`
+    directly. For an exception whose `unicode()` conversion raises
+    `UnicodeError`, the exception arguments are converted one by one
+    and joined with single spaces.
+
+    If `charset` is specified, `text` is decoded using that charset.
+    Otherwise we make some guesses, first trying the UTF-8 encoding
+    (strictly), then trying the locale preferred encoding.
+
+    If the `lossy` argument is `True`, which is the default, the
+    `charset` or locale decoding uses the 'replace' error mode, so
+    undecodable bytes are replaced rather than raising an error.
+
+    If the `lossy` argument is `False`, the 'strict' error mode is used
+    and we fall back to the 'iso-8859-15' charset in case of an error
+    (decoding a `str` using 'iso-8859-15' will always work, as there's
+    one Unicode character for each byte of the input).
+    """
+    if isinstance(text, str):
+        if lossy:
+            errors = 'replace'
+        else:
+            errors = 'strict'
+        try:
+            if charset:
+                return unicode(text, charset, errors)
+            try:
+                # The UTF-8 guess is always strict, whatever `lossy` says.
+                return unicode(text, 'utf-8')
+            except UnicodeError:
+                return unicode(text, locale.getpreferredencoding(), errors)
+        except UnicodeError:
+            # Only reachable in 'strict' mode.
+            return unicode(text, 'iso-8859-15')
+
+    if not isinstance(text, Exception):
+        return unicode(text)
+
+    # two possibilities for storing unicode strings in exception data:
+    try:
+        # custom __str__ method on the exception (e.g. PermissionError)
+        return unicode(text)
+    except UnicodeError:
+        # unicode arguments given to the exception (e.g. parse_date)
+        return ' '.join([to_unicode(arg) for arg in text.args])
+
+def unicode_quote(value):
+    """A unicode aware version of urllib.quote.
+
+    `value` is encoded as UTF-8 and the resulting bytes are quoted.
+    """
+    utf8_bytes = value.encode('utf-8')
+    return quote(utf8_bytes)
+
+def unicode_unquote(value):
+    """A unicode aware version of urllib.unquote.
+
+    Take a `str` value previously obtained by `unicode_quote`, unquote
+    it and decode the result as UTF-8.
+    """
+    utf8_bytes = unquote(value)
+    return utf8_bytes.decode('utf-8')
+
+def unicode_urlencode(params):
+    """A unicode aware version of urllib.urlencode.
+
+    `params` is either a `dict` or a sequence of `(key, value)` pairs.
+    `unicode` keys and values are encoded as UTF-8 before being handed
+    to `urlencode`; anything else is passed through unchanged.
+    """
+    if isinstance(params, dict):
+        params = params.items()
+    encoded = []
+    for key, value in params:
+        # urlencode applies str() to both members of each pair, which
+        # fails on non-ASCII unicode, so encode keys as well as values.
+        if isinstance(key, unicode):
+            key = key.encode('utf-8')
+        if isinstance(value, unicode):
+            value = value.encode('utf-8')
+        encoded.append((key, value))
+    return urlencode(encoded)
+
+def to_utf8(text, charset='iso-8859-15'):
+    """Convert a string to UTF-8.
+
+    If `text` already decodes as UTF-8 it is returned unchanged.
+    Otherwise it is decoded using the optional `charset` parameter,
+    falling back to 'iso-8859-15' if that fails, and the result is
+    re-encoded as UTF-8.
+
+    ''Deprecated in 0.10. You should use `unicode` strings only.''
+    """
+    try:
+        # Probe only: the decoded value itself is not needed.
+        unicode(text, 'utf-8')
+    except UnicodeError:
+        pass
+    else:
+        # Do nothing if it's already utf-8
+        return text
+
+    try:
+        # Use the user supplied charset if possible
+        decoded = unicode(text, charset)
+    except UnicodeError:
+        # This should always work
+        decoded = unicode(text, 'iso-8859-15')
+    return decoded.encode('utf-8')
+
+
+# -- Plain text formatting
+
+def shorten_line(text, maxlen=75):
+    """Shorten `text` to roughly `maxlen` characters, appending ' ...'.
+
+    `text` is returned unchanged when it is empty, `None`, or at most
+    `maxlen` characters long. Otherwise it is cut just after the last
+    space or newline found within the first `maxlen` characters (or at
+    `maxlen` if there is none) and ' ...' is appended, so the result
+    may be up to four characters longer than `maxlen`.
+    """
+    if len(text or '') <= maxlen:
+        # Nothing would be removed, so don't tack on an ellipsis.
+        return text
+    # Break after whichever whitespace character comes last in the
+    # window; rfind() yields -1 when absent, which makes `cut` 0 here.
+    cut = max(text.rfind(' ', 0, maxlen), text.rfind('\n', 0, maxlen)) + 1
+    if not cut:
+        cut = maxlen
+    return text[:cut] + ' ...'
+
+def wrap(t, cols=75, initial_indent='', subsequent_indent='',
+         linesep=os.linesep):
+    """Word-wrap the text `t` to at most `cols` columns per line.
+
+    Leading and trailing whitespace is stripped from `t` and CRLF / CR
+    line endings are normalized to LF. Each input line is then wrapped
+    on its own (long words are not broken and whitespace is not
+    replaced), with empty lines kept, and the resulting lines are
+    joined using `linesep`.
+
+    `initial_indent` and `subsequent_indent` are passed on to
+    `textwrap.TextWrapper`. If the `textwrap` module cannot be
+    imported, `t` is returned untouched.
+    """
+    try:
+        import textwrap
+    except ImportError:
+        return t
+
+    normalized = t.strip().replace('\r\n', '\n').replace('\r', '\n')
+    wrapper = textwrap.TextWrapper(width=cols,
+                                   initial_indent=initial_indent,
+                                   subsequent_indent=subsequent_indent,
+                                   replace_whitespace=0,
+                                   break_long_words=0)
+    wrapped = []
+    for line in normalized.split('\n'):
+        pieces = wrapper.wrap(line.rstrip())
+        if not pieces:
+            # Keep blank lines: TextWrapper returns [] for empty input.
+            pieces = ['']
+        wrapped.extend(pieces)
+    return linesep.join(wrapped)
+
+
+# -- Conversion
+
+def pretty_size(size):
+    """Return a human readable rendering of the byte count `size`.
+
+    `None` gives an empty string and values below 512 are shown as
+    '<n> bytes'. Larger values are divided by 1024 per unit step
+    (kB, MB, GB, TB) until the value drops below 512 or the units run
+    out, and are shown with one decimal place.
+    """
+    if size is None:
+        return ''
+
+    threshold = 512
+    if size < threshold:
+        return '%d bytes' % size
+
+    for unit in ('kB', 'MB', 'GB', 'TB'):
+        size /= 1024.
+        if size < threshold:
+            break
+    # If the loop ran to completion, `unit` is the largest one ('TB').
+    return '%.1f %s' % (size, unit)
author	cmlenz
date	Mon, 03 Jul 2006 18:53:27 +0000
parents
children