Mercurial > genshi > mirror
diff examples/trac/trac/util/text.py @ 39:93b4dcbafd7b trunk
Copy Trac to main branch.
author | cmlenz |
---|---|
date | Mon, 03 Jul 2006 18:53:27 +0000 |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
#
# Copyright (C) 2003-2006 Edgewall Software
# Copyright (C) 2003-2004 Jonas Borgström <jonas@edgewall.com>
# Copyright (C) 2006 Matthew Good <trac@matt-good.net>
# Copyright (C) 2005-2006 Christian Boos <cboos@neuf.fr>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
#
# Author: Jonas Borgström <jonas@edgewall.com>
#         Matthew Good <trac@matt-good.net>
#         Christian Boos <cboos@neuf.fr>
#
# Recovered from a changeset diff: new file mode 100644,
# /dev/null -> b/examples/trac/trac/util/text.py (@@ -0,0 +1,152 @@).

import locale
import os
import sys
try:
    from urllib import quote, unquote, urlencode
except ImportError:
    # Python 3 moved these helpers to urllib.parse. This fallback only keeps
    # the module importable there so the plain-text helpers (shorten_line,
    # wrap, pretty_size) stay usable; the Unicode helpers below still rely
    # on the Python 2 `unicode` builtin.
    from urllib.parse import quote, unquote, urlencode


CRLF = '\r\n'

# -- Unicode

def to_unicode(text, charset=None, lossy=True):
    """Convert a `str` object to an `unicode` object.

    If `charset` is not specified, we'll make some guesses, first trying
    the UTF-8 encoding (always in 'strict' mode), then trying the locale
    preferred encoding (this differs from the `unicode` function, which
    only tries the default encoding, in 'strict' mode).

    If the `lossy` argument is `True`, which is the default, then we use
    the 'replace' mode: bytes that cannot be decoded are substituted
    instead of raising an error.

    If the `lossy` argument is `False`, we fall back to the 'iso-8859-15'
    charset in case of an error (decoding a `str` using 'iso-8859-15'
    will always work, as there's one Unicode character for each byte of
    the input).

    Anything that is not a `str` is handed to `unicode()`. For exceptions
    whose `unicode()` conversion fails, the converted `args` are joined
    with single spaces instead.
    """
    if not isinstance(text, str):
        if isinstance(text, Exception):
            # two possibilities for storing unicode strings in exception data:
            try:
                # custom __str__ method on the exception (e.g. PermissionError)
                return unicode(text)
            except UnicodeError:
                # unicode arguments given to the exception (e.g. parse_date)
                return ' '.join([to_unicode(arg) for arg in text.args])
        return unicode(text)
    # and/or idiom kept on purpose: the file predates conditional expressions.
    errors = lossy and 'replace' or 'strict'
    try:
        if charset:
            return unicode(text, charset, errors)
        else:
            try:
                return unicode(text, 'utf-8')
            except UnicodeError:
                return unicode(text, locale.getpreferredencoding(), errors)
    except UnicodeError:
        # Only reachable in 'strict' mode; every byte maps to a character.
        return unicode(text, 'iso-8859-15')

def unicode_quote(value):
    """A unicode aware version of urllib.quote.

    `value` is encoded to UTF-8 before being percent-quoted.
    """
    return quote(value.encode('utf-8'))

def unicode_unquote(value):
    """A unicode aware version of urllib.unquote.

    Take `str` value previously obtained by `unicode_quote`; the unquoted
    bytes are decoded as UTF-8.
    """
    return unquote(value).decode('utf-8')

def unicode_urlencode(params):
    """A unicode aware version of urllib.urlencode.

    `params` is either a dict or a sequence of `(key, value)` pairs.
    `unicode` values are encoded to UTF-8 first; keys and all other
    values are passed through to `urlencode` untouched.
    """
    if isinstance(params, dict):
        params = params.items()
    return urlencode([(k, isinstance(v, unicode) and v.encode('utf-8') or v)
                      for k, v in params])

def to_utf8(text, charset='iso-8859-15'):
    """Convert a string to UTF-8, assuming the encoding is either UTF-8, ISO
    Latin-1, or as specified by the optional `charset` parameter.

    The input is returned unchanged when it already decodes as UTF-8.

    ''Deprecated in 0.10. You should use `unicode` strings only.''
    """
    try:
        # Do nothing if it's already utf-8 (decode only to validate)
        unicode(text, 'utf-8')
        return text
    except UnicodeError:
        try:
            # Use the user supplied charset if possible
            u = unicode(text, charset)
        except (UnicodeError, LookupError):
            # LookupError covers an unknown charset name.
            # This should always work
            u = unicode(text, 'iso-8859-15')
        return u.encode('utf-8')


# -- Plain text formatting

def shorten_line(text, maxlen=75):
    """Shorten `text` to roughly `maxlen` characters, marking the cut.

    Text that already fits in `maxlen` characters (including `None` and
    the empty string) is returned unchanged. Otherwise the first `maxlen`
    characters are cut back to just after the last space, or failing that
    just after the last newline, or failing that not at all, and ' ...'
    is appended. The result can therefore be a few characters longer
    than `maxlen`.
    """
    # `<=`, not `<`: text that is exactly `maxlen` long needs no ellipsis.
    if len(text or '') <= maxlen:
        return text
    shortline = text[:maxlen]
    # rfind() returns -1 when nothing is found, so "+ 1" yields a falsy 0
    # and the `or` chain moves on to the next candidate cut position.
    cut = shortline.rfind(' ') + 1 or shortline.rfind('\n') + 1 or maxlen
    return text[:cut] + ' ...'

def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep):
    """Word-wrap `t` to `cols` columns, keeping its existing line breaks.

    The text is stripped and its line endings normalized to '\\n'; each
    line is then wrapped on its own (long words are never broken and
    whitespace is not replaced) and the pieces are joined with `linesep`.
    Empty lines are kept as empty lines. `initial_indent` and
    `subsequent_indent` are passed to `textwrap.TextWrapper`, so they
    apply within every input line.

    If `textwrap` cannot be imported, `t` is returned unchanged.
    """
    try:
        import textwrap
    except ImportError:
        return t
    t = t.strip().replace('\r\n', '\n').replace('\r', '\n')
    wrapper = textwrap.TextWrapper(cols, replace_whitespace=0,
                                   break_long_words=0,
                                   initial_indent=initial_indent,
                                   subsequent_indent=subsequent_indent)
    wrapped_lines = []
    for line in t.split('\n'):
        # wrap() returns [] for an empty line; keep it as a blank line.
        wrapped_lines += wrapper.wrap(line.rstrip()) or ['']
    return linesep.join(wrapped_lines)


# -- Conversion

def pretty_size(size):
    """Format a byte count as a short human readable string.

    Returns '' for `None`, '<n> bytes' below 512 and otherwise a value
    with one decimal in kB, MB, GB or TB (1024-based). The next unit is
    used as soon as the value reaches half of it, e.g. 512 -> '0.5 kB'.
    """
    if size is None:
        return ''

    jump = 512
    if size < jump:
        return '%d bytes' % size

    units = ['kB', 'MB', 'GB', 'TB']
    i = 0
    # Stop at the last unit even if the value is still large.
    while size >= jump and i < len(units):
        i += 1
        size /= 1024.

    return '%.1f %s' % (size, units[i - 1])