# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Locale dependent formatting and parsing of numeric data.

The default locale for the functions in this module is determined by the
following environment variables, in that order:

 * ``LC_NUMERIC``,
 * ``LC_ALL``, and
 * ``LANG``
"""
# TODO: percent and scientific formatting

import re
try:
    from decimal import Decimal
    have_decimal = True
except ImportError:
    have_decimal = False

from babel.core import default_locale, Locale

__all__ = ['format_number', 'format_decimal', 'format_currency',
           'format_percent', 'format_scientific', 'parse_number',
           'parse_decimal', 'NumberFormatError']
__docformat__ = 'restructuredtext en'

LC_NUMERIC = default_locale('LC_NUMERIC')


def get_currency_symbol(currency, locale=LC_NUMERIC):
    """Return the symbol used by the locale for the specified currency.

    >>> get_currency_symbol('USD', 'en_US')
    u'$'

    If the locale has no symbol for the currency, the currency code itself
    is returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency symbol
    :rtype: `unicode`
    """
    return Locale.parse(locale).currency_symbols.get(currency, currency)


def get_decimal_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate decimal fractions.

    >>> get_decimal_symbol('en_US')
    u'.'

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol (``u'.'`` if the locale defines none)
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('decimal', u'.')


def get_group_symbol(locale=LC_NUMERIC):
    """Return the symbol used by the locale to separate groups of thousands.

    >>> get_group_symbol('en_US')
    u','

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol (``u','`` if the locale defines none)
    :rtype: `unicode`
    """
    return Locale.parse(locale).number_symbols.get('group', u',')


def format_number(number, locale=LC_NUMERIC):
    """Return the given number formatted for a specific locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    This is a thin convenience wrapper around `format_decimal` that always
    uses the pattern `format_decimal` picks by default.

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Do we really need this one?
    return format_decimal(number, locale=locale)


def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Return the given decimal number formatted for a specific locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    The appropriate thousands grouping and the decimal separator are used for
    each locale:

    >>> format_decimal(12345.5, locale='en_US')
    u'12,345.5'

    :param number: the number to format
    :param format: the number pattern (a pattern string or a `NumberPattern`);
                   when omitted, the entry stored under the ``None`` key of
                   ``locale.decimal_formats`` is used
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.decimal_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_currency(number, currency, format=None, locale=LC_NUMERIC):
    u"""Return formatted currency value.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'$1,099.98'
    >>> format_currency(1099.98, 'USD', locale='es_CO')
    u'US$ 1.099,98'
    >>> format_currency(1099.98, 'EUR', locale='de_DE')
    u'1.099,98 \\u20ac'

    The pattern can also be specified explicitly:

    >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
    u'EUR 1,099.98'

    :param number: the number to format
    :param currency: the currency code
    :param format: the number pattern (a pattern string or a `NumberPattern`);
                   when omitted, the entry stored under the ``None`` key of
                   ``locale.currency_formats`` is used
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.currency_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale, currency=currency)


def format_percent(number, format=None, locale=LC_NUMERIC):
    """Return formatted percent value for a specific locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512\\xa0%'

    The format pattern can also be specified explicitly:

    >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
    u'25,123\u2030'

    :param number: the percent number to format
    :param format: the number pattern (a pattern string or a `NumberPattern`);
                   when omitted, the entry stored under the ``None`` key of
                   ``locale.percent_formats`` is used
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    if not format:
        format = locale.percent_formats.get(format)
    pattern = parse_pattern(format)
    return pattern.apply(number, locale)


def format_scientific(number, locale=LC_NUMERIC):
    """Placeholder for scientific formatting; always raises.

    :raise `NotImplementedError`: unconditionally
    """
    # TODO: implement
    raise NotImplementedError


class NumberFormatError(ValueError):
    """Exception raised when a string cannot be parsed into a number."""


def parse_number(string, locale=LC_NUMERIC):
    """Parse localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    When the given string cannot be parsed, an exception is raised:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Grouping symbols carry no value; drop them before converting.
        return long(string.replace(get_group_symbol(locale), ''))
    except ValueError:
        raise NumberFormatError('%r is not a valid number' % string)


def parse_decimal(string, locale=LC_NUMERIC):
    """Parse localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    When the given string cannot be parsed, an exception is raised:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # Order matters: strip the group symbol first, then map the locale's
        # decimal symbol onto the '.' that float() understands.
        return float(string.replace(get_group_symbol(locale), '')
                           .replace(get_decimal_symbol(locale), '.'))
    except ValueError:
        raise NumberFormatError('%r is not a valid decimal number' % string)


PREFIX_END = r'[^0-9@#.,]'
NUMBER_TOKEN = r'[0-9@#.\-,E]'

# A pattern is split into three named parts: an optional prefix (quoted
# literals or any non-number characters), the number specification itself,
# and whatever follows as the suffix.
PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"

number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN,
                                    SUFFIX_PATTERN))


def split_number(value):
    """Convert a number into a (intasstring, fractionasstring) tuple

    `Decimal` instances are rendered with ``str()``; every other value is
    rendered with ``'%.9f'`` and stripped of trailing zeros, so at most nine
    fractional digits are ever reported for non-`Decimal` input.

    :param value: the number to split
    :return: a ``(integer_digits, fraction_digits)`` tuple of strings; the
             fraction part is ``''`` when there is none
    """
    if have_decimal and isinstance(value, Decimal):
        text = str(value)
    else:
        text = ('%.9f' % value).rstrip('0')
    if '.' in text:
        a, b = text.split('.', 1)
        if b == '0':
            b = ''
    else:
        a, b = text, ''
    return a, b


def bankersround(value, ndigits=0):
    """Round a number to a given precision.

    Works like round() except that the round-half-even (banker's rounding)
    algorithm is used instead of round-half-up.

    >>> bankersround(5.5, 0)
    6.0
    >>> bankersround(6.5, 0)
    6.0
    >>> bankersround(-6.5, 0)
    -6.0
    >>> bankersround(1234.0, -2)
    1200.0

    Only exact ties are rounded to the even neighbour; anything above the
    midpoint is rounded up:

    >>> bankersround(6.51, 0)
    7.0

    The rounding decision is taken on the digits reported by `split_number`,
    which means non-`Decimal` input is examined to nine decimal places.

    :param value: the number to round
    :param ndigits: the number of fractional digits to keep; negative values
                    round to tens, hundreds, and so on
    :return: the rounded value, as a `Decimal` if `value` was one and as a
             `float` otherwise
    """
    if value < 0:
        sign = -1
    else:
        sign = 1
    value = abs(value)
    a, b = split_number(value)
    digits = a + b
    i = len(a) + ndigits  # index of the first digit that gets dropped

    add = 0
    if 0 <= i < len(digits):
        first_dropped = digits[i]
        if first_dropped > '5':
            add = 1
        elif first_dropped == '5':
            if digits[i + 1:].strip('0'):
                # More non-zero digits follow: strictly above the midpoint.
                add = 1
            elif i > 0 and digits[i - 1] in '13579':
                # Exact tie: round towards the even neighbour. When i == 0
                # the kept part is an implicit (even) zero, so nothing is
                # added.
                add = 1

    if have_decimal and isinstance(value, Decimal):
        # Decimal arithmetic is exact, so scaling and truncating is safe.
        if ndigits >= 0:
            scale = 10 ** ndigits
            return Decimal(int(value * scale) + add) / scale * sign
        scale = 10 ** -ndigits
        return Decimal((int(value / scale) + add) * scale) * sign

    # For floats, build the result from the digit string rather than from
    # ``int(value * scale)``: binary rounding errors in the product (for
    # example 1.005 * 1000 == 1004.9999999999999) would otherwise truncate
    # to the wrong integer.
    if i < 0:
        # The value is smaller than a tenth of the rounding unit.
        kept = 0
    else:
        kept = int((digits[:i] + '0' * (i - len(digits))) or '0')
    kept += add
    if ndigits >= 0:
        return float(kept) / 10 ** ndigits * sign
    return float(kept * 10 ** -ndigits) * sign


def _search_number(pattern):
    """Split a (sub)pattern into its ``(prefix, number, suffix)`` parts.

    :raise `ValueError`: if the pattern contains no number specification
    """
    match = number_re.search(pattern)
    if match is None:
        raise ValueError('Invalid number pattern %r' % pattern)
    return match.groups()


# TODO:
#  Filling
#  Rounding increment in pattern
#  Scientific notation
def parse_pattern(pattern):
    """Parse number format patterns

    A pattern may consist of a positive and a negative subpattern separated
    by ``;``. Without an explicit negative subpattern, the negative prefix is
    the positive prefix with a leading ``-`` and the suffix is shared.

    :param pattern: the pattern string, or an already parsed `NumberPattern`
                    (which is returned unchanged)
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if no number specification can be found in the
                         pattern, or if a significant digits pattern is
                         malformed
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = _search_number(pattern)
        neg_prefix, _, neg_suffix = _search_number(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = _search_number(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix
    if '@' in number:
        # NOTE(review): the message names "@" although the condition tests
        # for "." together with "0" -- confirm the intended wording.
        if '.' in number and '0' in number:
            raise ValueError('Significant digit patterns can not contain '
                             '"@" or "0"')
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits"""
        lowest = highest = 0
        for c in p:
            if c in '@0':
                # Required digit: counts towards both bounds.
                lowest += 1
                highest += 1
            elif c == '#':
                # Optional digit: only raises the upper bound.
                highest += 1
            elif c == ',':
                continue
            else:
                break
        return lowest, highest

    def parse_grouping(p):
        """Parse primary and secondary digit grouping

        >>> parse_grouping('##')
        (1000, 1000)
        >>> parse_grouping('#,###')
        (3, 3)
        >>> parse_grouping('#,####,###')
        (3, 4)
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            # No separator at all: use a group size no number will reach.
            return 1000, 1000
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_precision = parse_precision(integer)
    frac_precision = parse_precision(fraction)
    grouping = parse_grouping(integer)
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_precision, frac_precision)


class NumberPattern(object):
    """A parsed number format pattern that can be applied to numbers.

    Instances are normally created by `parse_pattern`.
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_precision, frac_precision):
        """Store the parsed pattern parts.

        :param pattern: the (positive) pattern string
        :param prefix: a ``(positive, negative)`` tuple of prefixes
        :param suffix: a ``(positive, negative)`` tuple of suffixes
        :param grouping: a ``(primary, secondary)`` tuple of group sizes
        :param int_precision: ``(min, max)`` digits of the integer part
        :param frac_precision: ``(min, max)`` digits of the fraction part
        """
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_precision = int_precision
        self.frac_precision = frac_precision
        # Percent and per-mille signs anywhere in the affixes scale the
        # value before it is formatted.
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100
        elif u'‰' in affixes:
            self.scale = 1000
        else:
            self.scale = 1

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale, currency=None):
        """Format `value` according to this pattern.

        :param value: the number to format
        :param locale: the `Locale` object or locale identifier used for the
                       group, decimal and currency symbols
        :param currency: the currency code; must be given when the pattern
                         contains a currency sign, because it is substituted
                         for that sign
        :return: the formatted number
        :rtype: `unicode`
        """
        value *= self.scale
        negative = int(value < 0)  # doubles as index into prefix/suffix
        if '@' in self.pattern:  # Is it a significant digits pattern?
            text = self._format_sigdig(abs(value),
                                       self.int_precision[0],
                                       self.int_precision[1])
            if '.' in text:
                a, b = text.split('.')
                a = self._format_int(a, 0, 1000, locale)
                if b:
                    b = get_decimal_symbol(locale) + b
            else:
                a, b = self._format_int(text, 0, 1000, locale), ''
        else:  # A normal number pattern
            a, b = split_number(bankersround(abs(value),
                                             self.frac_precision[1]))
            b = b or '0'
            a = self._format_int(a, self.int_precision[0],
                                 self.int_precision[1], locale)
            b = self._format_frac(b, locale)
        retval = u'%s%s%s%s' % (self.prefix[negative], a, b,
                                self.suffix[negative])
        if u'¤' in retval:
            # A doubled sign stands for the currency code, a single one for
            # the locale's currency symbol; replace the doubled form first.
            retval = retval.replace(u'¤¤', currency.upper())
            retval = retval.replace(u'¤', get_currency_symbol(currency, locale))
        return retval

    def _format_sigdig(self, value, min, max):
        """Convert value to a string.

        The resulting string will contain between (min, max) number of
        significant digits.
        """
        a, b = split_number(value)
        ndecimals = len(a)
        if a == '0' and b != '':
            # Pure fraction: leading zeros of the fraction are not
            # significant, so each one shifts the rounding position right.
            ndecimals = 0
            while b.startswith('0'):
                b = b[1:]
                ndecimals -= 1
        a, b = split_number(bankersround(value, max - ndecimals))
        digits = len((a + b).lstrip('0'))
        if not digits:
            digits = 1
        # Figure out if we need to add any trailing '0':s
        if len(a) >= max and a != '0':
            return a
        if digits < min:
            b += ('0' * (min - digits))
        if b:
            return '%s.%s' % (a, b)
        return a

    def _format_int(self, value, min, max, locale):
        """Pad and group the integer digit string `value`.

        :param value: the integer part as a string of digits
        :param min: the minimum number of digits; shorter values are padded
                    with leading zeros
        :param max: unused
        :param locale: the locale that supplies the group symbol
        :return: the grouped integer part
        """
        width = len(value)
        if width < min:
            # Pad on the left: padding on the right would change the value.
            value = '0' * (min - width) + value
        gsize = self.grouping[0]
        ret = ''
        symbol = get_group_symbol(locale)
        while len(value) > gsize:
            ret = symbol + value[-gsize:] + ret
            value = value[:-gsize]
            # Every group after the first uses the secondary size.
            gsize = self.grouping[1]
        return value + ret

    def _format_frac(self, value, locale):
        """Format the fraction digit string `value`.

        :param value: the fraction part as a string of digits
        :param locale: the locale that supplies the decimal symbol
        :return: the decimal symbol followed by the fraction digits, or ``''``
                 when the pattern allows no fraction or an optional fraction
                 is zero
        """
        min_digits, max_digits = self.frac_precision
        if len(value) < min_digits:
            value += ('0' * (min_digits - len(value)))
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        # Drop trailing zeros that are not required by the pattern.
        while len(value) > min_digits and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value