Mercurial > babel > old > mirror
annotate babel/numbers.py @ 127:a72de8971819
Add currency formatting.
author | cmlenz |
---|---|
date | Mon, 18 Jun 2007 16:19:50 +0000 |
parents | 1ba215a5774d |
children | 24a711a3c174 |
rev | line source |
---|---|
3 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Locale dependent formatting and parsing of numeric data. | |
15 | |
16 The default locale for the functions in this module is determined by the | |
17 following environment variables, in that order: | |
18 | |
19 * ``LC_NUMERIC``, | |
20 * ``LC_ALL``, and | |
21 * ``LANG`` | |
22 """ | |
23 # TODO: percent and scientific formatting | |
24 | |
25 import re | |
26 | |
74
d9c34d2f3d1d
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
cmlenz
parents:
52
diff
changeset
|
27 from babel.core import default_locale, Locale |
3 | 28 |
29 __all__ = ['format_number', 'format_decimal', 'format_currency', | |
30 'format_percent', 'format_scientific', 'parse_number', | |
34 | 31 'parse_decimal', 'NumberFormatError'] |
3 | 32 __docformat__ = 'restructuredtext en' |
33 | |
74
d9c34d2f3d1d
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
cmlenz
parents:
52
diff
changeset
|
34 LC_NUMERIC = default_locale('LC_NUMERIC') |
3 | 35 |
def get_currency_symbol(currency, locale=LC_NUMERIC):
    """Look up the symbol a locale uses for the given currency.

    >>> get_currency_symbol('USD', 'en_US')
    u'$'

    If the locale data has no entry for the currency, the currency code
    itself is returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).currency_symbols
    return symbols.get(currency, currency)
48 | |
def get_decimal_symbol(locale=LC_NUMERIC):
    """Look up the symbol a locale uses to separate decimal fractions.

    >>> get_decimal_symbol('en_US')
    u'.'

    Falls back to ``u'.'`` when the locale data defines no decimal symbol.

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('decimal', u'.')
60 | |
def get_group_symbol(locale=LC_NUMERIC):
    """Look up the symbol a locale uses to separate groups of thousands.

    >>> get_group_symbol('en_US')
    u','

    Falls back to ``u','`` when the locale data defines no group symbol.

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('group', u',')
3 | 72 |
def format_number(number, locale=LC_NUMERIC):
    """Format a number using the default decimal pattern of a locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Thin convenience wrapper around `format_decimal` with no explicit
    # pattern (open question from the original author: is it needed at all?).
    return format_decimal(number, format=None, locale=locale)
3 | 86 |
def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Format a decimal number according to the conventions of a locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    The appropriate thousands grouping and the decimal separator are used for
    each locale:

    >>> format_decimal(12345.5, locale='en_US')
    u'12,345.5'

    :param number: the number to format
    :param format: an explicit number pattern (a pattern string or an already
                   parsed `NumberPattern`); when omitted or empty, the pattern
                   is taken from the locale's ``decimal_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # An explicit pattern wins; otherwise fall back to the locale's own entry.
    pattern_source = format or parsed_locale.decimal_formats.get(format)
    return parse_pattern(pattern_source).apply(number, parsed_locale)
3 | 118 |
def format_currency(number, currency, format=None, locale=LC_NUMERIC):
    """Format a monetary amount according to the conventions of a locale.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'$1,099.98'
    >>> format_currency(1099.98, 'USD', locale='es_CO')
    u'US$1.099,98'
    >>> format_currency(1099.98, 'EUR', locale='de_DE')
    u'1.099,98 \\u20ac'

    The pattern can also be specified explicitly:

    >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
    u'EUR 1,099.98'

    :param number: the number to format
    :param currency: the currency code
    :param format: an explicit number pattern (a pattern string or an already
                   parsed `NumberPattern`); when omitted or empty, the pattern
                   is taken from the locale's ``currency_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # An explicit pattern wins; otherwise fall back to the locale's own entry.
    pattern_source = format or parsed_locale.currency_formats.get(format)
    compiled = parse_pattern(pattern_source)
    return compiled.apply(number, parsed_locale, currency=currency)
3 | 145 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_percent(number, format=None, locale=LC_NUMERIC):
    """Format a number as a percentage according to a locale's conventions.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512 %'

    :param number: the percent number to format
    :param format: an explicit number pattern (a pattern string or an already
                   parsed `NumberPattern`); when omitted or empty, the pattern
                   is taken from the locale's ``percent_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # An explicit pattern wins; otherwise fall back to the locale's own entry.
    pattern_source = format or parsed_locale.percent_formats.get(format)
    return parse_pattern(pattern_source).apply(number, parsed_locale)
3 | 167 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_scientific(number, locale=LC_NUMERIC):
    """Placeholder for scientific notation formatting.

    Scientific formatting is not implemented yet; every call raises
    `NotImplementedError`.

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :raise `NotImplementedError`: always
    """
    raise NotImplementedError()
170 | |
34 | 171 |
172 | |
class NumberFormatError(ValueError):
    """Signals that a string could not be interpreted as a number.

    Raised by `parse_number` and `parse_decimal`; being a `ValueError`
    subclass, it is also caught by handlers for that exception.
    """
175 | |
176 | |
def parse_number(string, locale=LC_NUMERIC):
    """Convert a localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    When the given string cannot be parsed, an exception is raised:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Drop the locale's grouping symbol before handing the digits over.
        group_symbol = get_group_symbol(locale)
        digits = string.replace(group_symbol, '')
        return long(digits)
    except ValueError:
        raise NumberFormatError('%r is not a valid number' % string)
3 | 202 |
def parse_decimal(string, locale=LC_NUMERIC):
    """Convert a localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    When the given string cannot be parsed, an exception is raised:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # First strip the grouping symbol, then turn the locale's decimal
        # symbol into the '.' that float() understands.
        ungrouped = string.replace(get_group_symbol(locale), '')
        normalized = ungrouped.replace(get_decimal_symbol(locale), '.')
        return float(normalized)
    except ValueError:
        raise NumberFormatError('%r is not a valid decimal number' % string)
11 | 231 |
232 | |
# Building blocks of the regular expression that splits a number format
# pattern into its prefix, number and suffix parts.

# A single character that may appear in the prefix (anything that is not a
# digit or one of the pattern characters '@', '#', '.', ',').
PREFIX_END = r'[^0-9@#.,]'
# A single character that may appear in the number part of a pattern.
NUMBER_TOKEN = r'[0-9@#.\-,E]'

# The prefix is any run of quoted literals or prefix characters.
PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
# The number part is one or more number tokens.
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
# Whatever follows the number part is the suffix.
SUFFIX_PATTERN = r"(?P<suffix>.*)"

number_re = re.compile(PREFIX_PATTERN + NUMBER_PATTERN + SUFFIX_PATTERN)
242 | |
243 # TODO: | |
244 # Filling | |
52 | 245 # Rounding increment in pattern |
11 | 246 # Scientific notation |
247 # Significant Digits | |
def parse_pattern(pattern):
    """Parse a number format pattern into a `NumberPattern`.

    An object that already is a `NumberPattern` is returned unchanged.
    Otherwise the pattern string is split into prefix, number and suffix
    parts. An optional negative subpattern may follow a ``;``; without one,
    the negative prefix is the positive prefix preceded by ``-`` and the
    negative suffix equals the positive suffix.

    :param pattern: the pattern string, or an already parsed `NumberPattern`
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if a (sub)pattern contains no number part
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    def split_affixes(subpattern):
        """Return the ``(prefix, number, suffix)`` parts of a subpattern."""
        match = number_re.search(subpattern)
        if match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError('Invalid number pattern %r' % subpattern)
        return match.groups()

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = split_affixes(pattern)
        # Only the affixes of the negative subpattern are used.
        neg_prefix, _, neg_suffix = split_affixes(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = split_affixes(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits.

        Every leading ``0`` counts towards both values, every leading ``#``
        only towards the maximum; scanning stops at the first other
        character.
        """
        # NOTE(review): a grouping comma also ends the scan, so '#,##0'
        # yields a minimum of 0 -- confirm whether that is intended.
        min_digits = max_digits = 0
        for c in p:
            if c == '0':
                min_digits += 1
                max_digits += 1
            elif c == '#':
                max_digits += 1
            else:
                break
        return min_digits, max_digits

    def parse_grouping(p):
        """Parse primary and secondary digit grouping.

        Returns a ``(primary, secondary)`` tuple of group sizes, e.g.
        ``(3, 3)`` for ``'#,###'`` and ``(3, 4)`` for ``'#,####,###'``.
        A pattern without any comma yields ``(1000, 1000)``, a group size
        large enough that no separator is inserted in practice.
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            return 1000, 1000
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_precision = parse_precision(integer)
    frac_precision = parse_precision(fraction)
    grouping = parse_grouping(integer)
    int_precision = (int_precision[0], 1000)  # Unlimited
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_precision, frac_precision)
310 | |
311 | |
class NumberPattern(object):
    """A parsed number format pattern that can be applied to numbers.

    Attributes set by the constructor:

    * ``pattern``: the pattern string this object was built from
    * ``prefix`` / ``suffix``: pairs indexed by sign (index 0 is used for
      non-negative values, index 1 for negative values)
    * ``grouping``: pair of digit group sizes; the first applies to the
      rightmost group, the second to all further groups
    * ``int_precision`` / ``frac_precision``: ``(min, max)`` digit counts
    * ``format``: the ``%``-format used to render the raw digits
    * ``scale``: factor applied to values before formatting (100 when an
      affix contains a percent sign, 1000 for a per-mille sign, else 1)
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_precision, frac_precision):
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_precision = int_precision
        self.frac_precision = frac_precision
        # Render with the maximum number of fraction digits; trailing zeros
        # beyond the minimum are trimmed later by `_format_frac`.
        self.format = '%%.%df' % self.frac_precision[1]
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100.0
        elif u'\u2030' in affixes:  # per-mille sign
            self.scale = 1000.0
        else:
            self.scale = 1.0

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale, currency=None):
        """Render `value` according to this pattern.

        :param value: the number to format
        :param locale: the locale whose group, decimal and currency symbols
                       are used
        :param currency: the currency code; needed when the pattern contains
                         the currency sign, because its upper-cased code and
                         its symbol replace that sign
        :return: the formatted number
        """
        value *= self.scale
        negative = int(value < 0)
        digits = self.format % value
        if self.frac_precision[1] > 0:
            int_part, frac_part = digits.split('.')
        else:
            int_part, frac_part = digits, ''
        # The sign is expressed through the negative prefix/suffix instead.
        int_part = int_part.lstrip('-')
        retval = '%s%s%s%s' % (self.prefix[negative],
                               self._format_int(int_part, locale),
                               self._format_frac(frac_part, locale),
                               self.suffix[negative])
        if u'\xa4' in retval:  # currency sign
            # A doubled currency sign stands for the currency code, a single
            # one for the locale's currency symbol.
            retval = retval.replace(u'\xa4\xa4', currency.upper())
            retval = retval.replace(u'\xa4',
                                    get_currency_symbol(currency, locale))
        return retval

    def _format_int(self, value, locale):
        """Pad and group the integer digits in `value`."""
        min_digits = self.int_precision[0]
        width = len(value)
        if width < min_digits:
            # Pad with leading zeros; appending them would change the
            # magnitude of the number (5 -> "500" instead of "005").
            value = '0' * (min_digits - width) + value
        gsize = self.grouping[0]
        if len(value) <= gsize:
            # No separator needed, so skip the locale lookup entirely.
            return value
        symbol = get_group_symbol(locale)
        ret = ''
        while len(value) > gsize:
            ret = symbol + value[-gsize:] + ret
            value = value[:-gsize]
            # Every group after the first uses the secondary size.
            gsize = self.grouping[1]
        return value + ret

    def _format_frac(self, value, locale):
        """Trim the fraction digits in `value` and prepend the separator."""
        min_digits, max_digits = self.frac_precision
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        # Drop trailing zeros, but keep at least the minimum digit count.
        while len(value) > min_digits and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value