Mercurial > babel > old > mirror
annotate babel/numbers.py @ 129:d6aef0675953
Add a couple of CLI tests.
author | cmlenz |
---|---|
date | Mon, 18 Jun 2007 17:05:00 +0000 |
parents | 24a711a3c174 |
children | 5969b610d0ec |
rev | line source |
---|---|
3 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Locale dependent formatting and parsing of numeric data. | |
15 | |
16 The default locale for the functions in this module is determined by the | |
17 following environment variables, in that order: | |
18 | |
19 * ``LC_NUMERIC``, | |
20 * ``LC_ALL``, and | |
21 * ``LANG`` | |
22 """ | |
23 # TODO: percent and scientific formatting | |
24 | |
25 import re | |
26 | |
74
d9c34d2f3d1d
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
cmlenz
parents:
52
diff
changeset
|
27 from babel.core import default_locale, Locale |
3 | 28 |
# Names exported by ``from babel.numbers import *``.
__all__ = [
    'format_number',
    'format_decimal',
    'format_currency',
    'format_percent',
    'format_scientific',
    'parse_number',
    'parse_decimal',
    'NumberFormatError',
]
__docformat__ = 'restructuredtext en'

# Default locale for the functions in this module; the assignment runs once,
# when the module is imported.
LC_NUMERIC = default_locale('LC_NUMERIC')
3 | 35 |
def get_currency_symbol(currency, locale=LC_NUMERIC):
    """Look up the symbol the locale uses for the given currency.

    >>> get_currency_symbol('USD', 'en_US')
    u'$'

    If the locale data holds no symbol for the currency, the currency code
    itself is returned.

    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the currency symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).currency_symbols
    return symbols.get(currency, currency)
48 | |
def get_decimal_symbol(locale=LC_NUMERIC):
    """Look up the symbol the locale uses to separate decimal fractions.

    >>> get_decimal_symbol('en_US')
    u'.'

    Falls back to ``u'.'`` when the locale data defines no decimal symbol.

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('decimal', u'.')
60 | |
def get_group_symbol(locale=LC_NUMERIC):
    """Look up the symbol the locale uses to separate groups of thousands.

    >>> get_group_symbol('en_US')
    u','

    Falls back to ``u','`` when the locale data defines no group symbol.

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('group', u',')
3 | 72 |
def format_number(number, locale=LC_NUMERIC):
    """Format a number for a specific locale.

    This simply delegates to `format_decimal` with the locale's default
    pattern.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Do we really need this one?
    formatted = format_decimal(number, locale=locale)
    return formatted
3 | 86 |
def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Format a decimal number for a specific locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    The appropriate thousands grouping and the decimal separator are used for
    each locale:

    >>> format_decimal(12345.5, locale='en_US')
    u'12,345.5'

    :param number: the number to format
    :param format: the number pattern (a string or an already parsed
                   pattern object); when empty or `None`, the pattern is
                   looked up in the locale's ``decimal_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # A falsy ``format`` is used as the lookup key for the locale's pattern.
    chosen = format or parsed_locale.decimal_formats.get(format)
    return parse_pattern(chosen).apply(number, parsed_locale)
3 | 118 |
def format_currency(number, currency, format=None, locale=LC_NUMERIC):
    """Format a monetary amount for a specific locale.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'$1,099.98'
    >>> format_currency(1099.98, 'USD', locale='es_CO')
    u'US$1.099,98'
    >>> format_currency(1099.98, 'EUR', locale='de_DE')
    u'1.099,98 \\u20ac'

    The pattern can also be specified explicitly:

    >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
    u'EUR 1,099.98'

    :param number: the number to format
    :param currency: the currency code
    :param format: the number pattern (a string or an already parsed
                   pattern object); when empty or `None`, the pattern is
                   looked up in the locale's ``currency_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # A falsy ``format`` is used as the lookup key for the locale's pattern.
    chosen = format or parsed_locale.currency_formats.get(format)
    return parse_pattern(chosen).apply(number, parsed_locale,
                                       currency=currency)
3 | 145 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_percent(number, format=None, locale=LC_NUMERIC):
    """Format a number as a percentage for a specific locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512 %'

    The format pattern can also be specified explicitly:

    >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
    u'25,123\u2030'

    :param number: the percent number to format
    :param format: the number pattern (a string or an already parsed
                   pattern object); when empty or `None`, the pattern is
                   looked up in the locale's ``percent_formats``
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    parsed_locale = Locale.parse(locale)
    # A falsy ``format`` is used as the lookup key for the locale's pattern.
    chosen = format or parsed_locale.percent_formats.get(format)
    return parse_pattern(chosen).apply(number, parsed_locale)
3 | 172 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_scientific(number, locale=LC_NUMERIC):
    """Placeholder for scientific formatting; not implemented yet.

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :raise `NotImplementedError`: always
    """
    # TODO: implement
    raise NotImplementedError()
176 | |
34 | 177 |
class NumberFormatError(ValueError):
    """Raised when a string cannot be converted into a number.

    Being a `ValueError` subclass, it is also caught by handlers written for
    plain `ValueError`.
    """
180 | |
181 | |
def parse_number(string, locale=LC_NUMERIC):
    """Convert a localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    When the given string cannot be parsed, an exception is raised:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Drop the locale's grouping symbol before converting.
        parsed = long(string.replace(get_group_symbol(locale), ''))
    except ValueError:
        message = '%r is not a valid number' % string
        raise NumberFormatError(message)
    return parsed
3 | 207 |
def parse_decimal(string, locale=LC_NUMERIC):
    """Convert a localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    When the given string cannot be parsed, an exception is raised:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # Remove grouping symbols first, then normalize the locale's decimal
        # symbol to the '.' that float() expects.
        normalized = string.replace(get_group_symbol(locale), '')
        normalized = normalized.replace(get_decimal_symbol(locale), '.')
        return float(normalized)
    except ValueError:
        message = '%r is not a valid decimal number' % string
        raise NumberFormatError(message)
11 | 236 |
237 | |
# A single prefix character: anything that is not a digit or one of the
# placeholder/separator characters '@', '#', '.', ','.
PREFIX_END = r'[^0-9@#.,]'
# A single character of the numeric part of a pattern.
NUMBER_TOKEN = r'[0-9@#.\-,E]'

# Prefix: any run of single-quoted literals and prefix characters.
PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|" + PREFIX_END + r")*)"
# Number: one or more number tokens.
NUMBER_PATTERN = r"(?P<number>" + NUMBER_TOKEN + r"+)"
# Suffix: whatever follows the numeric part.
SUFFIX_PATTERN = r"(?P<suffix>.*)"

# Splits a (sub)pattern into its prefix, number and suffix groups.
number_re = re.compile(PREFIX_PATTERN + NUMBER_PATTERN + SUFFIX_PATTERN)
247 | |
248 # TODO: | |
249 # Filling | |
52 | 250 # Rounding increment in pattern |
11 | 251 # Scientific notation |
252 # Significant Digits | |
def parse_pattern(pattern):
    """Parse a number format pattern into a `NumberPattern`.

    The pattern may consist of a positive subpattern alone, or of a positive
    and a negative subpattern separated by ``;``. Without an explicit
    negative subpattern, the negative prefix is the positive prefix preceded
    by ``-`` and the negative suffix equals the positive suffix. Only the
    prefix and suffix of a negative subpattern are used; its numeric part is
    ignored.

    :param pattern: the pattern string, or an already parsed `NumberPattern`
                    (which is returned unchanged)
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if a subpattern contains no numeric part
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    def split_subpattern(subpattern):
        """Return the ``(prefix, number, suffix)`` parts of a subpattern."""
        match = number_re.search(subpattern)
        if match is None:
            # Without this check the failure would surface as an obscure
            # AttributeError on ``None.groups()``.
            raise ValueError('Invalid number pattern %r' % subpattern)
        return match.groups()

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = split_subpattern(pattern)
        neg_prefix, _, neg_suffix = split_subpattern(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = split_subpattern(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix

    # Split the numeric part at the last '.' into integer and fraction.
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits.

        Counting stops at the first character that is neither ``0`` nor
        ``#``.
        """
        # NOTE(review): counting also stops at a grouping comma, so '#,##0'
        # yields a minimum of 0 integer digits. Left unchanged here because
        # NumberPattern's integer formatting depends on these values.
        lo = hi = 0
        for c in p:
            if c == '0':
                lo += 1
                hi += 1
            elif c == '#':
                hi += 1
            else:
                break
        return lo, hi

    def parse_grouping(p):
        """Parse primary and secondary digit grouping.

        Returns ``(primary, secondary)`` group sizes; a pattern without any
        grouping separator yields ``(1000, 1000)``.

        >>> parse_grouping('##')
        (1000, 1000)
        >>> parse_grouping('#,###')
        (3, 3)
        >>> parse_grouping('#,####,###')
        (3, 4)
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            return 1000, 1000
        # Number of characters after the last separator.
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        # Number of characters between the last two separators.
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_precision = parse_precision(integer)
    frac_precision = parse_precision(fraction)
    grouping = parse_grouping(integer)
    int_precision = (int_precision[0], 1000)  # Unlimited
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_precision, frac_precision)
315 | |
316 | |
class NumberPattern(object):
    """A parsed number format pattern that can be applied to numbers.

    ``prefix`` and ``suffix`` are ``(positive, negative)`` pairs, ``grouping``
    is a ``(primary, secondary)`` pair of group sizes, and ``int_precision``
    and ``frac_precision`` are ``(min, max)`` digit counts.
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_precision, frac_precision):
        """Store the pattern parts and derive the format string and scale.

        :param pattern: the pattern string (used by ``repr()``)
        :param prefix: ``(positive, negative)`` prefix strings
        :param suffix: ``(positive, negative)`` suffix strings
        :param grouping: ``(primary, secondary)`` group sizes
        :param int_precision: ``(min, max)`` number of integer digits
        :param frac_precision: ``(min, max)`` number of fraction digits
        """
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_precision = int_precision
        self.frac_precision = frac_precision
        # Round to the maximum number of fraction digits up front.
        self.format = '%%.%df' % self.frac_precision[1]
        # A percent or per-mille sign in any affix scales the value.
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100.0
        elif u'\u2030' in affixes:
            self.scale = 1000.0
        else:
            self.scale = 1.0

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale, currency=None):
        """Format ``value`` according to this pattern.

        :param value: the number to format
        :param locale: the locale passed on to the symbol lookups
        :param currency: the currency code substituted for currency signs
                         in the pattern's prefix or suffix
        :return: the formatted number
        :rtype: `unicode`
        """
        value *= self.scale
        # Index into the (positive, negative) prefix/suffix pairs.
        negative = int(value < 0)
        formatted = self.format % value
        if self.frac_precision[1] > 0:
            int_digits, frac_digits = formatted.split('.')
        else:
            int_digits, frac_digits = formatted, ''
        # The sign is rendered through the negative prefix/suffix instead.
        int_digits = int_digits.lstrip('-')
        retval = u'%s%s%s%s' % (self.prefix[negative],
                                self._format_int(int_digits, locale),
                                self._format_frac(frac_digits, locale),
                                self.suffix[negative])
        if u'\xa4' in retval:
            # A doubled currency sign is replaced by the upper-cased
            # currency code, a single one by the currency symbol.
            retval = retval.replace(u'\xa4\xa4', currency.upper())
            retval = retval.replace(u'\xa4',
                                    get_currency_symbol(currency, locale))
        return retval

    def _format_int(self, value, locale):
        """Zero-pad and group the integer digits in ``value``."""
        min_digits = self.int_precision[0]
        width = len(value)
        if width < min_digits:
            # Pad on the left: appending zeros would change the magnitude
            # ('5' must become '005', not '500').
            value = '0' * (min_digits - width) + value
        gsize = self.grouping[0]
        grouped = ''
        symbol = get_group_symbol(locale)
        # Peel groups off the right; after the first (primary) group all
        # further groups use the secondary size.
        while len(value) > gsize:
            grouped = symbol + value[-gsize:] + grouped
            value = value[:-gsize]
            gsize = self.grouping[1]
        return value + grouped

    def _format_frac(self, value, locale):
        """Return the fraction digits with the decimal symbol, or ``''``."""
        min_digits, max_digits = self.frac_precision
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        # Strip trailing zeros beyond the required minimum.
        while len(value) > min_digits and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value