Mercurial > babel > mirror
annotate babel/numbers.py @ 72:e0bb7dce49ea trunk
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
author | cmlenz |
---|---|
date | Fri, 08 Jun 2007 19:10:51 +0000 |
parents | 0896af2c49ec |
children | 0f641136aa6b |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Locale dependent formatting and parsing of numeric data. | |
15 | |
16 The default locale for the functions in this module is determined by the | |
17 following environment variables, in that order: | |
18 | |
19 * ``LC_NUMERIC``, | |
20 * ``LC_ALL``, and | |
21 * ``LANG`` | |
22 """ | |
23 # TODO: percent and scientific formatting | |
24 | |
25 import re | |
26 | |
72
e0bb7dce49ea
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
cmlenz
parents:
50
diff
changeset
|
27 from babel.core import default_locale, Locale |
1 | 28 |
# Names exported by ``from babel.numbers import *``.
__all__ = ['format_number', 'format_decimal', 'format_currency',
           'format_percent', 'format_scientific', 'parse_number',
           'parse_decimal', 'NumberFormatError']
# Markup language used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
33 | |
72
e0bb7dce49ea
More explicit module-level function names in `babel.core`. Added `Locale.negotiate` class method.
cmlenz
parents:
50
diff
changeset
|
# Locale used when a function in this module is called without an explicit
# ``locale`` argument; looked up once, when the module is imported, through
# `default_locale` for the ``LC_NUMERIC`` category.
LC_NUMERIC = default_locale('LC_NUMERIC')
1 | 35 |
def get_decimal_symbol(locale=LC_NUMERIC):
    """Look up the character a locale puts between the integer and the
    fractional part of a number.

    >>> get_decimal_symbol('en_US')
    u'.'

    If the locale data has no ``decimal`` entry, ``u'.'`` is returned.

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('decimal', u'.')
47 | |
def get_group_symbol(locale=LC_NUMERIC):
    """Look up the character a locale puts between groups of digits
    (for example between thousands).

    >>> get_group_symbol('en_US')
    u','

    If the locale data has no ``group`` entry, ``u','`` is returned.

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('group', u',')
1 | 59 |
def format_number(number, locale=LC_NUMERIC):
    """Format a number using the conventions of the given locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    This is a thin convenience wrapper: the work is delegated to
    `format_decimal` without an explicit format.

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Do we really need this one?
    formatted = format_decimal(number, locale=locale)
    return formatted
1 | 73 |
def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Format a decimal number using the conventions of the given locale.

    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2346, locale='en_US')
    u'1.235'
    >>> format_decimal(-1.2346, locale='en_US')
    u'-1.235'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de')
    u'12.345'

    Both the digit grouping and the decimal separator follow the locale:

    >>> format_decimal(12345, locale='en_US')
    u'12,345'

    :param number: the number to format
    :param format: a key of the locale's ``decimal_formats`` mapping; when
                   no pattern is stored under that key, the value is handed
                   to `parse_pattern` and must therefore be a pattern string
                   or an already parsed pattern object
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    # Prefer a pattern shipped with the locale data; otherwise treat
    # ``format`` itself as the pattern.
    pattern = locale.decimal_formats.get(format) or parse_pattern(format)
    return pattern.apply(number, locale)
1 | 105 |
def format_currency(number, currency, locale=LC_NUMERIC):
    """Format a monetary amount using the conventions of the given locale.

    >>> format_currency(1099.98, 'USD', locale='en_US')
    u'1,099.98'

    NOTE: the ``currency`` argument is accepted but not used yet; the amount
    is rendered by `format_decimal`, so no currency symbol or code appears in
    the result.

    :param number: the number to format
    :param currency: the currency code
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    formatted = format_decimal(number, locale=locale)
    return formatted
1 | 119 |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
def format_percent(number, format=None, locale=LC_NUMERIC):
    """Format a number as a percentage using the conventions of the given
    locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512 %'

    :param number: the percent number to format
    :param format: a key of the locale's ``percent_formats`` mapping; when
                   no pattern is stored under that key, the value is handed
                   to `parse_pattern` and must therefore be a pattern string
                   or an already parsed pattern object
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    # Prefer a pattern shipped with the locale data; otherwise treat
    # ``format`` itself as the pattern.
    pattern = locale.percent_formats.get(format) or parse_pattern(format)
    return pattern.apply(number, locale)
1 | 141 |
26
6041782ea677
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
22
diff
changeset
|
def format_scientific(number, locale=LC_NUMERIC):
    """Placeholder for scientific-notation formatting; not implemented yet.

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :raise `NotImplementedError`: always
    """
    raise NotImplementedError()
144 | |
32 | 145 |
146 | |
class NumberFormatError(ValueError):
    """Signals that a string could not be parsed into a number.

    Derives from `ValueError`, so handlers written for ``ValueError`` also
    catch it.
    """
149 | |
150 | |
def parse_number(string, locale=LC_NUMERIC):
    """Convert a localized integer string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    A string that is not a valid integer once the locale's group symbol has
    been removed is rejected:

    >>> parse_number('1.099,98', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '1.099,98' is not a valid number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `NumberFormatError`: if the string can not be converted to a number
    """
    try:
        # Drop the grouping characters; what remains must be plain digits.
        digits = string.replace(get_group_symbol(locale), '')
        return long(digits)
    except ValueError:
        raise NumberFormatError('%r is not a valid number' % string)
1 | 176 |
def parse_decimal(string, locale=LC_NUMERIC):
    """Convert a localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de')
    1099.98

    A string that is not a valid decimal number for the locale is rejected:

    >>> parse_decimal('2,109,998', locale='de')
    Traceback (most recent call last):
        ...
    NumberFormatError: '2,109,998' is not a valid decimal number

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `NumberFormatError`: if the string can not be converted to a
                                decimal number
    """
    locale = Locale.parse(locale)
    try:
        # Remove grouping first, then map the locale's decimal symbol to the
        # '.' that float() understands.
        ungrouped = string.replace(get_group_symbol(locale), '')
        normalized = ungrouped.replace(get_decimal_symbol(locale), '.')
        return float(normalized)
    except ValueError:
        raise NumberFormatError('%r is not a valid decimal number' % string)
9 | 205 |
206 | |
# Character classes used to split a number pattern into its three parts.
# A prefix character is anything that cannot start the number part ...
PREFIX_END = r'[^0-9@#.,]'
# ... and the number part is a run of these characters.
NUMBER_TOKEN = r'[0-9@#.\-,E]'

# The prefix may contain quoted literals ('...') as well as single
# non-number characters; the suffix is simply everything after the number.
PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"

# Matches ``<prefix><number><suffix>`` and exposes the parts as named groups.
number_re = re.compile(PREFIX_PATTERN + NUMBER_PATTERN + SUFFIX_PATTERN)
216 | |
217 # TODO: | |
218 # Filling | |
50 | 219 # Rounding increment in pattern |
9 | 220 # Scientific notation |
221 # Significant Digits | |
def parse_pattern(pattern):
    """Parse a number format pattern into a `NumberPattern` object.

    The pattern may consist of a positive subpattern only, or of a positive
    and a negative subpattern separated by ``;``.  Without a negative
    subpattern, the negative form is the positive one with ``-`` put in
    front of the prefix.  Only the positive subpattern determines digit
    counts and grouping, and it is also what the returned object stores as
    its ``pattern``.

    :param pattern: the pattern string, or an already parsed `NumberPattern`
                    (which is returned unchanged)
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if a subpattern contains no number part
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    def split_subpattern(subpattern):
        """Return the (prefix, number, suffix) parts of a subpattern."""
        match = number_re.search(subpattern)
        if match is None:
            # Without this check the failure would surface as an obscure
            # AttributeError on ``None.groups()``.
            raise ValueError('%r is not a valid number pattern' % subpattern)
        return match.groups()

    def parse_precision(digits):
        """Calculate the min and max allowed digits.

        ``0`` counts as a required digit, ``#`` as an optional one.  Group
        separators are skipped so that e.g. ``#,##0`` yields one required
        digit; any other character ends the scan.
        """
        min_digits = max_digits = 0
        for char in digits:
            if char == '0':
                min_digits += 1
                max_digits += 1
            elif char == '#':
                max_digits += 1
            elif char == ',':
                continue
            else:
                break
        return min_digits, max_digits

    def parse_grouping(digits):
        """Parse primary and secondary digit grouping.

        Returns a ``(primary, secondary)`` tuple of group sizes, counted from
        the right-hand end of the integer part:

        * ``'##'`` gives ``(1000, 1000)`` (no separator: effectively no
          grouping),
        * ``'#,###'`` gives ``(3, 3)``,
        * ``'#,####,###'`` gives ``(3, 4)``.
        """
        width = len(digits)
        last = digits.rfind(',')
        if last == -1:
            return 1000, 1000
        primary = width - last - 1
        # Look for another separator to the left of the primary group.
        previous = digits[:-primary - 1].rfind(',')
        if previous == -1:
            return primary, primary
        secondary = width - primary - previous - 2
        return primary, secondary

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = split_subpattern(pattern)
        neg_prefix, _, neg_suffix = split_subpattern(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = split_subpattern(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix

    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer, fraction = number, ''

    min_int, _ = parse_precision(integer)
    int_precision = (min_int, 1000)  # Unlimited
    frac_precision = parse_precision(fraction)
    grouping = parse_grouping(integer)
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_precision, frac_precision)
284 | |
285 | |
class NumberPattern(object):
    """A parsed number format pattern that can render numbers as text.

    The object holds the pieces of a pattern (prefixes, suffixes, grouping
    sizes and digit counts) and applies them to a value in `apply`.
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_precision, frac_precision):
        """Store the pattern parts and derive the helper attributes.

        :param pattern: the pattern string, kept for ``repr()`` only
        :param prefix: ``(positive, negative)`` prefix strings
        :param suffix: ``(positive, negative)`` suffix strings
        :param grouping: ``(primary, secondary)`` digit group sizes
        :param int_precision: ``(min, max)`` number of integer digits
        :param frac_precision: ``(min, max)`` number of fraction digits
        """
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_precision = int_precision
        self.frac_precision = frac_precision
        # %-format used to round the value to the maximum fraction digits.
        self.format = '%%.%df' % self.frac_precision[1]
        # A percent or per-mille sign anywhere in the affixes means the value
        # has to be scaled before it is rendered.
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100.0
        elif u'\u2030' in affixes:  # PER MILLE SIGN
            self.scale = 1000.0
        else:
            self.scale = 1.0

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale):
        """Render ``value`` according to this pattern.

        :param value: the number to format
        :param locale: the locale that supplies the group and decimal symbols
        :return: the formatted number
        """
        value *= self.scale
        negative = int(value < 0)  # index into the (positive, negative) pairs
        text = self.format % value
        if self.frac_precision[1] > 0:
            int_part, frac_part = text.split('.')
        else:
            int_part, frac_part = text, ''
        # The sign is expressed through the negative prefix/suffix instead.
        int_part = int_part.lstrip('-')
        return '%s%s%s%s' % (self.prefix[negative],
                             self._format_int(int_part, locale),
                             self._format_frac(frac_part, locale),
                             self.suffix[negative])

    def _format_int(self, value, locale):
        """Zero-pad and group the integer digits in ``value``."""
        min_digits = self.int_precision[0]
        width = len(value)
        if width < min_digits:
            # Missing integer digits are leading zeros; appending them would
            # change the magnitude of the number (5 -> 500 instead of 005).
            value = '0' * (min_digits - width) + value
        gsize = self.grouping[0]
        if len(value) <= gsize:
            # Nothing to group, so the separator is not needed at all.
            return value
        symbol = get_group_symbol(locale)
        grouped = ''
        while len(value) > gsize:
            grouped = symbol + value[-gsize:] + grouped
            value = value[:-gsize]
            # Every group after the first uses the secondary size.
            gsize = self.grouping[1]
        return value + grouped

    def _format_frac(self, value, locale):
        """Return the decimal symbol plus fraction digits, or ``''`` when no
        fraction is to be shown."""
        min_digits, max_digits = self.frac_precision
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        # Strip optional trailing zeros, keeping the required minimum.
        while len(value) > min_digits and value[-1] == '0':
            value = value[:-1]
        return get_decimal_symbol(locale) + value