Mercurial > babel > old > mirror
annotate babel/numbers.py @ 28:695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
* Move locale data loading from `babel.core` into a separate `babel.localedata` module.
* Add currency names and symbols to locale data.
author | cmlenz |
---|---|
date | Sun, 03 Jun 2007 15:27:27 +0000 |
parents | 6c2c9fc7d787 |
children | 8b6804eac9e5 |
rev | line source |
---|---|
3 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Locale dependent formatting and parsing of numeric data. | |
15 | |
16 The default locale for the functions in this module is determined by the | |
17 following environment variables, in that order: | |
18 | |
19 * ``LC_NUMERIC``, | |
20 * ``LC_ALL``, and | |
21 * ``LANG`` | |
22 """ | |
23 # TODO: percent and scientific formatting | |
24 | |
25 import re | |
26 | |
27 from babel.core import Locale | |
28 from babel.util import default_locale | |
29 | |
30 __all__ = ['format_number', 'format_decimal', 'format_currency', | |
31 'format_percent', 'format_scientific', 'parse_number', | |
32 'parse_decimal'] | |
33 __docformat__ = 'restructuredtext en' | |
34 | |
35 LC_NUMERIC = default_locale('LC_NUMERIC') | |
36 | |
def get_decimal_symbol(locale=LC_NUMERIC):
    """Look up the character a locale puts between integer and fraction.

    >>> get_decimal_symbol('en_US')
    u'.'

    :param locale: the `Locale` object or locale identifier
    :return: the decimal symbol; ``u'.'`` if the locale's number symbols
             have no ``'decimal'`` entry
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('decimal', u'.')
48 | |
def get_group_symbol(locale=LC_NUMERIC):
    """Look up the character a locale puts between groups of digits.

    >>> get_group_symbol('en_US')
    u','

    :param locale: the `Locale` object or locale identifier
    :return: the group symbol; ``u','`` if the locale's number symbols
             have no ``'group'`` entry
    :rtype: `unicode`
    """
    symbols = Locale.parse(locale).number_symbols
    return symbols.get('group', u',')
3 | 60 |
def format_number(number, locale=LC_NUMERIC):
    """Format a number for a specific locale.

    >>> format_number(1099, locale='en_US')
    u'1,099'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted number
    :rtype: `unicode`
    """
    # Thin alias: everything is delegated to `format_decimal` without an
    # explicit format.  (Open question from the original author: is this
    # function really needed?)
    formatted = format_decimal(number, locale=locale)
    return formatted
3 | 74 |
def format_decimal(number, format=None, locale=LC_NUMERIC):
    """Format a decimal number according to a locale's conventions.

    >>> format_decimal(1, locale='en_US')
    u'1'
    >>> format_decimal(1.2345, locale='en_US')
    u'1.234'
    >>> format_decimal(1.2345, locale='sv_SE')
    u'1,234'
    >>> format_decimal(12345, locale='de_DE')
    u'12.345'
    >>> format_decimal(-1.2345, format='#,##0.##;-#', locale='sv_SE')
    u'-1,23'
    >>> format_decimal(-1.2345, format='#,##0.##;(#)', locale='sv_SE')
    u'(1,23)'

    The thousands grouping and the decimal separator of the locale are
    applied:

    >>> format_decimal(12345, locale='en_US')
    u'12,345'

    :param number: the number to format
    :param format: a key into the locale's ``decimal_formats``; when that
                   lookup yields nothing, the value is handed to
                   `parse_pattern` and used as a number pattern instead.
                   ``None`` presumably selects the locale's default
                   pattern -- confirm against the locale data.
    :param locale: the `Locale` object or locale identifier
    :return: the formatted decimal number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    # Prefer a pattern stored in the locale data; otherwise interpret the
    # argument itself as a pattern.
    pattern = locale.decimal_formats.get(format) or parse_pattern(format)
    return pattern.apply(number, locale)
3 | 108 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_currency(number, locale=LC_NUMERIC):
    """Format a currency value for a specific locale.

    >>> format_currency(1099.98, locale='en_US')
    u'1,099.98'

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :return: the formatted currency value
    :rtype: `unicode`
    """
    # No currency symbol or currency-specific pattern is applied here: the
    # value is rendered exactly as `format_decimal` renders it.
    formatted = format_decimal(number, locale=locale)
    return formatted
3 | 121 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_percent(number, format=None, locale=LC_NUMERIC):
    """Format a number as a percentage for a specific locale.

    >>> format_percent(0.34, locale='en_US')
    u'34%'
    >>> format_percent(25.1234, locale='en_US')
    u'2,512%'
    >>> format_percent(25.1234, locale='sv_SE')
    u'2\\xa0512 %'

    :param number: the percent number to format
    :param format: a key into the locale's ``percent_formats``; when that
                   lookup yields nothing, the value is handed to
                   `parse_pattern` and used as a number pattern instead.
                   ``None`` presumably selects the locale's default
                   pattern -- confirm against the locale data.
    :param locale: the `Locale` object or locale identifier
    :return: the formatted percent number
    :rtype: `unicode`
    """
    locale = Locale.parse(locale)
    # Prefer a pattern stored in the locale data; otherwise interpret the
    # argument itself as a pattern.
    pattern = locale.percent_formats.get(format) or parse_pattern(format)
    return pattern.apply(number, locale)
3 | 143 |
28
695884591af6
* Reduce size of locale data pickles by only storing the data provided by each locale itself, and merging inherited data at runtime.
cmlenz
parents:
24
diff
changeset
|
def format_scientific(number, locale=LC_NUMERIC):
    """Placeholder for scientific-notation formatting (not implemented).

    :param number: the number to format
    :param locale: the `Locale` object or locale identifier
    :raise `NotImplementedError`: always
    """
    raise NotImplementedError()
146 | |
def parse_number(string, locale=LC_NUMERIC):
    """Convert a localized number string into a long integer.

    >>> parse_number('1,099', locale='en_US')
    1099L
    >>> parse_number('1.099', locale='de_DE')
    1099L

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed number
    :rtype: `long`
    :raise `ValueError`: if the string can not be converted to a number
    """
    # Drop the locale's grouping symbol so only the digits remain.
    group_symbol = get_group_symbol(locale)
    digits = string.replace(group_symbol, '')
    return long(digits)
162 | |
def parse_decimal(string, locale=LC_NUMERIC):
    """Convert a localized decimal string into a float.

    >>> parse_decimal('1,099.98', locale='en_US')
    1099.98
    >>> parse_decimal('1.099,98', locale='de_DE')
    1099.98

    :param string: the string to parse
    :param locale: the `Locale` object or locale identifier
    :return: the parsed decimal number
    :rtype: `float`
    :raise `ValueError`: if the string can not be converted to a decimal number
    """
    locale = Locale.parse(locale)
    # First strip the grouping symbol, then turn the locale's decimal symbol
    # into the '.' that float() understands.
    ungrouped = string.replace(get_group_symbol(locale), '')
    normalized = ungrouped.replace(get_decimal_symbol(locale), '.')
    return float(normalized)
11 | 181 |
182 | |
# Building blocks of the regular expression that splits a number (sub)pattern
# into literal prefix, number specification and literal suffix.
PREFIX_END = r'[^0-9@#.,]'
NUMBER_TOKEN = r'[0-9@#.\-,E]'

PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"

number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN,
                                    SUFFIX_PATTERN))

# TODO:
#  Filling
#  Rounding
#  Scientific notation
#  Significant Digits
def parse_pattern(pattern):
    """Parse a number format pattern into a `NumberPattern`.

    The pattern may contain a negative subpattern after a ``;``; only the
    prefix and suffix of that subpattern are used.  Without one, negative
    numbers get the positive prefix preceded by ``-`` and the positive
    suffix.

    :param pattern: the pattern string, or an existing `NumberPattern`
                    (which is returned unchanged)
    :return: the parsed pattern
    :rtype: `NumberPattern`
    :raise `ValueError`: if a (sub)pattern contains no number specification
    """
    if isinstance(pattern, NumberPattern):
        return pattern

    def split_affixes(subpattern):
        """Return the (prefix, number, suffix) parts of a subpattern."""
        match = number_re.search(subpattern)
        if match is None:
            # Previously surfaced as an AttributeError on None.
            raise ValueError('Invalid number pattern %r' % (subpattern,))
        return match.groups()

    # Do we have a negative subpattern?
    if ';' in pattern:
        pattern, neg_pattern = pattern.split(';', 1)
        pos_prefix, number, pos_suffix = split_affixes(pattern)
        neg_prefix, _, neg_suffix = split_affixes(neg_pattern)
    else:
        pos_prefix, number, pos_suffix = split_affixes(pattern)
        neg_prefix = '-' + pos_prefix
        neg_suffix = pos_suffix
    if '.' in number:
        integer, fraction = number.rsplit('.', 1)
    else:
        integer = number
        fraction = ''

    def parse_precision(p):
        """Calculate the min and max allowed digits.

        ``0`` counts towards both minimum and maximum, ``#`` only towards
        the maximum.  Grouping separators are skipped so that they do not
        cut the count short (``#,#00`` requires two digits); any other
        character ends the scan.
        """
        min_digits = max_digits = 0
        for c in p:
            if c == '0':
                min_digits += 1
                max_digits += 1
            elif c == '#':
                max_digits += 1
            elif c == ',':
                continue
            else:
                break
        return min_digits, max_digits

    def parse_grouping(p):
        """Parse primary and secondary digit grouping.

        Returns ``(primary, secondary)`` group sizes; 1000 stands for
        "no grouping"::

            parse_grouping('##')          -> (1000, 1000)
            parse_grouping('#,###')       -> (3, 3)
            parse_grouping('#,####,###')  -> (3, 4)
        """
        width = len(p)
        g1 = p.rfind(',')
        if g1 == -1:
            return 1000, 1000
        g1 = width - g1 - 1
        g2 = p[:-g1 - 1].rfind(',')
        if g2 == -1:
            return g1, g1
        g2 = width - g1 - g2 - 2
        return g1, g2

    int_precision = parse_precision(integer)
    frac_precision = parse_precision(fraction)
    grouping = parse_grouping(integer)
    int_precision = (int_precision[0], 1000)  # Unlimited
    return NumberPattern(pattern, (pos_prefix, neg_prefix),
                         (pos_suffix, neg_suffix), grouping,
                         int_precision, frac_precision)


class NumberPattern(object):
    """A parsed number format pattern that can be applied to numbers.

    Attributes:

    * ``pattern`` -- the pattern text handed to the constructor
    * ``prefix`` / ``suffix`` -- ``(positive, negative)`` tuples of literal
      text placed before / after the digits
    * ``grouping`` -- ``(primary, secondary)`` digit group sizes
    * ``int_precision`` / ``frac_precision`` -- ``(min, max)`` digit counts
    * ``scale`` -- factor applied before formatting: 100 if any prefix or
      suffix contains ``%``, 1000 if one contains the per-mille sign,
      otherwise 1
    """

    def __init__(self, pattern, prefix, suffix, grouping,
                 int_precision, frac_precision):
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix
        self.grouping = grouping
        self.int_precision = int_precision
        self.frac_precision = frac_precision
        affixes = ''.join(self.prefix + self.suffix)
        if '%' in affixes:
            self.scale = 100.0
        elif u'‰' in affixes:
            self.scale = 1000.0
        else:
            self.scale = 1.0

    def __repr__(self):
        return '<%s %r>' % (type(self).__name__, self.pattern)

    def apply(self, value, locale):
        """Format ``value`` with this pattern using the symbols of ``locale``.

        :param value: the number to format
        :param locale: the `Locale` object or locale identifier used to look
                       up the group and decimal symbols
        :return: the formatted number
        """
        value *= self.scale
        negative = int(value < 0)
        text = str(value)
        if 'e' in text or 'E' in text:
            # Very large or very small floats print in exponent notation,
            # which has no plain integer/fraction split; expand it.
            text = '%f' % value
        if '.' in text:
            a, b = text.split('.', 1)
        else:
            a, b = text, '0'
        a = a.lstrip('-')
        return '%s%s%s%s' % (self.prefix[negative],
                             self._format_int(a, locale),
                             self._format_frac(b, locale),
                             self.suffix[negative])

    def _format_int(self, value, locale):
        """Zero-pad and group the integer digit string ``value``."""
        min_digits = self.int_precision[0]
        width = len(value)
        if width < min_digits:
            # Leading zeros: padding on the right would change the number.
            value = '0' * (min_digits - width) + value
        gsize = self.grouping[0]
        if len(value) <= gsize:
            # Nothing to group, so the locale lookup can be skipped.
            return value
        symbol = get_group_symbol(locale)
        ret = ''
        while len(value) > gsize:
            ret = symbol + value[-gsize:] + ret
            value = value[:-gsize]
            # Every group after the first uses the secondary size.
            gsize = self.grouping[1]
        return value + ret

    def _format_frac(self, value, locale):
        """Pad or cut the fraction digit string ``value``.

        Returns an empty string when the pattern allows no fraction digits,
        or when none are required and the fraction is zero; otherwise the
        locale's decimal symbol followed by the digits.
        """
        min_digits, max_digits = self.frac_precision
        if max_digits == 0 or (min_digits == 0 and int(value) == 0):
            return ''
        width = len(value)
        if width < min_digits:
            value += '0' * (min_digits - width)
        if width > max_digits:
            value = value[:max_digits]  # FIXME: truncates, does not round
        return get_decimal_symbol(locale) + value