Mercurial > babel > old > mirror
annotate 0.8.x/babel/core.py @ 159:fbc3f7b43577 stable
Ported [159] to 0.8.x branch.
author | cmlenz |
---|---|
date | Thu, 21 Jun 2007 11:43:36 +0000 |
parents | 90866b11734f |
children | 5649355a3a03 |
rev | line source |
---|---|
142 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Core locale representation and locale data access.""" | |
15 | |
16 import os | |
17 | |
18 from babel import localedata | |
19 | |
20 __all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale', | |
21 'parse_locale'] | |
22 __docformat__ = 'restructuredtext en' | |
23 | |
24 | |
class UnknownLocaleError(Exception):
    """Exception thrown when a locale is requested for which no locale data
    is available.

    The offending identifier is kept on the ``identifier`` attribute so that
    callers can inspect it without having to parse the message.
    """

    def __init__(self, identifier):
        """Create the exception.

        :param identifier: the identifier string of the unsupported locale
        """
        # Wrap the value in a one-element tuple: a bare ``% identifier`` would
        # unpack a tuple identifier as the format arguments and fail.
        Exception.__init__(self, 'unknown locale %r' % (identifier,))
        self.identifier = identifier
37 | |
38 | |
class Locale(object):
    """Representation of a specific locale.

    >>> locale = Locale('en', territory='US')
    >>> repr(locale)
    '<Locale "en_US">'
    >>> locale.display_name
    u'English (United States)'

    A `Locale` object can also be instantiated from a raw locale string:

    >>> locale = Locale.parse('en-US', sep='-')
    >>> repr(locale)
    '<Locale "en_US">'

    `Locale` objects provide access to a collection of locale data, such as
    territory and language names, number and date format patterns, and more:

    >>> locale.number_symbols['decimal']
    u'.'

    If a locale is requested for which no locale data is available, an
    `UnknownLocaleError` is raised:

    >>> Locale.parse('en_DE')
    Traceback (most recent call last):
        ...
    UnknownLocaleError: unknown locale 'en_DE'

    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """

    def __init__(self, language, territory=None, variant=None):
        """Initialize the locale object from the given identifier components.

        >>> locale = Locale('en', 'US')
        >>> locale.language
        'en'
        >>> locale.territory
        'US'

        :param language: the language code
        :param territory: the territory (country or region) code
        :param variant: the variant code
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        self.language = language
        self.territory = territory
        self.variant = variant
        # the locale data is loaded lazily on first access, see `_data`
        self.__data = None

        identifier = str(self)
        if not localedata.exists(identifier):
            raise UnknownLocaleError(identifier)

    def default(cls, category=None):
        """Return the system default locale for the specified category.

        >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
        ...     os.environ[name] = ''
        >>> os.environ['LANG'] = 'fr_FR.UTF-8'
        >>> Locale.default('LC_MESSAGES')
        <Locale "fr_FR">

        :param category: one of the ``LC_XXX`` environment variable names
        :return: the value of the variable, or any of the fallbacks
                 (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
        :rtype: `Locale`
        :raise `UnknownLocaleError`: if no default locale could be determined,
                                     or no locale data is available for it
        """
        identifier = default_locale(category)
        if not identifier:
            # None of the environment variables provided a value; report this
            # the same way as any other locale without data.
            raise UnknownLocaleError('')
        # The identifier must be split into its components: passing the whole
        # string as `language` would leave `territory` and `variant` unset.
        return cls.parse(identifier)
    default = classmethod(default)

    def negotiate(cls, preferred, available, sep='_'):
        """Find the best match between available and requested locale strings.

        >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
        <Locale "de_DE">
        >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
        <Locale "de">
        >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])

        You can specify the character used in the locale identifiers to separate
        the different components. This separator is applied to both lists. Also,
        case is ignored in the comparison:

        >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
        <Locale "de_DE">

        :param preferred: the list of locale identifiers preferred by the user
        :param available: the list of locale identifiers available
        :param sep: character that separates the different parts of the
                    locale identifiers
        :return: the `Locale` object for the best match, or `None` if no match
                 was found
        :rtype: `Locale`
        """
        identifier = negotiate_locale(preferred, available, sep=sep)
        if identifier:
            # go through `cls` so that subclasses get instances of themselves,
            # as they do from `default` and `parse`
            return cls.parse(identifier, sep=sep)
        return None
    negotiate = classmethod(negotiate)

    def parse(cls, identifier, sep='_'):
        """Create a `Locale` instance for the given locale identifier.

        >>> l = Locale.parse('de-DE', sep='-')
        >>> l.display_name
        u'Deutsch (Deutschland)'

        If the `identifier` parameter is not a string, but actually a `Locale`
        object, that object is returned:

        >>> Locale.parse(l)
        <Locale "de_DE">

        :param identifier: the locale identifier string
        :param sep: optional component separator
        :return: a corresponding `Locale` instance
        :rtype: `Locale`
        :raise `ValueError`: if the string does not appear to be a valid locale
                             identifier
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        # isinstance (rather than an exact type check) so that instances of
        # subclasses are passed through instead of being parsed as strings
        if isinstance(identifier, Locale):
            return identifier
        return cls(*parse_locale(identifier, sep=sep))
    parse = classmethod(parse)

    def __eq__(self, other):
        # Comparison is done on the string form, which means a locale also
        # compares equal to its plain identifier string.
        return str(self) == str(other)

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``
        return not self.__eq__(other)

    def __hash__(self):
        # must agree with `__eq__`, which compares the string form
        return hash(str(self))

    def __repr__(self):
        return '<Locale "%s">' % str(self)

    def __str__(self):
        return '_'.join(filter(None, [self.language, self.territory,
                                      self.variant]))

    def _data(self):
        # load the locale data on first access and cache it on the instance
        if self.__data is None:
            self.__data = localedata.load(str(self))
        return self.__data
    _data = property(_data)

    def _describe_in(self, locale):
        """Build the display name of this locale from the names provided by
        the given locale.

        :param locale: the `Locale` whose language, territory and variant
                       names are used
        :return: the language name, followed by the known territory and
                 variant names in parentheses; `None` if the language name
                 is not known
        """
        retval = locale.languages.get(self.language)
        if not retval:
            return retval
        details = []
        if self.territory:
            details.append(locale.territories.get(self.territory))
        if self.variant:
            details.append(locale.variants.get(self.variant))
        # leave out components for which no name is available
        details = [detail for detail in details if detail]
        if details:
            retval += ' (%s)' % ', '.join(details)
        return retval

    def display_name(self):
        return self._describe_in(self)
    display_name = property(display_name, doc="""\
        The localized display name of the locale.

        >>> Locale('en').display_name
        u'English'
        >>> Locale('en', 'US').display_name
        u'English (United States)'
        >>> Locale('sv').display_name
        u'svenska'

        :type: `unicode`
        """)

    def english_name(self):
        return self._describe_in(Locale('en'))
    english_name = property(english_name, doc="""\
        The English display name of the locale.

        >>> Locale('de').english_name
        u'German'
        >>> Locale('de', 'DE').english_name
        u'German (Germany)'

        :type: `unicode`
        """)

    #{ General Locale Display Names

    def languages(self):
        return self._data['languages']
    languages = property(languages, doc="""\
        Mapping of language codes to translated language names.

        >>> Locale('de', 'DE').languages['ja']
        u'Japanisch'

        :type: `dict`
        :see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_
        """)

    def scripts(self):
        return self._data['scripts']
    scripts = property(scripts, doc="""\
        Mapping of script codes to translated script names.

        >>> Locale('en', 'US').scripts['Hira']
        u'Hiragana'

        :type: `dict`
        :see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
        """)

    def territories(self):
        return self._data['territories']
    territories = property(territories, doc="""\
        Mapping of territory codes to translated territory names.

        >>> Locale('es', 'CO').territories['DE']
        u'Alemania'

        :type: `dict`
        :see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
        """)

    def variants(self):
        return self._data['variants']
    variants = property(variants, doc="""\
        Mapping of variant codes to translated variant names.

        >>> Locale('de', 'DE').variants['1901']
        u'alte deutsche Rechtschreibung'

        :type: `dict`
        """)

    #{ Number Formatting

    def currencies(self):
        return self._data['currency_names']
    currencies = property(currencies, doc="""\
        Mapping of currency codes to translated currency names.

        >>> Locale('en').currencies['COP']
        u'Colombian Peso'
        >>> Locale('de', 'DE').currencies['COP']
        u'Kolumbianischer Peso'

        :type: `dict`
        """)

    def currency_symbols(self):
        return self._data['currency_symbols']
    currency_symbols = property(currency_symbols, doc="""\
        Mapping of currency codes to symbols.

        >>> Locale('en').currency_symbols['USD']
        u'US$'
        >>> Locale('en', 'US').currency_symbols['USD']
        u'$'

        :type: `dict`
        """)

    def number_symbols(self):
        return self._data['number_symbols']
    number_symbols = property(number_symbols, doc="""\
        Symbols used in number formatting.

        >>> Locale('fr', 'FR').number_symbols['decimal']
        u','

        :type: `dict`
        """)

    def decimal_formats(self):
        return self._data['decimal_formats']
    decimal_formats = property(decimal_formats, doc="""\
        Locale patterns for decimal number formatting.

        >>> Locale('en', 'US').decimal_formats[None]
        <NumberPattern u'#,##0.###'>

        :type: `dict`
        """)

    def currency_formats(self):
        return self._data['currency_formats']
    # A raw string keeps the ``\\xa4`` escape literal for the doctest; the
    # summary starts right after the quotes because a backslash cannot
    # continue the line inside a raw string.
    currency_formats = property(currency_formats, doc=r"""Locale patterns for currency number formatting.

        >>> print Locale('en', 'US').currency_formats[None]
        <NumberPattern u'\xa4#,##0.00'>

        :type: `dict`
        """)

    def percent_formats(self):
        return self._data['percent_formats']
    percent_formats = property(percent_formats, doc="""\
        Locale patterns for percent number formatting.

        >>> Locale('en', 'US').percent_formats[None]
        <NumberPattern u'#,##0%'>

        :type: `dict`
        """)

    def scientific_formats(self):
        return self._data['scientific_formats']
    scientific_formats = property(scientific_formats, doc="""\
        Locale patterns for scientific number formatting.

        >>> Locale('en', 'US').scientific_formats[None]
        <NumberPattern u'#E0'>

        :type: `dict`
        """)

    #{ Calendar Information and Date Formatting

    def periods(self):
        return self._data['periods']
    periods = property(periods, doc="""\
        Locale display names for day periods (AM/PM).

        >>> Locale('en', 'US').periods['am']
        u'AM'

        :type: `dict`
        """)

    def days(self):
        return self._data['days']
    days = property(days, doc="""\
        Locale display names for weekdays.

        >>> Locale('de', 'DE').days['format']['wide'][3]
        u'Donnerstag'

        :type: `dict`
        """)

    def months(self):
        return self._data['months']
    months = property(months, doc="""\
        Locale display names for months.

        >>> Locale('de', 'DE').months['format']['wide'][10]
        u'Oktober'

        :type: `dict`
        """)

    def quarters(self):
        return self._data['quarters']
    quarters = property(quarters, doc="""\
        Locale display names for quarters.

        >>> Locale('de', 'DE').quarters['format']['wide'][1]
        u'1. Quartal'

        :type: `dict`
        """)

    def eras(self):
        return self._data['eras']
    eras = property(eras, doc="""\
        Locale display names for eras.

        >>> Locale('en', 'US').eras['wide'][1]
        u'Anno Domini'
        >>> Locale('en', 'US').eras['abbreviated'][0]
        u'BC'

        :type: `dict`
        """)

    def time_zones(self):
        return self._data['time_zones']
    time_zones = property(time_zones, doc="""\
        Locale display names for time zones.

        >>> Locale('en', 'US').time_zones['America/Los_Angeles']['long']['standard']
        u'Pacific Standard Time'
        >>> Locale('en', 'US').time_zones['Europe/Dublin']['city']
        u'Dublin'

        :type: `dict`
        """)

    def zone_aliases(self):
        return self._data['zone_aliases']
    zone_aliases = property(zone_aliases, doc="""\
        Mapping of time zone aliases to their respective canonical identifier.

        >>> Locale('en').zone_aliases['UTC']
        'Etc/GMT'

        :type: `dict`
        :note: this doesn't really belong here, as it does not change between
               locales
        """)

    def first_week_day(self):
        return self._data['week_data']['first_day']
    first_week_day = property(first_week_day, doc="""\
        The first day of a week.

        >>> Locale('de', 'DE').first_week_day
        0
        >>> Locale('en', 'US').first_week_day
        6

        :type: `int`
        """)

    def weekend_start(self):
        return self._data['week_data']['weekend_start']
    weekend_start = property(weekend_start, doc="""\
        The day the weekend starts.

        >>> Locale('de', 'DE').weekend_start
        5

        :type: `int`
        """)

    def weekend_end(self):
        return self._data['week_data']['weekend_end']
    weekend_end = property(weekend_end, doc="""\
        The day the weekend ends.

        >>> Locale('de', 'DE').weekend_end
        6

        :type: `int`
        """)

    def min_week_days(self):
        return self._data['week_data']['min_days']
    min_week_days = property(min_week_days, doc="""\
        The minimum number of days in a week so that the week is counted as the
        first week of a year or month.

        >>> Locale('de', 'DE').min_week_days
        4

        :type: `int`
        """)

    def date_formats(self):
        return self._data['date_formats']
    date_formats = property(date_formats, doc="""\
        Locale patterns for date formatting.

        >>> Locale('en', 'US').date_formats['short']
        <DateTimePattern u'M/d/yy'>
        >>> Locale('fr', 'FR').date_formats['long']
        <DateTimePattern u'd MMMM yyyy'>

        :type: `dict`
        """)

    def time_formats(self):
        return self._data['time_formats']
    time_formats = property(time_formats, doc="""\
        Locale patterns for time formatting.

        >>> Locale('en', 'US').time_formats['short']
        <DateTimePattern u'h:mm a'>
        >>> Locale('fr', 'FR').time_formats['long']
        <DateTimePattern u'HH:mm:ss z'>

        :type: `dict`
        """)

    def datetime_formats(self):
        return self._data['datetime_formats']
    datetime_formats = property(datetime_formats, doc="""\
        Locale patterns for datetime formatting.

        >>> Locale('en').datetime_formats[None]
        u'{1} {0}'
        >>> Locale('th').datetime_formats[None]
        u'{1}, {0}'

        :type: `dict`
        """)
527 | |
528 | |
def default_locale(category=None):
    """Returns the system default locale for a given category, based on
    environment variables.

    >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
    ...     os.environ[name] = ''
    >>> os.environ['LANG'] = 'fr_FR.UTF-8'
    >>> default_locale('LC_MESSAGES')
    'fr_FR'

    Variables that are unset, empty, or hold a value that cannot be parsed as
    a locale identifier are skipped in favor of the next fallback.

    :param category: one of the ``LC_XXX`` environment variable names
    :return: the value of the variable, or any of the fallbacks (``LANGUAGE``,
             ``LC_ALL``, ``LC_CTYPE``, and ``LANG``); `None` if none of them
             provides a usable value
    :rtype: `str`
    """
    varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
    for name in filter(None, varnames):
        locale = os.getenv(name)
        if not locale:
            continue
        if name == 'LANGUAGE' and ':' in locale:
            # the LANGUAGE variable may contain a colon-separated list of
            # language codes; we just pick the first language on the list
            locale = locale.split(':')[0]
        try:
            return '_'.join(filter(None, parse_locale(locale)))
        except ValueError:
            # A malformed value in one variable should not prevent the
            # remaining fallbacks from being consulted.
            continue
    return None
554 | |
def negotiate_locale(preferred, available, sep='_'):
    """Find the best match between available and requested locale strings.

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    'de_DE'
    >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
    'de'

    Case is ignored by the algorithm, the result uses the case of the preferred
    locale identifier:

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
    'de_DE'

    Empty entries (`None` or ``''``) in either list are ignored:

    >>> negotiate_locale([None, 'en_US'], ['en_US', None])
    'en_US'

    :param preferred: the list of locale strings preferred by the user
    :param available: the list of locale strings available
    :param sep: character that separates the different parts of the locale
                strings
    :return: the locale identifier for the best match, or `None` if no match
             was found
    :rtype: `str`
    """
    available = [a.lower() for a in available if a]
    for locale in preferred:
        if not locale:
            # skip empty entries, mirroring the filtering of `available`
            continue
        if locale.lower() in available:
            return locale
        # no exact match: fall back to the bare language code
        parts = locale.split(sep)
        if len(parts) > 1 and parts[0].lower() in available:
            return parts[0]
    return None
585 | |
def parse_locale(identifier, sep='_'):
    """Parse a locale identifier into a ``(language, territory, variant)``
    tuple.

    >>> parse_locale('zh_CN')
    ('zh', 'CN', None)

    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:

    >>> parse_locale('zh-CN', sep='-')
    ('zh', 'CN', None)

    A trailing character set and/or modifier, as found in POSIX locale names,
    is ignored:

    >>> parse_locale('de_DE.ISO-8859-15@euro')
    ('de', 'DE', None)

    :param identifier: the locale identifier string
    :param sep: character that separates the different parts of the locale
                string
    :return: the ``(language, territory, variant)`` tuple
    :rtype: `tuple`
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier

    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """
    # POSIX locale names have the form
    # ``language[_territory][.codeset][@modifier]``; neither the charset/
    # encoding nor the modifier is something we care about
    for marker in ('.', '@'):
        if marker in identifier:
            identifier = identifier.split(marker, 1)[0]
    parts = identifier.split(sep)
    lang, territory, variant = parts[0].lower(), None, None
    if not lang.isalpha():
        raise ValueError('expected only letters, got %r' % lang)
    if len(parts) > 1:
        territory = parts[1].upper()
        if not territory.isalpha():
            raise ValueError('expected only letters, got %r' % territory)
    if len(parts) > 2:
        variant = parts[2].upper()
    return lang, territory, variant