Mercurial > babel > old > mirror
annotate 0.9.x/babel/core.py @ 544:1e1bcb890af1 stable
merge r593 from trunk
author | fschwarz |
---|---|
date | Wed, 16 Mar 2011 14:32:34 +0000 |
parents | 621e636b988e |
children |
rev | line source |
---|---|
263 | 1 # -*- coding: utf-8 -*- |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Core locale representation and locale data access.""" | |
15 | |
16 import os | |
17 import pickle | |
18 | |
19 from babel import localedata | |
20 | |
# Public names exported by a star-import of this module.
__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
           'parse_locale']
# Markup flavour used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
24 | |
# Lazily populated cache of the unpickled ``global.dat`` contents.
_global_data = None


def get_global(key):
    """Look up one section of the locale-independent global data.

    The data lives in the ``global.dat`` pickle that sits next to this
    module. It is read from disk on the first call only and kept in the
    module-level ``_global_data`` cache afterwards.

    >>> get_global('zone_aliases')['UTC']
    'Etc/GMT'
    >>> get_global('zone_territories')['Europe/Berlin']
    'DE'

    :param key: the data key
    :return: the dictionary found in the global data under the given key, or
             an empty dictionary if the key is not present
    :rtype: `dict`
    :since: version 0.9
    """
    global _global_data
    if _global_data is None:
        path = os.path.join(os.path.dirname(__file__), 'global.dat')
        stream = open(path, 'rb')
        try:
            _global_data = pickle.load(stream)
        finally:
            # Always release the file handle, even if unpickling fails.
            stream.close()
    return _global_data.get(key, {})
53 | |
409 | 54 |
# Default mapping from language-only identifiers to a fully qualified
# identifier (language plus territory). It is the default ``aliases``
# argument of the locale lookup functions in this module.
LOCALE_ALIASES = {
    'ar': 'ar_SY',
    'bg': 'bg_BG',
    'bs': 'bs_BA',
    'ca': 'ca_ES',
    'cs': 'cs_CZ',
    'da': 'da_DK',
    'de': 'de_DE',
    'el': 'el_GR',
    'en': 'en_US',
    'es': 'es_ES',
    'et': 'et_EE',
    'fa': 'fa_IR',
    'fi': 'fi_FI',
    'fr': 'fr_FR',
    'gl': 'gl_ES',
    'he': 'he_IL',
    'hu': 'hu_HU',
    'id': 'id_ID',
    'is': 'is_IS',
    'it': 'it_IT',
    'ja': 'ja_JP',
    'km': 'km_KH',
    'ko': 'ko_KR',
    'lt': 'lt_LT',
    'lv': 'lv_LV',
    'mk': 'mk_MK',
    'nl': 'nl_NL',
    'nn': 'nn_NO',
    'no': 'nb_NO',
    'pl': 'pl_PL',
    'pt': 'pt_PT',
    'ro': 'ro_RO',
    'ru': 'ru_RU',
    'sk': 'sk_SK',
    'sl': 'sl_SI',
    'sv': 'sv_SE',
    'th': 'th_TH',
    'tr': 'tr_TR',
    'uk': 'uk_UA',
}
65 | |
66 | |
class UnknownLocaleError(Exception):
    """Raised when a locale is requested for which no locale data is
    available.

    The offending identifier is stored on the ``identifier`` attribute.
    """

    def __init__(self, identifier):
        """Build the error for the given locale identifier.

        :param identifier: the identifier string of the unsupported locale
        """
        message = 'unknown locale %r' % identifier
        Exception.__init__(self, message)
        self.identifier = identifier
79 | |
80 | |
class Locale(object):
    """Representation of a specific locale.

    >>> locale = Locale('en', 'US')
    >>> repr(locale)
    '<Locale "en_US">'
    >>> locale.display_name
    u'English (United States)'

    A `Locale` object can also be instantiated from a raw locale string:

    >>> locale = Locale.parse('en-US', sep='-')
    >>> repr(locale)
    '<Locale "en_US">'

    `Locale` objects provide access to a collection of locale data, such as
    territory and language names, number and date format patterns, and more:

    >>> locale.number_symbols['decimal']
    u'.'

    If a locale is requested for which no locale data is available, an
    `UnknownLocaleError` is raised:

    >>> Locale.parse('en_DE')
    Traceback (most recent call last):
        ...
    UnknownLocaleError: unknown locale 'en_DE'

    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """

    def __init__(self, language, territory=None, script=None, variant=None):
        """Initialize the locale object from the given identifier components.

        >>> locale = Locale('en', 'US')
        >>> locale.language
        'en'
        >>> locale.territory
        'US'

        :param language: the language code
        :param territory: the territory (country or region) code
        :param script: the script code
        :param variant: the variant code
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        self.language = language
        self.territory = territory
        self.script = script
        self.variant = variant
        # Locale data is loaded lazily on first access, see `_data`.
        self.__data = None

        identifier = str(self)
        if not localedata.exists(identifier):
            raise UnknownLocaleError(identifier)

    def default(cls, category=None, aliases=LOCALE_ALIASES):
        """Return the system default locale for the specified category.

        >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
        ...     os.environ[name] = ''
        >>> os.environ['LANG'] = 'fr_FR.UTF-8'
        >>> Locale.default('LC_MESSAGES')
        <Locale "fr_FR">

        :param category: one of the ``LC_XXX`` environment variable names
        :param aliases: a dictionary of aliases for locale identifiers
        :return: the value of the variable, or any of the fallbacks
                 (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
        :rtype: `Locale`
        :raise `UnknownLocaleError`: if no default locale can be determined
                                     from the environment, or no locale data
                                     is available for it
        :see: `default_locale`
        """
        identifier = default_locale(category, aliases=aliases)
        if not identifier:
            # Nothing usable in the environment: fail the same way the
            # constructor does for an empty identifier.
            raise UnknownLocaleError('')
        # The identifier has to be split into its components; handing the
        # whole string to the constructor would store e.g. 'fr_FR' as the
        # *language* and leave the territory unset.
        return cls.parse(identifier)
    default = classmethod(default)

    def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES):
        """Find the best match between available and requested locale strings.

        >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
        <Locale "de_DE">
        >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
        <Locale "de">
        >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])

        You can specify the character used in the locale identifiers to separate
        the different components. This separator is applied to both lists. Also,
        case is ignored in the comparison:

        >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
        <Locale "de_DE">

        :param preferred: the list of locale identifiers preferred by the user
        :param available: the list of locale identifiers available
        :param sep: character that separates the different components of the
                    locale identifiers
        :param aliases: a dictionary of aliases for locale identifiers
        :return: the `Locale` object for the best match, or `None` if no match
                 was found
        :rtype: `Locale`
        :see: `negotiate_locale`
        """
        identifier = negotiate_locale(preferred, available, sep=sep,
                                      aliases=aliases)
        if identifier:
            # Use ``cls`` like the other alternate constructors do.
            return cls.parse(identifier, sep=sep)
        return None
    negotiate = classmethod(negotiate)

    def parse(cls, identifier, sep='_'):
        """Create a `Locale` instance for the given locale identifier.

        >>> l = Locale.parse('de-DE', sep='-')
        >>> l.display_name
        u'Deutsch (Deutschland)'

        If the `identifier` parameter is not a string, but actually a `Locale`
        object, that object is returned:

        >>> Locale.parse(l)
        <Locale "de_DE">

        :param identifier: the locale identifier string
        :param sep: optional component separator
        :return: a corresponding `Locale` instance
        :rtype: `Locale`
        :raise `ValueError`: if the string does not appear to be a valid locale
                             identifier
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        :see: `parse_locale`
        """
        if isinstance(identifier, basestring):
            return cls(*parse_locale(identifier, sep=sep))
        # Anything that is not a string is handed back unchanged.
        return identifier
    parse = classmethod(parse)

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Must agree with __eq__, which compares the string identifiers;
        # otherwise equal locales end up as distinct dict keys / set members.
        return hash(str(self))

    def __repr__(self):
        return '<Locale "%s">' % str(self)

    def __str__(self):
        return '_'.join(filter(None, [self.language, self.script,
                                      self.territory, self.variant]))

    def _data(self):
        # Load the locale data on first access and cache it on the instance.
        if self.__data is None:
            self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
        return self.__data
    _data = property(_data)

    def get_display_name(self, locale=None):
        """Return the display name of the locale using the given locale.

        The display name will include the language, territory, script, and
        variant, if those are specified.

        >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
        u'Chinese (Simplified Han, China)'

        :param locale: the locale to use (a `Locale` or an identifier string);
                       defaults to this locale itself
        :return: the display name, or `None` if the display locale has no
                 name for this locale's language
        """
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        retval = locale.languages.get(self.language)
        # Only decorate a name that actually exists; appending to ``None``
        # would raise a TypeError.
        if retval and (self.territory or self.script or self.variant):
            details = []
            if self.script:
                details.append(locale.scripts.get(self.script))
            if self.territory:
                details.append(locale.territories.get(self.territory))
            if self.variant:
                details.append(locale.variants.get(self.variant))
            # Drop components the display locale has no name for.
            details = [detail for detail in details if detail]
            if details:
                retval += ' (%s)' % u', '.join(details)
        return retval

    display_name = property(get_display_name, doc="""\
        The localized display name of the locale.

        >>> Locale('en').display_name
        u'English'
        >>> Locale('en', 'US').display_name
        u'English (United States)'
        >>> Locale('sv').display_name
        u'svenska'

        :type: `unicode`
        """)

    def english_name(self):
        return self.get_display_name(Locale('en'))
    english_name = property(english_name, doc="""\
        The english display name of the locale.

        >>> Locale('de').english_name
        u'German'
        >>> Locale('de', 'DE').english_name
        u'German (Germany)'

        :type: `unicode`
        """)

    #{ General Locale Display Names

    def languages(self):
        return self._data['languages']
    languages = property(languages, doc="""\
        Mapping of language codes to translated language names.

        >>> Locale('de', 'DE').languages['ja']
        u'Japanisch'

        :type: `dict`
        :see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_
        """)

    def scripts(self):
        return self._data['scripts']
    scripts = property(scripts, doc="""\
        Mapping of script codes to translated script names.

        >>> Locale('en', 'US').scripts['Hira']
        u'Hiragana'

        :type: `dict`
        :see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
        """)

    def territories(self):
        return self._data['territories']
    territories = property(territories, doc="""\
        Mapping of territory codes to translated territory names.

        >>> Locale('es', 'CO').territories['DE']
        u'Alemania'

        :type: `dict`
        :see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
        """)

    def variants(self):
        return self._data['variants']
    variants = property(variants, doc="""\
        Mapping of variant codes to translated variant names.

        >>> Locale('de', 'DE').variants['1901']
        u'Alte deutsche Rechtschreibung'

        :type: `dict`
        """)

    #{ Number Formatting

    def currencies(self):
        return self._data['currency_names']
    currencies = property(currencies, doc="""\
        Mapping of currency codes to translated currency names.

        >>> Locale('en').currencies['COP']
        u'Colombian Peso'
        >>> Locale('de', 'DE').currencies['COP']
        u'Kolumbianischer Peso'

        :type: `dict`
        """)

    def currency_symbols(self):
        return self._data['currency_symbols']
    currency_symbols = property(currency_symbols, doc="""\
        Mapping of currency codes to symbols.

        >>> Locale('en', 'US').currency_symbols['USD']
        u'$'
        >>> Locale('es', 'CO').currency_symbols['USD']
        u'US$'

        :type: `dict`
        """)

    def number_symbols(self):
        return self._data['number_symbols']
    number_symbols = property(number_symbols, doc="""\
        Symbols used in number formatting.

        >>> Locale('fr', 'FR').number_symbols['decimal']
        u','

        :type: `dict`
        """)

    def decimal_formats(self):
        return self._data['decimal_formats']
    decimal_formats = property(decimal_formats, doc="""\
        Locale patterns for decimal number formatting.

        >>> Locale('en', 'US').decimal_formats[None]
        <NumberPattern u'#,##0.###'>

        :type: `dict`
        """)

    def currency_formats(self):
        return self._data['currency_formats']
    currency_formats = property(currency_formats, doc=r"""\
        Locale patterns for currency number formatting.

        >>> print Locale('en', 'US').currency_formats[None]
        <NumberPattern u'\xa4#,##0.00'>

        :type: `dict`
        """)

    def percent_formats(self):
        return self._data['percent_formats']
    percent_formats = property(percent_formats, doc="""\
        Locale patterns for percent number formatting.

        >>> Locale('en', 'US').percent_formats[None]
        <NumberPattern u'#,##0%'>

        :type: `dict`
        """)

    def scientific_formats(self):
        return self._data['scientific_formats']
    scientific_formats = property(scientific_formats, doc="""\
        Locale patterns for scientific number formatting.

        >>> Locale('en', 'US').scientific_formats[None]
        <NumberPattern u'#E0'>

        :type: `dict`
        """)

    #{ Calendar Information and Date Formatting

    def periods(self):
        return self._data['periods']
    periods = property(periods, doc="""\
        Locale display names for day periods (AM/PM).

        >>> Locale('en', 'US').periods['am']
        u'AM'

        :type: `dict`
        """)

    def days(self):
        return self._data['days']
    days = property(days, doc="""\
        Locale display names for weekdays.

        >>> Locale('de', 'DE').days['format']['wide'][3]
        u'Donnerstag'

        :type: `dict`
        """)

    def months(self):
        return self._data['months']
    months = property(months, doc="""\
        Locale display names for months.

        >>> Locale('de', 'DE').months['format']['wide'][10]
        u'Oktober'

        :type: `dict`
        """)

    def quarters(self):
        return self._data['quarters']
    quarters = property(quarters, doc="""\
        Locale display names for quarters.

        >>> Locale('de', 'DE').quarters['format']['wide'][1]
        u'1. Quartal'

        :type: `dict`
        """)

    def eras(self):
        return self._data['eras']
    eras = property(eras, doc="""\
        Locale display names for eras.

        >>> Locale('en', 'US').eras['wide'][1]
        u'Anno Domini'
        >>> Locale('en', 'US').eras['abbreviated'][0]
        u'BC'

        :type: `dict`
        """)

    def time_zones(self):
        return self._data['time_zones']
    time_zones = property(time_zones, doc="""\
        Locale display names for time zones.

        >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
        u'British Summer Time'
        >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
        u"St. John's"

        :type: `dict`
        """)

    def meta_zones(self):
        return self._data['meta_zones']
    meta_zones = property(meta_zones, doc="""\
        Locale display names for meta time zones.

        Meta time zones are basically groups of different Olson time zones that
        have the same GMT offset and daylight savings time.

        >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
        u'Central European Summer Time'

        :type: `dict`
        :since: version 0.9
        """)

    def zone_formats(self):
        return self._data['zone_formats']
    zone_formats = property(zone_formats, doc=r"""\
        Patterns related to the formatting of time zones.

        >>> Locale('en', 'US').zone_formats['fallback']
        u'%(1)s (%(0)s)'
        >>> Locale('pt', 'BR').zone_formats['region']
        u'Hor\xe1rio %s'

        :type: `dict`
        :since: version 0.9
        """)

    def first_week_day(self):
        return self._data['week_data']['first_day']
    first_week_day = property(first_week_day, doc="""\
        The first day of a week, with 0 being Monday.

        >>> Locale('de', 'DE').first_week_day
        0
        >>> Locale('en', 'US').first_week_day
        6

        :type: `int`
        """)

    def weekend_start(self):
        return self._data['week_data']['weekend_start']
    weekend_start = property(weekend_start, doc="""\
        The day the weekend starts, with 0 being Monday.

        >>> Locale('de', 'DE').weekend_start
        5

        :type: `int`
        """)

    def weekend_end(self):
        return self._data['week_data']['weekend_end']
    weekend_end = property(weekend_end, doc="""\
        The day the weekend ends, with 0 being Monday.

        >>> Locale('de', 'DE').weekend_end
        6

        :type: `int`
        """)

    def min_week_days(self):
        return self._data['week_data']['min_days']
    min_week_days = property(min_week_days, doc="""\
        The minimum number of days in a week so that the week is counted as the
        first week of a year or month.

        >>> Locale('de', 'DE').min_week_days
        4

        :type: `int`
        """)

    def date_formats(self):
        return self._data['date_formats']
    date_formats = property(date_formats, doc="""\
        Locale patterns for date formatting.

        >>> Locale('en', 'US').date_formats['short']
        <DateTimePattern u'M/d/yy'>
        >>> Locale('fr', 'FR').date_formats['long']
        <DateTimePattern u'd MMMM yyyy'>

        :type: `dict`
        """)

    def time_formats(self):
        return self._data['time_formats']
    time_formats = property(time_formats, doc="""\
        Locale patterns for time formatting.

        >>> Locale('en', 'US').time_formats['short']
        <DateTimePattern u'h:mm a'>
        >>> Locale('fr', 'FR').time_formats['long']
        <DateTimePattern u'HH:mm:ss z'>

        :type: `dict`
        """)

    def datetime_formats(self):
        return self._data['datetime_formats']
    datetime_formats = property(datetime_formats, doc="""\
        Locale patterns for datetime formatting.

        >>> Locale('en').datetime_formats[None]
        u'{1} {0}'
        >>> Locale('th').datetime_formats[None]
        u'{1}, {0}'

        :type: `dict`
        """)
608 | |
609 | |
def default_locale(category=None, aliases=LOCALE_ALIASES):
    """Returns the system default locale for a given category, based on
    environment variables.

    >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
    ...     os.environ[name] = ''
    >>> os.environ['LANG'] = 'fr_FR.UTF-8'
    >>> default_locale('LC_MESSAGES')
    'fr_FR'

    The "C" or "POSIX" pseudo-locales are treated as aliases for the
    "en_US_POSIX" locale:

    >>> os.environ['LC_MESSAGES'] = 'POSIX'
    >>> default_locale('LC_MESSAGES')
    'en_US_POSIX'

    The variables are consulted in the order `category`, ``LANGUAGE``,
    ``LC_ALL``, ``LC_CTYPE``, ``LANG``; the first one holding a value that
    parses as a locale identifier wins. Values that cannot be parsed are
    skipped.

    :param category: one of the ``LC_XXX`` environment variable names
    :param aliases: a dictionary of aliases for locale identifiers
    :return: the normalized value of the variable, or any of the fallbacks
             (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``); `None`
             if none of them yields a valid identifier
    :rtype: `str`
    """
    varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
    for name in filter(None, varnames):
        locale = os.getenv(name)
        if not locale:
            continue
        if name == 'LANGUAGE' and ':' in locale:
            # the LANGUAGE variable may contain a colon-separated list of
            # language codes; we just pick the first language on the list
            locale = locale.split(':')[0]
        if locale in ('C', 'POSIX'):
            locale = 'en_US_POSIX'
        elif aliases and locale in aliases:
            locale = aliases[locale]
        try:
            lang, territory, script, variant = parse_locale(locale)
        except ValueError:
            # Deliberate best effort: an unparsable value just means the
            # next variable gets a chance.
            continue
        # parse_locale() yields (language, territory, script, variant), but
        # an identifier is written language_script_territory_variant.
        # Joining the tuple as-is would turn 'zh_Hans_CN' into the invalid
        # 'zh_CN_Hans'.
        return '_'.join(filter(None, (lang, script, territory, variant)))
    return None
263 | 649 |
def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES):
    """Find the best match between available and requested locale strings.

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    'de_DE'
    >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
    'de'

    Case is ignored by the algorithm, the result uses the case of the preferred
    locale identifier:

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
    'de_DE'

    By default, some web browsers unfortunately do not include the territory
    in the locale identifier for many locales, and some don't even allow the
    user to easily add the territory. So while you may prefer using qualified
    locale identifiers in your web-application, they would not normally match
    the language-only locale sent by such browsers. To work around that, this
    function uses a default mapping of commonly used language-only locale
    identifiers to identifiers including the territory:

    >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
    'ja_JP'

    Some browsers even use an incorrect or outdated language code, such as "no"
    for Norwegian, where the correct locale identifier would actually be "nb_NO"
    (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of
    such cases, too:

    >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
    'nb_NO'

    You can override this default mapping by passing a different `aliases`
    dictionary to this function, or you can bypass the behavior altogether by
    setting the `aliases` parameter to `None`.

    :param preferred: the list of locale strings preferred by the user
    :param available: the list of locale strings available
    :param sep: character that separates the different parts of the locale
                strings
    :param aliases: a dictionary of aliases for locale identifiers
    :return: the locale identifier for the best match, or `None` if no match
             was found
    :rtype: `str`
    """
    # Matching is case-insensitive; empty entries in `available` are ignored.
    known = [entry.lower() for entry in available if entry]
    for candidate in preferred:
        lowered = candidate.lower()

        # 1. exact (case-insensitive) match
        if lowered in known:
            return candidate

        # 2. match through the alias table, rewritten to the caller's separator
        target = aliases and aliases.get(lowered)
        if target:
            target = target.replace('_', sep)
            if target.lower() in known:
                return target

        # 3. fall back to the bare language of a qualified identifier
        pieces = candidate.split(sep)
        if len(pieces) > 1 and pieces[0].lower() in known:
            return pieces[0]
    return None
714 | |
def parse_locale(identifier, sep='_'):
    """Parse a locale identifier into a tuple of the form::

      ``(language, territory, script, variant)``

    >>> parse_locale('zh_CN')
    ('zh', 'CN', None, None)
    >>> parse_locale('zh_Hans_CN')
    ('zh', 'CN', 'Hans', None)

    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:

    >>> parse_locale('zh-CN', sep='-')
    ('zh', 'CN', None, None)

    If the identifier cannot be parsed into a locale, a `ValueError` exception
    is raised:

    >>> parse_locale('not_a_LOCALE_String')
    Traceback (most recent call last):
      ...
    ValueError: 'not_a_LOCALE_String' is not a valid locale identifier

    Encoding information and locale modifiers are removed from the identifier:

    >>> parse_locale('it_IT@euro')
    ('it', 'IT', None, None)
    >>> parse_locale('en_US.UTF-8')
    ('en', 'US', None, None)
    >>> parse_locale('de_DE.iso885915@euro')
    ('de', 'DE', None, None)

    :param identifier: the locale identifier string
    :param sep: character that separates the different components of the locale
                identifier
    :return: the ``(language, territory, script, variant)`` tuple
    :rtype: `tuple`
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier

    :see: `IETF RFC 4646 <http://www.ietf.org/rfc/rfc4646.txt>`_
    """
    # Drop the charset/encoding (after ".") and then any locale modifier such
    # as "@euro"; neither is part of the returned tuple.
    identifier = identifier.split('.', 1)[0].split('@', 1)[0]

    remaining = identifier.split(sep)
    lang = remaining[0].lower()
    del remaining[0]
    if not lang.isalpha():
        raise ValueError('expected only letters, got %r' % lang)

    script = territory = variant = None

    # Optional script: exactly four letters, normalized to title case.
    if remaining and len(remaining[0]) == 4 and remaining[0].isalpha():
        script = remaining.pop(0).title()

    # Optional territory: two letters (upper-cased) or three digits.
    if remaining:
        head = remaining[0]
        if len(head) == 2 and head.isalpha():
            territory = head.upper()
            del remaining[0]
        elif len(head) == 3 and head.isdigit():
            territory = head
            del remaining[0]

    # Optional variant: four characters starting with a digit, or five or
    # more characters starting with a letter.
    if remaining:
        head = remaining[0]
        starts_numeric = len(head) == 4 and head[0].isdigit()
        starts_alpha = len(head) >= 5 and head[0].isalpha()
        if starts_numeric or starts_alpha:
            variant = remaining.pop()

    # Anything left over could not be classified.
    if remaining:
        raise ValueError('%r is not a valid locale identifier' % identifier)

    return lang, territory, script, variant
790 return lang, territory, script, variant |