1
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright (C) 2006 Edgewall Software
|
|
4 # All rights reserved.
|
|
5 #
|
|
6 # This software is licensed as described in the file COPYING, which
|
|
7 # you should have received as part of this distribution. The terms
|
|
8 # are also available at http://babel.edgewall.org/wiki/License.
|
|
9 #
|
|
10 # This software consists of voluntary contributions made by many
|
|
11 # individuals. For the exact contribution history, see the revision
|
|
12 # history and logs, available at http://babel.edgewall.org/log/.
|
|
13
|
|
14 """Core locale representation and locale data access gateway."""
|
|
15
|
|
16 import pickle
|
|
17 from pkg_resources import resource_filename
|
|
18 try:
|
|
19 import threading
|
|
20 except ImportError:
|
|
21 import dummy_threading as threading
|
|
22
|
|
23 __all__ = ['Locale', 'negotiate', 'parse']
|
|
24 __docformat__ = 'restructuredtext en'
|
|
25
|
|
26
|
|
class Locale(object):
    """Representation of a specific locale.

    >>> locale = Locale('en', territory='US')
    >>> repr(locale)
    '<Locale "en_US">'
    >>> locale.display_name
    u'English (United States)'

    A `Locale` object can also be instantiated from a raw locale string:

    >>> locale = Locale.parse('en-US', sep='-')
    >>> repr(locale)
    '<Locale "en_US">'

    `Locale` objects provide access to a collection of locale data, such as
    territory and language names, number and date format patterns, and more:

    >>> locale.number_symbols['decimal']
    u'.'

    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """
    # Maps ``(language, territory, variant)`` to the `Locale` instance that
    # was created for that key, so equal identifiers share one object.
    _cache = {}
    # Serializes lookups and insertions on `_cache`.
    _cache_lock = threading.Lock()

    def __new__(cls, language, territory=None, variant=None):
        """Create new locale object, or load it from the cache if it had already
        been instantiated.

        >>> l1 = Locale('en')
        >>> l2 = Locale('en')
        >>> l1 is l2
        True

        :param language: the language code
        :param territory: the territory (country or region) code
        :param variant: the variant code
        :return: new or existing `Locale` instance
        :rtype: `Locale`
        """
        key = (language, territory, variant)
        cls._cache_lock.acquire()
        try:
            self = cls._cache.get(key)
            if self is None:
                # `object.__new__` must only receive the class: passing the
                # constructor arguments along raises `TypeError` on Python 3.
                self = super(Locale, cls).__new__(cls)
                cls._cache[key] = self
            return self
        finally:
            # Release through `cls`, not `self`: `self` is unbound if the
            # cache lookup itself raised (e.g. unhashable key).
            cls._cache_lock.release()

    def __init__(self, language, territory=None, variant=None):
        """Initialize the locale object from the given identifier components.

        >>> locale = Locale('en', 'US')
        >>> locale.language
        'en'
        >>> locale.territory
        'US'

        :param language: the language code
        :param territory: the territory (country or region) code
        :param variant: the variant code
        """
        self.language = language
        self.territory = territory
        self.variant = variant
        # `__init__` also runs when `__new__` handed back a cached instance;
        # only reset the data slot the first time so that locale data which
        # has already been loaded is not thrown away and re-read from disk.
        try:
            self.__data
        except AttributeError:
            self.__data = None

    def parse(cls, identifier, sep='_'):
        """Create a `Locale` instance for the given locale identifier.

        >>> l = Locale.parse('de-DE', sep='-')
        >>> l.display_name
        u'Deutsch (Deutschland)'

        If the `identifier` parameter is not a string, but actually a `Locale`
        object, that object is returned:

        >>> Locale.parse(l)
        <Locale "de_DE">

        :param identifier: the locale identifier string
        :param sep: optional component separator
        :return: a corresponding `Locale` instance
        :rtype: `Locale`
        :raise `ValueError`: if the string does not appear to be a valid locale
                             identifier
        """
        # `isinstance` (rather than an exact type check) also lets instances
        # of subclasses pass straight through.
        if isinstance(identifier, cls):
            return identifier
        # `parse` here resolves to the module-level function, not this method.
        return cls(*parse(identifier, sep=sep))
    parse = classmethod(parse)

    def __repr__(self):
        return '<Locale "%s">' % str(self)

    def __str__(self):
        # Unset (None/empty) components are simply left out of the identifier.
        return '_'.join(filter(None, [self.language, self.territory,
                                      self.variant]))

    def _data(self):
        # Lazily load the pickled data file for this locale on first access
        # and keep it on the instance for subsequent lookups.
        if self.__data is None:
            filename = resource_filename(__name__, 'localedata/%s.dat' % self)
            fileobj = open(filename, 'rb')
            try:
                # NOTE: unpickling executes code; the data files must only
                # ever come from the package itself, never from user input.
                self.__data = pickle.load(fileobj)
            finally:
                fileobj.close()
        return self.__data
    _data = property(_data)

    def display_name(self):
        retval = self.languages.get(self.language)
        if self.territory:
            variant = ''
            if self.variant:
                variant = ', %s' % self.variants.get(self.variant)
            retval += ' (%s%s)' % (self.territories.get(self.territory), variant)
        return retval
    display_name = property(display_name, doc="""\
        The localized display name of the locale.

        >>> Locale('en').display_name
        u'English'
        >>> Locale('en', 'US').display_name
        u'English (United States)'

        :type: `unicode`
        """)

    def languages(self):
        return self._data['languages']
    languages = property(languages, doc="""\
        Mapping of language codes to translated language names.

        >>> Locale('de', 'DE').languages['ja']
        u'Japanisch'

        :type: `dict`
        :see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_
        """)

    def scripts(self):
        return self._data['scripts']
    scripts = property(scripts, doc="""\
        Mapping of script codes to translated script names.

        >>> Locale('en', 'US').scripts['Hira']
        u'Hiragana'

        :type: `dict`
        :see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
        """)

    def territories(self):
        return self._data['territories']
    territories = property(territories, doc="""\
        Mapping of territory codes to translated territory names.

        >>> Locale('es', 'CO').territories['DE']
        u'Alemania'

        :type: `dict`
        :see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
        """)

    def variants(self):
        return self._data['variants']
    variants = property(variants, doc="""\
        Mapping of variant codes to translated variant names.

        >>> Locale('de', 'DE').variants['1901']
        u'alte deutsche Rechtschreibung'

        :type: `dict`
        """)

    def number_symbols(self):
        return self._data['number_symbols']
    number_symbols = property(number_symbols, doc="""\
        Symbols used in number formatting.

        >>> Locale('fr', 'FR').number_symbols['decimal']
        u','

        :type: `dict`
        """)

    def periods(self):
        return self._data['periods']
    periods = property(periods, doc="""\
        Locale display names for day periods (AM/PM).

        >>> Locale('en', 'US').periods['am']
        u'AM'

        :type: `dict`
        """)

    def days(self):
        return self._data['days']
    days = property(days, doc="""\
        Locale display names for weekdays.

        >>> Locale('de', 'DE').days['format']['wide'][4]
        u'Donnerstag'

        :type: `dict`
        """)

    def months(self):
        return self._data['months']
    months = property(months, doc="""\
        Locale display names for months.

        >>> Locale('de', 'DE').months['format']['wide'][10]
        u'Oktober'

        :type: `dict`
        """)

    def quarters(self):
        return self._data['quarters']
    quarters = property(quarters, doc="""\
        Locale display names for quarters.

        >>> Locale('de', 'DE').quarters['format']['wide'][1]
        u'1. Quartal'

        :type: `dict`
        """)

    def eras(self):
        return self._data['eras']
    eras = property(eras, doc="""\
        Locale display names for eras.

        >>> Locale('en', 'US').eras['wide'][1]
        u'Anno Domini'
        >>> Locale('en', 'US').eras['abbreviated'][0]
        u'BC'

        :type: `dict`
        """)

    def date_formats(self):
        return self._data['date_formats']
    date_formats = property(date_formats, doc="""\
        Locale patterns for date formatting.

        >>> Locale('en', 'US').date_formats['short']
        <DateTimeFormatPattern u'M/d/yy'>
        >>> Locale('fr', 'FR').date_formats['long']
        <DateTimeFormatPattern u'd MMMM yyyy'>

        :type: `dict`
        """)

    def time_formats(self):
        return self._data['time_formats']
    time_formats = property(time_formats, doc="""\
        Locale patterns for time formatting.

        >>> Locale('en', 'US').time_formats['short']
        <DateTimeFormatPattern u'h:mm a'>
        >>> Locale('fr', 'FR').time_formats['long']
        <DateTimeFormatPattern u'HH:mm:ss z'>

        :type: `dict`
        """)
|
|
300
|
|
301
|
|
def negotiate(preferred, available, sep='_'):
    """Find the best match between available and requested locale strings.

    >>> negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    'de_DE'
    >>> negotiate(['de_DE', 'en_US'], ['en', 'de'])
    'de'

    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:

    >>> negotiate(['de-DE', 'en-US'], ['en', 'de'], sep='-')
    'de'

    :param preferred: the list of locale strings preferred by the user
    :param available: the list of locale strings available
    :param sep: character that separates the different parts of the locale
                strings
    :return: the locale identifier for the best match, or `None` if no match
             was found
    :rtype: `str`
    """
    for locale in preferred:
        # An exact match always wins.
        if locale in available:
            return locale
        # Otherwise fall back to the bare language code, but only when the
        # identifier actually had further components to drop.
        parts = locale.split(sep)
        if len(parts) > 1 and parts[0] in available:
            return parts[0]
    return None
|
|
323
|
|
def parse(identifier, sep='_'):
    """Parse a locale identifier into a ``(language, territory, variant)``
    tuple.

    >>> parse('zh_CN')
    ('zh', 'CN', None)

    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:

    >>> parse('zh-CN', sep='-')
    ('zh', 'CN', None)

    A trailing codeset specification, as found in POSIX locale names, is
    ignored:

    >>> parse('en_US.UTF-8')
    ('en', 'US', None)
    >>> parse('en.UTF-8')
    ('en', None, None)

    :param identifier: the locale identifier string
    :param sep: character that separates the different parts of the locale
                string
    :return: the ``(language, territory, variant)`` tuple
    :rtype: `tuple`
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier

    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """
    if '.' not in sep:
        # Drop the codeset before splitting: otherwise a codeset on a
        # territory-less identifier ("en.UTF-8") is rejected, and a codeset
        # containing the separator ("en_US.ISO_8859-1") leaks into the
        # variant. Skipped when "." is (part of) the separator itself.
        identifier = identifier.split('.', 1)[0]
    parts = identifier.split(sep)
    lang, territory, variant = parts[0].lower(), None, None
    if not lang.isalpha():
        raise ValueError('expected only letters, got %r' % lang)
    if len(parts) > 1:
        territory = parts[1].upper().split('.', 1)[0]
        if not territory.isalpha():
            raise ValueError('expected only letters, got %r' % territory)
    if len(parts) > 2:
        # Components beyond the third are ignored.
        variant = parts[2].upper().split('.', 1)[0]
    return lang, territory, variant
|