view babel/core.py @ 240:e37046445638

Back out [251] which contained an accidential commit.
author cmlenz
date Tue, 07 Aug 2007 17:16:10 +0000
parents 4c5fc8879d03
children 722a49ec74d6
line wrap: on
line source
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Core locale representation and locale data access."""

import os
import pickle

from babel import localedata

__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
           'parse_locale']
__docformat__ = 'restructuredtext en'

_global_data = None

def get_global(key):
    """
    Return the dictionary for the given key in the global data.
    
    The global data is stored in the ``babel/global.dat`` file and contains
    information independent of individual locales.
    
    >>> get_global('zone_aliases')['UTC']
    'Etc/GMT'
    >>> get_global('zone_territories')['Europe/Berlin']
    'DE'
    
    :since: version 0.9
    """
    global _global_data
    if _global_data is None:
        dirname = os.path.join(os.path.dirname(__file__))
        filename = os.path.join(dirname, 'global.dat')
        fileobj = open(filename, 'rb')
        try:
            _global_data = pickle.load(fileobj)
        finally:
            fileobj.close()
    return _global_data.get(key, {})


class UnknownLocaleError(Exception):
    """Exception thrown when a locale is requested for which no locale data
    is available.
    """

    def __init__(self, identifier):
        """Create the exception.
        
        :param identifier: the identifier string of the unsupported locale
        """
        Exception.__init__(self, 'unknown locale %r' % identifier)
        self.identifier = identifier


class Locale(object):
    """Representation of a specific locale.
    
    >>> locale = Locale('en', 'US')
    >>> repr(locale)
    '<Locale "en_US">'
    >>> locale.display_name
    u'English (United States)'
    
    A `Locale` object can also be instantiated from a raw locale string:
    
    >>> locale = Locale.parse('en-US', sep='-')
    >>> repr(locale)
    '<Locale "en_US">'
    
    `Locale` objects provide access to a collection of locale data, such as
    territory and language names, number and date format patterns, and more:
    
    >>> locale.number_symbols['decimal']
    u'.'
    
    If a locale is requested for which no locale data is available, an
    `UnknownLocaleError` is raised:
    
    >>> Locale.parse('en_DE')
    Traceback (most recent call last):
        ...
    UnknownLocaleError: unknown locale 'en_DE'
    
    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
    """

    def __init__(self, language, territory=None, script=None, variant=None):
        """Initialize the locale object from the given identifier components.
        
        >>> locale = Locale('en', 'US')
        >>> locale.language
        'en'
        >>> locale.territory
        'US'
        
        :param language: the language code
        :param territory: the territory (country or region) code
        :param script: the script code
        :param variant: the variant code
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        self.language = language
        self.territory = territory
        self.script = script
        self.variant = variant
        self.__data = None

        identifier = str(self)
        if not localedata.exists(identifier):
            raise UnknownLocaleError(identifier)

    def default(cls, category=None):
        """Return the system default locale for the specified category.
        
        >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
        ...     os.environ[name] = ''
        >>> os.environ['LANG'] = 'fr_FR.UTF-8'
        >>> Locale.default('LC_MESSAGES')
        <Locale "fr_FR">
    
        :param category: one of the ``LC_XXX`` environment variable names
        :return: the value of the variable, or any of the fallbacks
                 (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
        :rtype: `Locale`
        """
        return cls(default_locale(category))
    default = classmethod(default)

    def negotiate(cls, preferred, available, sep='_'):
        """Find the best match between available and requested locale strings.
        
        >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
        <Locale "de_DE">
        >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
        <Locale "de">
        >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])
        
        You can specify the character used in the locale identifiers to separate
        the differnet components. This separator is applied to both lists. Also,
        case is ignored in the comparison:
        
        >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
        <Locale "de_DE">
        
        :param preferred: the list of locale identifers preferred by the user
        :param available: the list of locale identifiers available
        :return: the `Locale` object for the best match, or `None` if no match
                 was found
        :rtype: `Locale`
        """
        identifier = negotiate_locale(preferred, available, sep=sep)
        if identifier:
            return Locale.parse(identifier, sep=sep)
    negotiate = classmethod(negotiate)

    def parse(cls, identifier, sep='_'):
        """Create a `Locale` instance for the given locale identifier.
        
        >>> l = Locale.parse('de-DE', sep='-')
        >>> l.display_name
        u'Deutsch (Deutschland)'
        
        If the `identifier` parameter is not a string, but actually a `Locale`
        object, that object is returned:
        
        >>> Locale.parse(l)
        <Locale "de_DE">
        
        :param identifier: the locale identifier string
        :param sep: optional component separator
        :return: a corresponding `Locale` instance
        :rtype: `Locale`
        :raise `ValueError`: if the string does not appear to be a valid locale
                             identifier
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        if type(identifier) is cls:
            return identifier
        return cls(*parse_locale(identifier, sep=sep))
    parse = classmethod(parse)

    def __eq__(self, other):
        return str(self) == str(other)

    def __repr__(self):
        return '<Locale "%s">' % str(self)

    def __str__(self):
        return '_'.join(filter(None, [self.language, self.script,
                                      self.territory, self.variant]))

    def _data(self):
        if self.__data is None:
            self.__data = localedata.load(str(self))
        return self.__data
    _data = property(_data)

    def get_display_name(self, locale=None):
        """Return the display name of the locale using the given locale.
        
        The display name will include the language, territory, script, and
        variant, if those are specified.
        
        >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
        u'Chinese (Simplified Han, China)'
        
        :param locale: the locale to use
        :return: the display name
        """
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        retval = locale.languages.get(self.language)
        if self.territory or self.script or self.variant:
            details = []
            if self.script:
                details.append(locale.scripts.get(self.script))
            if self.territory:
                details.append(locale.territories.get(self.territory))
            if self.variant:
                details.append(locale.variants.get(self.variant))
            details = filter(None, details)
            if details:
                retval += ' (%s)' % u', '.join(details)
        return retval

    display_name = property(get_display_name, doc="""\
        The localized display name of the locale.
        
        >>> Locale('en').display_name
        u'English'
        >>> Locale('en', 'US').display_name
        u'English (United States)'
        >>> Locale('sv').display_name
        u'svenska'
        
        :type: `unicode`
        """)

    def english_name(self):
        return self.get_display_name(Locale('en'))
    english_name = property(english_name, doc="""\
        The english display name of the locale.
        
        >>> Locale('de').english_name
        u'German'
        >>> Locale('de', 'DE').english_name
        u'German (Germany)'
        
        :type: `unicode`
        """)

    #{ General Locale Display Names

    def languages(self):
        return self._data['languages']
    languages = property(languages, doc="""\
        Mapping of language codes to translated language names.
        
        >>> Locale('de', 'DE').languages['ja']
        u'Japanisch'
        
        :type: `dict`
        :see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_
        """)

    def scripts(self):
        return self._data['scripts']
    scripts = property(scripts, doc="""\
        Mapping of script codes to translated script names.
        
        >>> Locale('en', 'US').scripts['Hira']
        u'Hiragana'
        
        :type: `dict`
        :see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
        """)

    def territories(self):
        return self._data['territories']
    territories = property(territories, doc="""\
        Mapping of script codes to translated script names.
        
        >>> Locale('es', 'CO').territories['DE']
        u'Alemania'
        
        :type: `dict`
        :see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
        """)

    def variants(self):
        return self._data['variants']
    variants = property(variants, doc="""\
        Mapping of script codes to translated script names.
        
        >>> Locale('de', 'DE').variants['1901']
        u'alte deutsche Rechtschreibung'
        
        :type: `dict`
        """)

    #{ Number Formatting

    def currencies(self):
        return self._data['currency_names']
    currencies = property(currencies, doc="""\
        Mapping of currency codes to translated currency names.
        
        >>> Locale('en').currencies['COP']
        u'Colombian Peso'
        >>> Locale('de', 'DE').currencies['COP']
        u'Kolumbianischer Peso'
        
        :type: `dict`
        """)

    def currency_symbols(self):
        return self._data['currency_symbols']
    currency_symbols = property(currency_symbols, doc="""\
        Mapping of currency codes to symbols.
        
        >>> Locale('en', 'US').currency_symbols['USD']
        u'$'
        >>> Locale('es', 'CO').currency_symbols['USD']
        u'US$'
        
        :type: `dict`
        """)

    def number_symbols(self):
        return self._data['number_symbols']
    number_symbols = property(number_symbols, doc="""\
        Symbols used in number formatting.
        
        >>> Locale('fr', 'FR').number_symbols['decimal']
        u','
        
        :type: `dict`
        """)

    def decimal_formats(self):
        return self._data['decimal_formats']
    decimal_formats = property(decimal_formats, doc="""\
        Locale patterns for decimal number formatting.
        
        >>> Locale('en', 'US').decimal_formats[None]
        <NumberPattern u'#,##0.###'>
        
        :type: `dict`
        """)

    def currency_formats(self):
        return self._data['currency_formats']
    currency_formats = property(currency_formats, doc=r"""\
        Locale patterns for currency number formatting.
        
        >>> print Locale('en', 'US').currency_formats[None]
        <NumberPattern u'\xa4#,##0.00'>
        
        :type: `dict`
        """)

    def percent_formats(self):
        return self._data['percent_formats']
    percent_formats = property(percent_formats, doc="""\
        Locale patterns for percent number formatting.
        
        >>> Locale('en', 'US').percent_formats[None]
        <NumberPattern u'#,##0%'>
        
        :type: `dict`
        """)

    def scientific_formats(self):
        return self._data['scientific_formats']
    scientific_formats = property(scientific_formats, doc="""\
        Locale patterns for scientific number formatting.
        
        >>> Locale('en', 'US').scientific_formats[None]
        <NumberPattern u'#E0'>
        
        :type: `dict`
        """)

    #{ Calendar Information and Date Formatting

    def periods(self):
        return self._data['periods']
    periods = property(periods, doc="""\
        Locale display names for day periods (AM/PM).
        
        >>> Locale('en', 'US').periods['am']
        u'AM'
        
        :type: `dict`
        """)

    def days(self):
        return self._data['days']
    days = property(days, doc="""\
        Locale display names for weekdays.
        
        >>> Locale('de', 'DE').days['format']['wide'][3]
        u'Donnerstag'
        
        :type: `dict`
        """)

    def months(self):
        return self._data['months']
    months = property(months, doc="""\
        Locale display names for months.
        
        >>> Locale('de', 'DE').months['format']['wide'][10]
        u'Oktober'
        
        :type: `dict`
        """)

    def quarters(self):
        return self._data['quarters']
    quarters = property(quarters, doc="""\
        Locale display names for quarters.
        
        >>> Locale('de', 'DE').quarters['format']['wide'][1]
        u'1. Quartal'
        
        :type: `dict`
        """)

    def eras(self):
        return self._data['eras']
    eras = property(eras, doc="""\
        Locale display names for eras.
        
        >>> Locale('en', 'US').eras['wide'][1]
        u'Anno Domini'
        >>> Locale('en', 'US').eras['abbreviated'][0]
        u'BC'
        
        :type: `dict`
        """)

    def time_zones(self):
        return self._data['time_zones']
    time_zones = property(time_zones, doc="""\
        Locale display names for time zones.
        
        >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
        u'British Summer Time'
        >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
        u'St. John\u2019s'
        
        :type: `dict`
        """)

    def meta_zones(self):
        return self._data['meta_zones']
    meta_zones = property(meta_zones, doc="""\
        Locale display names for meta time zones.
        
        Meta time zones are basically groups of different Olson time zones that
        have the same GMT offset and daylight savings time.
        
        >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
        u'Central European Summer Time'
        
        :type: `dict`
        :since: version 0.9
        """)

    def zone_formats(self):
        return self._data['zone_formats']
    zone_formats = property(zone_formats, doc=r"""\
        Patterns related to the formatting of time zones.
        
        >>> Locale('en', 'US').zone_formats['fallback']
        u'%(1)s (%(0)s)'
        >>> Locale('pt', 'BR').zone_formats['region']
        u'Hor\xe1rio %s'
        
        :type: `dict`
        :since: version 0.9
        """)

    def first_week_day(self):
        return self._data['week_data']['first_day']
    first_week_day = property(first_week_day, doc="""\
        The first day of a week.
        
        >>> Locale('de', 'DE').first_week_day
        0
        >>> Locale('en', 'US').first_week_day
        6
        
        :type: `int`
        """)

    def weekend_start(self):
        return self._data['week_data']['weekend_start']
    weekend_start = property(weekend_start, doc="""\
        The day the weekend starts.
        
        >>> Locale('de', 'DE').weekend_start
        5
        
        :type: `int`
        """)

    def weekend_end(self):
        return self._data['week_data']['weekend_end']
    weekend_end = property(weekend_end, doc="""\
        The day the weekend ends.
        
        >>> Locale('de', 'DE').weekend_end
        6
        
        :type: `int`
        """)

    def min_week_days(self):
        return self._data['week_data']['min_days']
    min_week_days = property(min_week_days, doc="""\
        The minimum number of days in a week so that the week is counted as the
        first week of a year or month.
        
        >>> Locale('de', 'DE').min_week_days
        4
        
        :type: `int`
        """)

    def date_formats(self):
        return self._data['date_formats']
    date_formats = property(date_formats, doc="""\
        Locale patterns for date formatting.
        
        >>> Locale('en', 'US').date_formats['short']
        <DateTimePattern u'M/d/yy'>
        >>> Locale('fr', 'FR').date_formats['long']
        <DateTimePattern u'd MMMM yyyy'>
        
        :type: `dict`
        """)

    def time_formats(self):
        return self._data['time_formats']
    time_formats = property(time_formats, doc="""\
        Locale patterns for time formatting.
        
        >>> Locale('en', 'US').time_formats['short']
        <DateTimePattern u'h:mm a'>
        >>> Locale('fr', 'FR').time_formats['long']
        <DateTimePattern u'HH:mm:ss z'>
        
        :type: `dict`
        """)

    def datetime_formats(self):
        return self._data['datetime_formats']
    datetime_formats = property(datetime_formats, doc="""\
        Locale patterns for datetime formatting.
        
        >>> Locale('en').datetime_formats[None]
        u'{1} {0}'
        >>> Locale('th').datetime_formats[None]
        u'{1}, {0}'
        
        :type: `dict`
        """)


def default_locale(category=None):
    """Returns the system default locale for a given category, based on
    environment variables.
    
    >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
    ...     os.environ[name] = ''
    >>> os.environ['LANG'] = 'fr_FR.UTF-8'
    >>> default_locale('LC_MESSAGES')
    'fr_FR'
    
    :param category: one of the ``LC_XXX`` environment variable names
    :return: the value of the variable, or any of the fallbacks (``LANGUAGE``,
             ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)

    :rtype: `str`
    """
    varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
    for name in filter(None, varnames):
        locale = os.getenv(name)
        if locale:
            if name == 'LANGUAGE' and ':' in locale:
                # the LANGUAGE variable may contain a colon-separated list of
                # language codes; we just pick the language on the list
                locale = locale.split(':')[0]
            return '_'.join(filter(None, parse_locale(locale)))

def negotiate_locale(preferred, available, sep='_'):
    """Find the best match between available and requested locale strings.
    
    >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    'de_DE'
    >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
    'de'
    
    Case is ignored by the algorithm, the result uses the case of the preferred
    locale identifier:
    
    >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
    'de_DE'
    
    :param preferred: the list of locale strings preferred by the user
    :param available: the list of locale strings available
    :param sep: character that separates the different parts of the locale
                strings
    :return: the locale identifier for the best match, or `None` if no match
             was found
    :rtype: `str`
    """
    available = [a.lower() for a in available if a]
    for locale in preferred:
        if locale.lower() in available:
            return locale
        parts = locale.split(sep)
        if len(parts) > 1 and parts[0].lower() in available:
            return parts[0]
    return None

def parse_locale(identifier, sep='_'):
    """Parse a locale identifier into a tuple of the form::
    
      ``(language, territory, script, variant)``
    
    >>> parse_locale('zh_CN')
    ('zh', 'CN', None, None)
    >>> parse_locale('zh_Hans_CN')
    ('zh', 'CN', 'Hans', None)
    
    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:
    
    >>> parse_locale('zh-CN', sep='-')
    ('zh', 'CN', None, None)
    
    If the identifier cannot be parsed into a locale, a `ValueError` exception
    is raised:
    
    >>> parse_locale('not_a_LOCALE_String')
    Traceback (most recent call last):
      ...
    ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
    
    :param identifier: the locale identifier string
    :param sep: character that separates the different components of the locale
                identifier
    :return: the ``(language, territory, script, variant)`` tuple
    :rtype: `tuple`
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier
    
    :see: `IETF RFC 4646 <http://www.ietf.org/rfc/rfc4646.txt>`_
    """
    if '.' in identifier:
        # this is probably the charset/encoding, which we don't care about
        identifier = identifier.split('.', 1)[0]

    parts = identifier.split(sep)
    lang = parts.pop(0).lower()
    if not lang.isalpha():
        raise ValueError('expected only letters, got %r' % lang)

    script = territory = variant = None
    if parts:
        if len(parts[0]) == 4 and parts[0].isalpha():
            script = parts.pop(0).title()

    if parts:
        if len(parts[0]) == 2 and parts[0].isalpha():
            territory = parts.pop(0).upper()
        elif len(parts[0]) == 3 and parts[0].isdigit():
            territory = parts.pop(0)

    if parts:
        if len(parts[0]) == 4 and parts[0][0].isdigit() or \
                len(parts[0]) >= 5 and parts[0][0].isalpha():
            variant = parts.pop()

    if parts:
        raise ValueError('%r is not a valid locale identifier' % identifier)

    return lang, territory, script, variant
Copyright (C) 2012-2017 Edgewall Software