# HG changeset patch # User jruigrok # Date 1271310868 0 # Node ID 0b228ee775fe375f513c02568047ba5835e8a091 # Parent 2dcbbdabfb71f727b010b164516080fcbc47bd46 Copy over trunk. diff --git a/babel3/COPYING b/babel3/COPYING new file mode 100644 --- /dev/null +++ b/babel3/COPYING @@ -0,0 +1,28 @@ +Copyright (C) 2007 Edgewall Software +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/babel3/ChangeLog b/babel3/ChangeLog new file mode 100644 --- /dev/null +++ b/babel3/ChangeLog @@ -0,0 +1,163 @@ +Version 1.0 +http://svn.edgewall.org/repos/babel/tags/1.0.0/ +(???, from branches/stable/1.0.x) + + * Added support for the locale plural rules defined by the CLDR. + * Added `format_timedelta` function to support localized formatting of + relative times with strings such as "2 days" or "1 month" (ticket #126). + * Fixed Python 2.3 compatibility (ticket #146). + * Fixed negative offset handling of Catalog._set_mime_headers (ticket #165). + * Fixed the case where messages containing square brackets would break with + an unpack error. + * Updated to CLDR 1.7. + * Make the CLDR import script work with Python 2.7. + * Fix Serbian plural forms (ticket #213). + + +Version 0.9.5 +http://svn.edgewall.org/repos/babel/tags/0.9.5/ +(Apr 6 2010, from branches/stable/0.9.x) + + * Fixed the case where messages containing square brackets would break with + an unpack error. + * Backport of r467: Fuzzy matching regarding plurals should *NOT* be checked + against len(message.id) because this is always 2, instead, it's should be + checked against catalog.num_plurals (ticket #212). + + +Version 0.9.4 +http://svn.edgewall.org/repos/babel/tags/0.9.4/ +(Aug 25 2008, from branches/stable/0.9.x) + + * Currency symbol definitions that is defined with choice patterns in the + CLDR data are no longer imported, so the symbol code will be used instead. + * Fixed quarter support in date formatting. + * Fixed a serious memory leak that was introduces by the support for CLDR + aliases in 0.9.3 (ticket #128). + * Locale modifiers such as "@euro" are now stripped from locale identifiers + when parsing (ticket #136). 
+ * The system locales "C" and "POSIX" are now treated as aliases for + "en_US_POSIX", for which the CLDR provides the appropriate data. Thanks to + Manlio Perillo for the suggestion. + * Fixed JavaScript extraction for regular expression literals (ticket #138) + and concatenated strings. + * The `Translation` class in `babel.support` can now manage catalogs with + different message domains, and exposes the family of `d*gettext` functions + (ticket #137). + + +Version 0.9.3 +http://svn.edgewall.org/repos/babel/tags/0.9.3/ +(Jul 9 2008, from branches/stable/0.9.x) + + * Fixed invalid message extraction methods causing an UnboundLocalError. + * Extraction method specification can now use a dot instead of the colon to + separate module and function name (ticket #105). + * Fixed message catalog compilation for locales with more than two plural + forms (ticket #95). + * Fixed compilation of message catalogs for locales with more than two plural + forms where the translations were empty (ticket #97). + * The stripping of the comment tags in comments is optional now and + is done for each line in a comment. + * Added a JavaScript message extractor. + * Updated to CLDR 1.6. + * Fixed timezone calculations when formatting datetime and time values. + * Added a `get_plural` function into the plurals module that returns the + correct plural forms for a locale as tuple. + * Added support for alias definitions in the CLDR data files, meaning that + the chance for items missing in certain locales should be greatly reduced + (ticket #68). + + +Version 0.9.2 +http://svn.edgewall.org/repos/babel/tags/0.9.2/ +(Feb 4 2008, from branches/stable/0.9.x) + + * Fixed catalogs' charset values not being recognized (ticket #66). + * Numerous improvements to the default plural forms. + * Fixed fuzzy matching when updating message catalogs (ticket #82). + * Fixed bug in catalog updating, that in some cases pulled in translations + from different catalogs based on the same template. + * Location lines in PO files do no longer get wrapped at hyphens in file + names (ticket #79). + * Fixed division by zero error in catalog compilation on empty catalogs + (ticket #60). + + +Version 0.9.1 +http://svn.edgewall.org/repos/babel/tags/0.9.1/ +(Sep 7 2007, from branches/stable/0.9.x) + + * Fixed catalog updating when a message is merged that was previously simple + but now has a plural form, for example by moving from `gettext` to + `ngettext`, or vice versa. + * Fixed time formatting for 12 am and 12 pm. + * Fixed output encoding of the `pybabel --list-locales` command. + * MO files are now written in binary mode on windows (ticket #61). + + +Version 0.9 +http://svn.edgewall.org/repos/babel/tags/0.9.0/ +(Aug 20 2007, from branches/stable/0.9.x) + + * The `new_catalog` distutils command has been renamed to `init_catalog` for + consistency with the command-line frontend. + * Added compilation of message catalogs to MO files (ticket #21). + * Added updating of message catalogs from POT files (ticket #22). + * Support for significant digits in number formatting. + * Apply proper "banker's rounding" in number formatting in a cross-platform + manner. + * The number formatting functions now also work with numbers represented by + Python `Decimal` objects (ticket #53). + * Added extensible infrastructure for validating translation catalogs. + * Fixed the extractor not filtering out messages that didn't validate against + the keyword's specification (ticket #39). 
+ * Fixed the extractor raising an exception when encountering an empty string + msgid. It now emits a warning to stderr. + * Numerous Python message extractor fixes: it now handles nested function + calls within a gettext function call correctly, uses the correct line number + for multi-line function calls, and other small fixes (tickets #38 and #39). + * Improved support for detecting Python string formatting fields in message + strings (ticket #57). + * CLDR upgraded to the 1.5 release. + * Improved timezone formatting. + * Implemented scientific number formatting. + * Added mechanism to lookup locales by alias, for cases where browsers insist + on including only the language code in the `Accept-Language` header, and + sometimes even the incorrect language code. + + +Version 0.8.1 +http://svn.edgewall.org/repos/babel/tags/0.8.1/ +(Jul 2 2007, from branches/stable/0.8.x) + + * `default_locale()` would fail when the value of the `LANGUAGE` environment + variable contained multiple language codes separated by colon, as is + explicitly allowed by the GNU gettext tools. As the `default_locale()` + function is called at the module level in some modules, this bug would + completely break importing these modules on systems where `LANGUAGE` is set + that way. + * The character set specified in PO template files is now respected when + creating new catalog files based on that template. This allows the use of + characters outside the ASCII range in POT files (ticket #17). + * The default ordering of messages in generated POT files, which is based on + the order those messages are found when walking the source tree, is no + longer subject to differences between platforms; directory and file names + are now always sorted alphabetically. + * The Python message extractor now respects the special encoding comment to be + able to handle files containing non-ASCII characters (ticket #23). + * Added 'N_' (gettext noop) to the extractor's default keywords. + * Made locale string parsing more robust, and also take the script part into + account (ticket #27). + * Added a function to list all locales for which locale data is available. + * Added a command-line option to the `pybabel` command which prints out all + available locales (ticket #24). + * The name of the command-line script has been changed from just `babel` to + `pybabel` to avoid a conflict with the OpenBabel project (ticket #34). + + +Version 0.8 +http://svn.edgewall.org/repos/babel/tags/0.8.0/ +(Jun 20 2007, from branches/stable/0.8.x) + + * First public release diff --git a/babel3/INSTALL.txt b/babel3/INSTALL.txt new file mode 100644 --- /dev/null +++ b/babel3/INSTALL.txt @@ -0,0 +1,39 @@ +Installing Babel +================ + +Prerequisites +------------- + + * Python 2.3 or later (2.4 or later is recommended) + * CLDR 1.7 + * Optional: setuptools 0.6b1 or later + * Optional: pytz (strongly recommended for real time-zone support) + + +Installation +------------ + +Once you've downloaded and unpacked a Babel source release, enter the +directory where the archive was unpacked, and run: + + $ python setup.py install + +Note that you may need administrator/root privileges for this step, as +this command will by default attempt to install Babel to the Python +site-packages directory on your system. 
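+
+If you lack the privileges for a system-wide install, the standard
+distutils --prefix option can be used instead; a minimal sketch (the
+prefix below is only an example, and its site-packages directory must
+then be added to PYTHONPATH):
+
+    $ python setup.py install --prefix=$HOME/babel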
+ +For advanced options, please refer to the easy_install and/or the distutils +documentation: + + http://peak.telecommunity.com/DevCenter/EasyInstall + http://docs.python.org/inst/inst.html + + +Support +------- + +If you encounter any problems with Babel, please don't hesitate to ask +questions on the Babel mailing list or IRC channel: + + http://babel.edgewall.org/wiki/MailingList + http://babel.edgewall.org/wiki/IrcChannel diff --git a/babel3/MANIFEST.in b/babel3/MANIFEST.in new file mode 100644 --- /dev/null +++ b/babel3/MANIFEST.in @@ -0,0 +1,4 @@ +include babel/global.dat +include babel/localedata/*.dat +include doc/api/*.* +include doc/*.html diff --git a/babel3/README.txt b/babel3/README.txt new file mode 100644 --- /dev/null +++ b/babel3/README.txt @@ -0,0 +1,12 @@ +About Babel +=========== + +Babel is a Python library that provides an integrated collection of +utilities that assist with internationalizing and localizing Python +applications (in particular web-based applications.) + +Details can be found in the HTML files in the `doc` folder. + +For more information please visit the Babel web site: + + diff --git a/babel3/babel/__init__.py b/babel3/babel/__init__.py new file mode 100644 --- /dev/null +++ b/babel3/babel/__init__.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007-2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Integrated collection of utilities that assist in internationalizing and +localizing applications. + +This package is basically composed of two major parts: + + * tools to build and work with ``gettext`` message catalogs + * a Python interface to the CLDR (Common Locale Data Repository), providing + access to various locale display names, localized number and date + formatting, etc. + +:see: http://www.gnu.org/software/gettext/ +:see: http://docs.python.org/lib/module-gettext.html +:see: http://www.unicode.org/cldr/ +""" + +from babel.core import * + +__docformat__ = 'restructuredtext en' +try: + from pkg_resources import get_distribution, ResolutionError + try: + __version__ = get_distribution('Babel').version + except ResolutionError: + __version__ = None # unknown +except ImportError: + __version__ = None # unknown diff --git a/babel3/babel/core.py b/babel3/babel/core.py new file mode 100644 --- /dev/null +++ b/babel3/babel/core.py @@ -0,0 +1,804 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. 
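+#
+# A short sketch of the Locale API defined in this module (illustrative
+# only; the exact unicode values returned depend on the bundled CLDR data):
+#
+#     >>> from babel import Locale
+#     >>> locale = Locale.parse('de-DE', sep='-')
+#     >>> locale.display_name
+#     u'Deutsch (Deutschland)'
+#     >>> locale.number_symbols['decimal']
+#     u','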
+ +"""Core locale representation and locale data access.""" + +import os +import pickle + +from babel import localedata + +__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale', + 'parse_locale'] +__docformat__ = 'restructuredtext en' + +_global_data = None + +def get_global(key): + """Return the dictionary for the given key in the global data. + + The global data is stored in the ``babel/global.dat`` file and contains + information independent of individual locales. + + >>> get_global('zone_aliases')['UTC'] + 'Etc/GMT' + >>> get_global('zone_territories')['Europe/Berlin'] + 'DE' + + :param key: the data key + :return: the dictionary found in the global data under the given key + :rtype: `dict` + :since: version 0.9 + """ + global _global_data + if _global_data is None: + dirname = os.path.join(os.path.dirname(__file__)) + filename = os.path.join(dirname, 'global.dat') + fileobj = open(filename, 'rb') + try: + _global_data = pickle.load(fileobj) + finally: + fileobj.close() + return _global_data.get(key, {}) + + +LOCALE_ALIASES = { + 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ', + 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', + 'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES', + 'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT', + 'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV', + 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL', + 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI', + 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA' +} + + +class UnknownLocaleError(Exception): + """Exception thrown when a locale is requested for which no locale data + is available. + """ + + def __init__(self, identifier): + """Create the exception. + + :param identifier: the identifier string of the unsupported locale + """ + Exception.__init__(self, 'unknown locale %r' % identifier) + self.identifier = identifier + + +class Locale(object): + """Representation of a specific locale. + + >>> locale = Locale('en', 'US') + >>> repr(locale) + '' + >>> locale.display_name + u'English (United States)' + + A `Locale` object can also be instantiated from a raw locale string: + + >>> locale = Locale.parse('en-US', sep='-') + >>> repr(locale) + '' + + `Locale` objects provide access to a collection of locale data, such as + territory and language names, number and date format patterns, and more: + + >>> locale.number_symbols['decimal'] + u'.' + + If a locale is requested for which no locale data is available, an + `UnknownLocaleError` is raised: + + >>> Locale.parse('en_DE') + Traceback (most recent call last): + ... + UnknownLocaleError: unknown locale 'en_DE' + + :see: `IETF RFC 3066 `_ + """ + + def __init__(self, language, territory=None, script=None, variant=None): + """Initialize the locale object from the given identifier components. 
+ + >>> locale = Locale('en', 'US') + >>> locale.language + 'en' + >>> locale.territory + 'US' + + :param language: the language code + :param territory: the territory (country or region) code + :param script: the script code + :param variant: the variant code + :raise `UnknownLocaleError`: if no locale data is available for the + requested locale + """ + self.language = language + self.territory = territory + self.script = script + self.variant = variant + self.__data = None + + identifier = str(self) + if not localedata.exists(identifier): + raise UnknownLocaleError(identifier) + + def default(cls, category=None, aliases=LOCALE_ALIASES): + """Return the system default locale for the specified category. + + >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: + ... os.environ[name] = '' + >>> os.environ['LANG'] = 'fr_FR.UTF-8' + >>> Locale.default('LC_MESSAGES') + + + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers + :return: the value of the variable, or any of the fallbacks + (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) + :rtype: `Locale` + :see: `default_locale` + """ + return cls(default_locale(category, aliases=aliases)) + default = classmethod(default) + + def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): + """Find the best match between available and requested locale strings. + + >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT']) + + >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de']) + + >>> Locale.negotiate(['de_DE', 'de'], ['en_US']) + + You can specify the character used in the locale identifiers to separate + the differnet components. This separator is applied to both lists. Also, + case is ignored in the comparison: + + >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-') + + + :param preferred: the list of locale identifers preferred by the user + :param available: the list of locale identifiers available + :param aliases: a dictionary of aliases for locale identifiers + :return: the `Locale` object for the best match, or `None` if no match + was found + :rtype: `Locale` + :see: `negotiate_locale` + """ + identifier = negotiate_locale(preferred, available, sep=sep, + aliases=aliases) + if identifier: + return Locale.parse(identifier, sep=sep) + negotiate = classmethod(negotiate) + + def parse(cls, identifier, sep='_'): + """Create a `Locale` instance for the given locale identifier. 
+ + >>> l = Locale.parse('de-DE', sep='-') + >>> l.display_name + u'Deutsch (Deutschland)' + + If the `identifier` parameter is not a string, but actually a `Locale` + object, that object is returned: + + >>> Locale.parse(l) + + + :param identifier: the locale identifier string + :param sep: optional component separator + :return: a corresponding `Locale` instance + :rtype: `Locale` + :raise `ValueError`: if the string does not appear to be a valid locale + identifier + :raise `UnknownLocaleError`: if no locale data is available for the + requested locale + :see: `parse_locale` + """ + if isinstance(identifier, basestring): + return cls(*parse_locale(identifier, sep=sep)) + return identifier + parse = classmethod(parse) + + def __eq__(self, other): + return str(self) == str(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return '' % str(self) + + def __str__(self): + return '_'.join(filter(None, [self.language, self.script, + self.territory, self.variant])) + + def _data(self): + if self.__data is None: + self.__data = localedata.LocaleDataDict(localedata.load(str(self))) + return self.__data + _data = property(_data) + + def get_display_name(self, locale=None): + """Return the display name of the locale using the given locale. + + The display name will include the language, territory, script, and + variant, if those are specified. + + >>> Locale('zh', 'CN', script='Hans').get_display_name('en') + u'Chinese (Simplified Han, China)' + + :param locale: the locale to use + :return: the display name + """ + if locale is None: + locale = self + locale = Locale.parse(locale) + retval = locale.languages.get(self.language) + if self.territory or self.script or self.variant: + details = [] + if self.script: + details.append(locale.scripts.get(self.script)) + if self.territory: + details.append(locale.territories.get(self.territory)) + if self.variant: + details.append(locale.variants.get(self.variant)) + details = filter(None, details) + if details: + retval += ' (%s)' % u', '.join(details) + return retval + + display_name = property(get_display_name, doc="""\ + The localized display name of the locale. + + >>> Locale('en').display_name + u'English' + >>> Locale('en', 'US').display_name + u'English (United States)' + >>> Locale('sv').display_name + u'svenska' + + :type: `unicode` + """) + + def english_name(self): + return self.get_display_name(Locale('en')) + english_name = property(english_name, doc="""\ + The english display name of the locale. + + >>> Locale('de').english_name + u'German' + >>> Locale('de', 'DE').english_name + u'German (Germany)' + + :type: `unicode` + """) + + #{ General Locale Display Names + + def languages(self): + return self._data['languages'] + languages = property(languages, doc="""\ + Mapping of language codes to translated language names. + + >>> Locale('de', 'DE').languages['ja'] + u'Japanisch' + + :type: `dict` + :see: `ISO 639 `_ + """) + + def scripts(self): + return self._data['scripts'] + scripts = property(scripts, doc="""\ + Mapping of script codes to translated script names. + + >>> Locale('en', 'US').scripts['Hira'] + u'Hiragana' + + :type: `dict` + :see: `ISO 15924 `_ + """) + + def territories(self): + return self._data['territories'] + territories = property(territories, doc="""\ + Mapping of script codes to translated script names. 
+ + >>> Locale('es', 'CO').territories['DE'] + u'Alemania' + + :type: `dict` + :see: `ISO 3166 `_ + """) + + def variants(self): + return self._data['variants'] + variants = property(variants, doc="""\ + Mapping of script codes to translated script names. + + >>> Locale('de', 'DE').variants['1901'] + u'Alte deutsche Rechtschreibung' + + :type: `dict` + """) + + #{ Number Formatting + + def currencies(self): + return self._data['currency_names'] + currencies = property(currencies, doc="""\ + Mapping of currency codes to translated currency names. + + >>> Locale('en').currencies['COP'] + u'Colombian Peso' + >>> Locale('de', 'DE').currencies['COP'] + u'Kolumbianischer Peso' + + :type: `dict` + """) + + def currency_symbols(self): + return self._data['currency_symbols'] + currency_symbols = property(currency_symbols, doc="""\ + Mapping of currency codes to symbols. + + >>> Locale('en', 'US').currency_symbols['USD'] + u'$' + >>> Locale('es', 'CO').currency_symbols['USD'] + u'US$' + + :type: `dict` + """) + + def number_symbols(self): + return self._data['number_symbols'] + number_symbols = property(number_symbols, doc="""\ + Symbols used in number formatting. + + >>> Locale('fr', 'FR').number_symbols['decimal'] + u',' + + :type: `dict` + """) + + def decimal_formats(self): + return self._data['decimal_formats'] + decimal_formats = property(decimal_formats, doc="""\ + Locale patterns for decimal number formatting. + + >>> Locale('en', 'US').decimal_formats[None] + + + :type: `dict` + """) + + def currency_formats(self): + return self._data['currency_formats'] + currency_formats = property(currency_formats, doc=r"""\ + Locale patterns for currency number formatting. + + >>> print Locale('en', 'US').currency_formats[None] + + + :type: `dict` + """) + + def percent_formats(self): + return self._data['percent_formats'] + percent_formats = property(percent_formats, doc="""\ + Locale patterns for percent number formatting. + + >>> Locale('en', 'US').percent_formats[None] + + + :type: `dict` + """) + + def scientific_formats(self): + return self._data['scientific_formats'] + scientific_formats = property(scientific_formats, doc="""\ + Locale patterns for scientific number formatting. + + >>> Locale('en', 'US').scientific_formats[None] + + + :type: `dict` + """) + + #{ Calendar Information and Date Formatting + + def periods(self): + return self._data['periods'] + periods = property(periods, doc="""\ + Locale display names for day periods (AM/PM). + + >>> Locale('en', 'US').periods['am'] + u'AM' + + :type: `dict` + """) + + def days(self): + return self._data['days'] + days = property(days, doc="""\ + Locale display names for weekdays. + + >>> Locale('de', 'DE').days['format']['wide'][3] + u'Donnerstag' + + :type: `dict` + """) + + def months(self): + return self._data['months'] + months = property(months, doc="""\ + Locale display names for months. + + >>> Locale('de', 'DE').months['format']['wide'][10] + u'Oktober' + + :type: `dict` + """) + + def quarters(self): + return self._data['quarters'] + quarters = property(quarters, doc="""\ + Locale display names for quarters. + + >>> Locale('de', 'DE').quarters['format']['wide'][1] + u'1. Quartal' + + :type: `dict` + """) + + def eras(self): + return self._data['eras'] + eras = property(eras, doc="""\ + Locale display names for eras. 
+ + >>> Locale('en', 'US').eras['wide'][1] + u'Anno Domini' + >>> Locale('en', 'US').eras['abbreviated'][0] + u'BC' + + :type: `dict` + """) + + def time_zones(self): + return self._data['time_zones'] + time_zones = property(time_zones, doc="""\ + Locale display names for time zones. + + >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight'] + u'British Summer Time' + >>> Locale('en', 'US').time_zones['America/St_Johns']['city'] + u"St. John's" + + :type: `dict` + """) + + def meta_zones(self): + return self._data['meta_zones'] + meta_zones = property(meta_zones, doc="""\ + Locale display names for meta time zones. + + Meta time zones are basically groups of different Olson time zones that + have the same GMT offset and daylight savings time. + + >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight'] + u'Central European Summer Time' + + :type: `dict` + :since: version 0.9 + """) + + def zone_formats(self): + return self._data['zone_formats'] + zone_formats = property(zone_formats, doc=r"""\ + Patterns related to the formatting of time zones. + + >>> Locale('en', 'US').zone_formats['fallback'] + u'%(1)s (%(0)s)' + >>> Locale('pt', 'BR').zone_formats['region'] + u'Hor\xe1rio %s' + + :type: `dict` + :since: version 0.9 + """) + + def first_week_day(self): + return self._data['week_data']['first_day'] + first_week_day = property(first_week_day, doc="""\ + The first day of a week, with 0 being Monday. + + >>> Locale('de', 'DE').first_week_day + 0 + >>> Locale('en', 'US').first_week_day + 6 + + :type: `int` + """) + + def weekend_start(self): + return self._data['week_data']['weekend_start'] + weekend_start = property(weekend_start, doc="""\ + The day the weekend starts, with 0 being Monday. + + >>> Locale('de', 'DE').weekend_start + 5 + + :type: `int` + """) + + def weekend_end(self): + return self._data['week_data']['weekend_end'] + weekend_end = property(weekend_end, doc="""\ + The day the weekend ends, with 0 being Monday. + + >>> Locale('de', 'DE').weekend_end + 6 + + :type: `int` + """) + + def min_week_days(self): + return self._data['week_data']['min_days'] + min_week_days = property(min_week_days, doc="""\ + The minimum number of days in a week so that the week is counted as the + first week of a year or month. + + >>> Locale('de', 'DE').min_week_days + 4 + + :type: `int` + """) + + def date_formats(self): + return self._data['date_formats'] + date_formats = property(date_formats, doc="""\ + Locale patterns for date formatting. + + >>> Locale('en', 'US').date_formats['short'] + + >>> Locale('fr', 'FR').date_formats['long'] + + + :type: `dict` + """) + + def time_formats(self): + return self._data['time_formats'] + time_formats = property(time_formats, doc="""\ + Locale patterns for time formatting. + + >>> Locale('en', 'US').time_formats['short'] + + >>> Locale('fr', 'FR').time_formats['long'] + + + :type: `dict` + """) + + def datetime_formats(self): + return self._data['datetime_formats'] + datetime_formats = property(datetime_formats, doc="""\ + Locale patterns for datetime formatting. + + >>> Locale('en').datetime_formats['full'] + u'{1} {0}' + >>> Locale('th').datetime_formats['medium'] + u'{1}, {0}' + + :type: `dict` + """) + + def plural_form(self): + return self._data['plural_form'] + plural_form = property(plural_form, doc="""\ + Plural rules for the locale. 
+ + >>> Locale('en').plural_form(1) + 'one' + >>> Locale('en').plural_form(0) + 'other' + >>> Locale('fr').plural_form(0) + 'one' + >>> Locale('ru').plural_form(100) + 'many' + + :type: `PluralRule` + """) + + +def default_locale(category=None, aliases=LOCALE_ALIASES): + """Returns the system default locale for a given category, based on + environment variables. + + >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: + ... os.environ[name] = '' + >>> os.environ['LANG'] = 'fr_FR.UTF-8' + >>> default_locale('LC_MESSAGES') + 'fr_FR' + + The "C" or "POSIX" pseudo-locales are treated as aliases for the + "en_US_POSIX" locale: + + >>> os.environ['LC_MESSAGES'] = 'POSIX' + >>> default_locale('LC_MESSAGES') + 'en_US_POSIX' + + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers + :return: the value of the variable, or any of the fallbacks (``LANGUAGE``, + ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) + :rtype: `str` + """ + varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG') + for name in filter(None, varnames): + locale = os.getenv(name) + if locale: + if name == 'LANGUAGE' and ':' in locale: + # the LANGUAGE variable may contain a colon-separated list of + # language codes; we just pick the language on the list + locale = locale.split(':')[0] + if locale in ('C', 'POSIX'): + locale = 'en_US_POSIX' + elif aliases and locale in aliases: + locale = aliases[locale] + return '_'.join(filter(None, parse_locale(locale))) + +def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES): + """Find the best match between available and requested locale strings. + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT']) + 'de_DE' + >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de']) + 'de' + + Case is ignored by the algorithm, the result uses the case of the preferred + locale identifier: + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) + 'de_DE' + + >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at']) + 'de_DE' + + By default, some web browsers unfortunately do not include the territory + in the locale identifier for many locales, and some don't even allow the + user to easily add the territory. So while you may prefer using qualified + locale identifiers in your web-application, they would not normally match + the language-only locale sent by such browsers. To workaround that, this + function uses a default mapping of commonly used langauge-only locale + identifiers to identifiers including the territory: + + >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US']) + 'ja_JP' + + Some browsers even use an incorrect or outdated language code, such as "no" + for Norwegian, where the correct locale identifier would actually be "nb_NO" + (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of + such cases, too: + + >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE']) + 'nb_NO' + + You can override this default mapping by passing a different `aliases` + dictionary to this function, or you can bypass the behavior althogher by + setting the `aliases` parameter to `None`. 
+ + :param preferred: the list of locale strings preferred by the user + :param available: the list of locale strings available + :param sep: character that separates the different parts of the locale + strings + :param aliases: a dictionary of aliases for locale identifiers + :return: the locale identifier for the best match, or `None` if no match + was found + :rtype: `str` + """ + available = [a.lower() for a in available if a] + for locale in preferred: + ll = locale.lower() + if ll in available: + return locale + if aliases: + alias = aliases.get(ll) + if alias: + alias = alias.replace('_', sep) + if alias.lower() in available: + return alias + parts = locale.split(sep) + if len(parts) > 1 and parts[0].lower() in available: + return parts[0] + return None + +def parse_locale(identifier, sep='_'): + """Parse a locale identifier into a tuple of the form:: + + ``(language, territory, script, variant)`` + + >>> parse_locale('zh_CN') + ('zh', 'CN', None, None) + >>> parse_locale('zh_Hans_CN') + ('zh', 'CN', 'Hans', None) + + The default component separator is "_", but a different separator can be + specified using the `sep` parameter: + + >>> parse_locale('zh-CN', sep='-') + ('zh', 'CN', None, None) + + If the identifier cannot be parsed into a locale, a `ValueError` exception + is raised: + + >>> parse_locale('not_a_LOCALE_String') + Traceback (most recent call last): + ... + ValueError: 'not_a_LOCALE_String' is not a valid locale identifier + + Encoding information and locale modifiers are removed from the identifier: + + >>> parse_locale('it_IT@euro') + ('it', 'IT', None, None) + >>> parse_locale('en_US.UTF-8') + ('en', 'US', None, None) + >>> parse_locale('de_DE.iso885915@euro') + ('de', 'DE', None, None) + + :param identifier: the locale identifier string + :param sep: character that separates the different components of the locale + identifier + :return: the ``(language, territory, script, variant)`` tuple + :rtype: `tuple` + :raise `ValueError`: if the string does not appear to be a valid locale + identifier + + :see: `IETF RFC 4646 `_ + """ + if '.' in identifier: + # this is probably the charset/encoding, which we don't care about + identifier = identifier.split('.', 1)[0] + if '@' in identifier: + # this is a locale modifier such as @euro, which we don't care about + # either + identifier = identifier.split('@', 1)[0] + + parts = identifier.split(sep) + lang = parts.pop(0).lower() + if not lang.isalpha(): + raise ValueError('expected only letters, got %r' % lang) + + script = territory = variant = None + if parts: + if len(parts[0]) == 4 and parts[0].isalpha(): + script = parts.pop(0).title() + + if parts: + if len(parts[0]) == 2 and parts[0].isalpha(): + territory = parts.pop(0).upper() + elif len(parts[0]) == 3 and parts[0].isdigit(): + territory = parts.pop(0) + + if parts: + if len(parts[0]) == 4 and parts[0][0].isdigit() or \ + len(parts[0]) >= 5 and parts[0][0].isalpha(): + variant = parts.pop() + + if parts: + raise ValueError('%r is not a valid locale identifier' % identifier) + + return lang, territory, script, variant diff --git a/babel3/babel/dates.py b/babel3/babel/dates.py new file mode 100644 --- /dev/null +++ b/babel3/babel/dates.py @@ -0,0 +1,1055 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. 
+# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Locale dependent formatting and parsing of dates and times. + +The default locale for the functions in this module is determined by the +following environment variables, in that order: + + * ``LC_TIME``, + * ``LC_ALL``, and + * ``LANG`` +""" + +from __future__ import division +from datetime import date, datetime, time, timedelta, tzinfo +import re + +from babel.core import default_locale, get_global, Locale +from babel.util import UTC + +__all__ = ['format_date', 'format_datetime', 'format_time', 'format_timedelta', + 'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time'] +__docformat__ = 'restructuredtext en' + +LC_TIME = default_locale('LC_TIME') + +# Aliases for use in scopes where the modules are shadowed by local variables +date_ = date +datetime_ = datetime +time_ = time + +def get_period_names(locale=LC_TIME): + """Return the names for day periods (AM/PM) used by the locale. + + >>> get_period_names(locale='en_US')['am'] + u'AM' + + :param locale: the `Locale` object, or a locale string + :return: the dictionary of period names + :rtype: `dict` + """ + return Locale.parse(locale).periods + +def get_day_names(width='wide', context='format', locale=LC_TIME): + """Return the day names used by the locale for the specified format. + + >>> get_day_names('wide', locale='en_US')[1] + u'Tuesday' + >>> get_day_names('abbreviated', locale='es')[1] + u'mar' + >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1] + u'D' + + :param width: the width to use, one of "wide", "abbreviated", or "narrow" + :param context: the context, either "format" or "stand-alone" + :param locale: the `Locale` object, or a locale string + :return: the dictionary of day names + :rtype: `dict` + """ + return Locale.parse(locale).days[context][width] + +def get_month_names(width='wide', context='format', locale=LC_TIME): + """Return the month names used by the locale for the specified format. + + >>> get_month_names('wide', locale='en_US')[1] + u'January' + >>> get_month_names('abbreviated', locale='es')[1] + u'ene' + >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1] + u'J' + + :param width: the width to use, one of "wide", "abbreviated", or "narrow" + :param context: the context, either "format" or "stand-alone" + :param locale: the `Locale` object, or a locale string + :return: the dictionary of month names + :rtype: `dict` + """ + return Locale.parse(locale).months[context][width] + +def get_quarter_names(width='wide', context='format', locale=LC_TIME): + """Return the quarter names used by the locale for the specified format. + + >>> get_quarter_names('wide', locale='en_US')[1] + u'1st quarter' + >>> get_quarter_names('abbreviated', locale='de_DE')[1] + u'Q1' + + :param width: the width to use, one of "wide", "abbreviated", or "narrow" + :param context: the context, either "format" or "stand-alone" + :param locale: the `Locale` object, or a locale string + :return: the dictionary of quarter names + :rtype: `dict` + """ + return Locale.parse(locale).quarters[context][width] + +def get_era_names(width='wide', locale=LC_TIME): + """Return the era names used by the locale for the specified format. + + >>> get_era_names('wide', locale='en_US')[1] + u'Anno Domini' + >>> get_era_names('abbreviated', locale='de_DE')[1] + u'n. Chr.' 
+ + :param width: the width to use, either "wide", "abbreviated", or "narrow" + :param locale: the `Locale` object, or a locale string + :return: the dictionary of era names + :rtype: `dict` + """ + return Locale.parse(locale).eras[width] + +def get_date_format(format='medium', locale=LC_TIME): + """Return the date formatting patterns used by the locale for the specified + format. + + >>> get_date_format(locale='en_US') + + >>> get_date_format('full', locale='de_DE') + + + :param format: the format to use, one of "full", "long", "medium", or + "short" + :param locale: the `Locale` object, or a locale string + :return: the date format pattern + :rtype: `DateTimePattern` + """ + return Locale.parse(locale).date_formats[format] + +def get_datetime_format(format='medium', locale=LC_TIME): + """Return the datetime formatting patterns used by the locale for the + specified format. + + >>> get_datetime_format(locale='en_US') + u'{1} {0}' + + :param format: the format to use, one of "full", "long", "medium", or + "short" + :param locale: the `Locale` object, or a locale string + :return: the datetime format pattern + :rtype: `unicode` + """ + patterns = Locale.parse(locale).datetime_formats + if format not in patterns: + format = None + return patterns[format] + +def get_time_format(format='medium', locale=LC_TIME): + """Return the time formatting patterns used by the locale for the specified + format. + + >>> get_time_format(locale='en_US') + + >>> get_time_format('full', locale='de_DE') + + + :param format: the format to use, one of "full", "long", "medium", or + "short" + :param locale: the `Locale` object, or a locale string + :return: the time format pattern + :rtype: `DateTimePattern` + """ + return Locale.parse(locale).time_formats[format] + +def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME): + """Return the timezone associated with the given `datetime` object formatted + as string indicating the offset from GMT. + + >>> dt = datetime(2007, 4, 1, 15, 30) + >>> get_timezone_gmt(dt, locale='en') + u'GMT+00:00' + + >>> from pytz import timezone + >>> tz = timezone('America/Los_Angeles') + >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz) + >>> get_timezone_gmt(dt, locale='en') + u'GMT-08:00' + >>> get_timezone_gmt(dt, 'short', locale='en') + u'-0800' + + The long format depends on the locale, for example in France the acronym + UTC string is used instead of GMT: + + >>> get_timezone_gmt(dt, 'long', locale='fr_FR') + u'UTC-08:00' + + :param datetime: the ``datetime`` object; if `None`, the current date and + time in UTC is used + :param width: either "long" or "short" + :param locale: the `Locale` object, or a locale string + :return: the GMT offset representation of the timezone + :rtype: `unicode` + :since: version 0.9 + """ + if datetime is None: + datetime = datetime_.utcnow() + elif isinstance(datetime, (int, long)): + datetime = datetime_.utcfromtimestamp(datetime).time() + if datetime.tzinfo is None: + datetime = datetime.replace(tzinfo=UTC) + locale = Locale.parse(locale) + + offset = datetime.tzinfo.utcoffset(datetime) + seconds = offset.days * 24 * 60 * 60 + offset.seconds + hours, seconds = divmod(seconds, 3600) + if width == 'short': + pattern = u'%+03d%02d' + else: + pattern = locale.zone_formats['gmt'] % '%+03d:%02d' + return pattern % (hours, seconds // 60) + +def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME): + """Return a representation of the given timezone using "location format". 
+ + The result depends on both the local display name of the country and the + city assocaited with the time zone: + + >>> from pytz import timezone + >>> tz = timezone('America/St_Johns') + >>> get_timezone_location(tz, locale='de_DE') + u"Kanada (St. John's)" + >>> tz = timezone('America/Mexico_City') + >>> get_timezone_location(tz, locale='de_DE') + u'Mexiko (Mexiko-Stadt)' + + If the timezone is associated with a country that uses only a single + timezone, just the localized country name is returned: + + >>> tz = timezone('Europe/Berlin') + >>> get_timezone_name(tz, locale='de_DE') + u'Deutschland' + + :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines + the timezone; if `None`, the current date and time in + UTC is assumed + :param locale: the `Locale` object, or a locale string + :return: the localized timezone name using location format + :rtype: `unicode` + :since: version 0.9 + """ + if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)): + dt = None + tzinfo = UTC + elif isinstance(dt_or_tzinfo, (datetime, time)): + dt = dt_or_tzinfo + if dt.tzinfo is not None: + tzinfo = dt.tzinfo + else: + tzinfo = UTC + else: + dt = None + tzinfo = dt_or_tzinfo + locale = Locale.parse(locale) + + if hasattr(tzinfo, 'zone'): + zone = tzinfo.zone + else: + zone = tzinfo.tzname(dt or datetime.utcnow()) + + # Get the canonical time-zone code + zone = get_global('zone_aliases').get(zone, zone) + + info = locale.time_zones.get(zone, {}) + + # Otherwise, if there is only one timezone for the country, return the + # localized country name + region_format = locale.zone_formats['region'] + territory = get_global('zone_territories').get(zone) + if territory not in locale.territories: + territory = 'ZZ' # invalid/unknown + territory_name = locale.territories[territory] + if territory and len(get_global('territory_zones').get(territory, [])) == 1: + return region_format % (territory_name) + + # Otherwise, include the city in the output + fallback_format = locale.zone_formats['fallback'] + if 'city' in info: + city_name = info['city'] + else: + metazone = get_global('meta_zones').get(zone) + metazone_info = locale.meta_zones.get(metazone, {}) + if 'city' in metazone_info: + city_name = metainfo['city'] + elif '/' in zone: + city_name = zone.split('/', 1)[1].replace('_', ' ') + else: + city_name = zone.replace('_', ' ') + + return region_format % (fallback_format % { + '0': city_name, + '1': territory_name + }) + +def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False, + locale=LC_TIME): + r"""Return the localized display name for the given timezone. The timezone + may be specified using a ``datetime`` or `tzinfo` object. + + >>> from pytz import timezone + >>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles')) + >>> get_timezone_name(dt, locale='en_US') + u'Pacific Standard Time' + >>> get_timezone_name(dt, width='short', locale='en_US') + u'PST' + + If this function gets passed only a `tzinfo` object and no concrete + `datetime`, the returned display name is indenpendent of daylight savings + time. 
This can be used for example for selecting timezones, or to set the + time of events that recur across DST changes: + + >>> tz = timezone('America/Los_Angeles') + >>> get_timezone_name(tz, locale='en_US') + u'Pacific Time' + >>> get_timezone_name(tz, 'short', locale='en_US') + u'PT' + + If no localized display name for the timezone is available, and the timezone + is associated with a country that uses only a single timezone, the name of + that country is returned, formatted according to the locale: + + >>> tz = timezone('Europe/Berlin') + >>> get_timezone_name(tz, locale='de_DE') + u'Deutschland' + >>> get_timezone_name(tz, locale='pt_BR') + u'Hor\xe1rio Alemanha' + + On the other hand, if the country uses multiple timezones, the city is also + included in the representation: + + >>> tz = timezone('America/St_Johns') + >>> get_timezone_name(tz, locale='de_DE') + u"Kanada (St. John's)" + + The `uncommon` parameter can be set to `True` to enable the use of timezone + representations that are not commonly used by the requested locale. For + example, while in French the central European timezone is usually + abbreviated as "HEC", in Canadian French, this abbreviation is not in + common use, so a generic name would be chosen by default: + + >>> tz = timezone('Europe/Paris') + >>> get_timezone_name(tz, 'short', locale='fr_CA') + u'France' + >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA') + u'HEC' + + :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines + the timezone; if a ``tzinfo`` object is used, the + resulting display name will be generic, i.e. + independent of daylight savings time; if `None`, the + current date in UTC is assumed + :param width: either "long" or "short" + :param uncommon: whether even uncommon timezone abbreviations should be used + :param locale: the `Locale` object, or a locale string + :return: the timezone display name + :rtype: `unicode` + :since: version 0.9 + :see: `LDML Appendix J: Time Zone Display Names + `_ + """ + if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)): + dt = None + tzinfo = UTC + elif isinstance(dt_or_tzinfo, (datetime, time)): + dt = dt_or_tzinfo + if dt.tzinfo is not None: + tzinfo = dt.tzinfo + else: + tzinfo = UTC + else: + dt = None + tzinfo = dt_or_tzinfo + locale = Locale.parse(locale) + + if hasattr(tzinfo, 'zone'): + zone = tzinfo.zone + else: + zone = tzinfo.tzname(dt) + + # Get the canonical time-zone code + zone = get_global('zone_aliases').get(zone, zone) + + info = locale.time_zones.get(zone, {}) + # Try explicitly translated zone names first + if width in info: + if dt is None: + field = 'generic' + else: + dst = tzinfo.dst(dt) + if dst is None: + field = 'generic' + elif dst == 0: + field = 'standard' + else: + field = 'daylight' + if field in info[width]: + return info[width][field] + + metazone = get_global('meta_zones').get(zone) + if metazone: + metazone_info = locale.meta_zones.get(metazone, {}) + if width in metazone_info and (uncommon or metazone_info.get('common')): + if dt is None: + field = 'generic' + else: + field = tzinfo.dst(dt) and 'daylight' or 'standard' + if field in metazone_info[width]: + return metazone_info[width][field] + + # If we have a concrete datetime, we assume that the result can't be + # independent of daylight savings time, so we return the GMT offset + if dt is not None: + return get_timezone_gmt(dt, width=width, locale=locale) + + return get_timezone_location(dt_or_tzinfo, locale=locale) + +def format_date(date=None, format='medium', 
locale=LC_TIME): + """Return a date formatted according to the given pattern. + + >>> d = date(2007, 04, 01) + >>> format_date(d, locale='en_US') + u'Apr 1, 2007' + >>> format_date(d, format='full', locale='de_DE') + u'Sonntag, 1. April 2007' + + If you don't want to use the locale default formats, you can specify a + custom date pattern: + + >>> format_date(d, "EEE, MMM d, ''yy", locale='en') + u"Sun, Apr 1, '07" + + :param date: the ``date`` or ``datetime`` object; if `None`, the current + date is used + :param format: one of "full", "long", "medium", or "short", or a custom + date/time pattern + :param locale: a `Locale` object or a locale identifier + :rtype: `unicode` + + :note: If the pattern contains time fields, an `AttributeError` will be + raised when trying to apply the formatting. This is also true if + the value of ``date`` parameter is actually a ``datetime`` object, + as this function automatically converts that to a ``date``. + """ + if date is None: + date = date_.today() + elif isinstance(date, datetime): + date = date.date() + + locale = Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + format = get_date_format(format, locale=locale) + pattern = parse_pattern(format) + return parse_pattern(format).apply(date, locale) + +def format_datetime(datetime=None, format='medium', tzinfo=None, + locale=LC_TIME): + r"""Return a date formatted according to the given pattern. + + >>> dt = datetime(2007, 04, 01, 15, 30) + >>> format_datetime(dt, locale='en_US') + u'Apr 1, 2007 3:30:00 PM' + + For any pattern requiring the display of the time-zone, the third-party + ``pytz`` package is needed to explicitly specify the time-zone: + + >>> from pytz import timezone + >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'), + ... locale='fr_FR') + u'dimanche 1 avril 2007 17:30:00 Heure avanc\xe9e de l\u2019Europe centrale' + >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz", + ... tzinfo=timezone('US/Eastern'), locale='en') + u'2007.04.01 AD at 11:30:00 EDT' + + :param datetime: the `datetime` object; if `None`, the current date and + time is used + :param format: one of "full", "long", "medium", or "short", or a custom + date/time pattern + :param tzinfo: the timezone to apply to the time for display + :param locale: a `Locale` object or a locale identifier + :rtype: `unicode` + """ + if datetime is None: + datetime = datetime_.utcnow() + elif isinstance(datetime, (int, long)): + datetime = datetime_.utcfromtimestamp(datetime) + elif isinstance(datetime, time): + datetime = datetime_.combine(date.today(), datetime) + if datetime.tzinfo is None: + datetime = datetime.replace(tzinfo=UTC) + if tzinfo is not None: + datetime = datetime.astimezone(tzinfo) + if hasattr(tzinfo, 'normalize'): # pytz + datetime = tzinfo.normalize(datetime) + + locale = Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + return get_datetime_format(format, locale=locale) \ + .replace('{0}', format_time(datetime, format, tzinfo=None, + locale=locale)) \ + .replace('{1}', format_date(datetime, format, locale=locale)) + else: + return parse_pattern(format).apply(datetime, locale) + +def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME): + r"""Return a time formatted according to the given pattern. 
+ + >>> t = time(15, 30) + >>> format_time(t, locale='en_US') + u'3:30:00 PM' + >>> format_time(t, format='short', locale='de_DE') + u'15:30' + + If you don't want to use the locale default formats, you can specify a + custom time pattern: + + >>> format_time(t, "hh 'o''clock' a", locale='en') + u"03 o'clock PM" + + For any pattern requiring the display of the time-zone, the third-party + ``pytz`` package is needed to explicitly specify the time-zone: + + >>> from pytz import timezone + >>> t = datetime(2007, 4, 1, 15, 30) + >>> tzinfo = timezone('Europe/Paris') + >>> t = tzinfo.localize(t) + >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR') + u'15:30:00 Heure avanc\xe9e de l\u2019Europe centrale' + >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'), + ... locale='en') + u"09 o'clock AM, Eastern Daylight Time" + + As that example shows, when this function gets passed a + ``datetime.datetime`` value, the actual time in the formatted string is + adjusted to the timezone specified by the `tzinfo` parameter. If the + ``datetime`` is "naive" (i.e. it has no associated timezone information), + it is assumed to be in UTC. + + These timezone calculations are **not** performed if the value is of type + ``datetime.time``, as without date information there's no way to determine + what a given time would translate to in a different timezone without + information about whether daylight savings time is in effect or not. This + means that time values are left as-is, and the value of the `tzinfo` + parameter is only used to display the timezone name if needed: + + >>> t = time(15, 30) + >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'), + ... locale='fr_FR') + u'15:30:00 Heure normale de l\u2019Europe centrale' + >>> format_time(t, format='full', tzinfo=timezone('US/Eastern'), + ... locale='en_US') + u'3:30:00 PM Eastern Standard Time' + + :param time: the ``time`` or ``datetime`` object; if `None`, the current + time in UTC is used + :param format: one of "full", "long", "medium", or "short", or a custom + date/time pattern + :param tzinfo: the time-zone to apply to the time for display + :param locale: a `Locale` object or a locale identifier + :rtype: `unicode` + + :note: If the pattern contains date fields, an `AttributeError` will be + raised when trying to apply the formatting. This is also true if + the value of ``time`` parameter is actually a ``datetime`` object, + as this function automatically converts that to a ``time``. + """ + if time is None: + time = datetime.utcnow() + elif isinstance(time, (int, long)): + time = datetime.utcfromtimestamp(time) + if time.tzinfo is None: + time = time.replace(tzinfo=UTC) + if isinstance(time, datetime): + if tzinfo is not None: + time = time.astimezone(tzinfo) + if hasattr(tzinfo, 'normalize'): # pytz + time = tzinfo.normalize(time) + time = time.timetz() + elif tzinfo is not None: + time = time.replace(tzinfo=tzinfo) + + locale = Locale.parse(locale) + if format in ('full', 'long', 'medium', 'short'): + format = get_time_format(format, locale=locale) + return parse_pattern(format).apply(time, locale) + +TIMEDELTA_UNITS = ( + ('year', 3600 * 24 * 365), + ('month', 3600 * 24 * 30), + ('week', 3600 * 24 * 7), + ('day', 3600 * 24), + ('hour', 3600), + ('minute', 60), + ('second', 1) +) + +def format_timedelta(delta, granularity='second', threshold=.85, locale=LC_TIME): + """Return a time delta according to the rules of the given locale. 
+ + >>> format_timedelta(timedelta(weeks=12), locale='en_US') + u'3 mths' + >>> format_timedelta(timedelta(seconds=1), locale='es') + u'1 s' + + The granularity parameter can be provided to alter the lowest unit + presented, which defaults to a second. + + >>> format_timedelta(timedelta(hours=3), granularity='day', + ... locale='en_US') + u'1 day' + + The threshold parameter can be used to determine at which value the + presentation switches to the next higher unit. A higher threshold factor + means the presentation will switch later. For example: + + >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US') + u'1 day' + >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US') + u'23 hrs' + + :param delta: a ``timedelta`` object representing the time difference to + format, or the delta in seconds as an `int` value + :param granularity: determines the smallest unit that should be displayed, + the value can be one of "year", "month", "week", "day", + "hour", "minute" or "second" + :param threshold: factor that determines at which point the presentation + switches to the next higher unit + :param locale: a `Locale` object or a locale identifier + :rtype: `unicode` + """ + if isinstance(delta, timedelta): + seconds = int((delta.days * 86400) + delta.seconds) + else: + seconds = delta + locale = Locale.parse(locale) + + for unit, secs_per_unit in TIMEDELTA_UNITS: + value = abs(seconds) / secs_per_unit + if value >= threshold or unit == granularity: + if unit == granularity and value > 0: + value = max(1, value) + value = int(round(value)) + plural_form = locale.plural_form(value) + pattern = locale._data['unit_patterns'][unit][plural_form] + return pattern.replace('{0}', str(value)) + + return u'' + +def parse_date(string, locale=LC_TIME): + """Parse a date from a string. + + This function uses the date format for the locale as a hint to determine + the order in which the date fields appear in the string. + + >>> parse_date('4/1/04', locale='en_US') + datetime.date(2004, 4, 1) + >>> parse_date('01.04.2004', locale='de_DE') + datetime.date(2004, 4, 1) + + :param string: the string containing the date + :param locale: a `Locale` object or a locale identifier + :return: the parsed date + :rtype: `date` + """ + # TODO: try ISO format first? + format = get_date_format(locale=locale).pattern.lower() + year_idx = format.index('y') + month_idx = format.index('m') + if month_idx < 0: + month_idx = format.index('l') + day_idx = format.index('d') + + indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')] + indexes.sort() + indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)]) + + # FIXME: this currently only supports numbers, but should also support month + # names, both in the requested locale, and english + + numbers = re.findall('(\d+)', string) + year = numbers[indexes['Y']] + if len(year) == 2: + year = 2000 + int(year) + else: + year = int(year) + month = int(numbers[indexes['M']]) + day = int(numbers[indexes['D']]) + if month > 12: + month, day = day, month + return date(year, month, day) + +def parse_datetime(string, locale=LC_TIME): + """Parse a date and time from a string. + + This function uses the date and time formats for the locale as a hint to + determine the order in which the time fields appear in the string. 
+ + :param string: the string containing the date and time + :param locale: a `Locale` object or a locale identifier + :return: the parsed date/time + :rtype: `datetime` + """ + raise NotImplementedError + +def parse_time(string, locale=LC_TIME): + """Parse a time from a string. + + This function uses the time format for the locale as a hint to determine + the order in which the time fields appear in the string. + + >>> parse_time('15:30:00', locale='en_US') + datetime.time(15, 30) + + :param string: the string containing the time + :param locale: a `Locale` object or a locale identifier + :return: the parsed time + :rtype: `time` + """ + # TODO: try ISO format first? + format = get_time_format(locale=locale).pattern.lower() + hour_idx = format.index('h') + if hour_idx < 0: + hour_idx = format.index('k') + min_idx = format.index('m') + sec_idx = format.index('s') + + indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')] + indexes.sort() + indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)]) + + # FIXME: support 12 hour clock, and 0-based hour specification + # and seconds should be optional, maybe minutes too + # oh, and time-zones, of course + + numbers = re.findall('(\d+)', string) + hour = int(numbers[indexes['H']]) + minute = int(numbers[indexes['M']]) + second = int(numbers[indexes['S']]) + return time(hour, minute, second) + + +class DateTimePattern(object): + + def __init__(self, pattern, format): + self.pattern = pattern + self.format = format + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self.pattern) + + def __unicode__(self): + return self.pattern + + def __mod__(self, other): + if type(other) is not DateTimeFormat: + return NotImplemented + return self.format % other + + def apply(self, datetime, locale): + return self % DateTimeFormat(datetime, locale) + + +class DateTimeFormat(object): + + def __init__(self, value, locale): + assert isinstance(value, (date, datetime, time)) + if isinstance(value, (datetime, time)) and value.tzinfo is None: + value = value.replace(tzinfo=UTC) + self.value = value + self.locale = Locale.parse(locale) + + def __getitem__(self, name): + char = name[0] + num = len(name) + if char == 'G': + return self.format_era(char, num) + elif char in ('y', 'Y', 'u'): + return self.format_year(char, num) + elif char in ('Q', 'q'): + return self.format_quarter(char, num) + elif char in ('M', 'L'): + return self.format_month(char, num) + elif char in ('w', 'W'): + return self.format_week(char, num) + elif char == 'd': + return self.format(self.value.day, num) + elif char == 'D': + return self.format_day_of_year(num) + elif char == 'F': + return self.format_day_of_week_in_month() + elif char in ('E', 'e', 'c'): + return self.format_weekday(char, num) + elif char == 'a': + return self.format_period(char) + elif char == 'h': + if self.value.hour % 12 == 0: + return self.format(12, num) + else: + return self.format(self.value.hour % 12, num) + elif char == 'H': + return self.format(self.value.hour, num) + elif char == 'K': + return self.format(self.value.hour % 12, num) + elif char == 'k': + if self.value.hour == 0: + return self.format(24, num) + else: + return self.format(self.value.hour, num) + elif char == 'm': + return self.format(self.value.minute, num) + elif char == 's': + return self.format(self.value.second, num) + elif char == 'S': + return self.format_frac_seconds(num) + elif char == 'A': + return self.format_milliseconds_in_day(num) + elif char in ('z', 'Z', 'v', 'V'): + return self.format_timezone(char, num) + 
else: + raise KeyError('Unsupported date/time field %r' % char) + + def format_era(self, char, num): + width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)] + era = int(self.value.year >= 0) + return get_era_names(width, self.locale)[era] + + def format_year(self, char, num): + value = self.value.year + if char.isupper(): + week = self.get_week_number(self.get_day_of_year()) + if week == 0: + value -= 1 + year = self.format(value, num) + if num == 2: + year = year[-2:] + return year + + def format_quarter(self, char, num): + quarter = (self.value.month - 1) // 3 + 1 + if num <= 2: + return ('%%0%dd' % num) % quarter + width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] + context = {'Q': 'format', 'q': 'stand-alone'}[char] + return get_quarter_names(width, context, self.locale)[quarter] + + def format_month(self, char, num): + if num <= 2: + return ('%%0%dd' % num) % self.value.month + width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] + context = {'M': 'format', 'L': 'stand-alone'}[char] + return get_month_names(width, context, self.locale)[self.value.month] + + def format_week(self, char, num): + if char.islower(): # week of year + day_of_year = self.get_day_of_year() + week = self.get_week_number(day_of_year) + if week == 0: + date = self.value - timedelta(days=day_of_year) + week = self.get_week_number(self.get_day_of_year(date), + date.weekday()) + return self.format(week, num) + else: # week of month + week = self.get_week_number(self.value.day) + if week == 0: + date = self.value - timedelta(days=self.value.day) + week = self.get_week_number(date.day, date.weekday()) + pass + return '%d' % week + + def format_weekday(self, char, num): + if num < 3: + if char.islower(): + value = 7 - self.locale.first_week_day + self.value.weekday() + return self.format(value % 7 + 1, num) + num = 3 + weekday = self.value.weekday() + width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] + context = {3: 'format', 4: 'format', 5: 'stand-alone'}[num] + return get_day_names(width, context, self.locale)[weekday] + + def format_day_of_year(self, num): + return self.format(self.get_day_of_year(), num) + + def format_day_of_week_in_month(self): + return '%d' % ((self.value.day - 1) // 7 + 1) + + def format_period(self, char): + period = {0: 'am', 1: 'pm'}[int(self.value.hour >= 12)] + return get_period_names(locale=self.locale)[period] + + def format_frac_seconds(self, num): + value = str(self.value.microsecond) + return self.format(round(float('.%s' % value), num) * 10**num, num) + + def format_milliseconds_in_day(self, num): + msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \ + self.value.minute * 60000 + self.value.hour * 3600000 + return self.format(msecs, num) + + def format_timezone(self, char, num): + width = {3: 'short', 4: 'long'}[max(3, num)] + if char == 'z': + return get_timezone_name(self.value, width, locale=self.locale) + elif char == 'Z': + return get_timezone_gmt(self.value, width, locale=self.locale) + elif char == 'v': + return get_timezone_name(self.value.tzinfo, width, + locale=self.locale) + elif char == 'V': + if num == 1: + return get_timezone_name(self.value.tzinfo, width, + uncommon=True, locale=self.locale) + return get_timezone_location(self.value.tzinfo, locale=self.locale) + + def format(self, value, length): + return ('%%0%dd' % length) % value + + def get_day_of_year(self, date=None): + if date is None: + date = self.value + return (date - date_(date.year, 1, 1)).days + 1 + + def get_week_number(self, day_of_period, day_of_week=None): + 
"""Return the number of the week of a day within a period. This may be + the week number in a year or the week number in a month. + + Usually this will return a value equal to or greater than 1, but if the + first week of the period is so short that it actually counts as the last + week of the previous period, this function will return 0. + + >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('de_DE')) + >>> format.get_week_number(6) + 1 + + >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('en_US')) + >>> format.get_week_number(6) + 2 + + :param day_of_period: the number of the day in the period (usually + either the day of month or the day of year) + :param day_of_week: the week day; if ommitted, the week day of the + current date is assumed + """ + if day_of_week is None: + day_of_week = self.value.weekday() + first_day = (day_of_week - self.locale.first_week_day - + day_of_period + 1) % 7 + if first_day < 0: + first_day += 7 + week_number = (day_of_period + first_day - 1) // 7 + if 7 - first_day >= self.locale.min_week_days: + week_number += 1 + return week_number + + +PATTERN_CHARS = { + 'G': [1, 2, 3, 4, 5], # era + 'y': None, 'Y': None, 'u': None, # year + 'Q': [1, 2, 3, 4], 'q': [1, 2, 3, 4], # quarter + 'M': [1, 2, 3, 4, 5], 'L': [1, 2, 3, 4, 5], # month + 'w': [1, 2], 'W': [1], # week + 'd': [1, 2], 'D': [1, 2, 3], 'F': [1], 'g': None, # day + 'E': [1, 2, 3, 4, 5], 'e': [1, 2, 3, 4, 5], 'c': [1, 3, 4, 5], # week day + 'a': [1], # period + 'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour + 'm': [1, 2], # minute + 's': [1, 2], 'S': None, 'A': None, # second + 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4] # zone +} + +def parse_pattern(pattern): + """Parse date, time, and datetime format patterns. + + >>> parse_pattern("MMMMd").format + u'%(MMMM)s%(d)s' + >>> parse_pattern("MMM d, yyyy").format + u'%(MMM)s %(d)s, %(yyyy)s' + + Pattern can contain literal strings in single quotes: + + >>> parse_pattern("H:mm' Uhr 'z").format + u'%(H)s:%(mm)s Uhr %(z)s' + + An actual single quote can be used by using two adjacent single quote + characters: + + >>> parse_pattern("hh' o''clock'").format + u"%(hh)s o'clock" + + :param pattern: the formatting pattern to parse + """ + if type(pattern) is DateTimePattern: + return pattern + + result = [] + quotebuf = None + charbuf = [] + fieldchar = [''] + fieldnum = [0] + + def append_chars(): + result.append(''.join(charbuf).replace('%', '%%')) + del charbuf[:] + + def append_field(): + limit = PATTERN_CHARS[fieldchar[0]] + if limit and fieldnum[0] not in limit: + raise ValueError('Invalid length for field: %r' + % (fieldchar[0] * fieldnum[0])) + result.append('%%(%s)s' % (fieldchar[0] * fieldnum[0])) + fieldchar[0] = '' + fieldnum[0] = 0 + + for idx, char in enumerate(pattern.replace("''", '\0')): + if quotebuf is None: + if char == "'": # quote started + if fieldchar[0]: + append_field() + elif charbuf: + append_chars() + quotebuf = [] + elif char in PATTERN_CHARS: + if charbuf: + append_chars() + if char == fieldchar[0]: + fieldnum[0] += 1 + else: + if fieldchar[0]: + append_field() + fieldchar[0] = char + fieldnum[0] = 1 + else: + if fieldchar[0]: + append_field() + charbuf.append(char) + + elif quotebuf is not None: + if char == "'": # end of quote + charbuf.extend(quotebuf) + quotebuf = None + else: # inside quote + quotebuf.append(char) + + if fieldchar[0]: + append_field() + elif charbuf: + append_chars() + + return DateTimePattern(pattern, u''.join(result).replace('\0', "'")) diff --git 
a/babel3/babel/localedata.py b/babel3/babel/localedata.py new file mode 100644 --- /dev/null +++ b/babel3/babel/localedata.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Low-level locale data access. + +:note: The `Locale` class, which uses this module under the hood, provides a + more convenient interface for accessing the locale data. +""" + +import os +import pickle +try: + import threading +except ImportError: + import dummy_threading as threading +from UserDict import DictMixin + +__all__ = ['exists', 'list', 'load'] +__docformat__ = 'restructuredtext en' + +_cache = {} +_cache_lock = threading.RLock() +_dirname = os.path.join(os.path.dirname(__file__), 'localedata') + + +def exists(name): + """Check whether locale data is available for the given locale. + + :param name: the locale identifier string + :return: `True` if the locale data exists, `False` otherwise + :rtype: `bool` + """ + if name in _cache: + return True + return os.path.exists(os.path.join(_dirname, '%s.dat' % name)) + + +def list(): + """Return a list of all locale identifiers for which locale data is + available. + + :return: a list of locale identifiers (strings) + :rtype: `list` + :since: version 0.8.1 + """ + return [stem for stem, extension in [ + os.path.splitext(filename) for filename in os.listdir(_dirname) + ] if extension == '.dat' and stem != 'root'] + + +def load(name, merge_inherited=True): + """Load the locale data for the given locale. + + The locale data is a dictionary that contains much of the data defined by + the Common Locale Data Repository (CLDR). This data is stored as a + collection of pickle files inside the ``babel`` package. + + >>> d = load('en_US') + >>> d['languages']['sv'] + u'Swedish' + + Note that the results are cached, and subsequent requests for the same + locale return the same dictionary: + + >>> d1 = load('en_US') + >>> d2 = load('en_US') + >>> d1 is d2 + True + + :param name: the locale identifier string (or "root") + :param merge_inherited: whether the inherited data should be merged into + the data of the requested locale + :return: the locale data + :rtype: `dict` + :raise `IOError`: if no locale data file is found for the given locale + identifer, or one of the locales it inherits from + """ + _cache_lock.acquire() + try: + data = _cache.get(name) + if not data: + # Load inherited data + if name == 'root' or not merge_inherited: + data = {} + else: + parts = name.split('_') + if len(parts) == 1: + parent = 'root' + else: + parent = '_'.join(parts[:-1]) + data = load(parent).copy() + filename = os.path.join(_dirname, '%s.dat' % name) + fileobj = open(filename, 'rb') + try: + if name != 'root' and merge_inherited: + merge(data, pickle.load(fileobj)) + else: + data = pickle.load(fileobj) + _cache[name] = data + finally: + fileobj.close() + return data + finally: + _cache_lock.release() + + +def merge(dict1, dict2): + """Merge the data from `dict2` into the `dict1` dictionary, making copies + of nested dictionaries. 
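+
+    Values that are aliases into other parts of the locale data are not
+    simply overwritten by an incoming dictionary; instead the two are kept
+    together so that the alias can still be resolved later:
+
+    >>> d = {'x': Alias(['foo'])}
+    >>> merge(d, {'x': {'a': 1}})
+    >>> d
+    {'x': (<Alias ('foo',)>, {'a': 1})}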
+ + >>> d = {1: 'foo', 3: 'baz'} + >>> merge(d, {1: 'Foo', 2: 'Bar'}) + >>> items = d.items(); items.sort(); items + [(1, 'Foo'), (2, 'Bar'), (3, 'baz')] + + :param dict1: the dictionary to merge into + :param dict2: the dictionary containing the data that should be merged + """ + for key, val2 in dict2.items(): + if val2 is not None: + val1 = dict1.get(key) + if isinstance(val2, dict): + if val1 is None: + val1 = {} + if isinstance(val1, Alias): + val1 = (val1, val2) + elif isinstance(val1, tuple): + alias, others = val1 + others = others.copy() + merge(others, val2) + val1 = (alias, others) + else: + val1 = val1.copy() + merge(val1, val2) + else: + val1 = val2 + dict1[key] = val1 + + +class Alias(object): + """Representation of an alias in the locale data. + + An alias is a value that refers to some other part of the locale data, + as specified by the `keys`. + """ + + def __init__(self, keys): + self.keys = tuple(keys) + + def __repr__(self): + return '<%s %r>' % (type(self).__name__, self.keys) + + def resolve(self, data): + """Resolve the alias based on the given data. + + This is done recursively, so if one alias resolves to a second alias, + that second alias will also be resolved. + + :param data: the locale data + :type data: `dict` + """ + base = data + for key in self.keys: + data = data[key] + if isinstance(data, Alias): + data = data.resolve(base) + elif isinstance(data, tuple): + alias, others = data + data = alias.resolve(base) + return data + + +class LocaleDataDict(DictMixin, dict): + """Dictionary wrapper that automatically resolves aliases to the actual + values. + """ + + def __init__(self, data, base=None): + dict.__init__(self, data) + if base is None: + base = data + self.base = base + + def __getitem__(self, key): + orig = val = dict.__getitem__(self, key) + if isinstance(val, Alias): # resolve an alias + val = val.resolve(self.base) + if isinstance(val, tuple): # Merge a partial dict with an alias + alias, others = val + val = alias.resolve(self.base).copy() + merge(val, others) + if type(val) is dict: # Return a nested alias-resolving dict + val = LocaleDataDict(val, base=self.base) + if val is not orig: + self[key] = val + return val + + def copy(self): + return LocaleDataDict(dict.copy(self), base=self.base) diff --git a/babel3/babel/messages/__init__.py b/babel3/babel/messages/__init__.py new file mode 100644 --- /dev/null +++ b/babel3/babel/messages/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Support for ``gettext`` message catalogs.""" + +from babel.messages.catalog import * diff --git a/babel3/babel/messages/catalog.py b/babel3/babel/messages/catalog.py new file mode 100644 --- /dev/null +++ b/babel3/babel/messages/catalog.py @@ -0,0 +1,802 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007-2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. 
+# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Data structures for message catalogs.""" + +from cgi import parse_header +from datetime import datetime +from difflib import get_close_matches +from email import message_from_string +from copy import copy +import re +import time + +from babel import __version__ as VERSION +from babel.core import Locale +from babel.dates import format_datetime +from babel.messages.plurals import get_plural +from babel.util import odict, distinct, set, LOCALTZ, UTC, FixedOffsetTimezone + +__all__ = ['Message', 'Catalog', 'TranslationError'] +__docformat__ = 'restructuredtext en' + + +PYTHON_FORMAT = re.compile(r'''(?x) + \% + (?:\(([\w]*)\))? + ( + [-#0\ +]?(?:\*|[\d]+)? + (?:\.(?:\*|[\d]+))? + [hlL]? + ) + ([diouxXeEfFgGcrs%]) +''') + + +class Message(object): + """Representation of a single message in a catalog.""" + + def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), + user_comments=(), previous_id=(), lineno=None, context=None): + """Create the message object. + + :param id: the message ID, or a ``(singular, plural)`` tuple for + pluralizable messages + :param string: the translated message string, or a + ``(singular, plural)`` tuple for pluralizable messages + :param locations: a sequence of ``(filenname, lineno)`` tuples + :param flags: a set or sequence of flags + :param auto_comments: a sequence of automatic comments for the message + :param user_comments: a sequence of user comments for the message + :param previous_id: the previous message ID, or a ``(singular, plural)`` + tuple for pluralizable messages + :param lineno: the line number on which the msgid line was found in the + PO file, if any + :param context: the message context + """ + self.id = id #: The message ID + if not string and self.pluralizable: + string = (u'', u'') + self.string = string #: The message translation + self.locations = list(distinct(locations)) + self.flags = set(flags) + if id and self.python_format: + self.flags.add('python-format') + else: + self.flags.discard('python-format') + self.auto_comments = list(distinct(auto_comments)) + self.user_comments = list(distinct(user_comments)) + if isinstance(previous_id, basestring): + self.previous_id = [previous_id] + else: + self.previous_id = list(previous_id) + self.lineno = lineno + self.context = context + + def __repr__(self): + return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, + list(self.flags)) + + def __cmp__(self, obj): + """Compare Messages, taking into account plural ids""" + if isinstance(obj, Message): + plural = self.pluralizable + obj_plural = obj.pluralizable + if plural and obj_plural: + return cmp(self.id[0], obj.id[0]) + elif plural: + return cmp(self.id[0], obj.id) + elif obj_plural: + return cmp(self.id, obj.id[0]) + return cmp(self.id, obj.id) + + def clone(self): + return Message(*map(copy, (self.id, self.string, self.locations, + self.flags, self.auto_comments, + self.user_comments, self.previous_id, + self.lineno, self.context))) + + def check(self, catalog=None): + """Run various validation checks on the message. Some validations + are only performed if the catalog is provided. This method returns + a sequence of `TranslationError` objects. 
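+
+        The checks applied are the checker functions collected in
+        `babel.messages.checkers`: the builtin `num_plurals` and
+        `python_format` checks, plus any custom checkers registered under
+        the ``babel.checkers`` entry point group.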
+
+        :rtype: ``iterator``
+        :param catalog: A catalog instance that is passed to the checkers
+        :see: `Catalog.check` for a way to perform checks for all messages
+              in a catalog.
+        """
+        from babel.messages.checkers import checkers
+        errors = []
+        for checker in checkers:
+            try:
+                checker(catalog, self)
+            except TranslationError, e:
+                errors.append(e)
+        return errors
+
+    def fuzzy(self):
+        return 'fuzzy' in self.flags
+    fuzzy = property(fuzzy, doc="""\
+        Whether the translation is fuzzy.
+
+        >>> Message('foo').fuzzy
+        False
+        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
+        >>> msg.fuzzy
+        True
+        >>> msg
+        <Message 'foo' (flags: ['fuzzy'])>
+
+        :type: `bool`
+        """)
+
+    def pluralizable(self):
+        return isinstance(self.id, (list, tuple))
+    pluralizable = property(pluralizable, doc="""\
+        Whether the message is pluralizable.
+
+        >>> Message('foo').pluralizable
+        False
+        >>> Message(('foo', 'bar')).pluralizable
+        True
+
+        :type: `bool`
+        """)
+
+    def python_format(self):
+        ids = self.id
+        if not isinstance(ids, (list, tuple)):
+            ids = [ids]
+        return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids]))
+    python_format = property(python_format, doc="""\
+        Whether the message contains Python-style parameters.
+
+        >>> Message('foo %(name)s bar').python_format
+        True
+        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
+        True
+
+        :type: `bool`
+        """)
+
+
+class TranslationError(Exception):
+    """Exception thrown by translation checkers when invalid message
+    translations are encountered."""
+
+
+DEFAULT_HEADER = u"""\
+# Translations template for PROJECT.
+# Copyright (C) YEAR ORGANIZATION
+# This file is distributed under the same license as the PROJECT project.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#"""
+
+
+class Catalog(object):
+    """Representation of a message catalog."""
+
+    def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
+                 project=None, version=None, copyright_holder=None,
+                 msgid_bugs_address=None, creation_date=None,
+                 revision_date=None, last_translator=None, language_team=None,
+                 charset='utf-8', fuzzy=True):
+        """Initialize the catalog object.
+ + :param locale: the locale identifier or `Locale` object, or `None` + if the catalog is not bound to a locale (which basically + means it's a template) + :param domain: the message domain + :param header_comment: the header comment as string, or `None` for the + default header + :param project: the project's name + :param version: the project's version + :param copyright_holder: the copyright holder of the catalog + :param msgid_bugs_address: the email address or URL to submit bug + reports to + :param creation_date: the date the catalog was created + :param revision_date: the date the catalog was revised + :param last_translator: the name and email of the last translator + :param language_team: the name and email of the language team + :param charset: the encoding to use in the output + :param fuzzy: the fuzzy bit on the catalog header + """ + self.domain = domain #: The message domain + if locale: + locale = Locale.parse(locale) + self.locale = locale #: The locale or `None` + self._header_comment = header_comment + self._messages = odict() + + self.project = project or 'PROJECT' #: The project name + self.version = version or 'VERSION' #: The project version + self.copyright_holder = copyright_holder or 'ORGANIZATION' + self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' + + self.last_translator = last_translator or 'FULL NAME ' + """Name and email address of the last translator.""" + self.language_team = language_team or 'LANGUAGE ' + """Name and email address of the language team.""" + + self.charset = charset or 'utf-8' + + if creation_date is None: + creation_date = datetime.now(LOCALTZ) + elif isinstance(creation_date, datetime) and not creation_date.tzinfo: + creation_date = creation_date.replace(tzinfo=LOCALTZ) + self.creation_date = creation_date #: Creation date of the template + if revision_date is None: + revision_date = datetime.now(LOCALTZ) + elif isinstance(revision_date, datetime) and not revision_date.tzinfo: + revision_date = revision_date.replace(tzinfo=LOCALTZ) + self.revision_date = revision_date #: Last revision date of the catalog + self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`) + + self.obsolete = odict() #: Dictionary of obsolete messages + self._num_plurals = None + self._plural_expr = None + + def _get_header_comment(self): + comment = self._header_comment + comment = comment.replace('PROJECT', self.project) \ + .replace('VERSION', self.version) \ + .replace('YEAR', self.revision_date.strftime('%Y')) \ + .replace('ORGANIZATION', self.copyright_holder) + if self.locale: + comment = comment.replace('Translations template', '%s translations' + % self.locale.english_name) + return comment + + def _set_header_comment(self, string): + self._header_comment = string + + header_comment = property(_get_header_comment, _set_header_comment, doc="""\ + The header comment for the catalog. + + >>> catalog = Catalog(project='Foobar', version='1.0', + ... copyright_holder='Foo Company') + >>> print catalog.header_comment #doctest: +ELLIPSIS + # Translations template for Foobar. + # Copyright (C) ... Foo Company + # This file is distributed under the same license as the Foobar project. + # FIRST AUTHOR , .... + # + + The header can also be set from a string. Any known upper-case variables + will be replaced when the header is retrieved again: + + >>> catalog = Catalog(project='Foobar', version='1.0', + ... copyright_holder='Foo Company') + >>> catalog.header_comment = '''\\ + ... # The POT for my really cool PROJECT project. + ... 
# Copyright (C) 1990-2003 ORGANIZATION + ... # This file is distributed under the same license as the PROJECT + ... # project. + ... #''' + >>> print catalog.header_comment + # The POT for my really cool Foobar project. + # Copyright (C) 1990-2003 Foo Company + # This file is distributed under the same license as the Foobar + # project. + # + + :type: `unicode` + """) + + def _get_mime_headers(self): + headers = [] + headers.append(('Project-Id-Version', + '%s %s' % (self.project, self.version))) + headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) + headers.append(('POT-Creation-Date', + format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', + locale='en'))) + if self.locale is None: + headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE')) + headers.append(('Last-Translator', 'FULL NAME ')) + headers.append(('Language-Team', 'LANGUAGE ')) + else: + headers.append(('PO-Revision-Date', + format_datetime(self.revision_date, + 'yyyy-MM-dd HH:mmZ', locale='en'))) + headers.append(('Last-Translator', self.last_translator)) + headers.append(('Language-Team', + self.language_team.replace('LANGUAGE', + str(self.locale)))) + headers.append(('Plural-Forms', self.plural_forms)) + headers.append(('MIME-Version', '1.0')) + headers.append(('Content-Type', + 'text/plain; charset=%s' % self.charset)) + headers.append(('Content-Transfer-Encoding', '8bit')) + headers.append(('Generated-By', 'Babel %s\n' % VERSION)) + return headers + + def _set_mime_headers(self, headers): + for name, value in headers: + if name.lower() == 'content-type': + mimetype, params = parse_header(value) + if 'charset' in params: + self.charset = params['charset'].lower() + break + for name, value in headers: + name = name.lower().decode(self.charset) + value = value.decode(self.charset) + if name == 'project-id-version': + parts = value.split(' ') + self.project = u' '.join(parts[:-1]) + self.version = parts[-1] + elif name == 'report-msgid-bugs-to': + self.msgid_bugs_address = value + elif name == 'last-translator': + self.last_translator = value + elif name == 'language-team': + self.language_team = value + elif name == 'plural-forms': + _, params = parse_header(' ;' + value) + self._num_plurals = int(params.get('nplurals', 2)) + self._plural_expr = params.get('plural', '(n != 1)') + elif name == 'pot-creation-date': + # FIXME: this should use dates.parse_datetime as soon as that + # is ready + value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1) + + tt = time.strptime(value, '%Y-%m-%d %H:%M') + ts = time.mktime(tt) + + # Separate the offset into a sign component, hours, and minutes + plus_minus_s, rest = tzoffset[0], tzoffset[1:] + hours_offset_s, mins_offset_s = rest[:2], rest[2:] + + # Make them all integers + plus_minus = int(plus_minus_s + '1') + hours_offset = int(hours_offset_s) + mins_offset = int(mins_offset_s) + + # Calculate net offset + net_mins_offset = hours_offset * 60 + net_mins_offset += mins_offset + net_mins_offset *= plus_minus + + # Create an offset object + tzoffset = FixedOffsetTimezone(net_mins_offset) + + # Store the offset in a datetime object + dt = datetime.fromtimestamp(ts) + self.creation_date = dt.replace(tzinfo=tzoffset) + elif name == 'po-revision-date': + # Keep the value if it's not the default one + if 'YEAR' not in value: + # FIXME: this should use dates.parse_datetime as soon as + # that is ready + value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1) + tt = time.strptime(value, '%Y-%m-%d %H:%M') + ts = time.mktime(tt) + + # Separate the offset into a sign 
component, hours, and + # minutes + plus_minus_s, rest = tzoffset[0], tzoffset[1:] + hours_offset_s, mins_offset_s = rest[:2], rest[2:] + + # Make them all integers + plus_minus = int(plus_minus_s + '1') + hours_offset = int(hours_offset_s) + mins_offset = int(mins_offset_s) + + # Calculate net offset + net_mins_offset = hours_offset * 60 + net_mins_offset += mins_offset + net_mins_offset *= plus_minus + + # Create an offset object + tzoffset = FixedOffsetTimezone(net_mins_offset) + + # Store the offset in a datetime object + dt = datetime.fromtimestamp(ts) + self.revision_date = dt.replace(tzinfo=tzoffset) + + mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ + The MIME headers of the catalog, used for the special ``msgid ""`` entry. + + The behavior of this property changes slightly depending on whether a locale + is set or not, the latter indicating that the catalog is actually a template + for actual translations. + + Here's an example of the output for such a catalog template: + + >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) + >>> catalog = Catalog(project='Foobar', version='1.0', + ... creation_date=created) + >>> for name, value in catalog.mime_headers: + ... print '%s: %s' % (name, value) + Project-Id-Version: Foobar 1.0 + Report-Msgid-Bugs-To: EMAIL@ADDRESS + POT-Creation-Date: 1990-04-01 15:30+0000 + PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE + Last-Translator: FULL NAME + Language-Team: LANGUAGE + MIME-Version: 1.0 + Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: 8bit + Generated-By: Babel ... + + And here's an example of the output when the locale is set: + + >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) + >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', + ... creation_date=created, revision_date=revised, + ... last_translator='John Doe ', + ... language_team='de_DE ') + >>> for name, value in catalog.mime_headers: + ... print '%s: %s' % (name, value) + Project-Id-Version: Foobar 1.0 + Report-Msgid-Bugs-To: EMAIL@ADDRESS + POT-Creation-Date: 1990-04-01 15:30+0000 + PO-Revision-Date: 1990-08-03 12:00+0000 + Last-Translator: John Doe + Language-Team: de_DE + Plural-Forms: nplurals=2; plural=(n != 1) + MIME-Version: 1.0 + Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: 8bit + Generated-By: Babel ... + + :type: `list` + """) + + def num_plurals(self): + if self._num_plurals is None: + num = 2 + if self.locale: + num = get_plural(self.locale)[0] + self._num_plurals = num + return self._num_plurals + num_plurals = property(num_plurals, doc="""\ + The number of plurals used by the catalog or locale. + + >>> Catalog(locale='en').num_plurals + 2 + >>> Catalog(locale='ga').num_plurals + 3 + + :type: `int` + """) + + def plural_expr(self): + if self._plural_expr is None: + expr = '(n != 1)' + if self.locale: + expr = get_plural(self.locale)[1] + self._plural_expr = expr + return self._plural_expr + plural_expr = property(plural_expr, doc="""\ + The plural expression used by the catalog or locale. + + >>> Catalog(locale='en').plural_expr + '(n != 1)' + >>> Catalog(locale='ga').plural_expr + '(n==1 ? 0 : n==2 ? 1 : 2)' + + :type: `basestring` + """) + + def plural_forms(self): + return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr) + plural_forms = property(plural_forms, doc="""\ + Return the plural forms declaration for the locale. 
+ + >>> Catalog(locale='en').plural_forms + 'nplurals=2; plural=(n != 1)' + >>> Catalog(locale='pt_BR').plural_forms + 'nplurals=2; plural=(n > 1)' + + :type: `str` + """) + + def __contains__(self, id): + """Return whether the catalog has a message with the specified ID.""" + return self._key_for(id) in self._messages + + def __len__(self): + """The number of messages in the catalog. + + This does not include the special ``msgid ""`` entry. + """ + return len(self._messages) + + def __iter__(self): + """Iterates through all the entries in the catalog, in the order they + were added, yielding a `Message` object for every entry. + + :rtype: ``iterator`` + """ + buf = [] + for name, value in self.mime_headers: + buf.append('%s: %s' % (name, value)) + flags = set() + if self.fuzzy: + flags |= set(['fuzzy']) + yield Message(u'', '\n'.join(buf), flags=flags) + for key in self._messages: + yield self._messages[key] + + def __repr__(self): + locale = '' + if self.locale: + locale = ' %s' % self.locale + return '<%s %r%s>' % (type(self).__name__, self.domain, locale) + + def __delitem__(self, id): + """Delete the message with the specified ID.""" + self.delete(id) + + def __getitem__(self, id): + """Return the message with the specified ID. + + :param id: the message ID + :return: the message with the specified ID, or `None` if no such + message is in the catalog + :rtype: `Message` + """ + return self.get(id) + + def __setitem__(self, id, message): + """Add or update the message with the specified ID. + + >>> catalog = Catalog() + >>> catalog[u'foo'] = Message(u'foo') + >>> catalog[u'foo'] + + + If a message with that ID is already in the catalog, it is updated + to include the locations and flags of the new message. + + >>> catalog = Catalog() + >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) + >>> catalog[u'foo'].locations + [('main.py', 1)] + >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) + >>> catalog[u'foo'].locations + [('main.py', 1), ('utils.py', 5)] + + :param id: the message ID + :param message: the `Message` object + """ + assert isinstance(message, Message), 'expected a Message object' + key = self._key_for(id, message.context) + current = self._messages.get(key) + if current: + if message.pluralizable and not current.pluralizable: + # The new message adds pluralization + current.id = message.id + current.string = message.string + current.locations = list(distinct(current.locations + + message.locations)) + current.auto_comments = list(distinct(current.auto_comments + + message.auto_comments)) + current.user_comments = list(distinct(current.user_comments + + message.user_comments)) + current.flags |= message.flags + message = current + elif id == '': + # special treatment for the header message + headers = message_from_string(message.string.encode(self.charset)) + self.mime_headers = headers.items() + self.header_comment = '\n'.join(['# %s' % comment for comment + in message.user_comments]) + self.fuzzy = message.fuzzy + else: + if isinstance(id, (list, tuple)): + assert isinstance(message.string, (list, tuple)), \ + 'Expected sequence but got %s' % type(message.string) + self._messages[key] = message + + def add(self, id, string=None, locations=(), flags=(), auto_comments=(), + user_comments=(), previous_id=(), lineno=None, context=None): + """Add or update the message with the specified ID. 
+
+        >>> catalog = Catalog()
+        >>> catalog.add(u'foo')
+        >>> catalog[u'foo']
+        <Message u'foo' (flags: [])>
+
+        This method simply constructs a `Message` object with the given
+        arguments and invokes `__setitem__` with that object.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple for
+                   pluralizable messages
+        :param string: the translated message string, or a
+                       ``(singular, plural)`` tuple for pluralizable messages
+        :param locations: a sequence of ``(filename, lineno)`` tuples
+        :param flags: a set or sequence of flags
+        :param auto_comments: a sequence of automatic comments
+        :param user_comments: a sequence of user comments
+        :param previous_id: the previous message ID, or a ``(singular, plural)``
+                            tuple for pluralizable messages
+        :param lineno: the line number on which the msgid line was found in the
+                       PO file, if any
+        :param context: the message context
+        """
+        self[id] = Message(id, string, list(locations), flags, auto_comments,
+                           user_comments, previous_id, lineno=lineno,
+                           context=context)
+
+    def check(self):
+        """Run various validation checks on the translations in the catalog.
+
+        For every message which fails validation, this method yields a
+        ``(message, errors)`` tuple, where ``message`` is the `Message` object
+        and ``errors`` is a sequence of `TranslationError` objects.
+
+        :rtype: ``iterator``
+        """
+        for message in self._messages.values():
+            errors = message.check(catalog=self)
+            if errors:
+                yield message, errors
+
+    def get(self, id, context=None):
+        """Return the message with the specified ID and context.
+
+        :param id: the message ID
+        :param context: the message context, or ``None`` for no context
+        :return: the message with the specified ID, or `None` if no such
+                 message is in the catalog
+        :rtype: `Message`
+        """
+        return self._messages.get(self._key_for(id, context))
+
+    def delete(self, id, context=None):
+        """Delete the message with the specified ID and context.
+
+        :param id: the message ID
+        :param context: the message context, or ``None`` for no context
+        """
+        key = self._key_for(id, context)
+        if key in self._messages:
+            del self._messages[key]
+
+    def update(self, template, no_fuzzy_matching=False):
+        """Update the catalog based on the given template catalog.
+
+        >>> from babel.messages import Catalog
+        >>> template = Catalog()
+        >>> template.add('green', locations=[('main.py', 99)])
+        >>> template.add('blue', locations=[('main.py', 100)])
+        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
+        >>> catalog = Catalog(locale='de_DE')
+        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
+        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
+        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
+        ...
locations=[('util.py', 38)]) + + >>> catalog.update(template) + >>> len(catalog) + 3 + + >>> msg1 = catalog['green'] + >>> msg1.string + >>> msg1.locations + [('main.py', 99)] + + >>> msg2 = catalog['blue'] + >>> msg2.string + u'blau' + >>> msg2.locations + [('main.py', 100)] + + >>> msg3 = catalog['salad'] + >>> msg3.string + (u'Salat', u'Salate') + >>> msg3.locations + [('util.py', 42)] + + Messages that are in the catalog but not in the template are removed + from the main collection, but can still be accessed via the `obsolete` + member: + + >>> 'head' in catalog + False + >>> catalog.obsolete.values() + [] + + :param template: the reference catalog, usually read from a POT file + :param no_fuzzy_matching: whether to use fuzzy matching of message IDs + """ + messages = self._messages + remaining = messages.copy() + self._messages = odict() + + # Prepare for fuzzy matching + fuzzy_candidates = [] + if not no_fuzzy_matching: + fuzzy_candidates = dict([ + (self._key_for(msgid), messages[msgid].context) + for msgid in messages if msgid and messages[msgid].string + ]) + fuzzy_matches = set() + + def _merge(message, oldkey, newkey): + message = message.clone() + fuzzy = False + if oldkey != newkey: + fuzzy = True + fuzzy_matches.add(oldkey) + oldmsg = messages.get(oldkey) + if isinstance(oldmsg.id, basestring): + message.previous_id = [oldmsg.id] + else: + message.previous_id = list(oldmsg.id) + else: + oldmsg = remaining.pop(oldkey, None) + message.string = oldmsg.string + if isinstance(message.id, (list, tuple)): + if not isinstance(message.string, (list, tuple)): + fuzzy = True + message.string = tuple( + [message.string] + ([u''] * (len(message.id) - 1)) + ) + elif len(message.string) != self.num_plurals: + fuzzy = True + message.string = tuple(message.string[:len(oldmsg.string)]) + elif isinstance(message.string, (list, tuple)): + fuzzy = True + message.string = message.string[0] + message.flags |= oldmsg.flags + if fuzzy: + message.flags |= set([u'fuzzy']) + self[message.id] = message + + for message in template: + if message.id: + key = self._key_for(message.id, message.context) + if key in messages: + _merge(message, key, key) + else: + if no_fuzzy_matching is False: + # do some fuzzy matching with difflib + if isinstance(key, tuple): + matchkey = key[0] # just the msgid, no context + else: + matchkey = key + matches = get_close_matches(matchkey.lower().strip(), + fuzzy_candidates.keys(), 1) + if matches: + newkey = matches[0] + newctxt = fuzzy_candidates[newkey] + if newctxt is not None: + newkey = newkey, newctxt + _merge(message, newkey, key) + continue + + self[message.id] = message + + self.obsolete = odict() + for msgid in remaining: + if no_fuzzy_matching or msgid not in fuzzy_matches: + self.obsolete[msgid] = remaining[msgid] + # Make updated catalog's POT-Creation-Date equal to the template + # used to update the catalog + self.creation_date = template.creation_date + + def _key_for(self, id, context=None): + """The key for a message is just the singular ID even for pluralizable + messages, but is a ``(msgid, msgctxt)`` tuple for context-specific + messages. + """ + key = id + if isinstance(key, (list, tuple)): + key = id[0] + if context is not None: + key = (key, context) + return key diff --git a/babel3/babel/messages/checkers.py b/babel3/babel/messages/checkers.py new file mode 100644 --- /dev/null +++ b/babel3/babel/messages/checkers.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. 
+# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Various routines that help with validation of translations. + +:since: version 0.9 +""" + +from itertools import izip +from babel.messages.catalog import TranslationError, PYTHON_FORMAT +from babel.util import set + +#: list of format chars that are compatible to each other +_string_format_compatibilities = [ + set(['i', 'd', 'u']), + set(['x', 'X']), + set(['f', 'F', 'g', 'G']) +] + + +def num_plurals(catalog, message): + """Verify the number of plurals in the translation.""" + if not message.pluralizable: + if not isinstance(message.string, basestring): + raise TranslationError("Found plural forms for non-pluralizable " + "message") + return + + # skip further tests if no catalog is provided. + elif catalog is None: + return + + msgstrs = message.string + if not isinstance(msgstrs, (list, tuple)): + msgstrs = (msgstrs,) + if len(msgstrs) != catalog.num_plurals: + raise TranslationError("Wrong number of plural forms (expected %d)" % + catalog.num_plurals) + + +def python_format(catalog, message): + """Verify the format string placeholders in the translation.""" + if 'python-format' not in message.flags: + return + msgids = message.id + if not isinstance(msgids, (list, tuple)): + msgids = (msgids,) + msgstrs = message.string + if not isinstance(msgstrs, (list, tuple)): + msgstrs = (msgstrs,) + + for msgid, msgstr in izip(msgids, msgstrs): + if msgstr: + _validate_format(msgid, msgstr) + + +def _validate_format(format, alternative): + """Test format string `alternative` against `format`. `format` can be the + msgid of a message and `alternative` one of the `msgstr`\s. The two + arguments are not interchangeable as `alternative` may contain less + placeholders if `format` uses named placeholders. + + The behavior of this function is undefined if the string does not use + string formattings. + + If the string formatting of `alternative` is compatible to `format` the + function returns `None`, otherwise a `TranslationError` is raised. + + Examples for compatible format strings: + + >>> _validate_format('Hello %s!', 'Hallo %s!') + >>> _validate_format('Hello %i!', 'Hallo %d!') + + Example for an incompatible format strings: + + >>> _validate_format('Hello %(name)s!', 'Hallo %s!') + Traceback (most recent call last): + ... + TranslationError: the format strings are of different kinds + + This function is used by the `python_format` checker. 
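+
+    A translation may also drop placeholders entirely when the original
+    uses named placeholders:
+
+    >>> _validate_format('Hello %(name)s!', 'Hallo!')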
+ + :param format: The original format string + :param alternative: The alternative format string that should be checked + against format + :return: None on success + :raises TranslationError: on formatting errors + """ + + def _parse(string): + result = [] + for match in PYTHON_FORMAT.finditer(string): + name, format, typechar = match.groups() + if typechar == '%' and name is None: + continue + result.append((name, str(typechar))) + return result + + def _compatible(a, b): + if a == b: + return True + for set in _string_format_compatibilities: + if a in set and b in set: + return True + return False + + def _check_positional(results): + positional = None + for name, char in results: + if positional is None: + positional = name is None + else: + if (name is None) != positional: + raise TranslationError('format string mixes positional ' + 'and named placeholders') + return bool(positional) + + a, b = map(_parse, (format, alternative)) + + # now check if both strings are positional or named + a_positional, b_positional = map(_check_positional, (a, b)) + if a_positional and not b_positional and not b: + raise TranslationError('placeholders are incompatible') + elif a_positional != b_positional: + raise TranslationError('the format strings are of different kinds') + + # if we are operating on positional strings both must have the + # same number of format chars and those must be compatible + if a_positional: + if len(a) != len(b): + raise TranslationError('positional format placeholders are ' + 'unbalanced') + for idx, ((_, first), (_, second)) in enumerate(izip(a, b)): + if not _compatible(first, second): + raise TranslationError('incompatible format for placeholder ' + '%d: %r and %r are not compatible' % + (idx + 1, first, second)) + + # otherwise the second string must not have names the first one + # doesn't have and the types of those included must be compatible + else: + type_map = dict(a) + for name, typechar in b: + if name not in type_map: + raise TranslationError('unknown named placeholder %r' % name) + elif not _compatible(typechar, type_map[name]): + raise TranslationError('incompatible format for ' + 'placeholder %r: ' + '%r and %r are not compatible' % + (name, typechar, type_map[name])) + + +def _find_checkers(): + try: + from pkg_resources import working_set + except ImportError: + return [num_plurals, python_format] + checkers = [] + for entry_point in working_set.iter_entry_points('babel.checkers'): + checkers.append(entry_point.load()) + return checkers + + +checkers = _find_checkers() diff --git a/babel3/babel/messages/extract.py b/babel3/babel/messages/extract.py new file mode 100644 --- /dev/null +++ b/babel3/babel/messages/extract.py @@ -0,0 +1,550 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Basic infrastructure for extracting localizable messages from source files. + +This module defines an extensible system for collecting localizable message +strings from a variety of sources. A native extractor for Python source files +is builtin, extractors for other sources can be added using very simple plugins. 
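+
+A custom extractor is simply a function with the same signature as the
+builtin ones: it is called with a file-like object, the list of keywords,
+the list of comment tags, and an options dictionary, and it yields
+``(lineno, funcname, message, comments)`` tuples. A minimal extractor for
+plain text files (the name ``extract_txt`` is only an example) could look
+like this::
+
+    def extract_txt(fileobj, keywords, comment_tags, options):
+        lineno = 0
+        for line in fileobj:
+            lineno += 1
+            line = line.strip()
+            if line:
+                yield lineno, None, line.decode(options.get('encoding',
+                                                            'utf-8')), []
+
+Such a function can be referenced explicitly (for example as
+``mypackage.extractors:extract_txt``) or registered under the
+``babel.extractors`` entry point group.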
+ +The main entry points into the extraction functionality are the functions +`extract_from_dir` and `extract_from_file`. +""" + +import os +import sys +from tokenize import generate_tokens, COMMENT, NAME, OP, STRING + +from babel.util import parse_encoding, pathmatch, relpath, set +from textwrap import dedent + +__all__ = ['extract', 'extract_from_dir', 'extract_from_file'] +__docformat__ = 'restructuredtext en' + +GROUP_NAME = 'babel.extractors' + +DEFAULT_KEYWORDS = { + '_': None, + 'gettext': None, + 'ngettext': (1, 2), + 'ugettext': None, + 'ungettext': (1, 2), + 'dgettext': (2,), + 'dngettext': (2, 3), + 'N_': None +} + +DEFAULT_MAPPING = [('**.py', 'python')] + +empty_msgid_warning = ( +'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") ' +'returns the header entry with meta information, not the empty string.') + + +def _strip_comment_tags(comments, tags): + """Helper function for `extract` that strips comment tags from strings + in a list of comment lines. This functions operates in-place. + """ + def _strip(line): + for tag in tags: + if line.startswith(tag): + return line[len(tag):].strip() + return line + comments[:] = map(_strip, comments) + + +def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, + options_map=None, keywords=DEFAULT_KEYWORDS, + comment_tags=(), callback=None, strip_comment_tags=False): + """Extract messages from any source files found in the given directory. + + This function generates tuples of the form: + + ``(filename, lineno, message, comments)`` + + Which extraction method is used per file is determined by the `method_map` + parameter, which maps extended glob patterns to extraction method names. + For example, the following is the default mapping: + + >>> method_map = [ + ... ('**.py', 'python') + ... ] + + This basically says that files with the filename extension ".py" at any + level inside the directory should be processed by the "python" extraction + method. Files that don't match any of the mapping patterns are ignored. See + the documentation of the `pathmatch` function for details on the pattern + syntax. + + The following extended mapping would also use the "genshi" extraction + method on any file in "templates" subdirectory: + + >>> method_map = [ + ... ('**/templates/**.*', 'genshi'), + ... ('**.py', 'python') + ... ] + + The dictionary provided by the optional `options_map` parameter augments + these mappings. It uses extended glob patterns as keys, and the values are + dictionaries mapping options names to option values (both strings). + + The glob patterns of the `options_map` do not necessarily need to be the + same as those used in the method mapping. For example, while all files in + the ``templates`` folders in an application may be Genshi applications, the + options for those files may differ based on extension: + + >>> options_map = { + ... '**/templates/**.txt': { + ... 'template_class': 'genshi.template:TextTemplate', + ... 'encoding': 'latin-1' + ... }, + ... '**/templates/**.html': { + ... 'include_attrs': '' + ... } + ... } + + :param dirname: the path to the directory to extract messages from + :param method_map: a list of ``(pattern, method)`` tuples that maps of + extraction method names to extended glob patterns + :param options_map: a dictionary of additional options (optional) + :param keywords: a dictionary mapping keywords (i.e. 
names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of tags of translator comments to search for + and include in the results + :param callback: a function that is called for every file that message are + extracted from, just before the extraction itself is + performed; the function is passed the filename, the name + of the extraction method and and the options dictionary as + positional arguments, in that order + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. + :return: an iterator over ``(filename, lineno, funcname, message)`` tuples + :rtype: ``iterator`` + :see: `pathmatch` + """ + if options_map is None: + options_map = {} + + absname = os.path.abspath(dirname) + for root, dirnames, filenames in os.walk(absname): + for subdir in dirnames: + if subdir.startswith('.') or subdir.startswith('_'): + dirnames.remove(subdir) + dirnames.sort() + filenames.sort() + for filename in filenames: + filename = relpath( + os.path.join(root, filename).replace(os.sep, '/'), + dirname + ) + for pattern, method in method_map: + if pathmatch(pattern, filename): + filepath = os.path.join(absname, filename) + options = {} + for opattern, odict in options_map.items(): + if pathmatch(opattern, filename): + options = odict + if callback: + callback(filename, method, options) + for lineno, message, comments in \ + extract_from_file(method, filepath, + keywords=keywords, + comment_tags=comment_tags, + options=options, + strip_comment_tags= + strip_comment_tags): + yield filename, lineno, message, comments + break + + +def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, + comment_tags=(), options=None, strip_comment_tags=False): + """Extract messages from a specific file. + + This function returns a list of tuples of the form: + + ``(lineno, funcname, message)`` + + :param filename: the path to the file to extract messages from + :param method: a string specifying the extraction method (.e.g. "python") + :param keywords: a dictionary mapping keywords (i.e. names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of translator tags to search for and include + in the results + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. + :param options: a dictionary of additional options (optional) + :return: the list of extracted messages + :rtype: `list` + """ + fileobj = open(filename, 'U') + try: + return list(extract(method, fileobj, keywords, comment_tags, options, + strip_comment_tags)) + finally: + fileobj.close() + + +def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), + options=None, strip_comment_tags=False): + """Extract messages from the given file-like object using the specified + extraction method. + + This function returns a list of tuples of the form: + + ``(lineno, message, comments)`` + + The implementation dispatches the actual extraction to plugins, based on the + value of the ``method`` parameter. + + >>> source = '''# foo module + ... def run(argv): + ... print _('Hello, world!') + ... ''' + + >>> from StringIO import StringIO + >>> for message in extract('python', StringIO(source)): + ... 
print message + (3, u'Hello, world!', []) + + :param method: a string specifying the extraction method (.e.g. "python"); + if this is a simple name, the extraction function will be + looked up by entry point; if it is an explicit reference + to a function (of the form ``package.module:funcname`` or + ``package.module.funcname``), the corresponding function + will be imported and used + :param fileobj: the file-like object the messages should be extracted from + :param keywords: a dictionary mapping keywords (i.e. names of functions + that should be recognized as translation functions) to + tuples that specify which of their arguments contain + localizable strings + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. + :return: the list of extracted messages + :rtype: `list` + :raise ValueError: if the extraction method is not registered + """ + func = None + if ':' in method or '.' in method: + if ':' not in method: + lastdot = method.rfind('.') + module, attrname = method[:lastdot], method[lastdot + 1:] + else: + module, attrname = method.split(':', 1) + func = getattr(__import__(module, {}, {}, [attrname]), attrname) + else: + try: + from pkg_resources import working_set + except ImportError: + # pkg_resources is not available, so we resort to looking up the + # builtin extractors directly + builtin = {'ignore': extract_nothing, 'python': extract_python} + func = builtin.get(method) + else: + for entry_point in working_set.iter_entry_points(GROUP_NAME, + method): + func = entry_point.load(require=True) + break + if func is None: + raise ValueError('Unknown extraction method %r' % method) + + results = func(fileobj, keywords.keys(), comment_tags, + options=options or {}) + + for lineno, funcname, messages, comments in results: + if funcname: + spec = keywords[funcname] or (1,) + else: + spec = (1,) + if not isinstance(messages, (list, tuple)): + messages = [messages] + if not messages: + continue + + # Validate the messages against the keyword's specification + msgs = [] + invalid = False + # last_index is 1 based like the keyword spec + last_index = len(messages) + for index in spec: + if last_index < index: + # Not enough arguments + invalid = True + break + message = messages[index - 1] + if message is None: + invalid = True + break + msgs.append(message) + if invalid: + continue + + first_msg_index = spec[0] - 1 + if not messages[first_msg_index]: + # An empty string msgid isn't valid, emit a warning + where = '%s:%i' % (hasattr(fileobj, 'name') and \ + fileobj.name or '(unknown)', lineno) + print >> sys.stderr, empty_msgid_warning % where + continue + + messages = tuple(msgs) + if len(messages) == 1: + messages = messages[0] + + if strip_comment_tags: + _strip_comment_tags(comments, comment_tags) + yield lineno, messages, comments + + +def extract_nothing(fileobj, keywords, comment_tags, options): + """Pseudo extractor that does not actually extract anything, but simply + returns an empty list. + """ + return [] + + +def extract_python(fileobj, keywords, comment_tags, options): + """Extract messages from Python source code. + + :param fileobj: the seekable, file-like object the messages should be + extracted from + :param keywords: a list of keywords (i.e. 
function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + funcname = lineno = message_lineno = None + call_stack = -1 + buf = [] + messages = [] + translator_comments = [] + in_def = in_translator_comments = False + comment_tag = None + + encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1') + + tokens = generate_tokens(fileobj.readline) + for tok, value, (lineno, _), _, _ in tokens: + if call_stack == -1 and tok == NAME and value in ('def', 'class'): + in_def = True + elif tok == OP and value == '(': + if in_def: + # Avoid false positives for declarations such as: + # def gettext(arg='message'): + in_def = False + continue + if funcname: + message_lineno = lineno + call_stack += 1 + elif in_def and tok == OP and value == ':': + # End of a class definition without parens + in_def = False + continue + elif call_stack == -1 and tok == COMMENT: + # Strip the comment token from the line + value = value.decode(encoding)[1:].strip() + if in_translator_comments and \ + translator_comments[-1][0] == lineno - 1: + # We're already inside a translator comment, continue appending + translator_comments.append((lineno, value)) + continue + # If execution reaches this point, let's see if comment line + # starts with one of the comment tags + for comment_tag in comment_tags: + if value.startswith(comment_tag): + in_translator_comments = True + translator_comments.append((lineno, value)) + break + elif funcname and call_stack == 0: + if tok == OP and value == ')': + if buf: + messages.append(''.join(buf)) + del buf[:] + else: + messages.append(None) + + if len(messages) > 1: + messages = tuple(messages) + else: + messages = messages[0] + # Comments don't apply unless they immediately preceed the + # message + if translator_comments and \ + translator_comments[-1][0] < message_lineno - 1: + translator_comments = [] + + yield (message_lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + + funcname = lineno = message_lineno = None + call_stack = -1 + messages = [] + translator_comments = [] + in_translator_comments = False + elif tok == STRING: + # Unwrap quotes in a safe manner, maintaining the string's + # encoding + # https://sourceforge.net/tracker/?func=detail&atid=355470& + # aid=617979&group_id=5470 + value = eval('# coding=%s\n%s' % (encoding, value), + {'__builtins__':{}}, {}) + if isinstance(value, str): + value = value.decode(encoding) + buf.append(value) + elif tok == OP and value == ',': + if buf: + messages.append(''.join(buf)) + del buf[:] + else: + messages.append(None) + if translator_comments: + # We have translator comments, and since we're on a + # comma(,) user is allowed to break into a new line + # Let's increase the last comment's lineno in order + # for the comment to still be a valid one + old_lineno, old_comment = translator_comments.pop() + translator_comments.append((old_lineno+1, old_comment)) + elif call_stack > 0 and tok == OP and value == ')': + call_stack -= 1 + elif funcname and call_stack == -1: + funcname = None + elif tok == NAME and value in keywords: + funcname = value + + +def extract_javascript(fileobj, keywords, comment_tags, options): + """Extract messages from JavaScript source code. 
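+
+    This mirrors `extract_python`, but is driven by the lexer in
+    `babel.messages.jslexer`.  As an illustrative sketch only (the
+    ``gettext`` keyword and the source snippet are merely examples), the
+    extractor can be called directly with a file-like object::
+
+        from StringIO import StringIO
+
+        source = StringIO("msg = gettext('Hello, world!');")
+        for lineno, funcname, messages, comments in \
+                extract_javascript(source, ['gettext'], [], {}):
+            print lineno, messages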
+ + :param fileobj: the seekable, file-like object the messages should be + extracted from + :param keywords: a list of keywords (i.e. function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + from babel.messages.jslexer import tokenize, unquote_string + funcname = message_lineno = None + messages = [] + last_argument = None + translator_comments = [] + concatenate_next = False + encoding = options.get('encoding', 'utf-8') + last_token = None + call_stack = -1 + + for token in tokenize(fileobj.read().decode(encoding)): + if token.type == 'operator' and token.value == '(': + if funcname: + message_lineno = token.lineno + call_stack += 1 + + elif call_stack == -1 and token.type == 'linecomment': + value = token.value[2:].strip() + if translator_comments and \ + translator_comments[-1][0] == token.lineno - 1: + translator_comments.append((token.lineno, value)) + continue + + for comment_tag in comment_tags: + if value.startswith(comment_tag): + translator_comments.append((token.lineno, value.strip())) + break + + elif token.type == 'multilinecomment': + # only one multi-line comment may preceed a translation + translator_comments = [] + value = token.value[2:-2].strip() + for comment_tag in comment_tags: + if value.startswith(comment_tag): + lines = value.splitlines() + if lines: + lines[0] = lines[0].strip() + lines[1:] = dedent('\n'.join(lines[1:])).splitlines() + for offset, line in enumerate(lines): + translator_comments.append((token.lineno + offset, + line)) + break + + elif funcname and call_stack == 0: + if token.type == 'operator' and token.value == ')': + if last_argument is not None: + messages.append(last_argument) + if len(messages) > 1: + messages = tuple(messages) + elif messages: + messages = messages[0] + else: + messages = None + + # Comments don't apply unless they immediately precede the + # message + if translator_comments and \ + translator_comments[-1][0] < message_lineno - 1: + translator_comments = [] + + if messages is not None: + yield (message_lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + + funcname = message_lineno = last_argument = None + concatenate_next = False + translator_comments = [] + messages = [] + call_stack = -1 + + elif token.type == 'string': + new_value = unquote_string(token.value) + if concatenate_next: + last_argument = (last_argument or '') + new_value + concatenate_next = False + else: + last_argument = new_value + + elif token.type == 'operator': + if token.value == ',': + if last_argument is not None: + messages.append(last_argument) + last_argument = None + else: + messages.append(None) + concatenate_next = False + elif token.value == '+': + concatenate_next = True + + elif call_stack > 0 and token.type == 'operator' \ + and token.value == ')': + call_stack -= 1 + + elif funcname and call_stack == -1: + funcname = None + + elif call_stack == -1 and token.type == 'name' and \ + token.value in keywords and \ + (last_token is None or last_token.type != 'name' or + last_token.value != 'function'): + funcname = token.value + + last_token = token diff --git a/babel3/babel/messages/frontend.py b/babel3/babel/messages/frontend.py new file mode 100755 --- /dev/null +++ b/babel3/babel/messages/frontend.py @@ -0,0 +1,1201 @@ +#!/usr/bin/env python 
+# -*- coding: utf-8 -*- +# +# Copyright (C) 2007-2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""Frontends for the message extraction functionality.""" + +from ConfigParser import RawConfigParser +from datetime import datetime +from distutils import log +from distutils.cmd import Command +from distutils.errors import DistutilsOptionError, DistutilsSetupError +from locale import getpreferredencoding +import logging +from optparse import OptionParser +import os +import re +import shutil +from StringIO import StringIO +import sys +import tempfile + +from babel import __version__ as VERSION +from babel import Locale, localedata +from babel.core import UnknownLocaleError +from babel.messages.catalog import Catalog +from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS, \ + DEFAULT_MAPPING +from babel.messages.mofile import write_mo +from babel.messages.pofile import read_po, write_po +from babel.messages.plurals import PLURALS +from babel.util import odict, LOCALTZ + +__all__ = ['CommandLineInterface', 'compile_catalog', 'extract_messages', + 'init_catalog', 'check_message_extractors', 'update_catalog'] +__docformat__ = 'restructuredtext en' + + +class compile_catalog(Command): + """Catalog compilation command for use in ``setup.py`` scripts. + + If correctly installed, this command is available to Setuptools-using + setup scripts automatically. For projects using plain old ``distutils``, + the command needs to be registered explicitly in ``setup.py``:: + + from babel.messages.frontend import compile_catalog + + setup( + ... 
+ cmdclass = {'compile_catalog': compile_catalog} + ) + + :since: version 0.9 + :see: `Integrating new distutils commands `_ + :see: `setuptools `_ + """ + + description = 'compile message catalogs to binary MO files' + user_options = [ + ('domain=', 'D', + "domain of PO file (default 'messages')"), + ('directory=', 'd', + 'path to base directory containing the catalogs'), + ('input-file=', 'i', + 'name of the input file'), + ('output-file=', 'o', + "name of the output file (default " + "'//LC_MESSAGES/.po')"), + ('locale=', 'l', + 'locale of the catalog to compile'), + ('use-fuzzy', 'f', + 'also include fuzzy translations'), + ('statistics', None, + 'print statistics about translations') + ] + boolean_options = ['use-fuzzy', 'statistics'] + + def initialize_options(self): + self.domain = 'messages' + self.directory = None + self.input_file = None + self.output_file = None + self.locale = None + self.use_fuzzy = False + self.statistics = False + + def finalize_options(self): + if not self.input_file and not self.directory: + raise DistutilsOptionError('you must specify either the input file ' + 'or the base directory') + if not self.output_file and not self.directory: + raise DistutilsOptionError('you must specify either the input file ' + 'or the base directory') + + def run(self): + po_files = [] + mo_files = [] + + if not self.input_file: + if self.locale: + po_files.append((self.locale, + os.path.join(self.directory, self.locale, + 'LC_MESSAGES', + self.domain + '.po'))) + mo_files.append(os.path.join(self.directory, self.locale, + 'LC_MESSAGES', + self.domain + '.mo')) + else: + for locale in os.listdir(self.directory): + po_file = os.path.join(self.directory, locale, + 'LC_MESSAGES', self.domain + '.po') + if os.path.exists(po_file): + po_files.append((locale, po_file)) + mo_files.append(os.path.join(self.directory, locale, + 'LC_MESSAGES', + self.domain + '.mo')) + else: + po_files.append((self.locale, self.input_file)) + if self.output_file: + mo_files.append(self.output_file) + else: + mo_files.append(os.path.join(self.directory, self.locale, + 'LC_MESSAGES', + self.domain + '.mo')) + + if not po_files: + raise DistutilsOptionError('no message catalogs found') + + for idx, (locale, po_file) in enumerate(po_files): + mo_file = mo_files[idx] + infile = open(po_file, 'r') + try: + catalog = read_po(infile, locale) + finally: + infile.close() + + if self.statistics: + translated = 0 + for message in list(catalog)[1:]: + if message.string: + translated +=1 + percentage = 0 + if len(catalog): + percentage = translated * 100 // len(catalog) + log.info('%d of %d messages (%d%%) translated in %r', + translated, len(catalog), percentage, po_file) + + if catalog.fuzzy and not self.use_fuzzy: + log.warn('catalog %r is marked as fuzzy, skipping', po_file) + continue + + for message, errors in catalog.check(): + for error in errors: + log.error('error: %s:%d: %s', po_file, message.lineno, + error) + + log.info('compiling catalog %r to %r', po_file, mo_file) + + outfile = open(mo_file, 'wb') + try: + write_mo(outfile, catalog, use_fuzzy=self.use_fuzzy) + finally: + outfile.close() + + +class extract_messages(Command): + """Message extraction command for use in ``setup.py`` scripts. + + If correctly installed, this command is available to Setuptools-using + setup scripts automatically. For projects using plain old ``distutils``, + the command needs to be registered explicitly in ``setup.py``:: + + from babel.messages.frontend import extract_messages + + setup( + ... 
+ cmdclass = {'extract_messages': extract_messages} + ) + + :see: `Integrating new distutils commands `_ + :see: `setuptools `_ + """ + + description = 'extract localizable strings from the project code' + user_options = [ + ('charset=', None, + 'charset to use in the output file'), + ('keywords=', 'k', + 'space-separated list of keywords to look for in addition to the ' + 'defaults'), + ('no-default-keywords', None, + 'do not include the default keywords'), + ('mapping-file=', 'F', + 'path to the mapping configuration file'), + ('no-location', None, + 'do not include location comments with filename and line number'), + ('omit-header', None, + 'do not include msgid "" entry in header'), + ('output-file=', 'o', + 'name of the output file'), + ('width=', 'w', + 'set output line width (default 76)'), + ('no-wrap', None, + 'do not break long message lines, longer than the output line width, ' + 'into several lines'), + ('sort-output', None, + 'generate sorted output (default False)'), + ('sort-by-file', None, + 'sort output by file location (default False)'), + ('msgid-bugs-address=', None, + 'set report address for msgid'), + ('copyright-holder=', None, + 'set copyright holder in output'), + ('add-comments=', 'c', + 'place comment block with TAG (or those preceding keyword lines) in ' + 'output file. Seperate multiple TAGs with commas(,)'), + ('strip-comments', None, + 'strip the comment TAGs from the comments.'), + ('input-dirs=', None, + 'directories that should be scanned for messages'), + ] + boolean_options = [ + 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', + 'sort-output', 'sort-by-file', 'strip-comments' + ] + + def initialize_options(self): + self.charset = 'utf-8' + self.keywords = '' + self._keywords = DEFAULT_KEYWORDS.copy() + self.no_default_keywords = False + self.mapping_file = None + self.no_location = False + self.omit_header = False + self.output_file = None + self.input_dirs = None + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False + self.msgid_bugs_address = None + self.copyright_holder = None + self.add_comments = None + self._add_comments = [] + self.strip_comments = False + + def finalize_options(self): + if self.no_default_keywords and not self.keywords: + raise DistutilsOptionError('you must specify new keywords if you ' + 'disable the default ones') + if self.no_default_keywords: + self._keywords = {} + if self.keywords: + self._keywords.update(parse_keywords(self.keywords.split())) + + if not self.output_file: + raise DistutilsOptionError('no output file specified') + if self.no_wrap and self.width: + raise DistutilsOptionError("'--no-wrap' and '--width' are mutually " + "exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.sort_output and self.sort_by_file: + raise DistutilsOptionError("'--sort-output' and '--sort-by-file' " + "are mutually exclusive") + + if not self.input_dirs: + self.input_dirs = dict.fromkeys([k.split('.',1)[0] + for k in self.distribution.packages + ]).keys() + + if self.add_comments: + self._add_comments = self.add_comments.split(',') + + def run(self): + mappings = self._get_mappings() + outfile = open(self.output_file, 'w') + try: + catalog = Catalog(project=self.distribution.get_name(), + version=self.distribution.get_version(), + msgid_bugs_address=self.msgid_bugs_address, + copyright_holder=self.copyright_holder, + charset=self.charset) + + for dirname, (method_map, options_map) in 
mappings.items(): + def callback(filename, method, options): + if method == 'ignore': + return + filepath = os.path.normpath(os.path.join(dirname, filename)) + optstr = '' + if options: + optstr = ' (%s)' % ', '.join(['%s="%s"' % (k, v) for + k, v in options.items()]) + log.info('extracting messages from %s%s', filepath, optstr) + + extracted = extract_from_dir(dirname, method_map, options_map, + keywords=self._keywords, + comment_tags=self._add_comments, + callback=callback, + strip_comment_tags= + self.strip_comments) + for filename, lineno, message, comments in extracted: + filepath = os.path.normpath(os.path.join(dirname, filename)) + catalog.add(message, None, [(filepath, lineno)], + auto_comments=comments) + + log.info('writing PO template file to %s' % self.output_file) + write_po(outfile, catalog, width=self.width, + no_location=self.no_location, + omit_header=self.omit_header, + sort_output=self.sort_output, + sort_by_file=self.sort_by_file) + finally: + outfile.close() + + def _get_mappings(self): + mappings = {} + + if self.mapping_file: + fileobj = open(self.mapping_file, 'U') + try: + method_map, options_map = parse_mapping(fileobj) + for dirname in self.input_dirs: + mappings[dirname] = method_map, options_map + finally: + fileobj.close() + + elif getattr(self.distribution, 'message_extractors', None): + message_extractors = self.distribution.message_extractors + for dirname, mapping in message_extractors.items(): + if isinstance(mapping, basestring): + method_map, options_map = parse_mapping(StringIO(mapping)) + else: + method_map, options_map = [], {} + for pattern, method, options in mapping: + method_map.append((pattern, method)) + options_map[pattern] = options or {} + mappings[dirname] = method_map, options_map + + else: + for dirname in self.input_dirs: + mappings[dirname] = DEFAULT_MAPPING, {} + + return mappings + + +def check_message_extractors(dist, name, value): + """Validate the ``message_extractors`` keyword argument to ``setup()``. + + :param dist: the distutils/setuptools ``Distribution`` object + :param name: the name of the keyword argument (should always be + "message_extractors") + :param value: the value of the keyword argument + :raise `DistutilsSetupError`: if the value is not valid + :see: `Adding setup() arguments + `_ + """ + assert name == 'message_extractors' + if not isinstance(value, dict): + raise DistutilsSetupError('the value of the "message_extractors" ' + 'parameter must be a dictionary') + + +class init_catalog(Command): + """New catalog initialization command for use in ``setup.py`` scripts. + + If correctly installed, this command is available to Setuptools-using + setup scripts automatically. For projects using plain old ``distutils``, + the command needs to be registered explicitly in ``setup.py``:: + + from babel.messages.frontend import init_catalog + + setup( + ... 
+ cmdclass = {'init_catalog': init_catalog} + ) + + :see: `Integrating new distutils commands `_ + :see: `setuptools `_ + """ + + description = 'create a new catalog based on a POT file' + user_options = [ + ('domain=', 'D', + "domain of PO file (default 'messages')"), + ('input-file=', 'i', + 'name of the input file'), + ('output-dir=', 'd', + 'path to output directory'), + ('output-file=', 'o', + "name of the output file (default " + "'//LC_MESSAGES/.po')"), + ('locale=', 'l', + 'locale for the new localized catalog'), + ] + + def initialize_options(self): + self.output_dir = None + self.output_file = None + self.input_file = None + self.locale = None + self.domain = 'messages' + + def finalize_options(self): + if not self.input_file: + raise DistutilsOptionError('you must specify the input file') + + if not self.locale: + raise DistutilsOptionError('you must provide a locale for the ' + 'new catalog') + try: + self._locale = Locale.parse(self.locale) + except UnknownLocaleError, e: + raise DistutilsOptionError(e) + + if not self.output_file and not self.output_dir: + raise DistutilsOptionError('you must specify the output directory') + if not self.output_file: + self.output_file = os.path.join(self.output_dir, self.locale, + 'LC_MESSAGES', self.domain + '.po') + + if not os.path.exists(os.path.dirname(self.output_file)): + os.makedirs(os.path.dirname(self.output_file)) + + def run(self): + log.info('creating catalog %r based on %r', self.output_file, + self.input_file) + + infile = open(self.input_file, 'r') + try: + # Although reading from the catalog template, read_po must be fed + # the locale in order to correcly calculate plurals + catalog = read_po(infile, locale=self.locale) + finally: + infile.close() + + catalog.locale = self._locale + catalog.fuzzy = False + + outfile = open(self.output_file, 'w') + try: + write_po(outfile, catalog) + finally: + outfile.close() + + +class update_catalog(Command): + """Catalog merging command for use in ``setup.py`` scripts. + + If correctly installed, this command is available to Setuptools-using + setup scripts automatically. For projects using plain old ``distutils``, + the command needs to be registered explicitly in ``setup.py``:: + + from babel.messages.frontend import update_catalog + + setup( + ... 
+ cmdclass = {'update_catalog': update_catalog} + ) + + :since: version 0.9 + :see: `Integrating new distutils commands `_ + :see: `setuptools `_ + """ + + description = 'update message catalogs from a POT file' + user_options = [ + ('domain=', 'D', + "domain of PO file (default 'messages')"), + ('input-file=', 'i', + 'name of the input file'), + ('output-dir=', 'd', + 'path to base directory containing the catalogs'), + ('output-file=', 'o', + "name of the output file (default " + "'//LC_MESSAGES/.po')"), + ('locale=', 'l', + 'locale of the catalog to compile'), + ('ignore-obsolete=', None, + 'whether to omit obsolete messages from the output'), + ('no-fuzzy-matching', 'N', + 'do not use fuzzy matching'), + ('previous', None, + 'keep previous msgids of translated messages') + ] + boolean_options = ['ignore_obsolete', 'no_fuzzy_matching', 'previous'] + + def initialize_options(self): + self.domain = 'messages' + self.input_file = None + self.output_dir = None + self.output_file = None + self.locale = None + self.ignore_obsolete = False + self.no_fuzzy_matching = False + self.previous = False + + def finalize_options(self): + if not self.input_file: + raise DistutilsOptionError('you must specify the input file') + if not self.output_file and not self.output_dir: + raise DistutilsOptionError('you must specify the output file or ' + 'directory') + if self.output_file and not self.locale: + raise DistutilsOptionError('you must specify the locale') + if self.no_fuzzy_matching and self.previous: + self.previous = False + + def run(self): + po_files = [] + if not self.output_file: + if self.locale: + po_files.append((self.locale, + os.path.join(self.output_dir, self.locale, + 'LC_MESSAGES', + self.domain + '.po'))) + else: + for locale in os.listdir(self.output_dir): + po_file = os.path.join(self.output_dir, locale, + 'LC_MESSAGES', + self.domain + '.po') + if os.path.exists(po_file): + po_files.append((locale, po_file)) + else: + po_files.append((self.locale, self.output_file)) + + domain = self.domain + if not domain: + domain = os.path.splitext(os.path.basename(self.input_file))[0] + + infile = open(self.input_file, 'U') + try: + template = read_po(infile) + finally: + infile.close() + + if not po_files: + raise DistutilsOptionError('no message catalogs found') + + for locale, filename in po_files: + log.info('updating catalog %r based on %r', filename, + self.input_file) + infile = open(filename, 'U') + try: + catalog = read_po(infile, locale=locale, domain=domain) + finally: + infile.close() + + catalog.update(template, self.no_fuzzy_matching) + + tmpname = os.path.join(os.path.dirname(filename), + tempfile.gettempprefix() + + os.path.basename(filename)) + tmpfile = open(tmpname, 'w') + try: + try: + write_po(tmpfile, catalog, + ignore_obsolete=self.ignore_obsolete, + include_previous=self.previous) + finally: + tmpfile.close() + except: + os.remove(tmpname) + raise + + try: + os.rename(tmpname, filename) + except OSError: + # We're probably on Windows, which doesn't support atomic + # renames, at least not through Python + # If the error is in fact due to a permissions problem, that + # same error is going to be raised from one of the following + # operations + os.remove(filename) + shutil.copy(tmpname, filename) + os.remove(tmpname) + + +class CommandLineInterface(object): + """Command-line interface. + + This class provides a simple command-line interface to the message + extraction and PO file generation functionality. 
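+
+    The class is normally invoked through the ``pybabel`` console script,
+    but it can also be driven programmatically.  A minimal sketch (the
+    argument list below is only an example; the first element plays the
+    role of the program name)::
+
+        from babel.messages.frontend import CommandLineInterface
+
+        cli = CommandLineInterface()
+        cli.run(['pybabel', 'extract', '-o', 'messages.pot', 'myproject'])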
+ """ + + usage = '%%prog %s [options] %s' + version = '%%prog %s' % VERSION + commands = { + 'compile': 'compile message catalogs to MO files', + 'extract': 'extract messages from source files and generate a POT file', + 'init': 'create new message catalogs from a POT file', + 'update': 'update existing message catalogs from a POT file' + } + + def run(self, argv=sys.argv): + """Main entry point of the command-line interface. + + :param argv: list of arguments passed on the command-line + """ + self.parser = OptionParser(usage=self.usage % ('command', '[args]'), + version=self.version) + self.parser.disable_interspersed_args() + self.parser.print_help = self._help + self.parser.add_option('--list-locales', dest='list_locales', + action='store_true', + help="print all known locales and exit") + self.parser.add_option('-v', '--verbose', action='store_const', + dest='loglevel', const=logging.DEBUG, + help='print as much as possible') + self.parser.add_option('-q', '--quiet', action='store_const', + dest='loglevel', const=logging.ERROR, + help='print as little as possible') + self.parser.set_defaults(list_locales=False, loglevel=logging.INFO) + + options, args = self.parser.parse_args(argv[1:]) + + # Configure logging + self.log = logging.getLogger('babel') + self.log.setLevel(options.loglevel) + handler = logging.StreamHandler() + handler.setLevel(options.loglevel) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + self.log.addHandler(handler) + + if options.list_locales: + identifiers = localedata.list() + longest = max([len(identifier) for identifier in identifiers]) + identifiers.sort() + format = u'%%-%ds %%s' % (longest + 1) + for identifier in identifiers: + locale = Locale.parse(identifier) + output = format % (identifier, locale.english_name) + print output.encode(sys.stdout.encoding or + getpreferredencoding() or + 'ascii', 'replace') + return 0 + + if not args: + self.parser.error('no valid command or option passed. ' + 'Try the -h/--help option for more information.') + + cmdname = args[0] + if cmdname not in self.commands: + self.parser.error('unknown command "%s"' % cmdname) + + return getattr(self, cmdname)(args[1:]) + + def _help(self): + print self.parser.format_help() + print "commands:" + longest = max([len(command) for command in self.commands]) + format = " %%-%ds %%s" % max(8, longest + 1) + commands = self.commands.items() + commands.sort() + for name, description in commands: + print format % (name, description) + + def compile(self, argv): + """Subcommand for compiling a message catalog to a MO file. 
+ + :param argv: the command arguments + :since: version 0.9 + """ + parser = OptionParser(usage=self.usage % ('compile', ''), + description=self.commands['compile']) + parser.add_option('--domain', '-D', dest='domain', + help="domain of MO and PO files (default '%default')") + parser.add_option('--directory', '-d', dest='directory', + metavar='DIR', help='base directory of catalog files') + parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE', + help='locale of the catalog') + parser.add_option('--input-file', '-i', dest='input_file', + metavar='FILE', help='name of the input file') + parser.add_option('--output-file', '-o', dest='output_file', + metavar='FILE', + help="name of the output file (default " + "'//LC_MESSAGES/" + ".mo')") + parser.add_option('--use-fuzzy', '-f', dest='use_fuzzy', + action='store_true', + help='also include fuzzy translations (default ' + '%default)') + parser.add_option('--statistics', dest='statistics', + action='store_true', + help='print statistics about translations') + + parser.set_defaults(domain='messages', use_fuzzy=False, + compile_all=False, statistics=False) + options, args = parser.parse_args(argv) + + po_files = [] + mo_files = [] + if not options.input_file: + if not options.directory: + parser.error('you must specify either the input file or the ' + 'base directory') + if options.locale: + po_files.append((options.locale, + os.path.join(options.directory, + options.locale, 'LC_MESSAGES', + options.domain + '.po'))) + mo_files.append(os.path.join(options.directory, options.locale, + 'LC_MESSAGES', + options.domain + '.mo')) + else: + for locale in os.listdir(options.directory): + po_file = os.path.join(options.directory, locale, + 'LC_MESSAGES', options.domain + '.po') + if os.path.exists(po_file): + po_files.append((locale, po_file)) + mo_files.append(os.path.join(options.directory, locale, + 'LC_MESSAGES', + options.domain + '.mo')) + else: + po_files.append((options.locale, options.input_file)) + if options.output_file: + mo_files.append(options.output_file) + else: + if not options.directory: + parser.error('you must specify either the input file or ' + 'the base directory') + mo_files.append(os.path.join(options.directory, options.locale, + 'LC_MESSAGES', + options.domain + '.mo')) + if not po_files: + parser.error('no message catalogs found') + + for idx, (locale, po_file) in enumerate(po_files): + mo_file = mo_files[idx] + infile = open(po_file, 'r') + try: + catalog = read_po(infile, locale) + finally: + infile.close() + + if options.statistics: + translated = 0 + for message in list(catalog)[1:]: + if message.string: + translated +=1 + percentage = 0 + if len(catalog): + percentage = translated * 100 // len(catalog) + self.log.info("%d of %d messages (%d%%) translated in %r", + translated, len(catalog), percentage, po_file) + + if catalog.fuzzy and not options.use_fuzzy: + self.log.warn('catalog %r is marked as fuzzy, skipping', + po_file) + continue + + for message, errors in catalog.check(): + for error in errors: + self.log.error('error: %s:%d: %s', po_file, message.lineno, + error) + + self.log.info('compiling catalog %r to %r', po_file, mo_file) + + outfile = open(mo_file, 'wb') + try: + write_mo(outfile, catalog, use_fuzzy=options.use_fuzzy) + finally: + outfile.close() + + def extract(self, argv): + """Subcommand for extracting messages from source files and generating + a POT file. 
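+
+    A typical invocation, shown only as an illustrative sketch (the
+    mapping file and output path are examples)::
+
+        $ pybabel extract -F mapping.cfg -o messages.pot --sort-by-file .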
+ + :param argv: the command arguments + """ + parser = OptionParser(usage=self.usage % ('extract', 'dir1 ...'), + description=self.commands['extract']) + parser.add_option('--charset', dest='charset', + help='charset to use in the output (default ' + '"%default")') + parser.add_option('-k', '--keyword', dest='keywords', action='append', + help='keywords to look for in addition to the ' + 'defaults. You can specify multiple -k flags on ' + 'the command line.') + parser.add_option('--no-default-keywords', dest='no_default_keywords', + action='store_true', + help="do not include the default keywords") + parser.add_option('--mapping', '-F', dest='mapping_file', + help='path to the extraction mapping file') + parser.add_option('--no-location', dest='no_location', + action='store_true', + help='do not include location comments with filename ' + 'and line number') + parser.add_option('--omit-header', dest='omit_header', + action='store_true', + help='do not include msgid "" entry in header') + parser.add_option('-o', '--output', dest='output', + help='path to the output POT file') + parser.add_option('-w', '--width', dest='width', type='int', + help="set output line width (default 76)") + parser.add_option('--no-wrap', dest='no_wrap', action = 'store_true', + help='do not break long message lines, longer than ' + 'the output line width, into several lines') + parser.add_option('--sort-output', dest='sort_output', + action='store_true', + help='generate sorted output (default False)') + parser.add_option('--sort-by-file', dest='sort_by_file', + action='store_true', + help='sort output by file location (default False)') + parser.add_option('--msgid-bugs-address', dest='msgid_bugs_address', + metavar='EMAIL@ADDRESS', + help='set report address for msgid') + parser.add_option('--copyright-holder', dest='copyright_holder', + help='set copyright holder in output') + parser.add_option('--project', dest='project', + help='set project name in output') + parser.add_option('--version', dest='version', + help='set project version in output') + parser.add_option('--add-comments', '-c', dest='comment_tags', + metavar='TAG', action='append', + help='place comment block with TAG (or those ' + 'preceding keyword lines) in output file. 
One ' + 'TAG per argument call') + parser.add_option('--strip-comment-tags', '-s', + dest='strip_comment_tags', action='store_true', + help='Strip the comment tags from the comments.') + + parser.set_defaults(charset='utf-8', keywords=[], + no_default_keywords=False, no_location=False, + omit_header = False, width=None, no_wrap=False, + sort_output=False, sort_by_file=False, + comment_tags=[], strip_comment_tags=False) + options, args = parser.parse_args(argv) + if not args: + parser.error('incorrect number of arguments') + + if options.output not in (None, '-'): + outfile = open(options.output, 'w') + else: + outfile = sys.stdout + + keywords = DEFAULT_KEYWORDS.copy() + if options.no_default_keywords: + if not options.keywords: + parser.error('you must specify new keywords if you disable the ' + 'default ones') + keywords = {} + if options.keywords: + keywords.update(parse_keywords(options.keywords)) + + if options.mapping_file: + fileobj = open(options.mapping_file, 'U') + try: + method_map, options_map = parse_mapping(fileobj) + finally: + fileobj.close() + else: + method_map = DEFAULT_MAPPING + options_map = {} + + if options.width and options.no_wrap: + parser.error("'--no-wrap' and '--width' are mutually exclusive.") + elif not options.width and not options.no_wrap: + options.width = 76 + + if options.sort_output and options.sort_by_file: + parser.error("'--sort-output' and '--sort-by-file' are mutually " + "exclusive") + + try: + catalog = Catalog(project=options.project, + version=options.version, + msgid_bugs_address=options.msgid_bugs_address, + copyright_holder=options.copyright_holder, + charset=options.charset) + + for dirname in args: + if not os.path.isdir(dirname): + parser.error('%r is not a directory' % dirname) + + def callback(filename, method, options): + if method == 'ignore': + return + filepath = os.path.normpath(os.path.join(dirname, filename)) + optstr = '' + if options: + optstr = ' (%s)' % ', '.join(['%s="%s"' % (k, v) for + k, v in options.items()]) + self.log.info('extracting messages from %s%s', filepath, + optstr) + + extracted = extract_from_dir(dirname, method_map, options_map, + keywords, options.comment_tags, + callback=callback, + strip_comment_tags= + options.strip_comment_tags) + for filename, lineno, message, comments in extracted: + filepath = os.path.normpath(os.path.join(dirname, filename)) + catalog.add(message, None, [(filepath, lineno)], + auto_comments=comments) + + if options.output not in (None, '-'): + self.log.info('writing PO template file to %s' % options.output) + write_po(outfile, catalog, width=options.width, + no_location=options.no_location, + omit_header=options.omit_header, + sort_output=options.sort_output, + sort_by_file=options.sort_by_file) + finally: + if options.output: + outfile.close() + + def init(self, argv): + """Subcommand for creating new message catalogs from a template. 
+ + :param argv: the command arguments + """ + parser = OptionParser(usage=self.usage % ('init', ''), + description=self.commands['init']) + parser.add_option('--domain', '-D', dest='domain', + help="domain of PO file (default '%default')") + parser.add_option('--input-file', '-i', dest='input_file', + metavar='FILE', help='name of the input file') + parser.add_option('--output-dir', '-d', dest='output_dir', + metavar='DIR', help='path to output directory') + parser.add_option('--output-file', '-o', dest='output_file', + metavar='FILE', + help="name of the output file (default " + "'//LC_MESSAGES/" + ".po')") + parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE', + help='locale for the new localized catalog') + + parser.set_defaults(domain='messages') + options, args = parser.parse_args(argv) + + if not options.locale: + parser.error('you must provide a locale for the new catalog') + try: + locale = Locale.parse(options.locale) + except UnknownLocaleError, e: + parser.error(e) + + if not options.input_file: + parser.error('you must specify the input file') + + if not options.output_file and not options.output_dir: + parser.error('you must specify the output file or directory') + + if not options.output_file: + options.output_file = os.path.join(options.output_dir, + options.locale, 'LC_MESSAGES', + options.domain + '.po') + if not os.path.exists(os.path.dirname(options.output_file)): + os.makedirs(os.path.dirname(options.output_file)) + + infile = open(options.input_file, 'r') + try: + # Although reading from the catalog template, read_po must be fed + # the locale in order to correcly calculate plurals + catalog = read_po(infile, locale=options.locale) + finally: + infile.close() + + catalog.locale = locale + catalog.revision_date = datetime.now(LOCALTZ) + + self.log.info('creating catalog %r based on %r', options.output_file, + options.input_file) + + outfile = open(options.output_file, 'w') + try: + write_po(outfile, catalog) + finally: + outfile.close() + + def update(self, argv): + """Subcommand for updating existing message catalogs from a template. 
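+
+    A typical invocation, shown only as an illustrative sketch (paths and
+    locale are examples)::
+
+        $ pybabel update -i messages.pot -d locale -l de_DE --previous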
+ + :param argv: the command arguments + :since: version 0.9 + """ + parser = OptionParser(usage=self.usage % ('update', ''), + description=self.commands['update']) + parser.add_option('--domain', '-D', dest='domain', + help="domain of PO file (default '%default')") + parser.add_option('--input-file', '-i', dest='input_file', + metavar='FILE', help='name of the input file') + parser.add_option('--output-dir', '-d', dest='output_dir', + metavar='DIR', help='path to output directory') + parser.add_option('--output-file', '-o', dest='output_file', + metavar='FILE', + help="name of the output file (default " + "'//LC_MESSAGES/" + ".po')") + parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE', + help='locale of the translations catalog') + parser.add_option('--ignore-obsolete', dest='ignore_obsolete', + action='store_true', + help='do not include obsolete messages in the output ' + '(default %default)') + parser.add_option('--no-fuzzy-matching', '-N', dest='no_fuzzy_matching', + action='store_true', + help='do not use fuzzy matching (default %default)') + parser.add_option('--previous', dest='previous', action='store_true', + help='keep previous msgids of translated messages ' + '(default %default)') + + parser.set_defaults(domain='messages', ignore_obsolete=False, + no_fuzzy_matching=False, previous=False) + options, args = parser.parse_args(argv) + + if not options.input_file: + parser.error('you must specify the input file') + if not options.output_file and not options.output_dir: + parser.error('you must specify the output file or directory') + if options.output_file and not options.locale: + parser.error('you must specify the locale') + if options.no_fuzzy_matching and options.previous: + options.previous = False + + po_files = [] + if not options.output_file: + if options.locale: + po_files.append((options.locale, + os.path.join(options.output_dir, + options.locale, 'LC_MESSAGES', + options.domain + '.po'))) + else: + for locale in os.listdir(options.output_dir): + po_file = os.path.join(options.output_dir, locale, + 'LC_MESSAGES', + options.domain + '.po') + if os.path.exists(po_file): + po_files.append((locale, po_file)) + else: + po_files.append((options.locale, options.output_file)) + + domain = options.domain + if not domain: + domain = os.path.splitext(os.path.basename(options.input_file))[0] + + infile = open(options.input_file, 'U') + try: + template = read_po(infile) + finally: + infile.close() + + if not po_files: + parser.error('no message catalogs found') + + for locale, filename in po_files: + self.log.info('updating catalog %r based on %r', filename, + options.input_file) + infile = open(filename, 'U') + try: + catalog = read_po(infile, locale=locale, domain=domain) + finally: + infile.close() + + catalog.update(template, options.no_fuzzy_matching) + + tmpname = os.path.join(os.path.dirname(filename), + tempfile.gettempprefix() + + os.path.basename(filename)) + tmpfile = open(tmpname, 'w') + try: + try: + write_po(tmpfile, catalog, + ignore_obsolete=options.ignore_obsolete, + include_previous=options.previous) + finally: + tmpfile.close() + except: + os.remove(tmpname) + raise + + try: + os.rename(tmpname, filename) + except OSError: + # We're probably on Windows, which doesn't support atomic + # renames, at least not through Python + # If the error is in fact due to a permissions problem, that + # same error is going to be raised from one of the following + # operations + os.remove(filename) + shutil.copy(tmpname, filename) + os.remove(tmpname) + + +def 
main(): + return CommandLineInterface().run(sys.argv) + +def parse_mapping(fileobj, filename=None): + """Parse an extraction method mapping from a file-like object. + + >>> buf = StringIO(''' + ... [extractors] + ... custom = mypackage.module:myfunc + ... + ... # Python source files + ... [python: **.py] + ... + ... # Genshi templates + ... [genshi: **/templates/**.html] + ... include_attrs = + ... [genshi: **/templates/**.txt] + ... template_class = genshi.template:TextTemplate + ... encoding = latin-1 + ... + ... # Some custom extractor + ... [custom: **/custom/*.*] + ... ''') + + >>> method_map, options_map = parse_mapping(buf) + >>> len(method_map) + 4 + + >>> method_map[0] + ('**.py', 'python') + >>> options_map['**.py'] + {} + >>> method_map[1] + ('**/templates/**.html', 'genshi') + >>> options_map['**/templates/**.html']['include_attrs'] + '' + >>> method_map[2] + ('**/templates/**.txt', 'genshi') + >>> options_map['**/templates/**.txt']['template_class'] + 'genshi.template:TextTemplate' + >>> options_map['**/templates/**.txt']['encoding'] + 'latin-1' + + >>> method_map[3] + ('**/custom/*.*', 'mypackage.module:myfunc') + >>> options_map['**/custom/*.*'] + {} + + :param fileobj: a readable file-like object containing the configuration + text to parse + :return: a `(method_map, options_map)` tuple + :rtype: `tuple` + :see: `extract_from_directory` + """ + extractors = {} + method_map = [] + options_map = {} + + parser = RawConfigParser() + parser._sections = odict(parser._sections) # We need ordered sections + parser.readfp(fileobj, filename) + for section in parser.sections(): + if section == 'extractors': + extractors = dict(parser.items(section)) + else: + method, pattern = [part.strip() for part in section.split(':', 1)] + method_map.append((pattern, method)) + options_map[pattern] = dict(parser.items(section)) + + if extractors: + for idx, (pattern, method) in enumerate(method_map): + if method in extractors: + method = extractors[method] + method_map[idx] = (pattern, method) + + return (method_map, options_map) + +def parse_keywords(strings=[]): + """Parse keywords specifications from the given list of strings. + + >>> kw = parse_keywords(['_', 'dgettext:2', 'dngettext:2,3']).items() + >>> kw.sort() + >>> for keyword, indices in kw: + ... print (keyword, indices) + ('_', None) + ('dgettext', (2,)) + ('dngettext', (2, 3)) + """ + keywords = {} + for string in strings: + if ':' in string: + funcname, indices = string.split(':') + else: + funcname, indices = string, None + if funcname not in keywords: + if indices: + indices = tuple([(int(x)) for x in indices.split(',')]) + keywords[funcname] = indices + return keywords + + +if __name__ == '__main__': + main() diff --git a/babel3/babel/messages/jslexer.py b/babel3/babel/messages/jslexer.py new file mode 100644 --- /dev/null +++ b/babel3/babel/messages/jslexer.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""A simple JavaScript 1.5 lexer which is used for the JavaScript +extractor. 
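+
+The lexer is consumed by `babel.messages.extract.extract_javascript`.  As an
+illustrative sketch only (the source string is merely an example), string
+tokens can be unquoted like this::
+
+    from babel.messages.jslexer import tokenize, unquote_string
+
+    for token in tokenize(u"msg = gettext('Hello, world!');"):
+        if token.type == 'string':
+            print token.lineno, unquote_string(token.value)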
+""" + +import re + +from babel.util import itemgetter + + +operators = [ + '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', + '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', + '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', + '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':' +] +operators.sort(lambda a, b: cmp(-len(a), -len(b))) + +escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} + +rules = [ + (None, re.compile(r'\s+(?u)')), + (None, re.compile(r'