# HG changeset patch
# User jruigrok
# Date 1271310868 0
# Node ID 0b228ee775fe375f513c02568047ba5835e8a091
# Parent 2dcbbdabfb71f727b010b164516080fcbc47bd46
Copy over trunk.
diff --git a/babel3/COPYING b/babel3/COPYING
new file mode 100644
--- /dev/null
+++ b/babel3/COPYING
@@ -0,0 +1,28 @@
+Copyright (C) 2007 Edgewall Software
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior
+ written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/babel3/ChangeLog b/babel3/ChangeLog
new file mode 100644
--- /dev/null
+++ b/babel3/ChangeLog
@@ -0,0 +1,163 @@
+Version 1.0
+http://svn.edgewall.org/repos/babel/tags/1.0.0/
+(???, from branches/stable/1.0.x)
+
+ * Added support for the locale plural rules defined by the CLDR.
+ * Added `format_timedelta` function to support localized formatting of
+ relative times with strings such as "2 days" or "1 month" (ticket #126).
+ * Fixed Python 2.3 compatibility (ticket #146).
+ * Fixed negative offset handling of Catalog._set_mime_headers (ticket #165).
+ * Fixed the case where messages containing square brackets would break with
+ an unpack error.
+ * Updated to CLDR 1.7.
+ * Make the CLDR import script work with Python 2.7.
+ * Fix Serbian plural forms (ticket #213).
+
+
+Version 0.9.5
+http://svn.edgewall.org/repos/babel/tags/0.9.5/
+(Apr 6 2010, from branches/stable/0.9.x)
+
+ * Fixed the case where messages containing square brackets would break with
+ an unpack error.
+ * Backport of r467: Fuzzy matching regarding plurals should *NOT* be checked
+ against len(message.id) because this is always 2, instead, it's should be
+ checked against catalog.num_plurals (ticket #212).
+
+
+Version 0.9.4
+http://svn.edgewall.org/repos/babel/tags/0.9.4/
+(Aug 25 2008, from branches/stable/0.9.x)
+
+ * Currency symbol definitions that is defined with choice patterns in the
+ CLDR data are no longer imported, so the symbol code will be used instead.
+ * Fixed quarter support in date formatting.
+ * Fixed a serious memory leak that was introduces by the support for CLDR
+ aliases in 0.9.3 (ticket #128).
+ * Locale modifiers such as "@euro" are now stripped from locale identifiers
+ when parsing (ticket #136).
+ * The system locales "C" and "POSIX" are now treated as aliases for
+ "en_US_POSIX", for which the CLDR provides the appropriate data. Thanks to
+ Manlio Perillo for the suggestion.
+ * Fixed JavaScript extraction for regular expression literals (ticket #138)
+ and concatenated strings.
+ * The `Translation` class in `babel.support` can now manage catalogs with
+ different message domains, and exposes the family of `d*gettext` functions
+ (ticket #137).
+
+
+Version 0.9.3
+http://svn.edgewall.org/repos/babel/tags/0.9.3/
+(Jul 9 2008, from branches/stable/0.9.x)
+
+ * Fixed invalid message extraction methods causing an UnboundLocalError.
+ * Extraction method specification can now use a dot instead of the colon to
+ separate module and function name (ticket #105).
+ * Fixed message catalog compilation for locales with more than two plural
+ forms (ticket #95).
+ * Fixed compilation of message catalogs for locales with more than two plural
+ forms where the translations were empty (ticket #97).
+ * The stripping of the comment tags in comments is optional now and
+ is done for each line in a comment.
+ * Added a JavaScript message extractor.
+ * Updated to CLDR 1.6.
+ * Fixed timezone calculations when formatting datetime and time values.
+ * Added a `get_plural` function into the plurals module that returns the
+ correct plural forms for a locale as tuple.
+ * Added support for alias definitions in the CLDR data files, meaning that
+ the chance for items missing in certain locales should be greatly reduced
+ (ticket #68).
+
+
+Version 0.9.2
+http://svn.edgewall.org/repos/babel/tags/0.9.2/
+(Feb 4 2008, from branches/stable/0.9.x)
+
+ * Fixed catalogs' charset values not being recognized (ticket #66).
+ * Numerous improvements to the default plural forms.
+ * Fixed fuzzy matching when updating message catalogs (ticket #82).
+ * Fixed bug in catalog updating, that in some cases pulled in translations
+ from different catalogs based on the same template.
+ * Location lines in PO files do no longer get wrapped at hyphens in file
+ names (ticket #79).
+ * Fixed division by zero error in catalog compilation on empty catalogs
+ (ticket #60).
+
+
+Version 0.9.1
+http://svn.edgewall.org/repos/babel/tags/0.9.1/
+(Sep 7 2007, from branches/stable/0.9.x)
+
+ * Fixed catalog updating when a message is merged that was previously simple
+ but now has a plural form, for example by moving from `gettext` to
+ `ngettext`, or vice versa.
+ * Fixed time formatting for 12 am and 12 pm.
+ * Fixed output encoding of the `pybabel --list-locales` command.
+ * MO files are now written in binary mode on windows (ticket #61).
+
+
+Version 0.9
+http://svn.edgewall.org/repos/babel/tags/0.9.0/
+(Aug 20 2007, from branches/stable/0.9.x)
+
+ * The `new_catalog` distutils command has been renamed to `init_catalog` for
+ consistency with the command-line frontend.
+ * Added compilation of message catalogs to MO files (ticket #21).
+ * Added updating of message catalogs from POT files (ticket #22).
+ * Support for significant digits in number formatting.
+ * Apply proper "banker's rounding" in number formatting in a cross-platform
+ manner.
+ * The number formatting functions now also work with numbers represented by
+ Python `Decimal` objects (ticket #53).
+ * Added extensible infrastructure for validating translation catalogs.
+ * Fixed the extractor not filtering out messages that didn't validate against
+ the keyword's specification (ticket #39).
+ * Fixed the extractor raising an exception when encountering an empty string
+ msgid. It now emits a warning to stderr.
+ * Numerous Python message extractor fixes: it now handles nested function
+ calls within a gettext function call correctly, uses the correct line number
+ for multi-line function calls, and other small fixes (tickets #38 and #39).
+ * Improved support for detecting Python string formatting fields in message
+ strings (ticket #57).
+ * CLDR upgraded to the 1.5 release.
+ * Improved timezone formatting.
+ * Implemented scientific number formatting.
+ * Added mechanism to lookup locales by alias, for cases where browsers insist
+ on including only the language code in the `Accept-Language` header, and
+ sometimes even the incorrect language code.
+
+
+Version 0.8.1
+http://svn.edgewall.org/repos/babel/tags/0.8.1/
+(Jul 2 2007, from branches/stable/0.8.x)
+
+ * `default_locale()` would fail when the value of the `LANGUAGE` environment
+ variable contained multiple language codes separated by colon, as is
+ explicitly allowed by the GNU gettext tools. As the `default_locale()`
+ function is called at the module level in some modules, this bug would
+ completely break importing these modules on systems where `LANGUAGE` is set
+ that way.
+ * The character set specified in PO template files is now respected when
+ creating new catalog files based on that template. This allows the use of
+ characters outside the ASCII range in POT files (ticket #17).
+ * The default ordering of messages in generated POT files, which is based on
+ the order those messages are found when walking the source tree, is no
+ longer subject to differences between platforms; directory and file names
+ are now always sorted alphabetically.
+ * The Python message extractor now respects the special encoding comment to be
+ able to handle files containing non-ASCII characters (ticket #23).
+ * Added 'N_' (gettext noop) to the extractor's default keywords.
+ * Made locale string parsing more robust, and also take the script part into
+ account (ticket #27).
+ * Added a function to list all locales for which locale data is available.
+ * Added a command-line option to the `pybabel` command which prints out all
+ available locales (ticket #24).
+ * The name of the command-line script has been changed from just `babel` to
+ `pybabel` to avoid a conflict with the OpenBabel project (ticket #34).
+
+
+Version 0.8
+http://svn.edgewall.org/repos/babel/tags/0.8.0/
+(Jun 20 2007, from branches/stable/0.8.x)
+
+ * First public release
diff --git a/babel3/INSTALL.txt b/babel3/INSTALL.txt
new file mode 100644
--- /dev/null
+++ b/babel3/INSTALL.txt
@@ -0,0 +1,39 @@
+Installing Babel
+================
+
+Prerequisites
+-------------
+
+ * Python 2.3 or later (2.4 or later is recommended)
+ * CLDR 1.7
+ * Optional: setuptools 0.6b1 or later
+ * Optional: pytz (strongly recommended for real time-zone support)
+
+
+Installation
+------------
+
+Once you've downloaded and unpacked a Babel source release, enter the
+directory where the archive was unpacked, and run:
+
+ $ python setup.py install
+
+Note that you may need administrator/root privileges for this step, as
+this command will by default attempt to install Babel to the Python
+site-packages directory on your system.
+
+For advanced options, please refer to the easy_install and/or the distutils
+documentation:
+
+ http://peak.telecommunity.com/DevCenter/EasyInstall
+ http://docs.python.org/inst/inst.html
+
+
+Support
+-------
+
+If you encounter any problems with Babel, please don't hesitate to ask
+questions on the Babel mailing list or IRC channel:
+
+ http://babel.edgewall.org/wiki/MailingList
+ http://babel.edgewall.org/wiki/IrcChannel
diff --git a/babel3/MANIFEST.in b/babel3/MANIFEST.in
new file mode 100644
--- /dev/null
+++ b/babel3/MANIFEST.in
@@ -0,0 +1,4 @@
+include babel/global.dat
+include babel/localedata/*.dat
+include doc/api/*.*
+include doc/*.html
diff --git a/babel3/README.txt b/babel3/README.txt
new file mode 100644
--- /dev/null
+++ b/babel3/README.txt
@@ -0,0 +1,12 @@
+About Babel
+===========
+
+Babel is a Python library that provides an integrated collection of
+utilities that assist with internationalizing and localizing Python
+applications (in particular web-based applications.)
+
+Details can be found in the HTML files in the `doc` folder.
+
+For more information please visit the Babel web site:
+
+  <http://babel.edgewall.org/>
diff --git a/babel3/babel/__init__.py b/babel3/babel/__init__.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/__init__.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Integrated collection of utilities that assist in internationalizing and
+localizing applications.
+
+This package is basically composed of two major parts:
+
+ * tools to build and work with ``gettext`` message catalogs
+ * a Python interface to the CLDR (Common Locale Data Repository), providing
+ access to various locale display names, localized number and date
+ formatting, etc.
+
+:see: http://www.gnu.org/software/gettext/
+:see: http://docs.python.org/lib/module-gettext.html
+:see: http://www.unicode.org/cldr/
+"""
+
+from babel.core import *
+
+__docformat__ = 'restructuredtext en'
+try:
+ from pkg_resources import get_distribution, ResolutionError
+ try:
+ __version__ = get_distribution('Babel').version
+ except ResolutionError:
+ __version__ = None # unknown
+except ImportError:
+ __version__ = None # unknown
diff --git a/babel3/babel/core.py b/babel3/babel/core.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/core.py
@@ -0,0 +1,804 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Core locale representation and locale data access."""
+
+import os
+import pickle
+
+from babel import localedata
+
+__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
+ 'parse_locale']
+__docformat__ = 'restructuredtext en'
+
+_global_data = None
+
+def get_global(key):
+ """Return the dictionary for the given key in the global data.
+
+ The global data is stored in the ``babel/global.dat`` file and contains
+ information independent of individual locales.
+
+ >>> get_global('zone_aliases')['UTC']
+ 'Etc/GMT'
+ >>> get_global('zone_territories')['Europe/Berlin']
+ 'DE'
+
+ :param key: the data key
+ :return: the dictionary found in the global data under the given key
+ :rtype: `dict`
+ :since: version 0.9
+ """
+ global _global_data
+ if _global_data is None:
+ dirname = os.path.join(os.path.dirname(__file__))
+ filename = os.path.join(dirname, 'global.dat')
+ fileobj = open(filename, 'rb')
+ try:
+ _global_data = pickle.load(fileobj)
+ finally:
+ fileobj.close()
+ return _global_data.get(key, {})
+
+
+LOCALE_ALIASES = {
+ 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ',
+ 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES',
+ 'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES',
+ 'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT',
+ 'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV',
+ 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL',
+ 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI',
+ 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA'
+}
+
+
+class UnknownLocaleError(Exception):
+ """Exception thrown when a locale is requested for which no locale data
+ is available.
+ """
+
+ def __init__(self, identifier):
+ """Create the exception.
+
+ :param identifier: the identifier string of the unsupported locale
+ """
+ Exception.__init__(self, 'unknown locale %r' % identifier)
+ self.identifier = identifier
+
+
+class Locale(object):
+ """Representation of a specific locale.
+
+ >>> locale = Locale('en', 'US')
+ >>> repr(locale)
+    '<Locale "en_US">'
+ >>> locale.display_name
+ u'English (United States)'
+
+ A `Locale` object can also be instantiated from a raw locale string:
+
+ >>> locale = Locale.parse('en-US', sep='-')
+ >>> repr(locale)
+    '<Locale "en_US">'
+
+ `Locale` objects provide access to a collection of locale data, such as
+ territory and language names, number and date format patterns, and more:
+
+ >>> locale.number_symbols['decimal']
+ u'.'
+
+ If a locale is requested for which no locale data is available, an
+ `UnknownLocaleError` is raised:
+
+ >>> Locale.parse('en_DE')
+ Traceback (most recent call last):
+ ...
+ UnknownLocaleError: unknown locale 'en_DE'
+
+    :see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
+ """
+
+ def __init__(self, language, territory=None, script=None, variant=None):
+ """Initialize the locale object from the given identifier components.
+
+ >>> locale = Locale('en', 'US')
+ >>> locale.language
+ 'en'
+ >>> locale.territory
+ 'US'
+
+ :param language: the language code
+ :param territory: the territory (country or region) code
+ :param script: the script code
+ :param variant: the variant code
+ :raise `UnknownLocaleError`: if no locale data is available for the
+ requested locale
+ """
+ self.language = language
+ self.territory = territory
+ self.script = script
+ self.variant = variant
+ self.__data = None
+
+ identifier = str(self)
+ if not localedata.exists(identifier):
+ raise UnknownLocaleError(identifier)
+
+ def default(cls, category=None, aliases=LOCALE_ALIASES):
+ """Return the system default locale for the specified category.
+
+ >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
+ ... os.environ[name] = ''
+ >>> os.environ['LANG'] = 'fr_FR.UTF-8'
+ >>> Locale.default('LC_MESSAGES')
+        <Locale "fr_FR">
+
+ :param category: one of the ``LC_XXX`` environment variable names
+ :param aliases: a dictionary of aliases for locale identifiers
+ :return: the value of the variable, or any of the fallbacks
+ (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
+ :rtype: `Locale`
+ :see: `default_locale`
+ """
+ return cls(default_locale(category, aliases=aliases))
+ default = classmethod(default)
+
+ def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES):
+ """Find the best match between available and requested locale strings.
+
+ >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
+        <Locale "de_DE">
+ >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
+        <Locale "de">
+ >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])
+
+ You can specify the character used in the locale identifiers to separate
+ the differnet components. This separator is applied to both lists. Also,
+ case is ignored in the comparison:
+
+ >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
+        <Locale "de_DE">
+
+ :param preferred: the list of locale identifers preferred by the user
+ :param available: the list of locale identifiers available
+ :param aliases: a dictionary of aliases for locale identifiers
+ :return: the `Locale` object for the best match, or `None` if no match
+ was found
+ :rtype: `Locale`
+ :see: `negotiate_locale`
+ """
+ identifier = negotiate_locale(preferred, available, sep=sep,
+ aliases=aliases)
+ if identifier:
+ return Locale.parse(identifier, sep=sep)
+ negotiate = classmethod(negotiate)
+
+ def parse(cls, identifier, sep='_'):
+ """Create a `Locale` instance for the given locale identifier.
+
+ >>> l = Locale.parse('de-DE', sep='-')
+ >>> l.display_name
+ u'Deutsch (Deutschland)'
+
+ If the `identifier` parameter is not a string, but actually a `Locale`
+ object, that object is returned:
+
+ >>> Locale.parse(l)
+        <Locale "de_DE">
+
+ :param identifier: the locale identifier string
+ :param sep: optional component separator
+ :return: a corresponding `Locale` instance
+ :rtype: `Locale`
+ :raise `ValueError`: if the string does not appear to be a valid locale
+ identifier
+ :raise `UnknownLocaleError`: if no locale data is available for the
+ requested locale
+ :see: `parse_locale`
+ """
+ if isinstance(identifier, basestring):
+ return cls(*parse_locale(identifier, sep=sep))
+ return identifier
+ parse = classmethod(parse)
+
+ def __eq__(self, other):
+ return str(self) == str(other)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __repr__(self):
+        return '<Locale "%s">' % str(self)
+
+ def __str__(self):
+ return '_'.join(filter(None, [self.language, self.script,
+ self.territory, self.variant]))
+
+ def _data(self):
+ if self.__data is None:
+ self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
+ return self.__data
+ _data = property(_data)
+
+ def get_display_name(self, locale=None):
+ """Return the display name of the locale using the given locale.
+
+ The display name will include the language, territory, script, and
+ variant, if those are specified.
+
+ >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
+ u'Chinese (Simplified Han, China)'
+
+ :param locale: the locale to use
+ :return: the display name
+ """
+ if locale is None:
+ locale = self
+ locale = Locale.parse(locale)
+ retval = locale.languages.get(self.language)
+ if self.territory or self.script or self.variant:
+ details = []
+ if self.script:
+ details.append(locale.scripts.get(self.script))
+ if self.territory:
+ details.append(locale.territories.get(self.territory))
+ if self.variant:
+ details.append(locale.variants.get(self.variant))
+ details = filter(None, details)
+ if details:
+ retval += ' (%s)' % u', '.join(details)
+ return retval
+
+ display_name = property(get_display_name, doc="""\
+ The localized display name of the locale.
+
+ >>> Locale('en').display_name
+ u'English'
+ >>> Locale('en', 'US').display_name
+ u'English (United States)'
+ >>> Locale('sv').display_name
+ u'svenska'
+
+ :type: `unicode`
+ """)
+
+ def english_name(self):
+ return self.get_display_name(Locale('en'))
+ english_name = property(english_name, doc="""\
+ The english display name of the locale.
+
+ >>> Locale('de').english_name
+ u'German'
+ >>> Locale('de', 'DE').english_name
+ u'German (Germany)'
+
+ :type: `unicode`
+ """)
+
+ #{ General Locale Display Names
+
+ def languages(self):
+ return self._data['languages']
+ languages = property(languages, doc="""\
+ Mapping of language codes to translated language names.
+
+ >>> Locale('de', 'DE').languages['ja']
+ u'Japanisch'
+
+ :type: `dict`
+        :see: `ISO 639 <http://en.wikipedia.org/wiki/ISO_639>`_
+ """)
+
+ def scripts(self):
+ return self._data['scripts']
+ scripts = property(scripts, doc="""\
+ Mapping of script codes to translated script names.
+
+ >>> Locale('en', 'US').scripts['Hira']
+ u'Hiragana'
+
+ :type: `dict`
+        :see: `ISO 15924 <http://en.wikipedia.org/wiki/ISO_15924>`_
+ """)
+
+ def territories(self):
+ return self._data['territories']
+ territories = property(territories, doc="""\
+ Mapping of script codes to translated script names.
+
+ >>> Locale('es', 'CO').territories['DE']
+ u'Alemania'
+
+ :type: `dict`
+        :see: `ISO 3166 <http://en.wikipedia.org/wiki/ISO_3166>`_
+ """)
+
+ def variants(self):
+ return self._data['variants']
+ variants = property(variants, doc="""\
+ Mapping of script codes to translated script names.
+
+ >>> Locale('de', 'DE').variants['1901']
+ u'Alte deutsche Rechtschreibung'
+
+ :type: `dict`
+ """)
+
+ #{ Number Formatting
+
+ def currencies(self):
+ return self._data['currency_names']
+ currencies = property(currencies, doc="""\
+ Mapping of currency codes to translated currency names.
+
+ >>> Locale('en').currencies['COP']
+ u'Colombian Peso'
+ >>> Locale('de', 'DE').currencies['COP']
+ u'Kolumbianischer Peso'
+
+ :type: `dict`
+ """)
+
+ def currency_symbols(self):
+ return self._data['currency_symbols']
+ currency_symbols = property(currency_symbols, doc="""\
+ Mapping of currency codes to symbols.
+
+ >>> Locale('en', 'US').currency_symbols['USD']
+ u'$'
+ >>> Locale('es', 'CO').currency_symbols['USD']
+ u'US$'
+
+ :type: `dict`
+ """)
+
+ def number_symbols(self):
+ return self._data['number_symbols']
+ number_symbols = property(number_symbols, doc="""\
+ Symbols used in number formatting.
+
+ >>> Locale('fr', 'FR').number_symbols['decimal']
+ u','
+
+ :type: `dict`
+ """)
+
+ def decimal_formats(self):
+ return self._data['decimal_formats']
+ decimal_formats = property(decimal_formats, doc="""\
+ Locale patterns for decimal number formatting.
+
+ >>> Locale('en', 'US').decimal_formats[None]
+        <NumberPattern u'#,##0.###'>
+
+ :type: `dict`
+ """)
+
+ def currency_formats(self):
+ return self._data['currency_formats']
+ currency_formats = property(currency_formats, doc=r"""\
+ Locale patterns for currency number formatting.
+
+ >>> print Locale('en', 'US').currency_formats[None]
+        <NumberPattern u'\xa4#,##0.00'>
+
+ :type: `dict`
+ """)
+
+ def percent_formats(self):
+ return self._data['percent_formats']
+ percent_formats = property(percent_formats, doc="""\
+ Locale patterns for percent number formatting.
+
+ >>> Locale('en', 'US').percent_formats[None]
+        <NumberPattern u'#,##0%'>
+
+ :type: `dict`
+ """)
+
+ def scientific_formats(self):
+ return self._data['scientific_formats']
+ scientific_formats = property(scientific_formats, doc="""\
+ Locale patterns for scientific number formatting.
+
+ >>> Locale('en', 'US').scientific_formats[None]
+        <NumberPattern u'#E0'>
+
+ :type: `dict`
+ """)
+
+ #{ Calendar Information and Date Formatting
+
+ def periods(self):
+ return self._data['periods']
+ periods = property(periods, doc="""\
+ Locale display names for day periods (AM/PM).
+
+ >>> Locale('en', 'US').periods['am']
+ u'AM'
+
+ :type: `dict`
+ """)
+
+ def days(self):
+ return self._data['days']
+ days = property(days, doc="""\
+ Locale display names for weekdays.
+
+ >>> Locale('de', 'DE').days['format']['wide'][3]
+ u'Donnerstag'
+
+ :type: `dict`
+ """)
+
+ def months(self):
+ return self._data['months']
+ months = property(months, doc="""\
+ Locale display names for months.
+
+ >>> Locale('de', 'DE').months['format']['wide'][10]
+ u'Oktober'
+
+ :type: `dict`
+ """)
+
+ def quarters(self):
+ return self._data['quarters']
+ quarters = property(quarters, doc="""\
+ Locale display names for quarters.
+
+ >>> Locale('de', 'DE').quarters['format']['wide'][1]
+ u'1. Quartal'
+
+ :type: `dict`
+ """)
+
+ def eras(self):
+ return self._data['eras']
+ eras = property(eras, doc="""\
+ Locale display names for eras.
+
+ >>> Locale('en', 'US').eras['wide'][1]
+ u'Anno Domini'
+ >>> Locale('en', 'US').eras['abbreviated'][0]
+ u'BC'
+
+ :type: `dict`
+ """)
+
+ def time_zones(self):
+ return self._data['time_zones']
+ time_zones = property(time_zones, doc="""\
+ Locale display names for time zones.
+
+ >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
+ u'British Summer Time'
+ >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
+ u"St. John's"
+
+ :type: `dict`
+ """)
+
+ def meta_zones(self):
+ return self._data['meta_zones']
+ meta_zones = property(meta_zones, doc="""\
+ Locale display names for meta time zones.
+
+ Meta time zones are basically groups of different Olson time zones that
+ have the same GMT offset and daylight savings time.
+
+ >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
+ u'Central European Summer Time'
+
+ :type: `dict`
+ :since: version 0.9
+ """)
+
+ def zone_formats(self):
+ return self._data['zone_formats']
+ zone_formats = property(zone_formats, doc=r"""\
+ Patterns related to the formatting of time zones.
+
+ >>> Locale('en', 'US').zone_formats['fallback']
+ u'%(1)s (%(0)s)'
+ >>> Locale('pt', 'BR').zone_formats['region']
+ u'Hor\xe1rio %s'
+
+ :type: `dict`
+ :since: version 0.9
+ """)
+
+ def first_week_day(self):
+ return self._data['week_data']['first_day']
+ first_week_day = property(first_week_day, doc="""\
+ The first day of a week, with 0 being Monday.
+
+ >>> Locale('de', 'DE').first_week_day
+ 0
+ >>> Locale('en', 'US').first_week_day
+ 6
+
+ :type: `int`
+ """)
+
+ def weekend_start(self):
+ return self._data['week_data']['weekend_start']
+ weekend_start = property(weekend_start, doc="""\
+ The day the weekend starts, with 0 being Monday.
+
+ >>> Locale('de', 'DE').weekend_start
+ 5
+
+ :type: `int`
+ """)
+
+ def weekend_end(self):
+ return self._data['week_data']['weekend_end']
+ weekend_end = property(weekend_end, doc="""\
+ The day the weekend ends, with 0 being Monday.
+
+ >>> Locale('de', 'DE').weekend_end
+ 6
+
+ :type: `int`
+ """)
+
+ def min_week_days(self):
+ return self._data['week_data']['min_days']
+ min_week_days = property(min_week_days, doc="""\
+ The minimum number of days in a week so that the week is counted as the
+ first week of a year or month.
+
+ >>> Locale('de', 'DE').min_week_days
+ 4
+
+ :type: `int`
+ """)
+
+ def date_formats(self):
+ return self._data['date_formats']
+ date_formats = property(date_formats, doc="""\
+ Locale patterns for date formatting.
+
+ >>> Locale('en', 'US').date_formats['short']
+        <DateTimePattern u'M/d/yy'>
+ >>> Locale('fr', 'FR').date_formats['long']
+        <DateTimePattern u'd MMMM yyyy'>
+
+ :type: `dict`
+ """)
+
+ def time_formats(self):
+ return self._data['time_formats']
+ time_formats = property(time_formats, doc="""\
+ Locale patterns for time formatting.
+
+ >>> Locale('en', 'US').time_formats['short']
+        <DateTimePattern u'h:mm a'>
+ >>> Locale('fr', 'FR').time_formats['long']
+        <DateTimePattern u'HH:mm:ss z'>
+
+ :type: `dict`
+ """)
+
+ def datetime_formats(self):
+ return self._data['datetime_formats']
+ datetime_formats = property(datetime_formats, doc="""\
+ Locale patterns for datetime formatting.
+
+ >>> Locale('en').datetime_formats['full']
+ u'{1} {0}'
+ >>> Locale('th').datetime_formats['medium']
+ u'{1}, {0}'
+
+ :type: `dict`
+ """)
+
+ def plural_form(self):
+ return self._data['plural_form']
+ plural_form = property(plural_form, doc="""\
+ Plural rules for the locale.
+
+ >>> Locale('en').plural_form(1)
+ 'one'
+ >>> Locale('en').plural_form(0)
+ 'other'
+ >>> Locale('fr').plural_form(0)
+ 'one'
+ >>> Locale('ru').plural_form(100)
+ 'many'
+
+ :type: `PluralRule`
+ """)
+
+
+def default_locale(category=None, aliases=LOCALE_ALIASES):
+ """Returns the system default locale for a given category, based on
+ environment variables.
+
+ >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
+ ... os.environ[name] = ''
+ >>> os.environ['LANG'] = 'fr_FR.UTF-8'
+ >>> default_locale('LC_MESSAGES')
+ 'fr_FR'
+
+ The "C" or "POSIX" pseudo-locales are treated as aliases for the
+ "en_US_POSIX" locale:
+
+ >>> os.environ['LC_MESSAGES'] = 'POSIX'
+ >>> default_locale('LC_MESSAGES')
+ 'en_US_POSIX'
+
+ :param category: one of the ``LC_XXX`` environment variable names
+ :param aliases: a dictionary of aliases for locale identifiers
+ :return: the value of the variable, or any of the fallbacks (``LANGUAGE``,
+ ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
+ :rtype: `str`
+ """
+ varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
+ for name in filter(None, varnames):
+ locale = os.getenv(name)
+ if locale:
+ if name == 'LANGUAGE' and ':' in locale:
+ # the LANGUAGE variable may contain a colon-separated list of
+ # language codes; we just pick the language on the list
+ locale = locale.split(':')[0]
+ if locale in ('C', 'POSIX'):
+ locale = 'en_US_POSIX'
+ elif aliases and locale in aliases:
+ locale = aliases[locale]
+ return '_'.join(filter(None, parse_locale(locale)))
+
+def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES):
+ """Find the best match between available and requested locale strings.
+
+ >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
+ 'de_DE'
+ >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
+ 'de'
+
+ Case is ignored by the algorithm, the result uses the case of the preferred
+ locale identifier:
+
+ >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
+ 'de_DE'
+
+ >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
+ 'de_DE'
+
+ By default, some web browsers unfortunately do not include the territory
+ in the locale identifier for many locales, and some don't even allow the
+ user to easily add the territory. So while you may prefer using qualified
+ locale identifiers in your web-application, they would not normally match
+ the language-only locale sent by such browsers. To workaround that, this
+ function uses a default mapping of commonly used langauge-only locale
+ identifiers to identifiers including the territory:
+
+ >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
+ 'ja_JP'
+
+ Some browsers even use an incorrect or outdated language code, such as "no"
+ for Norwegian, where the correct locale identifier would actually be "nb_NO"
+ (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of
+ such cases, too:
+
+ >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
+ 'nb_NO'
+
+ You can override this default mapping by passing a different `aliases`
+ dictionary to this function, or you can bypass the behavior althogher by
+ setting the `aliases` parameter to `None`.
+
+ :param preferred: the list of locale strings preferred by the user
+ :param available: the list of locale strings available
+ :param sep: character that separates the different parts of the locale
+ strings
+ :param aliases: a dictionary of aliases for locale identifiers
+ :return: the locale identifier for the best match, or `None` if no match
+ was found
+ :rtype: `str`
+ """
+ available = [a.lower() for a in available if a]
+ for locale in preferred:
+ ll = locale.lower()
+ if ll in available:
+ return locale
+ if aliases:
+ alias = aliases.get(ll)
+ if alias:
+ alias = alias.replace('_', sep)
+ if alias.lower() in available:
+ return alias
+ parts = locale.split(sep)
+ if len(parts) > 1 and parts[0].lower() in available:
+ return parts[0]
+ return None
+
+def parse_locale(identifier, sep='_'):
+ """Parse a locale identifier into a tuple of the form::
+
+ ``(language, territory, script, variant)``
+
+ >>> parse_locale('zh_CN')
+ ('zh', 'CN', None, None)
+ >>> parse_locale('zh_Hans_CN')
+ ('zh', 'CN', 'Hans', None)
+
+ The default component separator is "_", but a different separator can be
+ specified using the `sep` parameter:
+
+ >>> parse_locale('zh-CN', sep='-')
+ ('zh', 'CN', None, None)
+
+ If the identifier cannot be parsed into a locale, a `ValueError` exception
+ is raised:
+
+ >>> parse_locale('not_a_LOCALE_String')
+ Traceback (most recent call last):
+ ...
+ ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
+
+ Encoding information and locale modifiers are removed from the identifier:
+
+ >>> parse_locale('it_IT@euro')
+ ('it', 'IT', None, None)
+ >>> parse_locale('en_US.UTF-8')
+ ('en', 'US', None, None)
+ >>> parse_locale('de_DE.iso885915@euro')
+ ('de', 'DE', None, None)
+
+ :param identifier: the locale identifier string
+ :param sep: character that separates the different components of the locale
+ identifier
+ :return: the ``(language, territory, script, variant)`` tuple
+ :rtype: `tuple`
+ :raise `ValueError`: if the string does not appear to be a valid locale
+ identifier
+
+    :see: `IETF RFC 4646 <http://www.ietf.org/rfc/rfc4646.txt>`_
+ """
+ if '.' in identifier:
+ # this is probably the charset/encoding, which we don't care about
+ identifier = identifier.split('.', 1)[0]
+ if '@' in identifier:
+ # this is a locale modifier such as @euro, which we don't care about
+ # either
+ identifier = identifier.split('@', 1)[0]
+
+ parts = identifier.split(sep)
+ lang = parts.pop(0).lower()
+ if not lang.isalpha():
+ raise ValueError('expected only letters, got %r' % lang)
+
+ script = territory = variant = None
+ if parts:
+ if len(parts[0]) == 4 and parts[0].isalpha():
+ script = parts.pop(0).title()
+
+ if parts:
+ if len(parts[0]) == 2 and parts[0].isalpha():
+ territory = parts.pop(0).upper()
+ elif len(parts[0]) == 3 and parts[0].isdigit():
+ territory = parts.pop(0)
+
+ if parts:
+ if len(parts[0]) == 4 and parts[0][0].isdigit() or \
+ len(parts[0]) >= 5 and parts[0][0].isalpha():
+ variant = parts.pop()
+
+ if parts:
+ raise ValueError('%r is not a valid locale identifier' % identifier)
+
+ return lang, territory, script, variant
diff --git a/babel3/babel/dates.py b/babel3/babel/dates.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/dates.py
@@ -0,0 +1,1055 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Locale dependent formatting and parsing of dates and times.
+
+The default locale for the functions in this module is determined by the
+following environment variables, in that order:
+
+ * ``LC_TIME``,
+ * ``LC_ALL``, and
+ * ``LANG``
+"""
+
+from __future__ import division
+from datetime import date, datetime, time, timedelta, tzinfo
+import re
+
+from babel.core import default_locale, get_global, Locale
+from babel.util import UTC
+
+__all__ = ['format_date', 'format_datetime', 'format_time', 'format_timedelta',
+ 'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time']
+__docformat__ = 'restructuredtext en'
+
+LC_TIME = default_locale('LC_TIME')
+
+# Aliases for use in scopes where the modules are shadowed by local variables
+date_ = date
+datetime_ = datetime
+time_ = time
+
+def get_period_names(locale=LC_TIME):
+ """Return the names for day periods (AM/PM) used by the locale.
+
+ >>> get_period_names(locale='en_US')['am']
+ u'AM'
+
+ :param locale: the `Locale` object, or a locale string
+ :return: the dictionary of period names
+ :rtype: `dict`
+ """
+ return Locale.parse(locale).periods
+
+def get_day_names(width='wide', context='format', locale=LC_TIME):
+ """Return the day names used by the locale for the specified format.
+
+ >>> get_day_names('wide', locale='en_US')[1]
+ u'Tuesday'
+ >>> get_day_names('abbreviated', locale='es')[1]
+ u'mar'
+ >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
+ u'D'
+
+ :param width: the width to use, one of "wide", "abbreviated", or "narrow"
+ :param context: the context, either "format" or "stand-alone"
+ :param locale: the `Locale` object, or a locale string
+ :return: the dictionary of day names
+ :rtype: `dict`
+ """
+ return Locale.parse(locale).days[context][width]
+
+def get_month_names(width='wide', context='format', locale=LC_TIME):
+ """Return the month names used by the locale for the specified format.
+
+ >>> get_month_names('wide', locale='en_US')[1]
+ u'January'
+ >>> get_month_names('abbreviated', locale='es')[1]
+ u'ene'
+ >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
+ u'J'
+
+ :param width: the width to use, one of "wide", "abbreviated", or "narrow"
+ :param context: the context, either "format" or "stand-alone"
+ :param locale: the `Locale` object, or a locale string
+ :return: the dictionary of month names
+ :rtype: `dict`
+ """
+ return Locale.parse(locale).months[context][width]
+
+def get_quarter_names(width='wide', context='format', locale=LC_TIME):
+ """Return the quarter names used by the locale for the specified format.
+
+ >>> get_quarter_names('wide', locale='en_US')[1]
+ u'1st quarter'
+ >>> get_quarter_names('abbreviated', locale='de_DE')[1]
+ u'Q1'
+
+ :param width: the width to use, one of "wide", "abbreviated", or "narrow"
+ :param context: the context, either "format" or "stand-alone"
+ :param locale: the `Locale` object, or a locale string
+ :return: the dictionary of quarter names
+ :rtype: `dict`
+ """
+ return Locale.parse(locale).quarters[context][width]
+
+def get_era_names(width='wide', locale=LC_TIME):
+ """Return the era names used by the locale for the specified format.
+
+ >>> get_era_names('wide', locale='en_US')[1]
+ u'Anno Domini'
+ >>> get_era_names('abbreviated', locale='de_DE')[1]
+ u'n. Chr.'
+
+ :param width: the width to use, either "wide", "abbreviated", or "narrow"
+ :param locale: the `Locale` object, or a locale string
+ :return: the dictionary of era names
+ :rtype: `dict`
+ """
+ return Locale.parse(locale).eras[width]
+
+def get_date_format(format='medium', locale=LC_TIME):
+ """Return the date formatting patterns used by the locale for the specified
+ format.
+
+ >>> get_date_format(locale='en_US')
+
+ >>> get_date_format('full', locale='de_DE')
+
+
+ :param format: the format to use, one of "full", "long", "medium", or
+ "short"
+ :param locale: the `Locale` object, or a locale string
+ :return: the date format pattern
+ :rtype: `DateTimePattern`
+ """
+ return Locale.parse(locale).date_formats[format]
+
+def get_datetime_format(format='medium', locale=LC_TIME):
+ """Return the datetime formatting patterns used by the locale for the
+ specified format.
+
+ >>> get_datetime_format(locale='en_US')
+ u'{1} {0}'
+
+ :param format: the format to use, one of "full", "long", "medium", or
+ "short"
+ :param locale: the `Locale` object, or a locale string
+ :return: the datetime format pattern
+ :rtype: `unicode`
+ """
+ patterns = Locale.parse(locale).datetime_formats
+ if format not in patterns:
+ format = None
+ return patterns[format]
+
+def get_time_format(format='medium', locale=LC_TIME):
+ """Return the time formatting patterns used by the locale for the specified
+ format.
+
+ >>> get_time_format(locale='en_US')
+
+ >>> get_time_format('full', locale='de_DE')
+
+
+ :param format: the format to use, one of "full", "long", "medium", or
+ "short"
+ :param locale: the `Locale` object, or a locale string
+ :return: the time format pattern
+ :rtype: `DateTimePattern`
+ """
+ return Locale.parse(locale).time_formats[format]
+
+def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME):
+ """Return the timezone associated with the given `datetime` object formatted
+ as string indicating the offset from GMT.
+
+ >>> dt = datetime(2007, 4, 1, 15, 30)
+ >>> get_timezone_gmt(dt, locale='en')
+ u'GMT+00:00'
+
+ >>> from pytz import timezone
+ >>> tz = timezone('America/Los_Angeles')
+ >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+ >>> get_timezone_gmt(dt, locale='en')
+ u'GMT-08:00'
+ >>> get_timezone_gmt(dt, 'short', locale='en')
+ u'-0800'
+
+ The long format depends on the locale, for example in France the acronym
+ UTC string is used instead of GMT:
+
+ >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
+ u'UTC-08:00'
+
+ :param datetime: the ``datetime`` object; if `None`, the current date and
+ time in UTC is used
+ :param width: either "long" or "short"
+ :param locale: the `Locale` object, or a locale string
+ :return: the GMT offset representation of the timezone
+ :rtype: `unicode`
+ :since: version 0.9
+ """
+ if datetime is None:
+ datetime = datetime_.utcnow()
+ elif isinstance(datetime, (int, long)):
+ datetime = datetime_.utcfromtimestamp(datetime).time()
+ if datetime.tzinfo is None:
+ datetime = datetime.replace(tzinfo=UTC)
+ locale = Locale.parse(locale)
+
+ offset = datetime.tzinfo.utcoffset(datetime)
+ seconds = offset.days * 24 * 60 * 60 + offset.seconds
+ hours, seconds = divmod(seconds, 3600)
+ if width == 'short':
+ pattern = u'%+03d%02d'
+ else:
+ pattern = locale.zone_formats['gmt'] % '%+03d:%02d'
+ return pattern % (hours, seconds // 60)
+
+def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME):
+ """Return a representation of the given timezone using "location format".
+
+ The result depends on both the local display name of the country and the
+ city assocaited with the time zone:
+
+ >>> from pytz import timezone
+ >>> tz = timezone('America/St_Johns')
+ >>> get_timezone_location(tz, locale='de_DE')
+ u"Kanada (St. John's)"
+ >>> tz = timezone('America/Mexico_City')
+ >>> get_timezone_location(tz, locale='de_DE')
+ u'Mexiko (Mexiko-Stadt)'
+
+ If the timezone is associated with a country that uses only a single
+ timezone, just the localized country name is returned:
+
+ >>> tz = timezone('Europe/Berlin')
+ >>> get_timezone_name(tz, locale='de_DE')
+ u'Deutschland'
+
+ :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+ the timezone; if `None`, the current date and time in
+ UTC is assumed
+ :param locale: the `Locale` object, or a locale string
+ :return: the localized timezone name using location format
+ :rtype: `unicode`
+ :since: version 0.9
+ """
+ if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+ dt = None
+ tzinfo = UTC
+ elif isinstance(dt_or_tzinfo, (datetime, time)):
+ dt = dt_or_tzinfo
+ if dt.tzinfo is not None:
+ tzinfo = dt.tzinfo
+ else:
+ tzinfo = UTC
+ else:
+ dt = None
+ tzinfo = dt_or_tzinfo
+ locale = Locale.parse(locale)
+
+ if hasattr(tzinfo, 'zone'):
+ zone = tzinfo.zone
+ else:
+ zone = tzinfo.tzname(dt or datetime.utcnow())
+
+ # Get the canonical time-zone code
+ zone = get_global('zone_aliases').get(zone, zone)
+
+ info = locale.time_zones.get(zone, {})
+
+ # Otherwise, if there is only one timezone for the country, return the
+ # localized country name
+ region_format = locale.zone_formats['region']
+ territory = get_global('zone_territories').get(zone)
+ if territory not in locale.territories:
+ territory = 'ZZ' # invalid/unknown
+ territory_name = locale.territories[territory]
+ if territory and len(get_global('territory_zones').get(territory, [])) == 1:
+ return region_format % (territory_name)
+
+ # Otherwise, include the city in the output
+ fallback_format = locale.zone_formats['fallback']
+ if 'city' in info:
+ city_name = info['city']
+ else:
+ metazone = get_global('meta_zones').get(zone)
+ metazone_info = locale.meta_zones.get(metazone, {})
+ if 'city' in metazone_info:
+ city_name = metainfo['city']
+ elif '/' in zone:
+ city_name = zone.split('/', 1)[1].replace('_', ' ')
+ else:
+ city_name = zone.replace('_', ' ')
+
+ return region_format % (fallback_format % {
+ '0': city_name,
+ '1': territory_name
+ })
+
+def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
+ locale=LC_TIME):
+ r"""Return the localized display name for the given timezone. The timezone
+ may be specified using a ``datetime`` or `tzinfo` object.
+
+ >>> from pytz import timezone
+ >>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles'))
+ >>> get_timezone_name(dt, locale='en_US')
+ u'Pacific Standard Time'
+ >>> get_timezone_name(dt, width='short', locale='en_US')
+ u'PST'
+
+ If this function gets passed only a `tzinfo` object and no concrete
+ `datetime`, the returned display name is indenpendent of daylight savings
+ time. This can be used for example for selecting timezones, or to set the
+ time of events that recur across DST changes:
+
+ >>> tz = timezone('America/Los_Angeles')
+ >>> get_timezone_name(tz, locale='en_US')
+ u'Pacific Time'
+ >>> get_timezone_name(tz, 'short', locale='en_US')
+ u'PT'
+
+ If no localized display name for the timezone is available, and the timezone
+ is associated with a country that uses only a single timezone, the name of
+ that country is returned, formatted according to the locale:
+
+ >>> tz = timezone('Europe/Berlin')
+ >>> get_timezone_name(tz, locale='de_DE')
+ u'Deutschland'
+ >>> get_timezone_name(tz, locale='pt_BR')
+ u'Hor\xe1rio Alemanha'
+
+ On the other hand, if the country uses multiple timezones, the city is also
+ included in the representation:
+
+ >>> tz = timezone('America/St_Johns')
+ >>> get_timezone_name(tz, locale='de_DE')
+ u"Kanada (St. John's)"
+
+ The `uncommon` parameter can be set to `True` to enable the use of timezone
+ representations that are not commonly used by the requested locale. For
+ example, while in French the central European timezone is usually
+ abbreviated as "HEC", in Canadian French, this abbreviation is not in
+ common use, so a generic name would be chosen by default:
+
+ >>> tz = timezone('Europe/Paris')
+ >>> get_timezone_name(tz, 'short', locale='fr_CA')
+ u'France'
+ >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA')
+ u'HEC'
+
+ :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+ the timezone; if a ``tzinfo`` object is used, the
+ resulting display name will be generic, i.e.
+ independent of daylight savings time; if `None`, the
+ current date in UTC is assumed
+ :param width: either "long" or "short"
+ :param uncommon: whether even uncommon timezone abbreviations should be used
+ :param locale: the `Locale` object, or a locale string
+ :return: the timezone display name
+ :rtype: `unicode`
+ :since: version 0.9
+ :see: `LDML Appendix J: Time Zone Display Names
+ `_
+ """
+ if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+ dt = None
+ tzinfo = UTC
+ elif isinstance(dt_or_tzinfo, (datetime, time)):
+ dt = dt_or_tzinfo
+ if dt.tzinfo is not None:
+ tzinfo = dt.tzinfo
+ else:
+ tzinfo = UTC
+ else:
+ dt = None
+ tzinfo = dt_or_tzinfo
+ locale = Locale.parse(locale)
+
+ if hasattr(tzinfo, 'zone'):
+ zone = tzinfo.zone
+ else:
+ zone = tzinfo.tzname(dt)
+
+ # Get the canonical time-zone code
+ zone = get_global('zone_aliases').get(zone, zone)
+
+ info = locale.time_zones.get(zone, {})
+ # Try explicitly translated zone names first
+ if width in info:
+ if dt is None:
+ field = 'generic'
+ else:
+ dst = tzinfo.dst(dt)
+ if dst is None:
+ field = 'generic'
+ elif dst == 0:
+ field = 'standard'
+ else:
+ field = 'daylight'
+ if field in info[width]:
+ return info[width][field]
+
+ metazone = get_global('meta_zones').get(zone)
+ if metazone:
+ metazone_info = locale.meta_zones.get(metazone, {})
+ if width in metazone_info and (uncommon or metazone_info.get('common')):
+ if dt is None:
+ field = 'generic'
+ else:
+ field = tzinfo.dst(dt) and 'daylight' or 'standard'
+ if field in metazone_info[width]:
+ return metazone_info[width][field]
+
+ # If we have a concrete datetime, we assume that the result can't be
+ # independent of daylight savings time, so we return the GMT offset
+ if dt is not None:
+ return get_timezone_gmt(dt, width=width, locale=locale)
+
+ return get_timezone_location(dt_or_tzinfo, locale=locale)
+
+def format_date(date=None, format='medium', locale=LC_TIME):
+ """Return a date formatted according to the given pattern.
+
+ >>> d = date(2007, 04, 01)
+ >>> format_date(d, locale='en_US')
+ u'Apr 1, 2007'
+ >>> format_date(d, format='full', locale='de_DE')
+ u'Sonntag, 1. April 2007'
+
+ If you don't want to use the locale default formats, you can specify a
+ custom date pattern:
+
+ >>> format_date(d, "EEE, MMM d, ''yy", locale='en')
+ u"Sun, Apr 1, '07"
+
+ :param date: the ``date`` or ``datetime`` object; if `None`, the current
+ date is used
+ :param format: one of "full", "long", "medium", or "short", or a custom
+ date/time pattern
+ :param locale: a `Locale` object or a locale identifier
+ :rtype: `unicode`
+
+ :note: If the pattern contains time fields, an `AttributeError` will be
+ raised when trying to apply the formatting. This is also true if
+ the value of ``date`` parameter is actually a ``datetime`` object,
+ as this function automatically converts that to a ``date``.
+ """
+ if date is None:
+ date = date_.today()
+ elif isinstance(date, datetime):
+ date = date.date()
+
+ locale = Locale.parse(locale)
+ if format in ('full', 'long', 'medium', 'short'):
+ format = get_date_format(format, locale=locale)
+ pattern = parse_pattern(format)
+ return parse_pattern(format).apply(date, locale)
+
+def format_datetime(datetime=None, format='medium', tzinfo=None,
+                    locale=LC_TIME):
+    r"""Return a date and time formatted according to the given pattern.
+
+    >>> dt = datetime(2007, 04, 01, 15, 30)
+    >>> format_datetime(dt, locale='en_US')
+    u'Apr 1, 2007 3:30:00 PM'
+
+    For any pattern requiring the display of the time-zone, the third-party
+    ``pytz`` package is needed to explicitly specify the time-zone:
+
+    >>> from pytz import timezone
+    >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
+    ...                 locale='fr_FR')
+    u'dimanche 1 avril 2007 17:30:00 Heure avanc\xe9e de l\u2019Europe centrale'
+    >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
+    ...                 tzinfo=timezone('US/Eastern'), locale='en')
+    u'2007.04.01 AD at 11:30:00 EDT'
+
+    :param datetime: the `datetime` object; if `None`, the current date and
+                     time is used
+    :param format: one of "full", "long", "medium", or "short", or a custom
+                   date/time pattern
+    :param tzinfo: the timezone to apply to the time for display
+    :param locale: a `Locale` object or a locale identifier
+    :rtype: `unicode`
+    """
+    # Normalize the input to an aware datetime in the display timezone
+    if datetime is None:
+        datetime = datetime_.utcnow()
+    elif isinstance(datetime, (int, long)):
+        # POSIX timestamp, interpreted as UTC
+        datetime = datetime_.utcfromtimestamp(datetime)
+    elif isinstance(datetime, time):
+        # a bare time is combined with today's date
+        datetime = datetime_.combine(date.today(), datetime)
+    if datetime.tzinfo is None:
+        # naive datetimes are assumed to be in UTC
+        datetime = datetime.replace(tzinfo=UTC)
+    if tzinfo is not None:
+        datetime = datetime.astimezone(tzinfo)
+        if hasattr(tzinfo, 'normalize'): # pytz
+            datetime = tzinfo.normalize(datetime)
+
+    locale = Locale.parse(locale)
+    if format in ('full', 'long', 'medium', 'short'):
+        # the locale's datetime pattern combines the date ({1}) and time ({0})
+        # representations into a single string
+        return get_datetime_format(format, locale=locale) \
+            .replace('{0}', format_time(datetime, format, tzinfo=None,
+                                        locale=locale)) \
+            .replace('{1}', format_date(datetime, format, locale=locale))
+    else:
+        return parse_pattern(format).apply(datetime, locale)
+
+def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
+    r"""Return a time formatted according to the given pattern.
+
+    >>> t = time(15, 30)
+    >>> format_time(t, locale='en_US')
+    u'3:30:00 PM'
+    >>> format_time(t, format='short', locale='de_DE')
+    u'15:30'
+
+    If you don't want to use the locale default formats, you can specify a
+    custom time pattern:
+
+    >>> format_time(t, "hh 'o''clock' a", locale='en')
+    u"03 o'clock PM"
+
+    For any pattern requiring the display of the time-zone, the third-party
+    ``pytz`` package is needed to explicitly specify the time-zone:
+
+    >>> from pytz import timezone
+    >>> t = datetime(2007, 4, 1, 15, 30)
+    >>> tzinfo = timezone('Europe/Paris')
+    >>> t = tzinfo.localize(t)
+    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
+    u'15:30:00 Heure avanc\xe9e de l\u2019Europe centrale'
+    >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'),
+    ...             locale='en')
+    u"09 o'clock AM, Eastern Daylight Time"
+
+    As that example shows, when this function gets passed a
+    ``datetime.datetime`` value, the actual time in the formatted string is
+    adjusted to the timezone specified by the `tzinfo` parameter. If the
+    ``datetime`` is "naive" (i.e. it has no associated timezone information),
+    it is assumed to be in UTC.
+
+    These timezone calculations are **not** performed if the value is of type
+    ``datetime.time``, as without date information there's no way to determine
+    what a given time would translate to in a different timezone without
+    information about whether daylight savings time is in effect or not. This
+    means that time values are left as-is, and the value of the `tzinfo`
+    parameter is only used to display the timezone name if needed:
+
+    >>> t = time(15, 30)
+    >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'),
+    ...             locale='fr_FR')
+    u'15:30:00 Heure normale de l\u2019Europe centrale'
+    >>> format_time(t, format='full', tzinfo=timezone('US/Eastern'),
+    ...             locale='en_US')
+    u'3:30:00 PM Eastern Standard Time'
+
+    :param time: the ``time`` or ``datetime`` object; if `None`, the current
+                 time in UTC is used
+    :param format: one of "full", "long", "medium", or "short", or a custom
+                   date/time pattern
+    :param tzinfo: the time-zone to apply to the time for display
+    :param locale: a `Locale` object or a locale identifier
+    :rtype: `unicode`
+
+    :note: If the pattern contains date fields, an `AttributeError` will be
+           raised when trying to apply the formatting. This is also true if
+           the value of ``time`` parameter is actually a ``datetime`` object,
+           as this function automatically converts that to a ``time``.
+    """
+    if time is None:
+        time = datetime.utcnow()
+    elif isinstance(time, (int, long)):
+        # POSIX timestamp, interpreted as UTC
+        time = datetime.utcfromtimestamp(time)
+    if time.tzinfo is None:
+        # naive values are assumed to be in UTC
+        time = time.replace(tzinfo=UTC)
+    if isinstance(time, datetime):
+        # full datetimes are converted to the display timezone ...
+        if tzinfo is not None:
+            time = time.astimezone(tzinfo)
+            if hasattr(tzinfo, 'normalize'): # pytz
+                time = tzinfo.normalize(time)
+        time = time.timetz()
+    elif tzinfo is not None:
+        # ... but bare times are only tagged with it (see docstring)
+        time = time.replace(tzinfo=tzinfo)
+
+    locale = Locale.parse(locale)
+    if format in ('full', 'long', 'medium', 'short'):
+        format = get_time_format(format, locale=locale)
+    return parse_pattern(format).apply(time, locale)
+
+# (unit name, number of seconds per unit) pairs, ordered from the largest
+# unit to the smallest; used by `format_timedelta` to pick a display unit
+TIMEDELTA_UNITS = (
+    ('year', 3600 * 24 * 365),
+    ('month', 3600 * 24 * 30),
+    ('week', 3600 * 24 * 7),
+    ('day', 3600 * 24),
+    ('hour', 3600),
+    ('minute', 60),
+    ('second', 1)
+)
+
+def format_timedelta(delta, granularity='second', threshold=.85, locale=LC_TIME):
+    """Return a time delta according to the rules of the given locale.
+
+    >>> format_timedelta(timedelta(weeks=12), locale='en_US')
+    u'3 mths'
+    >>> format_timedelta(timedelta(seconds=1), locale='es')
+    u'1 s'
+
+    The granularity parameter can be provided to alter the lowest unit
+    presented, which defaults to a second.
+
+    >>> format_timedelta(timedelta(hours=3), granularity='day',
+    ...                  locale='en_US')
+    u'1 day'
+
+    The threshold parameter can be used to determine at which value the
+    presentation switches to the next higher unit. A higher threshold factor
+    means the presentation will switch later. For example:
+
+    >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US')
+    u'1 day'
+    >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US')
+    u'23 hrs'
+
+    :param delta: a ``timedelta`` object representing the time difference to
+                  format, or the delta in seconds as an `int` value
+    :param granularity: determines the smallest unit that should be displayed,
+                        the value can be one of "year", "month", "week", "day",
+                        "hour", "minute" or "second"
+    :param threshold: factor that determines at which point the presentation
+                      switches to the next higher unit
+    :param locale: a `Locale` object or a locale identifier
+    :rtype: `unicode`
+    """
+    if isinstance(delta, timedelta):
+        seconds = int((delta.days * 86400) + delta.seconds)
+    else:
+        seconds = delta
+    locale = Locale.parse(locale)
+
+    # Walk the units from largest to smallest and stop at the first one whose
+    # value reaches the threshold, or at the requested granularity.
+    for unit, secs_per_unit in TIMEDELTA_UNITS:
+        value = abs(seconds) / secs_per_unit
+        if value >= threshold or unit == granularity:
+            if unit == granularity and value > 0:
+                # never display "0 <granularity>" for a non-zero delta
+                value = max(1, value)
+            value = int(round(value))
+            plural_form = locale.plural_form(value)
+            pattern = locale._data['unit_patterns'][unit][plural_form]
+            return pattern.replace('{0}', str(value))
+
+    # only reached for a zero delta with granularity 'second'... or not at
+    # all; NOTE(review): the loop always returns for unit == 'second'
+    return u''
+
+def parse_date(string, locale=LC_TIME):
+ """Parse a date from a string.
+
+ This function uses the date format for the locale as a hint to determine
+ the order in which the date fields appear in the string.
+
+ >>> parse_date('4/1/04', locale='en_US')
+ datetime.date(2004, 4, 1)
+ >>> parse_date('01.04.2004', locale='de_DE')
+ datetime.date(2004, 4, 1)
+
+ :param string: the string containing the date
+ :param locale: a `Locale` object or a locale identifier
+ :return: the parsed date
+ :rtype: `date`
+ """
+ # TODO: try ISO format first?
+ format = get_date_format(locale=locale).pattern.lower()
+ year_idx = format.index('y')
+ month_idx = format.index('m')
+ if month_idx < 0:
+ month_idx = format.index('l')
+ day_idx = format.index('d')
+
+ indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]
+ indexes.sort()
+ indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
+
+ # FIXME: this currently only supports numbers, but should also support month
+ # names, both in the requested locale, and english
+
+ numbers = re.findall('(\d+)', string)
+ year = numbers[indexes['Y']]
+ if len(year) == 2:
+ year = 2000 + int(year)
+ else:
+ year = int(year)
+ month = int(numbers[indexes['M']])
+ day = int(numbers[indexes['D']])
+ if month > 12:
+ month, day = day, month
+ return date(year, month, day)
+
+def parse_datetime(string, locale=LC_TIME):
+    """Parse a date and time from a string.
+
+    This function uses the date and time formats for the locale as a hint to
+    determine the order in which the time fields appear in the string.
+
+    :param string: the string containing the date and time
+    :param locale: a `Locale` object or a locale identifier
+    :return: the parsed date/time
+    :rtype: `datetime`
+    """
+    # TODO: not implemented yet; use `parse_date` and `parse_time` separately
+    raise NotImplementedError
+
+def parse_time(string, locale=LC_TIME):
+ """Parse a time from a string.
+
+ This function uses the time format for the locale as a hint to determine
+ the order in which the time fields appear in the string.
+
+ >>> parse_time('15:30:00', locale='en_US')
+ datetime.time(15, 30)
+
+ :param string: the string containing the time
+ :param locale: a `Locale` object or a locale identifier
+ :return: the parsed time
+ :rtype: `time`
+ """
+ # TODO: try ISO format first?
+ format = get_time_format(locale=locale).pattern.lower()
+ hour_idx = format.index('h')
+ if hour_idx < 0:
+ hour_idx = format.index('k')
+ min_idx = format.index('m')
+ sec_idx = format.index('s')
+
+ indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]
+ indexes.sort()
+ indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
+
+ # FIXME: support 12 hour clock, and 0-based hour specification
+ # and seconds should be optional, maybe minutes too
+ # oh, and time-zones, of course
+
+ numbers = re.findall('(\d+)', string)
+ hour = int(numbers[indexes['H']])
+ minute = int(numbers[indexes['M']])
+ second = int(numbers[indexes['S']])
+ return time(hour, minute, second)
+
+
+class DateTimePattern(object):
+    """A parsed date/time pattern.
+
+    `pattern` is the original CLDR pattern string and `format` the equivalent
+    ``'%(field)s'``-style format string produced by `parse_pattern`.
+    """
+
+    def __init__(self, pattern, format):
+        self.pattern = pattern
+        self.format = format
+
+    def __repr__(self):
+        return '<%s %r>' % (type(self).__name__, self.pattern)
+
+    def __unicode__(self):
+        return self.pattern
+
+    def __mod__(self, other):
+        # applying the pattern is spelled ``pattern % DateTimeFormat(...)``
+        if type(other) is not DateTimeFormat:
+            return NotImplemented
+        return self.format % other
+
+    def apply(self, datetime, locale):
+        """Format the given date/datetime/time value for the given locale."""
+        return self % DateTimeFormat(datetime, locale)
+
+
+class DateTimeFormat(object):
+    """Wraps a date/datetime/time value and maps CLDR pattern fields
+    (accessed via ``__getitem__``, e.g. ``self['yyyy']``) to their formatted
+    string representations.
+    """
+
+    def __init__(self, value, locale):
+        assert isinstance(value, (date, datetime, time))
+        if isinstance(value, (datetime, time)) and value.tzinfo is None:
+            # naive values are assumed to be in UTC
+            value = value.replace(tzinfo=UTC)
+        self.value = value
+        self.locale = Locale.parse(locale)
+
+    def __getitem__(self, name):
+        # ``name`` is a run of identical pattern characters, e.g. 'yyyy';
+        # the character selects the field and the run length its width
+        char = name[0]
+        num = len(name)
+        if char == 'G':
+            return self.format_era(char, num)
+        elif char in ('y', 'Y', 'u'):
+            return self.format_year(char, num)
+        elif char in ('Q', 'q'):
+            return self.format_quarter(char, num)
+        elif char in ('M', 'L'):
+            return self.format_month(char, num)
+        elif char in ('w', 'W'):
+            return self.format_week(char, num)
+        elif char == 'd':
+            return self.format(self.value.day, num)
+        elif char == 'D':
+            return self.format_day_of_year(num)
+        elif char == 'F':
+            return self.format_day_of_week_in_month()
+        elif char in ('E', 'e', 'c'):
+            return self.format_weekday(char, num)
+        elif char == 'a':
+            return self.format_period(char)
+        elif char == 'h':
+            # 12-hour clock, 1-12
+            if self.value.hour % 12 == 0:
+                return self.format(12, num)
+            else:
+                return self.format(self.value.hour % 12, num)
+        elif char == 'H':
+            # 24-hour clock, 0-23
+            return self.format(self.value.hour, num)
+        elif char == 'K':
+            # 12-hour clock, 0-11
+            return self.format(self.value.hour % 12, num)
+        elif char == 'k':
+            # 24-hour clock, 1-24
+            if self.value.hour == 0:
+                return self.format(24, num)
+            else:
+                return self.format(self.value.hour, num)
+        elif char == 'm':
+            return self.format(self.value.minute, num)
+        elif char == 's':
+            return self.format(self.value.second, num)
+        elif char == 'S':
+            return self.format_frac_seconds(num)
+        elif char == 'A':
+            return self.format_milliseconds_in_day(num)
+        elif char in ('z', 'Z', 'v', 'V'):
+            return self.format_timezone(char, num)
+        else:
+            raise KeyError('Unsupported date/time field %r' % char)
+
+    def format_era(self, char, num):
+        # Python date years are always >= 1, so in practice this always
+        # selects the AD era entry
+        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
+        era = int(self.value.year >= 0)
+        return get_era_names(width, self.locale)[era]
+
+    def format_year(self, char, num):
+        value = self.value.year
+        if char.isupper():
+            # 'Y': week-of-year based year; the day belongs to the previous
+            # year if its week counts as that year's last week
+            week = self.get_week_number(self.get_day_of_year())
+            if week == 0:
+                value -= 1
+        year = self.format(value, num)
+        if num == 2:
+            year = year[-2:]
+        return year
+
+    def format_quarter(self, char, num):
+        quarter = (self.value.month - 1) // 3 + 1
+        if num <= 2:
+            return ('%%0%dd' % num) % quarter
+        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
+        context = {'Q': 'format', 'q': 'stand-alone'}[char]
+        return get_quarter_names(width, context, self.locale)[quarter]
+
+    def format_month(self, char, num):
+        if num <= 2:
+            return ('%%0%dd' % num) % self.value.month
+        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
+        context = {'M': 'format', 'L': 'stand-alone'}[char]
+        return get_month_names(width, context, self.locale)[self.value.month]
+
+    def format_week(self, char, num):
+        if char.islower(): # week of year
+            day_of_year = self.get_day_of_year()
+            week = self.get_week_number(day_of_year)
+            if week == 0:
+                # the day counts as part of the last week of the previous year
+                date = self.value - timedelta(days=day_of_year)
+                week = self.get_week_number(self.get_day_of_year(date),
+                                            date.weekday())
+            return self.format(week, num)
+        else: # week of month
+            week = self.get_week_number(self.value.day)
+            if week == 0:
+                date = self.value - timedelta(days=self.value.day)
+                week = self.get_week_number(date.day, date.weekday())
+                # NOTE(review): this 'pass' is a no-op left in the original
+                pass
+            return '%d' % week
+
+    def format_weekday(self, char, num):
+        if num < 3:
+            if char.islower():
+                # 'e': local-numeric day of week, shifted so that the
+                # locale's first week day maps to 1
+                value = 7 - self.locale.first_week_day + self.value.weekday()
+                return self.format(value % 7 + 1, num)
+            num = 3
+        weekday = self.value.weekday()
+        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
+        # NOTE(review): context is keyed on the width here, not on the
+        # pattern char; presumably 'c' should select 'stand-alone' -- confirm
+        context = {3: 'format', 4: 'format', 5: 'stand-alone'}[num]
+        return get_day_names(width, context, self.locale)[weekday]
+
+    def format_day_of_year(self, num):
+        return self.format(self.get_day_of_year(), num)
+
+    def format_day_of_week_in_month(self):
+        # 'F': e.g. "2" for the 2nd Wednesday in July
+        return '%d' % ((self.value.day - 1) // 7 + 1)
+
+    def format_period(self, char):
+        period = {0: 'am', 1: 'pm'}[int(self.value.hour >= 12)]
+        return get_period_names(locale=self.locale)[period]
+
+    def format_frac_seconds(self, num):
+        # NOTE(review): rounding then scaling by 10**num does not zero-pad the
+        # fraction consistently for all widths -- confirm against the CLDR
+        # definition of the 'S' field
+        value = str(self.value.microsecond)
+        return self.format(round(float('.%s' % value), num) * 10**num, num)
+
+    def format_milliseconds_in_day(self, num):
+        msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
+                self.value.minute * 60000 + self.value.hour * 3600000
+        return self.format(msecs, num)
+
+    def format_timezone(self, char, num):
+        width = {3: 'short', 4: 'long'}[max(3, num)]
+        if char == 'z':
+            # specific (DST-aware) zone name
+            return get_timezone_name(self.value, width, locale=self.locale)
+        elif char == 'Z':
+            # GMT offset representation
+            return get_timezone_gmt(self.value, width, locale=self.locale)
+        elif char == 'v':
+            # generic zone name, independent of DST
+            return get_timezone_name(self.value.tzinfo, width,
+                                     locale=self.locale)
+        elif char == 'V':
+            if num == 1:
+                return get_timezone_name(self.value.tzinfo, width,
+                                         uncommon=True, locale=self.locale)
+            # 'VVVV': location format, e.g. country and/or city
+            return get_timezone_location(self.value.tzinfo, locale=self.locale)
+
+    def format(self, value, length):
+        # zero-pad ``value`` to at least ``length`` digits
+        return ('%%0%dd' % length) % value
+
+    def get_day_of_year(self, date=None):
+        if date is None:
+            date = self.value
+        return (date - date_(date.year, 1, 1)).days + 1
+
+    def get_week_number(self, day_of_period, day_of_week=None):
+        """Return the number of the week of a day within a period. This may be
+        the week number in a year or the week number in a month.
+
+        Usually this will return a value equal to or greater than 1, but if the
+        first week of the period is so short that it actually counts as the last
+        week of the previous period, this function will return 0.
+
+        >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('de_DE'))
+        >>> format.get_week_number(6)
+        1
+
+        >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('en_US'))
+        >>> format.get_week_number(6)
+        2
+
+        :param day_of_period: the number of the day in the period (usually
+                              either the day of month or the day of year)
+        :param day_of_week: the week day; if omitted, the week day of the
+                            current date is assumed
+        """
+        if day_of_week is None:
+            day_of_week = self.value.weekday()
+        first_day = (day_of_week - self.locale.first_week_day -
+                     day_of_period + 1) % 7
+        if first_day < 0:
+            first_day += 7
+        week_number = (day_of_period + first_day - 1) // 7
+        if 7 - first_day >= self.locale.min_week_days:
+            # the first (partial) week is long enough to count as week 1
+            week_number += 1
+        return week_number
+
+
+# Maps each supported pattern field character to the list of field lengths
+# that are valid for it (`None` means any length is accepted); used by
+# `parse_pattern` to validate fields
+PATTERN_CHARS = {
+    'G': [1, 2, 3, 4, 5], # era
+    'y': None, 'Y': None, 'u': None, # year
+    'Q': [1, 2, 3, 4], 'q': [1, 2, 3, 4], # quarter
+    'M': [1, 2, 3, 4, 5], 'L': [1, 2, 3, 4, 5], # month
+    'w': [1, 2], 'W': [1], # week
+    'd': [1, 2], 'D': [1, 2, 3], 'F': [1], 'g': None, # day
+    'E': [1, 2, 3, 4, 5], 'e': [1, 2, 3, 4, 5], 'c': [1, 3, 4, 5], # week day
+    'a': [1], # period
+    'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour
+    'm': [1, 2], # minute
+    's': [1, 2], 'S': None, 'A': None, # second
+    'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4] # zone
+}
+
+def parse_pattern(pattern):
+    """Parse date, time, and datetime format patterns.
+
+    >>> parse_pattern("MMMMd").format
+    u'%(MMMM)s%(d)s'
+    >>> parse_pattern("MMM d, yyyy").format
+    u'%(MMM)s %(d)s, %(yyyy)s'
+
+    Pattern can contain literal strings in single quotes:
+
+    >>> parse_pattern("H:mm' Uhr 'z").format
+    u'%(H)s:%(mm)s Uhr %(z)s'
+
+    An actual single quote can be used by using two adjacent single quote
+    characters:
+
+    >>> parse_pattern("hh' o''clock'").format
+    u"%(hh)s o'clock"
+
+    :param pattern: the formatting pattern to parse
+    """
+    if type(pattern) is DateTimePattern:
+        # already parsed
+        return pattern
+
+    result = []
+    quotebuf = None
+    charbuf = []
+    # one-element lists so the nested helpers below can rebind the values
+    # (Python 2 has no ``nonlocal``)
+    fieldchar = ['']
+    fieldnum = [0]
+
+    def append_chars():
+        # flush accumulated literal text, escaping '%' for the format string
+        result.append(''.join(charbuf).replace('%', '%%'))
+        del charbuf[:]
+
+    def append_field():
+        # flush the accumulated field, validating its length first
+        limit = PATTERN_CHARS[fieldchar[0]]
+        if limit and fieldnum[0] not in limit:
+            raise ValueError('Invalid length for field: %r'
+                             % (fieldchar[0] * fieldnum[0]))
+        result.append('%%(%s)s' % (fieldchar[0] * fieldnum[0]))
+        fieldchar[0] = ''
+        fieldnum[0] = 0
+
+    # "''" (an escaped single quote) is temporarily replaced by NUL so the
+    # quote handling below does not trip over it; restored at the end
+    for idx, char in enumerate(pattern.replace("''", '\0')):
+        if quotebuf is None:
+            if char == "'": # quote started
+                if fieldchar[0]:
+                    append_field()
+                elif charbuf:
+                    append_chars()
+                quotebuf = []
+            elif char in PATTERN_CHARS:
+                if charbuf:
+                    append_chars()
+                if char == fieldchar[0]:
+                    # same field character: extend the current field's width
+                    fieldnum[0] += 1
+                else:
+                    if fieldchar[0]:
+                        append_field()
+                    fieldchar[0] = char
+                    fieldnum[0] = 1
+            else:
+                if fieldchar[0]:
+                    append_field()
+                charbuf.append(char)
+
+        elif quotebuf is not None:
+            if char == "'": # end of quote
+                charbuf.extend(quotebuf)
+                quotebuf = None
+            else: # inside quote
+                quotebuf.append(char)
+
+    # flush whatever is pending at the end of the pattern
+    if fieldchar[0]:
+        append_field()
+    elif charbuf:
+        append_chars()
+
+    return DateTimePattern(pattern, u''.join(result).replace('\0', "'"))
diff --git a/babel3/babel/localedata.py b/babel3/babel/localedata.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/localedata.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Low-level locale data access.
+
+:note: The `Locale` class, which uses this module under the hood, provides a
+ more convenient interface for accessing the locale data.
+"""
+
+import os
+import pickle
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+from UserDict import DictMixin
+
+__all__ = ['exists', 'list', 'load']
+__docformat__ = 'restructuredtext en'
+
+_cache = {}
+_cache_lock = threading.RLock()
+_dirname = os.path.join(os.path.dirname(__file__), 'localedata')
+
+
+def exists(name):
+    """Check whether locale data is available for the given locale.
+
+    :param name: the locale identifier string
+    :return: `True` if the locale data exists, `False` otherwise
+    :rtype: `bool`
+    """
+    # Check the in-memory cache first to avoid touching the filesystem for
+    # locales that have already been loaded.
+    if name in _cache:
+        return True
+    return os.path.exists(os.path.join(_dirname, '%s.dat' % name))
+
+
+def list():
+    """Return a list of all locale identifiers for which locale data is
+    available.
+
+    NOTE: this function intentionally shadows the built-in ``list`` within
+    this module; it is part of the module's public API (see ``__all__``).
+
+    :return: a list of locale identifiers (strings)
+    :rtype: `list`
+    :since: version 0.8.1
+    """
+    # Every "<locale>.dat" pickle in the data directory corresponds to one
+    # locale; "root" holds only shared defaults and is not a real locale.
+    return [stem for stem, extension in [
+        os.path.splitext(filename) for filename in os.listdir(_dirname)
+    ] if extension == '.dat' and stem != 'root']
+
+
+def load(name, merge_inherited=True):
+    """Load the locale data for the given locale.
+
+    The locale data is a dictionary that contains much of the data defined by
+    the Common Locale Data Repository (CLDR). This data is stored as a
+    collection of pickle files inside the ``babel`` package.
+
+    >>> d = load('en_US')
+    >>> d['languages']['sv']
+    u'Swedish'
+
+    Note that the results are cached, and subsequent requests for the same
+    locale return the same dictionary:
+
+    >>> d1 = load('en_US')
+    >>> d2 = load('en_US')
+    >>> d1 is d2
+    True
+
+    :param name: the locale identifier string (or "root")
+    :param merge_inherited: whether the inherited data should be merged into
+                            the data of the requested locale
+    :return: the locale data
+    :rtype: `dict`
+    :raise `IOError`: if no locale data file is found for the given locale
+                      identifier, or one of the locales it inherits from
+    """
+    # The whole load (including the recursive parent loads) runs under one
+    # reentrant lock so concurrent callers never see a half-built cache entry.
+    _cache_lock.acquire()
+    try:
+        data = _cache.get(name)
+        if not data:
+            # Load inherited data
+            if name == 'root' or not merge_inherited:
+                data = {}
+            else:
+                # The parent locale is everything up to the last underscore:
+                # e.g. "en_US" inherits from "en", which inherits from "root".
+                parts = name.split('_')
+                if len(parts) == 1:
+                    parent = 'root'
+                else:
+                    parent = '_'.join(parts[:-1])
+                data = load(parent).copy()
+            filename = os.path.join(_dirname, '%s.dat' % name)
+            fileobj = open(filename, 'rb')
+            try:
+                if name != 'root' and merge_inherited:
+                    merge(data, pickle.load(fileobj))
+                else:
+                    data = pickle.load(fileobj)
+                _cache[name] = data
+            finally:
+                fileobj.close()
+        return data
+    finally:
+        _cache_lock.release()
+
+
+def merge(dict1, dict2):
+    """Merge the data from `dict2` into the `dict1` dictionary, making copies
+    of nested dictionaries.
+
+    >>> d = {1: 'foo', 3: 'baz'}
+    >>> merge(d, {1: 'Foo', 2: 'Bar'})
+    >>> items = d.items(); items.sort(); items
+    [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
+
+    :param dict1: the dictionary to merge into (modified in place)
+    :param dict2: the dictionary containing the data that should be merged
+    """
+    for key, val2 in dict2.items():
+        if val2 is not None:
+            val1 = dict1.get(key)
+            if isinstance(val2, dict):
+                if val1 is None:
+                    val1 = {}
+                if isinstance(val1, Alias):
+                    # An alias overridden by a dict becomes an
+                    # (alias, overrides) pair, resolved lazily later (see
+                    # LocaleDataDict.__getitem__ in this module).
+                    val1 = (val1, val2)
+                elif isinstance(val1, tuple):
+                    # Already an (alias, overrides) pair: merge the new data
+                    # into a copy of the overrides.
+                    alias, others = val1
+                    others = others.copy()
+                    merge(others, val2)
+                    val1 = (alias, others)
+                else:
+                    # Plain nested dict: merge into a copy so the parent
+                    # locale's data is never mutated.
+                    val1 = val1.copy()
+                    merge(val1, val2)
+            else:
+                val1 = val2
+            dict1[key] = val1
+
+
+class Alias(object):
+    """Representation of an alias in the locale data.
+
+    An alias is a value that refers to some other part of the locale data,
+    as specified by the `keys`.
+    """
+
+    def __init__(self, keys):
+        # The path of dictionary keys leading to the aliased value.
+        self.keys = tuple(keys)
+
+    def __repr__(self):
+        return '<%s %r>' % (type(self).__name__, self.keys)
+
+    def resolve(self, data):
+        """Resolve the alias based on the given data.
+
+        This is done recursively, so if one alias resolves to a second alias,
+        that second alias will also be resolved.
+
+        :param data: the locale data
+        :type data: `dict`
+        :return: the value the alias (ultimately) refers to
+        """
+        base = data
+        for key in self.keys:
+            data = data[key]
+        if isinstance(data, Alias):
+            data = data.resolve(base)
+        elif isinstance(data, tuple):
+            # (alias, overrides) pair: resolve just the alias part here.
+            alias, others = data
+            data = alias.resolve(base)
+        return data
+
+
+class LocaleDataDict(DictMixin, dict):
+    """Dictionary wrapper that automatically resolves aliases to the actual
+    values.
+    """
+
+    def __init__(self, data, base=None):
+        dict.__init__(self, data)
+        # ``base`` is the root dictionary that alias key paths are resolved
+        # against; it defaults to the wrapped data itself.
+        if base is None:
+            base = data
+        self.base = base
+
+    def __getitem__(self, key):
+        orig = val = dict.__getitem__(self, key)
+        if isinstance(val, Alias): # resolve an alias
+            val = val.resolve(self.base)
+        if isinstance(val, tuple): # Merge a partial dict with an alias
+            alias, others = val
+            val = alias.resolve(self.base).copy()
+            merge(val, others)
+        if type(val) is dict: # Return a nested alias-resolving dict
+            val = LocaleDataDict(val, base=self.base)
+        if val is not orig:
+            # Cache the resolved value so the work is only done once.
+            self[key] = val
+        return val
+
+    def copy(self):
+        return LocaleDataDict(dict.copy(self), base=self.base)
diff --git a/babel3/babel/messages/__init__.py b/babel3/babel/messages/__init__.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/messages/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Support for ``gettext`` message catalogs."""
+
+from babel.messages.catalog import *
diff --git a/babel3/babel/messages/catalog.py b/babel3/babel/messages/catalog.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/messages/catalog.py
@@ -0,0 +1,802 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Data structures for message catalogs."""
+
+from cgi import parse_header
+from datetime import datetime
+from difflib import get_close_matches
+from email import message_from_string
+from copy import copy
+import re
+import time
+
+from babel import __version__ as VERSION
+from babel.core import Locale
+from babel.dates import format_datetime
+from babel.messages.plurals import get_plural
+from babel.util import odict, distinct, set, LOCALTZ, UTC, FixedOffsetTimezone
+
+__all__ = ['Message', 'Catalog', 'TranslationError']
+__docformat__ = 'restructuredtext en'
+
+
+#: Regular expression matching Python ``%``-style format placeholders.
+#: Group 1 captures the optional mapping key (``%(name)s``), group 2 the
+#: flags/width/precision/length modifiers, group 3 the conversion character.
+PYTHON_FORMAT = re.compile(r'''(?x)
+    \%
+        (?:\(([\w]*)\))?
+        (
+            [-#0\ +]?(?:\*|[\d]+)?
+            (?:\.(?:\*|[\d]+))?
+            [hlL]?
+        )
+        ([diouxXeEfFgGcrs%])
+''')
+
+
+class Message(object):
+    """Representation of a single message in a catalog."""
+
+    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
+                 user_comments=(), previous_id=(), lineno=None, context=None):
+        """Create the message object.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple for
+                   pluralizable messages
+        :param string: the translated message string, or a
+                       ``(singular, plural)`` tuple for pluralizable messages
+        :param locations: a sequence of ``(filename, lineno)`` tuples
+        :param flags: a set or sequence of flags
+        :param auto_comments: a sequence of automatic comments for the message
+        :param user_comments: a sequence of user comments for the message
+        :param previous_id: the previous message ID, or a ``(singular, plural)``
+                            tuple for pluralizable messages
+        :param lineno: the line number on which the msgid line was found in the
+                       PO file, if any
+        :param context: the message context
+        """
+        self.id = id #: The message ID
+        # Pluralizable messages always carry a tuple of translations, one
+        # per plural form.
+        if not string and self.pluralizable:
+            string = (u'', u'')
+        self.string = string #: The message translation
+        self.locations = list(distinct(locations))
+        self.flags = set(flags)
+        # Keep the python-format flag in sync with the actual message ID
+        # instead of trusting the incoming flags.
+        if id and self.python_format:
+            self.flags.add('python-format')
+        else:
+            self.flags.discard('python-format')
+        self.auto_comments = list(distinct(auto_comments))
+        self.user_comments = list(distinct(user_comments))
+        if isinstance(previous_id, basestring):
+            self.previous_id = [previous_id]
+        else:
+            self.previous_id = list(previous_id)
+        self.lineno = lineno
+        self.context = context
+
+    def __repr__(self):
+        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
+                                        list(self.flags))
+
+    def __cmp__(self, obj):
+        """Compare Messages, taking into account plural ids"""
+        if isinstance(obj, Message):
+            plural = self.pluralizable
+            obj_plural = obj.pluralizable
+            if plural and obj_plural:
+                return cmp(self.id[0], obj.id[0])
+            elif plural:
+                return cmp(self.id[0], obj.id)
+            elif obj_plural:
+                return cmp(self.id, obj.id[0])
+        return cmp(self.id, obj.id)
+
+    def clone(self):
+        """Return an independent copy of this message; every constructor
+        argument is passed through `copy.copy` first."""
+        return Message(*map(copy, (self.id, self.string, self.locations,
+                                   self.flags, self.auto_comments,
+                                   self.user_comments, self.previous_id,
+                                   self.lineno, self.context)))
+
+    def check(self, catalog=None):
+        """Run various validation checks on the message. Some validations
+        are only performed if the catalog is provided. This method returns
+        a sequence of `TranslationError` objects.
+
+        :rtype: ``iterator``
+        :param catalog: A catalog instance that is passed to the checkers
+        :see: `Catalog.check` for a way to perform checks for all messages
+              in a catalog.
+        """
+        # Imported here to avoid a circular import with babel.messages.checkers.
+        from babel.messages.checkers import checkers
+        errors = []
+        for checker in checkers:
+            try:
+                checker(catalog, self)
+            except TranslationError, e:
+                errors.append(e)
+        return errors
+
+    def fuzzy(self):
+        return 'fuzzy' in self.flags
+    fuzzy = property(fuzzy, doc="""\
+        Whether the translation is fuzzy.
+
+        >>> Message('foo').fuzzy
+        False
+        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
+        >>> msg.fuzzy
+        True
+        >>> msg
+        <Message 'foo' (flags: ['fuzzy'])>
+
+        :type: `bool`
+        """)
+
+    def pluralizable(self):
+        return isinstance(self.id, (list, tuple))
+    pluralizable = property(pluralizable, doc="""\
+        Whether the message is pluralizable.
+
+        >>> Message('foo').pluralizable
+        False
+        >>> Message(('foo', 'bar')).pluralizable
+        True
+
+        :type: `bool`
+        """)
+
+    def python_format(self):
+        ids = self.id
+        if not isinstance(ids, (list, tuple)):
+            ids = [ids]
+        return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids]))
+    python_format = property(python_format, doc="""\
+        Whether the message contains Python-style parameters.
+
+        >>> Message('foo %(name)s bar').python_format
+        True
+        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
+        True
+
+        :type: `bool`
+        """)
+
+
+class TranslationError(Exception):
+    """Exception thrown by translation checkers when invalid message
+    translations are encountered.
+
+    Raised by the checker functions in `babel.messages.checkers` and
+    collected (not propagated) by `Message.check`.
+    """
+
+
+DEFAULT_HEADER = u"""\
+# Translations template for PROJECT.
+# Copyright (C) YEAR ORGANIZATION
+# This file is distributed under the same license as the PROJECT project.
+# FIRST AUTHOR , YEAR.
+#"""
+
+
+class Catalog(object):
+ """Representation of a message catalog."""
+
+ def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
+ project=None, version=None, copyright_holder=None,
+ msgid_bugs_address=None, creation_date=None,
+ revision_date=None, last_translator=None, language_team=None,
+ charset='utf-8', fuzzy=True):
+ """Initialize the catalog object.
+
+ :param locale: the locale identifier or `Locale` object, or `None`
+ if the catalog is not bound to a locale (which basically
+ means it's a template)
+ :param domain: the message domain
+ :param header_comment: the header comment as string, or `None` for the
+ default header
+ :param project: the project's name
+ :param version: the project's version
+ :param copyright_holder: the copyright holder of the catalog
+ :param msgid_bugs_address: the email address or URL to submit bug
+ reports to
+ :param creation_date: the date the catalog was created
+ :param revision_date: the date the catalog was revised
+ :param last_translator: the name and email of the last translator
+ :param language_team: the name and email of the language team
+ :param charset: the encoding to use in the output
+ :param fuzzy: the fuzzy bit on the catalog header
+ """
+ self.domain = domain #: The message domain
+ if locale:
+ locale = Locale.parse(locale)
+ self.locale = locale #: The locale or `None`
+ self._header_comment = header_comment
+ self._messages = odict()
+
+ self.project = project or 'PROJECT' #: The project name
+ self.version = version or 'VERSION' #: The project version
+ self.copyright_holder = copyright_holder or 'ORGANIZATION'
+ self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
+
+ self.last_translator = last_translator or 'FULL NAME '
+ """Name and email address of the last translator."""
+ self.language_team = language_team or 'LANGUAGE '
+ """Name and email address of the language team."""
+
+ self.charset = charset or 'utf-8'
+
+ if creation_date is None:
+ creation_date = datetime.now(LOCALTZ)
+ elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
+ creation_date = creation_date.replace(tzinfo=LOCALTZ)
+ self.creation_date = creation_date #: Creation date of the template
+ if revision_date is None:
+ revision_date = datetime.now(LOCALTZ)
+ elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
+ revision_date = revision_date.replace(tzinfo=LOCALTZ)
+ self.revision_date = revision_date #: Last revision date of the catalog
+ self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`)
+
+ self.obsolete = odict() #: Dictionary of obsolete messages
+ self._num_plurals = None
+ self._plural_expr = None
+
+    def _get_header_comment(self):
+        # Substitute the known upper-case placeholder variables with the
+        # catalog's current metadata every time the comment is read.
+        comment = self._header_comment
+        comment = comment.replace('PROJECT', self.project) \
+                         .replace('VERSION', self.version) \
+                         .replace('YEAR', self.revision_date.strftime('%Y')) \
+                         .replace('ORGANIZATION', self.copyright_holder)
+        if self.locale:
+            comment = comment.replace('Translations template', '%s translations'
+                                      % self.locale.english_name)
+        return comment
+
+    def _set_header_comment(self, string):
+        self._header_comment = string
+
+    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
+    The header comment for the catalog.
+
+    >>> catalog = Catalog(project='Foobar', version='1.0',
+    ...                   copyright_holder='Foo Company')
+    >>> print catalog.header_comment #doctest: +ELLIPSIS
+    # Translations template for Foobar.
+    # Copyright (C) ... Foo Company
+    # This file is distributed under the same license as the Foobar project.
+    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
+    #
+
+    The header can also be set from a string. Any known upper-case variables
+    will be replaced when the header is retrieved again:
+
+    >>> catalog = Catalog(project='Foobar', version='1.0',
+    ...                   copyright_holder='Foo Company')
+    >>> catalog.header_comment = '''\\
+    ... # The POT for my really cool PROJECT project.
+    ... # Copyright (C) 1990-2003 ORGANIZATION
+    ... # This file is distributed under the same license as the PROJECT
+    ... # project.
+    ... #'''
+    >>> print catalog.header_comment
+    # The POT for my really cool Foobar project.
+    # Copyright (C) 1990-2003 Foo Company
+    # This file is distributed under the same license as the Foobar
+    # project.
+    #
+
+    :type: `unicode`
+    """)
+
+ def _get_mime_headers(self):
+ headers = []
+ headers.append(('Project-Id-Version',
+ '%s %s' % (self.project, self.version)))
+ headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
+ headers.append(('POT-Creation-Date',
+ format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
+ locale='en')))
+ if self.locale is None:
+ headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE'))
+ headers.append(('Last-Translator', 'FULL NAME '))
+ headers.append(('Language-Team', 'LANGUAGE '))
+ else:
+ headers.append(('PO-Revision-Date',
+ format_datetime(self.revision_date,
+ 'yyyy-MM-dd HH:mmZ', locale='en')))
+ headers.append(('Last-Translator', self.last_translator))
+ headers.append(('Language-Team',
+ self.language_team.replace('LANGUAGE',
+ str(self.locale))))
+ headers.append(('Plural-Forms', self.plural_forms))
+ headers.append(('MIME-Version', '1.0'))
+ headers.append(('Content-Type',
+ 'text/plain; charset=%s' % self.charset))
+ headers.append(('Content-Transfer-Encoding', '8bit'))
+ headers.append(('Generated-By', 'Babel %s\n' % VERSION))
+ return headers
+
+ def _set_mime_headers(self, headers):
+ for name, value in headers:
+ if name.lower() == 'content-type':
+ mimetype, params = parse_header(value)
+ if 'charset' in params:
+ self.charset = params['charset'].lower()
+ break
+ for name, value in headers:
+ name = name.lower().decode(self.charset)
+ value = value.decode(self.charset)
+ if name == 'project-id-version':
+ parts = value.split(' ')
+ self.project = u' '.join(parts[:-1])
+ self.version = parts[-1]
+ elif name == 'report-msgid-bugs-to':
+ self.msgid_bugs_address = value
+ elif name == 'last-translator':
+ self.last_translator = value
+ elif name == 'language-team':
+ self.language_team = value
+ elif name == 'plural-forms':
+ _, params = parse_header(' ;' + value)
+ self._num_plurals = int(params.get('nplurals', 2))
+ self._plural_expr = params.get('plural', '(n != 1)')
+ elif name == 'pot-creation-date':
+ # FIXME: this should use dates.parse_datetime as soon as that
+ # is ready
+ value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
+
+ tt = time.strptime(value, '%Y-%m-%d %H:%M')
+ ts = time.mktime(tt)
+
+ # Separate the offset into a sign component, hours, and minutes
+ plus_minus_s, rest = tzoffset[0], tzoffset[1:]
+ hours_offset_s, mins_offset_s = rest[:2], rest[2:]
+
+ # Make them all integers
+ plus_minus = int(plus_minus_s + '1')
+ hours_offset = int(hours_offset_s)
+ mins_offset = int(mins_offset_s)
+
+ # Calculate net offset
+ net_mins_offset = hours_offset * 60
+ net_mins_offset += mins_offset
+ net_mins_offset *= plus_minus
+
+ # Create an offset object
+ tzoffset = FixedOffsetTimezone(net_mins_offset)
+
+ # Store the offset in a datetime object
+ dt = datetime.fromtimestamp(ts)
+ self.creation_date = dt.replace(tzinfo=tzoffset)
+ elif name == 'po-revision-date':
+ # Keep the value if it's not the default one
+ if 'YEAR' not in value:
+ # FIXME: this should use dates.parse_datetime as soon as
+ # that is ready
+ value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
+ tt = time.strptime(value, '%Y-%m-%d %H:%M')
+ ts = time.mktime(tt)
+
+ # Separate the offset into a sign component, hours, and
+ # minutes
+ plus_minus_s, rest = tzoffset[0], tzoffset[1:]
+ hours_offset_s, mins_offset_s = rest[:2], rest[2:]
+
+ # Make them all integers
+ plus_minus = int(plus_minus_s + '1')
+ hours_offset = int(hours_offset_s)
+ mins_offset = int(mins_offset_s)
+
+ # Calculate net offset
+ net_mins_offset = hours_offset * 60
+ net_mins_offset += mins_offset
+ net_mins_offset *= plus_minus
+
+ # Create an offset object
+ tzoffset = FixedOffsetTimezone(net_mins_offset)
+
+ # Store the offset in a datetime object
+ dt = datetime.fromtimestamp(ts)
+ self.revision_date = dt.replace(tzinfo=tzoffset)
+
+ mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
+ The MIME headers of the catalog, used for the special ``msgid ""`` entry.
+
+ The behavior of this property changes slightly depending on whether a locale
+ is set or not, the latter indicating that the catalog is actually a template
+ for actual translations.
+
+ Here's an example of the output for such a catalog template:
+
+ >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
+ >>> catalog = Catalog(project='Foobar', version='1.0',
+ ... creation_date=created)
+ >>> for name, value in catalog.mime_headers:
+ ... print '%s: %s' % (name, value)
+ Project-Id-Version: Foobar 1.0
+ Report-Msgid-Bugs-To: EMAIL@ADDRESS
+ POT-Creation-Date: 1990-04-01 15:30+0000
+ PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
+ Last-Translator: FULL NAME
+ Language-Team: LANGUAGE
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset=utf-8
+ Content-Transfer-Encoding: 8bit
+ Generated-By: Babel ...
+
+ And here's an example of the output when the locale is set:
+
+ >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
+ >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
+ ... creation_date=created, revision_date=revised,
+ ... last_translator='John Doe ',
+ ... language_team='de_DE ')
+ >>> for name, value in catalog.mime_headers:
+ ... print '%s: %s' % (name, value)
+ Project-Id-Version: Foobar 1.0
+ Report-Msgid-Bugs-To: EMAIL@ADDRESS
+ POT-Creation-Date: 1990-04-01 15:30+0000
+ PO-Revision-Date: 1990-08-03 12:00+0000
+ Last-Translator: John Doe
+ Language-Team: de_DE
+ Plural-Forms: nplurals=2; plural=(n != 1)
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset=utf-8
+ Content-Transfer-Encoding: 8bit
+ Generated-By: Babel ...
+
+ :type: `list`
+ """)
+
+ def num_plurals(self):
+ if self._num_plurals is None:
+ num = 2
+ if self.locale:
+ num = get_plural(self.locale)[0]
+ self._num_plurals = num
+ return self._num_plurals
+ num_plurals = property(num_plurals, doc="""\
+ The number of plurals used by the catalog or locale.
+
+ >>> Catalog(locale='en').num_plurals
+ 2
+ >>> Catalog(locale='ga').num_plurals
+ 3
+
+ :type: `int`
+ """)
+
+ def plural_expr(self):
+ if self._plural_expr is None:
+ expr = '(n != 1)'
+ if self.locale:
+ expr = get_plural(self.locale)[1]
+ self._plural_expr = expr
+ return self._plural_expr
+ plural_expr = property(plural_expr, doc="""\
+ The plural expression used by the catalog or locale.
+
+ >>> Catalog(locale='en').plural_expr
+ '(n != 1)'
+ >>> Catalog(locale='ga').plural_expr
+ '(n==1 ? 0 : n==2 ? 1 : 2)'
+
+ :type: `basestring`
+ """)
+
+ def plural_forms(self):
+ return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)
+ plural_forms = property(plural_forms, doc="""\
+ Return the plural forms declaration for the locale.
+
+ >>> Catalog(locale='en').plural_forms
+ 'nplurals=2; plural=(n != 1)'
+ >>> Catalog(locale='pt_BR').plural_forms
+ 'nplurals=2; plural=(n > 1)'
+
+ :type: `str`
+ """)
+
+ def __contains__(self, id):
+ """Return whether the catalog has a message with the specified ID."""
+ return self._key_for(id) in self._messages
+
+ def __len__(self):
+ """The number of messages in the catalog.
+
+ This does not include the special ``msgid ""`` entry.
+ """
+ return len(self._messages)
+
+    def __iter__(self):
+        """Iterates through all the entries in the catalog, in the order they
+        were added, yielding a `Message` object for every entry.
+
+        :rtype: ``iterator``
+        """
+        buf = []
+        for name, value in self.mime_headers:
+            buf.append('%s: %s' % (name, value))
+        flags = set()
+        if self.fuzzy:
+            flags |= set(['fuzzy'])
+        # The synthesized header entry (msgid "") comes first, carrying the
+        # MIME headers as its "translation".
+        yield Message(u'', '\n'.join(buf), flags=flags)
+        for key in self._messages:
+            yield self._messages[key]
+
+ def __repr__(self):
+ locale = ''
+ if self.locale:
+ locale = ' %s' % self.locale
+ return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
+
+ def __delitem__(self, id):
+ """Delete the message with the specified ID."""
+ self.delete(id)
+
+ def __getitem__(self, id):
+ """Return the message with the specified ID.
+
+ :param id: the message ID
+ :return: the message with the specified ID, or `None` if no such
+ message is in the catalog
+ :rtype: `Message`
+ """
+ return self.get(id)
+
+    def __setitem__(self, id, message):
+        """Add or update the message with the specified ID.
+
+        >>> catalog = Catalog()
+        >>> catalog[u'foo'] = Message(u'foo')
+        >>> catalog[u'foo']
+        <Message u'foo' (flags: [])>
+
+        If a message with that ID is already in the catalog, it is updated
+        to include the locations and flags of the new message.
+
+        >>> catalog = Catalog()
+        >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
+        >>> catalog[u'foo'].locations
+        [('main.py', 1)]
+        >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
+        >>> catalog[u'foo'].locations
+        [('main.py', 1), ('utils.py', 5)]
+
+        :param id: the message ID
+        :param message: the `Message` object
+        """
+        assert isinstance(message, Message), 'expected a Message object'
+        key = self._key_for(id, message.context)
+        current = self._messages.get(key)
+        if current:
+            if message.pluralizable and not current.pluralizable:
+                # The new message adds pluralization
+                current.id = message.id
+                current.string = message.string
+            # Merge metadata of the new message into the existing one,
+            # dropping duplicates.
+            current.locations = list(distinct(current.locations +
+                                              message.locations))
+            current.auto_comments = list(distinct(current.auto_comments +
+                                                  message.auto_comments))
+            current.user_comments = list(distinct(current.user_comments +
+                                                  message.user_comments))
+            current.flags |= message.flags
+            message = current
+        elif id == '':
+            # special treatment for the header message
+            headers = message_from_string(message.string.encode(self.charset))
+            self.mime_headers = headers.items()
+            self.header_comment = '\n'.join(['# %s' % comment for comment
+                                             in message.user_comments])
+            self.fuzzy = message.fuzzy
+        else:
+            if isinstance(id, (list, tuple)):
+                assert isinstance(message.string, (list, tuple)), \
+                    'Expected sequence but got %s' % type(message.string)
+            self._messages[key] = message
+
+    def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
+            user_comments=(), previous_id=(), lineno=None, context=None):
+        """Add or update the message with the specified ID.
+
+        >>> catalog = Catalog()
+        >>> catalog.add(u'foo')
+        >>> catalog[u'foo']
+        <Message u'foo' (flags: [])>
+
+        This method simply constructs a `Message` object with the given
+        arguments and invokes `__setitem__` with that object.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple for
+                   pluralizable messages
+        :param string: the translated message string, or a
+                       ``(singular, plural)`` tuple for pluralizable messages
+        :param locations: a sequence of ``(filename, lineno)`` tuples
+        :param flags: a set or sequence of flags
+        :param auto_comments: a sequence of automatic comments
+        :param user_comments: a sequence of user comments
+        :param previous_id: the previous message ID, or a ``(singular, plural)``
+                            tuple for pluralizable messages
+        :param lineno: the line number on which the msgid line was found in the
+                       PO file, if any
+        :param context: the message context
+        """
+        self[id] = Message(id, string, list(locations), flags, auto_comments,
+                           user_comments, previous_id, lineno=lineno,
+                           context=context)
+
+    def check(self):
+        """Run various validation checks on the translations in the catalog.
+
+        For every message which fails validation, this method yields a
+        ``(message, errors)`` tuple, where ``message`` is the `Message` object
+        and ``errors`` is a sequence of `TranslationError` objects.
+
+        :rtype: ``iterator``
+        """
+        for message in self._messages.values():
+            errors = message.check(catalog=self)
+            if errors:
+                yield message, errors
+
+ def get(self, id, context=None):
+ """Return the message with the specified ID and context.
+
+ :param id: the message ID
+ :param context: the message context, or ``None`` for no context
+ :return: the message with the specified ID, or `None` if no such
+ message is in the catalog
+ :rtype: `Message`
+ """
+ return self._messages.get(self._key_for(id, context))
+
+ def delete(self, id, context=None):
+ """Delete the message with the specified ID and context.
+
+ :param id: the message ID
+ :param context: the message context, or ``None`` for no context
+ """
+ key = self._key_for(id, context)
+ if key in self._messages:
+ del self._messages[key]
+
+    def update(self, template, no_fuzzy_matching=False):
+        """Update the catalog based on the given template catalog.
+
+        >>> from babel.messages import Catalog
+        >>> template = Catalog()
+        >>> template.add('green', locations=[('main.py', 99)])
+        >>> template.add('blue', locations=[('main.py', 100)])
+        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
+        >>> catalog = Catalog(locale='de_DE')
+        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
+        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
+        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
+        ...             locations=[('util.py', 38)])
+
+        >>> catalog.update(template)
+        >>> len(catalog)
+        3
+
+        >>> msg1 = catalog['green']
+        >>> msg1.string
+        >>> msg1.locations
+        [('main.py', 99)]
+
+        >>> msg2 = catalog['blue']
+        >>> msg2.string
+        u'blau'
+        >>> msg2.locations
+        [('main.py', 100)]
+
+        >>> msg3 = catalog['salad']
+        >>> msg3.string
+        (u'Salat', u'Salate')
+        >>> msg3.locations
+        [('util.py', 42)]
+
+        Messages that are in the catalog but not in the template are removed
+        from the main collection, but can still be accessed via the `obsolete`
+        member:
+
+        >>> 'head' in catalog
+        False
+        >>> catalog.obsolete.values()
+        [<Message 'head' (flags: [])>]
+
+        :param template: the reference catalog, usually read from a POT file
+        :param no_fuzzy_matching: whether to use fuzzy matching of message IDs
+        """
+        messages = self._messages
+        remaining = messages.copy()
+        self._messages = odict()
+
+        # Prepare for fuzzy matching
+        fuzzy_candidates = []
+        if not no_fuzzy_matching:
+            # Candidates are the keys of already-translated messages, mapped
+            # to their context so it can be re-attached after matching.
+            fuzzy_candidates = dict([
+                (self._key_for(msgid), messages[msgid].context)
+                for msgid in messages if msgid and messages[msgid].string
+            ])
+        fuzzy_matches = set()
+
+        def _merge(message, oldkey, newkey):
+            # Carry the old translation and metadata over to the new template
+            # message; mark it fuzzy when the keys only matched approximately
+            # or the plural forms do not line up.
+            message = message.clone()
+            fuzzy = False
+            if oldkey != newkey:
+                fuzzy = True
+                fuzzy_matches.add(oldkey)
+                oldmsg = messages.get(oldkey)
+                if isinstance(oldmsg.id, basestring):
+                    message.previous_id = [oldmsg.id]
+                else:
+                    message.previous_id = list(oldmsg.id)
+            else:
+                oldmsg = remaining.pop(oldkey, None)
+            message.string = oldmsg.string
+            if isinstance(message.id, (list, tuple)):
+                if not isinstance(message.string, (list, tuple)):
+                    fuzzy = True
+                    message.string = tuple(
+                        [message.string] + ([u''] * (len(message.id) - 1))
+                    )
+                elif len(message.string) != self.num_plurals:
+                    fuzzy = True
+                    message.string = tuple(message.string[:len(oldmsg.string)])
+            elif isinstance(message.string, (list, tuple)):
+                fuzzy = True
+                message.string = message.string[0]
+            message.flags |= oldmsg.flags
+            if fuzzy:
+                message.flags |= set([u'fuzzy'])
+            self[message.id] = message
+
+        for message in template:
+            if message.id:
+                key = self._key_for(message.id, message.context)
+                if key in messages:
+                    _merge(message, key, key)
+                else:
+                    if no_fuzzy_matching is False:
+                        # do some fuzzy matching with difflib
+                        if isinstance(key, tuple):
+                            matchkey = key[0] # just the msgid, no context
+                        else:
+                            matchkey = key
+                        matches = get_close_matches(matchkey.lower().strip(),
+                                                    fuzzy_candidates.keys(), 1)
+                        if matches:
+                            newkey = matches[0]
+                            newctxt = fuzzy_candidates[newkey]
+                            if newctxt is not None:
+                                newkey = newkey, newctxt
+                            _merge(message, newkey, key)
+                            continue
+
+                    self[message.id] = message
+
+        self.obsolete = odict()
+        for msgid in remaining:
+            if no_fuzzy_matching or msgid not in fuzzy_matches:
+                self.obsolete[msgid] = remaining[msgid]
+        # Make updated catalog's POT-Creation-Date equal to the template
+        # used to update the catalog
+        self.creation_date = template.creation_date
+
+    def _key_for(self, id, context=None):
+        """The key for a message is just the singular ID even for pluralizable
+        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
+        messages.
+
+        :param id: the message ID, or a ``(singular, plural)`` tuple
+        :param context: the message context, or ``None`` for no context
+        :return: the internal catalog key for the message
+        """
+        key = id
+        if isinstance(key, (list, tuple)):
+            key = id[0]
+        if context is not None:
+            key = (key, context)
+        return key
diff --git a/babel3/babel/messages/checkers.py b/babel3/babel/messages/checkers.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/messages/checkers.py
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Various routines that help with validation of translations.
+
+:since: version 0.9
+"""
+
+from itertools import izip
+from babel.messages.catalog import TranslationError, PYTHON_FORMAT
+from babel.util import set
+
+#: list of format chars that are compatible to each other
+_string_format_compatibilities = [
+ set(['i', 'd', 'u']),
+ set(['x', 'X']),
+ set(['f', 'F', 'g', 'G'])
+]
+
+
+def num_plurals(catalog, message):
+    """Verify the number of plurals in the translation.
+
+    :param catalog: the `Catalog` the message belongs to, or `None`; when
+                    `None`, only the non-pluralizable check is performed
+    :param message: the `Message` to check
+    :raises TranslationError: if a non-pluralizable message carries plural
+                              forms, or the number of plural forms does not
+                              match ``catalog.num_plurals``
+    """
+    if not message.pluralizable:
+        # a non-plural message must have a plain string translation
+        if not isinstance(message.string, basestring):
+            raise TranslationError("Found plural forms for non-pluralizable "
+                                   "message")
+        return
+
+    # skip further tests if no catalog is provided.
+    elif catalog is None:
+        return
+
+    # normalize to a tuple so a single string counts as one form
+    msgstrs = message.string
+    if not isinstance(msgstrs, (list, tuple)):
+        msgstrs = (msgstrs,)
+    if len(msgstrs) != catalog.num_plurals:
+        raise TranslationError("Wrong number of plural forms (expected %d)" %
+                               catalog.num_plurals)
+
+
+def python_format(catalog, message):
+    """Verify the format string placeholders in the translation.
+
+    Only messages flagged ``python-format`` are checked; each non-empty
+    msgstr is validated against the msgid at the same plural index.
+
+    :param catalog: the `Catalog` the message belongs to (unused here, but
+                    part of the common checker signature)
+    :param message: the `Message` to check
+    :raises TranslationError: via `_validate_format` on placeholder mismatch
+    """
+    if 'python-format' not in message.flags:
+        return
+    # normalize both sides to tuples so singular and plural messages are
+    # handled uniformly
+    msgids = message.id
+    if not isinstance(msgids, (list, tuple)):
+        msgids = (msgids,)
+    msgstrs = message.string
+    if not isinstance(msgstrs, (list, tuple)):
+        msgstrs = (msgstrs,)
+
+    for msgid, msgstr in izip(msgids, msgstrs):
+        if msgstr:
+            _validate_format(msgid, msgstr)
+
+
+def _validate_format(format, alternative):
+    """Test format string `alternative` against `format`. `format` can be the
+    msgid of a message and `alternative` one of the `msgstr`\s. The two
+    arguments are not interchangeable as `alternative` may contain less
+    placeholders if `format` uses named placeholders.
+
+    The behavior of this function is undefined if the string does not use
+    string formattings.
+
+    If the string formatting of `alternative` is compatible to `format` the
+    function returns `None`, otherwise a `TranslationError` is raised.
+
+    Examples for compatible format strings:
+
+    >>> _validate_format('Hello %s!', 'Hallo %s!')
+    >>> _validate_format('Hello %i!', 'Hallo %d!')
+
+    Example for an incompatible format strings:
+
+    >>> _validate_format('Hello %(name)s!', 'Hallo %s!')
+    Traceback (most recent call last):
+      ...
+    TranslationError: the format strings are of different kinds
+
+    This function is used by the `python_format` checker.
+
+    :param format: The original format string
+    :param alternative: The alternative format string that should be checked
+                        against format
+    :return: None on success
+    :raises TranslationError: on formatting errors
+    """
+
+    def _parse(string):
+        # collect a (name, typechar) pair for every %-placeholder found by
+        # PYTHON_FORMAT; name is None for positional placeholders
+        result = []
+        for match in PYTHON_FORMAT.finditer(string):
+            name, format, typechar = match.groups()
+            if typechar == '%' and name is None:
+                # "%%" is an escaped percent sign, not a placeholder
+                continue
+            result.append((name, str(typechar)))
+        return result
+
+    def _compatible(a, b):
+        # two type chars are compatible if equal or members of the same
+        # compatibility set (e.g. %i/%d/%u)
+        # NOTE(review): the loop variable shadows the `set` type imported
+        # from babel.util -- harmless here, but confusing
+        if a == b:
+            return True
+        for set in _string_format_compatibilities:
+            if a in set and b in set:
+                return True
+        return False
+
+    def _check_positional(results):
+        # return True if all placeholders are positional, False if all are
+        # named; raise if the string mixes the two styles
+        positional = None
+        for name, char in results:
+            if positional is None:
+                positional = name is None
+            else:
+                if (name is None) != positional:
+                    raise TranslationError('format string mixes positional '
+                                           'and named placeholders')
+        return bool(positional)
+
+    a, b = map(_parse, (format, alternative))
+
+    # now check if both strings are positional or named
+    a_positional, b_positional = map(_check_positional, (a, b))
+    if a_positional and not b_positional and not b:
+        raise TranslationError('placeholders are incompatible')
+    elif a_positional != b_positional:
+        raise TranslationError('the format strings are of different kinds')
+
+    # if we are operating on positional strings both must have the
+    # same number of format chars and those must be compatible
+    if a_positional:
+        if len(a) != len(b):
+            raise TranslationError('positional format placeholders are '
+                                   'unbalanced')
+        for idx, ((_, first), (_, second)) in enumerate(izip(a, b)):
+            if not _compatible(first, second):
+                raise TranslationError('incompatible format for placeholder '
+                                       '%d: %r and %r are not compatible' %
+                                       (idx + 1, first, second))
+
+    # otherwise the second string must not have names the first one
+    # doesn't have and the types of those included must be compatible
+    else:
+        type_map = dict(a)
+        for name, typechar in b:
+            if name not in type_map:
+                raise TranslationError('unknown named placeholder %r' % name)
+            elif not _compatible(typechar, type_map[name]):
+                raise TranslationError('incompatible format for '
+                                       'placeholder %r: '
+                                       '%r and %r are not compatible' %
+                                       (name, typechar, type_map[name]))
+
+
+def _find_checkers():
+    """Locate all available translation checkers.
+
+    Checkers are loaded from the ``babel.checkers`` entry point group when
+    setuptools (``pkg_resources``) is installed; otherwise the two builtin
+    checkers are returned.
+
+    :return: a list of checker callables taking ``(catalog, message)``
+    """
+    try:
+        from pkg_resources import working_set
+    except ImportError:
+        # setuptools is not available -- fall back to the builtin checkers
+        return [num_plurals, python_format]
+    checkers = []
+    for entry_point in working_set.iter_entry_points('babel.checkers'):
+        checkers.append(entry_point.load())
+    return checkers
+
+
+# all available checkers, resolved once at import time
+checkers = _find_checkers()
diff --git a/babel3/babel/messages/extract.py b/babel3/babel/messages/extract.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/messages/extract.py
@@ -0,0 +1,550 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Basic infrastructure for extracting localizable messages from source files.
+
+This module defines an extensible system for collecting localizable message
+strings from a variety of sources. A native extractor for Python source files
+is builtin, extractors for other sources can be added using very simple plugins.
+
+The main entry points into the extraction functionality are the functions
+`extract_from_dir` and `extract_from_file`.
+"""
+
+import os
+import sys
+from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
+
+from babel.util import parse_encoding, pathmatch, relpath, set
+from textwrap import dedent
+
+__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
+__docformat__ = 'restructuredtext en'
+
+GROUP_NAME = 'babel.extractors'
+
+DEFAULT_KEYWORDS = {
+ '_': None,
+ 'gettext': None,
+ 'ngettext': (1, 2),
+ 'ugettext': None,
+ 'ungettext': (1, 2),
+ 'dgettext': (2,),
+ 'dngettext': (2, 3),
+ 'N_': None
+}
+
+DEFAULT_MAPPING = [('**.py', 'python')]
+
+empty_msgid_warning = (
+'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") '
+'returns the header entry with meta information, not the empty string.')
+
+
+def _strip_comment_tags(comments, tags):
+    """Helper function for `extract` that strips comment tags from strings
+    in a list of comment lines.  This function operates in-place.
+
+    :param comments: the list of comment strings; modified in place
+    :param tags: the comment tag prefixes to strip from each line
+    """
+    def _strip(line):
+        # strip the first matching tag prefix, if any, then surrounding
+        # whitespace; lines without a tag are returned unchanged
+        for tag in tags:
+            if line.startswith(tag):
+                return line[len(tag):].strip()
+        return line
+    comments[:] = map(_strip, comments)
+
+
+def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
+                     options_map=None, keywords=DEFAULT_KEYWORDS,
+                     comment_tags=(), callback=None, strip_comment_tags=False):
+    """Extract messages from any source files found in the given directory.
+
+    This function generates tuples of the form:
+
+        ``(filename, lineno, message, comments)``
+
+    Which extraction method is used per file is determined by the `method_map`
+    parameter, which maps extended glob patterns to extraction method names.
+    For example, the following is the default mapping:
+
+    >>> method_map = [
+    ...     ('**.py', 'python')
+    ... ]
+
+    This basically says that files with the filename extension ".py" at any
+    level inside the directory should be processed by the "python" extraction
+    method. Files that don't match any of the mapping patterns are ignored. See
+    the documentation of the `pathmatch` function for details on the pattern
+    syntax.
+
+    The following extended mapping would also use the "genshi" extraction
+    method on any file in "templates" subdirectory:
+
+    >>> method_map = [
+    ...     ('**/templates/**.*', 'genshi'),
+    ...     ('**.py', 'python')
+    ... ]
+
+    The dictionary provided by the optional `options_map` parameter augments
+    these mappings. It uses extended glob patterns as keys, and the values are
+    dictionaries mapping options names to option values (both strings).
+
+    The glob patterns of the `options_map` do not necessarily need to be the
+    same as those used in the method mapping. For example, while all files in
+    the ``templates`` folders in an application may be Genshi applications, the
+    options for those files may differ based on extension:
+
+    >>> options_map = {
+    ...     '**/templates/**.txt': {
+    ...         'template_class': 'genshi.template:TextTemplate',
+    ...         'encoding': 'latin-1'
+    ...     },
+    ...     '**/templates/**.html': {
+    ...         'include_attrs': ''
+    ...     }
+    ... }
+
+    :param dirname: the path to the directory to extract messages from
+    :param method_map: a list of ``(pattern, method)`` tuples that maps of
+                       extraction method names to extended glob patterns
+    :param options_map: a dictionary of additional options (optional)
+    :param keywords: a dictionary mapping keywords (i.e. names of functions
+                     that should be recognized as translation functions) to
+                     tuples that specify which of their arguments contain
+                     localizable strings
+    :param comment_tags: a list of tags of translator comments to search for
+                         and include in the results
+    :param callback: a function that is called for every file that message are
+                     extracted from, just before the extraction itself is
+                     performed; the function is passed the filename, the name
+                     of the extraction method and the options dictionary as
+                     positional arguments, in that order
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
+    :return: an iterator over ``(filename, lineno, funcname, message)`` tuples
+    :rtype: ``iterator``
+    :see: `pathmatch`
+    """
+    if options_map is None:
+        options_map = {}
+
+    absname = os.path.abspath(dirname)
+    for root, dirnames, filenames in os.walk(absname):
+        # NOTE(review): removing from `dirnames` while iterating over it
+        # skips the entry that follows each removal, so consecutive
+        # hidden/underscore directories may not all be pruned; iterating a
+        # copy (``dirnames[:]``) would prune reliably -- TODO confirm/fix.
+        # Mutating `dirnames` in place is intentional: it stops os.walk
+        # from descending into the removed directories.
+        for subdir in dirnames:
+            if subdir.startswith('.') or subdir.startswith('_'):
+                dirnames.remove(subdir)
+        # sort for deterministic extraction order across platforms
+        dirnames.sort()
+        filenames.sort()
+        for filename in filenames:
+            # normalized, '/'-separated path relative to `dirname`, so that
+            # the glob patterns match consistently on all platforms
+            filename = relpath(
+                os.path.join(root, filename).replace(os.sep, '/'),
+                dirname
+            )
+            for pattern, method in method_map:
+                if pathmatch(pattern, filename):
+                    filepath = os.path.join(absname, filename)
+                    # collect options from every matching options_map
+                    # pattern; when several match, the last one wins
+                    options = {}
+                    for opattern, odict in options_map.items():
+                        if pathmatch(opattern, filename):
+                            options = odict
+                    if callback:
+                        callback(filename, method, options)
+                    for lineno, message, comments in \
+                        extract_from_file(method, filepath,
+                                          keywords=keywords,
+                                          comment_tags=comment_tags,
+                                          options=options,
+                                          strip_comment_tags=
+                                              strip_comment_tags):
+                        yield filename, lineno, message, comments
+                    # only the first matching method pattern is applied
+                    break
+
+
+def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
+                      comment_tags=(), options=None, strip_comment_tags=False):
+    """Extract messages from a specific file.
+
+    This function returns a list of tuples of the form:
+
+        ``(lineno, funcname, message)``
+
+    :param filename: the path to the file to extract messages from
+    :param method: a string specifying the extraction method (e.g. "python")
+    :param keywords: a dictionary mapping keywords (i.e. names of functions
+                     that should be recognized as translation functions) to
+                     tuples that specify which of their arguments contain
+                     localizable strings
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
+    :param options: a dictionary of additional options (optional)
+    :return: the list of extracted messages
+    :rtype: `list`
+    """
+    # 'U' opens the file with universal newline handling (Python 2)
+    fileobj = open(filename, 'U')
+    try:
+        # materialize the generator before closing the file
+        return list(extract(method, fileobj, keywords, comment_tags, options,
+                            strip_comment_tags))
+    finally:
+        fileobj.close()
+
+
+def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
+            options=None, strip_comment_tags=False):
+    """Extract messages from the given file-like object using the specified
+    extraction method.
+
+    This function returns a list of tuples of the form:
+
+        ``(lineno, message, comments)``
+
+    The implementation dispatches the actual extraction to plugins, based on the
+    value of the ``method`` parameter.
+
+    >>> source = '''# foo module
+    ... def run(argv):
+    ...    print _('Hello, world!')
+    ... '''
+
+    >>> from StringIO import StringIO
+    >>> for message in extract('python', StringIO(source)):
+    ...     print message
+    (3, u'Hello, world!', [])
+
+    :param method: a string specifying the extraction method (e.g. "python");
+                   if this is a simple name, the extraction function will be
+                   looked up by entry point; if it is an explicit reference
+                   to a function (of the form ``package.module:funcname`` or
+                   ``package.module.funcname``), the corresponding function
+                   will be imported and used
+    :param fileobj: the file-like object the messages should be extracted from
+    :param keywords: a dictionary mapping keywords (i.e. names of functions
+                     that should be recognized as translation functions) to
+                     tuples that specify which of their arguments contain
+                     localizable strings
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results
+    :param options: a dictionary of additional options (optional)
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
+    :return: the list of extracted messages
+    :rtype: `list`
+    :raise ValueError: if the extraction method is not registered
+    """
+    func = None
+    if ':' in method or '.' in method:
+        # explicit function reference: "pkg.mod:func" or "pkg.mod.func"
+        if ':' not in method:
+            lastdot = method.rfind('.')
+            module, attrname = method[:lastdot], method[lastdot + 1:]
+        else:
+            module, attrname = method.split(':', 1)
+        func = getattr(__import__(module, {}, {}, [attrname]), attrname)
+    else:
+        # simple name: resolve via entry points, falling back to builtins
+        try:
+            from pkg_resources import working_set
+        except ImportError:
+            # pkg_resources is not available, so we resort to looking up the
+            # builtin extractors directly
+            builtin = {'ignore': extract_nothing, 'python': extract_python}
+            func = builtin.get(method)
+        else:
+            for entry_point in working_set.iter_entry_points(GROUP_NAME,
+                                                             method):
+                func = entry_point.load(require=True)
+                break
+    if func is None:
+        raise ValueError('Unknown extraction method %r' % method)
+
+    results = func(fileobj, keywords.keys(), comment_tags,
+                   options=options or {})
+
+    for lineno, funcname, messages, comments in results:
+        # the keyword spec lists which (1-based) call arguments hold
+        # translatable strings; default is the first argument only
+        if funcname:
+            spec = keywords[funcname] or (1,)
+        else:
+            spec = (1,)
+        if not isinstance(messages, (list, tuple)):
+            messages = [messages]
+        if not messages:
+            continue
+
+        # Validate the messages against the keyword's specification
+        msgs = []
+        invalid = False
+        # last_index is 1 based like the keyword spec
+        last_index = len(messages)
+        for index in spec:
+            if last_index < index:
+                # Not enough arguments
+                invalid = True
+                break
+            message = messages[index - 1]
+            if message is None:
+                # argument was not a plain string literal
+                invalid = True
+                break
+            msgs.append(message)
+        if invalid:
+            continue
+
+        first_msg_index = spec[0] - 1
+        if not messages[first_msg_index]:
+            # An empty string msgid isn't valid, emit a warning
+            where = '%s:%i' % (hasattr(fileobj, 'name') and \
+                               fileobj.name or '(unknown)', lineno)
+            print >> sys.stderr, empty_msgid_warning % where
+            continue
+
+        # collapse a single-message tuple down to the plain string
+        messages = tuple(msgs)
+        if len(messages) == 1:
+            messages = messages[0]
+
+        if strip_comment_tags:
+            _strip_comment_tags(comments, comment_tags)
+        yield lineno, messages, comments
+
+
+def extract_nothing(fileobj, keywords, comment_tags, options):
+    """Pseudo extractor that does not actually extract anything, but simply
+    returns an empty list.  Registered as the builtin "ignore" method.
+    """
+    return []
+
+
+def extract_python(fileobj, keywords, comment_tags, options):
+    """Extract messages from Python source code.
+
+    Implemented as a small state machine over the token stream produced by
+    `tokenize.generate_tokens`: it tracks whether we are inside a call to a
+    known keyword function (``call_stack``), accumulates adjacent string
+    literals per argument (``buf``), and attaches immediately preceding
+    translator comments.
+
+    :param fileobj: the seekable, file-like object the messages should be
+                    extracted from
+    :param keywords: a list of keywords (i.e. function names) that should be
+                     recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results
+    :param options: a dictionary of additional options (optional)
+    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+    :rtype: ``iterator``
+    """
+    # call_stack == -1 means we are not inside a keyword-function call;
+    # 0 means directly inside one; >0 inside nested parentheses
+    funcname = lineno = message_lineno = None
+    call_stack = -1
+    buf = []
+    messages = []
+    translator_comments = []
+    in_def = in_translator_comments = False
+    comment_tag = None
+
+    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
+
+    tokens = generate_tokens(fileobj.readline)
+    for tok, value, (lineno, _), _, _ in tokens:
+        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
+            in_def = True
+        elif tok == OP and value == '(':
+            if in_def:
+                # Avoid false positives for declarations such as:
+                # def gettext(arg='message'):
+                in_def = False
+                continue
+            if funcname:
+                message_lineno = lineno
+                call_stack += 1
+        elif in_def and tok == OP and value == ':':
+            # End of a class definition without parens
+            in_def = False
+            continue
+        elif call_stack == -1 and tok == COMMENT:
+            # Strip the comment token from the line
+            value = value.decode(encoding)[1:].strip()
+            if in_translator_comments and \
+                    translator_comments[-1][0] == lineno - 1:
+                # We're already inside a translator comment, continue appending
+                translator_comments.append((lineno, value))
+                continue
+            # If execution reaches this point, let's see if comment line
+            # starts with one of the comment tags
+            for comment_tag in comment_tags:
+                if value.startswith(comment_tag):
+                    in_translator_comments = True
+                    translator_comments.append((lineno, value))
+                    break
+        elif funcname and call_stack == 0:
+            if tok == OP and value == ')':
+                # end of the keyword call: flush the last argument and
+                # emit the collected messages
+                if buf:
+                    messages.append(''.join(buf))
+                    del buf[:]
+                else:
+                    messages.append(None)
+
+                if len(messages) > 1:
+                    messages = tuple(messages)
+                else:
+                    messages = messages[0]
+                # Comments don't apply unless they immediately precede the
+                # message
+                if translator_comments and \
+                        translator_comments[-1][0] < message_lineno - 1:
+                    translator_comments = []
+
+                yield (message_lineno, funcname, messages,
+                       [comment[1] for comment in translator_comments])
+
+                # reset state for the next keyword call
+                funcname = lineno = message_lineno = None
+                call_stack = -1
+                messages = []
+                translator_comments = []
+                in_translator_comments = False
+            elif tok == STRING:
+                # Unwrap quotes in a safe manner, maintaining the string's
+                # encoding
+                # https://sourceforge.net/tracker/?func=detail&atid=355470&
+                # aid=617979&group_id=5470
+                # the eval is restricted: builtins are emptied, so only the
+                # string literal itself can be evaluated
+                value = eval('# coding=%s\n%s' % (encoding, value),
+                             {'__builtins__':{}}, {})
+                if isinstance(value, str):
+                    value = value.decode(encoding)
+                buf.append(value)
+            elif tok == OP and value == ',':
+                # argument separator: flush the accumulated string parts
+                if buf:
+                    messages.append(''.join(buf))
+                    del buf[:]
+                else:
+                    messages.append(None)
+                if translator_comments:
+                    # We have translator comments, and since we're on a
+                    # comma(,) user is allowed to break into a new line
+                    # Let's increase the last comment's lineno in order
+                    # for the comment to still be a valid one
+                    old_lineno, old_comment = translator_comments.pop()
+                    translator_comments.append((old_lineno+1, old_comment))
+        elif call_stack > 0 and tok == OP and value == ')':
+            call_stack -= 1
+        elif funcname and call_stack == -1:
+            funcname = None
+        elif tok == NAME and value in keywords:
+            funcname = value
+
+
+def extract_javascript(fileobj, keywords, comment_tags, options):
+    """Extract messages from JavaScript source code.
+
+    Mirrors the state machine of `extract_python`, driven by the token
+    stream from `babel.messages.jslexer.tokenize`; additionally handles
+    string concatenation with ``+`` between adjacent literals.
+
+    :param fileobj: the seekable, file-like object the messages should be
+                    extracted from
+    :param keywords: a list of keywords (i.e. function names) that should be
+                     recognized as translation functions
+    :param comment_tags: a list of translator tags to search for and include
+                         in the results
+    :param options: a dictionary of additional options (optional)
+    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
+    :rtype: ``iterator``
+    """
+    from babel.messages.jslexer import tokenize, unquote_string
+    funcname = message_lineno = None
+    messages = []
+    last_argument = None
+    translator_comments = []
+    concatenate_next = False
+    encoding = options.get('encoding', 'utf-8')
+    last_token = None
+    # -1 = outside any keyword call, 0 = directly inside, >0 nested parens
+    call_stack = -1
+
+    for token in tokenize(fileobj.read().decode(encoding)):
+        if token.type == 'operator' and token.value == '(':
+            if funcname:
+                message_lineno = token.lineno
+                call_stack += 1
+
+        elif call_stack == -1 and token.type == 'linecomment':
+            # strip the leading "//"
+            value = token.value[2:].strip()
+            if translator_comments and \
+               translator_comments[-1][0] == token.lineno - 1:
+                translator_comments.append((token.lineno, value))
+                continue
+
+            for comment_tag in comment_tags:
+                if value.startswith(comment_tag):
+                    translator_comments.append((token.lineno, value.strip()))
+                    break
+
+        elif token.type == 'multilinecomment':
+            # only one multi-line comment may precede a translation
+            translator_comments = []
+            # strip the "/*" and "*/" delimiters
+            value = token.value[2:-2].strip()
+            for comment_tag in comment_tags:
+                if value.startswith(comment_tag):
+                    lines = value.splitlines()
+                    if lines:
+                        lines[0] = lines[0].strip()
+                        lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
+                        for offset, line in enumerate(lines):
+                            translator_comments.append((token.lineno + offset,
+                                                        line))
+                    break
+
+        elif funcname and call_stack == 0:
+            if token.type == 'operator' and token.value == ')':
+                # end of the keyword call: flush the last argument and emit
+                if last_argument is not None:
+                    messages.append(last_argument)
+                if len(messages) > 1:
+                    messages = tuple(messages)
+                elif messages:
+                    messages = messages[0]
+                else:
+                    messages = None
+
+                # Comments don't apply unless they immediately precede the
+                # message
+                if translator_comments and \
+                   translator_comments[-1][0] < message_lineno - 1:
+                    translator_comments = []
+
+                if messages is not None:
+                    yield (message_lineno, funcname, messages,
+                           [comment[1] for comment in translator_comments])
+
+                # reset state for the next keyword call
+                funcname = message_lineno = last_argument = None
+                concatenate_next = False
+                translator_comments = []
+                messages = []
+                call_stack = -1
+
+            elif token.type == 'string':
+                new_value = unquote_string(token.value)
+                if concatenate_next:
+                    # previous token was '+': join with the pending literal
+                    last_argument = (last_argument or '') + new_value
+                    concatenate_next = False
+                else:
+                    last_argument = new_value
+
+            elif token.type == 'operator':
+                if token.value == ',':
+                    # argument separator; None marks a non-string argument
+                    if last_argument is not None:
+                        messages.append(last_argument)
+                        last_argument = None
+                    else:
+                        messages.append(None)
+                    concatenate_next = False
+                elif token.value == '+':
+                    concatenate_next = True
+
+        elif call_stack > 0 and token.type == 'operator' \
+             and token.value == ')':
+            call_stack -= 1
+
+        elif funcname and call_stack == -1:
+            funcname = None
+
+        elif call_stack == -1 and token.type == 'name' and \
+             token.value in keywords and \
+             (last_token is None or last_token.type != 'name' or
+              last_token.value != 'function'):
+            # a keyword name starts a candidate call, unless it is a
+            # function *definition* ("function gettext(...)")
+            funcname = token.value
+
+        last_token = token
diff --git a/babel3/babel/messages/frontend.py b/babel3/babel/messages/frontend.py
new file mode 100755
--- /dev/null
+++ b/babel3/babel/messages/frontend.py
@@ -0,0 +1,1201 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007-2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""Frontends for the message extraction functionality."""
+
+from ConfigParser import RawConfigParser
+from datetime import datetime
+from distutils import log
+from distutils.cmd import Command
+from distutils.errors import DistutilsOptionError, DistutilsSetupError
+from locale import getpreferredencoding
+import logging
+from optparse import OptionParser
+import os
+import re
+import shutil
+from StringIO import StringIO
+import sys
+import tempfile
+
+from babel import __version__ as VERSION
+from babel import Locale, localedata
+from babel.core import UnknownLocaleError
+from babel.messages.catalog import Catalog
+from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS, \
+ DEFAULT_MAPPING
+from babel.messages.mofile import write_mo
+from babel.messages.pofile import read_po, write_po
+from babel.messages.plurals import PLURALS
+from babel.util import odict, LOCALTZ
+
+__all__ = ['CommandLineInterface', 'compile_catalog', 'extract_messages',
+ 'init_catalog', 'check_message_extractors', 'update_catalog']
+__docformat__ = 'restructuredtext en'
+
+
+class compile_catalog(Command):
+    """Catalog compilation command for use in ``setup.py`` scripts.
+
+    If correctly installed, this command is available to Setuptools-using
+    setup scripts automatically. For projects using plain old ``distutils``,
+    the command needs to be registered explicitly in ``setup.py``::
+
+        from babel.messages.frontend import compile_catalog
+
+        setup(
+            ...
+            cmdclass = {'compile_catalog': compile_catalog}
+        )
+
+    :since: version 0.9
+    :see: `Integrating new distutils commands `_
+    :see: `setuptools `_
+    """
+
+    description = 'compile message catalogs to binary MO files'
+    user_options = [
+        ('domain=', 'D',
+         "domain of PO file (default 'messages')"),
+        ('directory=', 'd',
+         'path to base directory containing the catalogs'),
+        ('input-file=', 'i',
+         'name of the input file'),
+        ('output-file=', 'o',
+         "name of the output file (default "
+         "'//LC_MESSAGES/.po')"),
+        ('locale=', 'l',
+         'locale of the catalog to compile'),
+        ('use-fuzzy', 'f',
+         'also include fuzzy translations'),
+        ('statistics', None,
+         'print statistics about translations')
+    ]
+    boolean_options = ['use-fuzzy', 'statistics']
+
+    def initialize_options(self):
+        # distutils hook: set defaults before option parsing
+        self.domain = 'messages'
+        self.directory = None
+        self.input_file = None
+        self.output_file = None
+        self.locale = None
+        self.use_fuzzy = False
+        self.statistics = False
+
+    def finalize_options(self):
+        # distutils hook: either an explicit input file or a base
+        # directory (to scan for catalogs) must be given; same for output
+        if not self.input_file and not self.directory:
+            raise DistutilsOptionError('you must specify either the input file '
+                                       'or the base directory')
+        # NOTE(review): this message should probably read "output file" --
+        # the condition checks self.output_file
+        if not self.output_file and not self.directory:
+            raise DistutilsOptionError('you must specify either the input file '
+                                       'or the base directory')
+
+    def run(self):
+        # parallel lists: po_files holds (locale, path), mo_files the
+        # corresponding output path at the same index
+        po_files = []
+        mo_files = []
+
+        if not self.input_file:
+            if self.locale:
+                # single locale under the base directory
+                po_files.append((self.locale,
+                                 os.path.join(self.directory, self.locale,
+                                              'LC_MESSAGES',
+                                              self.domain + '.po')))
+                mo_files.append(os.path.join(self.directory, self.locale,
+                                             'LC_MESSAGES',
+                                             self.domain + '.mo'))
+            else:
+                # scan the base directory for every locale with a catalog
+                for locale in os.listdir(self.directory):
+                    po_file = os.path.join(self.directory, locale,
+                                           'LC_MESSAGES', self.domain + '.po')
+                    if os.path.exists(po_file):
+                        po_files.append((locale, po_file))
+                        mo_files.append(os.path.join(self.directory, locale,
+                                                     'LC_MESSAGES',
+                                                     self.domain + '.mo'))
+        else:
+            po_files.append((self.locale, self.input_file))
+            if self.output_file:
+                mo_files.append(self.output_file)
+            else:
+                mo_files.append(os.path.join(self.directory, self.locale,
+                                             'LC_MESSAGES',
+                                             self.domain + '.mo'))
+
+        if not po_files:
+            raise DistutilsOptionError('no message catalogs found')
+
+        for idx, (locale, po_file) in enumerate(po_files):
+            mo_file = mo_files[idx]
+            infile = open(po_file, 'r')
+            try:
+                catalog = read_po(infile, locale)
+            finally:
+                infile.close()
+
+            if self.statistics:
+                translated = 0
+                # skip the first entry -- presumably the catalog header
+                # (msgid ""); confirm against Catalog.__iter__
+                for message in list(catalog)[1:]:
+                    if message.string:
+                        translated +=1
+                percentage = 0
+                if len(catalog):
+                    percentage = translated * 100 // len(catalog)
+                log.info('%d of %d messages (%d%%) translated in %r',
+                         translated, len(catalog), percentage, po_file)
+
+            if catalog.fuzzy and not self.use_fuzzy:
+                log.warn('catalog %r is marked as fuzzy, skipping', po_file)
+                continue
+
+            # checker errors are logged but do not abort the compilation
+            for message, errors in catalog.check():
+                for error in errors:
+                    log.error('error: %s:%d: %s', po_file, message.lineno,
+                              error)
+
+            log.info('compiling catalog %r to %r', po_file, mo_file)
+
+            outfile = open(mo_file, 'wb')
+            try:
+                write_mo(outfile, catalog, use_fuzzy=self.use_fuzzy)
+            finally:
+                outfile.close()
+
+
+class extract_messages(Command):
+ """Message extraction command for use in ``setup.py`` scripts.
+
+ If correctly installed, this command is available to Setuptools-using
+ setup scripts automatically. For projects using plain old ``distutils``,
+ the command needs to be registered explicitly in ``setup.py``::
+
+ from babel.messages.frontend import extract_messages
+
+ setup(
+ ...
+ cmdclass = {'extract_messages': extract_messages}
+ )
+
+ :see: `Integrating new distutils commands `_
+ :see: `setuptools `_
+ """
+
+ description = 'extract localizable strings from the project code'
+ user_options = [
+ ('charset=', None,
+ 'charset to use in the output file'),
+ ('keywords=', 'k',
+ 'space-separated list of keywords to look for in addition to the '
+ 'defaults'),
+ ('no-default-keywords', None,
+ 'do not include the default keywords'),
+ ('mapping-file=', 'F',
+ 'path to the mapping configuration file'),
+ ('no-location', None,
+ 'do not include location comments with filename and line number'),
+ ('omit-header', None,
+ 'do not include msgid "" entry in header'),
+ ('output-file=', 'o',
+ 'name of the output file'),
+ ('width=', 'w',
+ 'set output line width (default 76)'),
+ ('no-wrap', None,
+ 'do not break long message lines, longer than the output line width, '
+ 'into several lines'),
+ ('sort-output', None,
+ 'generate sorted output (default False)'),
+ ('sort-by-file', None,
+ 'sort output by file location (default False)'),
+ ('msgid-bugs-address=', None,
+ 'set report address for msgid'),
+ ('copyright-holder=', None,
+ 'set copyright holder in output'),
+ ('add-comments=', 'c',
+ 'place comment block with TAG (or those preceding keyword lines) in '
+ 'output file. Seperate multiple TAGs with commas(,)'),
+ ('strip-comments', None,
+ 'strip the comment TAGs from the comments.'),
+ ('input-dirs=', None,
+ 'directories that should be scanned for messages'),
+ ]
+ boolean_options = [
+ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap',
+ 'sort-output', 'sort-by-file', 'strip-comments'
+ ]
+
+    def initialize_options(self):
+        # distutils hook: set option defaults before parsing.
+        # Attributes with a leading underscore (_keywords, _add_comments)
+        # hold the parsed form of the corresponding string options and are
+        # finalized in finalize_options().
+        self.charset = 'utf-8'
+        self.keywords = ''
+        self._keywords = DEFAULT_KEYWORDS.copy()
+        self.no_default_keywords = False
+        self.mapping_file = None
+        self.no_location = False
+        self.omit_header = False
+        self.output_file = None
+        self.input_dirs = None
+        self.width = None
+        self.no_wrap = False
+        self.sort_output = False
+        self.sort_by_file = False
+        self.msgid_bugs_address = None
+        self.copyright_holder = None
+        self.add_comments = None
+        self._add_comments = []
+        self.strip_comments = False
+
+    def finalize_options(self):
+        # distutils hook: validate option combinations and derive the
+        # parsed option attributes (_keywords, _add_comments, width,
+        # input_dirs).
+        if self.no_default_keywords and not self.keywords:
+            raise DistutilsOptionError('you must specify new keywords if you '
+                                       'disable the default ones')
+        if self.no_default_keywords:
+            self._keywords = {}
+        if self.keywords:
+            self._keywords.update(parse_keywords(self.keywords.split()))
+
+        if not self.output_file:
+            raise DistutilsOptionError('no output file specified')
+        if self.no_wrap and self.width:
+            raise DistutilsOptionError("'--no-wrap' and '--width' are mutually "
+                                       "exclusive")
+        # default wrap width is 76 columns unless wrapping is disabled
+        if not self.no_wrap and not self.width:
+            self.width = 76
+        elif self.width is not None:
+            self.width = int(self.width)
+
+        if self.sort_output and self.sort_by_file:
+            raise DistutilsOptionError("'--sort-output' and '--sort-by-file' "
+                                       "are mutually exclusive")
+
+        if not self.input_dirs:
+            # default to the top-level packages declared in setup();
+            # dict.fromkeys() deduplicates the package roots
+            self.input_dirs = dict.fromkeys([k.split('.',1)[0]
+                for k in self.distribution.packages
+            ]).keys()
+
+        if self.add_comments:
+            self._add_comments = self.add_comments.split(',')
+
+ def run(self):
+ mappings = self._get_mappings()
+ outfile = open(self.output_file, 'w')
+ try:
+ catalog = Catalog(project=self.distribution.get_name(),
+ version=self.distribution.get_version(),
+ msgid_bugs_address=self.msgid_bugs_address,
+ copyright_holder=self.copyright_holder,
+ charset=self.charset)
+
+ for dirname, (method_map, options_map) in mappings.items():
+ def callback(filename, method, options):
+ if method == 'ignore':
+ return
+ filepath = os.path.normpath(os.path.join(dirname, filename))
+ optstr = ''
+ if options:
+ optstr = ' (%s)' % ', '.join(['%s="%s"' % (k, v) for
+ k, v in options.items()])
+ log.info('extracting messages from %s%s', filepath, optstr)
+
+ extracted = extract_from_dir(dirname, method_map, options_map,
+ keywords=self._keywords,
+ comment_tags=self._add_comments,
+ callback=callback,
+ strip_comment_tags=
+ self.strip_comments)
+ for filename, lineno, message, comments in extracted:
+ filepath = os.path.normpath(os.path.join(dirname, filename))
+ catalog.add(message, None, [(filepath, lineno)],
+ auto_comments=comments)
+
+ log.info('writing PO template file to %s' % self.output_file)
+ write_po(outfile, catalog, width=self.width,
+ no_location=self.no_location,
+ omit_header=self.omit_header,
+ sort_output=self.sort_output,
+ sort_by_file=self.sort_by_file)
+ finally:
+ outfile.close()
+
+ def _get_mappings(self):
+ mappings = {}
+
+ if self.mapping_file:
+ fileobj = open(self.mapping_file, 'U')
+ try:
+ method_map, options_map = parse_mapping(fileobj)
+ for dirname in self.input_dirs:
+ mappings[dirname] = method_map, options_map
+ finally:
+ fileobj.close()
+
+ elif getattr(self.distribution, 'message_extractors', None):
+ message_extractors = self.distribution.message_extractors
+ for dirname, mapping in message_extractors.items():
+ if isinstance(mapping, basestring):
+ method_map, options_map = parse_mapping(StringIO(mapping))
+ else:
+ method_map, options_map = [], {}
+ for pattern, method, options in mapping:
+ method_map.append((pattern, method))
+ options_map[pattern] = options or {}
+ mappings[dirname] = method_map, options_map
+
+ else:
+ for dirname in self.input_dirs:
+ mappings[dirname] = DEFAULT_MAPPING, {}
+
+ return mappings
+
+
+def check_message_extractors(dist, name, value):
+ """Validate the ``message_extractors`` keyword argument to ``setup()``.
+
+ :param dist: the distutils/setuptools ``Distribution`` object
+ :param name: the name of the keyword argument (should always be
+ "message_extractors")
+ :param value: the value of the keyword argument
+ :raise `DistutilsSetupError`: if the value is not valid
+ :see: `Adding setup() arguments
+ `_
+ """
+ assert name == 'message_extractors'
+ if not isinstance(value, dict):
+ raise DistutilsSetupError('the value of the "message_extractors" '
+ 'parameter must be a dictionary')
+
+
+class init_catalog(Command):
+ """New catalog initialization command for use in ``setup.py`` scripts.
+
+ If correctly installed, this command is available to Setuptools-using
+ setup scripts automatically. For projects using plain old ``distutils``,
+ the command needs to be registered explicitly in ``setup.py``::
+
+ from babel.messages.frontend import init_catalog
+
+ setup(
+ ...
+ cmdclass = {'init_catalog': init_catalog}
+ )
+
+ :see: `Integrating new distutils commands `_
+ :see: `setuptools `_
+ """
+
+ description = 'create a new catalog based on a POT file'
+ user_options = [
+ ('domain=', 'D',
+ "domain of PO file (default 'messages')"),
+ ('input-file=', 'i',
+ 'name of the input file'),
+ ('output-dir=', 'd',
+ 'path to output directory'),
+ ('output-file=', 'o',
+ "name of the output file (default "
+ "'//LC_MESSAGES/.po')"),
+ ('locale=', 'l',
+ 'locale for the new localized catalog'),
+ ]
+
+ def initialize_options(self):
+ self.output_dir = None
+ self.output_file = None
+ self.input_file = None
+ self.locale = None
+ self.domain = 'messages'
+
+ def finalize_options(self):
+ if not self.input_file:
+ raise DistutilsOptionError('you must specify the input file')
+
+ if not self.locale:
+ raise DistutilsOptionError('you must provide a locale for the '
+ 'new catalog')
+ try:
+ self._locale = Locale.parse(self.locale)
+ except UnknownLocaleError, e:
+ raise DistutilsOptionError(e)
+
+ if not self.output_file and not self.output_dir:
+ raise DistutilsOptionError('you must specify the output directory')
+ if not self.output_file:
+ self.output_file = os.path.join(self.output_dir, self.locale,
+ 'LC_MESSAGES', self.domain + '.po')
+
+ if not os.path.exists(os.path.dirname(self.output_file)):
+ os.makedirs(os.path.dirname(self.output_file))
+
+ def run(self):
+ log.info('creating catalog %r based on %r', self.output_file,
+ self.input_file)
+
+ infile = open(self.input_file, 'r')
+ try:
+ # Although reading from the catalog template, read_po must be fed
+            # the locale in order to correctly calculate plurals
+ catalog = read_po(infile, locale=self.locale)
+ finally:
+ infile.close()
+
+ catalog.locale = self._locale
+ catalog.fuzzy = False
+
+ outfile = open(self.output_file, 'w')
+ try:
+ write_po(outfile, catalog)
+ finally:
+ outfile.close()
+
+
+class update_catalog(Command):
+ """Catalog merging command for use in ``setup.py`` scripts.
+
+ If correctly installed, this command is available to Setuptools-using
+ setup scripts automatically. For projects using plain old ``distutils``,
+ the command needs to be registered explicitly in ``setup.py``::
+
+ from babel.messages.frontend import update_catalog
+
+ setup(
+ ...
+ cmdclass = {'update_catalog': update_catalog}
+ )
+
+ :since: version 0.9
+ :see: `Integrating new distutils commands `_
+ :see: `setuptools `_
+ """
+
+ description = 'update message catalogs from a POT file'
+ user_options = [
+ ('domain=', 'D',
+ "domain of PO file (default 'messages')"),
+ ('input-file=', 'i',
+ 'name of the input file'),
+ ('output-dir=', 'd',
+ 'path to base directory containing the catalogs'),
+ ('output-file=', 'o',
+ "name of the output file (default "
+ "'//LC_MESSAGES/.po')"),
+ ('locale=', 'l',
+ 'locale of the catalog to compile'),
+ ('ignore-obsolete=', None,
+ 'whether to omit obsolete messages from the output'),
+ ('no-fuzzy-matching', 'N',
+ 'do not use fuzzy matching'),
+ ('previous', None,
+ 'keep previous msgids of translated messages')
+ ]
+ boolean_options = ['ignore_obsolete', 'no_fuzzy_matching', 'previous']
+
+ def initialize_options(self):
+ self.domain = 'messages'
+ self.input_file = None
+ self.output_dir = None
+ self.output_file = None
+ self.locale = None
+ self.ignore_obsolete = False
+ self.no_fuzzy_matching = False
+ self.previous = False
+
+ def finalize_options(self):
+ if not self.input_file:
+ raise DistutilsOptionError('you must specify the input file')
+ if not self.output_file and not self.output_dir:
+ raise DistutilsOptionError('you must specify the output file or '
+ 'directory')
+ if self.output_file and not self.locale:
+ raise DistutilsOptionError('you must specify the locale')
+ if self.no_fuzzy_matching and self.previous:
+ self.previous = False
+
+ def run(self):
+ po_files = []
+ if not self.output_file:
+ if self.locale:
+ po_files.append((self.locale,
+ os.path.join(self.output_dir, self.locale,
+ 'LC_MESSAGES',
+ self.domain + '.po')))
+ else:
+ for locale in os.listdir(self.output_dir):
+ po_file = os.path.join(self.output_dir, locale,
+ 'LC_MESSAGES',
+ self.domain + '.po')
+ if os.path.exists(po_file):
+ po_files.append((locale, po_file))
+ else:
+ po_files.append((self.locale, self.output_file))
+
+ domain = self.domain
+ if not domain:
+ domain = os.path.splitext(os.path.basename(self.input_file))[0]
+
+ infile = open(self.input_file, 'U')
+ try:
+ template = read_po(infile)
+ finally:
+ infile.close()
+
+ if not po_files:
+ raise DistutilsOptionError('no message catalogs found')
+
+ for locale, filename in po_files:
+ log.info('updating catalog %r based on %r', filename,
+ self.input_file)
+ infile = open(filename, 'U')
+ try:
+ catalog = read_po(infile, locale=locale, domain=domain)
+ finally:
+ infile.close()
+
+ catalog.update(template, self.no_fuzzy_matching)
+
+ tmpname = os.path.join(os.path.dirname(filename),
+ tempfile.gettempprefix() +
+ os.path.basename(filename))
+ tmpfile = open(tmpname, 'w')
+ try:
+ try:
+ write_po(tmpfile, catalog,
+ ignore_obsolete=self.ignore_obsolete,
+ include_previous=self.previous)
+ finally:
+ tmpfile.close()
+ except:
+ os.remove(tmpname)
+ raise
+
+ try:
+ os.rename(tmpname, filename)
+ except OSError:
+ # We're probably on Windows, which doesn't support atomic
+ # renames, at least not through Python
+ # If the error is in fact due to a permissions problem, that
+ # same error is going to be raised from one of the following
+ # operations
+ os.remove(filename)
+ shutil.copy(tmpname, filename)
+ os.remove(tmpname)
+
+
+class CommandLineInterface(object):
+ """Command-line interface.
+
+ This class provides a simple command-line interface to the message
+ extraction and PO file generation functionality.
+ """
+
+ usage = '%%prog %s [options] %s'
+ version = '%%prog %s' % VERSION
+ commands = {
+ 'compile': 'compile message catalogs to MO files',
+ 'extract': 'extract messages from source files and generate a POT file',
+ 'init': 'create new message catalogs from a POT file',
+ 'update': 'update existing message catalogs from a POT file'
+ }
+
+ def run(self, argv=sys.argv):
+ """Main entry point of the command-line interface.
+
+ :param argv: list of arguments passed on the command-line
+ """
+ self.parser = OptionParser(usage=self.usage % ('command', '[args]'),
+ version=self.version)
+ self.parser.disable_interspersed_args()
+ self.parser.print_help = self._help
+ self.parser.add_option('--list-locales', dest='list_locales',
+ action='store_true',
+ help="print all known locales and exit")
+ self.parser.add_option('-v', '--verbose', action='store_const',
+ dest='loglevel', const=logging.DEBUG,
+ help='print as much as possible')
+ self.parser.add_option('-q', '--quiet', action='store_const',
+ dest='loglevel', const=logging.ERROR,
+ help='print as little as possible')
+ self.parser.set_defaults(list_locales=False, loglevel=logging.INFO)
+
+ options, args = self.parser.parse_args(argv[1:])
+
+ # Configure logging
+ self.log = logging.getLogger('babel')
+ self.log.setLevel(options.loglevel)
+ handler = logging.StreamHandler()
+ handler.setLevel(options.loglevel)
+ formatter = logging.Formatter('%(message)s')
+ handler.setFormatter(formatter)
+ self.log.addHandler(handler)
+
+ if options.list_locales:
+ identifiers = localedata.list()
+ longest = max([len(identifier) for identifier in identifiers])
+ identifiers.sort()
+ format = u'%%-%ds %%s' % (longest + 1)
+ for identifier in identifiers:
+ locale = Locale.parse(identifier)
+ output = format % (identifier, locale.english_name)
+ print output.encode(sys.stdout.encoding or
+ getpreferredencoding() or
+ 'ascii', 'replace')
+ return 0
+
+ if not args:
+ self.parser.error('no valid command or option passed. '
+ 'Try the -h/--help option for more information.')
+
+ cmdname = args[0]
+ if cmdname not in self.commands:
+ self.parser.error('unknown command "%s"' % cmdname)
+
+ return getattr(self, cmdname)(args[1:])
+
+ def _help(self):
+ print self.parser.format_help()
+ print "commands:"
+ longest = max([len(command) for command in self.commands])
+ format = " %%-%ds %%s" % max(8, longest + 1)
+ commands = self.commands.items()
+ commands.sort()
+ for name, description in commands:
+ print format % (name, description)
+
+ def compile(self, argv):
+ """Subcommand for compiling a message catalog to a MO file.
+
+ :param argv: the command arguments
+ :since: version 0.9
+ """
+ parser = OptionParser(usage=self.usage % ('compile', ''),
+ description=self.commands['compile'])
+ parser.add_option('--domain', '-D', dest='domain',
+ help="domain of MO and PO files (default '%default')")
+ parser.add_option('--directory', '-d', dest='directory',
+ metavar='DIR', help='base directory of catalog files')
+ parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE',
+ help='locale of the catalog')
+ parser.add_option('--input-file', '-i', dest='input_file',
+ metavar='FILE', help='name of the input file')
+ parser.add_option('--output-file', '-o', dest='output_file',
+ metavar='FILE',
+ help="name of the output file (default "
+ "'//LC_MESSAGES/"
+ ".mo')")
+ parser.add_option('--use-fuzzy', '-f', dest='use_fuzzy',
+ action='store_true',
+ help='also include fuzzy translations (default '
+ '%default)')
+ parser.add_option('--statistics', dest='statistics',
+ action='store_true',
+ help='print statistics about translations')
+
+ parser.set_defaults(domain='messages', use_fuzzy=False,
+ compile_all=False, statistics=False)
+ options, args = parser.parse_args(argv)
+
+ po_files = []
+ mo_files = []
+ if not options.input_file:
+ if not options.directory:
+ parser.error('you must specify either the input file or the '
+ 'base directory')
+ if options.locale:
+ po_files.append((options.locale,
+ os.path.join(options.directory,
+ options.locale, 'LC_MESSAGES',
+ options.domain + '.po')))
+ mo_files.append(os.path.join(options.directory, options.locale,
+ 'LC_MESSAGES',
+ options.domain + '.mo'))
+ else:
+ for locale in os.listdir(options.directory):
+ po_file = os.path.join(options.directory, locale,
+ 'LC_MESSAGES', options.domain + '.po')
+ if os.path.exists(po_file):
+ po_files.append((locale, po_file))
+ mo_files.append(os.path.join(options.directory, locale,
+ 'LC_MESSAGES',
+ options.domain + '.mo'))
+ else:
+ po_files.append((options.locale, options.input_file))
+ if options.output_file:
+ mo_files.append(options.output_file)
+ else:
+ if not options.directory:
+ parser.error('you must specify either the input file or '
+ 'the base directory')
+ mo_files.append(os.path.join(options.directory, options.locale,
+ 'LC_MESSAGES',
+ options.domain + '.mo'))
+ if not po_files:
+ parser.error('no message catalogs found')
+
+ for idx, (locale, po_file) in enumerate(po_files):
+ mo_file = mo_files[idx]
+ infile = open(po_file, 'r')
+ try:
+ catalog = read_po(infile, locale)
+ finally:
+ infile.close()
+
+ if options.statistics:
+ translated = 0
+ for message in list(catalog)[1:]:
+ if message.string:
+ translated +=1
+ percentage = 0
+ if len(catalog):
+ percentage = translated * 100 // len(catalog)
+ self.log.info("%d of %d messages (%d%%) translated in %r",
+ translated, len(catalog), percentage, po_file)
+
+ if catalog.fuzzy and not options.use_fuzzy:
+ self.log.warn('catalog %r is marked as fuzzy, skipping',
+ po_file)
+ continue
+
+ for message, errors in catalog.check():
+ for error in errors:
+ self.log.error('error: %s:%d: %s', po_file, message.lineno,
+ error)
+
+ self.log.info('compiling catalog %r to %r', po_file, mo_file)
+
+ outfile = open(mo_file, 'wb')
+ try:
+ write_mo(outfile, catalog, use_fuzzy=options.use_fuzzy)
+ finally:
+ outfile.close()
+
+ def extract(self, argv):
+ """Subcommand for extracting messages from source files and generating
+ a POT file.
+
+ :param argv: the command arguments
+ """
+ parser = OptionParser(usage=self.usage % ('extract', 'dir1 ...'),
+ description=self.commands['extract'])
+ parser.add_option('--charset', dest='charset',
+ help='charset to use in the output (default '
+ '"%default")')
+ parser.add_option('-k', '--keyword', dest='keywords', action='append',
+ help='keywords to look for in addition to the '
+ 'defaults. You can specify multiple -k flags on '
+ 'the command line.')
+ parser.add_option('--no-default-keywords', dest='no_default_keywords',
+ action='store_true',
+ help="do not include the default keywords")
+ parser.add_option('--mapping', '-F', dest='mapping_file',
+ help='path to the extraction mapping file')
+ parser.add_option('--no-location', dest='no_location',
+ action='store_true',
+ help='do not include location comments with filename '
+ 'and line number')
+ parser.add_option('--omit-header', dest='omit_header',
+ action='store_true',
+ help='do not include msgid "" entry in header')
+ parser.add_option('-o', '--output', dest='output',
+ help='path to the output POT file')
+ parser.add_option('-w', '--width', dest='width', type='int',
+ help="set output line width (default 76)")
+ parser.add_option('--no-wrap', dest='no_wrap', action = 'store_true',
+ help='do not break long message lines, longer than '
+ 'the output line width, into several lines')
+ parser.add_option('--sort-output', dest='sort_output',
+ action='store_true',
+ help='generate sorted output (default False)')
+ parser.add_option('--sort-by-file', dest='sort_by_file',
+ action='store_true',
+ help='sort output by file location (default False)')
+ parser.add_option('--msgid-bugs-address', dest='msgid_bugs_address',
+ metavar='EMAIL@ADDRESS',
+ help='set report address for msgid')
+ parser.add_option('--copyright-holder', dest='copyright_holder',
+ help='set copyright holder in output')
+ parser.add_option('--project', dest='project',
+ help='set project name in output')
+ parser.add_option('--version', dest='version',
+ help='set project version in output')
+ parser.add_option('--add-comments', '-c', dest='comment_tags',
+ metavar='TAG', action='append',
+ help='place comment block with TAG (or those '
+ 'preceding keyword lines) in output file. One '
+ 'TAG per argument call')
+ parser.add_option('--strip-comment-tags', '-s',
+ dest='strip_comment_tags', action='store_true',
+ help='Strip the comment tags from the comments.')
+
+ parser.set_defaults(charset='utf-8', keywords=[],
+ no_default_keywords=False, no_location=False,
+ omit_header = False, width=None, no_wrap=False,
+ sort_output=False, sort_by_file=False,
+ comment_tags=[], strip_comment_tags=False)
+ options, args = parser.parse_args(argv)
+ if not args:
+ parser.error('incorrect number of arguments')
+
+ if options.output not in (None, '-'):
+ outfile = open(options.output, 'w')
+ else:
+ outfile = sys.stdout
+
+ keywords = DEFAULT_KEYWORDS.copy()
+ if options.no_default_keywords:
+ if not options.keywords:
+ parser.error('you must specify new keywords if you disable the '
+ 'default ones')
+ keywords = {}
+ if options.keywords:
+ keywords.update(parse_keywords(options.keywords))
+
+ if options.mapping_file:
+ fileobj = open(options.mapping_file, 'U')
+ try:
+ method_map, options_map = parse_mapping(fileobj)
+ finally:
+ fileobj.close()
+ else:
+ method_map = DEFAULT_MAPPING
+ options_map = {}
+
+ if options.width and options.no_wrap:
+ parser.error("'--no-wrap' and '--width' are mutually exclusive.")
+ elif not options.width and not options.no_wrap:
+ options.width = 76
+
+ if options.sort_output and options.sort_by_file:
+ parser.error("'--sort-output' and '--sort-by-file' are mutually "
+ "exclusive")
+
+ try:
+ catalog = Catalog(project=options.project,
+ version=options.version,
+ msgid_bugs_address=options.msgid_bugs_address,
+ copyright_holder=options.copyright_holder,
+ charset=options.charset)
+
+ for dirname in args:
+ if not os.path.isdir(dirname):
+ parser.error('%r is not a directory' % dirname)
+
+ def callback(filename, method, options):
+ if method == 'ignore':
+ return
+ filepath = os.path.normpath(os.path.join(dirname, filename))
+ optstr = ''
+ if options:
+ optstr = ' (%s)' % ', '.join(['%s="%s"' % (k, v) for
+ k, v in options.items()])
+ self.log.info('extracting messages from %s%s', filepath,
+ optstr)
+
+ extracted = extract_from_dir(dirname, method_map, options_map,
+ keywords, options.comment_tags,
+ callback=callback,
+ strip_comment_tags=
+ options.strip_comment_tags)
+ for filename, lineno, message, comments in extracted:
+ filepath = os.path.normpath(os.path.join(dirname, filename))
+ catalog.add(message, None, [(filepath, lineno)],
+ auto_comments=comments)
+
+ if options.output not in (None, '-'):
+ self.log.info('writing PO template file to %s' % options.output)
+ write_po(outfile, catalog, width=options.width,
+ no_location=options.no_location,
+ omit_header=options.omit_header,
+ sort_output=options.sort_output,
+ sort_by_file=options.sort_by_file)
+ finally:
+ if options.output:
+ outfile.close()
+
+ def init(self, argv):
+ """Subcommand for creating new message catalogs from a template.
+
+ :param argv: the command arguments
+ """
+ parser = OptionParser(usage=self.usage % ('init', ''),
+ description=self.commands['init'])
+ parser.add_option('--domain', '-D', dest='domain',
+ help="domain of PO file (default '%default')")
+ parser.add_option('--input-file', '-i', dest='input_file',
+ metavar='FILE', help='name of the input file')
+ parser.add_option('--output-dir', '-d', dest='output_dir',
+ metavar='DIR', help='path to output directory')
+ parser.add_option('--output-file', '-o', dest='output_file',
+ metavar='FILE',
+ help="name of the output file (default "
+ "'//LC_MESSAGES/"
+ ".po')")
+ parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE',
+ help='locale for the new localized catalog')
+
+ parser.set_defaults(domain='messages')
+ options, args = parser.parse_args(argv)
+
+ if not options.locale:
+ parser.error('you must provide a locale for the new catalog')
+ try:
+ locale = Locale.parse(options.locale)
+ except UnknownLocaleError, e:
+ parser.error(e)
+
+ if not options.input_file:
+ parser.error('you must specify the input file')
+
+ if not options.output_file and not options.output_dir:
+ parser.error('you must specify the output file or directory')
+
+ if not options.output_file:
+ options.output_file = os.path.join(options.output_dir,
+ options.locale, 'LC_MESSAGES',
+ options.domain + '.po')
+ if not os.path.exists(os.path.dirname(options.output_file)):
+ os.makedirs(os.path.dirname(options.output_file))
+
+ infile = open(options.input_file, 'r')
+ try:
+ # Although reading from the catalog template, read_po must be fed
+            # the locale in order to correctly calculate plurals
+ catalog = read_po(infile, locale=options.locale)
+ finally:
+ infile.close()
+
+ catalog.locale = locale
+ catalog.revision_date = datetime.now(LOCALTZ)
+
+ self.log.info('creating catalog %r based on %r', options.output_file,
+ options.input_file)
+
+ outfile = open(options.output_file, 'w')
+ try:
+ write_po(outfile, catalog)
+ finally:
+ outfile.close()
+
+ def update(self, argv):
+ """Subcommand for updating existing message catalogs from a template.
+
+ :param argv: the command arguments
+ :since: version 0.9
+ """
+ parser = OptionParser(usage=self.usage % ('update', ''),
+ description=self.commands['update'])
+ parser.add_option('--domain', '-D', dest='domain',
+ help="domain of PO file (default '%default')")
+ parser.add_option('--input-file', '-i', dest='input_file',
+ metavar='FILE', help='name of the input file')
+ parser.add_option('--output-dir', '-d', dest='output_dir',
+ metavar='DIR', help='path to output directory')
+ parser.add_option('--output-file', '-o', dest='output_file',
+ metavar='FILE',
+ help="name of the output file (default "
+ "'//LC_MESSAGES/"
+ ".po')")
+ parser.add_option('--locale', '-l', dest='locale', metavar='LOCALE',
+ help='locale of the translations catalog')
+ parser.add_option('--ignore-obsolete', dest='ignore_obsolete',
+ action='store_true',
+ help='do not include obsolete messages in the output '
+ '(default %default)')
+ parser.add_option('--no-fuzzy-matching', '-N', dest='no_fuzzy_matching',
+ action='store_true',
+ help='do not use fuzzy matching (default %default)')
+ parser.add_option('--previous', dest='previous', action='store_true',
+ help='keep previous msgids of translated messages '
+ '(default %default)')
+
+ parser.set_defaults(domain='messages', ignore_obsolete=False,
+ no_fuzzy_matching=False, previous=False)
+ options, args = parser.parse_args(argv)
+
+ if not options.input_file:
+ parser.error('you must specify the input file')
+ if not options.output_file and not options.output_dir:
+ parser.error('you must specify the output file or directory')
+ if options.output_file and not options.locale:
+ parser.error('you must specify the locale')
+ if options.no_fuzzy_matching and options.previous:
+ options.previous = False
+
+ po_files = []
+ if not options.output_file:
+ if options.locale:
+ po_files.append((options.locale,
+ os.path.join(options.output_dir,
+ options.locale, 'LC_MESSAGES',
+ options.domain + '.po')))
+ else:
+ for locale in os.listdir(options.output_dir):
+ po_file = os.path.join(options.output_dir, locale,
+ 'LC_MESSAGES',
+ options.domain + '.po')
+ if os.path.exists(po_file):
+ po_files.append((locale, po_file))
+ else:
+ po_files.append((options.locale, options.output_file))
+
+ domain = options.domain
+ if not domain:
+ domain = os.path.splitext(os.path.basename(options.input_file))[0]
+
+ infile = open(options.input_file, 'U')
+ try:
+ template = read_po(infile)
+ finally:
+ infile.close()
+
+ if not po_files:
+ parser.error('no message catalogs found')
+
+ for locale, filename in po_files:
+ self.log.info('updating catalog %r based on %r', filename,
+ options.input_file)
+ infile = open(filename, 'U')
+ try:
+ catalog = read_po(infile, locale=locale, domain=domain)
+ finally:
+ infile.close()
+
+ catalog.update(template, options.no_fuzzy_matching)
+
+ tmpname = os.path.join(os.path.dirname(filename),
+ tempfile.gettempprefix() +
+ os.path.basename(filename))
+ tmpfile = open(tmpname, 'w')
+ try:
+ try:
+ write_po(tmpfile, catalog,
+ ignore_obsolete=options.ignore_obsolete,
+ include_previous=options.previous)
+ finally:
+ tmpfile.close()
+ except:
+ os.remove(tmpname)
+ raise
+
+ try:
+ os.rename(tmpname, filename)
+ except OSError:
+ # We're probably on Windows, which doesn't support atomic
+ # renames, at least not through Python
+ # If the error is in fact due to a permissions problem, that
+ # same error is going to be raised from one of the following
+ # operations
+ os.remove(filename)
+ shutil.copy(tmpname, filename)
+ os.remove(tmpname)
+
+
+def main():
+ return CommandLineInterface().run(sys.argv)
+
+def parse_mapping(fileobj, filename=None):
+ """Parse an extraction method mapping from a file-like object.
+
+ >>> buf = StringIO('''
+ ... [extractors]
+ ... custom = mypackage.module:myfunc
+ ...
+ ... # Python source files
+ ... [python: **.py]
+ ...
+ ... # Genshi templates
+ ... [genshi: **/templates/**.html]
+ ... include_attrs =
+ ... [genshi: **/templates/**.txt]
+ ... template_class = genshi.template:TextTemplate
+ ... encoding = latin-1
+ ...
+ ... # Some custom extractor
+ ... [custom: **/custom/*.*]
+ ... ''')
+
+ >>> method_map, options_map = parse_mapping(buf)
+ >>> len(method_map)
+ 4
+
+ >>> method_map[0]
+ ('**.py', 'python')
+ >>> options_map['**.py']
+ {}
+ >>> method_map[1]
+ ('**/templates/**.html', 'genshi')
+ >>> options_map['**/templates/**.html']['include_attrs']
+ ''
+ >>> method_map[2]
+ ('**/templates/**.txt', 'genshi')
+ >>> options_map['**/templates/**.txt']['template_class']
+ 'genshi.template:TextTemplate'
+ >>> options_map['**/templates/**.txt']['encoding']
+ 'latin-1'
+
+ >>> method_map[3]
+ ('**/custom/*.*', 'mypackage.module:myfunc')
+ >>> options_map['**/custom/*.*']
+ {}
+
+ :param fileobj: a readable file-like object containing the configuration
+ text to parse
+ :return: a `(method_map, options_map)` tuple
+ :rtype: `tuple`
+ :see: `extract_from_directory`
+ """
+ extractors = {}
+ method_map = []
+ options_map = {}
+
+ parser = RawConfigParser()
+ parser._sections = odict(parser._sections) # We need ordered sections
+ parser.readfp(fileobj, filename)
+ for section in parser.sections():
+ if section == 'extractors':
+ extractors = dict(parser.items(section))
+ else:
+ method, pattern = [part.strip() for part in section.split(':', 1)]
+ method_map.append((pattern, method))
+ options_map[pattern] = dict(parser.items(section))
+
+ if extractors:
+ for idx, (pattern, method) in enumerate(method_map):
+ if method in extractors:
+ method = extractors[method]
+ method_map[idx] = (pattern, method)
+
+ return (method_map, options_map)
+
+def parse_keywords(strings=[]):
+ """Parse keywords specifications from the given list of strings.
+
+ >>> kw = parse_keywords(['_', 'dgettext:2', 'dngettext:2,3']).items()
+ >>> kw.sort()
+ >>> for keyword, indices in kw:
+ ... print (keyword, indices)
+ ('_', None)
+ ('dgettext', (2,))
+ ('dngettext', (2, 3))
+ """
+ keywords = {}
+ for string in strings:
+ if ':' in string:
+ funcname, indices = string.split(':')
+ else:
+ funcname, indices = string, None
+ if funcname not in keywords:
+ if indices:
+ indices = tuple([(int(x)) for x in indices.split(',')])
+ keywords[funcname] = indices
+ return keywords
+
+
+if __name__ == '__main__':
+ main()
diff --git a/babel3/babel/messages/jslexer.py b/babel3/babel/messages/jslexer.py
new file mode 100644
--- /dev/null
+++ b/babel3/babel/messages/jslexer.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2008 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+"""A simple JavaScript 1.5 lexer which is used for the JavaScript
+extractor.
+"""
+
+import re
+
+from babel.util import itemgetter
+
+
+operators = [
+ '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
+ '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
+ '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
+ '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
+]
+operators.sort(lambda a, b: cmp(-len(a), -len(b)))
+
+escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
+
+rules = [
+ (None, re.compile(r'\s+(?u)')),
+ (None, re.compile(r'