# HG changeset patch # User cmlenz # Date 1219658902 0 # Node ID fe7ef7b4cb0b4a17779a7b7a0de3ba6c7aa4f1bd # Parent cdf6daa1e3cc53b6fb7353a3b4e995048c1c9f49 Ported [442:446/trunk] to 0.9.x branch. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,19 @@ Version 0.9.4 http://svn.edgewall.org/repos/babel/tags/0.9.4/ -(???, from branches/stable/0.9.x) +(Aug 25 2008, from branches/stable/0.9.x) * Currency symbol definitions that is defined with choice patterns in the CLDR data are no longer imported, so the symbol code will be used instead. * Fixed quarter support in date formatting. * Fixed a serious memory leak that was introduces by the support for CLDR aliases in 0.9.3 (ticket #128). + * Locale modifiers such as "@euro" are now stripped from locale identifiers + when parsing (ticket #136). + * The system locales "C" and "POSIX" are now treated as aliases for + "en_US_POSIX", for which the CLDR provides the appropriate data. Thanks to + Manlio Perillo for the suggestion. + * Fixed JavaScript extraction for regular expression literals (ticket #138) + and concatenated strings. Version 0.9.3 diff --git a/babel/core.py b/babel/core.py --- a/babel/core.py +++ b/babel/core.py @@ -51,6 +51,7 @@ fileobj.close() return _global_data.get(key, {}) + LOCALE_ALIASES = { 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ', 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', @@ -135,7 +136,7 @@ if not localedata.exists(identifier): raise UnknownLocaleError(identifier) - def default(cls, category=None): + def default(cls, category=None, aliases=LOCALE_ALIASES): """Return the system default locale for the specified category. >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']: @@ -143,14 +144,15 @@ >>> os.environ['LANG'] = 'fr_FR.UTF-8' >>> Locale.default('LC_MESSAGES') - + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers :return: the value of the variable, or any of the fallbacks (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) :rtype: `Locale` :see: `default_locale` """ - return cls(default_locale(category)) + return cls(default_locale(category, aliases=aliases)) default = classmethod(default) def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES): @@ -602,7 +604,7 @@ """) -def default_locale(category=None): +def default_locale(category=None, aliases=LOCALE_ALIASES): """Returns the system default locale for a given category, based on environment variables. @@ -611,11 +613,18 @@ >>> os.environ['LANG'] = 'fr_FR.UTF-8' >>> default_locale('LC_MESSAGES') 'fr_FR' - + + The "C" or "POSIX" pseudo-locales are treated as aliases for the + "en_US_POSIX" locale: + + >>> os.environ['LC_MESSAGES'] = 'POSIX' + >>> default_locale('LC_MESSAGES') + 'en_US_POSIX' + :param category: one of the ``LC_XXX`` environment variable names + :param aliases: a dictionary of aliases for locale identifiers :return: the value of the variable, or any of the fallbacks (``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``) - :rtype: `str` """ varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG') @@ -626,6 +635,10 @@ # the LANGUAGE variable may contain a colon-separated list of # language codes; we just pick the language on the list locale = locale.split(':')[0] + if locale in ('C', 'POSIX'): + locale = 'en_US_POSIX' + elif aliases and locale in aliases: + locale = aliases[locale] return '_'.join(filter(None, parse_locale(locale))) def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES): @@ -717,6 +730,15 @@ ... ValueError: 'not_a_LOCALE_String' is not a valid locale identifier + Encoding information and locale modifiers are removed from the identifier: + + >>> parse_locale('it_IT@euro') + ('it', 'IT', None, None) + >>> parse_locale('en_US.UTF-8') + ('en', 'US', None, None) + >>> parse_locale('de_DE.iso885915@euro') + ('de', 'DE', None, None) + :param identifier: the locale identifier string :param sep: character that separates the different components of the locale identifier @@ -730,6 +752,10 @@ if '.' in identifier: # this is probably the charset/encoding, which we don't care about identifier = identifier.split('.', 1)[0] + if '@' in identifier: + # this is a locale modifier such as @euro, which we don't care about + # either + identifier = identifier.split('@', 1)[0] parts = identifier.split(sep) lang = parts.pop(0).lower() diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -454,6 +454,7 @@ messages = [] last_argument = None translator_comments = [] + concatenate_next = False encoding = options.get('encoding', 'utf-8') last_token = None call_stack = -1 @@ -513,19 +514,29 @@ [comment[1] for comment in translator_comments]) funcname = message_lineno = last_argument = None + concatenate_next = False translator_comments = [] messages = [] call_stack = -1 elif token.type == 'string': - last_argument = unquote_string(token.value) + new_value = unquote_string(token.value) + if concatenate_next: + last_argument = (last_argument or '') + new_value + concatenate_next = False + else: + last_argument = new_value - elif token.type == 'operator' and token.value == ',': - if last_argument is not None: - messages.append(last_argument) - last_argument = None - else: - messages.append(None) + elif token.type == 'operator': + if token.value == ',': + if last_argument is not None: + messages.append(last_argument) + last_argument = None + else: + messages.append(None) + concatenate_next = False + elif token.value == '+': + concatenate_next = True elif call_stack > 0 and token.type == 'operator' \ and token.value == ')': diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py --- a/babel/messages/jslexer.py +++ b/babel/messages/jslexer.py @@ -49,7 +49,7 @@ ] division_re = re.compile(r'/=?') -regex_re = re.compile(r'/.+?/[a-zA-Z]*(?s)') +regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*(?s)') line_re = re.compile(r'(\r\n|\n|\r)') line_join_re = re.compile(r'\\' + line_re.pattern) uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}') diff --git a/babel/support.py b/babel/support.py --- a/babel/support.py +++ b/babel/support.py @@ -268,8 +268,8 @@ def __setitem__(self, key, value): self.value[key] = value - -class Translations(gettext.GNUTranslations): + +class Translations(gettext.GNUTranslations, object): """An extended translation catalog class.""" DEFAULT_DOMAIN = 'messages' @@ -308,7 +308,7 @@ def merge(self, translations): """Merge the given translations into the catalog. - Message translations in the specfied catalog override any messages with + Message translations in the specified catalog override any messages with the same identifier in the existing catalog. :param translations: the `Translations` instance with the messages to @@ -323,4 +323,5 @@ return self def __repr__(self): - return "<%s>" % (type(self).__name__) + return '<%s: "%s">' % (type(self).__name__, + self._info.get('project-id-version'))