# HG changeset patch # User cmlenz # Date 1213620523 0 # Node ID 5e58ea360a5c32d0d2383b8a627f4d81171bdfa8 # Parent eb7894f3323f58fff16493b65227387561a82edf Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk]. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Version 0.9.3 +http://svn.edgewall.org/repos/babel/tags/0.9.3/ +(?, from branches/stable/0.9.x) + + * Fixed invalid message extraction methods causing an UnboundLocalError. + * Extraction method specification can now use a dot instead of the colon to + separate module and function name (ticket #105). + * Fixed message catalog compilation for locales with more than two plural + forms (ticket #95). + * Fixed compilation of message catalogs for locales with more than two plural + forms where the translations were empty (ticket #97). + * The stripping of the comment tags in comments is optional now and + is done for each line in a comment. + * A JavaScript message extractor was added. + * Updated to CLDR 1.5.1. 
+ + Version 0.9.2 http://svn.edgewall.org/repos/babel/tags/0.9.2/ (Feb 4 2007, from branches/stable/0.9.x) diff --git a/babel/dates.py b/babel/dates.py --- a/babel/dates.py +++ b/babel/dates.py @@ -267,10 +267,7 @@ # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) - metainfo = {} info = locale.time_zones.get(zone, {}) - if 'use_metazone' in info: - metainfo = locale.meta_zones.get(info['use_metazone'], {}) # Otherwise, if there is only one timezone for the country, return the # localized country name @@ -286,12 +283,15 @@ fallback_format = locale.zone_formats['fallback'] if 'city' in info: city_name = info['city'] - elif 'city' in metainfo: - city_name = metainfo['city'] - elif '/' in zone: - city_name = zone.split('/', 1)[1].replace('_', ' ') else: - city_name = zone.replace('_', ' ') + metazone = get_global('meta_zones').get(zone) + metazone_info = locale.meta_zones.get(metazone, {}) + if 'city' in metazone_info: + city_name = metazone_info['city'] + elif '/' in zone: + city_name = zone.split('/', 1)[1].replace('_', ' ') + else: + city_name = zone.replace('_', ' ') return region_format % (fallback_format % { '0': city_name, @@ -341,8 +341,8 @@ The `uncommon` parameter can be set to `True` to enable the use of timezone representations that are not commonly used by the requested locale. 
For example, while in frensh the central europian timezone is usually - abbreviated as "HEC", in Canadian frensh, this abbreviation is not in common - use, so a generic name would be chosen by default: + abbreviated as "HEC", in Canadian French, this abbreviation is not in + common use, so a generic name would be chosen by default: >>> tz = timezone('Europe/Paris') >>> get_timezone_name(tz, 'short', locale='fr_CA') @@ -386,7 +386,6 @@ # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) - metainfo = {} info = locale.time_zones.get(zone, {}) # Try explicitly translated zone names first if width in info: @@ -397,15 +396,16 @@ if field in info[width]: return info[width][field] - if 'use_metazone' in info: - metainfo = locale.meta_zones.get(info['use_metazone'], {}) - if width in metainfo and (uncommon or metainfo.get('common')): + metazone = get_global('meta_zones').get(zone) + if metazone: + metazone_info = locale.meta_zones.get(metazone, {}) + if width in metazone_info and (uncommon or metazone_info.get('common')): if dt is None: field = 'generic' else: field = tzinfo.dst(dt) and 'daylight' or 'standard' - if field in metainfo[width]: - return metainfo[width][field] + if field in metazone_info[width]: + return metazone_info[width][field] # If we have a concrete datetime, we assume that the result can't be # independent of daylight savings time, so we return the GMT offset @@ -521,9 +521,9 @@ >>> from pytz import timezone >>> t = time(15, 30) - >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'), + >>> format_time(t, format='full', tzinfo=timezone('Universal'), ... locale='fr_FR') - u'17:30:00 HEC' + u'15:30:00 Monde (GMT)' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'), ... 
locale='en') u"11 o'clock AM, Eastern Daylight Time" @@ -752,7 +752,7 @@ if num <= 2: return ('%%0%dd' % num) % self.value.month width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] - context = {3: 'format', 4: 'format', 5: 'stand-alone'}[num] + context = {'M': 'format', 'L': 'stand-alone'}[char] return get_month_names(width, context, self.locale)[self.value.month] def format_week(self, char, num): diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -216,6 +216,8 @@ self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`) self.obsolete = odict() #: Dictionary of obsolete messages + self._num_plurals = None + self._plural_expr = None def _get_header_comment(self): comment = self._header_comment @@ -312,6 +314,10 @@ self.last_translator = value elif name == 'language-team': self.language_team = value + elif name == 'plural-forms': + _, params = parse_header(' ;' + value) + self._num_plurals = int(params.get('nplurals', 2)) + self._plural_expr = params.get('plural', '(n != 1)') elif name == 'pot-creation-date': # FIXME: this should use dates.parse_datetime as soon as that # is ready @@ -373,32 +379,49 @@ """) def num_plurals(self): - num = 2 - if self.locale: - if str(self.locale) in PLURALS: - num = PLURALS[str(self.locale)][0] - elif self.locale.language in PLURALS: - num = PLURALS[self.locale.language][0] - return num + if not self._num_plurals: + num = 2 + if self.locale: + if str(self.locale) in PLURALS: + num = PLURALS[str(self.locale)][0] + elif self.locale.language in PLURALS: + num = PLURALS[self.locale.language][0] + self._num_plurals = num + return self._num_plurals num_plurals = property(num_plurals, doc="""\ - The number of plurals used by the locale. + The number of plurals used by the catalog or locale. 
>>> Catalog(locale='en').num_plurals 2 - >>> Catalog(locale='cs_CZ').num_plurals + >>> Catalog(locale='ga').num_plurals 3 :type: `int` """) + def plural_expr(self): + if not self._plural_expr: + expr = '(n != 1)' + if self.locale: + if str(self.locale) in PLURALS: + expr = PLURALS[str(self.locale)][1] + elif self.locale.language in PLURALS: + expr = PLURALS[self.locale.language][1] + self._plural_expr = expr + return self._plural_expr + plural_expr = property(plural_expr, doc="""\ + The plural expression used by the catalog or locale. + + >>> Catalog(locale='en').plural_expr + '(n != 1)' + >>> Catalog(locale='ga').plural_expr + '(n==1 ? 0 : n==2 ? 1 : 2)' + + :type: `basestring` + """) + def plural_forms(self): - num, expr = ('INTEGER', 'EXPRESSION') - if self.locale: - if str(self.locale) in PLURALS: - num, expr = PLURALS[str(self.locale)] - elif self.locale.language in PLURALS: - num, expr = PLURALS[self.locale.language] - return 'nplurals=%s; plural=%s' % (num, expr) + return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr) plural_forms = property(plural_forms, doc="""\ Return the plural forms declaration for the locale. @@ -640,7 +663,7 @@ else: message.previous_id = list(oldmsg.id) else: - oldmsg = remaining.pop(oldkey) + oldmsg = remaining.pop(oldkey, None) message.string = oldmsg.string if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)): diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -30,6 +30,7 @@ from tokenize import generate_tokens, COMMENT, NAME, OP, STRING from babel.util import parse_encoding, pathmatch, relpath +from textwrap import dedent __all__ = ['extract', 'extract_from_dir', 'extract_from_file'] __docformat__ = 'restructuredtext en' @@ -53,9 +54,22 @@ '%s: warning: Empty msgid. 
It is reserved by GNU gettext: gettext("") ' 'returns the header entry with meta information, not the empty string.') + +def _strip_comment_tags(comments, tags): + """Helper function for `extract` that strips comment tags from strings + in a list of comment lines. This functions operates in-place. + """ + def _strip(line): + for tag in tags: + if line.startswith(tag): + return line[len(tag):].strip() + return line + comments[:] = map(_strip, comments) + + def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, options_map=None, keywords=DEFAULT_KEYWORDS, - comment_tags=(), callback=None): + comment_tags=(), callback=None, strip_comment_tags=False): """Extract messages from any source files found in the given directory. This function generates tuples of the form: @@ -118,6 +132,8 @@ performed; the function is passed the filename, the name of the extraction method and and the options dictionary as positional arguments, in that order + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :return: an iterator over ``(filename, lineno, funcname, message)`` tuples :rtype: ``iterator`` :see: `pathmatch` @@ -147,15 +163,18 @@ if callback: callback(filename, method, options) for lineno, message, comments in \ - extract_from_file(method, filepath, - keywords=keywords, - comment_tags=comment_tags, - options=options): + extract_from_file(method, filepath, + keywords=keywords, + comment_tags=comment_tags, + options=options, + strip_comment_tags= + strip_comment_tags): yield filename, lineno, message, comments break + def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, - comment_tags=(), options=None): + comment_tags=(), options=None, strip_comment_tags=False): """Extract messages from a specific file. 
This function returns a list of tuples of the form: @@ -170,18 +189,22 @@ localizable strings :param comment_tags: a list of translator tags to search for and include in the results + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :param options: a dictionary of additional options (optional) :return: the list of extracted messages :rtype: `list` """ fileobj = open(filename, 'U') try: - return list(extract(method, fileobj, keywords, comment_tags, options)) + return list(extract(method, fileobj, keywords, comment_tags, options, + strip_comment_tags)) finally: fileobj.close() + def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), - options=None): + options=None, strip_comment_tags=False): """Extract messages from the given file-like object using the specified extraction method. @@ -205,8 +228,9 @@ :param method: a string specifying the extraction method (.e.g. "python"); if this is a simple name, the extraction function will be looked up by entry point; if it is an explicit reference - to a function (of the form ``package.module:funcname``), the - corresponding function will be imported and used + to a function (of the form ``package.module:funcname`` or + ``package.module.funcname``), the corresponding function + will be imported and used :param fileobj: the file-like object the messages should be extracted from :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to @@ -215,13 +239,20 @@ :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. 
:return: the list of extracted messages :rtype: `list` :raise ValueError: if the extraction method is not registered """ - if ':' in method: - module, clsname = method.split(':', 1) - func = getattr(__import__(module, {}, {}, [clsname]), clsname) + func = None + if ':' in method or '.' in method: + if ':' not in method: + lastdot = method.rfind('.') + module, attrname = method[:lastdot], method[lastdot + 1:] + else: + module, attrname = method.split(':', 1) + func = getattr(__import__(module, {}, {}, [attrname]), attrname) else: try: from pkg_resources import working_set @@ -279,14 +310,20 @@ messages = tuple(msgs) if len(messages) == 1: messages = messages[0] + + if strip_comment_tags: + _strip_comment_tags(comments, comment_tags) + yield lineno, messages, comments + def extract_nothing(fileobj, keywords, comment_tags, options): """Pseudo extractor that does not actually extract anything, but simply returns an empty list. """ return [] + def extract_python(fileobj, keywords, comment_tags, options): """Extract messages from Python source code. @@ -306,6 +343,7 @@ messages = [] translator_comments = [] in_def = in_translator_comments = False + comment_tag = None encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1') @@ -332,8 +370,6 @@ if in_translator_comments and \ translator_comments[-1][0] == lineno - 1: # We're already inside a translator comment, continue appending - # XXX: Should we check if the programmer keeps adding the - # comment_tag for every comment line??? probably not! 
translator_comments.append((lineno, value)) continue # If execution reaches this point, let's see if comment line @@ -341,8 +377,7 @@ for comment_tag in comment_tags: if value.startswith(comment_tag): in_translator_comments = True - comment = value[len(comment_tag):].strip() - translator_comments.append((lineno, comment)) + translator_comments.append((lineno, value)) break elif funcname and call_stack == 0: if tok == OP and value == ')': @@ -392,3 +427,110 @@ funcname = None elif tok == NAME and value in keywords: funcname = value + + +def extract_javascript(fileobj, keywords, comment_tags, options): + """Extract messages from JavaScript source code. + + :param fileobj: the seekable, file-like object the messages should be + extracted from + :param keywords: a list of keywords (i.e. function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + from babel.messages.jslexer import tokenize, unquote_string + funcname = message_lineno = None + messages = [] + last_argument = None + translator_comments = [] + encoding = options.get('encoding', 'utf-8') + last_token = None + call_stack = -1 + + for token in tokenize(fileobj.read().decode(encoding)): + if token.type == 'operator' and token.value == '(': + if funcname: + message_lineno = token.lineno + call_stack += 1 + + elif call_stack == -1 and token.type == 'linecomment': + value = token.value[2:].strip() + if translator_comments and \ + translator_comments[-1][0] == token.lineno - 1: + translator_comments.append((token.lineno, value)) + continue + + for comment_tag in comment_tags: + if value.startswith(comment_tag): + translator_comments.append((token.lineno, value.strip())) + break + + elif token.type == 'multilinecomment': + # only one multi-line 
comment may precede a translation + translator_comments = [] + value = token.value[2:-2].strip() + for comment_tag in comment_tags: + if value.startswith(comment_tag): + lines = value.splitlines() + if lines: + lines[0] = lines[0].strip() + lines[1:] = dedent('\n'.join(lines[1:])).splitlines() + for offset, line in enumerate(lines): + translator_comments.append((token.lineno + offset, + line)) + break + + elif funcname and call_stack == 0: + if token.type == 'operator' and token.value == ')': + if last_argument is not None: + messages.append(last_argument) + if len(messages) > 1: + messages = tuple(messages) + elif messages: + messages = messages[0] + else: + messages = None + + # Comments don't apply unless they immediately precede the + # message + if translator_comments and \ + translator_comments[-1][0] < message_lineno - 1: + translator_comments = [] + + if messages is not None: + yield (message_lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + + funcname = message_lineno = last_argument = None + translator_comments = [] + messages = [] + call_stack = -1 + + elif token.type == 'string': + last_argument = unquote_string(token.value) + + elif token.type == 'operator' and token.value == ',': + if last_argument is not None: + messages.append(last_argument) + last_argument = None + else: + messages.append(None) + + elif call_stack > 0 and token.type == 'operator' \ + and token.value == ')': + call_stack -= 1 + + elif funcname and call_stack == -1: + funcname = None + + elif call_stack == -1 and token.type == 'name' and \ + token.value in keywords and \ + (last_token is None or last_token.type != 'name' or + last_token.value != 'function'): + funcname = token.value + + last_token = token diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py --- a/babel/messages/frontend.py --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -107,9 +107,10 @@ if not self.input_file: if self.locale: - po_files.append(os.path.join(self.directory, 
self.locale, - 'LC_MESSAGES', - self.domain + '.po')) + po_files.append((self.locale, + os.path.join(self.directory, self.locale, + 'LC_MESSAGES', + self.domain + '.po'))) mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', self.domain + '.mo')) @@ -118,12 +119,12 @@ po_file = os.path.join(self.directory, locale, 'LC_MESSAGES', self.domain + '.po') if os.path.exists(po_file): - po_files.append(po_file) + po_files.append((locale, po_file)) mo_files.append(os.path.join(self.directory, locale, 'LC_MESSAGES', self.domain + '.mo')) else: - po_files.append(self.input_file) + po_files.append((self.locale, self.input_file)) if self.output_file: mo_files.append(self.output_file) else: @@ -134,11 +135,11 @@ if not po_files: raise DistutilsOptionError('no message catalogs found') - for idx, po_file in enumerate(po_files): + for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] infile = open(po_file, 'r') try: - catalog = read_po(infile) + catalog = read_po(infile, locale) finally: infile.close() @@ -222,12 +223,14 @@ ('add-comments=', 'c', 'place comment block with TAG (or those preceding keyword lines) in ' 'output file. 
Seperate multiple TAGs with commas(,)'), + ('strip-comments', None, + 'strip the comment TAGs from the comments.'), ('input-dirs=', None, 'directories that should be scanned for messages'), ] boolean_options = [ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', - 'sort-output', 'sort-by-file' + 'sort-output', 'sort-by-file', 'strip-comments' ] def initialize_options(self): @@ -248,6 +251,7 @@ self.copyright_holder = None self.add_comments = None self._add_comments = [] + self.strip_comments = False def finalize_options(self): if self.no_default_keywords and not self.keywords: @@ -304,7 +308,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords=self._keywords, comment_tags=self._add_comments, - callback=callback) + callback=callback, + strip_comment_tags= + self.strip_comments) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)], @@ -698,9 +704,10 @@ parser.error('you must specify either the input file or the ' 'base directory') if options.locale: - po_files.append(os.path.join(options.directory, options.locale, - 'LC_MESSAGES', - options.domain + '.po')) + po_files.append((options.locale, + os.path.join(options.directory, + options.locale, 'LC_MESSAGES', + options.domain + '.po'))) mo_files.append(os.path.join(options.directory, options.locale, 'LC_MESSAGES', options.domain + '.mo')) @@ -709,12 +716,12 @@ po_file = os.path.join(options.directory, locale, 'LC_MESSAGES', options.domain + '.po') if os.path.exists(po_file): - po_files.append(po_file) + po_files.append((locale, po_file)) mo_files.append(os.path.join(options.directory, locale, 'LC_MESSAGES', options.domain + '.mo')) else: - po_files.append(options.input_file) + po_files.append((options.locale, options.input_file)) if options.output_file: mo_files.append(options.output_file) else: @@ -727,11 +734,11 @@ if not po_files: parser.error('no message catalogs found') 
- for idx, po_file in enumerate(po_files): + for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] infile = open(po_file, 'r') try: - catalog = read_po(infile) + catalog = read_po(infile, locale) finally: infile.close() @@ -814,12 +821,15 @@ help='place comment block with TAG (or those ' 'preceding keyword lines) in output file. One ' 'TAG per argument call') + parser.add_option('--strip-comment-tags', '-s', + dest='strip_comment_tags', action='store_true', + help='Strip the comment tags from the comments.') parser.set_defaults(charset='utf-8', keywords=[], no_default_keywords=False, no_location=False, omit_header = False, width=76, no_wrap=False, sort_output=False, sort_by_file=False, - comment_tags=[]) + comment_tags=[], strip_comment_tags=False) options, args = parser.parse_args(argv) if not args: parser.error('incorrect number of arguments') @@ -881,7 +891,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords, options.comment_tags, - callback=callback) + callback=callback, + strip_comment_tags= + options.strip_comment_tags) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)], diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py new file mode 100644 --- /dev/null +++ b/babel/messages/jslexer.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""A simple JavaScript 1.5 lexer which is used for the JavaScript +extractor. 
+""" + +import re +from operator import itemgetter + + +operators = [ + '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', + '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', + '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', + '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':' +] +operators.sort(lambda a, b: cmp(-len(a), -len(b))) + +escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} + +rules = [ + (None, re.compile(r'\s+(?u)')), + (None, re.compile(r'