changeset 348:05975a0e7021 stable
Merged revisions [358:360], [364:370], [373:378], [380:382] from [source:trunk].
| author | cmlenz |
| --- | --- |
| date | Mon, 16 Jun 2008 12:48:43 +0000 |
| parents | f2d6c593bc43 |
| children | 5c606de6497e |
| files | 0.9.x/ChangeLog 0.9.x/babel/dates.py 0.9.x/babel/messages/catalog.py 0.9.x/babel/messages/extract.py 0.9.x/babel/messages/frontend.py 0.9.x/babel/messages/jslexer.py 0.9.x/babel/messages/mofile.py 0.9.x/babel/messages/plurals.py 0.9.x/babel/messages/tests/data/project/i18n/ru_RU/LC_MESSAGES/messages.po 0.9.x/babel/messages/tests/extract.py 0.9.x/babel/messages/tests/frontend.py 0.9.x/babel/messages/tests/mofile.py 0.9.x/babel/support.py 0.9.x/babel/tests/dates.py 0.9.x/babel/util.py 0.9.x/scripts/import_cldr.py 0.9.x/setup.py |
| diffstat | 17 files changed, 674 insertions(+), 103 deletions(-) |
--- a/0.9.x/ChangeLog
+++ b/0.9.x/ChangeLog
@@ -1,3 +1,20 @@
+Version 0.9.3
+http://svn.edgewall.org/repos/babel/tags/0.9.3/
+(?, from branches/stable/0.9.x)
+
+ * Fixed invalid message extraction methods causing an UnboundLocalError.
+ * Extraction method specification can now use a dot instead of the colon to
+   separate module and function name (ticket #105).
+ * Fixed message catalog compilation for locales with more than two plural
+   forms (ticket #95).
+ * Fixed compilation of message catalogs for locales with more than two plural
+   forms where the translations were empty (ticket #97).
+ * The stripping of the comment tags in comments is optional now and
+   is done for each line in a comment.
+ * A JavaScript message extractor was added.
+ * Updated to CLDR 1.5.1.
+
+
 Version 0.9.2
 http://svn.edgewall.org/repos/babel/tags/0.9.2/
 (Feb 4 2007, from branches/stable/0.9.x)
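For reference, the dot-separated extraction method spec from the second bullet (ticket #105) can be exercised straight through the updated `extract()` front end shown further down. A minimal sketch, assuming Babel 0.9.3 on Python 2 and using the built-in `extract_python` function as the target:

```python
from StringIO import StringIO
from babel.messages.extract import extract

buf = StringIO("print _('Hello, world!')\n")

# Both spellings resolve to the same extraction function now; only the
# colon form was accepted before this change.
dot_msgs = list(extract('babel.messages.extract.extract_python', buf))
buf.seek(0)
colon_msgs = list(extract('babel.messages.extract:extract_python', buf))
assert dot_msgs == colon_msgs  # both: [(1, u'Hello, world!', [])]
```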
--- a/0.9.x/babel/dates.py +++ b/0.9.x/babel/dates.py @@ -267,10 +267,7 @@ # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) - metainfo = {} info = locale.time_zones.get(zone, {}) - if 'use_metazone' in info: - metainfo = locale.meta_zones.get(info['use_metazone'], {}) # Otherwise, if there is only one timezone for the country, return the # localized country name @@ -286,12 +283,15 @@ fallback_format = locale.zone_formats['fallback'] if 'city' in info: city_name = info['city'] - elif 'city' in metainfo: - city_name = metainfo['city'] - elif '/' in zone: - city_name = zone.split('/', 1)[1].replace('_', ' ') else: - city_name = zone.replace('_', ' ') + metazone = get_global('meta_zones').get(zone) + metazone_info = locale.meta_zones.get(metazone, {}) + if 'city' in metazone_info: + city_name = metainfo['city'] + elif '/' in zone: + city_name = zone.split('/', 1)[1].replace('_', ' ') + else: + city_name = zone.replace('_', ' ') return region_format % (fallback_format % { '0': city_name, @@ -341,8 +341,8 @@ The `uncommon` parameter can be set to `True` to enable the use of timezone representations that are not commonly used by the requested locale. For example, while in frensh the central europian timezone is usually - abbreviated as "HEC", in Canadian frensh, this abbreviation is not in common - use, so a generic name would be chosen by default: + abbreviated as "HEC", in Canadian French, this abbreviation is not in + common use, so a generic name would be chosen by default: >>> tz = timezone('Europe/Paris') >>> get_timezone_name(tz, 'short', locale='fr_CA') @@ -386,7 +386,6 @@ # Get the canonical time-zone code zone = get_global('zone_aliases').get(zone, zone) - metainfo = {} info = locale.time_zones.get(zone, {}) # Try explicitly translated zone names first if width in info: @@ -397,15 +396,16 @@ if field in info[width]: return info[width][field] - if 'use_metazone' in info: - metainfo = locale.meta_zones.get(info['use_metazone'], {}) - if width in metainfo and (uncommon or metainfo.get('common')): + metazone = get_global('meta_zones').get(zone) + if metazone: + metazone_info = locale.meta_zones.get(metazone, {}) + if width in metazone_info and (uncommon or metazone_info.get('common')): if dt is None: field = 'generic' else: field = tzinfo.dst(dt) and 'daylight' or 'standard' - if field in metainfo[width]: - return metainfo[width][field] + if field in metazone_info[width]: + return metazone_info[width][field] # If we have a concrete datetime, we assume that the result can't be # independent of daylight savings time, so we return the GMT offset @@ -521,9 +521,9 @@ >>> from pytz import timezone >>> t = time(15, 30) - >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'), + >>> format_time(t, format='full', tzinfo=timezone('Universal'), ... locale='fr_FR') - u'17:30:00 HEC' + u'15:30:00 Monde (GMT)' >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'), ... locale='en') u"11 o'clock AM, Eastern Daylight Time" @@ -752,7 +752,7 @@ if num <= 2: return ('%%0%dd' % num) % self.value.month width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num] - context = {3: 'format', 4: 'format', 5: 'stand-alone'}[num] + context = {'M': 'format', 'L': 'stand-alone'}[char] return get_month_names(width, context, self.locale)[self.value.month] def format_week(self, char, num):
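The city and display-name lookup above now goes through the `meta_zones` table in the global data (see the import_cldr.py hunk at the end of this changeset). A short sketch built around the doctest updated in this hunk; pytz and the CLDR 1.5.1 data shipped with this release are assumed:

```python
from datetime import time
from pytz import timezone
from babel.dates import format_time, get_timezone_name

# Doctest from this hunk: 'Universal' has no common French name, so the
# localized fallback format is used.
print format_time(time(15, 30), format='full',
                  tzinfo=timezone('Universal'), locale='fr_FR')
# 15:30:00 Monde (GMT)

# Whether the short metazone form is used depends on how common it is for
# the requested locale (see the `uncommon` parameter discussed above).
print get_timezone_name(timezone('Europe/Paris'), 'short', locale='fr_CA')
```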
--- a/0.9.x/babel/messages/catalog.py +++ b/0.9.x/babel/messages/catalog.py @@ -216,6 +216,8 @@ self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`) self.obsolete = odict() #: Dictionary of obsolete messages + self._num_plurals = None + self._plural_expr = None def _get_header_comment(self): comment = self._header_comment @@ -312,6 +314,10 @@ self.last_translator = value elif name == 'language-team': self.language_team = value + elif name == 'plural-forms': + _, params = parse_header(' ;' + value) + self._num_plurals = int(params.get('nplurals', 2)) + self._plural_expr = params.get('plural', '(n != 1)') elif name == 'pot-creation-date': # FIXME: this should use dates.parse_datetime as soon as that # is ready @@ -373,32 +379,49 @@ """) def num_plurals(self): - num = 2 - if self.locale: - if str(self.locale) in PLURALS: - num = PLURALS[str(self.locale)][0] - elif self.locale.language in PLURALS: - num = PLURALS[self.locale.language][0] - return num + if not self._num_plurals: + num = 2 + if self.locale: + if str(self.locale) in PLURALS: + num = PLURALS[str(self.locale)][0] + elif self.locale.language in PLURALS: + num = PLURALS[self.locale.language][0] + self._num_plurals = num + return self._num_plurals num_plurals = property(num_plurals, doc="""\ - The number of plurals used by the locale. + The number of plurals used by the catalog or locale. >>> Catalog(locale='en').num_plurals 2 - >>> Catalog(locale='cs_CZ').num_plurals + >>> Catalog(locale='ga').num_plurals 3 :type: `int` """) + def plural_expr(self): + if not self._plural_expr: + expr = '(n != 1)' + if self.locale: + if str(self.locale) in PLURALS: + expr = PLURALS[str(self.locale)][1] + elif self.locale.language in PLURALS: + expr = PLURALS[self.locale.language][1] + self._plural_expr = expr + return self._plural_expr + plural_expr = property(plural_expr, doc="""\ + The plural expression used by the catalog or locale. + + >>> Catalog(locale='en').plural_expr + '(n != 1)' + >>> Catalog(locale='ga').plural_expr + '(n==1 ? 0 : n==2 ? 1 : 2)' + + :type: `basestring` + """) + def plural_forms(self): - num, expr = ('INTEGER', 'EXPRESSION') - if self.locale: - if str(self.locale) in PLURALS: - num, expr = PLURALS[str(self.locale)] - elif self.locale.language in PLURALS: - num, expr = PLURALS[self.locale.language] - return 'nplurals=%s; plural=%s' % (num, expr) + return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr) plural_forms = property(plural_forms, doc="""\ Return the plural forms declaration for the locale. @@ -640,7 +663,7 @@ else: message.previous_id = list(oldmsg.id) else: - oldmsg = remaining.pop(oldkey) + oldmsg = remaining.pop(oldkey, None) message.string = oldmsg.string if isinstance(message.id, (list, tuple)): if not isinstance(message.string, (list, tuple)):
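With the new `num_plurals`/`plural_expr` caching, a catalog carries plural data either from its locale or from an explicit Plural-Forms header. A small sketch of the properties, using the doctest values from this hunk:

```python
from babel.messages.catalog import Catalog

# Derived from the locale when no Plural-Forms header has been seen:
Catalog(locale='en').num_plurals   # 2
Catalog(locale='ga').num_plurals   # 3
Catalog(locale='en').plural_expr   # '(n != 1)'
Catalog(locale='ga').plural_expr   # '(n==1 ? 0 : n==2 ? 1 : 2)'
Catalog(locale='ga').plural_forms  # 'nplurals=3; plural=(n==1 ? 0 : n==2 ? 1 : 2)'

# A Plural-Forms header in a PO file read back with read_po() now takes
# precedence over these locale defaults (parsed in _set_mime_headers above).
```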
--- a/0.9.x/babel/messages/extract.py +++ b/0.9.x/babel/messages/extract.py @@ -30,6 +30,7 @@ from tokenize import generate_tokens, COMMENT, NAME, OP, STRING from babel.util import parse_encoding, pathmatch, relpath +from textwrap import dedent __all__ = ['extract', 'extract_from_dir', 'extract_from_file'] __docformat__ = 'restructuredtext en' @@ -53,9 +54,22 @@ '%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") ' 'returns the header entry with meta information, not the empty string.') + +def _strip_comment_tags(comments, tags): + """Helper function for `extract` that strips comment tags from strings + in a list of comment lines. This functions operates in-place. + """ + def _strip(line): + for tag in tags: + if line.startswith(tag): + return line[len(tag):].strip() + return line + comments[:] = map(_strip, comments) + + def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, options_map=None, keywords=DEFAULT_KEYWORDS, - comment_tags=(), callback=None): + comment_tags=(), callback=None, strip_comment_tags=False): """Extract messages from any source files found in the given directory. This function generates tuples of the form: @@ -118,6 +132,8 @@ performed; the function is passed the filename, the name of the extraction method and and the options dictionary as positional arguments, in that order + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :return: an iterator over ``(filename, lineno, funcname, message)`` tuples :rtype: ``iterator`` :see: `pathmatch` @@ -147,15 +163,18 @@ if callback: callback(filename, method, options) for lineno, message, comments in \ - extract_from_file(method, filepath, - keywords=keywords, - comment_tags=comment_tags, - options=options): + extract_from_file(method, filepath, + keywords=keywords, + comment_tags=comment_tags, + options=options, + strip_comment_tags= + strip_comment_tags): yield filename, lineno, message, comments break + def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, - comment_tags=(), options=None): + comment_tags=(), options=None, strip_comment_tags=False): """Extract messages from a specific file. This function returns a list of tuples of the form: @@ -170,18 +189,22 @@ localizable strings :param comment_tags: a list of translator tags to search for and include in the results + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :param options: a dictionary of additional options (optional) :return: the list of extracted messages :rtype: `list` """ fileobj = open(filename, 'U') try: - return list(extract(method, fileobj, keywords, comment_tags, options)) + return list(extract(method, fileobj, keywords, comment_tags, options, + strip_comment_tags)) finally: fileobj.close() + def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), - options=None): + options=None, strip_comment_tags=False): """Extract messages from the given file-like object using the specified extraction method. @@ -205,8 +228,9 @@ :param method: a string specifying the extraction method (.e.g. 
"python"); if this is a simple name, the extraction function will be looked up by entry point; if it is an explicit reference - to a function (of the form ``package.module:funcname``), the - corresponding function will be imported and used + to a function (of the form ``package.module:funcname`` or + ``package.module.funcname``), the corresponding function + will be imported and used :param fileobj: the file-like object the messages should be extracted from :param keywords: a dictionary mapping keywords (i.e. names of functions that should be recognized as translation functions) to @@ -215,13 +239,20 @@ :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :return: the list of extracted messages :rtype: `list` :raise ValueError: if the extraction method is not registered """ - if ':' in method: - module, clsname = method.split(':', 1) - func = getattr(__import__(module, {}, {}, [clsname]), clsname) + func = None + if ':' in method or '.' in method: + if ':' not in method: + lastdot = method.rfind('.') + module, attrname = method[:lastdot], method[lastdot + 1:] + else: + module, attrname = method.split(':', 1) + func = getattr(__import__(module, {}, {}, [attrname]), attrname) else: try: from pkg_resources import working_set @@ -279,14 +310,20 @@ messages = tuple(msgs) if len(messages) == 1: messages = messages[0] + + if strip_comment_tags: + _strip_comment_tags(comments, comment_tags) + yield lineno, messages, comments + def extract_nothing(fileobj, keywords, comment_tags, options): """Pseudo extractor that does not actually extract anything, but simply returns an empty list. """ return [] + def extract_python(fileobj, keywords, comment_tags, options): """Extract messages from Python source code. @@ -306,6 +343,7 @@ messages = [] translator_comments = [] in_def = in_translator_comments = False + comment_tag = None encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1') @@ -332,8 +370,6 @@ if in_translator_comments and \ translator_comments[-1][0] == lineno - 1: # We're already inside a translator comment, continue appending - # XXX: Should we check if the programmer keeps adding the - # comment_tag for every comment line??? probably not! translator_comments.append((lineno, value)) continue # If execution reaches this point, let's see if comment line @@ -341,8 +377,7 @@ for comment_tag in comment_tags: if value.startswith(comment_tag): in_translator_comments = True - comment = value[len(comment_tag):].strip() - translator_comments.append((lineno, comment)) + translator_comments.append((lineno, value)) break elif funcname and call_stack == 0: if tok == OP and value == ')': @@ -392,3 +427,110 @@ funcname = None elif tok == NAME and value in keywords: funcname = value + + +def extract_javascript(fileobj, keywords, comment_tags, options): + """Extract messages from JavaScript source code. + + :param fileobj: the seekable, file-like object the messages should be + extracted from + :param keywords: a list of keywords (i.e. 
function names) that should be + recognized as translation functions + :param comment_tags: a list of translator tags to search for and include + in the results + :param options: a dictionary of additional options (optional) + :return: an iterator over ``(lineno, funcname, message, comments)`` tuples + :rtype: ``iterator`` + """ + from babel.messages.jslexer import tokenize, unquote_string + funcname = message_lineno = None + messages = [] + last_argument = None + translator_comments = [] + encoding = options.get('encoding', 'utf-8') + last_token = None + call_stack = -1 + + for token in tokenize(fileobj.read().decode(encoding)): + if token.type == 'operator' and token.value == '(': + if funcname: + message_lineno = token.lineno + call_stack += 1 + + elif call_stack == -1 and token.type == 'linecomment': + value = token.value[2:].strip() + if translator_comments and \ + translator_comments[-1][0] == token.lineno - 1: + translator_comments.append((token.lineno, value)) + continue + + for comment_tag in comment_tags: + if value.startswith(comment_tag): + translator_comments.append((token.lineno, value.strip())) + break + + elif token.type == 'multilinecomment': + # only one multi-line comment may preceed a translation + translator_comments = [] + value = token.value[2:-2].strip() + for comment_tag in comment_tags: + if value.startswith(comment_tag): + lines = value.splitlines() + if lines: + lines[0] = lines[0].strip() + lines[1:] = dedent('\n'.join(lines[1:])).splitlines() + for offset, line in enumerate(lines): + translator_comments.append((token.lineno + offset, + line)) + break + + elif funcname and call_stack == 0: + if token.type == 'operator' and token.value == ')': + if last_argument is not None: + messages.append(last_argument) + if len(messages) > 1: + messages = tuple(messages) + elif messages: + messages = messages[0] + else: + messages = None + + # Comments don't apply unless they immediately preceed the + # message + if translator_comments and \ + translator_comments[-1][0] < message_lineno - 1: + translator_comments = [] + + if messages is not None: + yield (message_lineno, funcname, messages, + [comment[1] for comment in translator_comments]) + + funcname = message_lineno = last_argument = None + translator_comments = [] + messages = [] + call_stack = -1 + + elif token.type == 'string': + last_argument = unquote_string(token.value) + + elif token.type == 'operator' and token.value == ',': + if last_argument is not None: + messages.append(last_argument) + last_argument = None + else: + messages.append(None) + + elif call_stack > 0 and token.type == 'operator' \ + and token.value == ')': + call_stack -= 1 + + elif funcname and call_stack == -1: + funcname = None + + elif call_stack == -1 and token.type == 'name' and \ + token.value in keywords and \ + (last_token is None or last_token.type != 'name' or + last_token.value != 'function'): + funcname = token.value + + last_token = token
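The new `extract_javascript()` extractor plugs into the generic `extract()` front end under the name "javascript". A sketch following the assertions in the ExtractJavaScriptTestCase added later in this changeset:

```python
from StringIO import StringIO
from babel.messages import extract

buf = StringIO("""\
// NOTE: a translator comment
msg1 = _('simple')
msg2 = ngettext('singular', 'plural', 42)
""")
messages = list(extract.extract('javascript', buf,
                                keywords=extract.DEFAULT_KEYWORDS,
                                comment_tags=['NOTE:']))
# [(2, u'simple', [u'NOTE: a translator comment']),
#  (3, (u'singular', u'plural'), [])]
```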
--- a/0.9.x/babel/messages/frontend.py +++ b/0.9.x/babel/messages/frontend.py @@ -107,9 +107,10 @@ if not self.input_file: if self.locale: - po_files.append(os.path.join(self.directory, self.locale, - 'LC_MESSAGES', - self.domain + '.po')) + po_files.append((self.locale, + os.path.join(self.directory, self.locale, + 'LC_MESSAGES', + self.domain + '.po'))) mo_files.append(os.path.join(self.directory, self.locale, 'LC_MESSAGES', self.domain + '.mo')) @@ -118,12 +119,12 @@ po_file = os.path.join(self.directory, locale, 'LC_MESSAGES', self.domain + '.po') if os.path.exists(po_file): - po_files.append(po_file) + po_files.append((locale, po_file)) mo_files.append(os.path.join(self.directory, locale, 'LC_MESSAGES', self.domain + '.mo')) else: - po_files.append(self.input_file) + po_files.append((self.locale, self.input_file)) if self.output_file: mo_files.append(self.output_file) else: @@ -134,11 +135,11 @@ if not po_files: raise DistutilsOptionError('no message catalogs found') - for idx, po_file in enumerate(po_files): + for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] infile = open(po_file, 'r') try: - catalog = read_po(infile) + catalog = read_po(infile, locale) finally: infile.close() @@ -222,12 +223,14 @@ ('add-comments=', 'c', 'place comment block with TAG (or those preceding keyword lines) in ' 'output file. Seperate multiple TAGs with commas(,)'), + ('strip-comments', None, + 'strip the comment TAGs from the comments.'), ('input-dirs=', None, 'directories that should be scanned for messages'), ] boolean_options = [ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', - 'sort-output', 'sort-by-file' + 'sort-output', 'sort-by-file', 'strip-comments' ] def initialize_options(self): @@ -248,6 +251,7 @@ self.copyright_holder = None self.add_comments = None self._add_comments = [] + self.strip_comments = False def finalize_options(self): if self.no_default_keywords and not self.keywords: @@ -304,7 +308,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords=self._keywords, comment_tags=self._add_comments, - callback=callback) + callback=callback, + strip_comment_tags= + self.strip_comments) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)], @@ -698,9 +704,10 @@ parser.error('you must specify either the input file or the ' 'base directory') if options.locale: - po_files.append(os.path.join(options.directory, options.locale, - 'LC_MESSAGES', - options.domain + '.po')) + po_files.append((options.locale, + os.path.join(options.directory, + options.locale, 'LC_MESSAGES', + options.domain + '.po'))) mo_files.append(os.path.join(options.directory, options.locale, 'LC_MESSAGES', options.domain + '.mo')) @@ -709,12 +716,12 @@ po_file = os.path.join(options.directory, locale, 'LC_MESSAGES', options.domain + '.po') if os.path.exists(po_file): - po_files.append(po_file) + po_files.append((locale, po_file)) mo_files.append(os.path.join(options.directory, locale, 'LC_MESSAGES', options.domain + '.mo')) else: - po_files.append(options.input_file) + po_files.append((options.locale, options.input_file)) if options.output_file: mo_files.append(options.output_file) else: @@ -727,11 +734,11 @@ if not po_files: parser.error('no message catalogs found') - for idx, po_file in enumerate(po_files): + for idx, (locale, po_file) in enumerate(po_files): mo_file = mo_files[idx] infile = open(po_file, 'r') try: - catalog = read_po(infile) + catalog = 
read_po(infile, locale) finally: infile.close() @@ -814,12 +821,15 @@ help='place comment block with TAG (or those ' 'preceding keyword lines) in output file. One ' 'TAG per argument call') + parser.add_option('--strip-comment-tags', '-s', + dest='strip_comment_tags', action='store_true', + help='Strip the comment tags from the comments.') parser.set_defaults(charset='utf-8', keywords=[], no_default_keywords=False, no_location=False, omit_header = False, width=76, no_wrap=False, sort_output=False, sort_by_file=False, - comment_tags=[]) + comment_tags=[], strip_comment_tags=False) options, args = parser.parse_args(argv) if not args: parser.error('incorrect number of arguments') @@ -881,7 +891,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords, options.comment_tags, - callback=callback) + callback=callback, + strip_comment_tags= + options.strip_comment_tags) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)],
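Comment-tag stripping is exposed both as the distutils `strip-comments` option and as the `--strip-comment-tags`/`-s` command-line flag added above. A hedged sketch of how this is typically wired up; the `-o`/`--output` and `--add-comments` spellings and the setup.cfg keys are the usual frontend options rather than something shown in this hunk:

```
# command line
pybabel extract --add-comments NOTE: --strip-comment-tags -o messages.pot .

# setup.cfg for the extract_messages distutils command
[extract_messages]
add_comments = NOTE:
strip_comments = true
output_file = messages.pot
```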
new file mode 100644 --- /dev/null +++ b/0.9.x/babel/messages/jslexer.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://babel.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://babel.edgewall.org/log/. + +"""A simple JavaScript 1.5 lexer which is used for the JavaScript +extractor. +""" + +import re +from operator import itemgetter + + +operators = [ + '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', + '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', + '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', + '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':' +] +operators.sort(lambda a, b: cmp(-len(a), -len(b))) + +escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'} + +rules = [ + (None, re.compile(r'\s+(?u)')), + (None, re.compile(r'<!--.*')), + ('linecomment', re.compile(r'//.*')), + ('multilinecomment', re.compile(r'/\*.*?\*/(?us)')), + ('name', re.compile(r'(\$+\w*|[^\W\d]\w*)(?u)')), + ('number', re.compile(r'''(?x)( + (?:0|[1-9]\d*) + (\.\d+)? + ([eE][-+]?\d+)? | + (0x[a-fA-F0-9]+) + )''')), + ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))), + ('string', re.compile(r'''(?xs)( + '(?:[^'\\]*(?:\\.[^'\\]*)*)' | + "(?:[^"\\]*(?:\\.[^"\\]*)*)" + )''')) +] + +division_re = re.compile(r'/=?') +regex_re = re.compile(r'/.+?/[a-zA-Z]*(?s)') +line_re = re.compile(r'(\r\n|\n|\r)') +line_join_re = re.compile(r'\\' + line_re.pattern) +uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}') + + +class Token(tuple): + """Represents a token as returned by `tokenize`.""" + __slots__ = () + + def __new__(cls, type, value, lineno): + return tuple.__new__(cls, (type, value, lineno)) + + type = property(itemgetter(0)) + value = property(itemgetter(1)) + lineno = property(itemgetter(2)) + + +def indicates_division(token): + """A helper function that helps the tokenizer to decide if the current + token may be followed by a division operator. + """ + if token.type == 'operator': + return token.value in (')', ']', '}', '++', '--') + return token.type in ('name', 'number', 'string', 'regexp') + + +def unquote_string(string): + """Unquote a string with JavaScript rules. The string has to start with + string delimiters (``'`` or ``"``.) + + :return: a string + """ + assert string and string[0] == string[-1] and string[0] in '"\'', \ + 'string provided is not properly delimited' + string = line_join_re.sub('\\1', string[1:-1]) + result = [] + add = result.append + pos = 0 + + while 1: + # scan for the next escape + escape_pos = string.find('\\', pos) + if escape_pos < 0: + break + add(string[pos:escape_pos]) + + # check which character is escaped + next_char = string[escape_pos + 1] + if next_char in escapes: + add(escapes[next_char]) + + # unicode escapes. trie to consume up to four characters of + # hexadecimal characters and try to interpret them as unicode + # character point. If there is no such character point, put + # all the consumed characters into the string. 
+ elif next_char in 'uU': + escaped = uni_escape_re.match(string, escape_pos + 2) + if escaped is not None: + escaped_value = escaped.group() + if len(escaped_value) == 4: + try: + add(unichr(int(escaped_value, 16))) + except ValueError: + pass + else: + pos = escape_pos + 6 + continue + add(next_char + escaped_value) + pos = escaped.end() + continue + else: + add(next_char) + + # bogus escape. Just remove the backslash. + else: + add(next_char) + pos = escape_pos + 2 + + if pos < len(string): + add(string[pos:]) + + return u''.join(result) + + +def tokenize(source): + """Tokenize a JavaScript source. + + :return: generator of `Token`\s + """ + may_divide = False + pos = 0 + lineno = 1 + end = len(source) + + while pos < end: + # handle regular rules first + for token_type, rule in rules: + match = rule.match(source, pos) + if match is not None: + break + # if we don't have a match we don't give up yet, but check for + # division operators or regular expression literals, based on + # the status of `may_divide` which is determined by the last + # processed non-whitespace token using `indicates_division`. + else: + if may_divide: + match = division_re.match(source, pos) + token_type = 'operator' + else: + match = regex_re.match(source, pos) + token_type = 'regexp' + if match is None: + # woops. invalid syntax. jump one char ahead and try again. + pos += 1 + continue + + token_value = match.group() + if token_type is not None: + token = Token(token_type, token_value, lineno) + may_divide = indicates_division(token) + yield token + lineno += len(line_re.findall(token_value)) + pos = match.end()
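A quick sketch of using the lexer on its own; the extractor above only needs `tokenize()` and `unquote_string()`:

```python
from babel.messages.jslexer import tokenize, unquote_string

source = u"msg = gettext('Hello\\u00e0');  // a line comment"
for token in tokenize(source):
    # Token is a tuple subclass exposing .type, .value and .lineno
    print token.lineno, token.type, repr(token.value)

# unquote_string() applies the JavaScript escaping rules to a delimited
# string literal:
print repr(unquote_string(u"'Hello\\u00e0'"))  # u'Hello\xe0'
```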
--- a/0.9.x/babel/messages/mofile.py +++ b/0.9.x/babel/messages/mofile.py @@ -81,7 +81,7 @@ msgstrs = [] for idx, string in enumerate(message.string): if not string: - msgstrs.append(message.id[idx]) + msgstrs.append(message.id[min(int(idx), 1)]) else: msgstrs.append(string) msgstr = '\x00'.join([
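The `min(int(idx), 1)` clamp is what lets catalogs with more than two plural forms compile while translations are still empty: it falls back to the singular or plural msgid instead of indexing past the end of the two-element id tuple (ticket #97). This is essentially what the new test in babel/messages/tests/mofile.py below exercises:

```python
from StringIO import StringIO
from babel.messages.catalog import Catalog
from babel.messages import mofile

catalog = Catalog(locale='ru_RU')               # three plural forms
catalog.add(('Fuzz', 'Fuzzes'), ('', '', ''))   # all translations empty
buf = StringIO()
mofile.write_mo(buf, catalog)                   # previously raised IndexError
```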
--- a/0.9.x/babel/messages/plurals.py +++ b/0.9.x/babel/messages/plurals.py @@ -75,7 +75,7 @@ # Chuvash 'cv': (1, '0'), # Welsh - 'cy': (5, 'n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0'), + 'cy': (5, '(n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0)'), # Danish 'da': (2, '(n != 1)'), # German @@ -105,15 +105,15 @@ # Friulian - From Pootle's PO's 'fur': (2, '(n > 1)'), # Irish - 'ga': (3, 'n==1 ? 0 : n==2 ? 1 : 2'), + 'ga': (3, '(n==1 ? 0 : n==2 ? 1 : 2)'), # Galician - From Pootle's PO's 'gl': (2, '(n != 1)'), # Hausa - From Pootle's PO's - 'ha': (2, '(n != 1)'), + 'ha': (2, '(n != 1)'), # Hebrew 'he': (2, '(n != 1)'), # Hindi - From Pootle's PO's - 'hi': (2, '(n != 1)'), + 'hi': (2, '(n != 1)'), # Croatian 'hr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), # Hungarian
new file mode 100644 --- /dev/null +++ b/0.9.x/babel/messages/tests/data/project/i18n/ru_RU/LC_MESSAGES/messages.po @@ -0,0 +1,34 @@ +# Russian (Russia) translations for TestProject. +# Copyright (C) 2007 FooBar, Inc. +# This file is distributed under the same license as the TestProject +# project. +# FIRST AUTHOR <EMAIL@ADDRESS>, 2007. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: TestProject 0.1\n" +"Report-Msgid-Bugs-To: bugs.address@email.tld\n" +"POT-Creation-Date: 2007-04-01 15:30+0200\n" +"PO-Revision-Date: 2007-07-30 22:18+0200\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: ru_RU <LL@li.org>\n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && " +"n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 0.9dev-r363\n" + +#. This will be a translator coment, +#. that will include several lines +#: project/file1.py:8 +msgid "bar" +msgstr "" + +#: project/file2.py:9 +msgid "foobar" +msgid_plural "foobars" +msgstr[0] "" +msgstr[1] "" +msgstr[2] ""
--- a/0.9.x/babel/messages/tests/extract.py +++ b/0.9.x/babel/messages/tests/extract.py @@ -145,7 +145,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment'], messages[0][3]) + self.assertEqual([u'NOTE: A translation comment'], messages[0][3]) def test_comment_tag_multiline(self): buf = StringIO(""" @@ -155,7 +155,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment', u'with a second line'], + self.assertEqual([u'NOTE: A translation comment', u'with a second line'], messages[0][3]) def test_translator_comments_with_previous_non_translator_comments(self): @@ -168,7 +168,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment', u'with a second line'], + self.assertEqual([u'NOTE: A translation comment', u'with a second line'], messages[0][3]) def test_comment_tags_not_on_start_of_comment(self): @@ -181,7 +181,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'This one will be'], messages[0][3]) + self.assertEqual([u'NOTE: This one will be'], messages[0][3]) def test_multiple_comment_tags(self): buf = StringIO(""" @@ -195,10 +195,10 @@ messages = list(extract.extract_python(buf, ('_',), ['NOTE1:', 'NOTE2:'], {})) self.assertEqual(u'Foo Bar1', messages[0][2]) - self.assertEqual([u'A translation comment for tag1', + self.assertEqual([u'NOTE1: A translation comment for tag1', u'with a second line'], messages[0][3]) self.assertEqual(u'Foo Bar2', messages[1][2]) - self.assertEqual([u'A translation comment for tag2'], messages[1][3]) + self.assertEqual([u'NOTE2: A translation comment for tag2'], messages[1][3]) def test_two_succeeding_comments(self): buf = StringIO(""" @@ -208,7 +208,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'one', u'NOTE: two'], messages[0][3]) + self.assertEqual([u'NOTE: one', u'NOTE: two'], messages[0][3]) def test_invalid_translator_comments(self): buf = StringIO(""" @@ -234,7 +234,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Hi there!', messages[0][2]) - self.assertEqual([u'Hi!'], messages[0][3]) + self.assertEqual([u'NOTE: Hi!'], messages[0][3]) self.assertEqual(u'Hello', messages[1][2]) self.assertEqual([], messages[1][3]) @@ -274,7 +274,7 @@ messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {'encoding': 'utf-8'})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) def test_utf8_message_with_magic_comment(self): buf = StringIO("""# -*- coding: utf-8 -*- @@ -283,7 +283,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) def test_utf8_message_with_utf8_bom(self): buf = StringIO(codecs.BOM_UTF8 + """ @@ -292,7 +292,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: 
hello'], messages[0][3]) def test_utf8_raw_strings_match_unicode_strings(self): buf = StringIO(codecs.BOM_UTF8 + """ @@ -303,6 +303,116 @@ self.assertEqual(u'Bonjour à tous', messages[0][2]) self.assertEqual(messages[0][2], messages[1][2]) + def test_extract_strip_comment_tags(self): + buf = StringIO("""\ +#: This is a comment with a very simple +#: prefix specified +_('Servus') + +# NOTE: This is a multiline comment with +# a prefix too +_('Babatschi')""") + messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'], + strip_comment_tags=True)) + self.assertEqual(u'Servus', messages[0][1]) + self.assertEqual([u'This is a comment with a very simple', + u'prefix specified'], messages[0][2]) + self.assertEqual(u'Babatschi', messages[1][1]) + self.assertEqual([u'This is a multiline comment with', + u'a prefix too'], messages[1][2]) + + +class ExtractJavaScriptTestCase(unittest.TestCase): + + def test_simple_extract(self): + buf = StringIO("""\ +msg1 = _('simple') +msg2 = gettext('simple') +msg3 = ngettext('s', 'p', 42) + """) + messages = \ + list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, + [], {})) + + self.assertEqual([(1, 'simple', []), + (2, 'simple', []), + (3, ('s', 'p'), [])], messages) + + def test_various_calls(self): + buf = StringIO("""\ +msg1 = _(i18n_arg.replace(/"/, '"')) +msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2) +msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2) +msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2) +msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1)) +msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1)) +msg7 = _(hello.there) +msg8 = gettext('Rabbit') +msg9 = dgettext('wiki', model.addPage()) +msg10 = dngettext(domain, 'Page', 'Pages', 3) +""") + messages = \ + list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], + {})) + self.assertEqual([(5, (u'bunny', u'bunnies'), []), + (8, u'Rabbit', []), + (10, (u'Page', u'Pages'), [])], messages) + + def test_message_with_line_comment(self): + buf = StringIO("""\ +// NOTE: hello +msg = _('Bonjour à tous') +""") + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + self.assertEqual(u'Bonjour à tous', messages[0][2]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) + + def test_message_with_multiline_comment(self): + buf = StringIO("""\ +/* NOTE: hello + and bonjour + and servus */ +msg = _('Bonjour à tous') +""") + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + self.assertEqual(u'Bonjour à tous', messages[0][2]) + self.assertEqual([u'NOTE: hello', 'and bonjour', ' and servus'], messages[0][3]) + + def test_ignore_function_definitions(self): + buf = StringIO("""\ +function gettext(value) { + return translations[language][value] || value; +}""") + + messages = list(extract.extract_javascript(buf, ('gettext',), [], {})) + self.assertEqual(messages, []) + + def test_misplaced_comments(self): + buf = StringIO("""\ +/* NOTE: this won't show up */ +foo() + +/* NOTE: this will */ +msg = _('Something') + +// NOTE: this will show up +// too. 
+msg = _('Something else') + +// NOTE: but this won't +bar() + +_('no comment here') +""") + messages = list(extract.extract_javascript(buf, ('_',), ['NOTE:'], {})) + self.assertEqual(u'Something', messages[0][2]) + self.assertEqual([u'NOTE: this will'], messages[0][3]) + self.assertEqual(u'Something else', messages[1][2]) + self.assertEqual([u'NOTE: this will show up', 'too.'], messages[1][3]) + self.assertEqual(u'no comment here', messages[2][2]) + self.assertEqual([], messages[2][3]) + + class ExtractTestCase(unittest.TestCase): def test_invalid_filter(self): @@ -325,6 +435,10 @@ (8, u'Rabbit', []), (10, (u'Page', u'Pages'), [])], messages) + def test_invalid_extract_method(self): + buf = StringIO('') + self.assertRaises(ValueError, list, extract.extract('spam', buf)) + def test_different_signatures(self): buf = StringIO(""" foo = _('foo', 'bar') @@ -356,10 +470,12 @@ finally: sys.stderr = stderr + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(extract)) suite.addTest(unittest.makeSuite(ExtractPythonTestCase)) + suite.addTest(unittest.makeSuite(ExtractJavaScriptTestCase)) suite.addTest(unittest.makeSuite(ExtractTestCase)) return suite
--- a/0.9.x/babel/messages/tests/frontend.py +++ b/0.9.x/babel/messages/tests/frontend.py @@ -129,7 +129,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar" @@ -187,7 +187,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar" @@ -244,7 +244,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar" @@ -660,6 +660,22 @@ if os.path.isfile(mo_file): os.unlink(mo_file) + def test_compile_catalog_with_more_than_2_plural_forms(self): + po_file = os.path.join(self.datadir, 'project', 'i18n', 'ru_RU', + 'LC_MESSAGES', 'messages.po') + mo_file = po_file.replace('.po', '.mo') + try: + self.cli.run(sys.argv + ['compile', + '--locale', 'ru_RU', '--use-fuzzy', + '-d', os.path.join(self.datadir, 'project', 'i18n')]) + assert os.path.isfile(mo_file) + self.assertEqual("""\ +compiling catalog %r to %r +""" % (po_file, mo_file), sys.stderr.getvalue()) + finally: + if os.path.isfile(mo_file): + os.unlink(mo_file) + def suite(): suite = unittest.TestSuite()
--- a/0.9.x/babel/messages/tests/mofile.py +++ b/0.9.x/babel/messages/tests/mofile.py @@ -47,6 +47,13 @@ self.assertEqual(u'Fuzzes', translations.ugettext('Fuzzes')) assert isinstance(translations.ugettext('Fuzzes'), unicode) + def test_more_plural_forms(self): + catalog2 = Catalog(locale='ru_RU') + catalog2.add(('Fuzz', 'Fuzzes'), ('', '', '')) + buf = StringIO() + mofile.write_mo(buf, catalog2) + + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(mofile))
--- a/0.9.x/babel/support.py +++ b/0.9.x/babel/support.py @@ -276,7 +276,7 @@ from """ gettext.GNUTranslations.__init__(self, fp=fileobj) - self.files = [getattr(fileobj, 'name')] + self.files = filter(None, [getattr(fileobj, 'name', None)]) def load(cls, dirname=None, locales=None, domain=DEFAULT_DOMAIN): """Load translations from the given directory. @@ -290,9 +290,10 @@ matching translations were found :rtype: `Translations` """ - if not isinstance(locales, (list, tuple)): - locales = [locales] - locales = [str(locale) for locale in locales] + if locales is not None: + if not isinstance(locales, (list, tuple)): + locales = [locales] + locales = [str(locale) for locale in locales] filename = gettext.find(domain or cls.DEFAULT_DOMAIN, dirname, locales) if not filename: return gettext.NullTranslations()
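With the guard above, `locales` may now be omitted entirely and `gettext.find()` falls back to its usual environment-based lookup. A minimal sketch; the `'i18n'` directory and `'messages'` domain are hypothetical:

```python
from babel.support import Translations

translations = Translations.load(dirname='i18n', domain='messages')
# A NullTranslations instance is returned when nothing matches, so this is
# safe either way:
print translations.ugettext('bar')
```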
--- a/0.9.x/babel/tests/dates.py +++ b/0.9.x/babel/tests/dates.py @@ -22,6 +22,13 @@ class DateTimeFormatTestCase(unittest.TestCase): + def test_month_context(self): + d = date(2006, 1, 8) + fmt = dates.DateTimeFormat(d, locale='cs_CZ') + self.assertEqual('1', fmt['MMM']) + fmt = dates.DateTimeFormat(d, locale='cs_CZ') + self.assertEqual('1.', fmt['LLL']) + def test_week_of_year_first(self): d = date(2006, 1, 8) fmt = dates.DateTimeFormat(d, locale='de_DE')
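This new test pins down the `MMM` (format) versus `LLL` (stand-alone) month-context fix from the dates.py hunk above. The same behaviour can be seen through `format_date`, assuming it is given a custom pattern; the '1' / '1.' values come from the Czech data in CLDR 1.5.1, per the test:

```python
from datetime import date
from babel.dates import format_date

d = date(2006, 1, 8)
print format_date(d, 'MMM', locale='cs_CZ')   # format context, e.g. '1'
print format_date(d, 'LLL', locale='cs_CZ')   # stand-alone context, e.g. '1.'
```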
--- a/0.9.x/babel/util.py +++ b/0.9.x/babel/util.py @@ -16,7 +16,6 @@ import codecs from datetime import timedelta, tzinfo import os -import parser import re try: set @@ -24,6 +23,8 @@ from sets import Set as set import textwrap import time +from itertools import izip, imap +missing = object() __all__ = ['distinct', 'pathmatch', 'relpath', 'wraptext', 'odict', 'UTC', 'LOCALTZ'] @@ -75,8 +76,9 @@ m = PYTHON_MAGIC_COMMENT_re.match(line1) if not m: try: + import parser parser.suite(line1) - except SyntaxError: + except (ImportError, SyntaxError): # Either it's a real syntax error, in which case the source is # not valid python source, or line2 is a continuation of line1, # in which case we don't want to scan line2 for a magic @@ -193,6 +195,7 @@ def __iter__(self): return iter(self._keys) + iterkeys = __iter__ def clear(self): dict.clear(self) @@ -206,14 +209,23 @@ def items(self): return zip(self._keys, self.values()) + def iteritems(self): + return izip(self._keys, self.itervalues()) + def keys(self): return self._keys[:] - def pop(self, key, default=None): - if key not in self: + def pop(self, key, default=missing): + if default is missing: + return dict.pop(self, key) + elif key not in self: return default self._keys.remove(key) - return dict.pop(self, key) + return dict.pop(self, key, default) + + def popitem(self, key): + self._keys.remove(key) + return dict.popitem(key) def setdefault(self, key, failobj = None): dict.setdefault(self, key, failobj) @@ -227,6 +239,9 @@ def values(self): return map(self.get, self._keys) + def itervalues(self): + return imap(self.get, self._keys) + try: relpath = os.path.relpath
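The `odict` additions round out the dictionary API (lazy iteration helpers and stricter `pop()` semantics). A small sketch, assuming the class as patched above:

```python
from babel.util import odict

d = odict()
d['second'] = 2
d['first'] = 1

d.keys()              # ['second', 'first'] -- insertion order is preserved
list(d.iteritems())   # [('second', 2), ('first', 1)]

d.pop('missing', 42)  # 42: an explicit default is still returned
try:
    d.pop('missing')  # without a default this now raises KeyError
except KeyError:
    pass              # instead of silently returning None as before
```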
--- a/0.9.x/scripts/import_cldr.py +++ b/0.9.x/scripts/import_cldr.py @@ -55,7 +55,7 @@ sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) - # import global data from the supplemental files + # Import global data from the supplemental files global_data = {} territory_zones = global_data.setdefault('territory_zones', {}) @@ -69,6 +69,14 @@ for alias in elem.attrib['aliases'].split(): zone_aliases[alias] = tzid + # Import Metazone mapping + meta_zones = global_data.setdefault('meta_zones', {}) + tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) + for elem in tzsup.findall('//timezone'): + for child in elem.findall('usesMetazone'): + if 'to' not in child.attrib: # FIXME: support old mappings + meta_zones[elem.attrib['type']] = child.attrib['mzone'] + outfile = open(os.path.join(destdir, 'global.dat'), 'wb') try: pickle.dump(global_data, outfile, 2) @@ -197,9 +205,6 @@ info.setdefault('long', {})[child.tag] = unicode(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = unicode(child.text) - for child in elem.findall('usesMetazone'): - if 'to' not in child.attrib: # FIXME: support old mappings - info['use_metazone'] = child.attrib['mzone'] time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {})
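The metazone mapping imported here ends up in `global.dat` and is what babel/dates.py reads via `get_global('meta_zones')`. A tiny sketch; the exact value returned for a zone depends on the CLDR data, and the 'Europe_Central' style id is only an illustration:

```python
from babel.core import get_global

meta_zones = get_global('meta_zones')
# Maps Olson zone ids to their current CLDR metazone, roughly
# 'Europe/Paris' -> 'Europe_Central'
print meta_zones.get('Europe/Paris')
```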