cmlenz@58: # -*- coding: utf-8 -*- cmlenz@58: # jruigrok@532: # Copyright (C) 2007-2011 Edgewall Software cmlenz@58: # All rights reserved. cmlenz@58: # cmlenz@58: # This software is licensed as described in the file COPYING, which cmlenz@58: # you should have received as part of this distribution. The terms cmlenz@58: # are also available at http://babel.edgewall.org/wiki/License. cmlenz@58: # cmlenz@58: # This software consists of voluntary contributions made by many cmlenz@58: # individuals. For the exact contribution history, see the revision cmlenz@58: # history and logs, available at http://babel.edgewall.org/log/. cmlenz@58: cmlenz@58: """Data structures for message catalogs.""" cmlenz@58: cmlenz@151: from cgi import parse_header cmlenz@69: from datetime import datetime cmlenz@167: from difflib import get_close_matches cmlenz@108: from email import message_from_string aronacher@360: from copy import copy cmlenz@58: import re cmlenz@69: import time cmlenz@58: cmlenz@69: from babel import __version__ as VERSION cmlenz@66: from babel.core import Locale cmlenz@133: from babel.dates import format_datetime aronacher@375: from babel.messages.plurals import get_plural jruigrok@527: from babel.util import odict, distinct, LOCALTZ, UTC, FixedOffsetTimezone cmlenz@58: cmlenz@222: __all__ = ['Message', 'Catalog', 'TranslationError'] cmlenz@58: __docformat__ = 'restructuredtext en' cmlenz@58: cmlenz@58: aronacher@356: PYTHON_FORMAT = re.compile(r'''(?x) aronacher@356: \% aronacher@356: (?:\(([\w]*)\))? aronacher@356: ( aronacher@356: [-#0\ +]?(?:\*|[\d]+)? aronacher@356: (?:\.(?:\*|[\d]+))? aronacher@356: [hlL]? aronacher@356: ) aronacher@356: ([diouxXeEfFgGcrs%]) aronacher@356: ''') aronacher@356: aronacher@356: cmlenz@58: class Message(object): cmlenz@58: """Representation of a single message in a catalog.""" cmlenz@58: cmlenz@151: def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), cmlenz@337: user_comments=(), previous_id=(), lineno=None, context=None): cmlenz@58: """Create the message object. palgarvio@202: cmlenz@58: :param id: the message ID, or a ``(singular, plural)`` tuple for cmlenz@58: pluralizable messages cmlenz@58: :param string: the translated message string, or a cmlenz@58: ``(singular, plural)`` tuple for pluralizable messages cmlenz@58: :param locations: a sequence of ``(filenname, lineno)`` tuples cmlenz@58: :param flags: a set or sequence of flags cmlenz@108: :param auto_comments: a sequence of automatic comments for the message cmlenz@108: :param user_comments: a sequence of user comments for the message cmlenz@205: :param previous_id: the previous message ID, or a ``(singular, plural)`` cmlenz@205: tuple for pluralizable messages cmlenz@222: :param lineno: the line number on which the msgid line was found in the cmlenz@222: PO file, if any cmlenz@337: :param context: the message context cmlenz@58: """ cmlenz@109: self.id = id #: The message ID cmlenz@70: if not string and self.pluralizable: cmlenz@70: string = (u'', u'') cmlenz@109: self.string = string #: The message translation cmlenz@231: self.locations = list(distinct(locations)) cmlenz@58: self.flags = set(flags) cmlenz@69: if id and self.python_format: cmlenz@58: self.flags.add('python-format') cmlenz@58: else: cmlenz@58: self.flags.discard('python-format') cmlenz@229: self.auto_comments = list(distinct(auto_comments)) cmlenz@229: self.user_comments = list(distinct(user_comments)) cmlenz@205: if isinstance(previous_id, basestring): cmlenz@205: self.previous_id = [previous_id] palgarvio@202: else: cmlenz@205: self.previous_id = list(previous_id) cmlenz@222: self.lineno = lineno cmlenz@337: self.context = context cmlenz@58: cmlenz@58: def __repr__(self): cmlenz@198: return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, cmlenz@198: list(self.flags)) cmlenz@58: pjenvey@250: def __cmp__(self, obj): pjenvey@250: """Compare Messages, taking into account plural ids""" pjenvey@250: if isinstance(obj, Message): pjenvey@250: plural = self.pluralizable pjenvey@250: obj_plural = obj.pluralizable pjenvey@250: if plural and obj_plural: pjenvey@250: return cmp(self.id[0], obj.id[0]) pjenvey@250: elif plural: pjenvey@250: return cmp(self.id[0], obj.id) pjenvey@250: elif obj_plural: pjenvey@250: return cmp(self.id, obj.id[0]) pjenvey@250: return cmp(self.id, obj.id) pjenvey@250: cmlenz@315: def clone(self): aronacher@360: return Message(*map(copy, (self.id, self.string, self.locations, aronacher@360: self.flags, self.auto_comments, aronacher@360: self.user_comments, self.previous_id, aronacher@360: self.lineno, self.context))) cmlenz@315: aronacher@357: def check(self, catalog=None): aronacher@357: """Run various validation checks on the message. Some validations aronacher@357: are only performed if the catalog is provided. This method returns aronacher@357: a sequence of `TranslationError` objects. aronacher@357: aronacher@357: :rtype: ``iterator`` aronacher@357: :param catalog: A catalog instance that is passed to the checkers aronacher@357: :see: `Catalog.check` for a way to perform checks for all messages aronacher@357: in a catalog. aronacher@357: """ aronacher@357: from babel.messages.checkers import checkers aronacher@357: errors = [] aronacher@357: for checker in checkers: aronacher@357: try: aronacher@357: checker(catalog, self) aronacher@357: except TranslationError, e: aronacher@357: errors.append(e) aronacher@357: return errors aronacher@357: cmlenz@69: def fuzzy(self): cmlenz@69: return 'fuzzy' in self.flags cmlenz@69: fuzzy = property(fuzzy, doc="""\ cmlenz@69: Whether the translation is fuzzy. palgarvio@202: cmlenz@69: >>> Message('foo').fuzzy cmlenz@69: False palgarvio@177: >>> msg = Message('foo', 'foo', flags=['fuzzy']) palgarvio@177: >>> msg.fuzzy cmlenz@69: True palgarvio@177: >>> msg cmlenz@198: palgarvio@202: cmlenz@69: :type: `bool` cmlenz@69: """) cmlenz@69: cmlenz@58: def pluralizable(self): cmlenz@58: return isinstance(self.id, (list, tuple)) cmlenz@58: pluralizable = property(pluralizable, doc="""\ cmlenz@58: Whether the message is plurizable. palgarvio@202: cmlenz@58: >>> Message('foo').pluralizable cmlenz@58: False cmlenz@58: >>> Message(('foo', 'bar')).pluralizable cmlenz@58: True palgarvio@202: cmlenz@63: :type: `bool` cmlenz@58: """) cmlenz@58: cmlenz@58: def python_format(self): cmlenz@58: ids = self.id cmlenz@58: if not isinstance(ids, (list, tuple)): cmlenz@58: ids = [ids] cmlenz@222: return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids])) cmlenz@58: python_format = property(python_format, doc="""\ cmlenz@58: Whether the message contains Python-style parameters. palgarvio@202: cmlenz@58: >>> Message('foo %(name)s bar').python_format cmlenz@58: True cmlenz@58: >>> Message(('foo %(name)s', 'foo %(name)s')).python_format cmlenz@58: True palgarvio@202: cmlenz@63: :type: `bool` cmlenz@58: """) cmlenz@58: palgarvio@107: cmlenz@222: class TranslationError(Exception): cmlenz@222: """Exception thrown by translation checkers when invalid message cmlenz@222: translations are encountered.""" cmlenz@222: cmlenz@222: cmlenz@106: DEFAULT_HEADER = u"""\ cmlenz@106: # Translations template for PROJECT. cmlenz@122: # Copyright (C) YEAR ORGANIZATION cmlenz@106: # This file is distributed under the same license as the PROJECT project. cmlenz@106: # FIRST AUTHOR , YEAR. cmlenz@106: #""" cmlenz@58: cmlenz@198: cmlenz@58: class Catalog(object): palgarvio@80: """Representation of a message catalog.""" cmlenz@58: cmlenz@106: def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER, cmlenz@106: project=None, version=None, copyright_holder=None, palgarvio@80: msgid_bugs_address=None, creation_date=None, cmlenz@208: revision_date=None, last_translator=None, language_team=None, cmlenz@208: charset='utf-8', fuzzy=True): cmlenz@66: """Initialize the catalog object. palgarvio@202: cmlenz@66: :param locale: the locale identifier or `Locale` object, or `None` cmlenz@66: if the catalog is not bound to a locale (which basically cmlenz@66: means it's a template) palgarvio@80: :param domain: the message domain cmlenz@106: :param header_comment: the header comment as string, or `None` for the cmlenz@106: default header palgarvio@80: :param project: the project's name palgarvio@80: :param version: the project's version cmlenz@106: :param copyright_holder: the copyright holder of the catalog cmlenz@106: :param msgid_bugs_address: the email address or URL to submit bug cmlenz@106: reports to palgarvio@80: :param creation_date: the date the catalog was created palgarvio@80: :param revision_date: the date the catalog was revised palgarvio@80: :param last_translator: the name and email of the last translator cmlenz@208: :param language_team: the name and email of the language team cmlenz@106: :param charset: the encoding to use in the output palgarvio@177: :param fuzzy: the fuzzy bit on the catalog header cmlenz@66: """ cmlenz@109: self.domain = domain #: The message domain cmlenz@66: if locale: cmlenz@66: locale = Locale.parse(locale) cmlenz@109: self.locale = locale #: The locale or `None` cmlenz@106: self._header_comment = header_comment cmlenz@69: self._messages = odict() cmlenz@69: cmlenz@109: self.project = project or 'PROJECT' #: The project name cmlenz@109: self.version = version or 'VERSION' #: The project version cmlenz@106: self.copyright_holder = copyright_holder or 'ORGANIZATION' palgarvio@80: self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' cmlenz@108: cmlenz@108: self.last_translator = last_translator or 'FULL NAME ' cmlenz@108: """Name and email address of the last translator.""" cmlenz@208: self.language_team = language_team or 'LANGUAGE ' cmlenz@208: """Name and email address of the language team.""" cmlenz@108: cmlenz@97: self.charset = charset or 'utf-8' cmlenz@86: cmlenz@69: if creation_date is None: cmlenz@99: creation_date = datetime.now(LOCALTZ) cmlenz@97: elif isinstance(creation_date, datetime) and not creation_date.tzinfo: cmlenz@99: creation_date = creation_date.replace(tzinfo=LOCALTZ) cmlenz@109: self.creation_date = creation_date #: Creation date of the template cmlenz@69: if revision_date is None: cmlenz@99: revision_date = datetime.now(LOCALTZ) cmlenz@97: elif isinstance(revision_date, datetime) and not revision_date.tzinfo: cmlenz@99: revision_date = revision_date.replace(tzinfo=LOCALTZ) cmlenz@109: self.revision_date = revision_date #: Last revision date of the catalog cmlenz@183: self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`) cmlenz@183: cmlenz@183: self.obsolete = odict() #: Dictionary of obsolete messages cmlenz@335: self._num_plurals = None cmlenz@335: self._plural_expr = None cmlenz@69: cmlenz@109: def _get_header_comment(self): cmlenz@106: comment = self._header_comment cmlenz@106: comment = comment.replace('PROJECT', self.project) \ cmlenz@106: .replace('VERSION', self.version) \ cmlenz@106: .replace('YEAR', self.revision_date.strftime('%Y')) \ cmlenz@122: .replace('ORGANIZATION', self.copyright_holder) cmlenz@106: if self.locale: cmlenz@109: comment = comment.replace('Translations template', '%s translations' cmlenz@109: % self.locale.english_name) cmlenz@106: return comment cmlenz@122: cmlenz@109: def _set_header_comment(self, string): cmlenz@106: self._header_comment = string cmlenz@109: cmlenz@109: header_comment = property(_get_header_comment, _set_header_comment, doc="""\ cmlenz@106: The header comment for the catalog. palgarvio@202: cmlenz@106: >>> catalog = Catalog(project='Foobar', version='1.0', cmlenz@106: ... copyright_holder='Foo Company') cmlenz@314: >>> print catalog.header_comment #doctest: +ELLIPSIS cmlenz@106: # Translations template for Foobar. cmlenz@314: # Copyright (C) ... Foo Company cmlenz@106: # This file is distributed under the same license as the Foobar project. cmlenz@314: # FIRST AUTHOR , .... cmlenz@106: # palgarvio@202: cmlenz@122: The header can also be set from a string. Any known upper-case variables cmlenz@122: will be replaced when the header is retrieved again: palgarvio@202: cmlenz@122: >>> catalog = Catalog(project='Foobar', version='1.0', cmlenz@122: ... copyright_holder='Foo Company') cmlenz@122: >>> catalog.header_comment = '''\\ cmlenz@122: ... # The POT for my really cool PROJECT project. cmlenz@122: ... # Copyright (C) 1990-2003 ORGANIZATION cmlenz@122: ... # This file is distributed under the same license as the PROJECT cmlenz@122: ... # project. cmlenz@122: ... #''' cmlenz@122: >>> print catalog.header_comment cmlenz@122: # The POT for my really cool Foobar project. cmlenz@122: # Copyright (C) 1990-2003 Foo Company cmlenz@122: # This file is distributed under the same license as the Foobar cmlenz@122: # project. cmlenz@122: # cmlenz@122: cmlenz@106: :type: `unicode` cmlenz@106: """) cmlenz@106: cmlenz@108: def _get_mime_headers(self): cmlenz@69: headers = [] cmlenz@69: headers.append(('Project-Id-Version', cmlenz@69: '%s %s' % (self.project, self.version))) palgarvio@80: headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) cmlenz@69: headers.append(('POT-Creation-Date', cmlenz@133: format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', cmlenz@133: locale='en'))) cmlenz@69: if self.locale is None: cmlenz@69: headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE')) cmlenz@69: headers.append(('Last-Translator', 'FULL NAME ')) cmlenz@69: headers.append(('Language-Team', 'LANGUAGE ')) cmlenz@69: else: cmlenz@69: headers.append(('PO-Revision-Date', cmlenz@133: format_datetime(self.revision_date, cmlenz@133: 'yyyy-MM-dd HH:mmZ', locale='en'))) cmlenz@69: headers.append(('Last-Translator', self.last_translator)) cmlenz@208: headers.append(('Language-Team', cmlenz@208: self.language_team.replace('LANGUAGE', cmlenz@208: str(self.locale)))) cmlenz@86: headers.append(('Plural-Forms', self.plural_forms)) cmlenz@69: headers.append(('MIME-Version', '1.0')) cmlenz@70: headers.append(('Content-Type', cmlenz@70: 'text/plain; charset=%s' % self.charset)) cmlenz@69: headers.append(('Content-Transfer-Encoding', '8bit')) palgarvio@107: headers.append(('Generated-By', 'Babel %s\n' % VERSION)) cmlenz@69: return headers cmlenz@108: cmlenz@108: def _set_mime_headers(self, headers): cmlenz@108: for name, value in headers: pjenvey@293: if name.lower() == 'content-type': cmlenz@212: mimetype, params = parse_header(value) cmlenz@212: if 'charset' in params: cmlenz@212: self.charset = params['charset'].lower() cmlenz@212: break cmlenz@212: for name, value in headers: cmlenz@212: name = name.lower().decode(self.charset) cmlenz@212: value = value.decode(self.charset) cmlenz@108: if name == 'project-id-version': cmlenz@108: parts = value.split(' ') cmlenz@212: self.project = u' '.join(parts[:-1]) cmlenz@108: self.version = parts[-1] cmlenz@108: elif name == 'report-msgid-bugs-to': cmlenz@108: self.msgid_bugs_address = value cmlenz@108: elif name == 'last-translator': cmlenz@108: self.last_translator = value cmlenz@208: elif name == 'language-team': cmlenz@208: self.language_team = value cmlenz@335: elif name == 'plural-forms': cmlenz@335: _, params = parse_header(' ;' + value) cmlenz@335: self._num_plurals = int(params.get('nplurals', 2)) cmlenz@335: self._plural_expr = params.get('plural', '(n != 1)') cmlenz@108: elif name == 'pot-creation-date': cmlenz@108: # FIXME: this should use dates.parse_datetime as soon as that cmlenz@108: # is ready jruigrok@429: value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1) jruigrok@429: cmlenz@108: tt = time.strptime(value, '%Y-%m-%d %H:%M') cmlenz@108: ts = time.mktime(tt) jruigrok@429: jruigrok@480: # Separate the offset into a sign component, hours, and minutes jruigrok@429: plus_minus_s, rest = tzoffset[0], tzoffset[1:] jruigrok@429: hours_offset_s, mins_offset_s = rest[:2], rest[2:] jruigrok@429: jruigrok@429: # Make them all integers jruigrok@429: plus_minus = int(plus_minus_s + '1') jruigrok@429: hours_offset = int(hours_offset_s) jruigrok@429: mins_offset = int(mins_offset_s) jruigrok@429: jruigrok@429: # Calculate net offset jruigrok@429: net_mins_offset = hours_offset * 60 jruigrok@429: net_mins_offset += mins_offset jruigrok@429: net_mins_offset *= plus_minus jruigrok@429: jruigrok@429: # Create an offset object jruigrok@429: tzoffset = FixedOffsetTimezone(net_mins_offset) jruigrok@429: jruigrok@429: # Store the offset in a datetime object cmlenz@123: dt = datetime.fromtimestamp(ts) cmlenz@123: self.creation_date = dt.replace(tzinfo=tzoffset) palgarvio@424: elif name == 'po-revision-date': palgarvio@424: # Keep the value if it's not the default one palgarvio@424: if 'YEAR' not in value: palgarvio@424: # FIXME: this should use dates.parse_datetime as soon as palgarvio@424: # that is ready jruigrok@429: value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1) palgarvio@424: tt = time.strptime(value, '%Y-%m-%d %H:%M') palgarvio@424: ts = time.mktime(tt) jruigrok@429: jruigrok@480: # Separate the offset into a sign component, hours, and jruigrok@429: # minutes jruigrok@429: plus_minus_s, rest = tzoffset[0], tzoffset[1:] jruigrok@429: hours_offset_s, mins_offset_s = rest[:2], rest[2:] jruigrok@429: jruigrok@429: # Make them all integers jruigrok@429: plus_minus = int(plus_minus_s + '1') jruigrok@429: hours_offset = int(hours_offset_s) jruigrok@429: mins_offset = int(mins_offset_s) jruigrok@429: jruigrok@429: # Calculate net offset jruigrok@429: net_mins_offset = hours_offset * 60 jruigrok@429: net_mins_offset += mins_offset jruigrok@429: net_mins_offset *= plus_minus jruigrok@429: jruigrok@429: # Create an offset object jruigrok@429: tzoffset = FixedOffsetTimezone(net_mins_offset) jruigrok@429: jruigrok@429: # Store the offset in a datetime object palgarvio@424: dt = datetime.fromtimestamp(ts) palgarvio@424: self.revision_date = dt.replace(tzinfo=tzoffset) cmlenz@108: cmlenz@108: mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ cmlenz@69: The MIME headers of the catalog, used for the special ``msgid ""`` entry. palgarvio@202: cmlenz@69: The behavior of this property changes slightly depending on whether a locale cmlenz@69: is set or not, the latter indicating that the catalog is actually a template cmlenz@69: for actual translations. palgarvio@202: cmlenz@69: Here's an example of the output for such a catalog template: palgarvio@202: cmlenz@97: >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) cmlenz@69: >>> catalog = Catalog(project='Foobar', version='1.0', cmlenz@97: ... creation_date=created) cmlenz@106: >>> for name, value in catalog.mime_headers: cmlenz@69: ... print '%s: %s' % (name, value) cmlenz@69: Project-Id-Version: Foobar 1.0 palgarvio@80: Report-Msgid-Bugs-To: EMAIL@ADDRESS cmlenz@69: POT-Creation-Date: 1990-04-01 15:30+0000 cmlenz@69: PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE cmlenz@69: Last-Translator: FULL NAME cmlenz@69: Language-Team: LANGUAGE cmlenz@69: MIME-Version: 1.0 cmlenz@69: Content-Type: text/plain; charset=utf-8 cmlenz@69: Content-Transfer-Encoding: 8bit cmlenz@69: Generated-By: Babel ... palgarvio@202: cmlenz@69: And here's an example of the output when the locale is set: palgarvio@202: cmlenz@97: >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) cmlenz@69: >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', cmlenz@97: ... creation_date=created, revision_date=revised, cmlenz@208: ... last_translator='John Doe ', cmlenz@208: ... language_team='de_DE ') cmlenz@106: >>> for name, value in catalog.mime_headers: cmlenz@69: ... print '%s: %s' % (name, value) cmlenz@69: Project-Id-Version: Foobar 1.0 palgarvio@80: Report-Msgid-Bugs-To: EMAIL@ADDRESS cmlenz@69: POT-Creation-Date: 1990-04-01 15:30+0000 cmlenz@69: PO-Revision-Date: 1990-08-03 12:00+0000 cmlenz@69: Last-Translator: John Doe cmlenz@208: Language-Team: de_DE cmlenz@86: Plural-Forms: nplurals=2; plural=(n != 1) cmlenz@69: MIME-Version: 1.0 cmlenz@69: Content-Type: text/plain; charset=utf-8 cmlenz@69: Content-Transfer-Encoding: 8bit cmlenz@69: Generated-By: Babel ... palgarvio@202: cmlenz@69: :type: `list` cmlenz@69: """) cmlenz@69: cmlenz@70: def num_plurals(self): aronacher@375: if self._num_plurals is None: cmlenz@335: num = 2 cmlenz@335: if self.locale: aronacher@375: num = get_plural(self.locale)[0] cmlenz@335: self._num_plurals = num cmlenz@335: return self._num_plurals cmlenz@86: num_plurals = property(num_plurals, doc="""\ cmlenz@335: The number of plurals used by the catalog or locale. palgarvio@202: cmlenz@105: >>> Catalog(locale='en').num_plurals cmlenz@105: 2 cmlenz@335: >>> Catalog(locale='ga').num_plurals cmlenz@105: 3 palgarvio@202: cmlenz@105: :type: `int` cmlenz@86: """) cmlenz@70: cmlenz@335: def plural_expr(self): aronacher@375: if self._plural_expr is None: cmlenz@335: expr = '(n != 1)' cmlenz@335: if self.locale: aronacher@375: expr = get_plural(self.locale)[1] cmlenz@335: self._plural_expr = expr cmlenz@335: return self._plural_expr cmlenz@335: plural_expr = property(plural_expr, doc="""\ cmlenz@335: The plural expression used by the catalog or locale. cmlenz@335: cmlenz@335: >>> Catalog(locale='en').plural_expr cmlenz@335: '(n != 1)' cmlenz@335: >>> Catalog(locale='ga').plural_expr cmlenz@335: '(n==1 ? 0 : n==2 ? 1 : 2)' cmlenz@335: cmlenz@335: :type: `basestring` cmlenz@335: """) cmlenz@335: cmlenz@69: def plural_forms(self): cmlenz@335: return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr) cmlenz@69: plural_forms = property(plural_forms, doc="""\ cmlenz@69: Return the plural forms declaration for the locale. palgarvio@202: cmlenz@105: >>> Catalog(locale='en').plural_forms cmlenz@69: 'nplurals=2; plural=(n != 1)' cmlenz@69: >>> Catalog(locale='pt_BR').plural_forms cmlenz@69: 'nplurals=2; plural=(n > 1)' palgarvio@202: cmlenz@69: :type: `str` cmlenz@69: """) cmlenz@69: cmlenz@69: def __contains__(self, id): cmlenz@69: """Return whether the catalog has a message with the specified ID.""" cmlenz@71: return self._key_for(id) in self._messages cmlenz@71: cmlenz@71: def __len__(self): cmlenz@86: """The number of messages in the catalog. palgarvio@202: cmlenz@86: This does not include the special ``msgid ""`` entry. cmlenz@86: """ cmlenz@71: return len(self._messages) cmlenz@58: cmlenz@58: def __iter__(self): cmlenz@66: """Iterates through all the entries in the catalog, in the order they cmlenz@66: were added, yielding a `Message` object for every entry. palgarvio@202: cmlenz@66: :rtype: ``iterator`` cmlenz@66: """ cmlenz@69: buf = [] cmlenz@106: for name, value in self.mime_headers: cmlenz@69: buf.append('%s: %s' % (name, value)) cmlenz@200: flags = set() palgarvio@177: if self.fuzzy: cmlenz@200: flags |= set(['fuzzy']) cmlenz@212: yield Message(u'', '\n'.join(buf), flags=flags) cmlenz@71: for key in self._messages: cmlenz@71: yield self._messages[key] cmlenz@58: cmlenz@58: def __repr__(self): cmlenz@66: locale = '' cmlenz@66: if self.locale: cmlenz@66: locale = ' %s' % self.locale cmlenz@66: return '<%s %r%s>' % (type(self).__name__, self.domain, locale) cmlenz@58: cmlenz@58: def __delitem__(self, id): cmlenz@66: """Delete the message with the specified ID.""" cmlenz@352: self.delete(id) cmlenz@58: cmlenz@58: def __getitem__(self, id): cmlenz@66: """Return the message with the specified ID. palgarvio@202: cmlenz@66: :param id: the message ID cmlenz@352: :return: the message with the specified ID, or `None` if no such cmlenz@352: message is in the catalog cmlenz@69: :rtype: `Message` cmlenz@66: """ cmlenz@352: return self.get(id) cmlenz@58: cmlenz@58: def __setitem__(self, id, message): cmlenz@66: """Add or update the message with the specified ID. palgarvio@202: cmlenz@66: >>> catalog = Catalog() cmlenz@66: >>> catalog[u'foo'] = Message(u'foo') cmlenz@66: >>> catalog[u'foo'] cmlenz@198: palgarvio@202: cmlenz@66: If a message with that ID is already in the catalog, it is updated cmlenz@66: to include the locations and flags of the new message. palgarvio@202: cmlenz@66: >>> catalog = Catalog() cmlenz@66: >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) cmlenz@66: >>> catalog[u'foo'].locations cmlenz@66: [('main.py', 1)] cmlenz@66: >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) cmlenz@66: >>> catalog[u'foo'].locations cmlenz@66: [('main.py', 1), ('utils.py', 5)] palgarvio@202: cmlenz@66: :param id: the message ID cmlenz@66: :param message: the `Message` object cmlenz@66: """ cmlenz@58: assert isinstance(message, Message), 'expected a Message object' cmlenz@352: key = self._key_for(id, message.context) cmlenz@71: current = self._messages.get(key) cmlenz@58: if current: cmlenz@71: if message.pluralizable and not current.pluralizable: cmlenz@71: # The new message adds pluralization cmlenz@71: current.id = message.id cmlenz@72: current.string = message.string cmlenz@231: current.locations = list(distinct(current.locations + cmlenz@231: message.locations)) cmlenz@230: current.auto_comments = list(distinct(current.auto_comments + cmlenz@230: message.auto_comments)) cmlenz@230: current.user_comments = list(distinct(current.user_comments + cmlenz@230: message.user_comments)) cmlenz@58: current.flags |= message.flags cmlenz@58: message = current cmlenz@108: elif id == '': cmlenz@108: # special treatment for the header message cmlenz@108: headers = message_from_string(message.string.encode(self.charset)) cmlenz@108: self.mime_headers = headers.items() cmlenz@122: self.header_comment = '\n'.join(['# %s' % comment for comment cmlenz@122: in message.user_comments]) cmlenz@198: self.fuzzy = message.fuzzy cmlenz@58: else: cmlenz@58: if isinstance(id, (list, tuple)): cmlenz@279: assert isinstance(message.string, (list, tuple)), \ cmlenz@279: 'Expected sequence but got %s' % type(message.string) cmlenz@71: self._messages[key] = message cmlenz@58: palgarvio@107: def add(self, id, string=None, locations=(), flags=(), auto_comments=(), cmlenz@337: user_comments=(), previous_id=(), lineno=None, context=None): cmlenz@66: """Add or update the message with the specified ID. palgarvio@202: cmlenz@66: >>> catalog = Catalog() cmlenz@66: >>> catalog.add(u'foo') fschwarz@546: cmlenz@66: >>> catalog[u'foo'] cmlenz@198: palgarvio@202: cmlenz@66: This method simply constructs a `Message` object with the given cmlenz@66: arguments and invokes `__setitem__` with that object. palgarvio@202: cmlenz@66: :param id: the message ID, or a ``(singular, plural)`` tuple for cmlenz@66: pluralizable messages cmlenz@66: :param string: the translated message string, or a cmlenz@66: ``(singular, plural)`` tuple for pluralizable messages cmlenz@66: :param locations: a sequence of ``(filenname, lineno)`` tuples cmlenz@66: :param flags: a set or sequence of flags cmlenz@108: :param auto_comments: a sequence of automatic comments cmlenz@108: :param user_comments: a sequence of user comments cmlenz@205: :param previous_id: the previous message ID, or a ``(singular, plural)`` cmlenz@205: tuple for pluralizable messages cmlenz@222: :param lineno: the line number on which the msgid line was found in the cmlenz@222: PO file, if any cmlenz@337: :param context: the message context fschwarz@546: :return: the newly added message fschwarz@546: :rtype: `Message` cmlenz@66: """ fschwarz@546: message = Message(id, string, list(locations), flags, auto_comments, fschwarz@546: user_comments, previous_id, lineno=lineno, fschwarz@546: context=context) fschwarz@546: self[id] = message fschwarz@546: return message cmlenz@222: cmlenz@222: def check(self): cmlenz@222: """Run various validation checks on the translations in the catalog. palgarvio@228: cmlenz@222: For every message which fails validation, this method yield a cmlenz@222: ``(message, errors)`` tuple, where ``message`` is the `Message` object cmlenz@222: and ``errors`` is a sequence of `TranslationError` objects. palgarvio@228: cmlenz@222: :rtype: ``iterator`` cmlenz@222: """ aronacher@354: for message in self._messages.values(): aronacher@357: errors = message.check(catalog=self) aronacher@354: if errors: aronacher@354: yield message, errors cmlenz@71: cmlenz@352: def get(self, id, context=None): cmlenz@352: """Return the message with the specified ID and context. cmlenz@352: cmlenz@352: :param id: the message ID cmlenz@352: :param context: the message context, or ``None`` for no context cmlenz@352: :return: the message with the specified ID, or `None` if no such cmlenz@352: message is in the catalog cmlenz@352: :rtype: `Message` cmlenz@352: """ cmlenz@352: return self._messages.get(self._key_for(id, context)) cmlenz@352: cmlenz@352: def delete(self, id, context=None): cmlenz@352: """Delete the message with the specified ID and context. cmlenz@352: cmlenz@352: :param id: the message ID cmlenz@352: :param context: the message context, or ``None`` for no context cmlenz@352: """ cmlenz@352: key = self._key_for(id, context) cmlenz@352: if key in self._messages: cmlenz@352: del self._messages[key] cmlenz@352: cmlenz@205: def update(self, template, no_fuzzy_matching=False): cmlenz@165: """Update the catalog based on the given template catalog. palgarvio@202: cmlenz@165: >>> from babel.messages import Catalog cmlenz@165: >>> template = Catalog() cmlenz@190: >>> template.add('green', locations=[('main.py', 99)]) fschwarz@546: cmlenz@165: >>> template.add('blue', locations=[('main.py', 100)]) fschwarz@546: cmlenz@165: >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) fschwarz@546: cmlenz@165: >>> catalog = Catalog(locale='de_DE') cmlenz@165: >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) fschwarz@546: cmlenz@165: >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) fschwarz@546: cmlenz@165: >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), cmlenz@165: ... locations=[('util.py', 38)]) fschwarz@546: palgarvio@202: cmlenz@183: >>> catalog.update(template) cmlenz@165: >>> len(catalog) cmlenz@190: 3 palgarvio@202: cmlenz@190: >>> msg1 = catalog['green'] cmlenz@165: >>> msg1.string cmlenz@190: >>> msg1.locations cmlenz@190: [('main.py', 99)] palgarvio@202: cmlenz@190: >>> msg2 = catalog['blue'] cmlenz@190: >>> msg2.string cmlenz@165: u'blau' cmlenz@190: >>> msg2.locations cmlenz@165: [('main.py', 100)] palgarvio@202: cmlenz@190: >>> msg3 = catalog['salad'] cmlenz@190: >>> msg3.string cmlenz@165: (u'Salat', u'Salate') cmlenz@190: >>> msg3.locations cmlenz@165: [('util.py', 42)] palgarvio@202: cmlenz@183: Messages that are in the catalog but not in the template are removed cmlenz@183: from the main collection, but can still be accessed via the `obsolete` cmlenz@183: member: palgarvio@202: cmlenz@165: >>> 'head' in catalog cmlenz@165: False cmlenz@183: >>> catalog.obsolete.values() cmlenz@198: [] palgarvio@202: cmlenz@165: :param template: the reference catalog, usually read from a POT file palgarvio@202: :param no_fuzzy_matching: whether to use fuzzy matching of message IDs cmlenz@165: """ cmlenz@165: messages = self._messages cmlenz@314: remaining = messages.copy() cmlenz@165: self._messages = odict() cmlenz@165: cmlenz@314: # Prepare for fuzzy matching cmlenz@314: fuzzy_candidates = [] cmlenz@314: if not no_fuzzy_matching: cmlenz@352: fuzzy_candidates = dict([ cmlenz@352: (self._key_for(msgid), messages[msgid].context) cmlenz@352: for msgid in messages if msgid and messages[msgid].string cmlenz@352: ]) cmlenz@314: fuzzy_matches = set() cmlenz@314: cmlenz@279: def _merge(message, oldkey, newkey): cmlenz@315: message = message.clone() cmlenz@279: fuzzy = False cmlenz@279: if oldkey != newkey: cmlenz@279: fuzzy = True cmlenz@314: fuzzy_matches.add(oldkey) cmlenz@314: oldmsg = messages.get(oldkey) cmlenz@279: if isinstance(oldmsg.id, basestring): cmlenz@279: message.previous_id = [oldmsg.id] cmlenz@279: else: cmlenz@279: message.previous_id = list(oldmsg.id) cmlenz@314: else: cmlenz@339: oldmsg = remaining.pop(oldkey, None) cmlenz@279: message.string = oldmsg.string cmlenz@279: if isinstance(message.id, (list, tuple)): cmlenz@279: if not isinstance(message.string, (list, tuple)): cmlenz@279: fuzzy = True cmlenz@279: message.string = tuple( cmlenz@279: [message.string] + ([u''] * (len(message.id) - 1)) cmlenz@279: ) palgarvio@427: elif len(message.string) != self.num_plurals: cmlenz@279: fuzzy = True cmlenz@279: message.string = tuple(message.string[:len(oldmsg.string)]) cmlenz@279: elif isinstance(message.string, (list, tuple)): cmlenz@279: fuzzy = True cmlenz@279: message.string = message.string[0] cmlenz@279: message.flags |= oldmsg.flags cmlenz@279: if fuzzy: cmlenz@279: message.flags |= set([u'fuzzy']) cmlenz@279: self[message.id] = message cmlenz@279: cmlenz@165: for message in template: cmlenz@165: if message.id: cmlenz@352: key = self._key_for(message.id, message.context) cmlenz@165: if key in messages: cmlenz@279: _merge(message, key, key) cmlenz@165: else: palgarvio@202: if no_fuzzy_matching is False: cmlenz@167: # do some fuzzy matching with difflib cmlenz@352: if isinstance(key, tuple): cmlenz@352: matchkey = key[0] # just the msgid, no context cmlenz@352: else: cmlenz@352: matchkey = key cmlenz@352: matches = get_close_matches(matchkey.lower().strip(), cmlenz@352: fuzzy_candidates.keys(), 1) cmlenz@167: if matches: cmlenz@352: newkey = matches[0] cmlenz@352: newctxt = fuzzy_candidates[newkey] cmlenz@352: if newctxt is not None: cmlenz@352: newkey = newkey, newctxt cmlenz@352: _merge(message, newkey, key) cmlenz@190: continue cmlenz@165: cmlenz@167: self[message.id] = message cmlenz@167: cmlenz@314: self.obsolete = odict() cmlenz@314: for msgid in remaining: cmlenz@314: if no_fuzzy_matching or msgid not in fuzzy_matches: cmlenz@314: self.obsolete[msgid] = remaining[msgid] palgarvio@420: # Make updated catalog's POT-Creation-Date equal to the template palgarvio@420: # used to update the catalog palgarvio@420: self.creation_date = template.creation_date cmlenz@165: cmlenz@352: def _key_for(self, id, context=None): cmlenz@71: """The key for a message is just the singular ID even for pluralizable cmlenz@352: messages, but is a ``(msgid, msgctxt)`` tuple for context-specific cmlenz@71: messages. cmlenz@71: """ cmlenz@71: key = id cmlenz@71: if isinstance(key, (list, tuple)): cmlenz@71: key = id[0] cmlenz@352: if context is not None: cmlenz@352: key = (key, context) cmlenz@71: return key