# HG changeset patch # User palgarvio # Date 1183485334 0 # Node ID 2983c718f6e2f8e3244f0fd65df04cb0e06134ba # Parent ace575fff5ac7a2abcd91cb0c228dcb7a977392b Added `--no-fuzzy-matching` to the frontends, and also `--previous`, which adds the old msgids as comments. The latter closes #31. diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -40,9 +40,9 @@ """Representation of a single message in a catalog.""" def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), - user_comments=()): + user_comments=(), old_msgid=()): """Create the message object. - + :param id: the message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param string: the translated message string, or a @@ -51,6 +51,8 @@ :param flags: a set or sequence of flags :param auto_comments: a sequence of automatic comments for the message :param user_comments: a sequence of user comments for the message + :param old_message: the old message ID, or a ``(singular, plural)`` + tuple for old pluralizable messages """ self.id = id #: The message ID if not string and self.pluralizable: @@ -64,6 +66,10 @@ self.flags.discard('python-format') self.auto_comments = list(auto_comments) self.user_comments = list(user_comments) + if isinstance(old_msgid, basestring): + self.old_msgid = [old_msgid] + else: + self.old_msgid = list(old_msgid) def __repr__(self): return '<%s %r (flags: %r)>' % (type(self).__name__, self.id, @@ -73,7 +79,7 @@ return 'fuzzy' in self.flags fuzzy = property(fuzzy, doc="""\ Whether the translation is fuzzy. - + >>> Message('foo').fuzzy False >>> msg = Message('foo', 'foo', flags=['fuzzy']) @@ -81,7 +87,7 @@ True >>> msg - + :type: `bool` """) @@ -89,12 +95,12 @@ return isinstance(self.id, (list, tuple)) pluralizable = property(pluralizable, doc="""\ Whether the message is plurizable. 
- + >>> Message('foo').pluralizable False >>> Message(('foo', 'bar')).pluralizable True - + :type: `bool` """) @@ -105,12 +111,12 @@ return bool(filter(None, [PYTHON_FORMAT(id) for id in ids])) python_format = property(python_format, doc="""\ Whether the message contains Python-style parameters. - + >>> Message('foo %(name)s bar').python_format True >>> Message(('foo %(name)s', 'foo %(name)s')).python_format True - + :type: `bool` """) @@ -132,7 +138,7 @@ revision_date=None, last_translator=None, charset='utf-8', fuzzy=True): """Initialize the catalog object. - + :param locale: the locale identifier or `Locale` object, or `None` if the catalog is not bound to a locale (which basically means it's a template) @@ -197,7 +203,7 @@ header_comment = property(_get_header_comment, _set_header_comment, doc="""\ The header comment for the catalog. - + >>> catalog = Catalog(project='Foobar', version='1.0', ... copyright_holder='Foo Company') >>> print catalog.header_comment @@ -206,10 +212,10 @@ # This file is distributed under the same license as the Foobar project. # FIRST AUTHOR , 2007. # - + The header can also be set from a string. Any known upper-case variables will be replaced when the header is retrieved again: - + >>> catalog = Catalog(project='Foobar', version='1.0', ... copyright_holder='Foo Company') >>> catalog.header_comment = '''\\ @@ -282,13 +288,13 @@ mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ The MIME headers of the catalog, used for the special ``msgid ""`` entry. - + The behavior of this property changes slightly depending on whether a locale is set or not, the latter indicating that the catalog is actually a template for actual translations. - + Here's an example of the output for such a catalog template: - + >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) >>> catalog = Catalog(project='Foobar', version='1.0', ... 
creation_date=created) @@ -304,9 +310,9 @@ Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Generated-By: Babel ... - + And here's an example of the output when the locale is set: - + >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', ... creation_date=created, revision_date=revised, @@ -324,7 +330,7 @@ Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Generated-By: Babel ... - + :type: `list` """) @@ -338,12 +344,12 @@ return num num_plurals = property(num_plurals, doc="""\ The number of plurals used by the locale. - + >>> Catalog(locale='en').num_plurals 2 >>> Catalog(locale='cs_CZ').num_plurals 3 - + :type: `int` """) @@ -357,12 +363,12 @@ return 'nplurals=%s; plural=%s' % (num, expr) plural_forms = property(plural_forms, doc="""\ Return the plural forms declaration for the locale. - + >>> Catalog(locale='en').plural_forms 'nplurals=2; plural=(n != 1)' >>> Catalog(locale='pt_BR').plural_forms 'nplurals=2; plural=(n > 1)' - + :type: `str` """) @@ -372,7 +378,7 @@ def __len__(self): """The number of messages in the catalog. - + This does not include the special ``msgid ""`` entry. """ return len(self._messages) @@ -380,7 +386,7 @@ def __iter__(self): """Iterates through all the entries in the catalog, in the order they were added, yielding a `Message` object for every entry. - + :rtype: ``iterator`` """ buf = [] @@ -407,7 +413,7 @@ def __getitem__(self, id): """Return the message with the specified ID. - + :param id: the message ID :return: the message with the specified ID, or `None` if no such message is in the catalog @@ -417,15 +423,15 @@ def __setitem__(self, id, message): """Add or update the message with the specified ID. - + >>> catalog = Catalog() >>> catalog[u'foo'] = Message(u'foo') >>> catalog[u'foo'] - + If a message with that ID is already in the catalog, it is updated to include the locations and flags of the new message. 
- + >>> catalog = Catalog() >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) >>> catalog[u'foo'].locations @@ -433,7 +439,7 @@ >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) >>> catalog[u'foo'].locations [('main.py', 1), ('utils.py', 5)] - + :param id: the message ID :param message: the `Message` object """ @@ -463,17 +469,17 @@ self._messages[key] = message def add(self, id, string=None, locations=(), flags=(), auto_comments=(), - user_comments=()): + user_comments=(), old_message=()): """Add or update the message with the specified ID. - + >>> catalog = Catalog() >>> catalog.add(u'foo') >>> catalog[u'foo'] - + This method simply constructs a `Message` object with the given arguments and invokes `__setitem__` with that object. - + :param id: the message ID, or a ``(singular, plural)`` tuple for pluralizable messages :param string: the translated message string, or a @@ -484,11 +490,12 @@ :param user_comments: a sequence of user comments """ self[id] = Message(id, string, list(locations), flags, auto_comments, - user_comments) + user_comments, old_message) - def update(self, template, fuzzy_matching=True): + def update(self, template, no_fuzzy_matching=False, + include_old_msgid=False): """Update the catalog based on the given template catalog. - + >>> from babel.messages import Catalog >>> template = Catalog() >>> template.add('green', locations=[('main.py', 99)]) @@ -499,39 +506,41 @@ >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), ... 
locations=[('util.py', 38)]) - + >>> catalog.update(template) >>> len(catalog) 3 - + >>> msg1 = catalog['green'] >>> msg1.string >>> msg1.locations [('main.py', 99)] - + >>> msg2 = catalog['blue'] >>> msg2.string u'blau' >>> msg2.locations [('main.py', 100)] - + >>> msg3 = catalog['salad'] >>> msg3.string (u'Salat', u'Salate') >>> msg3.locations [('util.py', 42)] - + Messages that are in the catalog but not in the template are removed from the main collection, but can still be accessed via the `obsolete` member: - + >>> 'head' in catalog False >>> catalog.obsolete.values() [] - + :param template: the reference catalog, usually read from a POT file - :param fuzzy_matching: whether to use fuzzy matching of message IDs + :param no_fuzzy_matching: whether to use fuzzy matching of message IDs + :param include_old_msgid: include the old msgid as a comment when + updating the catalog """ messages = self._messages self._messages = odict() @@ -546,7 +555,7 @@ self[message.id] = message else: - if fuzzy_matching: + if no_fuzzy_matching is False: # do some fuzzy matching with difflib matches = get_close_matches(key.lower().strip(), [self._key_for(msgid) for msgid in messages], 1) @@ -554,6 +563,11 @@ oldmsg = messages.pop(matches[0]) message.string = oldmsg.string message.flags |= oldmsg.flags | set([u'fuzzy']) + if include_old_msgid: + if isinstance(oldmsg.id, basestring): + message.old_msgid = [oldmsg.id] + else: + message.old_msgid = list(oldmsg.id) self[message.id] = message continue diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -455,9 +455,13 @@ ('locale=', 'l', 'locale of the catalog to compile'), ('ignore-obsolete=', None, - 'whether to omit obsolete messages from the output') + 'whether to omit obsolete messages from the output'), + ('no-fuzzy-matching', 'N', + 'do not use fuzzy matching'), + ('previous', None, + 'keep previous msgids of translated messages') ] - 
boolean_options = ['ignore_obsolete'] + boolean_options = ['ignore_obsolete', 'no_fuzzy_matching', 'previous'] def initialize_options(self): self.domain = 'messages' @@ -466,6 +470,8 @@ self.output_file = None self.locale = None self.ignore_obsolete = False + self.no_fuzzy_matching = False + self.previous = False def finalize_options(self): if not self.input_file: @@ -475,6 +481,8 @@ 'directory') if self.output_file and not self.locale: raise DistutilsOptionError('you must specify the locale') + if self.no_fuzzy_matching and self.previous: + self.previous = False def run(self): po_files = [] @@ -513,16 +521,17 @@ finally: infile.close() - catalog.update(template) + catalog.update(template, self.no_fuzzy_matching, self.previous) tmpname = os.path.join(os.path.dirname(filename), - tempfile.gettempprefix() + + tempfile.gettempprefix() + os.path.basename(filename)) tmpfile = open(tmpname, 'w') try: try: write_po(tmpfile, catalog, - ignore_obsolete=self.ignore_obsolete) + ignore_obsolete=self.ignore_obsolete, + include_old_msgid=self.previous) finally: tmpfile.close() except: @@ -890,8 +899,15 @@ action='store_true', help='do not include obsolete messages in the output ' '(default %default)'), + parser.add_option('--no-fuzzy-matching', '-N', dest='no_fuzzy_matching', + action='store_true', + help='do not use fuzzy matching (default %default)'), + parser.add_option('--previous', dest='previous', action='store_true', + help='keep previous msgids of translated messages ' + '(default %default)'), - parser.set_defaults(domain='messages', ignore_obsolete=False) + parser.set_defaults(domain='messages', ignore_obsolete=False, + no_fuzzy_matching=False, previous=False) options, args = parser.parse_args(argv) if not options.input_file: @@ -900,6 +916,8 @@ parser.error('you must specify the output file or directory') if options.output_file and not options.locale: parser.error('you must specify the loicale') + if options.no_fuzzy_matching and options.previous: + options.previous = 
False po_files = [] if not options.output_file: @@ -937,18 +955,18 @@ finally: infile.close() - catalog.update(template) - - catalog.update(template) + catalog.update(template, options.no_fuzzy_matching, + options.previous) tmpname = os.path.join(os.path.dirname(filename), - tempfile.gettempprefix() + + tempfile.gettempprefix() + os.path.basename(filename)) tmpfile = open(tmpname, 'w') try: try: write_po(tmpfile, catalog, - ignore_obsolete=options.ignore_obsolete) + ignore_obsolete=options.ignore_obsolete, + include_old_msgid=options.previous) finally: tmpfile.close() except: diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -36,12 +36,12 @@ def unescape(string): r"""Reverse `escape` the given string. - + >>> print unescape('"Say:\\n \\"hello, world!\\"\\n"') Say: "hello, world!" - + :param string: the string to unescape :return: the unescaped string :rtype: `str` or `unicode` @@ -54,14 +54,14 @@ def denormalize(string): r"""Reverse the normalization done by the `normalize` function. - + >>> print denormalize(r'''"" ... "Say:\n" ... " \"hello, world!\"\n"''') Say: "hello, world!" - + >>> print denormalize(r'''"" ... "Say:\n" ... " \"Lorem ipsum dolor sit " @@ -70,7 +70,7 @@ Say: "Lorem ipsum dolor sit amet, consectetur adipisicing elit, " - + :param string: the string to denormalize :return: the denormalized string :rtype: `unicode` or `str` @@ -86,7 +86,7 @@ def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False): """Read messages from a ``gettext`` PO (portable object) file from the given file-like object and return a `Catalog`. - + >>> from StringIO import StringIO >>> buf = StringIO(''' ... #: main.py:1 @@ -104,7 +104,7 @@ ... ''') >>> catalog = read_po(buf) >>> catalog.revision_date = datetime(2007, 04, 01) - + >>> for message in catalog: ... if message.id: ... 
print (message.id, message.string) @@ -116,7 +116,7 @@ ((u'bar', u'baz'), ('', '')) ([(u'main.py', 3)], set([])) ([u'A user comment'], [u'An auto comment']) - + :param fileobj: the file-like object to read the PO file from :param locale: the locale identifier or `Locale` object, or `None` if the catalog is not bound to a locale (which basically @@ -235,12 +235,12 @@ def escape(string): r"""Escape the given string so that it can be included in double-quoted strings in ``PO`` files. - + >>> escape('''Say: ... "hello, world!" ... ''') '"Say:\\n \\"hello, world!\\"\\n"' - + :param string: the string to escape :return: the escaped string :rtype: `str` or `unicode` @@ -253,14 +253,14 @@ def normalize(string, prefix='', width=76): r"""Convert a string into a format that is appropriate for .po files. - + >>> print normalize('''Say: ... "hello, world!" ... ''', width=None) "" "Say:\n" " \"hello, world!\"\n" - + >>> print normalize('''Say: ... "Lorem ipsum dolor sit amet, consectetur adipisicing elit, " ... ''', width=32) @@ -269,7 +269,7 @@ " \"Lorem ipsum dolor sit " "amet, consectetur adipisicing" " elit, \"\n" - + :param string: the string to normalize :param prefix: a string that should be prepended to every line :param width: the maximum line width; use `None`, 0, or a negative number @@ -314,10 +314,11 @@ return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines]) def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, - sort_output=False, sort_by_file=False, ignore_obsolete=False): + sort_output=False, sort_by_file=False, ignore_obsolete=False, + include_old_msgid=False): r"""Write a ``gettext`` PO (portable object) template file for a given message catalog to the provided file-like object. - + >>> catalog = Catalog() >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], ... 
flags=('fuzzy',)) @@ -338,7 +339,7 @@ msgstr[1] "" - + :param fileobj: the file-like object to write to :param catalog: the `Catalog` instance :param width: the maximum line width for the generated output; use `None`, @@ -350,6 +351,8 @@ :sort_by_file: whether to sort the messages in the output by their locations :ignore_obsolete: whether to ignore obsolete messages and not include them in the output; by default they are included as comments + :param include_old_msgid: include the old msgid as a comment when + updating the catalog """ def _normalize(key, prefix=''): return normalize(key, prefix=prefix, width=width) \ @@ -414,6 +417,11 @@ if message.flags: _write('#%s\n' % ', '.join([''] + list(message.flags))) + if message.old_msgid and include_old_msgid: + _write_comment(message.old_msgid[0], prefix='| msgid') + if len(message.old_msgid) > 1: + _write_comment(message.old_msgid[1], prefix='| msgid_plural') + _write_message(message) _write('\n') diff --git a/babel/messages/tests/catalog.py b/babel/messages/tests/catalog.py --- a/babel/messages/tests/catalog.py +++ b/babel/messages/tests/catalog.py @@ -22,11 +22,11 @@ def test_python_format(self): assert catalog.PYTHON_FORMAT('foo %d bar') assert catalog.PYTHON_FORMAT('foo %s bar') - assert catalog.PYTHON_FORMAT('foo %r bar') + assert catalog.PYTHON_FORMAT('foo %r bar') def test_translator_comments(self): mess = catalog.Message('foo', user_comments=['Comment About `foo`']) - self.assertEqual(mess.user_comments, ['Comment About `foo`']) + self.assertEqual(mess.user_comments, ['Comment About `foo`']) mess = catalog.Message('foo', auto_comments=['Comment 1 About `foo`', 'Comment 2 About `foo`']) @@ -53,7 +53,7 @@ self.assertEqual(cat[u'foo'].user_comments, ['Foo Bar comment 1']) # now add yet another location with another comment cat[u'foo'] = catalog.Message('foo', locations=[('main.py', 9)], - auto_comments=['Foo Bar comment 2']) + auto_comments=['Foo Bar comment 2']) self.assertEqual(cat[u'foo'].auto_comments, ['Foo 
Bar comment 2']) def test_update_fuzzy_matching_with_case_change(self): @@ -88,7 +88,7 @@ cat.add('bar', 'Bahr') tmpl = catalog.Catalog() tmpl.add('foo') - cat.update(tmpl, fuzzy_matching=False) + cat.update(tmpl, no_fuzzy_matching=True) self.assertEqual(2, len(cat.obsolete))