Mercurial > babel > old > mirror
changeset 167:533baef258bb
Implement fuzzy matching for catalog updates. No frontend support yet.
author | cmlenz |
---|---|
date | Fri, 22 Jun 2007 08:39:04 +0000 |
parents | 0eccbe635dba |
children | d99cd18cc912 |
files | babel/messages/catalog.py babel/messages/frontend.py babel/messages/tests/catalog.py babel/util.py |
diffstat | 4 files changed, 63 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -15,6 +15,7 @@ from cgi import parse_header from datetime import datetime +from difflib import get_close_matches from email import message_from_string import re try: @@ -471,7 +472,7 @@ self[id] = Message(id, string, list(locations), flags, auto_comments, user_comments) - def update(self, template): + def update(self, template, fuzzy_matching=True): """Update the catalog based on the given template catalog. >>> from babel.messages import Catalog @@ -506,10 +507,10 @@ [<Message 'head'>] :param template: the reference catalog, usually read from a POT file + :param fuzzy_matching: whether to use fuzzy matching of message IDs :return: a list of `Message` objects that the catalog contained before the updated, but couldn't be found in the template """ - rest = odict([(message.id, message) for message in self if message.id]) messages = self._messages self._messages = odict() @@ -521,15 +522,22 @@ message.string = oldmsg.string message.flags |= oldmsg.flags self[message.id] = message - del rest[message.id] + else: - for oldmsg in messages: - # TODO: fuzzy matching - pass - else: - self[message.id] = message + if fuzzy_matching: + # do some fuzzy matching with difflib + matches = get_close_matches(key.lower().strip(), + [self._key_for(msgid) for msgid in messages], 1) + if matches: + oldmsg = messages.pop(matches[0]) + message.string = oldmsg.string + message.flags |= oldmsg.flags | set([u'fuzzy']) + self[message.id] = message + continue - return rest.values() + self[message.id] = message + + return messages.values() def _key_for(self, id): """The key for a message is just the singular ID even for pluralizable
--- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -414,7 +414,7 @@ commands = { 'compile': 'compile a message catalog to a MO file', 'extract': 'extract messages from source files and generate a POT file', - 'init': 'create new message catalogs from a template' + 'init': 'create new message catalogs from a template', } def run(self, argv=sys.argv): @@ -451,7 +451,7 @@ :param argv: the command arguments """ - parser = OptionParser(usage=self.usage % ('init',''), + parser = OptionParser(usage=self.usage % ('init', ''), description=self.commands['init']) parser.add_option('--domain', '-D', dest='domain', help="domain of MO and PO files (default '%default')") @@ -633,10 +633,10 @@ def init(self, argv): """Subcommand for creating new message catalogs from a template. - + :param argv: the command arguments """ - parser = OptionParser(usage=self.usage % ('init',''), + parser = OptionParser(usage=self.usage % ('init', ''), description=self.commands['init']) parser.add_option('--domain', '-D', dest='domain', help="domain of PO file (default '%default')") @@ -693,6 +693,7 @@ finally: outfile.close() + def main(): CommandLineInterface().run(sys.argv)
--- a/babel/messages/tests/catalog.py +++ b/babel/messages/tests/catalog.py @@ -56,6 +56,41 @@ auto_comments=['Foo Bar comment 2']) self.assertEqual(cat[u'foo'].auto_comments, ['Foo Bar comment 2']) + def test_update_fuzzy_matching_with_case_change(self): + cat = catalog.Catalog() + cat.add('foo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('Foo') + rest = cat.update(tmpl) + self.assertEqual(1, len(rest)) + assert 'foo' not in cat + + self.assertEqual('Voh', cat['Foo'].string) + self.assertEqual(True, cat['Foo'].fuzzy) + + def test_update_fuzzy_matching_with_char_change(self): + cat = catalog.Catalog() + cat.add('fo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('foo') + rest = cat.update(tmpl) + self.assertEqual(1, len(rest)) + assert 'fo' not in cat + + self.assertEqual('Voh', cat['foo'].string) + self.assertEqual(True, cat['foo'].fuzzy) + + def test_update_without_fuzzy_matching(self): + cat = catalog.Catalog() + cat.add('fo', 'Voh') + cat.add('bar', 'Bahr') + tmpl = catalog.Catalog() + tmpl.add('foo') + rest = cat.update(tmpl, fuzzy_matching=False) + self.assertEqual(2, len(rest)) + def suite(): suite = unittest.TestSuite()
--- a/babel/util.py +++ b/babel/util.py @@ -156,6 +156,12 @@ def keys(self): return self._keys[:] + def pop(self, key, default=None): + if key not in self: + return default + self._keys.remove(key) + return dict.pop(self, key) + def setdefault(self, key, failobj = None): dict.setdefault(self, key, failobj) if key not in self._keys: