changeset 167:533baef258bb

Implement fuzzy matching for catalog updates. No frontend yet.
author cmlenz
date Fri, 22 Jun 2007 08:39:04 +0000
parents 0eccbe635dba
children d99cd18cc912
files babel/messages/catalog.py babel/messages/frontend.py babel/messages/tests/catalog.py babel/util.py
diffstat 4 files changed, 63 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -15,6 +15,7 @@
 
 from cgi import parse_header
 from datetime import datetime
+from difflib import get_close_matches
 from email import message_from_string
 import re
 try:
@@ -471,7 +472,7 @@
         self[id] = Message(id, string, list(locations), flags, auto_comments,
                            user_comments)
 
-    def update(self, template):
+    def update(self, template, fuzzy_matching=True):
         """Update the catalog based on the given template catalog.
         
         >>> from babel.messages import Catalog
@@ -506,10 +507,10 @@
         [<Message 'head'>]
         
         :param template: the reference catalog, usually read from a POT file
+        :param fuzzy_matching: whether to use fuzzy matching of message IDs
         :return: a list of `Message` objects that the catalog contained before
                  the updated, but couldn't be found in the template
         """
-        rest = odict([(message.id, message) for message in self if message.id])
         messages = self._messages
         self._messages = odict()
 
@@ -521,15 +522,22 @@
                     message.string = oldmsg.string
                     message.flags |= oldmsg.flags
                     self[message.id] = message
-                    del rest[message.id]
+
                 else:
-                    for oldmsg in messages:
-                        # TODO: fuzzy matching
-                        pass
-                    else:
-                        self[message.id] = message
+                    if fuzzy_matching:
+                        # do some fuzzy matching with difflib
+                        matches = get_close_matches(key.lower().strip(),
+                            [self._key_for(msgid) for msgid in messages], 1)
+                        if matches:
+                            oldmsg = messages.pop(matches[0])
+                            message.string = oldmsg.string
+                            message.flags |= oldmsg.flags | set([u'fuzzy'])
+                            self[message.id] = message
+                        continue
 
-        return rest.values()
+                    self[message.id] = message
+
+        return messages.values()
 
     def _key_for(self, id):
         """The key for a message is just the singular ID even for pluralizable
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -414,7 +414,7 @@
     commands = {
         'compile': 'compile a message catalog to a MO file',
         'extract': 'extract messages from source files and generate a POT file',
-        'init': 'create new message catalogs from a template'
+        'init': 'create new message catalogs from a template',
     }
 
     def run(self, argv=sys.argv):
@@ -451,7 +451,7 @@
 
         :param argv: the command arguments
         """
-        parser = OptionParser(usage=self.usage % ('init',''),
+        parser = OptionParser(usage=self.usage % ('init', ''),
                               description=self.commands['init'])
         parser.add_option('--domain', '-D', dest='domain',
                           help="domain of MO and PO files (default '%default')")
@@ -633,10 +633,10 @@
 
     def init(self, argv):
         """Subcommand for creating new message catalogs from a template.
-
+        
         :param argv: the command arguments
         """
-        parser = OptionParser(usage=self.usage % ('init',''),
+        parser = OptionParser(usage=self.usage % ('init', ''),
                               description=self.commands['init'])
         parser.add_option('--domain', '-D', dest='domain',
                           help="domain of PO file (default '%default')")
@@ -693,6 +693,7 @@
         finally:
             outfile.close()
 
+
 def main():
     CommandLineInterface().run(sys.argv)
 
--- a/babel/messages/tests/catalog.py
+++ b/babel/messages/tests/catalog.py
@@ -56,6 +56,41 @@
                                       auto_comments=['Foo Bar comment 2'])        
         self.assertEqual(cat[u'foo'].auto_comments, ['Foo Bar comment 2'])
 
+    def test_update_fuzzy_matching_with_case_change(self):
+        cat = catalog.Catalog()
+        cat.add('foo', 'Voh')
+        cat.add('bar', 'Bahr')
+        tmpl = catalog.Catalog()
+        tmpl.add('Foo')
+        rest = cat.update(tmpl)
+        self.assertEqual(1, len(rest))
+        assert 'foo' not in cat
+
+        self.assertEqual('Voh', cat['Foo'].string)
+        self.assertEqual(True, cat['Foo'].fuzzy)
+
+    def test_update_fuzzy_matching_with_char_change(self):
+        cat = catalog.Catalog()
+        cat.add('fo', 'Voh')
+        cat.add('bar', 'Bahr')
+        tmpl = catalog.Catalog()
+        tmpl.add('foo')
+        rest = cat.update(tmpl)
+        self.assertEqual(1, len(rest))
+        assert 'fo' not in cat
+
+        self.assertEqual('Voh', cat['foo'].string)
+        self.assertEqual(True, cat['foo'].fuzzy)
+
+    def test_update_without_fuzzy_matching(self):
+        cat = catalog.Catalog()
+        cat.add('fo', 'Voh')
+        cat.add('bar', 'Bahr')
+        tmpl = catalog.Catalog()
+        tmpl.add('foo')
+        rest = cat.update(tmpl, fuzzy_matching=False)
+        self.assertEqual(2, len(rest))
+
 
 def suite():
     suite = unittest.TestSuite()
--- a/babel/util.py
+++ b/babel/util.py
@@ -156,6 +156,12 @@
     def keys(self):
         return self._keys[:]
 
+    def pop(self, key, default=None):
+        if key not in self:
+            return default
+        self._keys.remove(key)
+        return dict.pop(self, key)
+
     def setdefault(self, key, failobj = None):
         dict.setdefault(self, key, failobj)
         if key not in self._keys:
Copyright (C) 2012-2017 Edgewall Software