comparison babel/messages/catalog.py @ 314:5c0bda4f20b1

Fix catalog updating with fuzzy matches. Closes #82.
author cmlenz
date Fri, 01 Feb 2008 13:38:09 +0000
parents 62d4f85d33ea
children 654b632e5482
comparison
equal deleted inserted replaced
313:2d039dbe2f52 314:5c0bda4f20b1
229 header_comment = property(_get_header_comment, _set_header_comment, doc="""\ 229 header_comment = property(_get_header_comment, _set_header_comment, doc="""\
230 The header comment for the catalog. 230 The header comment for the catalog.
231 231
232 >>> catalog = Catalog(project='Foobar', version='1.0', 232 >>> catalog = Catalog(project='Foobar', version='1.0',
233 ... copyright_holder='Foo Company') 233 ... copyright_holder='Foo Company')
234 >>> print catalog.header_comment 234 >>> print catalog.header_comment #doctest: +ELLIPSIS
235 # Translations template for Foobar. 235 # Translations template for Foobar.
236 # Copyright (C) 2007 Foo Company 236 # Copyright (C) ... Foo Company
237 # This file is distributed under the same license as the Foobar project. 237 # This file is distributed under the same license as the Foobar project.
238 # FIRST AUTHOR <EMAIL@ADDRESS>, 2007. 238 # FIRST AUTHOR <EMAIL@ADDRESS>, ....
239 # 239 #
240 240
241 The header can also be set from a string. Any known upper-case variables 241 The header can also be set from a string. Any known upper-case variables
242 will be replaced when the header is retrieved again: 242 will be replaced when the header is retrieved again:
243 243
609 609
610 :param template: the reference catalog, usually read from a POT file 610 :param template: the reference catalog, usually read from a POT file
611 :param no_fuzzy_matching: whether to use fuzzy matching of message IDs 611 :param no_fuzzy_matching: whether to use fuzzy matching of message IDs
612 """ 612 """
613 messages = self._messages 613 messages = self._messages
614 remaining = messages.copy()
614 self._messages = odict() 615 self._messages = odict()
616
617 # Prepare for fuzzy matching
618 fuzzy_candidates = []
619 if not no_fuzzy_matching:
620 fuzzy_candidates = [
621 self._key_for(msgid) for msgid in messages
622 if msgid and messages[msgid].string
623 ]
624 fuzzy_matches = set()
615 625
616 def _merge(message, oldkey, newkey): 626 def _merge(message, oldkey, newkey):
617 fuzzy = False 627 fuzzy = False
618 oldmsg = messages.pop(oldkey)
619 if oldkey != newkey: 628 if oldkey != newkey:
620 fuzzy = True 629 fuzzy = True
630 fuzzy_matches.add(oldkey)
631 oldmsg = messages.get(oldkey)
621 if isinstance(oldmsg.id, basestring): 632 if isinstance(oldmsg.id, basestring):
622 message.previous_id = [oldmsg.id] 633 message.previous_id = [oldmsg.id]
623 else: 634 else:
624 message.previous_id = list(oldmsg.id) 635 message.previous_id = list(oldmsg.id)
636 else:
637 oldmsg = remaining.pop(oldkey)
625 message.string = oldmsg.string 638 message.string = oldmsg.string
626 if isinstance(message.id, (list, tuple)): 639 if isinstance(message.id, (list, tuple)):
627 if not isinstance(message.string, (list, tuple)): 640 if not isinstance(message.string, (list, tuple)):
628 fuzzy = True 641 fuzzy = True
629 message.string = tuple( 642 message.string = tuple(
647 _merge(message, key, key) 660 _merge(message, key, key)
648 else: 661 else:
649 if no_fuzzy_matching is False: 662 if no_fuzzy_matching is False:
650 # do some fuzzy matching with difflib 663 # do some fuzzy matching with difflib
651 matches = get_close_matches(key.lower().strip(), 664 matches = get_close_matches(key.lower().strip(),
652 [self._key_for(msgid) for msgid in messages], 1) 665 fuzzy_candidates, 1)
653 if matches: 666 if matches:
654 _merge(message, matches[0], key) 667 _merge(message, matches[0], key)
655 continue 668 continue
656 669
657 self[message.id] = message 670 self[message.id] = message
658 671
659 self.obsolete = messages 672 self.obsolete = odict()
673 for msgid in remaining:
674 if no_fuzzy_matching or msgid not in fuzzy_matches:
675 self.obsolete[msgid] = remaining[msgid]
660 676
661 def _key_for(self, id): 677 def _key_for(self, id):
662 """The key for a message is just the singular ID even for pluralizable 678 """The key for a message is just the singular ID even for pluralizable
663 messages. 679 messages.
664 """ 680 """
Copyright (C) 2012-2017 Edgewall Software