changeset 202:d3c272492053

Added `--no-fuzzy-matching` to the frontends, and also `--previous`, which adds the old msgids as comments. The latter closes #31.
author palgarvio
date Tue, 03 Jul 2007 17:55:34 +0000
parents 10e8d072e2d1
children 3476d17c9909
files babel/messages/catalog.py babel/messages/frontend.py babel/messages/pofile.py babel/messages/tests/catalog.py
diffstat 4 files changed, 116 insertions(+), 76 deletions(-) [+]
line wrap: on
line diff
--- a/babel/messages/catalog.py
+++ b/babel/messages/catalog.py
@@ -40,9 +40,9 @@
     """Representation of a single message in a catalog."""
 
     def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
-                 user_comments=()):
+                 user_comments=(), old_msgid=()):
         """Create the message object.
-        
+
         :param id: the message ID, or a ``(singular, plural)`` tuple for
                    pluralizable messages
         :param string: the translated message string, or a
@@ -51,6 +51,8 @@
         :param flags: a set or sequence of flags
         :param auto_comments: a sequence of automatic comments for the message
         :param user_comments: a sequence of user comments for the message
+        :param old_msgid: the old message ID, or a ``(singular, plural)``
+                          tuple for old pluralizable messages
         """
         self.id = id #: The message ID
         if not string and self.pluralizable:
@@ -64,6 +66,10 @@
             self.flags.discard('python-format')
         self.auto_comments = list(auto_comments)
         self.user_comments = list(user_comments)
+        if isinstance(old_msgid, basestring):
+            self.old_msgid = [old_msgid]
+        else:
+            self.old_msgid = list(old_msgid)
 
     def __repr__(self):
         return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
@@ -73,7 +79,7 @@
         return 'fuzzy' in self.flags
     fuzzy = property(fuzzy, doc="""\
         Whether the translation is fuzzy.
-        
+
         >>> Message('foo').fuzzy
         False
         >>> msg = Message('foo', 'foo', flags=['fuzzy'])
@@ -81,7 +87,7 @@
         True
         >>> msg
         <Message 'foo' (flags: ['fuzzy'])>
-        
+
         :type:  `bool`
         """)
 
@@ -89,12 +95,12 @@
         return isinstance(self.id, (list, tuple))
     pluralizable = property(pluralizable, doc="""\
         Whether the message is plurizable.
-        
+
         >>> Message('foo').pluralizable
         False
         >>> Message(('foo', 'bar')).pluralizable
         True
-        
+
         :type:  `bool`
         """)
 
@@ -105,12 +111,12 @@
         return bool(filter(None, [PYTHON_FORMAT(id) for id in ids]))
     python_format = property(python_format, doc="""\
         Whether the message contains Python-style parameters.
-        
+
         >>> Message('foo %(name)s bar').python_format
         True
         >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
         True
-        
+
         :type:  `bool`
         """)
 
@@ -132,7 +138,7 @@
                  revision_date=None, last_translator=None, charset='utf-8',
                  fuzzy=True):
         """Initialize the catalog object.
-        
+
         :param locale: the locale identifier or `Locale` object, or `None`
                        if the catalog is not bound to a locale (which basically
                        means it's a template)
@@ -197,7 +203,7 @@
 
     header_comment = property(_get_header_comment, _set_header_comment, doc="""\
     The header comment for the catalog.
-    
+
     >>> catalog = Catalog(project='Foobar', version='1.0',
     ...                   copyright_holder='Foo Company')
     >>> print catalog.header_comment
@@ -206,10 +212,10 @@
     # This file is distributed under the same license as the Foobar project.
     # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
     #
-    
+
     The header can also be set from a string. Any known upper-case variables
     will be replaced when the header is retrieved again:
-    
+
     >>> catalog = Catalog(project='Foobar', version='1.0',
     ...                   copyright_holder='Foo Company')
     >>> catalog.header_comment = '''\\
@@ -282,13 +288,13 @@
 
     mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
     The MIME headers of the catalog, used for the special ``msgid ""`` entry.
-    
+
     The behavior of this property changes slightly depending on whether a locale
     is set or not, the latter indicating that the catalog is actually a template
     for actual translations.
-    
+
     Here's an example of the output for such a catalog template:
-    
+
     >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
     >>> catalog = Catalog(project='Foobar', version='1.0',
     ...                   creation_date=created)
@@ -304,9 +310,9 @@
     Content-Type: text/plain; charset=utf-8
     Content-Transfer-Encoding: 8bit
     Generated-By: Babel ...
-    
+
     And here's an example of the output when the locale is set:
-    
+
     >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
     >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
     ...                   creation_date=created, revision_date=revised,
@@ -324,7 +330,7 @@
     Content-Type: text/plain; charset=utf-8
     Content-Transfer-Encoding: 8bit
     Generated-By: Babel ...
-    
+
     :type: `list`
     """)
 
@@ -338,12 +344,12 @@
         return num
     num_plurals = property(num_plurals, doc="""\
     The number of plurals used by the locale.
-    
+
     >>> Catalog(locale='en').num_plurals
     2
     >>> Catalog(locale='cs_CZ').num_plurals
     3
-    
+
     :type: `int`
     """)
 
@@ -357,12 +363,12 @@
         return 'nplurals=%s; plural=%s' % (num, expr)
     plural_forms = property(plural_forms, doc="""\
     Return the plural forms declaration for the locale.
-    
+
     >>> Catalog(locale='en').plural_forms
     'nplurals=2; plural=(n != 1)'
     >>> Catalog(locale='pt_BR').plural_forms
     'nplurals=2; plural=(n > 1)'
-    
+
     :type: `str`
     """)
 
@@ -372,7 +378,7 @@
 
     def __len__(self):
         """The number of messages in the catalog.
-        
+
         This does not include the special ``msgid ""`` entry.
         """
         return len(self._messages)
@@ -380,7 +386,7 @@
     def __iter__(self):
         """Iterates through all the entries in the catalog, in the order they
         were added, yielding a `Message` object for every entry.
-        
+
         :rtype: ``iterator``
         """
         buf = []
@@ -407,7 +413,7 @@
 
     def __getitem__(self, id):
         """Return the message with the specified ID.
-        
+
         :param id: the message ID
         :return: the message with the specified ID, or `None` if no such message
                  is in the catalog
@@ -417,15 +423,15 @@
 
     def __setitem__(self, id, message):
         """Add or update the message with the specified ID.
-        
+
         >>> catalog = Catalog()
         >>> catalog[u'foo'] = Message(u'foo')
         >>> catalog[u'foo']
         <Message u'foo' (flags: [])>
-        
+
         If a message with that ID is already in the catalog, it is updated
         to include the locations and flags of the new message.
-        
+
         >>> catalog = Catalog()
         >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
         >>> catalog[u'foo'].locations
@@ -433,7 +439,7 @@
         >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
         >>> catalog[u'foo'].locations
         [('main.py', 1), ('utils.py', 5)]
-        
+
         :param id: the message ID
         :param message: the `Message` object
         """
@@ -463,17 +469,17 @@
             self._messages[key] = message
 
     def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
-            user_comments=()):
+            user_comments=(), old_message=()):
         """Add or update the message with the specified ID.
-        
+
         >>> catalog = Catalog()
         >>> catalog.add(u'foo')
         >>> catalog[u'foo']
         <Message u'foo' (flags: [])>
-        
+
         This method simply constructs a `Message` object with the given
         arguments and invokes `__setitem__` with that object.
-        
+
         :param id: the message ID, or a ``(singular, plural)`` tuple for
                    pluralizable messages
         :param string: the translated message string, or a
@@ -484,11 +490,12 @@
         :param user_comments: a sequence of user comments
         """
         self[id] = Message(id, string, list(locations), flags, auto_comments,
-                           user_comments)
+                           user_comments, old_message)
 
-    def update(self, template, fuzzy_matching=True):
+    def update(self, template, no_fuzzy_matching=False,
+               include_old_msgid=False):
         """Update the catalog based on the given template catalog.
-        
+
         >>> from babel.messages import Catalog
         >>> template = Catalog()
         >>> template.add('green', locations=[('main.py', 99)])
@@ -499,39 +506,41 @@
         >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
         >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
         ...             locations=[('util.py', 38)])
-        
+
         >>> catalog.update(template)
         >>> len(catalog)
         3
-        
+
         >>> msg1 = catalog['green']
         >>> msg1.string
         >>> msg1.locations
         [('main.py', 99)]
-        
+
         >>> msg2 = catalog['blue']
         >>> msg2.string
         u'blau'
         >>> msg2.locations
         [('main.py', 100)]
-        
+
         >>> msg3 = catalog['salad']
         >>> msg3.string
         (u'Salat', u'Salate')
         >>> msg3.locations
         [('util.py', 42)]
-        
+
         Messages that are in the catalog but not in the template are removed
         from the main collection, but can still be accessed via the `obsolete`
         member:
-        
+
         >>> 'head' in catalog
         False
         >>> catalog.obsolete.values()
         [<Message 'head' (flags: [])>]
-        
+
         :param template: the reference catalog, usually read from a POT file
-        :param fuzzy_matching: whether to use fuzzy matching of message IDs
+        :param no_fuzzy_matching: whether to disable fuzzy matching of message IDs
+        :param include_old_msgid: include the old msgid as a comment when
+                                  updating the catalog
         """
         messages = self._messages
         self._messages = odict()
@@ -546,7 +555,7 @@
                     self[message.id] = message
 
                 else:
-                    if fuzzy_matching:
+                    if no_fuzzy_matching is False:
                         # do some fuzzy matching with difflib
                         matches = get_close_matches(key.lower().strip(),
                             [self._key_for(msgid) for msgid in messages], 1)
@@ -554,6 +563,11 @@
                             oldmsg = messages.pop(matches[0])
                             message.string = oldmsg.string
                             message.flags |= oldmsg.flags | set([u'fuzzy'])
+                            if include_old_msgid:
+                                if isinstance(oldmsg.id, basestring):
+                                    message.old_msgid = [oldmsg.id]
+                                else:
+                                    message.old_msgid = list(oldmsg.id)
                             self[message.id] = message
                             continue
 
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -455,9 +455,13 @@
         ('locale=', 'l',
          'locale of the catalog to compile'),
         ('ignore-obsolete=', None,
-         'whether to omit obsolete messages from the output')
+         'whether to omit obsolete messages from the output'),
+        ('no-fuzzy-matching', 'N',
+         'do not use fuzzy matching'),
+        ('previous', None,
+         'keep previous msgids of translated messages')
     ]
-    boolean_options = ['ignore_obsolete']
+    boolean_options = ['ignore_obsolete', 'no_fuzzy_matching', 'previous']
 
     def initialize_options(self):
         self.domain = 'messages'
@@ -466,6 +470,8 @@
         self.output_file = None
         self.locale = None
         self.ignore_obsolete = False
+        self.no_fuzzy_matching = False
+        self.previous = False
 
     def finalize_options(self):
         if not self.input_file:
@@ -475,6 +481,8 @@
                                        'directory')
         if self.output_file and not self.locale:
             raise DistutilsOptionError('you must specify the locale')
+        if self.no_fuzzy_matching and self.previous:
+            self.previous = False
 
     def run(self):
         po_files = []
@@ -513,16 +521,17 @@
             finally:
                 infile.close()
 
-            catalog.update(template)
+            catalog.update(template, self.no_fuzzy_matching, self.previous)
 
             tmpname = os.path.join(os.path.dirname(filename),
-                                   tempfile.gettempprefix() + 
+                                   tempfile.gettempprefix() +
                                    os.path.basename(filename))
             tmpfile = open(tmpname, 'w')
             try:
                 try:
                     write_po(tmpfile, catalog,
-                             ignore_obsolete=self.ignore_obsolete)
+                             ignore_obsolete=self.ignore_obsolete,
+                             include_old_msgid=self.previous)
                 finally:
                     tmpfile.close()
             except:
@@ -890,8 +899,15 @@
                           action='store_true',
                           help='do not include obsolete messages in the output '
                                '(default %default)'),
+        parser.add_option('--no-fuzzy-matching', '-N', dest='no_fuzzy_matching',
+                          action='store_true',
+                          help='do not use fuzzy matching (default %default)'),
+        parser.add_option('--previous', dest='previous', action='store_true',
+                          help='keep previous msgids of translated messages '
+                               '(default %default)'),
 
-        parser.set_defaults(domain='messages', ignore_obsolete=False)
+        parser.set_defaults(domain='messages', ignore_obsolete=False,
+                            no_fuzzy_matching=False, previous=False)
         options, args = parser.parse_args(argv)
 
         if not options.input_file:
@@ -900,6 +916,8 @@
             parser.error('you must specify the output file or directory')
         if options.output_file and not options.locale:
             parser.error('you must specify the loicale')
+        if options.no_fuzzy_matching and options.previous:
+            options.previous = False
 
         po_files = []
         if not options.output_file:
@@ -937,18 +955,18 @@
             finally:
                 infile.close()
 
-            catalog.update(template)
-
-            catalog.update(template)
+            catalog.update(template, options.no_fuzzy_matching,
+                           options.previous)
 
             tmpname = os.path.join(os.path.dirname(filename),
-                                   tempfile.gettempprefix() + 
+                                   tempfile.gettempprefix() +
                                    os.path.basename(filename))
             tmpfile = open(tmpname, 'w')
             try:
                 try:
                     write_po(tmpfile, catalog,
-                             ignore_obsolete=options.ignore_obsolete)
+                             ignore_obsolete=options.ignore_obsolete,
+                             include_old_msgid=options.previous)
                 finally:
                     tmpfile.close()
             except:
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -36,12 +36,12 @@
 
 def unescape(string):
     r"""Reverse `escape` the given string.
-    
+
     >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
     Say:
       "hello, world!"
     <BLANKLINE>
-    
+
     :param string: the string to unescape
     :return: the unescaped string
     :rtype: `str` or `unicode`
@@ -54,14 +54,14 @@
 
 def denormalize(string):
     r"""Reverse the normalization done by the `normalize` function.
-    
+
     >>> print denormalize(r'''""
     ... "Say:\n"
     ... "  \"hello, world!\"\n"''')
     Say:
       "hello, world!"
     <BLANKLINE>
-    
+
     >>> print denormalize(r'''""
     ... "Say:\n"
     ... "  \"Lorem ipsum dolor sit "
@@ -70,7 +70,7 @@
     Say:
       "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
     <BLANKLINE>
-    
+
     :param string: the string to denormalize
     :return: the denormalized string
     :rtype: `unicode` or `str`
@@ -86,7 +86,7 @@
 def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
     """Read messages from a ``gettext`` PO (portable object) file from the given
     file-like object and return a `Catalog`.
-    
+
     >>> from StringIO import StringIO
     >>> buf = StringIO('''
     ... #: main.py:1
@@ -104,7 +104,7 @@
     ... ''')
     >>> catalog = read_po(buf)
     >>> catalog.revision_date = datetime(2007, 04, 01)
-    
+
     >>> for message in catalog:
     ...     if message.id:
     ...         print (message.id, message.string)
@@ -116,7 +116,7 @@
     ((u'bar', u'baz'), ('', ''))
       ([(u'main.py', 3)], set([]))
       ([u'A user comment'], [u'An auto comment'])
-    
+
     :param fileobj: the file-like object to read the PO file from
     :param locale: the locale identifier or `Locale` object, or `None`
                    if the catalog is not bound to a locale (which basically
@@ -235,12 +235,12 @@
 def escape(string):
     r"""Escape the given string so that it can be included in double-quoted
     strings in ``PO`` files.
-    
+
     >>> escape('''Say:
     ...   "hello, world!"
     ... ''')
     '"Say:\\n  \\"hello, world!\\"\\n"'
-    
+
     :param string: the string to escape
     :return: the escaped string
     :rtype: `str` or `unicode`
@@ -253,14 +253,14 @@
 
 def normalize(string, prefix='', width=76):
     r"""Convert a string into a format that is appropriate for .po files.
-    
+
     >>> print normalize('''Say:
     ...   "hello, world!"
     ... ''', width=None)
     ""
     "Say:\n"
     "  \"hello, world!\"\n"
-    
+
     >>> print normalize('''Say:
     ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
     ... ''', width=32)
@@ -269,7 +269,7 @@
     "  \"Lorem ipsum dolor sit "
     "amet, consectetur adipisicing"
     " elit, \"\n"
-    
+
     :param string: the string to normalize
     :param prefix: a string that should be prepended to every line
     :param width: the maximum line width; use `None`, 0, or a negative number
@@ -314,10 +314,11 @@
     return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])
 
 def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
-             sort_output=False, sort_by_file=False, ignore_obsolete=False):
+             sort_output=False, sort_by_file=False, ignore_obsolete=False,
+             include_old_msgid=False):
     r"""Write a ``gettext`` PO (portable object) template file for a given
     message catalog to the provided file-like object.
-    
+
     >>> catalog = Catalog()
     >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
     ...             flags=('fuzzy',))
@@ -338,7 +339,7 @@
     msgstr[1] ""
     <BLANKLINE>
     <BLANKLINE>
-    
+
     :param fileobj: the file-like object to write to
     :param catalog: the `Catalog` instance
     :param width: the maximum line width for the generated output; use `None`,
@@ -350,6 +351,8 @@
     :sort_by_file: whether to sort the messages in the output by their locations
     :ignore_obsolete: whether to ignore obsolete messages and not include them
                       in the output; by default they are included as comments
+    :param include_old_msgid: whether to write each message's old msgid (if
+                              any) as a comment in the output
     """
     def _normalize(key, prefix=''):
         return normalize(key, prefix=prefix, width=width) \
@@ -414,6 +417,11 @@
         if message.flags:
             _write('#%s\n' % ', '.join([''] + list(message.flags)))
 
+        if message.old_msgid and include_old_msgid:
+            _write_comment(message.old_msgid[0], prefix='| msgid')
+            if len(message.old_msgid) > 1:
+                _write_comment(message.old_msgid[1], prefix='| msgid_plural')
+
         _write_message(message)
         _write('\n')
 
--- a/babel/messages/tests/catalog.py
+++ b/babel/messages/tests/catalog.py
@@ -22,11 +22,11 @@
     def test_python_format(self):
         assert catalog.PYTHON_FORMAT('foo %d bar')
         assert catalog.PYTHON_FORMAT('foo %s bar')
-        assert catalog.PYTHON_FORMAT('foo %r bar')        
+        assert catalog.PYTHON_FORMAT('foo %r bar')
 
     def test_translator_comments(self):
         mess = catalog.Message('foo', user_comments=['Comment About `foo`'])
-        self.assertEqual(mess.user_comments, ['Comment About `foo`'])        
+        self.assertEqual(mess.user_comments, ['Comment About `foo`'])
         mess = catalog.Message('foo',
                                auto_comments=['Comment 1 About `foo`',
                                          'Comment 2 About `foo`'])
@@ -53,7 +53,7 @@
         self.assertEqual(cat[u'foo'].user_comments, ['Foo Bar comment 1'])
         # now add yet another location with another comment
         cat[u'foo'] = catalog.Message('foo', locations=[('main.py', 9)],
-                                      auto_comments=['Foo Bar comment 2'])        
+                                      auto_comments=['Foo Bar comment 2'])
         self.assertEqual(cat[u'foo'].auto_comments, ['Foo Bar comment 2'])
 
     def test_update_fuzzy_matching_with_case_change(self):
@@ -88,7 +88,7 @@
         cat.add('bar', 'Bahr')
         tmpl = catalog.Catalog()
         tmpl.add('foo')
-        cat.update(tmpl, fuzzy_matching=False)
+        cat.update(tmpl, no_fuzzy_matching=True)
         self.assertEqual(2, len(cat.obsolete))
 
 
Copyright (C) 2012-2017 Edgewall Software