# HG changeset patch
# User aronacher
# Date 1213287865 0
# Node ID 292c639506a3b910c8274154f59d5262bed6e721
# Parent  6811369cb912e7c29a8422cb07a55f7ea3b390e0
Stripping of comment tags is now optional. If enabled, the tags are stripped from every line of a comment.

diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,7 +2,9 @@
 http://svn.edgewall.org/repos/babel/tags/0.9.3/
 (?, from branches/stable/0.9.x)
 
-* Fixed invalid message extraction methods causing an UnboundLocalError.
+ * Fixed invalid message extraction methods causing an UnboundLocalError.
+ * The stripping of the comment tags in comments is optional now and
+   is done for each line in a comment.
 	
 
 Version 0.9.2
diff --git a/babel/messages/extract.py b/babel/messages/extract.py
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -30,6 +30,7 @@
 from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
 
 from babel.util import parse_encoding, pathmatch, relpath
+from textwrap import dedent
 
 __all__ = ['extract', 'extract_from_dir', 'extract_from_file']
 __docformat__ = 'restructuredtext en'
@@ -53,9 +54,21 @@
 '%s: warning: Empty msgid.  It is reserved by GNU gettext: gettext("") '
 'returns the header entry with meta information, not the empty string.')
 
+
+def _strip_comment_tags(comments, tags):
+    """Helper function for `extract` that strips comment tags from strings
+    in a list of comment lines.  This function operates in-place.
+    """
+    def _strip(line):
+        for tag in tags:
+            if line.startswith(tag):
+                return line[len(tag):].strip()
+        return line
+    comments[:] = map(_strip, comments)
+
 def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
                      options_map=None, keywords=DEFAULT_KEYWORDS,
-                     comment_tags=(), callback=None):
+                     comment_tags=(), callback=None, strip_comment_tags=False):
     """Extract messages from any source files found in the given directory.
 
     This function generates tuples of the form:
@@ -118,6 +131,8 @@
                      performed; the function is passed the filename, the name
                      of the extraction method and and the options dictionary as
                      positional arguments, in that order
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
     :return: an iterator over ``(filename, lineno, funcname, message)`` tuples
     :rtype: ``iterator``
     :see: `pathmatch`
@@ -147,15 +162,17 @@
                     if callback:
                         callback(filename, method, options)
                     for lineno, message, comments in \
-                                  extract_from_file(method, filepath,
-                                                    keywords=keywords,
-                                                    comment_tags=comment_tags,
-                                                    options=options):
+                          extract_from_file(method, filepath,
+                                            keywords=keywords,
+                                            comment_tags=comment_tags,
+                                            options=options,
+                                            strip_comment_tags=
+                                                strip_comment_tags):
                         yield filename, lineno, message, comments
                     break
 
 def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
-                      comment_tags=(), options=None):
+                      comment_tags=(), options=None, strip_comment_tags=False):
     """Extract messages from a specific file.
 
     This function returns a list of tuples of the form:
@@ -170,18 +187,21 @@
                      localizable strings
     :param comment_tags: a list of translator tags to search for and include
                          in the results
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
     :param options: a dictionary of additional options (optional)
     :return: the list of extracted messages
     :rtype: `list`
     """
     fileobj = open(filename, 'U')
     try:
-        return list(extract(method, fileobj, keywords, comment_tags, options))
+        return list(extract(method, fileobj, keywords, comment_tags, options,
+                            strip_comment_tags))
     finally:
         fileobj.close()
 
 def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
-            options=None):
+            options=None, strip_comment_tags=False):
     """Extract messages from the given file-like object using the specified
     extraction method.
 
@@ -216,6 +236,8 @@
     :param comment_tags: a list of translator tags to search for and include
                          in the results
     :param options: a dictionary of additional options (optional)
+    :param strip_comment_tags: a flag that if set to `True` causes all comment
+                               tags to be removed from the collected comments.
     :return: the list of extracted messages
     :rtype: `list`
     :raise ValueError: if the extraction method is not registered
@@ -291,6 +313,10 @@
         messages = tuple(msgs)
         if len(messages) == 1:
             messages = messages[0]
+
+        if strip_comment_tags:
+            _strip_comment_tags(comments, comment_tags)
+
         yield lineno, messages, comments
 
 def extract_nothing(fileobj, keywords, comment_tags, options):
@@ -318,6 +344,7 @@
     messages = []
     translator_comments = []
     in_def = in_translator_comments = False
+    comment_tag = None
 
     encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
 
@@ -344,8 +371,6 @@
             if in_translator_comments and \
                     translator_comments[-1][0] == lineno - 1:
                 # We're already inside a translator comment, continue appending
-                # XXX: Should we check if the programmer keeps adding the
-                # comment_tag for every comment line??? probably not!
                 translator_comments.append((lineno, value))
                 continue
             # If execution reaches this point, let's see if comment line
@@ -353,8 +378,7 @@
             for comment_tag in comment_tags:
                 if value.startswith(comment_tag):
                     in_translator_comments = True
-                    comment = value[len(comment_tag):].strip()
-                    translator_comments.append((lineno, comment))
+                    translator_comments.append((lineno, value))
                     break
         elif funcname and call_stack == 0:
             if tok == OP and value == ')':
diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -223,12 +223,14 @@
         ('add-comments=', 'c',
          'place comment block with TAG (or those preceding keyword lines) in '
          'output file. Seperate multiple TAGs with commas(,)'),
+        ('strip-comments', None,
+         'strip the comment TAGs from the comments.'),
         ('input-dirs=', None,
          'directories that should be scanned for messages'),
     ]
     boolean_options = [
         'no-default-keywords', 'no-location', 'omit-header', 'no-wrap',
-        'sort-output', 'sort-by-file'
+        'sort-output', 'sort-by-file', 'strip-comments'
     ]
 
     def initialize_options(self):
@@ -249,6 +251,7 @@
         self.copyright_holder = None
         self.add_comments = None
         self._add_comments = []
+        self.strip_comments = False
 
     def finalize_options(self):
         if self.no_default_keywords and not self.keywords:
@@ -305,7 +308,9 @@
                 extracted = extract_from_dir(dirname, method_map, options_map,
                                              keywords=self._keywords,
                                              comment_tags=self._add_comments,
-                                             callback=callback)
+                                             callback=callback,
+                                             strip_comment_tags=
+                                                self.strip_comments)
                 for filename, lineno, message, comments in extracted:
                     filepath = os.path.normpath(os.path.join(dirname, filename))
                     catalog.add(message, None, [(filepath, lineno)],
@@ -816,12 +821,15 @@
                           help='place comment block with TAG (or those '
                                'preceding keyword lines) in output file. One '
                                'TAG per argument call')
+        parser.add_option('--strip-comment-tags', '-s',
+                          dest='strip_comment_tags', action='store_true',
+                          help='Strip the comment tags from the comments.')
 
         parser.set_defaults(charset='utf-8', keywords=[],
                             no_default_keywords=False, no_location=False,
                             omit_header = False, width=76, no_wrap=False,
                             sort_output=False, sort_by_file=False,
-                            comment_tags=[])
+                            comment_tags=[], strip_comment_tags=False)
         options, args = parser.parse_args(argv)
         if not args:
             parser.error('incorrect number of arguments')
@@ -883,7 +891,9 @@
 
                 extracted = extract_from_dir(dirname, method_map, options_map,
                                              keywords, options.comment_tags,
-                                             callback=callback)
+                                             callback=callback,
+                                             strip_comment_tags=
+                                                options.strip_comment_tags)
                 for filename, lineno, message, comments in extracted:
                     filepath = os.path.normpath(os.path.join(dirname, filename))
                     catalog.add(message, None, [(filepath, lineno)],
diff --git a/babel/messages/tests/extract.py b/babel/messages/tests/extract.py
--- a/babel/messages/tests/extract.py
+++ b/babel/messages/tests/extract.py
@@ -145,7 +145,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment'], messages[0][3])
+        self.assertEqual([u'NOTE: A translation comment'], messages[0][3])
 
     def test_comment_tag_multiline(self):
         buf = StringIO("""
@@ -155,7 +155,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment', u'with a second line'],
+        self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                          messages[0][3])
 
     def test_translator_comments_with_previous_non_translator_comments(self):
@@ -168,7 +168,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'A translation comment', u'with a second line'],
+        self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                          messages[0][3])
 
     def test_comment_tags_not_on_start_of_comment(self):
@@ -181,7 +181,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'This one will be'], messages[0][3])
+        self.assertEqual([u'NOTE: This one will be'], messages[0][3])
 
     def test_multiple_comment_tags(self):
         buf = StringIO("""
@@ -195,10 +195,10 @@
         messages = list(extract.extract_python(buf, ('_',),
                                                ['NOTE1:', 'NOTE2:'], {}))
         self.assertEqual(u'Foo Bar1', messages[0][2])
-        self.assertEqual([u'A translation comment for tag1',
+        self.assertEqual([u'NOTE1: A translation comment for tag1',
                           u'with a second line'], messages[0][3])
         self.assertEqual(u'Foo Bar2', messages[1][2])
-        self.assertEqual([u'A translation comment for tag2'], messages[1][3])
+        self.assertEqual([u'NOTE2: A translation comment for tag2'], messages[1][3])
 
     def test_two_succeeding_comments(self):
         buf = StringIO("""
@@ -208,7 +208,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Foo Bar', messages[0][2])
-        self.assertEqual([u'one', u'NOTE: two'], messages[0][3])
+        self.assertEqual([u'NOTE: one', u'NOTE: two'], messages[0][3])
 
     def test_invalid_translator_comments(self):
         buf = StringIO("""
@@ -234,7 +234,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Hi there!', messages[0][2])
-        self.assertEqual([u'Hi!'], messages[0][3])
+        self.assertEqual([u'NOTE: Hi!'], messages[0][3])
         self.assertEqual(u'Hello', messages[1][2])
         self.assertEqual([], messages[1][3])
 
@@ -274,7 +274,7 @@
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'],
                                                {'encoding': 'utf-8'}))
         self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
 
     def test_utf8_message_with_magic_comment(self):
         buf = StringIO("""# -*- coding: utf-8 -*-
@@ -283,7 +283,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
 
     def test_utf8_message_with_utf8_bom(self):
         buf = StringIO(codecs.BOM_UTF8 + """
@@ -292,7 +292,7 @@
 """)
         messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
         self.assertEqual(u'Bonjour à tous', messages[0][2])
-        self.assertEqual([u'hello'], messages[0][3])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
 
     def test_utf8_raw_strings_match_unicode_strings(self):
         buf = StringIO(codecs.BOM_UTF8 + """
@@ -303,6 +303,24 @@
         self.assertEqual(u'Bonjour à tous', messages[0][2])
         self.assertEqual(messages[0][2], messages[1][2])
 
+    def test_extract_strip_comment_tags(self):
+        buf = StringIO("""\
+#: This is a comment with a very simple
+#: prefix specified
+_('Servus')
+
+# NOTE: This is a multiline comment with
+# a prefix too
+_('Babatschi')""")
+        messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'],
+                                        strip_comment_tags=True))
+        self.assertEqual(u'Servus', messages[0][1])
+        self.assertEqual([u'This is a comment with a very simple',
+                          u'prefix specified'], messages[0][2])
+        self.assertEqual(u'Babatschi', messages[1][1])
+        self.assertEqual([u'This is a multiline comment with',
+                          u'a prefix too'], messages[1][2])
+
 class ExtractTestCase(unittest.TestCase):
 
     def test_invalid_filter(self):
diff --git a/babel/messages/tests/frontend.py b/babel/messages/tests/frontend.py
--- a/babel/messages/tests/frontend.py
+++ b/babel/messages/tests/frontend.py
@@ -129,7 +129,7 @@
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel %(version)s\n"
 
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
 #. that will include several lines
 #: project/file1.py:8
 msgid "bar"
@@ -187,7 +187,7 @@
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel %(version)s\n"
 
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
 #. that will include several lines
 #: project/file1.py:8
 msgid "bar"
@@ -244,7 +244,7 @@
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel %(version)s\n"
 
-#. This will be a translator coment,
+#. TRANSLATOR: This will be a translator coment,
 #. that will include several lines
 #: project/file1.py:8
 msgid "bar"