# HG changeset patch # User aronacher # Date 1213287865 0 # Node ID 292c639506a3b910c8274154f59d5262bed6e721 # Parent 6811369cb912e7c29a8422cb07a55f7ea3b390e0 Stripping of comment tags is optional now. If enabled it will strip the tags from all lines of a comment now. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -2,7 +2,9 @@ http://svn.edgewall.org/repos/babel/tags/0.9.3/ (?, from branches/stable/0.9.x) -* Fixed invalid message extraction methods causing an UnboundLocalError. + * Fixed invalid message extraction methods causing an UnboundLocalError. + * The stripping of the comment tags in comments is optional now and + is done for each line in a comment. Version 0.9.2 diff --git a/babel/messages/extract.py b/babel/messages/extract.py --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -30,6 +30,7 @@ from tokenize import generate_tokens, COMMENT, NAME, OP, STRING from babel.util import parse_encoding, pathmatch, relpath +from textwrap import dedent __all__ = ['extract', 'extract_from_dir', 'extract_from_file'] __docformat__ = 'restructuredtext en' @@ -53,9 +54,21 @@ '%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") ' 'returns the header entry with meta information, not the empty string.') + +def _strip_comment_tags(comments, tags): + """Helper function for `extract` that strips comment tags from strings + in a list of comment lines. This function operates in-place. + """ + def _strip(line): + for tag in tags: + if line.startswith(tag): + return line[len(tag):].strip() + return line + comments[:] = map(_strip, comments) + def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING, options_map=None, keywords=DEFAULT_KEYWORDS, - comment_tags=(), callback=None): + comment_tags=(), callback=None, strip_comment_tags=False): """Extract messages from any source files found in the given directory. 
This function generates tuples of the form: @@ -118,6 +131,8 @@ performed; the function is passed the filename, the name of the extraction method and and the options dictionary as positional arguments, in that order + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :return: an iterator over ``(filename, lineno, funcname, message)`` tuples :rtype: ``iterator`` :see: `pathmatch` @@ -147,15 +162,17 @@ if callback: callback(filename, method, options) for lineno, message, comments in \ - extract_from_file(method, filepath, - keywords=keywords, - comment_tags=comment_tags, - options=options): + extract_from_file(method, filepath, + keywords=keywords, + comment_tags=comment_tags, + options=options, + strip_comment_tags= + strip_comment_tags): yield filename, lineno, message, comments break def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, - comment_tags=(), options=None): + comment_tags=(), options=None, strip_comment_tags=False): """Extract messages from a specific file. This function returns a list of tuples of the form: @@ -170,18 +187,21 @@ localizable strings :param comment_tags: a list of translator tags to search for and include in the results + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :param options: a dictionary of additional options (optional) :return: the list of extracted messages :rtype: `list` """ fileobj = open(filename, 'U') try: - return list(extract(method, fileobj, keywords, comment_tags, options)) + return list(extract(method, fileobj, keywords, comment_tags, options, + strip_comment_tags)) finally: fileobj.close() def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), - options=None): + options=None, strip_comment_tags=False): """Extract messages from the given file-like object using the specified extraction method. 
@@ -216,6 +236,8 @@ :param comment_tags: a list of translator tags to search for and include in the results :param options: a dictionary of additional options (optional) + :param strip_comment_tags: a flag that if set to `True` causes all comment + tags to be removed from the collected comments. :return: the list of extracted messages :rtype: `list` :raise ValueError: if the extraction method is not registered @@ -291,6 +313,10 @@ messages = tuple(msgs) if len(messages) == 1: messages = messages[0] + + if strip_comment_tags: + _strip_comment_tags(comments, comment_tags) + yield lineno, messages, comments def extract_nothing(fileobj, keywords, comment_tags, options): @@ -318,6 +344,7 @@ messages = [] translator_comments = [] in_def = in_translator_comments = False + comment_tag = None encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1') @@ -344,8 +371,6 @@ if in_translator_comments and \ translator_comments[-1][0] == lineno - 1: # We're already inside a translator comment, continue appending - # XXX: Should we check if the programmer keeps adding the - # comment_tag for every comment line??? probably not! translator_comments.append((lineno, value)) continue # If execution reaches this point, let's see if comment line @@ -353,8 +378,7 @@ for comment_tag in comment_tags: if value.startswith(comment_tag): in_translator_comments = True - comment = value[len(comment_tag):].strip() - translator_comments.append((lineno, comment)) + translator_comments.append((lineno, value)) break elif funcname and call_stack == 0: if tok == OP and value == ')': diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -223,12 +223,14 @@ ('add-comments=', 'c', 'place comment block with TAG (or those preceding keyword lines) in ' 'output file. 
Seperate multiple TAGs with commas(,)'), + ('strip-comments', None, + 'strip the comment TAGs from the comments.'), ('input-dirs=', None, 'directories that should be scanned for messages'), ] boolean_options = [ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap', - 'sort-output', 'sort-by-file' + 'sort-output', 'sort-by-file', 'strip-comments' ] def initialize_options(self): @@ -249,6 +251,7 @@ self.copyright_holder = None self.add_comments = None self._add_comments = [] + self.strip_comments = False def finalize_options(self): if self.no_default_keywords and not self.keywords: @@ -305,7 +308,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords=self._keywords, comment_tags=self._add_comments, - callback=callback) + callback=callback, + strip_comment_tags= + self.strip_comments) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)], @@ -816,12 +821,15 @@ help='place comment block with TAG (or those ' 'preceding keyword lines) in output file. 
One ' 'TAG per argument call') + parser.add_option('--strip-comment-tags', '-s', + dest='strip_comment_tags', action='store_true', + help='Strip the comment tags from the comments.') parser.set_defaults(charset='utf-8', keywords=[], no_default_keywords=False, no_location=False, omit_header = False, width=76, no_wrap=False, sort_output=False, sort_by_file=False, - comment_tags=[]) + comment_tags=[], strip_comment_tags=False) options, args = parser.parse_args(argv) if not args: parser.error('incorrect number of arguments') @@ -883,7 +891,9 @@ extracted = extract_from_dir(dirname, method_map, options_map, keywords, options.comment_tags, - callback=callback) + callback=callback, + strip_comment_tags= + options.strip_comment_tags) for filename, lineno, message, comments in extracted: filepath = os.path.normpath(os.path.join(dirname, filename)) catalog.add(message, None, [(filepath, lineno)], diff --git a/babel/messages/tests/extract.py b/babel/messages/tests/extract.py --- a/babel/messages/tests/extract.py +++ b/babel/messages/tests/extract.py @@ -145,7 +145,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment'], messages[0][3]) + self.assertEqual([u'NOTE: A translation comment'], messages[0][3]) def test_comment_tag_multiline(self): buf = StringIO(""" @@ -155,7 +155,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment', u'with a second line'], + self.assertEqual([u'NOTE: A translation comment', u'with a second line'], messages[0][3]) def test_translator_comments_with_previous_non_translator_comments(self): @@ -168,7 +168,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'A translation comment', u'with a second line'], + self.assertEqual([u'NOTE: A 
translation comment', u'with a second line'], messages[0][3]) def test_comment_tags_not_on_start_of_comment(self): @@ -181,7 +181,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'This one will be'], messages[0][3]) + self.assertEqual([u'NOTE: This one will be'], messages[0][3]) def test_multiple_comment_tags(self): buf = StringIO(""" @@ -195,10 +195,10 @@ messages = list(extract.extract_python(buf, ('_',), ['NOTE1:', 'NOTE2:'], {})) self.assertEqual(u'Foo Bar1', messages[0][2]) - self.assertEqual([u'A translation comment for tag1', + self.assertEqual([u'NOTE1: A translation comment for tag1', u'with a second line'], messages[0][3]) self.assertEqual(u'Foo Bar2', messages[1][2]) - self.assertEqual([u'A translation comment for tag2'], messages[1][3]) + self.assertEqual([u'NOTE2: A translation comment for tag2'], messages[1][3]) def test_two_succeeding_comments(self): buf = StringIO(""" @@ -208,7 +208,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Foo Bar', messages[0][2]) - self.assertEqual([u'one', u'NOTE: two'], messages[0][3]) + self.assertEqual([u'NOTE: one', u'NOTE: two'], messages[0][3]) def test_invalid_translator_comments(self): buf = StringIO(""" @@ -234,7 +234,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Hi there!', messages[0][2]) - self.assertEqual([u'Hi!'], messages[0][3]) + self.assertEqual([u'NOTE: Hi!'], messages[0][3]) self.assertEqual(u'Hello', messages[1][2]) self.assertEqual([], messages[1][3]) @@ -274,7 +274,7 @@ messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {'encoding': 'utf-8'})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) def test_utf8_message_with_magic_comment(self): buf = StringIO("""# -*- coding: utf-8 -*- @@ -283,7 +283,7 
@@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) def test_utf8_message_with_utf8_bom(self): buf = StringIO(codecs.BOM_UTF8 + """ @@ -292,7 +292,7 @@ """) messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {})) self.assertEqual(u'Bonjour à tous', messages[0][2]) - self.assertEqual([u'hello'], messages[0][3]) + self.assertEqual([u'NOTE: hello'], messages[0][3]) def test_utf8_raw_strings_match_unicode_strings(self): buf = StringIO(codecs.BOM_UTF8 + """ @@ -303,6 +303,24 @@ self.assertEqual(u'Bonjour à tous', messages[0][2]) self.assertEqual(messages[0][2], messages[1][2]) + def test_extract_strip_comment_tags(self): + buf = StringIO("""\ +#: This is a comment with a very simple +#: prefix specified +_('Servus') + +# NOTE: This is a multiline comment with +# a prefix too +_('Babatschi')""") + messages = list(extract.extract('python', buf, comment_tags=['NOTE:', ':'], + strip_comment_tags=True)) + self.assertEqual(u'Servus', messages[0][1]) + self.assertEqual([u'This is a comment with a very simple', + u'prefix specified'], messages[0][2]) + self.assertEqual(u'Babatschi', messages[1][1]) + self.assertEqual([u'This is a multiline comment with', + u'a prefix too'], messages[1][2]) + class ExtractTestCase(unittest.TestCase): def test_invalid_filter(self): diff --git a/babel/messages/tests/frontend.py b/babel/messages/tests/frontend.py --- a/babel/messages/tests/frontend.py +++ b/babel/messages/tests/frontend.py @@ -129,7 +129,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar" @@ -187,7 +187,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. 
This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar" @@ -244,7 +244,7 @@ "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel %(version)s\n" -#. This will be a translator coment, +#. TRANSLATOR: This will be a translator coment, #. that will include several lines #: project/file1.py:8 msgid "bar"