changeset 24:b09e90803d1b trunk

Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
author cmlenz
date Fri, 01 Jun 2007 15:36:00 +0000
parents a05c25898be4
children 1b9956f20649
files babel/catalog/frontend.py babel/catalog/pofile.py babel/catalog/tests/pofile.py doc/style/epydoc.css
diffstat 4 files changed, 209 insertions(+), 136 deletions(-) [+]
line wrap: on
line diff
--- a/babel/catalog/frontend.py
+++ b/babel/catalog/frontend.py
@@ -62,10 +62,10 @@
         ('output-file=', 'o',
          'name of the output file'),
         ('width=', 'w',
-         'set output line width. Default: 76'),
+         'set output line width (default 76)'),
         ('no-wrap', None,
-         'do not break long message lines, longer than the output '
-         'line width, into several lines.')
+         'do not break long message lines, longer than the output line width, '
+         'into several lines')
     ]
     boolean_options = [
         'no-default-keywords', 'no-location', 'omit-header', 'no-wrap'
@@ -73,36 +73,36 @@
 
     def initialize_options(self):
         self.charset = 'utf-8'
+        self.width = None  # unset; finalize_options applies the default
+        self.no_wrap = False
         self.keywords = self._keywords = DEFAULT_KEYWORDS.copy()
         self.no_default_keywords = False
         self.no_location = False
         self.omit_header = False
         self.output_file = None
         self.input_dirs = None
-        self.width = None
-        self.no_wrap = False
 
     def finalize_options(self):
         if not self.input_dirs:
             self.input_dirs = dict.fromkeys([k.split('.',1)[0]
                 for k in self.distribution.packages
             ]).keys()
+
         if self.no_default_keywords and not self.keywords:
-            raise DistutilsOptionError, \
-                'you must specify new keywords if you disable the default ones'
+            raise DistutilsOptionError('you must specify new keywords if you '
+                                       'disable the default ones')
         if self.no_default_keywords:
             self._keywords = {}
         if isinstance(self.keywords, basestring):
             self._keywords.update(parse_keywords(self.keywords.split()))
         self.keywords = self._keywords
+
         if self.no_wrap and self.width:
-            raise DistutilsOptionError, \
-                "'--no-wrap' and '--width' are mutually exclusive."
-        elif self.no_wrap and not self.width:
-            self.width = 0
-        elif not self.no_wrap and not self.width:
-            self.width = 76
-        elif self.width and not self.no_wrap:
+            raise DistutilsOptionError("'--no-wrap' and '--width' are mutually "
+                                       "exclusive")
+        if self.no_wrap:
+            self.width = None  # write_po treats None as "do not wrap"
+        else:
-            self.width = int(self.width)
+            self.width = int(self.width or 76)  # 76 is the documented default
 
     def run(self):
@@ -115,11 +115,12 @@
                 for filename, lineno, funcname, message in extracted:
                     messages.append((os.path.join(dirname, filename), lineno,
                                      funcname, message, None))
+
+            log.info('writing PO file to %s' % self.output_file)
             write_po(outfile, messages, project=self.distribution.get_name(),
-                     version=self.distribution.get_version(),
+                     version=self.distribution.get_version(), width=self.width,
                      charset=self.charset, no_location=self.no_location,
-                     omit_header=self.omit_header, width=self.width)
-            log.info('writing PO file to %s' % self.output_file)
+                     omit_header=self.omit_header)
         finally:
             outfile.close()
 
@@ -154,11 +155,11 @@
     parser.add_option('-o', '--output', dest='output',
                       help='path to the output POT file')
     parser.add_option('-w', '--width', dest='width', type='int',
-                      help="set output line width. Default: 76")
+                      help="set output line width (default 76)")
     parser.add_option('--no-wrap', dest='no_wrap', default=False,
                       action = 'store_true', help='do not break long message '
                       'lines, longer than the output line width, into several '
-                      'lines.')
+                      'lines')
     options, args = parser.parse_args(argv[1:])
     if not args:
         parser.error('incorrect number of arguments')
@@ -193,9 +194,9 @@
             for filename, lineno, funcname, message in extracted:
                 messages.append((os.path.join(dirname, filename), lineno,
                                  funcname, message, None))
-        write_po(outfile, messages,
+        write_po(outfile, messages, width=options.width,
                  charset=options.charset, no_location=options.no_location,
-                 omit_header=options.omit_header, width=options.width)
+                 omit_header=options.omit_header)
     finally:
         if options.output:
             outfile.close()
--- a/babel/catalog/pofile.py
+++ b/babel/catalog/pofile.py
@@ -18,83 +18,19 @@
        <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
 """
 
-# TODO: line wrapping
-from textwrap import wrap
 from datetime import date, datetime
 import re
 try:
     set
 except NameError:
     from sets import Set as set
+import textwrap
 import time
 
 from babel import __version__ as VERSION
 
 __all__ = ['escape', 'normalize', 'read_po', 'write_po']
 
-POT_HEADER = """\
-# Translations Template for %%(project)s.
-# Copyright (C) YEAR ORGANIZATION
-# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: %%(project)s %%(version)s\\n"
-"POT-Creation-Date: %%(creation_date)s\\n"
-"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
-"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
-"Language-Team: LANGUAGE <LL@li.org>\\n"
-"MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=%%(charset)s\\n"
-"Content-Transfer-Encoding: 8bit\\n"
-"Generated-By: Babel %s\\n"
-
-""" % VERSION
-
-PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
-
-def escape(string):
-    r"""Escape the given string so that it can be included in double-quoted
-    strings in ``PO`` files.
-    
-    >>> escape('''Say:
-    ...   "hello, world!"
-    ... ''')
-    'Say:\\n  \\"hello, world!\\"\\n'
-    
-    :param string: the string to escape
-    :return: the escaped string
-    :rtype: `str` or `unicode`
-    """
-    return string.replace('\\', '\\\\') \
-                 .replace('\t', '\\t') \
-                 .replace('\r', '\\r') \
-                 .replace('\n', '\\n') \
-                 .replace('\"', '\\"')
-
-def normalize(string, charset='utf-8'):
-    """This converts a string into a format that is appropriate for .po files,
-    namely much closer to C style.
-    
-    :param string: the string to normalize
-    :param charset: the encoding to use for `unicode` strings
-    :return: the normalized string
-    :rtype: `str`
-    """
-    string = string.encode(charset, 'backslashreplace')
-    lines = string.split('\n')
-    if len(lines) == 1:
-        string = '"' + escape(string) + '"'
-    else:
-        if not lines[-1]:
-            del lines[-1]
-            lines[-1] = lines[-1] + '\n'
-        for i in range(len(lines)):
-            lines[i] = escape(lines[i])
-        lineterm = '\\n"\n"'
-        string = '""\n"' + lineterm.join(lines) + '"'
-    return string
-
 def read_po(fileobj):
     """Read messages from a ``gettext`` PO (portable object) file from the given
     file-like object.
@@ -195,6 +131,114 @@
     if messages:
         yield pack()
 
+POT_HEADER = """\
+# Translations Template for %%(project)s.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: %%(project)s %%(version)s\\n"
+"POT-Creation-Date: %%(creation_date)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=%%(charset)s\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+"Generated-By: Babel %s\\n"
+
+""" % VERSION
+
+PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search
+
+WORD_SEP = re.compile('('
+    r'\s+|'                                 # any whitespace
+    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
+    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
+')')
+
+def escape(string):
+    r"""Escape the given string so that it can be included in double-quoted
+    strings in ``PO`` files.
+    
+    >>> escape('''Say:
+    ...   "hello, world!"
+    ... ''')
+    '"Say:\\n  \\"hello, world!\\"\\n"'
+    
+    :param string: the string to escape
+    :return: the escaped string
+    :rtype: `str` or `unicode`
+    """
+    return '"%s"' % string.replace('\\', '\\\\') \
+                          .replace('\t', '\\t') \
+                          .replace('\r', '\\r') \
+                          .replace('\n', '\\n') \
+                          .replace('\"', '\\"')
+
+def normalize(string, width=76):
+    r"""This converts a string into a format that is appropriate for .po files.
+    
+    >>> print normalize('''Say:
+    ...   "hello, world!"
+    ... ''', width=None)
+    ""
+    "Say:\n"
+    "  \"hello, world!\"\n"
+    
+    >>> print normalize('''Say:
+    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
+    ... ''', width=32)
+    ""
+    "Say:\n"
+    "  \"Lorem ipsum dolor sit "
+    "amet, consectetur adipisicing"
+    " elit, \"\n"
+    
+    :param string: the string to normalize
+    :param width: the maximum width of the quoted output lines; use `None`,
+                  0, or a negative number to completely disable line
+                  wrapping
+    :return: the normalized string
+    :rtype: `unicode`
+    """
+    if width and width > 0:
+        lines = []
+        for line in string.splitlines(True):
+            if len(escape(line)) > width:
+                chunks = WORD_SEP.split(line)
+                chunks.reverse()  # consume from the end with pop()
+                while chunks:
+                    buf = []
+                    size = 2  # the enclosing double quotes
+                    while chunks:
+                        length = len(escape(chunks[-1])) - 2
+                        if size + length < width:
+                            buf.append(chunks.pop())
+                            size += length
+                        else:
+                            if not buf:
+                                # handle long chunks by putting them on a
+                                # separate line
+                                buf.append(chunks.pop())
+                            break
+                    lines.append(u''.join(buf))
+            else:
+                lines.append(line)
+    else:
+        lines = string.splitlines(True)
+
+    # The word splitter can leave empty pieces behind; they carry no text, and
+    # every real line already keeps its own line ending (``splitlines(True)``)
+    lines = [line for line in lines if line]
+
+    # Nothing to wrap (including the empty string): emit a single literal
+    if len(lines) <= 1:
+        return escape(string)
+
+    return u'""\n' + u'\n'.join([escape(line) for line in lines])
+
 def write_po(fileobj, messages, project='PROJECT', version='VERSION', width=76,
              charset='utf-8', no_location=False, omit_header=False):
     r"""Write a ``gettext`` PO (portable object) file to the given file-like
@@ -230,16 +274,23 @@
     :param messages: an iterable over the messages
     :param project: the project name
     :param version: the project version
+    :param width: the maximum line width for the generated output; use `None`,
+                  0, or a negative number to completely disable line wrapping
     :param charset: the encoding
     :param no_location: do not emit a location comment for every message
     :param omit_header: do not include the ``msgid ""`` entry at the top of the
                         output
     """
     def _normalize(key):
-        return normalize(key, charset=charset)
+        return normalize(key, width=width).encode(charset, 'backslashreplace')
+
+    def _write(text):
+        if isinstance(text, unicode):
+            text = text.encode(charset)
+        fileobj.write(text)
 
     if not omit_header:
-        fileobj.write(POT_HEADER % {
+        _write(POT_HEADER % {
             'project': project,
             'version': version,
             'creation_date': time.strftime('%Y-%m-%d %H:%M%z'),
@@ -268,53 +319,22 @@
 
     for msgid in msgids:
         if not no_location:
-            locs = [
-                u' %s:%s' % (fname, lineno) for
-                fname, lineno in locations[msgid]
-            ]
-            if width > 0:
-                wrapped = wrap(u''.join(locs), width, break_long_words=False)
-            else:
-                wrapped = locs
-            for line in wrapped:
-                fileobj.write(u'#: %s\n' % line.strip())
+            locs = [u' '.join([u'%s:%d' % item for item in locations[msgid]])]
+            if width and width > 0:
+                locs = textwrap.wrap(locs[0], width, break_long_words=False)
+            for line in locs:  # always a list of lines, wrapped or not
+                _write('#: %s\n' % line.strip())
         flags = msgflags[msgid]
         if flags:
-            fileobj.write('#%s\n' % ', '.join([''] + list(flags)))
+            _write('#%s\n' % ', '.join([''] + list(flags)))
+
         if type(msgid) is tuple:
             assert len(msgid) == 2
-            if width > 0:
-                wrapped = wrap(msgid[0], width, break_long_words=False)
-            else:
-                wrapped = [msgid[0]]
-            if len(wrapped) == 1:
-                fileobj.write('msgid ')
-            else:
-                fileobj.write('msgid ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            if width > 0:
-                wrapped = wrap(msgid[1], width, break_long_words=False)
-            else:
-                wrapped = [msgid[1]]
-            if len(wrapped) == 1:
-                fileobj.write('msgid_plural ')
-            else:
-                fileobj.write('msgid_plural ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            fileobj.write('msgstr[0] ""\n')
-            fileobj.write('msgstr[1] ""\n')
+            _write('msgid %s\n' % _normalize(msgid[0]))
+            _write('msgid_plural %s\n' % _normalize(msgid[1]))
+            _write('msgstr[0] ""\n')
+            _write('msgstr[1] ""\n')
         else:
-            if width > 0:
-                wrapped = wrap(msgid, width, break_long_words=False)
-            else:
-                wrapped = [msgid]
-            if len(wrapped) == 1:
-                fileobj.write('msgid ')
-            else:
-                fileobj.write('msgid ""\n')
-            for line in wrapped:
-                fileobj.write('%s\n' % normalize(line, charset))
-            fileobj.write('msgstr ""\n')
-        fileobj.write('\n')
+            _write('msgid %s\n' % _normalize(msgid))
+            _write('msgstr ""\n')
+        _write('\n')
--- a/babel/catalog/tests/pofile.py
+++ b/babel/catalog/tests/pofile.py
@@ -12,12 +12,13 @@
 # history and logs, available at http://babel.edgewall.org/log/.
 
 import doctest
+from StringIO import StringIO
 import unittest
 
 from babel.catalog import pofile
 
 
-class PythonFormatFlagUnitTest(unittest.TestCase):
+class PythonFormatFlagTestCase(unittest.TestCase):
 
     def test_without_name(self):
         assert pofile.PYTHON_FORMAT('foo %d bar')
@@ -25,10 +26,59 @@
         assert pofile.PYTHON_FORMAT('foo %r bar')
 
 
+class WritePoTestCase(unittest.TestCase):
+
+    def test_join_locations(self):
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, u'foo', None),
+            ('utils.py', 3, None, u'foo', None),
+        ], omit_header=True)
+        self.assertEqual('''#: main.py:1 utils.py:3
+msgid "foo"
+msgstr ""''', buf.getvalue().strip())
+
+    def test_wrap_long_lines(self):
+        text = """Here's some text where       
+white space and line breaks matter, and should
+
+not be removed
+
+"""
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, text, None),
+        ], no_location=True, omit_header=True, width=42)
+        self.assertEqual(r'''msgid ""
+"Here's some text where       \n"
+"white space and line breaks matter, and"
+" should\n"
+"\n"
+"not be removed\n"
+"\n"
+msgstr ""''', buf.getvalue().strip())
+
+    def test_wrap_long_lines_with_long_word(self):
+        text = """Here's some text that
+includesareallylongwordthatmightbutshouldnt throw us into an infinite loop
+"""
+        buf = StringIO()
+        pofile.write_po(buf, [
+            ('main.py', 1, None, text, None),
+        ], no_location=True, omit_header=True, width=32)
+        self.assertEqual(r'''msgid ""
+"Here's some text that\n"
+"includesareallylongwordthatmightbutshouldnt"
+" throw us into an infinite "
+"loop\n"
+msgstr ""''', buf.getvalue().strip())
+
+
 def suite():
     suite = unittest.TestSuite()
     suite.addTest(doctest.DocTestSuite(pofile))
-    suite.addTest(unittest.makeSuite(PythonFormatFlagUnitTest))
+    suite.addTest(unittest.makeSuite(PythonFormatFlagTestCase))
+    suite.addTest(unittest.makeSuite(WritePoTestCase))
     return suite
 
 if __name__ == '__main__':
--- a/doc/style/epydoc.css
+++ b/doc/style/epydoc.css
@@ -54,7 +54,9 @@
 table.summary th th, table.summary td td { border: none; }
 table.summary td.summary table td { color: #666; font-size: 90%; }
 table.summary td.summary table br { display: none; }
-table.summary td.summary span.summary-type { font-size: 90%; }
+table.summary td.summary span.summary-type { font-family: monospace; 
+  font-size: 90%;
+}
 table.summary td.summary span.summary-type code { font-size: 110%; }
 p.indent-wrapped-lines { color: #999; font-size: 85%; margin: 0;
   padding: 0 0 0 7em; text-indent: -7em;
Copyright (C) 2012-2017 Edgewall Software