# HG changeset patch # User cmlenz # Date 1180712160 0 # Node ID 93eaa2f4a0a2116d057746cc1110809553960425 # Parent ee33990f6e8364860bdd2be79039a35d47380ae0 Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks). diff --git a/babel/catalog/frontend.py b/babel/catalog/frontend.py --- a/babel/catalog/frontend.py +++ b/babel/catalog/frontend.py @@ -62,10 +62,10 @@ ('output-file=', 'o', 'name of the output file'), ('width=', 'w', - 'set output line width. Default: 76'), + 'set output line width (default 76)'), ('no-wrap', None, - 'do not break long message lines, longer than the output ' - 'line width, into several lines.') + 'do not break long message lines, longer than the output line width, ' + 'into several lines') ] boolean_options = [ 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap' @@ -73,36 +73,36 @@ def initialize_options(self): self.charset = 'utf-8' + self.width = 76 + self.no_wrap = False self.keywords = self._keywords = DEFAULT_KEYWORDS.copy() self.no_default_keywords = False self.no_location = False self.omit_header = False self.output_file = None self.input_dirs = None - self.width = None - self.no_wrap = False def finalize_options(self): if not self.input_dirs: self.input_dirs = dict.fromkeys([k.split('.',1)[0] for k in self.distribution.packages ]).keys() + if self.no_default_keywords and not self.keywords: - raise DistutilsOptionError, \ - 'you must specify new keywords if you disable the default ones' + raise DistutilsOptionError('you must specify new keywords if you ' + 'disable the default ones') if self.no_default_keywords: self._keywords = {} if isinstance(self.keywords, basestring): self._keywords.update(parse_keywords(self.keywords.split())) self.keywords = self._keywords + if self.no_wrap and self.width: - raise DistutilsOptionError, \ - "'--no-wrap' and '--width' are mutually exclusive." - elif self.no_wrap and not self.width: - self.width = 0 - elif not self.no_wrap and not self.width: - self.width = 76 - elif self.width and not self.no_wrap: + raise DistutilsOptionError("'--no-wrap' and '--width' are mutually" + "exclusive") + if self.no_wrap: + self.width = None + else: self.width = int(self.width) def run(self): @@ -115,11 +115,12 @@ for filename, lineno, funcname, message in extracted: messages.append((os.path.join(dirname, filename), lineno, funcname, message, None)) + + log.info('writing PO file to %s' % self.output_file) write_po(outfile, messages, project=self.distribution.get_name(), - version=self.distribution.get_version(), + version=self.distribution.get_version(), width=self.width, charset=self.charset, no_location=self.no_location, - omit_header=self.omit_header, width=self.width) - log.info('writing PO file to %s' % self.output_file) + omit_header=self.omit_header) finally: outfile.close() @@ -154,11 +155,11 @@ parser.add_option('-o', '--output', dest='output', help='path to the output POT file') parser.add_option('-w', '--width', dest='width', type='int', - help="set output line width. Default: 76") + help="set output line width (default 76)") parser.add_option('--no-wrap', dest='no_wrap', default=False, action = 'store_true', help='do not break long message ' 'lines, longer than the output line width, into several ' - 'lines.') + 'lines') options, args = parser.parse_args(argv[1:]) if not args: parser.error('incorrect number of arguments') @@ -193,9 +194,9 @@ for filename, lineno, funcname, message in extracted: messages.append((os.path.join(dirname, filename), lineno, funcname, message, None)) - write_po(outfile, messages, + write_po(outfile, messages, width=options.width, charset=options.charset, no_location=options.no_location, - omit_header=options.omit_header, width=options.width) + omit_header=options.omit_header) finally: if options.output: outfile.close() diff --git a/babel/catalog/pofile.py b/babel/catalog/pofile.py --- a/babel/catalog/pofile.py +++ b/babel/catalog/pofile.py @@ -18,83 +18,19 @@ `_ """ -# TODO: line wrapping -from textwrap import wrap from datetime import date, datetime import re try: set except NameError: from sets import Set as set +import textwrap import time from babel import __version__ as VERSION __all__ = ['escape', 'normalize', 'read_po', 'write_po'] -POT_HEADER = """\ -# Translations Template for %%(project)s. -# Copyright (C) YEAR ORGANIZATION -# FIRST AUTHOR , YEAR. -# -msgid "" -msgstr "" -"Project-Id-Version: %%(project)s %%(version)s\\n" -"POT-Creation-Date: %%(creation_date)s\\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" -"Last-Translator: FULL NAME \\n" -"Language-Team: LANGUAGE \\n" -"MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=%%(charset)s\\n" -"Content-Transfer-Encoding: 8bit\\n" -"Generated-By: Babel %s\\n" - -""" % VERSION - -PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search - -def escape(string): - r"""Escape the given string so that it can be included in double-quoted - strings in ``PO`` files. - - >>> escape('''Say: - ... "hello, world!" - ... ''') - 'Say:\\n \\"hello, world!\\"\\n' - - :param string: the string to escape - :return: the escaped string - :rtype: `str` or `unicode` - """ - return string.replace('\\', '\\\\') \ - .replace('\t', '\\t') \ - .replace('\r', '\\r') \ - .replace('\n', '\\n') \ - .replace('\"', '\\"') - -def normalize(string, charset='utf-8'): - """This converts a string into a format that is appropriate for .po files, - namely much closer to C style. - - :param string: the string to normalize - :param charset: the encoding to use for `unicode` strings - :return: the normalized string - :rtype: `str` - """ - string = string.encode(charset, 'backslashreplace') - lines = string.split('\n') - if len(lines) == 1: - string = '"' + escape(string) + '"' - else: - if not lines[-1]: - del lines[-1] - lines[-1] = lines[-1] + '\n' - for i in range(len(lines)): - lines[i] = escape(lines[i]) - lineterm = '\\n"\n"' - string = '""\n"' + lineterm.join(lines) + '"' - return string - def read_po(fileobj): """Read messages from a ``gettext`` PO (portable object) file from the given file-like object. @@ -195,6 +131,114 @@ if messages: yield pack() +POT_HEADER = """\ +# Translations Template for %%(project)s. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: %%(project)s %%(version)s\\n" +"POT-Creation-Date: %%(creation_date)s\\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" +"Last-Translator: FULL NAME \\n" +"Language-Team: LANGUAGE \\n" +"MIME-Version: 1.0\\n" +"Content-Type: text/plain; charset=%%(charset)s\\n" +"Content-Transfer-Encoding: 8bit\\n" +"Generated-By: Babel %s\\n" + +""" % VERSION + +PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?[diouxXeEfFgGcrs]').search + +WORD_SEP = re.compile('(' + r'\s+|' # any whitespace + r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words + r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash +')') + +def escape(string): + r"""Escape the given string so that it can be included in double-quoted + strings in ``PO`` files. + + >>> escape('''Say: + ... "hello, world!" + ... ''') + '"Say:\\n \\"hello, world!\\"\\n"' + + :param string: the string to escape + :return: the escaped string + :rtype: `str` or `unicode` + """ + return '"%s"' % string.replace('\\', '\\\\') \ + .replace('\t', '\\t') \ + .replace('\r', '\\r') \ + .replace('\n', '\\n') \ + .replace('\"', '\\"') + +def normalize(string, width=76): + r"""This converts a string into a format that is appropriate for .po files. + + >>> print normalize('''Say: + ... "hello, world!" + ... ''', width=None) + "" + "Say:\n" + " \"hello, world!\"\n" + + >>> print normalize('''Say: + ... "Lorem ipsum dolor sit amet, consectetur adipisicing elit, " + ... ''', width=32) + "" + "Say:\n" + " \"Lorem ipsum dolor sit " + "amet, consectetur adipisicing" + " elit, \"\n" + + :param string: the string to normalize + :param width: the maximum line width; use `None`, 0, or a negative number + to completely disable line wrapping + :param charset: the encoding to use for `unicode` strings + :return: the normalized string + :rtype: `unicode` + """ + if width and width > 0: + lines = [] + for idx, line in enumerate(string.splitlines(True)): + if len(escape(line)) > width: + chunks = WORD_SEP.split(line) + chunks.reverse() + while chunks: + buf = [] + size = 2 + while chunks: + l = len(escape(chunks[-1])) - 2 + if size + l < width: + buf.append(chunks.pop()) + size += l + else: + if not buf: + # handle long chunks by putting them on a + # separate line + buf.append(chunks.pop()) + break + lines.append(u''.join(buf)) + else: + lines.append(line) + else: + lines = string.splitlines(True) + + if len(lines) == 1: + return escape(string) + + # Remove empty trailing line + if not lines[-1]: + del lines[-1] + lines[-1] += '\n' + + return u'""\n' + u'\n'.join([escape(l) for l in lines]) + def write_po(fileobj, messages, project='PROJECT', version='VERSION', width=76, charset='utf-8', no_location=False, omit_header=False): r"""Write a ``gettext`` PO (portable object) file to the given file-like @@ -230,16 +274,23 @@ :param messages: an iterable over the messages :param project: the project name :param version: the project version + :param width: the maximum line width for the generated output; use `None`, + 0, or a negative number to completely disable line wrapping :param charset: the encoding :param no_location: do not emit a location comment for every message :param omit_header: do not include the ``msgid ""`` entry at the top of the output """ def _normalize(key): - return normalize(key, charset=charset) + return normalize(key, width=width).encode(charset, 'backslashreplace') + + def _write(text): + if isinstance(text, unicode): + text = text.encode(charset) + fileobj.write(text) if not omit_header: - fileobj.write(POT_HEADER % { + _write(POT_HEADER % { 'project': project, 'version': version, 'creation_date': time.strftime('%Y-%m-%d %H:%M%z'), @@ -268,53 +319,22 @@ for msgid in msgids: if not no_location: - locs = [ - u' %s:%s' % (fname, lineno) for - fname, lineno in locations[msgid] - ] - if width > 0: - wrapped = wrap(u''.join(locs), width, break_long_words=False) - else: - wrapped = locs - for line in wrapped: - fileobj.write(u'#: %s\n' % line.strip()) + locs = u' '.join([u'%s:%d' % item for item in locations[msgid]]) + if width and width > 0: + locs = textwrap.wrap(locs, width, break_long_words=False) + for line in locs: + _write('#: %s\n' % line.strip()) flags = msgflags[msgid] if flags: - fileobj.write('#%s\n' % ', '.join([''] + list(flags))) + _write('#%s\n' % ', '.join([''] + list(flags))) + if type(msgid) is tuple: assert len(msgid) == 2 - if width > 0: - wrapped = wrap(msgid[0], width, break_long_words=False) - else: - wrapped = [msgid[0]] - if len(wrapped) == 1: - fileobj.write('msgid ') - else: - fileobj.write('msgid ""\n') - for line in wrapped: - fileobj.write('%s\n' % normalize(line, charset)) - if width > 0: - wrapped = wrap(msgid[1], width, break_long_words=False) - else: - wrapped = [msgid[1]] - if len(wrapped) == 1: - fileobj.write('msgid_plural ') - else: - fileobj.write('msgid_plural ""\n') - for line in wrapped: - fileobj.write('%s\n' % normalize(line, charset)) - fileobj.write('msgstr[0] ""\n') - fileobj.write('msgstr[1] ""\n') + _write('msgid %s\n' % _normalize(msgid[0])) + _write('msgid_plural %s\n' % _normalize(msgid[1])) + _write('msgstr[0] ""\n') + _write('msgstr[1] ""\n') else: - if width > 0: - wrapped = wrap(msgid, width, break_long_words=False) - else: - wrapped = [msgid] - if len(wrapped) == 1: - fileobj.write('msgid ') - else: - fileobj.write('msgid ""\n') - for line in wrapped: - fileobj.write('%s\n' % normalize(line, charset)) - fileobj.write('msgstr ""\n') - fileobj.write('\n') + _write('msgid %s\n' % _normalize(msgid)) + _write('msgstr ""\n') + _write('\n') diff --git a/babel/catalog/tests/pofile.py b/babel/catalog/tests/pofile.py --- a/babel/catalog/tests/pofile.py +++ b/babel/catalog/tests/pofile.py @@ -12,12 +12,13 @@ # history and logs, available at http://babel.edgewall.org/log/. import doctest +from StringIO import StringIO import unittest from babel.catalog import pofile -class PythonFormatFlagUnitTest(unittest.TestCase): +class PythonFormatFlagTestCase(unittest.TestCase): def test_without_name(self): assert pofile.PYTHON_FORMAT('foo %d bar') @@ -25,10 +26,59 @@ assert pofile.PYTHON_FORMAT('foo %r bar') +class WritePoTestCase(unittest.TestCase): + + def test_join_locations(self): + buf = StringIO() + pofile.write_po(buf, [ + ('main.py', 1, None, u'foo', None), + ('utils.py', 3, None, u'foo', None), + ], omit_header=True) + self.assertEqual('''#: main.py:1 utils.py:3 +msgid "foo" +msgstr ""''', buf.getvalue().strip()) + + def test_wrap_long_lines(self): + text = """Here's some text where +white space and line breaks matter, and should + +not be removed + +""" + buf = StringIO() + pofile.write_po(buf, [ + ('main.py', 1, None, text, None), + ], no_location=True, omit_header=True, width=42) + self.assertEqual(r'''msgid "" +"Here's some text where \n" +"white space and line breaks matter, and" +" should\n" +"\n" +"not be removed\n" +"\n" +msgstr ""''', buf.getvalue().strip()) + + def test_wrap_long_lines_with_long_word(self): + text = """Here's some text that +includesareallylongwordthatmightbutshouldnt throw us into an infinite loop +""" + buf = StringIO() + pofile.write_po(buf, [ + ('main.py', 1, None, text, None), + ], no_location=True, omit_header=True, width=32) + self.assertEqual(r'''msgid "" +"Here's some text that\n" +"includesareallylongwordthatmightbutshouldnt" +" throw us into an infinite " +"loop\n" +msgstr ""''', buf.getvalue().strip()) + + def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(pofile)) - suite.addTest(unittest.makeSuite(PythonFormatFlagUnitTest)) + suite.addTest(unittest.makeSuite(PythonFormatFlagTestCase)) + suite.addTest(unittest.makeSuite(WritePoTestCase)) return suite if __name__ == '__main__': diff --git a/doc/style/epydoc.css b/doc/style/epydoc.css --- a/doc/style/epydoc.css +++ b/doc/style/epydoc.css @@ -54,7 +54,9 @@ table.summary th th, table.summary td td { border: none; } table.summary td.summary table td { color: #666; font-size: 90%; } table.summary td.summary table br { display: none; } -table.summary td.summary span.summary-type { font-size: 90%; } +table.summary td.summary span.summary-type { font-family: monospace; + font-size: 90%; +} table.summary td.summary span.summary-type code { font-size: 110%; } p.indent-wrapped-lines { color: #999; font-size: 85%; margin: 0; padding: 0 0 0 7em; text-indent: -7em;