# HG changeset patch # User cmlenz # Date 1181814540 0 # Node ID 8ea225f33f28e309f66d59689279e33215272666 # Parent 4b42e23644e5e6062813067e2eb256fdd506fbeb Fix for #16: the header message (`msgid = ""`) is now treated specially by `read_po` and `Catalog`. diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -14,6 +14,7 @@ """Data structures for message catalogs.""" from datetime import datetime +from email import message_from_string import re try: set @@ -24,7 +25,7 @@ from babel import __version__ as VERSION from babel.core import Locale from babel.messages.plurals import PLURALS -from babel.util import odict, LOCALTZ, UTC +from babel.util import odict, LOCALTZ, UTC, FixedOffsetTimezone __all__ = ['Message', 'Catalog'] __docformat__ = 'restructuredtext en' @@ -45,7 +46,8 @@ ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filenname, lineno)`` tuples :param flags: a set or sequence of flags - :param comments: a sequence of translator comments for the message + :param auto_comments: a sequence of automatic comments for the message + :param user_comments: a sequence of user comments for the message """ self.id = id if not string and self.pluralizable: @@ -149,7 +151,10 @@ self.version = version or 'VERSION' #: the project version self.copyright_holder = copyright_holder or 'ORGANIZATION' self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' - self.last_translator = last_translator #: last translator name + email + + self.last_translator = last_translator or 'FULL NAME ' + """Name and email address of the last translator.""" + self.charset = charset or 'utf-8' if creation_date is None: @@ -186,11 +191,11 @@ # This file is distributed under the same license as the Foobar project. # FIRST AUTHOR , 2007. # - + :type: `unicode` """) - def mime_headers(self): + def _get_mime_headers(self): headers = [] headers.append(('Project-Id-Version', '%s %s' % (self.project, self.version))) @@ -213,7 +218,28 @@ headers.append(('Content-Transfer-Encoding', '8bit')) headers.append(('Generated-By', 'Babel %s\n' % VERSION)) return headers - mime_headers = property(mime_headers, doc="""\ + + def _set_mime_headers(self, headers): + for name, value in headers: + name = name.lower() + if name == 'project-id-version': + parts = value.split(' ') + self.project = ' '.join(parts[:-1]) + self.version = parts[-1] + elif name == 'report-msgid-bugs-to': + self.msgid_bugs_address = value + elif name == 'last-translator': + self.last_translator = value + elif name == 'pot-creation-date': + # FIXME: this should use dates.parse_datetime as soon as that + # is ready + value, tzoffset, _ = re.split('[+-](\d{4})$', value, 1) + tt = time.strptime(value, '%Y-%m-%d %H:%M') + ts = time.mktime(tt) + tzoffset = FixedOffsetTimezone(int(tzoffset)) + self.creation_date = datetime.fromtimestamp(ts, tzoffset) + + mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ The MIME headers of the catalog, used for the special ``msgid ""`` entry. The behavior of this property changes slightly depending on whether a locale @@ -380,6 +406,10 @@ current.user_comments.extend(message.user_comments) current.flags |= message.flags message = current + elif id == '': + # special treatment for the header message + headers = message_from_string(message.string.encode(self.charset)) + self.mime_headers = headers.items() else: if isinstance(id, (list, tuple)): assert isinstance(message.string, (list, tuple)) @@ -403,7 +433,8 @@ ``(singular, plural)`` tuple for pluralizable messages :param locations: a sequence of ``(filenname, lineno)`` tuples :param flags: a set or sequence of flags - :param comments: a list of translator comments + :param auto_comments: a sequence of automatic comments + :param user_comments: a sequence of user comments """ self[id] = Message(id, string, list(locations), flags, auto_comments, user_comments) diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -37,12 +37,7 @@ file-like object and return a `Catalog`. >>> from StringIO import StringIO - >>> buf = StringIO('''# Translations template for PROJECT. - ... # Copyright (C) YEAR COPYRIGHT HOLDER - ... # This file is distributed under the same license as the PROJECT project. - ... # FIRST AUTHOR , YEAR. - ... # - ... + >>> buf = StringIO(''' ... #: main.py:1 ... #, fuzzy, python-format ... msgid "foo %(name)s" @@ -59,12 +54,6 @@ >>> catalog = read_po(buf) >>> catalog.revision_date = datetime(2007, 04, 01) - >>> print catalog.header_comment - # Translations template for PROJECT. - # Copyright (C) 2007 ORGANIZATION - # This file is distributed under the same license as the PROJECT project. - # FIRST AUTHOR , 2007. - >>> for message in catalog: ... if message.id: ... print (message.id, message.string) @@ -90,19 +79,17 @@ user_comments = [] auto_comments = [] in_msgid = in_msgstr = False - in_header = True - header_lines = [] def _add_message(): translations.sort() if len(messages) > 1: - msgid = tuple(messages) + msgid = tuple([denormalize(m) for m in messages]) else: - msgid = messages[0] + msgid = denormalize(messages[0]) if len(translations) > 1: - string = tuple([t[1] for t in translations]) + string = tuple([denormalize(t[1]) for t in translations]) else: - string = translations[0][1] + string = denormalize(translations[0][1]) catalog.add(msgid, string, list(locations), set(flags), list(user_comments), list(auto_comments)) del messages[:]; del translations[:]; del locations[:]; @@ -111,59 +98,53 @@ for line in fileobj.readlines(): line = line.strip() if line.startswith('#'): - if in_header and line[1:].startswith(' '): - header_lines.append(line) - else: - in_header = in_msgid = in_msgstr = False - if messages: - _add_message() - if line[1:].startswith(':'): - for location in line[2:].lstrip().split(): - filename, lineno = location.split(':', 1) - locations.append((filename, int(lineno))) - elif line[1:].startswith(','): - for flag in line[2:].lstrip().split(','): - flags.append(flag.strip()) - elif line[1:].startswith('.'): - # These are called auto-comments - comment = line[2:].strip() - if comment: - # Just check that we're not adding empty comments - auto_comments.append(comment) - elif line[1:].startswith(' '): - # These are called user comments - comment = line[1:].strip() - if comment: - # Just check that we're not adding empty comments - user_comments.append(comment) + in_msgid = in_msgstr = False + if messages: + _add_message() + if line[1:].startswith(':'): + for location in line[2:].lstrip().split(): + filename, lineno = location.split(':', 1) + locations.append((filename, int(lineno))) + elif line[1:].startswith(','): + for flag in line[2:].lstrip().split(','): + flags.append(flag.strip()) + elif line[1:].startswith('.'): + # These are called auto-comments + comment = line[2:].strip() + if comment: + # Just check that we're not adding empty comments + auto_comments.append(comment) + elif line[1:].startswith(' '): + # These are called user comments + comment = line[1:].strip() + if comment: + # Just check that we're not adding empty comments + user_comments.append(comment) else: - in_header = False if line.startswith('msgid_plural'): in_msgid = True msg = line[12:].lstrip() - messages.append(msg[1:-1]) + messages.append(msg) elif line.startswith('msgid'): in_msgid = True if messages: _add_message() - msg = line[5:].lstrip() - messages.append(msg[1:-1]) + messages.append(line[5:].lstrip()) elif line.startswith('msgstr'): in_msgid = False in_msgstr = True msg = line[6:].lstrip() if msg.startswith('['): idx, msg = msg[1:].split(']') - translations.append([int(idx), msg.lstrip()[1:-1]]) + translations.append([int(idx), msg.lstrip()]) else: - translations.append([0, msg[1:-1]]) + translations.append([0, msg]) elif line.startswith('"'): if in_msgid: - messages[-1] += line.rstrip()[1:-1] + messages[-1] += u'\n' + line.rstrip() elif in_msgstr: - translations[-1][1] += line.rstrip()[1:-1] + translations[-1][1] += u'\n' + line.rstrip() - catalog.header_comment = '\n'.join(header_lines) if messages: _add_message() return catalog @@ -193,8 +174,26 @@ .replace('\n', '\\n') \ .replace('\"', '\\"') +def unescape(string): + r"""Reverse escape the given string. + + >>> print unescape('"Say:\\n \\"hello, world!\\"\\n"') + Say: + "hello, world!" + + + :param string: the string to unescape + :return: the unescaped string + :rtype: `str` or `unicode` + """ + return string[1:-1].replace('\\\\', '\\') \ + .replace('\\t', '\t') \ + .replace('\\r', '\r') \ + .replace('\\n', '\n') \ + .replace('\\"', '\"') + def normalize(string, width=76): - r"""This converts a string into a format that is appropriate for .po files. + r"""Convert a string into a format that is appropriate for .po files. >>> print normalize('''Say: ... "hello, world!" @@ -253,6 +252,37 @@ lines[-1] += '\n' return u'""\n' + u'\n'.join([escape(l) for l in lines]) +def denormalize(string): + r"""Reverse the normalization done by the `normalize` function. + + >>> print denormalize(r'''"" + ... "Say:\n" + ... " \"hello, world!\"\n"''') + Say: + "hello, world!" + + + >>> print denormalize(r'''"" + ... "Say:\n" + ... " \"Lorem ipsum dolor sit " + ... "amet, consectetur adipisicing" + ... " elit, \"\n"''') + Say: + "Lorem ipsum dolor sit amet, consectetur adipisicing elit, " + + + :param string: the string to denormalize + :return: the denormalized string + :rtype: `unicode` or `str` + """ + if string.startswith('""'): + lines = [] + for line in string.splitlines()[1:]: + lines.append(unescape(line)) + return ''.join(lines) + else: + return unescape(string) + def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, sort_output=False, sort_by_file=False): r"""Write a ``gettext`` PO (portable object) template file for a given diff --git a/babel/messages/tests/pofile.py b/babel/messages/tests/pofile.py --- a/babel/messages/tests/pofile.py +++ b/babel/messages/tests/pofile.py @@ -20,6 +20,23 @@ from babel.messages import pofile +class ReadPoTestCase(unittest.TestCase): + + def test_read_multiline(self): + buf = StringIO(r'''msgid "" +"Here's some text that\n" +"includesareallylongwordthatmightbutshouldnt" +" throw us into an infinite " +"loop\n" +msgstr ""''') + catalog = pofile.read_po(buf) + self.assertEqual(1, len(catalog)) + message = list(catalog)[1] + self.assertEqual("Here's some text that\nincludesareallylongwordthat" + "mightbutshouldnt throw us into an infinite loop\n", + message.id) + + class WritePoTestCase(unittest.TestCase): def test_join_locations(self): @@ -110,6 +127,7 @@ def suite(): suite = unittest.TestSuite() suite.addTest(doctest.DocTestSuite(pofile)) + suite.addTest(unittest.makeSuite(ReadPoTestCase)) suite.addTest(unittest.makeSuite(WritePoTestCase)) return suite diff --git a/babel/util.py b/babel/util.py --- a/babel/util.py +++ b/babel/util.py @@ -142,30 +142,38 @@ rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:] return os.path.join(*rel_list) +ZERO = timedelta(0) + + +class FixedOffsetTimezone(tzinfo): + """Fixed offset in minutes east from UTC.""" + + def __init__(self, offset, name=None): + self._offset = timedelta(minutes=offset) + if name is None: + name = 'Etc/GMT+%d' % offset + self.zone = name + + def __str__(self): + return self.zone + + def __repr__(self): + return '' % (self.zone, self._offset) + + def utcoffset(self, dt): + return self._offset + + def tzname(self, dt): + return self.zone + + def dst(self, dt): + return ZERO + + try: from pytz import UTC except ImportError: - ZERO = timedelta(0) - - class UTC(tzinfo): - """Simple `tzinfo` implementation for UTC.""" - - def __repr__(self): - return '' - - def __str__(self): - return 'UTC' - - def utcoffset(self, dt): - return ZERO - - def tzname(self, dt): - return 'UTC' - - def dst(self, dt): - return ZERO - - UTC = UTC() + UTC = FixedOffsetTimezone(0, 'UTC') """`tzinfo` object for UTC (Universal Time). :type: `tzinfo` @@ -179,6 +187,7 @@ DSTDIFF = DSTOFFSET - STDOFFSET + class LocalTimezone(tzinfo): def utcoffset(self, dt): @@ -204,6 +213,7 @@ tt = time.localtime(stamp) return tt.tm_isdst > 0 + LOCALTZ = LocalTimezone() """`tzinfo` object for local time-zone.