# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Reading and writing of files in the ``gettext`` PO (portable object)
format.

:see: `The Format of PO Files
       <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
"""

from datetime import datetime
import os
import re

from babel.messages.catalog import Catalog, Message
from babel.util import wraptext

__all__ = ['read_po', 'write_po']
__docformat__ = 'restructuredtext en'

# A backslash followed by one of the characters that `escape` produces.
_UNESCAPE_RE = re.compile(r'\\([\\trn"])')
# Escape letters that stand for a control character; ``\\`` and ``\"`` simply
# map to the escaped character itself.
_UNESCAPE_MAP = {'n': '\n', 't': '\t', 'r': '\r'}


def unescape(string):
    r"""Reverse `escape` the given string.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    Escape sequences are decoded in a single left-to-right pass, so an
    escaped backslash followed by ``n`` stays a backslash and an ``n``:

    >>> unescape(r'"\\n"')
    '\\n'

    :param string: the string to unescape, including its surrounding double
                   quotes
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _replace(match):
        char = match.group(1)
        return _UNESCAPE_MAP.get(char, char)
    # Chained ``str.replace`` calls would re-interpret the output of earlier
    # replacements (``\\n`` -> ``\n`` -> newline); a single regex pass does not.
    return _UNESCAPE_RE.sub(_replace, string[1:-1])


def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    Multi-line strings that do not start with an empty ``""`` line are
    joined as well:

    >>> print denormalize(r'''"foo "
    ... "bar"''')
    foo bar

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' in string:
        escaped_lines = string.splitlines()
        if string.startswith('""'):
            # The leading empty string only exists for layout purposes.
            escaped_lines = escaped_lines[1:]
        return ''.join([unescape(line) for line in escaped_lines])
    return unescape(string)


def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers. Scalars are wrapped in
    # one-element lists so that the closures can rebind them.
    counter = [0]           # number of messages added so far
    offset = [0]            # zero-based line number of the current ``msgid``
    messages = []           # raw msgid / msgid_plural of the current entry
    translations = []       # [index, raw msgstr] pairs of the current entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]      # whether the current entry is marked with ``#~``
    context = []            # raw msgctxt lines of the current entry
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        """Build a `Message` from the collected state, store it in the
        catalog and reset the state for the next entry."""
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural entry: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        message = Message(msgid, string, list(locations), set(flags),
                          auto_comments, user_comments, lineno=offset[0] + 1,
                          context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _starts_entry(text):
        """Return whether `text` begins a new entry (``msgctxt``/``msgid``)."""
        if text.startswith('msgctxt'):
            return True
        return text.startswith('msgid') and \
            not text.startswith('msgid_plural')

    def _process_message_line(lineno, line):
        """Handle a keyword line (``msgid``, ``msgstr``, ...) or a quoted
        continuation line."""
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            messages.append(line[12:].lstrip())
        elif line.startswith('msgid'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Remember that quoted lines following this one continue the
            # context; without this flag they would be dropped.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, unicode):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            marker = line[1:2]
            # A ``#~`` line that neither opens the obsolete block nor starts a
            # new entry belongs to the obsolete entry being collected; it must
            # not reset the parser state or flush that entry prematurely.
            continues_obsolete = (marker == '~' and obsolete[0]
                                  and not _starts_entry(line[2:].lstrip()))
            if not continues_obsolete:
                in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
                if messages and translations:
                    _add_message()
            if marker == ':':
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif marker == ',':
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif marker == '~':
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif marker == '.':
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog


WORD_SEP = re.compile('('
    r'\s+|'                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'    # em-dash
')')


def escape(string):
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, enclosed in double quotes
    :rtype: `str` or `unicode`
    """
    # The backslash must be escaped first so that the backslashes introduced
    # by the following replacements are not escaped again.
    return '"%s"' % string.replace('\\', '\\\\') \
                          .replace('\t', '\\t') \
                          .replace('\r', '\\r') \
                          .replace('\n', '\\n') \
                          .replace('\"', '\\"')


def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                # Reverse the chunks so that they can be consumed from the
                # end of the list with ``pop()``.
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two surrounding double quotes
                    while chunks:
                        # NOTE: the prefix length is counted for every chunk,
                        # not once per line; kept as is so that the wrapping
                        # of existing catalogs does not change.
                        chunk_len = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + chunk_len < width:
                            buf.append(chunks.pop())
                            size += chunk_len
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([(prefix + escape(item)) for item in lines])


def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        return normalize(key, prefix=prefix, width=width)

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset, 'backslashreplace')
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        # xgettext always wraps comments even if --no-wrap is passed;
        # provide the same behaviour
        if width and width > 0:
            _width = width
        else:
            _width = 76
        for line in wraptext(comment, _width):
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if message.context:
            _write('%smsgctxt %s\n' % (prefix,
                                       _normalize(message.context, prefix)))
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            # Always emit one msgstr per plural form of the catalog, padding
            # missing translations with empty strings.
            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(key=lambda message: message.locations)

    for message in messages:
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = u'\n'.join(lines) + u'\n'
            elif not comment_header.endswith('\n'):
                # Terminate the header comment just like the wrapped variant
                # does, so that the next line does not end up inside it.
                comment_header += u'\n'
            _write(comment_header)

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # Sort the flags so that the output does not depend on the
            # iteration order of the flag set.
            _write('#%s\n' % ', '.join([''] + sorted(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')