# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""Reading and writing of files in the ``gettext`` PO (portable object)
format.

:see: `The Format of PO Files
       <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
"""

from datetime import datetime
import os
import re

from babel.messages.catalog import Catalog, Message
from babel.util import wraptext

__all__ = ['read_po', 'write_po']
__docformat__ = 'restructuredtext en'

# Matches the backslash escapes produced by `escape`: \\, \t, \r, \n and \".
_UNESCAPE_RE = re.compile(r'\\([\\trn"])')


def unescape(string):
    r"""Reverse `escape` the given string.

    The surrounding double quotes are dropped and the escape sequences
    ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are turned back into the
    characters they stand for.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the string to unescape
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def replace_escapes(match):
        m = match.group(1)
        if m == 'n':
            return '\n'
        elif m == 't':
            return '\t'
        elif m == 'r':
            return '\r'
        # m is \ or "
        return m
    # string[1:-1] strips the enclosing double quotes
    return _UNESCAPE_RE.sub(replace_escapes, string[1:-1])


def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' not in string:
        return unescape(string)
    escaped_lines = string.splitlines()
    if string.startswith('""'):
        # The leading empty string only marks a multi-line value
        escaped_lines = escaped_lines[1:]
    return ''.join([unescape(line) for line in escaped_lines])


def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', u'quux %(name)s')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), (u'bar', u'baaz'))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state.  Scalars are wrapped in one-element lists so that the
    # nested helpers below can rebind them (there is no ``nonlocal`` here).
    counter = [0]           # number of messages added so far
    offset = [0]            # zero-based line index of the current ``msgid``
    messages = []           # raw msgid (and msgid_plural) of the current entry
    translations = []       # [index, raw msgstr] pairs of the current entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]      # whether the current entry is a ``#~`` entry
    context = []            # raw msgctxt lines of the current entry
    in_msgid = [False]      # which keyword a ``"..."`` continuation extends
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        """Build a `Message` from the collected state, store it in the
        catalog and reset the state for the next entry."""
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # Pad missing plural forms with empty strings
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        # Copies are handed over because the state lists are cleared in
        # place right below.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1, context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line, is_obsolete=False):
        """Feed one keyword line or ``"..."`` continuation line into the
        parser state.

        :param lineno: zero-based index of the line in the input
        :param line: the stripped line, without any ``#~`` marker
        :param is_obsolete: whether the line came from a ``#~`` comment
        """
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgstr[0] = in_msgctxt[0] = False
            messages.append(line[12:].lstrip())
        elif line.startswith('msgid'):
            # Flush the pending entry *before* touching the offset,
            # otherwise it would be recorded with this entry's line number.
            if messages:
                _add_message()
            in_msgid[0] = True
            in_msgstr[0] = in_msgctxt[0] = False
            offset[0] = lineno
            messages.append(line[5:].lstrip())
        elif line.startswith('msgstr'):
            in_msgid[0] = in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Needed so that continuation lines of the context are kept
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())
        if is_obsolete:
            # Set only now, so that an entry flushed above keeps its own
            # obsolete status.
            obsolete[0] = True

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, unicode):
            line = line.decode(catalog.charset)
        if line.startswith('#~'):
            # Obsolete entries are ordinary keyword/continuation lines
            # behind a marker; they must not reset the continuation state
            # or flush the entry the way real comments do.
            _process_message_line(lineno, line[2:].lstrip(),
                                  is_obsolete=True)
        elif line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog


WORD_SEP = re.compile('('
    r'\s+|'                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'    # em-dash
')')


def escape(string):
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # Backslashes must be doubled first so that the backslashes introduced
    # by the later replacements are not escaped again.
    return '"%s"' % string.replace('\\', '\\\\') \
                          .replace('\t', '\\t') \
                          .replace('\r', '\\r') \
                          .replace('\n', '\\n') \
                          .replace('\"', '\\"')


def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                # Reversed so that the next chunk can be popped off the end
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two enclosing double quotes
                    while chunks:
                        length = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + length < width:
                            buf.append(chunks.pop())
                            size += length
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])


def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        """Normalize `key` using the line width requested by the caller."""
        return normalize(key, prefix=prefix, width=width)

    def _write(text):
        """Write `text` to `fileobj`, encoding unicode to the catalog
        charset first."""
        if isinstance(text, unicode):
            text = text.encode(catalog.charset, 'backslashreplace')
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        """Write `comment` as wrapped ``#<prefix> ...`` lines."""
        # xgettext always wraps comments even if --no-wrap is passed;
        # provide the same behaviour
        if width and width > 0:
            _width = width
        else:
            _width = 76
        for line in wraptext(comment, _width):
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        """Write the msgctxt/msgid/msgstr lines of `message`, each one
        preceded by `prefix` (``'#~ '`` for obsolete entries)."""
        if message.context:
            _write('%smsgctxt %s\n' % (prefix,
                                       _normalize(message.context, prefix)))
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Emit an empty entry for every missing plural form
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(key=lambda message: message.locations)

    for message in messages:
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = u'\n'.join(lines)
            _write(comment_header + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # Sorted so that the output does not depend on set ordering
            _write('#%s\n' % ', '.join([''] + sorted(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')