Mercurial > babel > mirror
annotate babel/messages/pofile.py @ 356:4cdca48fc832 trunk
Fixed #59 by falling back silently on invalid location comments.
author | aronacher |
---|---|
date | Tue, 17 Jun 2008 20:40:36 +0000 |
parents | 2ee7dc04836c |
children | bc18179832b7 |
rev | line source |
---|---|
1 | 1 # -*- coding: utf-8 -*- |
2 # | |
335 | 3 # Copyright (C) 2007-2008 Edgewall Software |
1 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Reading and writing of files in the ``gettext`` PO (portable object) | |
15 format. | |
16 | |
17 :see: `The Format of PO Files | |
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ | |
19 """ | |
20 | |
5
132526dcd074
* The creation-date header in generated PO files now includes the timezone offset.
cmlenz
parents:
1
diff
changeset
|
21 from datetime import date, datetime |
134 | 22 import os |
1 | 23 import re |
6
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
24 try: |
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
25 set |
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
26 except NameError: |
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
27 from sets import Set as set |
1 | 28 |
29 from babel import __version__ as VERSION | |
199
a0d22f2f2df0
Handle obsolete messages when parsing catalogs. Closes #32.
cmlenz
parents:
196
diff
changeset
|
30 from babel.messages.catalog import Catalog, Message |
315 | 31 from babel.util import wraptext, LOCALTZ |
1 | 32 |
178
749c0f6863bc
Minor change to what symbols are "exported", primarily for the generated docs.
cmlenz
parents:
175
diff
changeset
|
33 __all__ = ['read_po', 'write_po'] |
161 | 34 __docformat__ = 'restructuredtext en' |
158 | 35 |
def unescape(string):
    r"""Reverse `escape` the given string.

    The surrounding double quotes are stripped and the escape sequences
    ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are decoded.

    >>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
    Say:
     "hello, world!"
    <BLANKLINE>

    Decoding happens in a single left-to-right pass, so an escaped backslash
    that is followed by a letter is not mistaken for another escape sequence:

    >>> print(unescape('"C:\\\\new"'))
    C:\new

    :param string: the string to unescape
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _decode(match):
        char = match.group(1)
        # ``\\`` and ``\"`` decode to the escaped character itself
        return {'n': '\n', 't': '\t', 'r': '\r'}.get(char, char)

    # A single regex pass consumes each escape sequence exactly once; chained
    # ``str.replace`` calls would re-interpret the backslash produced by
    # decoding ``\\`` as the start of a new escape sequence.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
53 | |
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"hello, world!\"\n"'''))
    Say:
     "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
     "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A multi-line value whose first line is not the empty ``""`` placeholder
    is joined line by line as well:

    >>> print(denormalize(r'''"Say: "
    ... "hello"'''))
    Say: hello

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' in string:
        escaped_lines = string.splitlines()
        if string.startswith('""'):
            # the leading empty string only marks a wrapped value; drop it
            escaped_lines = escaped_lines[1:]
        # every physical line is an independently quoted chunk
        return ''.join([unescape(line) for line in escaped_lines])
    return unescape(string)
1 | 84 |
199
a0d22f2f2df0
Handle obsolete messages when parsing catalogs. Closes #32.
cmlenz
parents:
196
diff
changeset
|
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Single-element lists act as mutable cells so the nested helpers below
    # can update parser state without rebinding names of the outer scope.
    counter = [0]          # number of messages added so far
    offset = [0]           # zero-based line index of the current ``msgid``
    messages = []          # raw msgid (and msgid_plural) of the current entry
    translations = []      # [index, raw msgstr] pairs of the current entry
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []           # raw msgctxt lines of the current entry
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        # Build a `Message` from the collected state, store it in the
        # catalog and reset the per-entry state.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if len(translations) > 1:
            string = tuple([denormalize(t[1]) for t in translations])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        # The comment lists are cleared in place below, so hand copies to
        # `Message` in case it keeps a reference to its arguments.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1, context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        for state in (messages, translations, context, locations, flags,
                      auto_comments, user_comments):
            del state[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle one keyword or continuation line of a message entry.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            messages.append(line[12:].lstrip())
        elif line.startswith('msgid'):
            # Flush the previous entry *before* recording the new offset, so
            # the finished message keeps the line number of its own msgid.
            if messages:
                _add_message()
            in_msgid[0] = True
            in_msgstr[0] = in_msgctxt[0] = False
            offset[0] = lineno
            messages.append(line[5:].lstrip())
        elif line.startswith('msgstr'):
            in_msgid[0] = in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # split only on the first ']' -- the translation text itself
                # may legitimately contain closing brackets
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            # A context starts a new entry; finish any pending one first so
            # the context is not attached to the previous message.
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            # continuation line: belongs to whichever keyword came last
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    text_type = type(u'')
    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, text_type):
            # `catalog.charset` is read for every line (as before), not
            # cached once up front
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            if messages and translations:
                _add_message()
            marker = line[1:2]
            if marker == ':':
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            # silently skip malformed location comments
                            continue
                        locations.append((location[:pos], location_lineno))
            elif marker == ',':
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif marker == '~':
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif marker == '.':
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
1 | 246 |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
# Compiled pattern with a single capturing group that matches, in order of
# preference, one of the alternatives listed below.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                   # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',   # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',    # em-dash
]))
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
252 |
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
def escape(string):
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    The result is wrapped in double quotes; backslashes, tabs, carriage
    returns, newlines and double quotes are replaced by their backslash
    escape sequences.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # Backslashes must be doubled first; otherwise the backslashes introduced
    # by the later replacements would themselves get escaped again.
    escaped = (string.replace('\\', '\\\\')
                     .replace('\t', '\\t')
                     .replace('\r', '\\r')
                     .replace('\n', '\\n')
                     .replace('"', '\\"'))
    return '"%s"' % escaped
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
271 |
190
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    A string consisting of at most one (unwrapped) line is returned as a
    single escaped, double-quoted string.  Anything longer is emitted as an
    empty ``""`` line followed by one escaped, double-quoted string per
    output line, each preceded by `prefix`.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                # The escaped line is too long: split it into chunks and
                # greedily pack them into output lines.  The list is reversed
                # so that chunks can be consumed in order with pop().
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two enclosing double quotes
                    while chunks:
                        # escape() adds two quotes, hence the "- 2".
                        # NOTE(review): prefixlen is added for every chunk
                        # rather than once per output line, so prefixed text
                        # wraps earlier than `width` strictly requires --
                        # confirm whether this is intended before changing.
                        length = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + length < width:
                            buf.append(chunks.pop())
                            size += length
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line: drop it and append a newline to the entry
    # that precedes it.
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([(prefix + escape(entry)) for entry in lines])
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
333 |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
334 def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, |
200
1c778cccd330
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
199
diff
changeset
|
335 sort_output=False, sort_by_file=False, ignore_obsolete=False, |
203 | 336 include_previous=False): |
56
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
337 r"""Write a ``gettext`` PO (portable object) template file for a given |
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
338 message catalog to the provided file-like object. |
200
1c778cccd330
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
199
diff
changeset
|
339 |
56
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
340 >>> catalog = Catalog() |
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
341 >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], |
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
342 ... flags=('fuzzy',)) |
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
343 >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)]) |
1 | 344 >>> from StringIO import StringIO |
345 >>> buf = StringIO() | |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
346 >>> write_po(buf, catalog, omit_header=True) |
1 | 347 >>> print buf.getvalue() |
348 #: main.py:1 | |
6
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
349 #, fuzzy, python-format |
c3b1b0b3d129
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
5
diff
changeset
|
350 msgid "foo %(name)s" |
1 | 351 msgstr "" |
352 <BLANKLINE> | |
353 #: main.py:3 | |
354 msgid "bar" | |
355 msgid_plural "baz" | |
356 msgstr[0] "" | |
357 msgstr[1] "" | |
358 <BLANKLINE> | |
359 <BLANKLINE> | |
200
1c778cccd330
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
199
diff
changeset
|
360 |
1 | 361 :param fileobj: the file-like object to write to |
67 | 362 :param catalog: the `Catalog` instance |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
363 :param width: the maximum line width for the generated output; use `None`, |
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
364 0, or a negative number to completely disable line wrapping |
1 | 365 :param no_location: do not emit a location comment for every message |
366 :param omit_header: do not include the ``msgid ""`` entry at the top of the | |
367 output | |
227 | 368 :param sort_output: whether to sort the messages in the output by msgid |
369 :param sort_by_file: whether to sort the messages in the output by their | |
370 locations | |
371 :param ignore_obsolete: whether to ignore obsolete messages and not include | |
372 them in the output; by default they are included as | |
373 comments | |
203 | 374 :param include_previous: include the old msgid as a comment when |
229 | 375 updating the catalog |
1 | 376 """ |
190
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
377 def _normalize(key, prefix=''): |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
378 return normalize(key, prefix=prefix, width=width) \ |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
379 .encode(catalog.charset, 'backslashreplace') |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
380 |
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
381 def _write(text): |
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
382 if isinstance(text, unicode): |
102
14a3d766a701
Project name and version, and the charset are available via the `Catalog` object, and do not need to be passed to `write_pot()`.
cmlenz
parents:
97
diff
changeset
|
383 text = text.encode(catalog.charset) |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
384 fileobj.write(text) |
1 | 385 |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
386 def _write_comment(comment, prefix=''): |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
387 lines = comment |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
388 if width and width > 0: |
315 | 389 lines = wraptext(comment, width) |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
390 for line in lines: |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
391 _write('#%s %s\n' % (prefix, line.strip())) |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
392 |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
393 def _write_message(message, prefix=''): |
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
394 if isinstance(message.id, (list, tuple)): |
190
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
395 _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix))) |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
396 _write('%smsgid_plural %s\n' % ( |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
397 prefix, _normalize(message.id[1], prefix) |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
398 )) |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
399 for i, string in enumerate(message.string): |
190
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
400 _write('%smsgstr[%d] %s\n' % ( |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
401 prefix, i, _normalize(message.string[i], prefix) |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
402 )) |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
403 else: |
190
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
404 _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix))) |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
405 _write('%smsgstr %s\n' % ( |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
406 prefix, _normalize(message.string or '', prefix) |
5041d90edf0c
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
181
diff
changeset
|
407 )) |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
408 |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
409 messages = list(catalog) |
71 | 410 if sort_output: |
248
f0b1ee94628c
add a __cmp__ to Message that correctly sorts by id, taking into account plurals
pjenvey
parents:
229
diff
changeset
|
411 messages.sort() |
71 | 412 elif sort_by_file: |
413 messages.sort(lambda x,y: cmp(x.locations, y.locations)) | |
68 | 414 |
71 | 415 for message in messages: |
67 | 416 if not message.id: # This is the header "message" |
417 if omit_header: | |
418 continue | |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
419 comment_header = catalog.header_comment |
103
dacfbaf0d1e0
Implement wrapping of header comments in PO(T) output. Related to #14.
cmlenz
parents:
102
diff
changeset
|
420 if width and width > 0: |
dacfbaf0d1e0
Implement wrapping of header comments in PO(T) output. Related to #14.
cmlenz
parents:
102
diff
changeset
|
421 lines = [] |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
422 for line in comment_header.splitlines(): |
315 | 423 lines += wraptext(line, width=width, |
424 subsequent_indent='# ') | |
104
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
425 comment_header = u'\n'.join(lines) + u'\n' |
395704fda00b
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
103
diff
changeset
|
426 _write(comment_header) |
102
14a3d766a701
Project name and version, and the charset are available via the `Catalog` object, and do not need to be passed to `write_pot()`.
cmlenz
parents:
97
diff
changeset
|
427 |
227 | 428 for comment in message.user_comments: |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
429 _write_comment(comment) |
227 | 430 for comment in message.auto_comments: |
181
8a762ce37bf7
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
431 _write_comment(comment, prefix='.') |
1 | 432 |
433 if not no_location: | |
134 | 434 locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno) |
435 for filename, lineno in message.locations]) | |
181
8a762ce37bf7
The frontends now provide ways to update existing translation catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
436 _write_comment(locs, prefix=':') |
56
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
437 if message.flags: |
f40fc143439c
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
55
diff
changeset
|
438 _write('#%s\n' % ', '.join([''] + list(message.flags))) |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
439 |
203 | 440 if message.previous_id and include_previous: |
309
43dca73da5b5
Fix for unicode problem when the previous message id is included as a comment in PO serialization. Closes #78.
cmlenz
parents:
248
diff
changeset
|
441 _write_comment('msgid %s' % _normalize(message.previous_id[0]), |
203 | 442 prefix='|') |
443 if len(message.previous_id) > 1: | |
309
43dca73da5b5
Fix for unicode problem when the previous message id is included as a comment in PO serialization. Closes #78.
cmlenz
parents:
248
diff
changeset
|
444 _write_comment('msgid_plural %s' % _normalize( |
203 | 445 message.previous_id[1] |
446 ), prefix='|') | |
200
1c778cccd330
Added `--no-fuzzy-matching` to the frontends, and also `--previous`, which adds the old msgids as comments. The latter closes #31.
palgarvio
parents:
199
diff
changeset
|
447 |
181
8a762ce37bf7
The frontends now provide ways to update existing translation catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
448 _write_message(message) |
24
b09e90803d1b
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
23
diff
changeset
|
449 _write('\n') |
181
8a762ce37bf7
The frontends now provide ways to update existing translation catalogs from a template. Closes #22.
cmlenz
parents:
178
diff
changeset
|
450 |
191
c171a0041ad2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
190
diff
changeset
|
451 if not ignore_obsolete: |
c171a0041ad2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
190
diff
changeset
|
452 for message in catalog.obsolete.values(): |
227 | 453 for comment in message.user_comments: |
191
c171a0041ad2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
190
diff
changeset
|
454 _write_comment(comment) |
c171a0041ad2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
190
diff
changeset
|
455 _write_message(message, prefix='#~ ') |
c171a0041ad2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
190
diff
changeset
|
456 _write('\n') |