Mercurial > babel > old > mirror
annotate babel/messages/pofile.py @ 549:12d5425fb430
babel.messages.pofile should only apply encoding when actually writing a file (eases Python 3 transition, closes #251)
author | fschwarz |
---|---|
date | Sat, 19 Mar 2011 19:50:21 +0000 |
parents | 10de195cfb04 |
children |
rev | line source |
---|---|
3 | 1 # -*- coding: utf-8 -*- |
2 # | |
532 | 3 # Copyright (C) 2007-2011 Edgewall Software |
3 | 4 # All rights reserved. |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Reading and writing of files in the ``gettext`` PO (portable object) | |
15 format. | |
16 | |
17 :see: `The Format of PO Files | |
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ | |
19 """ | |
20 | |
533 | 21 from datetime import datetime |
136 | 22 import os |
3 | 23 import re |
24 | |
201
10e8d072e2d1
Handle obsolete messages when parsing catalogs. Closes #32.
cmlenz
parents:
198
diff
changeset
|
25 from babel.messages.catalog import Catalog, Message |
533 | 26 from babel.util import wraptext |
3 | 27 |
180
7e88950ab661
Minor change to what symbols are "exported", primarily for the generated docs.
cmlenz
parents:
177
diff
changeset
|
# Names exported by ``from <this module> import *``.
__all__ = ['read_po', 'write_po']
# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
160 | 30 |
def unescape(string):
    r"""Reverse `escape` the given string.

    The surrounding double quotes are removed and the escape sequences
    ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are decoded.  Decoding is done
    in a single left-to-right pass so that the output of one substitution is
    never re-interpreted as the start of another escape sequence (an escaped
    backslash followed by ``n`` stays a backslash followed by ``n``).

    >>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
    Say:
     "hello, world!"
    <BLANKLINE>

    >>> print(unescape('"C:\\\\new"'))
    C:\new

    :param string: the quoted and escaped string to unescape
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _replace_escape(match):
        # Map the character following the backslash to what it stands for;
        # a backslash or a double quote simply stands for itself.
        char = match.group(1)
        if char == 'n':
            return '\n'
        elif char == 't':
            return '\t'
        elif char == 'r':
            return '\r'
        return char

    # string[1:-1] drops the enclosing double quotes.  Unknown escape
    # sequences are left untouched, exactly as before.
    return re.sub(r'\\([\\trn"])', _replace_escape, string[1:-1])
48 | |
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    A normalized string consists of one or more double-quoted segments, one
    per line.  Each segment is unescaped on its own and the results are
    concatenated.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"hello, world!\"\n"'''))
    Say:
     "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
     "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    Multi-line strings that do not start with an empty ``""`` segment are
    handled the same way:

    >>> print(denormalize(r'''"Say: "
    ... "hello"'''))
    Say: hello

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if string.startswith('""'):
        # The leading empty segment carries no text; skip it.
        lines = string.splitlines()[1:]
    elif '\n' in string:
        # Several segments without the leading '""'.  Each one must be
        # unquoted separately, otherwise the inner quotes and the line
        # break would leak into the result.
        lines = string.splitlines()
    else:
        return unescape(string)
    return ''.join([unescape(line) for line in lines])
3 | 79 |
201
10e8d072e2d1
Handle obsolete messages when parsing catalogs. Closes #32.
cmlenz
parents:
198
diff
changeset
|
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: a catalog object representing the parsed PO file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state shared with the nested helpers.  Scalars are wrapped in
    # one-element lists so the closures can rebind them (this code predates
    # ``nonlocal``).
    counter = [0]           # number of messages added so far
    offset = [0]            # 0-based line index of the current ``msgid``
    messages = []           # raw msgid (and msgid_plural) of the current entry
    translations = []       # [plural index, raw msgstr] pairs
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []            # raw msgctxt segments of the current entry
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        # Turn the collected raw pieces into a Message, store it in the
        # catalog and reset the per-entry state.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural entry: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        # Pass copies: the working lists are cleared in place right below.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1, context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle one keyword line (msgid, msgid_plural, msgstr, msgctxt) or
        # one quoted continuation line.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Only split on the first ']' -- the text may contain more.
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Remember that continuation lines now belong to the context;
            # without this flag they were silently dropped.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, unicode):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            # A comment after a complete entry starts the next entry.
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            # Silently skip malformed location comments.
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
3 | 251 |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
# Regular expression matching the separators between "words": a run of
# whitespace, the leading part of a hyphenated word (up to and including the
# hyphen), or an em-dash written as two or more hyphens.  The whole pattern
# is one capturing group, so ``WORD_SEP.split()`` keeps the separators in
# its result.
# NOTE(review): presumably used by the line-wrapping code -- confirm
# against the callers, which are not visible here.
WORD_SEP = re.compile('('
    r'\s+|'                                 # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
')')
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
257 |
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
def escape(string):
    r"""Quote a string so it can be written as a double-quoted string in a
    ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences, and the result is wrapped
    in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # Order matters: the backslash must be handled first, otherwise the
    # backslashes introduced by the later substitutions would be doubled.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for char, replacement in substitutions:
        escaped = escaped.replace(char, replacement)
    return '"%s"' % escaped
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
276 |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    The string is split into lines (line endings are kept).  When wrapping
    is enabled, any line that would exceed `width` once escaped and prefixed
    is re-flowed at the boundaries found by ``WORD_SEP``.  If the result
    consists of at most one line, the escaped string is returned on its own.
    Otherwise the result starts with an empty ``""`` string followed by one
    quoted string per line, each of them preceded by `prefix`.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                # The line is too long once quoted and prefixed, so split it
                # into chunks.  The chunk list is reversed so that the next
                # chunk can be taken from the end with pop().
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two enclosing double quotes
                    while chunks:
                        # escape() wraps the chunk in quotes; those two
                        # characters are already accounted for in `size`.
                        # NOTE(review): prefixlen is added for every chunk
                        # rather than once per output line, so prefixed
                        # lines appear to wrap earlier than necessary --
                        # confirm whether that is intended.
                        length = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + length < width:
                            buf.append(chunks.pop())
                            size += length
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
338 |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
339 def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False, |
202
d3c272492053
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
201
diff
changeset
|
340 sort_output=False, sort_by_file=False, ignore_obsolete=False, |
205 | 341 include_previous=False): |
58
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
342 r"""Write a ``gettext`` PO (portable object) template file for a given |
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
343 message catalog to the provided file-like object. |
202
d3c272492053
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
201
diff
changeset
|
344 |
58
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
345 >>> catalog = Catalog() |
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
346 >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)], |
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
347 ... flags=('fuzzy',)) |
546
10de195cfb04
catalog.add() now returns the message instance (closes #245)
fschwarz
parents:
533
diff
changeset
|
348 <Message...> |
58
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
349 >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)]) |
546
10de195cfb04
catalog.add() now returns the message instance (closes #245)
fschwarz
parents:
533
diff
changeset
|
350 <Message...> |
3 | 351 >>> from StringIO import StringIO |
352 >>> buf = StringIO() | |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
353 >>> write_po(buf, catalog, omit_header=True) |
3 | 354 >>> print buf.getvalue() |
355 #: main.py:1 | |
8
ff5481545bfd
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
7
diff
changeset
|
356 #, fuzzy, python-format |
ff5481545bfd
Add basic PO file parsing, and change the PO writing procedure to also take flags (such as "python-format" or "fuzzy").
cmlenz
parents:
7
diff
changeset
|
357 msgid "foo %(name)s" |
3 | 358 msgstr "" |
359 <BLANKLINE> | |
360 #: main.py:3 | |
361 msgid "bar" | |
362 msgid_plural "baz" | |
363 msgstr[0] "" | |
364 msgstr[1] "" | |
365 <BLANKLINE> | |
366 <BLANKLINE> | |
202
d3c272492053
Added `--no-fuzzy-matching` to the frontends and also `--previous` which adds the old msgid's as comments. The latest closes #31.
palgarvio
parents:
201
diff
changeset
|
367 |
3 | 368 :param fileobj: the file-like object to write to |
69 | 369 :param catalog: the `Catalog` instance |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
370 :param width: the maximum line width for the generated output; use `None`, |
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
371 0, or a negative number to completely disable line wrapping |
3 | 372 :param no_location: do not emit a location comment for every message |
373 :param omit_header: do not include the ``msgid ""`` entry at the top of the | |
374 output | |
229 | 375 :param sort_output: whether to sort the messages in the output by msgid |
376 :param sort_by_file: whether to sort the messages in the output by their | |
377 locations | |
378 :param ignore_obsolete: whether to ignore obsolete messages and not include | |
379 them in the output; by default they are included as | |
380 comments | |
205 | 381 :param include_previous: include the old msgid as a comment when |
231 | 382 updating the catalog |
3 | 383 """ |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
384 def _normalize(key, prefix=''): |
549
12d5425fb430
babel.messages.pofile should only apply encoding when actually writing a file (eases Python 3 transition, closes #251)
fschwarz
parents:
546
diff
changeset
|
385 return normalize(key, prefix=prefix, width=width) |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
386 |
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
387 def _write(text): |
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
388 if isinstance(text, unicode): |
549
12d5425fb430
babel.messages.pofile should only apply encoding when actually writing a file (eases Python 3 transition, closes #251)
fschwarz
parents:
546
diff
changeset
|
389 text = text.encode(catalog.charset, 'backslashreplace') |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
390 fileobj.write(text) |
3 | 391 |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
392 def _write_comment(comment, prefix=''): |
425
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
393 # xgettext always wraps comments even if --no-wrap is passed; |
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
394 # provide the same behaviour |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
395 if width and width > 0: |
425
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
396 _width = width |
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
397 else: |
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
398 _width = 76 |
15541acbe8cb
Now, the `--width` option, although with a default value of 76, it's not set to any value initially so that the `--no-wrap` option can be passed without throwing an error. Fixes #145.
palgarvio
parents:
423
diff
changeset
|
399 for line in wraptext(comment, _width): |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
400 _write('#%s %s\n' % (prefix, line.strip())) |
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
401 |
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
402 def _write_message(message, prefix=''): |
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
403 if isinstance(message.id, (list, tuple)): |
423 | 404 if message.context: |
405 _write('%smsgctxt %s\n' % (prefix, | |
406 _normalize(message.context, prefix))) | |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
407 _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix))) |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
408 _write('%smsgid_plural %s\n' % ( |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
409 prefix, _normalize(message.id[1], prefix) |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
410 )) |
372
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
411 |
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
412 for idx in range(catalog.num_plurals): |
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
413 try: |
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
414 string = message.string[idx] |
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
415 except IndexError: |
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
416 string = '' |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
417 _write('%smsgstr[%d] %s\n' % ( |
372
d1a9c618d2d5
We no longer neglect `catalog.plurals`. Added tests for it. Fixes #120.
palgarvio
parents:
358
diff
changeset
|
418 prefix, idx, _normalize(string, prefix) |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
419 )) |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
420 else: |
423 | 421 if message.context: |
422 _write('%smsgctxt %s\n' % (prefix, | |
423 _normalize(message.context, prefix))) | |
192
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
424 _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix))) |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
425 _write('%smsgstr %s\n' % ( |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
426 prefix, _normalize(message.string or '', prefix) |
8f5805197198
Correctly write out obsolete messages spanning multiple lines. Fixes #33.
cmlenz
parents:
183
diff
changeset
|
427 )) |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
428 |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
429 messages = list(catalog) |
73 | 430 if sort_output: |
250
194f927d8c5a
add a __cmp__ to Message that correctly sorts by id, taking into account plurals
pjenvey
parents:
231
diff
changeset
|
431 messages.sort() |
73 | 432 elif sort_by_file: |
433 messages.sort(lambda x,y: cmp(x.locations, y.locations)) | |
70 | 434 |
73 | 435 for message in messages: |
69 | 436 if not message.id: # This is the header "message" |
437 if omit_header: | |
438 continue | |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
439 comment_header = catalog.header_comment |
105
abd3a594dab4
Implement wrapping of header comments in PO(T) output. Related to #14.
cmlenz
parents:
104
diff
changeset
|
440 if width and width > 0: |
abd3a594dab4
Implement wrapping of header comments in PO(T) output. Related to #14.
cmlenz
parents:
104
diff
changeset
|
441 lines = [] |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
442 for line in comment_header.splitlines(): |
317 | 443 lines += wraptext(line, width=width, |
444 subsequent_indent='# ') | |
106
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
445 comment_header = u'\n'.join(lines) + u'\n' |
2a00e352c986
Merged `write_pot` and `write_po` functions by moving more functionality to the `Catalog` class. This is certainly not perfect yet, but moves us in the right direction.
cmlenz
parents:
105
diff
changeset
|
446 _write(comment_header) |
104
57d2f21a1fcc
Project name and version, and the charset are available via the `Catalog` object, and do not need to be passed to `write_pot()`.
cmlenz
parents:
99
diff
changeset
|
447 |
229 | 448 for comment in message.user_comments: |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
449 _write_comment(comment) |
229 | 450 for comment in message.auto_comments: |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
451 _write_comment(comment, prefix='.') |
3 | 452 |
453 if not no_location: | |
136 | 454 locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno) |
455 for filename, lineno in message.locations]) | |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
456 _write_comment(locs, prefix=':') |
58
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
457 if message.flags: |
068952b4d4c0
Add actual data structures for handling message catalogs, so that more code can be reused here between the frontends.
cmlenz
parents:
57
diff
changeset
|
458 _write('#%s\n' % ', '.join([''] + list(message.flags))) |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
459 |
205 | 460 if message.previous_id and include_previous: |
311
7e460ba1aabe
Fix for unicode problem when the previous message id is included as a comment in PO serialization. Closes #78.
cmlenz
parents:
250
diff
changeset
|
461 _write_comment('msgid %s' % _normalize(message.previous_id[0]), |
205 | 462 prefix='|') |
463 if len(message.previous_id) > 1: | |
311
7e460ba1aabe
Fix for unicode problem when the previous message id is included as a comment in PO serialization. Closes #78.
cmlenz
parents:
250
diff
changeset
|
464 _write_comment('msgid_plural %s' % _normalize( |
205 | 465 message.previous_id[1] |
466 ), prefix='|') | |
202
d3c272492053
Added `--no-fuzzy-matching` to the frontends, and also `--previous`, which adds the old msgids as comments. The latter closes #31.
palgarvio
parents:
201
diff
changeset
|
467 |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
468 _write_message(message) |
26
93eaa2f4a0a2
Reimplement line wrapping for PO writing (as the `textwrap` module is too destructive with white space) and move it to the `normalize` function (which was already doing some handling of line breaks).
cmlenz
parents:
25
diff
changeset
|
469 _write('\n') |
183
e927dffc9ab4
The frontends now provide ways to update existing translations catalogs from a template. Closes #22.
cmlenz
parents:
180
diff
changeset
|
470 |
193
b5e58a22ebd2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
192
diff
changeset
|
471 if not ignore_obsolete: |
b5e58a22ebd2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
192
diff
changeset
|
472 for message in catalog.obsolete.values(): |
229 | 473 for comment in message.user_comments: |
193
b5e58a22ebd2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
192
diff
changeset
|
474 _write_comment(comment) |
b5e58a22ebd2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
192
diff
changeset
|
475 _write_message(message, prefix='#~ ') |
b5e58a22ebd2
Add an option to the frontend commands for catalog updating that completely removes any obsolete messages, instead of putting them in comments.
cmlenz
parents:
192
diff
changeset
|
476 _write('\n') |