Mercurial > babel > old > mirror
comparison 0.8.x/babel/messages/pofile.py @ 142:4a7af44e6695 stable
Create branch for 0.8.x releases.
author | cmlenz |
---|---|
date | Wed, 20 Jun 2007 10:09:07 +0000 |
parents | |
children | 90866b11734f |
comparison
equal
deleted
inserted
replaced
1:bf36ec5f5e50 | 142:4a7af44e6695 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Reading and writing of files in the ``gettext`` PO (portable object) | |
15 format. | |
16 | |
17 :see: `The Format of PO Files | |
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ | |
19 """ | |
20 | |
21 from datetime import date, datetime | |
22 import os | |
23 import re | |
24 try: | |
25 set | |
26 except NameError: | |
27 from sets import Set as set | |
28 from textwrap import wrap | |
29 | |
30 from babel import __version__ as VERSION | |
31 from babel.messages.catalog import Catalog | |
32 from babel.util import LOCALTZ | |
33 | |
34 __all__ = ['escape', 'normalize', 'read_po', 'write_po'] | |
35 | |
def read_po(fileobj):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    ('foo %(name)s', '')
      ([('main.py', 1)], set(['fuzzy', 'python-format']))
      ([], [])
    (('bar', 'baz'), ('', ''))
      ([('main.py', 3)], set([]))
      (['A user comment'], ['An auto comment'])

    :param fileobj: the file-like object to read the PO file from
    :return: a catalog containing the messages read from the file
    :rtype: `Catalog`
    """
    catalog = Catalog()

    # Accumulators for the entry currently being parsed; they are emptied
    # again every time an entry is flushed into the catalog.
    messages = []
    translations = []
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    in_msgid = in_msgstr = False

    def _add_message():
        # Flush the accumulated entry into the catalog and reset the state.
        # Sorting orders the ``[index, text]`` pairs by plural index.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if len(translations) > 1:
            string = tuple([denormalize(t[1]) for t in translations])
        else:
            string = denormalize(translations[0][1])
        catalog.add(msgid, string, list(locations), set(flags),
                    list(auto_comments), list(user_comments))
        del messages[:]; del translations[:]; del locations[:];
        del flags[:]; del auto_comments[:]; del user_comments[:]

    for line in fileobj.readlines():
        line = line.strip()
        if line.startswith('#'):
            in_msgid = in_msgstr = False
            # A comment line after a msgid starts the next entry, so the
            # pending one is complete.
            if messages:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    filename, lineno = location.split(':', 1)
                    locations.append((filename, int(lineno)))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:
                    # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            # ``msgid_plural`` must be tested before ``msgid`` because the
            # latter is a prefix of the former.
            if line.startswith('msgid_plural'):
                in_msgid = True
                msg = line[12:].lstrip()
                messages.append(msg)
            elif line.startswith('msgid'):
                in_msgid = True
                if messages:
                    _add_message()
                messages.append(line[5:].lstrip())
            elif line.startswith('msgstr'):
                in_msgid = False
                in_msgstr = True
                msg = line[6:].lstrip()
                if msg.startswith('['):
                    # Split on the first ']' only: the translated text may
                    # itself contain a ']' character.
                    idx, msg = msg[1:].split(']', 1)
                    translations.append([int(idx), msg.lstrip()])
                else:
                    translations.append([0, msg])
            elif line.startswith('"'):
                # Continuation line: append to whichever string is open; the
                # pieces are joined again by `denormalize`.
                if in_msgid:
                    messages[-1] += u'\n' + line.rstrip()
                elif in_msgstr:
                    translations[-1][1] += u'\n' + line.rstrip()

    # Flush the last entry of the file.
    if messages:
        _add_message()
    return catalog
149 | |
# Pattern used by `normalize` to break an over-long line into chunks.  The
# whole pattern is one capturing group, so ``WORD_SEP.split()`` keeps the
# matched separators in its result and joining the chunks restores the line.
WORD_SEP = re.compile('('
    r'\s+|'                                 # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
')')
155 | |
def escape(string):
    r"""Quote a string for use as a double-quoted literal in a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escapes, and the result is wrapped in double
    quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash must come first, otherwise the backslashes introduced by
    # the later substitutions would be doubled as well.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = string
    for raw, quoted in substitutions:
        escaped = escaped.replace(raw, quoted)
    return '"%s"' % escaped
174 | |
# Maps the character that follows a backslash to the character it stands for.
_UNESCAPE_MAP = {'\\': '\\', 't': '\t', 'r': '\r', 'n': '\n', '"': '"'}
# Matches exactly the escape sequences produced by `escape`.
_UNESCAPE_RE = re.compile(r'\\([\\trn"])')

def unescape(string):
    r"""Reverse escape the given string.

    The first and last character (the surrounding double quotes) are dropped
    and the escape sequences ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are
    decoded.  Any other backslash sequence is left unchanged.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    An escaped backslash followed by a letter stays a backslash and a letter:

    >>> print unescape('"C:\\\\new"')
    C:\new

    :param string: the string to unescape
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _replace(match):
        return _UNESCAPE_MAP[match.group(1)]
    # Decode in a single left-to-right pass.  Chaining ``str.replace`` calls
    # would re-interpret the output of an earlier replacement, e.g. turning
    # the escaped backslash in ``\\n`` into the start of a newline escape.
    return _UNESCAPE_RE.sub(_replace, string[1:-1])
192 | |
def normalize(string, width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    A string of at most one line is returned as a single escaped, quoted
    string.  Otherwise the result starts with an empty ``""`` line followed by
    one escaped, quoted string per line.  When wrapping is enabled, lines
    whose escaped form is longer than `width` are split at the boundaries
    matched by `WORD_SEP`.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        lines = []
        # splitlines(True) keeps the line endings on each line.
        for idx, line in enumerate(string.splitlines(True)):
            # Only lines whose escaped form (quotes included) exceeds the
            # width are re-wrapped.
            if len(escape(line)) > width:
                chunks = WORD_SEP.split(line)
                # Reversed so that pop() yields the chunks in original order.
                chunks.reverse()
                while chunks:
                    buf = []
                    # Start at 2 to account for the two surrounding quotes.
                    size = 2
                    while chunks:
                        # Escaped length of the next chunk, without quotes.
                        l = len(escape(chunks[-1])) - 2
                        if size + l < width:
                            buf.append(chunks.pop())
                            size += l
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    # Nothing to spread over several lines: emit one quoted string.
    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    # The leading empty string marks a multi-line value in the PO file.
    return u'""\n' + u'\n'.join([escape(l) for l in lines])
252 | |
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A multi-line value does not have to start with an empty ``""`` line:

    >>> print denormalize(r'''"Say: "
    ... "hello"''')
    Say: hello

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if '\n' in string:
        # Every line is a quoted string of its own and has to be unescaped
        # separately, whether or not the value starts with the ``""`` marker.
        escaped_lines = string.splitlines()
        if string.startswith('""'):
            escaped_lines = escaped_lines[1:]
        return ''.join([unescape(line) for line in escaped_lines])
    return unescape(string)
283 | |
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: sort the messages by their message ID before writing
    :param sort_by_file: sort the messages by their locations before writing;
                         only used when `sort_output` is false
    """
    def _normalize(key):
        return normalize(key, width=width).encode(catalog.charset,
                                                  'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _wrap_lines(text, **kwargs):
        # Return `text` as a list of output lines.  ``textwrap.wrap`` rejects
        # a non-positive width, so when wrapping is disabled the text is only
        # split at the line breaks it already contains.
        if width and width > 0:
            return wrap(text, width=width, break_long_words=False, **kwargs)
        return text.splitlines()

    messages = list(catalog)
    if sort_output:
        messages.sort(lambda x,y: cmp(x.id, y.id))
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            lines = []
            for line in catalog.header_comment.splitlines():
                lines += _wrap_lines(line, subsequent_indent='# ')
            # Always end the header comment with a newline so that the
            # following ``msgid`` starts on a line of its own.
            _write(u'\n'.join(lines) + u'\n')

        if message.user_comments:
            for comment in message.user_comments:
                for line in _wrap_lines(comment):
                    _write('# %s\n' % line.strip())

        if message.auto_comments:
            for comment in message.auto_comments:
                for line in _wrap_lines(comment):
                    _write('#. %s\n' % line.strip())

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            # `_wrap_lines` always returns a list; looping over the joined
            # string itself would emit one ``#:`` line per character.
            for line in _wrap_lines(locs):
                _write('#: %s\n' % line.strip())
        if message.flags:
            # The leading empty item yields the ``#, flag, flag`` form.
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if isinstance(message.id, (list, tuple)):
            _write('msgid %s\n' % _normalize(message.id[0]))
            _write('msgid_plural %s\n' % _normalize(message.id[1]))
            for i, string in enumerate(message.string):
                _write('msgstr[%d] %s\n' % (i, _normalize(string)))
        else:
            _write('msgid %s\n' % _normalize(message.id))
            _write('msgstr %s\n' % _normalize(message.string or ''))
        _write('\n')