Mercurial > babel > old > mirror
comparison babel/messages/pofile.py @ 56:27d55a07c897
Rename the `babel.catalog` package to `babel.messages` for consistency with the other package names.
author | cmlenz |
---|---|
date | Fri, 08 Jun 2007 09:16:32 +0000 |
parents | babel/catalog/pofile.py@52dbebdd3789 |
children | e7080996fc46 |
comparison
equal
deleted
inserted
replaced
55:b298e583d326 | 56:27d55a07c897 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # | |
3 # Copyright (C) 2007 Edgewall Software | |
4 # All rights reserved. | |
5 # | |
6 # This software is licensed as described in the file COPYING, which | |
7 # you should have received as part of this distribution. The terms | |
8 # are also available at http://babel.edgewall.org/wiki/License. | |
9 # | |
10 # This software consists of voluntary contributions made by many | |
11 # individuals. For the exact contribution history, see the revision | |
12 # history and logs, available at http://babel.edgewall.org/log/. | |
13 | |
14 """Reading and writing of files in the ``gettext`` PO (portable object) | |
15 format. | |
16 | |
17 :see: `The Format of PO Files | |
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_ | |
19 """ | |
20 | |
21 from datetime import date, datetime | |
22 import re | |
23 try: | |
24 set | |
25 except NameError: | |
26 from sets import Set as set | |
27 import textwrap | |
28 import time | |
29 | |
30 from babel import __version__ as VERSION | |
31 | |
32 __all__ = ['escape', 'normalize', 'read_po', 'write_po', 'write_pot'] | |
33 | |
def read_po(fileobj):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object.

    This function yields tuples of the form:

        ``(message, translation, locations, flags)``

    where:

     * ``message`` is a tuple holding the original (untranslated) message:
       ``(singular,)`` for simple messages and ``(singular, plural)`` for
       pluralizable messages
     * ``translation`` is a tuple of translations, ordered by plural index
       (a single item for simple messages)
     * ``locations`` is a tuple of ``(filename, lineno)`` tuples
     * ``flags`` is a set of strings (for example, "fuzzy")

    The strings are returned exactly as they appear between the double
    quotes in the file; escape sequences are not decoded.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> for message, translation, locations, flags in read_po(buf):
    ...     print (message, translation)
    ...     print (locations, flags)
    (('foo %(name)s',), ('',))
    ((('main.py', 1),), set(['fuzzy', 'python-format']))
    (('bar', 'baz'), ('', ''))
    ((('main.py', 3),), set([]))

    :param fileobj: the file-like object to read the PO file from
    :return: an iterator over ``(message, translation, locations, flags)``
             tuples
    :rtype: ``iterator``
    """
    # Accumulators for the entry currently being parsed; ``pack()`` turns
    # them into one result tuple and resets them for the next entry.
    messages = []
    translations = []
    locations = []
    flags = []
    in_msgid = in_msgstr = False

    def pack():
        # Each translation is stored as ``[index, text]``; sorting puts the
        # plural forms in index order before the index is dropped.
        translations.sort()
        retval = (tuple(messages), tuple([t[1] for t in translations]),
                  tuple(locations), set(flags))
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        return retval

    for line in fileobj.readlines():
        line = line.strip()
        if line.startswith('#'):
            in_msgid = in_msgstr = False
            # A comment after a message body starts the next entry, so the
            # pending one is complete.
            if messages:
                yield pack()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    filename, lineno = location.split(':', 1)
                    locations.append((filename, int(lineno)))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
        elif line:
            # ``msgid_plural`` must be tested before ``msgid`` because the
            # latter is a prefix of the former.
            if line.startswith('msgid_plural'):
                in_msgid = True
                msg = line[12:].lstrip()
                messages.append(msg[1:-1])
            elif line.startswith('msgid'):
                in_msgid = True
                if messages:
                    yield pack()
                msg = line[5:].lstrip()
                messages.append(msg[1:-1])
            elif line.startswith('msgstr'):
                in_msgid = False
                in_msgstr = True
                msg = line[6:].lstrip()
                if msg.startswith('['):
                    # Split on the first "]" only: the translation text
                    # itself may legitimately contain that character.
                    idx, msg = msg[1:].split(']', 1)
                    translations.append([int(idx), msg.lstrip()[1:-1]])
                else:
                    translations.append([0, msg[1:-1]])
            elif line.startswith('"'):
                # Continuation line: append to whichever string is open.
                if in_msgid:
                    messages[-1] += line.rstrip()[1:-1]
                elif in_msgstr:
                    translations[-1][1] += line.rstrip()[1:-1]

    if messages:
        yield pack()
133 | |
# Template for the header entry written at the top of a PO template file.
#
# The string is formatted twice: the single ``%s`` is filled in right here
# with the Babel version, which turns every doubled ``%%(name)s`` into a
# ``%(name)s`` placeholder.  Those remaining placeholders (``project``,
# ``year``, ``version``, ``creation_date``, ``charset``) are filled in later
# with a mapping when the header is actually written.  ``\\n`` yields a
# literal backslash followed by ``n`` in the output, i.e. the escape sequence
# as it has to appear inside the quoted PO strings.
POT_HEADER = """\
# Translations Template for %%(project)s.
# Copyright (C) %%(year)s ORGANIZATION
# This file is distributed under the same license as the
# %%(project)s project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: %%(project)s %%(version)s\\n"
"POT-Creation-Date: %%(creation_date)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%%(charset)s\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: Babel %s\\n"

""" % VERSION
155 | |
# Callable that searches a string for a simple Python %-format placeholder
# (``%s``, ``%d``, ``%(name)s``, ...) and returns the match object or `None`.
PYTHON_FORMAT = re.compile(
    r'\%(\([\w]+\))?[diouxXeEfFgGcrs]'
).search

# Splits text into chunks at the points where a line may be wrapped.  The
# whole pattern is one capturing group, so the separators themselves are kept
# in the result of ``WORD_SEP.split()``.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                     # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',     # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',      # em-dash
]))
163 | |
def escape(string):
    r"""Return `string` wrapped in double quotes, with the characters that
    are special inside double-quoted ``PO`` strings backslash-escaped.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    escaped.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # Order matters: backslashes must be doubled first, otherwise the
    # backslashes introduced by the later replacements would be doubled too.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for raw, replacement in substitutions:
        escaped = escaped.replace(raw, replacement)
    return '"%s"' % escaped
182 | |
def normalize(string, width=76):
    r"""Convert a string into the quoted form used for ``msgid`` and
    ``msgstr`` values in .po files.

    A string that fits on a single line is returned as one quoted string.
    Anything else is returned as an empty string (``""``) followed by one
    quoted string per output line.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    The empty string is returned as an empty quoted string:

    >>> print normalize('')
    ""

    :param string: the string to normalize
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) > width:
                # Reverse the chunks so they can be consumed cheaply from
                # the end of the list with ``pop()``.
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    # Start at 2 to account for the surrounding quotes.
                    size = 2
                    while chunks:
                        # Length of the escaped chunk without its quotes.
                        chunk_len = len(escape(chunks[-1])) - 2
                        if size + chunk_len < width:
                            buf.append(chunks.pop())
                            size += chunk_len
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    # ``lines`` is empty for the empty string; treat that like the
    # single-line case instead of failing on ``lines[-1]`` below.
    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([escape(l) for l in lines])
242 | |
def write_pot(fileobj, messages, project='PROJECT', version='VERSION', width=76,
              charset='utf-8', no_location=False, omit_header=False):
    r"""Write a ``gettext`` PO (portable object) template file to the given
    file-like object.

    The `messages` parameter is expected to be an iterable object producing
    tuples of the form:

        ``(filename, lineno, funcname, message, flags)``

    where ``message`` is either a string or a ``(singular, plural)`` pair and
    ``flags`` is an iterable of flag names or `None`.  The ``funcname`` item
    is not used.  Messages that occur more than once are written only once,
    with their locations and flags merged.  The ``python-format`` flag is set
    or cleared automatically depending on whether the message contains a
    Python format placeholder.  Flags are written in sorted order so that the
    output is reproducible.

    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_pot(buf, [
    ...     ('main.py', 1, None, u'foo %(name)s', ('fuzzy',)),
    ...     ('main.py', 3, 'ngettext', (u'bar', u'baz'), None)
    ... ], omit_header=True)

    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param messages: an iterable over the messages
    :param project: the project name
    :param version: the project version
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param charset: the encoding
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    """
    def _normalize(key):
        return normalize(key, width=width).encode(charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(charset)
        fileobj.write(text)

    if not omit_header:
        _write(POT_HEADER % {
            'year': time.strftime('%Y'),
            'project': project,
            'version': version,
            'creation_date': time.strftime('%Y-%m-%d %H:%M%z'),
            'charset': charset,
        })

    locations = {}
    msgflags = {}
    msgids = []     # message IDs in order of first appearance
    plurals = {}

    for filename, lineno, funcname, key, flags in messages:
        flags = set(flags or [])
        if isinstance(key, (list, tuple)):
            assert len(key) == 2
            plurals[key[0]] = key[1]
            key = key[0]
        # ``locations`` gets a key for every message ID seen so far, so the
        # dictionary lookup replaces a linear scan of ``msgids``.
        if key in locations:
            locations[key].append((filename, lineno))
            msgflags[key] |= flags
        else:
            if PYTHON_FORMAT(key):
                flags.add('python-format')
            else:
                flags.discard('python-format')

            locations[key] = [(filename, lineno)]
            msgflags[key] = flags
            msgids.append(key)

    for msgid in msgids:
        if not no_location:
            locs = u' '.join([u'%s:%d' % item for item in locations[msgid]])
            if width and width > 0:
                loc_lines = textwrap.wrap(locs, width, break_long_words=False)
            else:
                # Wrapping disabled: emit all locations on a single line
                # (iterating over the string itself would yield characters).
                loc_lines = [locs]
            for line in loc_lines:
                _write('#: %s\n' % line.strip())
        flags = list(msgflags[msgid])
        if flags:
            # Sets have no defined order; sort for deterministic output.
            flags.sort()
            _write('#%s\n' % ', '.join([''] + flags))

        if msgid in plurals:
            _write('msgid %s\n' % _normalize(msgid))
            _write('msgid_plural %s\n' % _normalize(plurals[msgid]))
            _write('msgstr[0] ""\n')
            _write('msgstr[1] ""\n')
        else:
            _write('msgid %s\n' % _normalize(msgid))
            _write('msgstr ""\n')
        _write('\n')
346 | |
def write_po(fileobj, input_fileobj, language, country=None, project='PROJECT',
             version='VERSION', first_author=None, first_author_email=None,
             plurals=('INTEGER', 'EXPRESSION')):
    r"""Write a ``gettext`` PO (portable object) file to the given file-like
    object, from the given input PO template file.

    The header of the template is copied with the following changes: the
    title comment names the language (and country), the ``FIRST AUTHOR``
    comment and the ``Last-Translator`` field are filled in when an author is
    given, ``PO-Revision-Date`` is set to the current time, and a
    ``Plural-Forms`` field is added after ``Content-Transfer-Encoding`` unless
    one already follows it.  Everything after the header is copied unchanged.

    >>> from StringIO import StringIO
    >>> inbuf = StringIO(r'''# Translations Template for FooBar.
    ... # Copyright (C) 2007 ORGANIZATION
    ... # This file is distributed under the same license as the
    ... # FooBar project.
    ... # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
    ... #
    ... #, fuzzy
    ... msgid ""
    ... msgstr ""
    ... "Project-Id-Version: FooBar 0.1\n"
    ... "POT-Creation-Date: 2007-06-07 22:54+0100\n"
    ... "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
    ... "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
    ... "Language-Team: LANGUAGE <LL@li.org>\n"
    ... "MIME-Version: 1.0\n"
    ... "Content-Type: text/plain; charset=utf-8\n"
    ... "Content-Transfer-Encoding: 8bit\n"
    ... "Generated-By: Babel 0.1dev-r50\n"
    ...
    ... #: base.py:83 templates/index.html:9
    ... #: templates/index2.html:9
    ... msgid "Home"
    ... msgstr ""
    ...
    ... #: base.py:84 templates/index.html:9
    ... msgid "Accounts"
    ... msgstr ""
    ... ''')
    >>> outbuf = StringIO()
    >>> write_po(outbuf, inbuf, 'English', project='FooBar',
    ...          version='0.1', first_author='A Name',
    ...          first_author_email='user@domain.tld',
    ...          plurals=(2, '(n != 1)'))
    >>> print outbuf.getvalue() # doctest: +ELLIPSIS
    # English Translations for FooBar
    # Copyright (C) 2007 ORGANIZATION
    # This file is distributed under the same license as the
    # FooBar project.
    # A Name <user@domain.tld>, ...
    #
    #, fuzzy
    msgid ""
    msgstr ""
    "Project-Id-Version: FooBar 0.1\n"
    "POT-Creation-Date: 2007-06-07 22:54+0100\n"
    "PO-Revision-Date: ...\n"
    "Last-Translator: A Name <user@domain.tld>\n"
    "Language-Team: LANGUAGE <LL@li.org>\n"
    "MIME-Version: 1.0\n"
    "Content-Type: text/plain; charset=utf-8\n"
    "Content-Transfer-Encoding: 8bit\n"
    "Plural-Forms: nplurals=2; plural=(n != 1);\n"
    "Generated-By: Babel ...\n"
    <BLANKLINE>
    #: base.py:83 templates/index.html:9
    #: templates/index2.html:9
    msgid "Home"
    msgstr ""
    <BLANKLINE>
    #: base.py:84 templates/index.html:9
    msgid "Accounts"
    msgstr ""
    <BLANKLINE>
    >>>

    :param fileobj: the file-like object to write to
    :param input_fileobj: the file-like object to read the PO template from
    :param language: the language name used in the title comment
    :param country: the optional country name used in the title comment
    :param project: the project name used in the title comment
    :param version: the project version (accepted but currently not used)
    :param first_author: the name of the first author/translator
    :param first_author_email: the e-mail address of the first author
    :param plurals: a ``(nplurals, plural_expression)`` pair used for the
                    ``Plural-Forms`` header field
    """
    # Build "Name <email>" from whichever parts were supplied, without a
    # stray leading space when only the e-mail address is given.
    author_parts = []
    if first_author:
        author_parts.append(first_author)
    if first_author_email:
        author_parts.append('<%s>' % first_author_email)
    _first_author = ' '.join(author_parts)

    inlines = input_fileobj.readlines()
    outlines = []
    in_header = True
    for index, line in enumerate(inlines):
        if not in_header:
            # The header is done: copy the remainder verbatim.
            outlines.extend(inlines[index:])
            break

        # Look ahead safely; the current line may be the last one.
        next_line = ''
        if index + 1 < len(inlines):
            next_line = inlines[index + 1]

        if '# Translations Template' in line:
            # Format in one step so that "%" characters in the language,
            # country or project names are not interpreted as placeholders.
            if country:
                outlines.append('# %s (%s) Translations for %s\n'
                                % (language, country, project))
            else:
                outlines.append('# %s Translations for %s\n'
                                % (language, project))
        elif '# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.' in line:
            if _first_author:
                outlines.append(
                    '# %s, %s\n' % (_first_author, time.strftime('%Y'))
                )
            else:
                outlines.append(line)
        elif '"PO-Revision-Date:' in line:
            outlines.append(
                '"PO-Revision-Date: %s\\n"\n'
                % time.strftime('%Y-%m-%d %H:%M%z')
            )
        elif '"Last-Translator:' in line:
            if _first_author:
                outlines.append('"Last-Translator: %s\\n"\n' % _first_author)
            else:
                outlines.append(line)
        elif '"Content-Transfer-Encoding:' in line:
            outlines.append(line)
            if '"Plural-Forms:' not in next_line:
                outlines.append(
                    '"Plural-Forms: nplurals=%s; plural=%s;\\n"\n'
                    % tuple(plurals)
                )
        else:
            outlines.append(line)

        # The header ends at a quoted line terminated by an escaped newline
        # that is followed by a blank line.  This is checked for every header
        # line so that the end is also detected when the last header field is
        # one of those rewritten above.
        if line.endswith('\\n"\n') and next_line == '\n':
            in_header = False
    fileobj.writelines(outlines)