comparison babel/messages/pofile.py @ 56:27d55a07c897

Rename the `babel.catalog` package to `babel.messages` for consistency with the other package names.
author cmlenz
date Fri, 08 Jun 2007 09:16:32 +0000
parents babel/catalog/pofile.py@52dbebdd3789
children e7080996fc46
comparison
equal deleted inserted replaced
55:b298e583d326 56:27d55a07c897
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2007 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://babel.edgewall.org/wiki/License.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://babel.edgewall.org/log/.
13
14 """Reading and writing of files in the ``gettext`` PO (portable object)
15 format.
16
17 :see: `The Format of PO Files
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
19 """
20
21 from datetime import date, datetime
22 import re
23 try:
24 set
25 except NameError:
26 from sets import Set as set
27 import textwrap
28 import time
29
30 from babel import __version__ as VERSION
31
32 __all__ = ['escape', 'normalize', 'read_po', 'write_po', 'write_pot']
33
def read_po(fileobj):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object.

    This function yields tuples of the form:

        ``(message, translation, locations, flags)``

    where:

     * ``message`` is a tuple holding the original (untranslated) message;
       it has one item for simple messages and two items
       ``(singular, plural)`` for pluralizable messages
     * ``translation`` is a tuple of translations, ordered by plural index
     * ``locations`` is a tuple of ``(filename, lineno)`` tuples
     * ``flags`` is a set of strings (for example, "fuzzy")

    Continuation lines are concatenated onto the preceding string. The quoted
    text is returned as found in the file: escape sequences are not decoded.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> for message, translation, locations, flags in read_po(buf):
    ...     print (message, translation)
    ...     print ' ', (locations, flags)
    (('foo %(name)s',), ('',))
      ((('main.py', 1),), set(['fuzzy', 'python-format']))
    (('bar', 'baz'), ('', ''))
      ((('main.py', 3),), set([]))

    :param fileobj: the file-like object to read the PO file from
    :return: an iterator over ``(message, translation, locations, flags)``
             tuples
    :rtype: ``iterator``
    :raise ValueError: if a ``#:`` location has no ``:lineno`` suffix or the
                       line number is not an integer
    """
    messages = []
    translations = []
    locations = []
    flags = []
    in_msgid = in_msgstr = False

    def pack():
        # Freeze the entry collected so far and reset the shared buffers so
        # that the next entry starts from a clean state.
        translations.sort()
        retval = (tuple(messages), tuple([t[1] for t in translations]),
                  tuple(locations), set(flags))
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        return retval

    for line in fileobj.readlines():
        line = line.strip()
        if line.startswith('#'):
            in_msgid = in_msgstr = False
            # A comment after a message means the previous entry is complete.
            if messages:
                yield pack()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    # Split on the LAST colon so that filenames which contain
                    # a colon themselves (e.g. drive letters) stay intact.
                    pos = location.rfind(':')
                    if pos < 0:
                        raise ValueError('invalid location %r' % location)
                    locations.append((location[:pos],
                                      int(location[pos + 1:])))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
        elif line:
            # ``msgid_plural`` must be tested before ``msgid`` because the
            # latter is a prefix of the former.
            if line.startswith('msgid_plural'):
                in_msgid = True
                msg = line[12:].lstrip()
                messages.append(msg[1:-1])
            elif line.startswith('msgid'):
                in_msgid = True
                # Entries that are not separated by a comment are split here.
                if messages:
                    yield pack()
                msg = line[5:].lstrip()
                messages.append(msg[1:-1])
            elif line.startswith('msgstr'):
                in_msgid = False
                in_msgstr = True
                msg = line[6:].lstrip()
                if msg.startswith('['):
                    # Only the first ``]`` closes the plural index; any later
                    # one belongs to the translated text.
                    idx, msg = msg[1:].split(']', 1)
                    translations.append([int(idx), msg.lstrip()[1:-1]])
                else:
                    translations.append([0, msg[1:-1]])
            elif line.startswith('"'):
                # Continuation line: append to whichever string is open.
                if in_msgid:
                    messages[-1] += line.rstrip()[1:-1]
                elif in_msgstr:
                    translations[-1][1] += line.rstrip()[1:-1]

    if messages:
        yield pack()
133
# Template for the header entry written at the top of a PO template file.
#
# The string is formatted in two passes: the Babel version is interpolated
# right here (``% VERSION``), which also collapses every ``%%(name)s`` into
# ``%(name)s``; ``write_pot`` later fills those remaining placeholders
# (``project``, ``year``, ``version``, ``creation_date``, ``charset``) from a
# dict.  ``\\n`` produces a literal backslash followed by ``n`` in the output.
POT_HEADER = """\
# Translations Template for %%(project)s.
# Copyright (C) %%(year)s ORGANIZATION
# This file is distributed under the same license as the
# %%(project)s project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: %%(project)s %%(version)s\\n"
"POT-Creation-Date: %%(creation_date)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%%(charset)s\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: Babel %s\\n"

""" % VERSION
155
# ``PYTHON_FORMAT(text)`` is the bound ``search`` of a compiled pattern: it
# returns a match object for the first ``%`` that is followed by an optional
# ``(name)`` and a single conversion character, or ``None`` if there is none.
_PYTHON_FORMAT_PATTERN = r'\%(\([\w]+\))?[diouxXeEfFgGcrs]'
PYTHON_FORMAT = re.compile(_PYTHON_FORMAT_PATTERN).search
157
# Pattern used to split a line into chunks that may be wrapped separately.
# The outer group is capturing, so ``WORD_SEP.split()`` keeps the separators
# in its result.
_WORD_SEP_ALTERNATIVES = (
    r'\s+',                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
)
WORD_SEP = re.compile('(' + '|'.join(_WORD_SEP_ALTERNATIVES) + ')')
163
def escape(string):
    r"""Quote a string for use inside a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escapes, and the result is wrapped in double
    quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to be handled first; otherwise the backslashes that
    # the later replacements introduce would be doubled as well.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = string
    for raw, replacement in substitutions:
        escaped = escaped.replace(raw, replacement)
    return '"%s"' % escaped
182
def normalize(string, width=76):
    r"""This converts a string into a format that is appropriate for .po files.

    A string that occupies a single output line (or is empty) is returned
    simply escaped and quoted. Anything longer is emitted in the multi-line
    form: an empty ``""`` first, followed by one quoted string per line.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) > width:
                # Reverse the chunks so they can be consumed cheaply with
                # ``pop()`` from the end of the list.
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two surrounding double quotes
                    while chunks:
                        # ``escape`` adds the quotes, hence the ``- 2``
                        length = len(escape(chunks[-1])) - 2
                        if size + length < width:
                            buf.append(chunks.pop())
                            size += length
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    # ``splitlines`` returns an empty list for an empty string, so "at most
    # one line" (not "exactly one") is the single-line case; otherwise the
    # ``lines[-1]`` access below would fail for ``normalize('')``.
    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([escape(l) for l in lines])
242
def write_pot(fileobj, messages, project='PROJECT', version='VERSION', width=76,
              charset='utf-8', no_location=False, omit_header=False):
    r"""Write a ``gettext`` PO (portable object) template file to the given
    file-like object.

    The `messages` parameter is expected to be an iterable object producing
    tuples of the form:

        ``(filename, lineno, funcname, message, flags)``

    where ``message`` is either a string or a ``(singular, plural)`` pair.
    Messages with the same (singular) text are merged into one entry that
    lists all locations and the union of all flags. The ``python-format``
    flag is set automatically on the first occurrence of a message, depending
    on whether the text contains a Python format specifier. ``funcname`` is
    accepted but not used.

    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_pot(buf, [
    ...     ('main.py', 1, None, u'foo %(name)s', ('fuzzy',)),
    ...     ('main.py', 3, 'ngettext', (u'bar', u'baz'), None)
    ... ], omit_header=True)

    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param messages: an iterable over the messages
    :param project: the project name
    :param version: the project version
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param charset: the encoding
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    """
    def _normalize(key):
        return normalize(key, width=width).encode(charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(charset)
        fileobj.write(text)

    if not omit_header:
        _write(POT_HEADER % {
            'year': time.strftime('%Y'),
            'project': project,
            'version': version,
            'creation_date': time.strftime('%Y-%m-%d %H:%M%z'),
            'charset': charset,
        })

    locations = {}
    msgflags = {}
    msgids = []     # preserves the order of first occurrence
    plurals = {}

    for filename, lineno, funcname, key, flags in messages:
        flags = set(flags or [])
        if isinstance(key, (list, tuple)):
            assert len(key) == 2
            plurals[key[0]] = key[1]
            key = key[0]
        # Dict membership instead of scanning the ``msgids`` list keeps this
        # loop linear in the number of messages.
        if key in locations:
            locations[key].append((filename, lineno))
            msgflags[key] |= flags
        else:
            if PYTHON_FORMAT(key):
                flags.add('python-format')
            else:
                flags.discard('python-format')

            locations[key] = [(filename, lineno)]
            msgflags[key] = flags
            msgids.append(key)

    for msgid in msgids:
        if not no_location:
            locs = u' '.join([u'%s:%d' % item for item in locations[msgid]])
            if width and width > 0:
                locs = textwrap.wrap(locs, width, break_long_words=False)
            else:
                # Without wrapping there is exactly one comment line; leaving
                # ``locs`` a plain string would make the loop below iterate
                # over its characters.
                locs = [locs]
            for line in locs:
                _write('#: %s\n' % line.strip())
        flags = list(msgflags[msgid])
        if flags:
            # Set iteration order is arbitrary; sort for reproducible output.
            flags.sort()
            _write('#%s\n' % ', '.join([''] + flags))

        if msgid in plurals:
            _write('msgid %s\n' % _normalize(msgid))
            _write('msgid_plural %s\n' % _normalize(plurals[msgid]))
            _write('msgstr[0] ""\n')
            _write('msgstr[1] ""\n')
        else:
            _write('msgid %s\n' % _normalize(msgid))
            _write('msgstr ""\n')
        _write('\n')
346
def write_po(fileobj, input_fileobj, language, country=None, project='PROJECT',
             version='VERSION', first_author=None, first_author_email=None,
             plurals=('INTEGER', 'EXPRESSION')):
    r"""Write a ``gettext`` PO (portable object) file to the given file-like
    object, from the given input PO template file.

    Only the header of the template is rewritten: the title comment, the
    ``FIRST AUTHOR`` comment, and the ``PO-Revision-Date`` and
    ``Last-Translator`` fields are replaced, and a ``Plural-Forms`` field is
    inserted after ``Content-Transfer-Encoding`` unless one already follows
    it. Everything after the header is copied unchanged.

    >>> from StringIO import StringIO
    >>> inbuf = StringIO(r'''# Translations Template for FooBar.
    ... # Copyright (C) 2007 ORGANIZATION
    ... # This file is distributed under the same license as the
    ... # FooBar project.
    ... # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
    ... #
    ... #, fuzzy
    ... msgid ""
    ... msgstr ""
    ... "Project-Id-Version: FooBar 0.1\n"
    ... "POT-Creation-Date: 2007-06-07 22:54+0100\n"
    ... "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
    ... "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
    ... "Language-Team: LANGUAGE <LL@li.org>\n"
    ... "MIME-Version: 1.0\n"
    ... "Content-Type: text/plain; charset=utf-8\n"
    ... "Content-Transfer-Encoding: 8bit\n"
    ... "Generated-By: Babel 0.1dev-r50\n"
    ...
    ... #: base.py:83 templates/index.html:9
    ... #: templates/index2.html:9
    ... msgid "Home"
    ... msgstr ""
    ...
    ... #: base.py:84 templates/index.html:9
    ... msgid "Accounts"
    ... msgstr ""
    ... ''')
    >>> outbuf = StringIO()
    >>> write_po(outbuf, inbuf, 'English', project='FooBar',
    ...          version='0.1', first_author='A Name',
    ...          first_author_email='user@domain.tld',
    ...          plurals=(2, '(n != 1)'))
    >>> print outbuf.getvalue() # doctest: +ELLIPSIS
    # English Translations for FooBar
    # Copyright (C) 2007 ORGANIZATION
    # This file is distributed under the same license as the
    # FooBar project.
    # A Name <user@domain.tld>, ...
    #
    #, fuzzy
    msgid ""
    msgstr ""
    "Project-Id-Version: FooBar 0.1\n"
    "POT-Creation-Date: 2007-06-07 22:54+0100\n"
    "PO-Revision-Date: ...\n"
    "Last-Translator: A Name <user@domain.tld>\n"
    "Language-Team: LANGUAGE <LL@li.org>\n"
    "MIME-Version: 1.0\n"
    "Content-Type: text/plain; charset=utf-8\n"
    "Content-Transfer-Encoding: 8bit\n"
    "Plural-Forms: nplurals=2; plural=(n != 1);\n"
    "Generated-By: Babel ...\n"
    <BLANKLINE>
    #: base.py:83 templates/index.html:9
    #: templates/index2.html:9
    msgid "Home"
    msgstr ""
    <BLANKLINE>
    #: base.py:84 templates/index.html:9
    msgid "Accounts"
    msgstr ""
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param input_fileobj: the file-like object to read the PO template from
    :param language: the language name used in the title comment
    :param country: optional country name, added in parentheses to the title
    :param project: the project name used in the title comment
    :param version: the project version (accepted but currently not used)
    :param first_author: the name of the first author / last translator
    :param first_author_email: the email address of the first author
    :param plurals: a ``(nplurals, plural_expression)`` pair for the
                    ``Plural-Forms`` header field
    """
    # Join only the parts that were given, so that an email without a name
    # does not produce a stray leading space.
    author_parts = []
    if first_author:
        author_parts.append(first_author)
    if first_author_email:
        author_parts.append('<%s>' % first_author_email)
    _first_author = ' '.join(author_parts)

    # Format the title in a single pass so that a ``%`` in ``language`` or
    # ``country`` cannot be mistaken for a format specifier.
    if country:
        title = '# %s (%s) Translations for %s\n' % (language, country,
                                                     project)
    else:
        title = '# %s Translations for %s\n' % (language, project)

    inlines = input_fileobj.readlines()
    outlines = []
    last_index = len(inlines) - 1
    for index, line in enumerate(inlines):
        # Look ahead safely: the last line has no successor.
        if index < last_index:
            following = inlines[index + 1]
        else:
            following = ''

        if '# Translations Template' in line:
            outlines.append(title)
        elif '# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.' in line:
            if _first_author:
                outlines.append(
                    '# %s, %s\n' % (_first_author, time.strftime('%Y'))
                )
            else:
                outlines.append(line)
        elif '"PO-Revision-Date:' in line:
            outlines.append(
                '"PO-Revision-Date: %s\\n"\n' %
                time.strftime('%Y-%m-%d %H:%M%z')
            )
        elif '"Last-Translator:' in line:
            if _first_author:
                outlines.append('"Last-Translator: %s\\n"\n' % _first_author)
            else:
                outlines.append(line)
        elif '"Content-Transfer-Encoding:' in line:
            outlines.append(line)
            if '"Plural-Forms:' not in following:
                outlines.append(
                    '"Plural-Forms: nplurals=%s; plural=%s;\\n"\n' %
                    tuple(plurals)
                )
        else:
            outlines.append(line)

        # The header ends with the last quoted field that is followed by a
        # blank line; this is checked for every line, including the ones that
        # were rewritten above.  The remainder is copied verbatim.
        if line.endswith('\\n"\n') and following == '\n':
            outlines.extend(inlines[index + 1:])
            break
    fileobj.writelines(outlines)
Copyright (C) 2012-2017 Edgewall Software