comparison 0.8.x/babel/messages/pofile.py @ 142:4a7af44e6695 stable

Create branch for 0.8.x releases.
author cmlenz
date Wed, 20 Jun 2007 10:09:07 +0000
parents
children 90866b11734f
comparison
equal deleted inserted replaced
1:bf36ec5f5e50 142:4a7af44e6695
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2007 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://babel.edgewall.org/wiki/License.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://babel.edgewall.org/log/.
13
14 """Reading and writing of files in the ``gettext`` PO (portable object)
15 format.
16
17 :see: `The Format of PO Files
18 <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
19 """
20
21 from datetime import date, datetime
22 import os
23 import re
24 try:
25 set
26 except NameError:
27 from sets import Set as set
28 from textwrap import wrap
29
30 from babel import __version__ as VERSION
31 from babel.messages.catalog import Catalog
32 from babel.util import LOCALTZ
33
# Names exported by a star-import of this module.  ``unescape`` and
# ``denormalize`` are defined below as well but are not part of this list.
__all__ = ['escape', 'normalize', 'read_po', 'write_po']
35
def read_po(fileobj):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    ('foo %(name)s', '')
      ([('main.py', 1)], set(['fuzzy', 'python-format']))
      ([], [])
    (('bar', 'baz'), ('', ''))
      ([('main.py', 3)], set([]))
      (['A user comment'], ['An auto comment'])

    :param fileobj: the file-like object to read the PO file from; it must
                    provide a ``readlines()`` method
    :return: the catalog holding every message found in the file
    :rtype: `Catalog`
    :raise ValueError: if a ``#:`` location reference has no ``:lineno`` part
                       or its line number is not an integer
    """
    catalog = Catalog()

    # State of the entry currently being collected.  The lists are cleared in
    # place by ``_add_message`` so that the closure keeps seeing them.
    messages = []
    translations = []
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    in_msgid = in_msgstr = False

    def _add_message():
        # Flush the collected entry into the catalog and reset the state.
        translations.sort()  # order plural forms by their numeric index
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if len(translations) > 1:
            string = tuple([denormalize(t[1]) for t in translations])
        else:
            string = denormalize(translations[0][1])
        catalog.add(msgid, string, list(locations), set(flags),
                    list(auto_comments), list(user_comments))
        for collected in (messages, translations, locations, flags,
                          auto_comments, user_comments):
            del collected[:]

    for line in fileobj.readlines():
        line = line.strip()
        if line.startswith('#'):
            in_msgid = in_msgstr = False
            # A comment after a message body starts the next entry.
            if messages:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    # Split on the *last* colon so that file names which
                    # contain a colon themselves (``C:/proj/main.py:3``)
                    # are kept intact.
                    pos = location.rfind(':')
                    if pos < 0:
                        raise ValueError('invalid location reference %r'
                                         % location)
                    locations.append((location[:pos],
                                      int(location[pos + 1:])))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:
                    # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            # ``msgid_plural`` has to be tested before ``msgid`` because the
            # latter is a prefix of the former.
            if line.startswith('msgid_plural'):
                in_msgid = True
                msg = line[12:].lstrip()
                messages.append(msg)
            elif line.startswith('msgid'):
                in_msgid = True
                if messages:
                    _add_message()
                messages.append(line[5:].lstrip())
            elif line.startswith('msgstr'):
                in_msgid = False
                in_msgstr = True
                msg = line[6:].lstrip()
                if msg.startswith('['):
                    # Only split at the first ``]``: the translation text
                    # itself may legitimately contain that character.
                    idx, msg = msg[1:].split(']', 1)
                    translations.append([int(idx), msg.lstrip()])
                else:
                    translations.append([0, msg])
            elif line.startswith('"'):
                # Continuation line of the preceding msgid/msgstr; kept as a
                # separate physical line for ``denormalize`` to join.
                if in_msgid:
                    messages[-1] += u'\n' + line.rstrip()
                elif in_msgstr:
                    translations[-1][1] += u'\n' + line.rstrip()

    if messages:
        _add_message()
    return catalog
149
# Pattern used to split a line into chunks at which it may be wrapped.  The
# whole alternation is one capturing group, so ``WORD_SEP.split()`` returns
# the separators as well as the text between them.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                   # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',   # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',    # em-dash
]))
155
def escape(string):
    r"""Turn the given string into a double-quoted literal suitable for
    ``PO`` files.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences, and the result is wrapped
    in double quotes.

    >>> escape('''Say:
    ...  "hello, world!"
    ... ''')
    '"Say:\\n \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, including the surrounding double quotes
    :rtype: `str` or `unicode`
    """
    escaped = string
    # The backslash must be handled first; otherwise the backslashes that the
    # later replacements introduce would be doubled as well.
    for char, sequence in (('\\', '\\\\'),
                           ('\t', '\\t'),
                           ('\r', '\\r'),
                           ('\n', '\\n'),
                           ('"', '\\"')):
        escaped = escaped.replace(char, sequence)
    return '"%s"' % escaped
174
def unescape(string):
    r"""Reverse escape the given string.

    The first and last character (the surrounding double quotes) are dropped
    and the escape sequences ``\\``, ``\t``, ``\r``, ``\n`` and ``\"`` are
    decoded.  Any other backslash is left untouched.

    >>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
    Say:
     "hello, world!"
    <BLANKLINE>

    An escaped backslash followed by a letter stays a backslash and a letter:

    >>> print(unescape('"C:\\\\new"'))
    C:\new

    :param string: the quoted string to unescape
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    def _decode(match):
        char = match.group(1)
        if char == 'n':
            return '\n'
        if char == 't':
            return '\t'
        if char == 'r':
            return '\r'
        # A backslash or a double quote stands for itself.
        return char

    # Decode in a single left-to-right pass.  Chaining ``str.replace`` calls
    # would first turn ``\\n`` (escaped backslash + "n") into ``\n`` and then
    # wrongly decode that into a newline.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
192
def normalize(string, width=76):
    r"""Render a string the way it is written to .po files: as one quoted
    literal, or as an empty literal followed by one quoted literal per line.

    >>> print(normalize('''Say:
    ...  "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    " \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...  "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    " \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if width and width > 0:
        lines = []
        for source_line in string.splitlines(True):
            if len(escape(source_line)) <= width:
                lines.append(source_line)
                continue
            # The line is too long: refill it chunk by chunk.  The chunk list
            # is reversed so that the next chunk can be popped off the end.
            pending = WORD_SEP.split(source_line)
            pending.reverse()
            while pending:
                current = []
                used = 2  # the two surrounding double quotes
                while pending:
                    cost = len(escape(pending[-1])) - 2
                    if used + cost >= width:
                        if not current:
                            # a single chunk that does not fit is put on a
                            # line of its own
                            current.append(pending.pop())
                        break
                    current.append(pending.pop())
                    used += cost
                lines.append(u''.join(current))
    else:
        lines = string.splitlines(True)

    # Nothing to spread over several lines: emit a single quoted literal.
    if len(lines) <= 1:
        return escape(string)

    # Remove an empty trailing line (``lines`` has at least two items here)
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted = [escape(item) for item in lines]
    return u'""\n' + u'\n'.join(quoted)
252
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"hello, world!\"\n"'''))
    Say:
     "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... " \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
     "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A value that continues over several lines without a leading empty
    literal is joined as well:

    >>> print(denormalize(r'''"Hello, "
    ... "world!"'''))
    Hello, world!

    :param string: the string to denormalize
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    starts_empty = string.startswith('""')
    if starts_empty or '\n' in string:
        quoted_lines = string.splitlines()
        if starts_empty:
            # The leading ``""`` only introduces the multi-line form.
            quoted_lines = quoted_lines[1:]
        # Each physical line is a quoted literal of its own and has to be
        # unescaped separately before the pieces are concatenated.
        return ''.join([unescape(line) for line in quoted_lines])
    return unescape(string)
283
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: sort the messages by their message id before writing
    :param sort_by_file: sort the messages by their locations before writing;
                         only honoured when `sort_output` is not set
    """
    wrapping = bool(width and width > 0)

    def _normalize(key):
        return normalize(key, width=width).encode(catalog.charset,
                                                  'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(text, prefix=''):
        # Emit ``text`` as ``#<prefix> ...`` comment lines.  ``textwrap.wrap``
        # is only used when wrapping is enabled; otherwise every physical
        # line of the text becomes one comment line.
        if wrapping:
            lines = wrap(text, width, break_long_words=False)
        else:
            lines = text.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    messages = list(catalog)
    if sort_output:
        messages.sort(lambda x, y: cmp(x.id, y.id))
    elif sort_by_file:
        messages.sort(lambda x, y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if wrapping:
                lines = []
                for line in comment_header.splitlines():
                    lines += wrap(line, width=width, subsequent_indent='# ',
                                  break_long_words=False)
                comment_header = u'\n'.join(lines) + u'\n'
            elif comment_header and not comment_header.endswith('\n'):
                # NOTE(review): assumes ``header_comment`` may lack a final
                # newline; without one the following ``msgid`` line would be
                # glued onto the last comment line.
                comment_header += u'\n'
            _write(comment_header)

        for comment in message.user_comments:
            _write_comment(comment)

        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            # Always goes through ``_write_comment`` so that the unwrapped
            # string is written as a whole, never character by character.
            _write_comment(locs, prefix=':')
        if message.flags:
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if isinstance(message.id, (list, tuple)):
            _write('msgid %s\n' % _normalize(message.id[0]))
            _write('msgid_plural %s\n' % _normalize(message.id[1]))
            for i, string in enumerate(message.string):
                _write('msgstr[%d] %s\n' % (i, _normalize(string)))
        else:
            _write('msgid %s\n' % _normalize(message.id))
            _write('msgstr %s\n' % _normalize(message.string or ''))
        _write('\n')
Copyright (C) 2012-2017 Edgewall Software