comparison 0.9.x/babel/messages/catalog.py @ 263:5b7d3f9f7d74 stable

Create branch for 0.9.x maintenance.
author cmlenz
date Mon, 20 Aug 2007 08:34:32 +0000
parents
children 65c6ca63aca3
comparison
equal deleted inserted replaced
197:79565db4faf0 263:5b7d3f9f7d74
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2007 Edgewall Software
4 # All rights reserved.
5 #
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://babel.edgewall.org/wiki/License.
9 #
10 # This software consists of voluntary contributions made by many
11 # individuals. For the exact contribution history, see the revision
12 # history and logs, available at http://babel.edgewall.org/log/.
13
14 """Data structures for message catalogs."""
15
16 from cgi import parse_header
17 from datetime import datetime
18 from difflib import get_close_matches
19 from email import message_from_string
20 import re
21 try:
22 set
23 except NameError:
24 from sets import Set as set
25 import time
26
27 from babel import __version__ as VERSION
28 from babel.core import Locale
29 from babel.dates import format_datetime
30 from babel.messages.plurals import PLURALS
31 from babel.util import odict, distinct, LOCALTZ, UTC, FixedOffsetTimezone
32
33 __all__ = ['Message', 'Catalog', 'TranslationError']
34 __docformat__ = 'restructuredtext en'
35
# Matches one Python ``%``-style format specifier such as ``%s``,
# ``%(name)s`` or ``%-5.2f``; used to detect "python-format" messages.
PYTHON_FORMAT = re.compile(r'\%(\([\w]+\))?([-#0\ +])?(\*|[\d]+)?'
                           r'(\.(\*|[\d]+))?([hlL])?[diouxXeEfFgGcrs]')


class Message(object):
    """Representation of a single message in a catalog."""

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        """
        # ``id`` has to be assigned first: the ``pluralizable`` and
        # ``python_format`` properties used below are derived from it
        self.id = id #: The message ID
        if not string and self.pluralizable:
            string = (u'', u'')
        self.string = string #: The message translation
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        # The "python-format" flag always reflects the actual message ID,
        # regardless of what the caller passed in ``flags``
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # Normalize ``previous_id`` to a list, whether a single string or a
        # sequence of strings was given
        if isinstance(previous_id, basestring):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno

    def __repr__(self):
        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
                                        list(self.flags))

    def __cmp__(self, obj):
        """Compare Messages, taking into account plural ids.

        A pluralizable message is compared by its singular ID, so that
        ``('foo', 'foos')`` and ``'foo'`` compare as equal.

        :param obj: the object to compare with; anything that is not a
                    `Message` is compared through its ``id`` attribute
        :return: a negative, zero or positive integer, as with ``cmp()``
        """
        if isinstance(obj, Message):
            plural = self.pluralizable
            obj_plural = obj.pluralizable
            if plural and obj_plural:
                return cmp(self.id[0], obj.id[0])
            elif plural:
                return cmp(self.id[0], obj.id)
            elif obj_plural:
                return cmp(self.id, obj.id[0])
        return cmp(self.id, obj.id)

    def fuzzy(self):
        return 'fuzzy' in self.flags
    fuzzy = property(fuzzy, doc="""\
        Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`
        """)

    def pluralizable(self):
        return isinstance(self.id, (list, tuple))
    pluralizable = property(pluralizable, doc="""\
        Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`
        """)

    def python_format(self):
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        # Stop at the first ID that contains a format specifier.  An explicit
        # loop is used instead of ``bool(filter(...))`` so the result does not
        # depend on ``filter`` returning a list.
        for msgid in ids:
            if PYTHON_FORMAT.search(msgid):
                return True
        return False
    python_format = property(python_format, doc="""\
        Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True
        >>> Message('foo').python_format
        False

        :type:  `bool`
        """)
139
140
class TranslationError(Exception):
    """Signals an invalid message translation.

    Translation checkers raise this exception when they encounter a
    translation that fails validation.
    """
144
145
# Default header comment of a catalog.  The upper-case words PROJECT, YEAR
# and ORGANIZATION are placeholders that `Catalog.header_comment` substitutes
# when the comment is read back.
DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
152
153
154 class Catalog(object):
155 """Representation of a message catalog."""
156
157 def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
158 project=None, version=None, copyright_holder=None,
159 msgid_bugs_address=None, creation_date=None,
160 revision_date=None, last_translator=None, language_team=None,
161 charset='utf-8', fuzzy=True):
162 """Initialize the catalog object.
163
164 :param locale: the locale identifier or `Locale` object, or `None`
165 if the catalog is not bound to a locale (which basically
166 means it's a template)
167 :param domain: the message domain
168 :param header_comment: the header comment as string, or `None` for the
169 default header
170 :param project: the project's name
171 :param version: the project's version
172 :param copyright_holder: the copyright holder of the catalog
173 :param msgid_bugs_address: the email address or URL to submit bug
174 reports to
175 :param creation_date: the date the catalog was created
176 :param revision_date: the date the catalog was revised
177 :param last_translator: the name and email of the last translator
178 :param language_team: the name and email of the language team
179 :param charset: the encoding to use in the output
180 :param fuzzy: the fuzzy bit on the catalog header
181 """
182 self.domain = domain #: The message domain
183 if locale:
184 locale = Locale.parse(locale)
185 self.locale = locale #: The locale or `None`
186 self._header_comment = header_comment
187 self._messages = odict()
188
189 self.project = project or 'PROJECT' #: The project name
190 self.version = version or 'VERSION' #: The project version
191 self.copyright_holder = copyright_holder or 'ORGANIZATION'
192 self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
193
194 self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
195 """Name and email address of the last translator."""
196 self.language_team = language_team or 'LANGUAGE <LL@li.org>'
197 """Name and email address of the language team."""
198
199 self.charset = charset or 'utf-8'
200
201 if creation_date is None:
202 creation_date = datetime.now(LOCALTZ)
203 elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
204 creation_date = creation_date.replace(tzinfo=LOCALTZ)
205 self.creation_date = creation_date #: Creation date of the template
206 if revision_date is None:
207 revision_date = datetime.now(LOCALTZ)
208 elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
209 revision_date = revision_date.replace(tzinfo=LOCALTZ)
210 self.revision_date = revision_date #: Last revision date of the catalog
211 self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`)
212
213 self.obsolete = odict() #: Dictionary of obsolete messages
214
215 def _get_header_comment(self):
216 comment = self._header_comment
217 comment = comment.replace('PROJECT', self.project) \
218 .replace('VERSION', self.version) \
219 .replace('YEAR', self.revision_date.strftime('%Y')) \
220 .replace('ORGANIZATION', self.copyright_holder)
221 if self.locale:
222 comment = comment.replace('Translations template', '%s translations'
223 % self.locale.english_name)
224 return comment
225
226 def _set_header_comment(self, string):
227 self._header_comment = string
228
229 header_comment = property(_get_header_comment, _set_header_comment, doc="""\
230 The header comment for the catalog.
231
232 >>> catalog = Catalog(project='Foobar', version='1.0',
233 ... copyright_holder='Foo Company')
234 >>> print catalog.header_comment
235 # Translations template for Foobar.
236 # Copyright (C) 2007 Foo Company
237 # This file is distributed under the same license as the Foobar project.
238 # FIRST AUTHOR <EMAIL@ADDRESS>, 2007.
239 #
240
241 The header can also be set from a string. Any known upper-case variables
242 will be replaced when the header is retrieved again:
243
244 >>> catalog = Catalog(project='Foobar', version='1.0',
245 ... copyright_holder='Foo Company')
246 >>> catalog.header_comment = '''\\
247 ... # The POT for my really cool PROJECT project.
248 ... # Copyright (C) 1990-2003 ORGANIZATION
249 ... # This file is distributed under the same license as the PROJECT
250 ... # project.
251 ... #'''
252 >>> print catalog.header_comment
253 # The POT for my really cool Foobar project.
254 # Copyright (C) 1990-2003 Foo Company
255 # This file is distributed under the same license as the Foobar
256 # project.
257 #
258
259 :type: `unicode`
260 """)
261
262 def _get_mime_headers(self):
263 headers = []
264 headers.append(('Project-Id-Version',
265 '%s %s' % (self.project, self.version)))
266 headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
267 headers.append(('POT-Creation-Date',
268 format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
269 locale='en')))
270 if self.locale is None:
271 headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE'))
272 headers.append(('Last-Translator', 'FULL NAME <EMAIL@ADDRESS>'))
273 headers.append(('Language-Team', 'LANGUAGE <LL@li.org>'))
274 else:
275 headers.append(('PO-Revision-Date',
276 format_datetime(self.revision_date,
277 'yyyy-MM-dd HH:mmZ', locale='en')))
278 headers.append(('Last-Translator', self.last_translator))
279 headers.append(('Language-Team',
280 self.language_team.replace('LANGUAGE',
281 str(self.locale))))
282 headers.append(('Plural-Forms', self.plural_forms))
283 headers.append(('MIME-Version', '1.0'))
284 headers.append(('Content-Type',
285 'text/plain; charset=%s' % self.charset))
286 headers.append(('Content-Transfer-Encoding', '8bit'))
287 headers.append(('Generated-By', 'Babel %s\n' % VERSION))
288 return headers
289
290 def _set_mime_headers(self, headers):
291 for name, value in headers:
292 if name == 'content-type':
293 mimetype, params = parse_header(value)
294 if 'charset' in params:
295 self.charset = params['charset'].lower()
296 break
297 for name, value in headers:
298 name = name.lower().decode(self.charset)
299 value = value.decode(self.charset)
300 if name == 'project-id-version':
301 parts = value.split(' ')
302 self.project = u' '.join(parts[:-1])
303 self.version = parts[-1]
304 elif name == 'report-msgid-bugs-to':
305 self.msgid_bugs_address = value
306 elif name == 'last-translator':
307 self.last_translator = value
308 elif name == 'language-team':
309 self.language_team = value
310 elif name == 'pot-creation-date':
311 # FIXME: this should use dates.parse_datetime as soon as that
312 # is ready
313 value, tzoffset, _ = re.split('[+-](\d{4})$', value, 1)
314 tt = time.strptime(value, '%Y-%m-%d %H:%M')
315 ts = time.mktime(tt)
316 tzoffset = FixedOffsetTimezone(int(tzoffset[:2]) * 60 +
317 int(tzoffset[2:]))
318 dt = datetime.fromtimestamp(ts)
319 self.creation_date = dt.replace(tzinfo=tzoffset)
320
321 mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
322 The MIME headers of the catalog, used for the special ``msgid ""`` entry.
323
324 The behavior of this property changes slightly depending on whether a locale
325 is set or not, the latter indicating that the catalog is actually a template
326 for actual translations.
327
328 Here's an example of the output for such a catalog template:
329
330 >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
331 >>> catalog = Catalog(project='Foobar', version='1.0',
332 ... creation_date=created)
333 >>> for name, value in catalog.mime_headers:
334 ... print '%s: %s' % (name, value)
335 Project-Id-Version: Foobar 1.0
336 Report-Msgid-Bugs-To: EMAIL@ADDRESS
337 POT-Creation-Date: 1990-04-01 15:30+0000
338 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
339 Last-Translator: FULL NAME <EMAIL@ADDRESS>
340 Language-Team: LANGUAGE <LL@li.org>
341 MIME-Version: 1.0
342 Content-Type: text/plain; charset=utf-8
343 Content-Transfer-Encoding: 8bit
344 Generated-By: Babel ...
345
346 And here's an example of the output when the locale is set:
347
348 >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
349 >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
350 ... creation_date=created, revision_date=revised,
351 ... last_translator='John Doe <jd@example.com>',
352 ... language_team='de_DE <de@example.com>')
353 >>> for name, value in catalog.mime_headers:
354 ... print '%s: %s' % (name, value)
355 Project-Id-Version: Foobar 1.0
356 Report-Msgid-Bugs-To: EMAIL@ADDRESS
357 POT-Creation-Date: 1990-04-01 15:30+0000
358 PO-Revision-Date: 1990-08-03 12:00+0000
359 Last-Translator: John Doe <jd@example.com>
360 Language-Team: de_DE <de@example.com>
361 Plural-Forms: nplurals=2; plural=(n != 1)
362 MIME-Version: 1.0
363 Content-Type: text/plain; charset=utf-8
364 Content-Transfer-Encoding: 8bit
365 Generated-By: Babel ...
366
367 :type: `list`
368 """)
369
370 def num_plurals(self):
371 num = 2
372 if self.locale:
373 if str(self.locale) in PLURALS:
374 num = PLURALS[str(self.locale)][0]
375 elif self.locale.language in PLURALS:
376 num = PLURALS[self.locale.language][0]
377 return num
378 num_plurals = property(num_plurals, doc="""\
379 The number of plurals used by the locale.
380
381 >>> Catalog(locale='en').num_plurals
382 2
383 >>> Catalog(locale='cs_CZ').num_plurals
384 3
385
386 :type: `int`
387 """)
388
389 def plural_forms(self):
390 num, expr = ('INTEGER', 'EXPRESSION')
391 if self.locale:
392 if str(self.locale) in PLURALS:
393 num, expr = PLURALS[str(self.locale)]
394 elif self.locale.language in PLURALS:
395 num, expr = PLURALS[self.locale.language]
396 return 'nplurals=%s; plural=%s' % (num, expr)
397 plural_forms = property(plural_forms, doc="""\
398 Return the plural forms declaration for the locale.
399
400 >>> Catalog(locale='en').plural_forms
401 'nplurals=2; plural=(n != 1)'
402 >>> Catalog(locale='pt_BR').plural_forms
403 'nplurals=2; plural=(n > 1)'
404
405 :type: `str`
406 """)
407
408 def __contains__(self, id):
409 """Return whether the catalog has a message with the specified ID."""
410 return self._key_for(id) in self._messages
411
412 def __len__(self):
413 """The number of messages in the catalog.
414
415 This does not include the special ``msgid ""`` entry.
416 """
417 return len(self._messages)
418
419 def __iter__(self):
420 """Iterates through all the entries in the catalog, in the order they
421 were added, yielding a `Message` object for every entry.
422
423 :rtype: ``iterator``
424 """
425 buf = []
426 for name, value in self.mime_headers:
427 buf.append('%s: %s' % (name, value))
428 flags = set()
429 if self.fuzzy:
430 flags |= set(['fuzzy'])
431 yield Message(u'', '\n'.join(buf), flags=flags)
432 for key in self._messages:
433 yield self._messages[key]
434
435 def __repr__(self):
436 locale = ''
437 if self.locale:
438 locale = ' %s' % self.locale
439 return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
440
441 def __delitem__(self, id):
442 """Delete the message with the specified ID."""
443 key = self._key_for(id)
444 if key in self._messages:
445 del self._messages[key]
446
447 def __getitem__(self, id):
448 """Return the message with the specified ID.
449
450 :param id: the message ID
451 :return: the message with the specified ID, or `None` if no such message
452 is in the catalog
453 :rtype: `Message`
454 """
455 return self._messages.get(self._key_for(id))
456
457 def __setitem__(self, id, message):
458 """Add or update the message with the specified ID.
459
460 >>> catalog = Catalog()
461 >>> catalog[u'foo'] = Message(u'foo')
462 >>> catalog[u'foo']
463 <Message u'foo' (flags: [])>
464
465 If a message with that ID is already in the catalog, it is updated
466 to include the locations and flags of the new message.
467
468 >>> catalog = Catalog()
469 >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
470 >>> catalog[u'foo'].locations
471 [('main.py', 1)]
472 >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
473 >>> catalog[u'foo'].locations
474 [('main.py', 1), ('utils.py', 5)]
475
476 :param id: the message ID
477 :param message: the `Message` object
478 """
479 assert isinstance(message, Message), 'expected a Message object'
480 key = self._key_for(id)
481 current = self._messages.get(key)
482 if current:
483 if message.pluralizable and not current.pluralizable:
484 # The new message adds pluralization
485 current.id = message.id
486 current.string = message.string
487 current.locations = list(distinct(current.locations +
488 message.locations))
489 current.auto_comments = list(distinct(current.auto_comments +
490 message.auto_comments))
491 current.user_comments = list(distinct(current.user_comments +
492 message.user_comments))
493 current.flags |= message.flags
494 message = current
495 elif id == '':
496 # special treatment for the header message
497 headers = message_from_string(message.string.encode(self.charset))
498 self.mime_headers = headers.items()
499 self.header_comment = '\n'.join(['# %s' % comment for comment
500 in message.user_comments])
501 self.fuzzy = message.fuzzy
502 else:
503 if isinstance(id, (list, tuple)):
504 assert isinstance(message.string, (list, tuple))
505 self._messages[key] = message
506
507 def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
508 user_comments=(), previous_id=(), lineno=None):
509 """Add or update the message with the specified ID.
510
511 >>> catalog = Catalog()
512 >>> catalog.add(u'foo')
513 >>> catalog[u'foo']
514 <Message u'foo' (flags: [])>
515
516 This method simply constructs a `Message` object with the given
517 arguments and invokes `__setitem__` with that object.
518
519 :param id: the message ID, or a ``(singular, plural)`` tuple for
520 pluralizable messages
521 :param string: the translated message string, or a
522 ``(singular, plural)`` tuple for pluralizable messages
523 :param locations: a sequence of ``(filenname, lineno)`` tuples
524 :param flags: a set or sequence of flags
525 :param auto_comments: a sequence of automatic comments
526 :param user_comments: a sequence of user comments
527 :param previous_id: the previous message ID, or a ``(singular, plural)``
528 tuple for pluralizable messages
529 :param lineno: the line number on which the msgid line was found in the
530 PO file, if any
531 """
532 self[id] = Message(id, string, list(locations), flags, auto_comments,
533 user_comments, previous_id, lineno=lineno)
534
535 def check(self):
536 """Run various validation checks on the translations in the catalog.
537
538 For every message which fails validation, this method yield a
539 ``(message, errors)`` tuple, where ``message`` is the `Message` object
540 and ``errors`` is a sequence of `TranslationError` objects.
541
542 :note: this feature requires ``setuptools``/``pkg_resources`` to be
543 installed; if it is not, this method will simply return an empty
544 iterator
545 :rtype: ``iterator``
546 """
547 checkers = []
548 try:
549 from pkg_resources import working_set
550 except ImportError:
551 return
552 else:
553 for entry_point in working_set.iter_entry_points('babel.checkers'):
554 checkers.append(entry_point.load())
555 for message in self._messages.values():
556 errors = []
557 for checker in checkers:
558 try:
559 checker(self, message)
560 except TranslationError, e:
561 errors.append(e)
562 if errors:
563 yield message, errors
564
565 def update(self, template, no_fuzzy_matching=False):
566 """Update the catalog based on the given template catalog.
567
568 >>> from babel.messages import Catalog
569 >>> template = Catalog()
570 >>> template.add('green', locations=[('main.py', 99)])
571 >>> template.add('blue', locations=[('main.py', 100)])
572 >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
573 >>> catalog = Catalog(locale='de_DE')
574 >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
575 >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
576 >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
577 ... locations=[('util.py', 38)])
578
579 >>> catalog.update(template)
580 >>> len(catalog)
581 3
582
583 >>> msg1 = catalog['green']
584 >>> msg1.string
585 >>> msg1.locations
586 [('main.py', 99)]
587
588 >>> msg2 = catalog['blue']
589 >>> msg2.string
590 u'blau'
591 >>> msg2.locations
592 [('main.py', 100)]
593
594 >>> msg3 = catalog['salad']
595 >>> msg3.string
596 (u'Salat', u'Salate')
597 >>> msg3.locations
598 [('util.py', 42)]
599
600 Messages that are in the catalog but not in the template are removed
601 from the main collection, but can still be accessed via the `obsolete`
602 member:
603
604 >>> 'head' in catalog
605 False
606 >>> catalog.obsolete.values()
607 [<Message 'head' (flags: [])>]
608
609 :param template: the reference catalog, usually read from a POT file
610 :param no_fuzzy_matching: whether to use fuzzy matching of message IDs
611 """
612 messages = self._messages
613 self._messages = odict()
614
615 for message in template:
616 if message.id:
617 key = self._key_for(message.id)
618 if key in messages:
619 oldmsg = messages.pop(key)
620 message.string = oldmsg.string
621 message.flags |= oldmsg.flags
622 self[message.id] = message
623
624 else:
625 if no_fuzzy_matching is False:
626 # do some fuzzy matching with difflib
627 matches = get_close_matches(key.lower().strip(),
628 [self._key_for(msgid) for msgid in messages], 1)
629 if matches:
630 oldmsg = messages.pop(matches[0])
631 message.string = oldmsg.string
632 message.flags |= oldmsg.flags | set([u'fuzzy'])
633 if isinstance(oldmsg.id, basestring):
634 message.previous_id = [oldmsg.id]
635 else:
636 message.previous_id = list(oldmsg.id)
637 self[message.id] = message
638 continue
639
640 self[message.id] = message
641
642 self.obsolete = messages
643
644 def _key_for(self, id):
645 """The key for a message is just the singular ID even for pluralizable
646 messages.
647 """
648 key = id
649 if isinstance(key, (list, tuple)):
650 key = id[0]
651 return key
Copyright (C) 2012-2017 Edgewall Software