comparison examples/trac/trac/mimeview/api.py @ 39:93b4dcbafd7b trunk

Copy Trac to main branch.
author cmlenz
date Mon, 03 Jul 2006 18:53:27 +0000
parents
children
comparison
equal deleted inserted replaced
38:ee669cb9cccc 39:93b4dcbafd7b
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2004-2006 Edgewall Software
4 # Copyright (C) 2004 Daniel Lundin <daniel@edgewall.com>
5 # Copyright (C) 2005-2006 Christopher Lenz <cmlenz@gmx.de>
6 # Copyright (C) 2006 Christian Boos <cboos@neuf.fr>
7 # All rights reserved.
8 #
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://trac.edgewall.com/license.html.
12 #
13 # This software consists of voluntary contributions made by many
14 # individuals. For the exact contribution history, see the revision
15 # history and logs, available at http://projects.edgewall.com/trac/.
16 #
17 # Author: Daniel Lundin <daniel@edgewall.com>
18 # Christopher Lenz <cmlenz@gmx.de>
19 # Christian Boos <cboos@neuf.fr>
20
21 """
22 The `trac.mimeview` module centralizes the intelligence related to
23 file metadata, principally concerning the `type` (MIME type) of the content
24 and, if relevant, concerning the text encoding (charset) used by the content.
25
26 There are primarily two approaches for getting the MIME type of a given file:
27 * taking advantage of existing conventions for the file name
28 * examining the file content and applying various heuristics
29
30 The module also knows how to convert the file content from one type
31 to another type.
32
33 In some cases, only the `url` pointing to the file's content is actually
34 needed, which is why we avoid reading the file's content when it's not needed.
35
36 The actual `content` to be converted might be a `unicode` object,
37 but it can also be the raw byte string (`str`) object, or simply
38 an object that can be `read()`.
39 """
40
41 import re
42 from StringIO import StringIO
43
44 from trac.config import IntOption, ListOption, Option
45 from trac.core import *
46 from trac.util import sorted
47 from trac.util.text import to_utf8, to_unicode
48 from trac.util.markup import escape, Markup, Fragment, html
49
50
# Names exported by `from trac.mimeview.api import *`.
__all__ = [
    'get_mimetype',
    'is_binary',
    'detect_unicode',
    'Mimeview',
    'content_to_unicode',
]
53
54
# Well-known MIME types, each mapped to the keywords and/or file extensions
# that identify it.

KNOWN_MIME_TYPES = {
    # application/*
    'application/pdf': ['pdf'],
    'application/postscript': ['ps'],
    'application/rtf': ['rtf'],
    'application/x-sh': ['sh'],
    'application/x-csh': ['csh'],
    'application/x-troff': ['nroff', 'roff', 'troff'],

    # image/*
    'image/x-icon': ['ico'],
    'image/svg+xml': ['svg'],

    # model/*
    'model/vrml': ['vrml', 'wrl'],

    # text/*
    'text/css': ['css'],
    'text/html': ['html'],
    'text/plain': [
        'txt', 'TXT', 'text', 'README', 'INSTALL',
        'AUTHORS', 'COPYING', 'ChangeLog', 'RELEASE',
    ],
    'text/xml': ['xml'],
    'text/xsl': ['xsl'],
    'text/x-csrc': ['c', 'xs'],
    'text/x-chdr': ['h'],
    'text/x-c++src': ['cc', 'CC', 'cpp', 'C'],
    'text/x-c++hdr': ['hh', 'HH', 'hpp', 'H'],
    'text/x-diff': ['diff', 'patch'],
    'text/x-eiffel': ['e'],
    'text/x-elisp': ['el'],
    'text/x-fortran': ['f'],
    'text/x-haskell': ['hs'],
    'text/x-javascript': ['js'],
    'text/x-objc': ['m', 'mm'],
    'text/x-makefile': [
        'make', 'mk', 'Makefile', 'makefile', 'GNUMakefile',
    ],
    'text/x-pascal': ['pas'],
    'text/x-perl': ['pl', 'pm', 'PL', 'perl'],
    'text/x-php': ['php', 'php3', 'php4'],
    'text/x-python': ['py', 'python'],
    'text/x-pyrex': ['pyx'],
    'text/x-ruby': ['rb', 'ruby'],
    'text/x-scheme': ['scm'],
    'text/x-textile': ['txtl', 'textile'],
    'text/x-vba': ['vb', 'vba', 'bas'],
    'text/x-verilog': ['v', 'verilog'],
    'text/x-vhdl': ['vhd'],
}

# Add the trivial "text/x-<keyword>" types, whose only keyword is <keyword>
# itself.

for x in ('ada', 'asm', 'asp', 'awk', 'idl', 'inf', 'java', 'ksh', 'lua',
          'm4', 'mail', 'psp', 'rfc', 'rst', 'sql', 'tcl', 'tex', 'zsh'):
    KNOWN_MIME_TYPES.setdefault('text/x-%s' % x, []).append(x)


# Default lookup table: every MIME type maps to itself, and every keyword or
# extension maps to the MIME type it belongs to.

MIME_MAP = {}
for t in KNOWN_MIME_TYPES:
    exts = KNOWN_MIME_TYPES[t]
    for e in [t] + exts:
        MIME_MAP[e] = t
116
# Built-in content sniffing: a single regexp with three alternatives, each
# capturing a "mode" keyword in its own group.
#
# NOTE(review): inside the first character class, `.-_` is parsed as a range
# from '.' to '_' rather than as three literal characters, so it also accepts
# characters such as ':' -- confirm whether that is relied upon before
# tightening it.
MODE_RE = re.compile(
    # group 1: interpreter name from a shebang line
    r"#!(?:[/\w.-_]+/)?(\w+)|"
    # group 2: Emacs' -*- mode -*- marker
    r"-\*-\s*(?:mode:\s*)?([\w+-]+)\s*-\*-|"
    # group 3: VIM's syntax=<n> modeline
    r"vim:.*?syntax=(\w+)"
)
123
def get_mimetype(filename, content=None, mime_map=MIME_MAP):
    """Guess the most probable MIME type of a file with the given name.

    `filename` is either a filename (the lookup will then use the suffix)
    or some arbitrary keyword. `None` is accepted and treated like an
    empty name, so that only the `content` is used for the detection.

    `content` is either a `str` or an `unicode` string.

    `mime_map` is the keyword/extension to MIME type mapping to use.

    The detection steps are, in order:
     1. the suffix of `filename`, looked up in `mime_map`
     2. the `filename`, as guessed by the standard `mimetypes` module
     3. a mode keyword found by `MODE_RE` in the first 1000 characters of
        `content`, looked up in `mime_map`
     4. 'application/octet-stream' if `MODE_RE` found nothing and
        `is_binary` considers the `content` to be binary

    Return the MIME type, or `None` if none of the above succeeded.
    """
    # Callers such as `Mimeview.render` pass `filename=None` when they only
    # have the content at hand; `None.split` would raise an AttributeError.
    filename = filename or ''

    suffix = filename.split('.')[-1]
    if suffix in mime_map:
        # 1) mimetype from the suffix, using the `mime_map`
        return mime_map[suffix]

    mimetype = None
    try:
        import mimetypes
        # 2) mimetype from the suffix, using the `mimetypes` module
        mimetype = mimetypes.guess_type(filename)[0]
    except Exception:
        # best effort only: fall through to the content based detection
        pass

    if not mimetype and content:
        match = MODE_RE.search(content[:1000])
        if match:
            # exactly one of the three groups is set, depending on which
            # alternative of `MODE_RE` matched
            mode = match.group(1) or match.group(3) or \
                   match.group(2).lower()
            if mode in mime_map:
                # 3) mimetype from the content, using the `MODE_RE`
                return mime_map[mode]
        elif is_binary(content):
            # 4) mimetype from the content, using `is_binary`
            return 'application/octet-stream'
    return mimetype
157
def is_binary(data):
    """Tell whether `data` looks like binary content.

    The content is considered binary when a NUL character is found within
    its first thousand characters, except for a `str` that starts with a
    BOM recognized by `detect_unicode`, which is never considered binary.

    Operate on either `str` or `unicode` strings.
    """
    starts_with_bom = isinstance(data, str) and detect_unicode(data)
    if starts_with_bom:
        return False
    head = data[:1000]
    return '\0' in head
166
def detect_unicode(data):
    """Detect an unicode charset by looking for a BOM (Byte Order Mark).

    Return 'utf-16-le', 'utf-16-be' or 'utf-8' depending on the BOM found
    at the start of `data`, or `None` if there is none.

    Operate obviously only on `str` objects.
    """
    known_boms = (
        ('\xff\xfe', 'utf-16-le'),
        ('\xfe\xff', 'utf-16-be'),
        ('\xef\xbb\xbf', 'utf-8'),
    )
    for bom, charset in known_boms:
        if data.startswith(bom):
            return charset
    return None
180
def content_to_unicode(env, content, mimetype):
    """Retrieve an `unicode` object from a `content` to be previewed.

    If `content` has a `read` method, at most `max_preview_size` (as
    configured for the `Mimeview` component of `env`) is read from it
    before the conversion.
    """
    viewer = Mimeview(env)
    if not hasattr(content, 'read'):
        return viewer.to_unicode(content, mimetype)
    data = content.read(viewer.max_preview_size)
    return viewer.to_unicode(data, mimetype)
187
188
class IHTMLPreviewRenderer(Interface):
    """Extension point interface for components that provide an HTML
    renderer for specific content types to the `Mimeview` component.

    (Deprecated)
    """

    # Implementations set this to True when they handle text content in
    # which Trac should expand the tabs into spaces before rendering.
    expand_tabs = False

    def get_quality_ratio(mimetype):
        """Tell how well this renderer supports the given MIME type.

        The return value must be a number between 0 and 9, where 0 means
        no support and 9 means "perfect" support.
        """

    def render(req, mimetype, content, filename=None, url=None):
        """Render an XHTML preview of the raw `content`.

        The `content` might be:
         * a `str` object
         * an `unicode` string
         * any object with a `read` method, returning one of the above

        The content is assumed to correspond to the given `mimetype`.

        The same content may also be reachable through the `filename` or
        `url` parameters, which is useful for renderers that embed objects
        using <object> or <img> instead of including the content inline.

        The generated XHTML text can be returned either as a single string
        or as an iterable that yields strings. In the latter case, the
        items are considered to correspond to lines of text in the
        original content.
        """
225
class IHTMLPreviewAnnotator(Interface):
    """Extension point interface for components that can add extra
    information to an XHTML representation of file contents."""

    def get_annotation_type():
        """Return a `(type, label, description)` tuple describing the
        annotation.

        The `type` element should be unique to the annotator. The `label`
        element is used as column heading for the table, while the
        `description` is the display name shown to the user for toggling
        the appearance of this annotation type.
        """

    def annotate_line(number, content):
        """Return the XHTML markup for the table cell holding the
        annotation data of the given line."""
241
242
class IContentConverter(Interface):
    """Extension point interface for generic, MIME type based content
    conversion."""

    def get_supported_conversions():
        """Return an iterable of the supported MIME conversions.

        Each item is a tuple of the form `(key, name, extension,
        in_mimetype, out_mimetype, quality)`, where `quality` is the
        quality ratio of the conversion in the range 0 to 9: 0 means no
        support and 9 means "perfect" support.

        e.g. `('latex', 'LaTeX', 'tex', 'text/x-trac-wiki', 'text/plain',
        8)`
        """

    def convert_content(req, mimetype, content, key):
        """Convert the `content` from `mimetype` to the output MIME type
        represented by `key`.

        Return a `(content, output_mime_type)` tuple, or `None` if the
        conversion is not possible.
        """
259
260
261 class Mimeview(Component):
262 """A generic class to prettify data, typically source code."""
263
264 renderers = ExtensionPoint(IHTMLPreviewRenderer)
265 annotators = ExtensionPoint(IHTMLPreviewAnnotator)
266 converters = ExtensionPoint(IContentConverter)
267
268 default_charset = Option('trac', 'default_charset', 'iso-8859-15',
269 """Charset to be used when in doubt.""")
270
271 tab_width = IntOption('mimeviewer', 'tab_width', 8,
272 """Displayed tab width in file preview (''since 0.9'').""")
273
274 max_preview_size = IntOption('mimeviewer', 'max_preview_size', 262144,
275 """Maximum file size for HTML preview. (''since 0.9'').""")
276
277 mime_map = ListOption('mimeviewer', 'mime_map',
278 'text/x-dylan:dylan,text/x-idl:ice,text/x-ada:ads:adb',
279 """List of additional MIME types and keyword mappings.
280 Mappings are comma-separated, and for each MIME type,
281 there's a colon (":") separated list of associated keywords
282 or file extensions. (''since 0.10'').""")
283
284 def __init__(self):
285 self._mime_map = None
286
287 # Public API
288
289 def get_supported_conversions(self, mimetype):
290 """Return a list of target MIME types in same form as
291 `IContentConverter.get_supported_conversions()`, but with the converter
292 component appended. Output is ordered from best to worst quality."""
293 converters = []
294 for converter in self.converters:
295 for k, n, e, im, om, q in converter.get_supported_conversions():
296 if im == mimetype and q > 0:
297 converters.append((k, n, e, im, om, q, converter))
298 converters = sorted(converters, key=lambda i: i[-1], reverse=True)
299 return converters
300
301 def convert_content(self, req, mimetype, content, key, filename=None,
302 url=None):
303 """Convert the given content to the target MIME type represented by
304 `key`, which can be either a MIME type or a key. Returns a tuple of
305 (content, output_mime_type, extension)."""
306 if not content:
307 return ('', 'text/plain;charset=utf-8')
308
309 # Ensure we have a MIME type for this content
310 full_mimetype = mimetype
311 if not full_mimetype:
312 if hasattr(content, 'read'):
313 content = content.read(self.max_preview_size)
314 full_mimetype = self.get_mimetype(filename, content)
315 if full_mimetype:
316 mimetype = full_mimetype.split(';')[0].strip() # split off charset
317 else:
318 mimetype = full_mimetype = 'text/plain' # fallback if not binary
319
320 # Choose best converter
321 candidates = list(self.get_supported_conversions(mimetype))
322 candidates = [c for c in candidates if key in (c[0], c[4])]
323 if not candidates:
324 raise TracError('No available MIME conversions from %s to %s' %
325 (mimetype, key))
326
327 # First successful conversion wins
328 for ck, name, ext, input_mimettype, output_mimetype, quality, \
329 converter in candidates:
330 output = converter.convert_content(req, mimetype, content, ck)
331 if not output:
332 continue
333 return (output[0], output[1], ext)
334 raise TracError('No available MIME conversions from %s to %s' %
335 (mimetype, key))
336
337 def get_annotation_types(self):
338 """Generator that returns all available annotation types."""
339 for annotator in self.annotators:
340 yield annotator.get_annotation_type()
341
342 def render(self, req, mimetype, content, filename=None, url=None,
343 annotations=None):
344 """Render an XHTML preview of the given `content`.
345
346 `content` is the same as an `IHTMLPreviewRenderer.render`'s
347 `content` argument.
348
349 The specified `mimetype` will be used to select the most appropriate
350 `IHTMLPreviewRenderer` implementation available for this MIME type.
351 If not given, the MIME type will be infered from the filename or the
352 content.
353
354 Return a string containing the XHTML text.
355 """
356 if not content:
357 return ''
358
359 # Ensure we have a MIME type for this content
360 full_mimetype = mimetype
361 if not full_mimetype:
362 if hasattr(content, 'read'):
363 content = content.read(self.max_preview_size)
364 full_mimetype = self.get_mimetype(filename, content)
365 if full_mimetype:
366 mimetype = full_mimetype.split(';')[0].strip() # split off charset
367 else:
368 mimetype = full_mimetype = 'text/plain' # fallback if not binary
369
370 # Determine candidate `IHTMLPreviewRenderer`s
371 candidates = []
372 for renderer in self.renderers:
373 qr = renderer.get_quality_ratio(mimetype)
374 if qr > 0:
375 candidates.append((qr, renderer))
376 candidates.sort(lambda x,y: cmp(y[0], x[0]))
377
378 # First candidate which renders successfully wins.
379 # Also, we don't want to expand tabs more than once.
380 expanded_content = None
381 for qr, renderer in candidates:
382 try:
383 self.log.debug('Trying to render HTML preview using %s'
384 % renderer.__class__.__name__)
385 # check if we need to perform a tab expansion
386 rendered_content = content
387 if getattr(renderer, 'expand_tabs', False):
388 if expanded_content is None:
389 content = content_to_unicode(self.env, content,
390 full_mimetype)
391 expanded_content = content.expandtabs(self.tab_width)
392 rendered_content = expanded_content
393 result = renderer.render(req, full_mimetype, rendered_content,
394 filename, url)
395 if not result:
396 continue
397 elif isinstance(result, Fragment):
398 return result
399 elif isinstance(result, basestring):
400 return Markup(to_unicode(result))
401 elif annotations:
402 return Markup(self._annotate(result, annotations))
403 else:
404 buf = StringIO()
405 buf.write('<div class="code"><pre>')
406 for line in result:
407 buf.write(line + '\n')
408 buf.write('</pre></div>')
409 return Markup(buf.getvalue())
410 except Exception, e:
411 self.log.warning('HTML preview using %s failed (%s)'
412 % (renderer, e), exc_info=True)
413
414 def _annotate(self, lines, annotations):
415 buf = StringIO()
416 buf.write('<table class="code"><thead><tr>')
417 annotators = []
418 for annotator in self.annotators:
419 atype, alabel, adesc = annotator.get_annotation_type()
420 if atype in annotations:
421 buf.write('<th class="%s">%s</th>' % (atype, alabel))
422 annotators.append(annotator)
423 buf.write('<th class="content">&nbsp;</th>')
424 buf.write('</tr></thead><tbody>')
425
426 space_re = re.compile('(?P<spaces> (?: +))|'
427 '^(?P<tag><\w+.*?>)?( )')
428 def htmlify(match):
429 m = match.group('spaces')
430 if m:
431 div, mod = divmod(len(m), 2)
432 return div * '&nbsp; ' + mod * '&nbsp;'
433 return (match.group('tag') or '') + '&nbsp;'
434
435 num = -1
436 for num, line in enumerate(_html_splitlines(lines)):
437 cells = []
438 for annotator in annotators:
439 cells.append(annotator.annotate_line(num + 1, line))
440 cells.append('<td>%s</td>\n' % space_re.sub(htmlify, line))
441 buf.write('<tr>' + '\n'.join(cells) + '</tr>')
442 else:
443 if num < 0:
444 return ''
445 buf.write('</tbody></table>')
446 return buf.getvalue()
447
448 def get_max_preview_size(self):
449 """Deprecated: use `max_preview_size` attribute directly."""
450 return self.max_preview_size
451
452 def get_charset(self, content='', mimetype=None):
453 """Infer the character encoding from the `content` or the `mimetype`.
454
455 `content` is either a `str` or an `unicode` object.
456
457 The charset will be determined using this order:
458 * from the charset information present in the `mimetype` argument
459 * auto-detection of the charset from the `content`
460 * the configured `default_charset`
461 """
462 if mimetype:
463 ctpos = mimetype.find('charset=')
464 if ctpos >= 0:
465 return mimetype[ctpos + 8:].strip()
466 if isinstance(content, str):
467 utf = detect_unicode(content)
468 if utf is not None:
469 return utf
470 return self.default_charset
471
472 def get_mimetype(self, filename, content=None):
473 """Infer the MIME type from the `filename` or the `content`.
474
475 `content` is either a `str` or an `unicode` object.
476
477 Return the detected MIME type, augmented by the
478 charset information (i.e. "<mimetype>; charset=..."),
479 or `None` if detection failed.
480 """
481 # Extend default extension to MIME type mappings with configured ones
482 if not self._mime_map:
483 self._mime_map = MIME_MAP
484 for mapping in self.config['mimeviewer'].getlist('mime_map'):
485 if ':' in mapping:
486 assocations = mapping.split(':')
487 for keyword in assocations: # Note: [0] kept on purpose
488 self._mime_map[keyword] = assocations[0]
489
490 mimetype = get_mimetype(filename, content, self._mime_map)
491 charset = None
492 if mimetype:
493 charset = self.get_charset(content, mimetype)
494 if mimetype and charset and not 'charset' in mimetype:
495 mimetype += '; charset=' + charset
496 return mimetype
497
498 def to_utf8(self, content, mimetype=None):
499 """Convert an encoded `content` to utf-8.
500
501 ''Deprecated in 0.10. You should use `unicode` strings only.''
502 """
503 return to_utf8(content, self.get_charset(content, mimetype))
504
505 def to_unicode(self, content, mimetype=None, charset=None):
506 """Convert `content` (an encoded `str` object) to an `unicode` object.
507
508 This calls `trac.util.to_unicode` with the `charset` provided,
509 or the one obtained by `Mimeview.get_charset()`.
510 """
511 if not charset:
512 charset = self.get_charset(content, mimetype)
513 return to_unicode(content, charset)
514
515 def configured_modes_mapping(self, renderer):
516 """Return a MIME type to `(mode,quality)` mapping for given `option`"""
517 types, option = {}, '%s_modes' % renderer
518 for mapping in self.config['mimeviewer'].getlist(option):
519 if not mapping:
520 continue
521 try:
522 mimetype, mode, quality = mapping.split(':')
523 types[mimetype] = (mode, int(quality))
524 except (TypeError, ValueError):
525 self.log.warning("Invalid mapping '%s' specified in '%s' "
526 "option." % (mapping, option))
527 return types
528
529 def preview_to_hdf(self, req, content, length, mimetype, filename,
530 url=None, annotations=None):
531 """Prepares a rendered preview of the given `content`.
532
533 Note: `content` will usually be an object with a `read` method.
534 """
535 if length >= self.max_preview_size:
536 return {'max_file_size_reached': True,
537 'max_file_size': self.max_preview_size,
538 'raw_href': url}
539 else:
540 return {'preview': self.render(req, mimetype, content, filename,
541 url, annotations),
542 'raw_href': url}
543
544 def send_converted(self, req, in_type, content, selector, filename='file'):
545 """Helper method for converting `content` and sending it directly.
546
547 `selector` can be either a key or a MIME Type."""
548 from trac.web import RequestDone
549 content, output_type, ext = self.convert_content(req, in_type,
550 content, selector)
551 req.send_response(200)
552 req.send_header('Content-Type', output_type)
553 req.send_header('Content-Disposition', 'filename=%s.%s' % (filename,
554 ext))
555 req.end_headers()
556 req.write(content)
557 raise RequestDone
558
559
560 def _html_splitlines(lines):
561 """Tracks open and close tags in lines of HTML text and yields lines that
562 have no tags spanning more than one line."""
563 open_tag_re = re.compile(r'<(\w+)(\s.*?)?[^/]?>')
564 close_tag_re = re.compile(r'</(\w+)>')
565 open_tags = []
566 for line in lines:
567 # Reopen tags still open from the previous line
568 for tag in open_tags:
569 line = tag.group(0) + line
570 open_tags = []
571
572 # Find all tags opened on this line
573 for tag in open_tag_re.finditer(line):
574 open_tags.append(tag)
575
576 open_tags.reverse()
577
578 # Find all tags closed on this line
579 for ctag in close_tag_re.finditer(line):
580 for otag in open_tags:
581 if otag.group(1) == ctag.group(1):
582 open_tags.remove(otag)
583 break
584
585 # Close all tags still open at the end of line, they'll get reopened at
586 # the beginning of the next line
587 for tag in open_tags:
588 line += '</%s>' % tag.group(1)
589
590 yield line
591
592
593 # -- Default annotators
594
class LineNumberAnnotator(Component):
    """Annotator adding a column of line numbers, each one being both an
    anchor and a link to that anchor."""
    implements(IHTMLPreviewAnnotator)

    # IHTMLPreviewAnnotator methods

    def get_annotation_type(self):
        return ('lineno', 'Line', 'Line numbers')

    def annotate_line(self, number, content):
        cell = '<th id="L%s"><a href="#L%s">%s</a></th>'
        return cell % ((number,) * 3)
607
608
609 # -- Default renderers
610
class PlainTextRenderer(Component):
    """HTML preview renderer for plain text, and fallback for any kind of text
    for which no more specific renderer is available.
    """
    implements(IHTMLPreviewRenderer)

    expand_tabs = True

    # MIME types which are never rendered as text
    TREAT_AS_BINARY = [
        'application/pdf',
        'application/postscript',
        'application/rtf'
    ]

    def get_quality_ratio(self, mimetype):
        """Return 0 for the `TREAT_AS_BINARY` types and 1 for anything
        else, the lowest quality indicating some support."""
        if mimetype in self.TREAT_AS_BINARY:
            return 0
        return 1

    def render(self, req, mimetype, content, filename=None, url=None):
        """Return the list of the escaped lines of `content`, or `None` if
        the content is detected as being binary.

        This is deliberately not a generator: a generator object would
        always be true for the caller, even when no preview is available,
        and the binary check would only happen once it gets iterated.
        """
        if is_binary(content):
            self.env.log.debug("Binary data; no preview available")
            return None

        self.env.log.debug("Using default plain text mimeviewer")
        content = content_to_unicode(self.env, content, mimetype)
        return [escape(line) for line in content.splitlines()]
639
640
class ImageRenderer(Component):
    """Display images inline, by referring to their `url`: the `content`
    itself is never needed here."""
    implements(IHTMLPreviewRenderer)

    def get_quality_ratio(self, mimetype):
        if not mimetype.startswith('image/'):
            return 0
        return 8

    def render(self, req, mimetype, content, filename=None, url=None):
        # without an URL to point to, there's nothing to display
        if not url:
            return None
        image = html.IMG(src=url, alt=filename)
        return html.DIV(image, class_="image-file")
654
655
class WikiTextRenderer(Component):
    """Renderer for the files written using Trac's own Wiki markup."""
    implements(IHTMLPreviewRenderer)

    def get_quality_ratio(self, mimetype):
        wiki_types = ('text/x-trac-wiki', 'application/x-trac-wiki')
        if mimetype not in wiki_types:
            return 0
        return 8

    def render(self, req, mimetype, content, filename=None, url=None):
        from trac.wiki import wiki_to_html
        text = content_to_unicode(self.env, content, mimetype)
        return wiki_to_html(text, self.env, req)
Copyright (C) 2012-2017 Edgewall Software