Mercurial > genshi > mirror
diff examples/trac/trac/mimeview/api.py @ 39:93b4dcbafd7b trunk
Copy Trac to main branch.
author | cmlenz |
---|---|
date | Mon, 03 Jul 2006 18:53:27 +0000 |
parents | |
children |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/examples/trac/trac/mimeview/api.py @@ -0,0 +1,668 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2004-2006 Edgewall Software +# Copyright (C) 2004 Daniel Lundin <daniel@edgewall.com> +# Copyright (C) 2005-2006 Christopher Lenz <cmlenz@gmx.de> +# Copyright (C) 2006 Christian Boos <cboos@neuf.fr> +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://trac.edgewall.com/license.html. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://projects.edgewall.com/trac/. +# +# Author: Daniel Lundin <daniel@edgewall.com> +# Christopher Lenz <cmlenz@gmx.de> +# Christian Boos <cboos@neuf.fr> + +""" +The `trac.mimeview` module centralizes the intelligence related to +file metadata, principally concerning the `type` (MIME type) of the content +and, if relevant, concerning the text encoding (charset) used by the content. + +There are primarily two approaches for getting the MIME type of a given file: + * taking advantage of existing conventions for the file name + * examining the file content and applying various heuristics + +The module also knows how to convert the file content from one type +to another type. + +In some cases, only the `url` pointing to the file's content is actually +needed, that's why we avoid reading the file's content when it's not needed. + +The actual `content` to be converted might be a `unicode` object, +but it can also be the raw byte string (`str`) object, or simply +an object that can be `read()`. 
+""" + +import re +from StringIO import StringIO + +from trac.config import IntOption, ListOption, Option +from trac.core import * +from trac.util import sorted +from trac.util.text import to_utf8, to_unicode +from trac.util.markup import escape, Markup, Fragment, html + + +__all__ = ['get_mimetype', 'is_binary', 'detect_unicode', 'Mimeview', + 'content_to_unicode'] + + +# Some common MIME types and their associated keywords and/or file extensions + +KNOWN_MIME_TYPES = { + 'application/pdf': ['pdf'], + 'application/postscript': ['ps'], + 'application/rtf': ['rtf'], + 'application/x-sh': ['sh'], + 'application/x-csh': ['csh'], + 'application/x-troff': ['nroff', 'roff', 'troff'], + + 'image/x-icon': ['ico'], + 'image/svg+xml': ['svg'], + + 'model/vrml': ['vrml', 'wrl'], + + 'text/css': ['css'], + 'text/html': ['html'], + 'text/plain': ['txt', 'TXT', 'text', 'README', 'INSTALL', + 'AUTHORS', 'COPYING', 'ChangeLog', 'RELEASE'], + 'text/xml': ['xml'], + 'text/xsl': ['xsl'], + 'text/x-csrc': ['c', 'xs'], + 'text/x-chdr': ['h'], + 'text/x-c++src': ['cc', 'CC', 'cpp', 'C'], + 'text/x-c++hdr': ['hh', 'HH', 'hpp', 'H'], + 'text/x-diff': ['diff', 'patch'], + 'text/x-eiffel': ['e'], + 'text/x-elisp': ['el'], + 'text/x-fortran': ['f'], + 'text/x-haskell': ['hs'], + 'text/x-javascript': ['js'], + 'text/x-objc': ['m', 'mm'], + 'text/x-makefile': ['make', 'mk', + 'Makefile', 'makefile', 'GNUMakefile'], + 'text/x-pascal': ['pas'], + 'text/x-perl': ['pl', 'pm', 'PL', 'perl'], + 'text/x-php': ['php', 'php3', 'php4'], + 'text/x-python': ['py', 'python'], + 'text/x-pyrex': ['pyx'], + 'text/x-ruby': ['rb', 'ruby'], + 'text/x-scheme': ['scm'], + 'text/x-textile': ['txtl', 'textile'], + 'text/x-vba': ['vb', 'vba', 'bas'], + 'text/x-verilog': ['v', 'verilog'], + 'text/x-vhdl': ['vhd'], +} + +# extend the above with simple (text/x-<something>: <something>) mappings + +for x in ['ada', 'asm', 'asp', 'awk', 'idl', 'inf', 'java', 'ksh', 'lua', + 'm4', 'mail', 'psp', 'rfc', 'rst', 'sql', 'tcl', 
def get_mimetype(filename, content=None, mime_map=MIME_MAP):
    """Guess the most probable MIME type of a file with the given name.

    `filename` is either a filename (the lookup will then use the suffix)
    or some arbitrary keyword. It may also be `None` or empty, in which
    case only the `content` is used for the detection.

    `content` is either a `str` or an `unicode` string.

    The detection proceeds in this order:
     1. lookup of the filename suffix (or keyword) in `mime_map`
     2. guess from the filename by the standard `mimetypes` module
     3. lookup in `mime_map` of a mode found by `MODE_RE` (shebang, Emacs
        or VIM marker) in the first 1000 characters of the `content`
     4. `'application/octet-stream'` if `is_binary` flags the `content`

    Return the MIME type, or `None` if it could not be determined.
    """
    mimetype = None
    # Callers such as `Mimeview.render` pass `filename=None` by default, so
    # the filename based steps must be skipped instead of failing on `split`.
    if filename:
        suffix = filename.split('.')[-1]
        if suffix in mime_map:
            # 1) mimetype from the suffix, using the `mime_map`
            return mime_map[suffix]
        try:
            import mimetypes
            # 2) mimetype from the suffix, using the `mimetypes` module
            mimetype = mimetypes.guess_type(filename)[0]
        except Exception:
            # Deliberately best effort: a failing guess only means that
            # the content based detection below gets its chance.
            mimetype = None

    if mimetype or not content:
        return mimetype

    match = MODE_RE.search(content[:1000])
    if match:
        # Exactly one alternative of `MODE_RE` matched; only the Emacs
        # mode (group 2) is normalized to lower case.
        mode = match.group(1) or match.group(3) or match.group(2).lower()
        if mode in mime_map:
            # 3) mimetype from the content, using the `MODE_RE`
            return mime_map[mode]
    elif is_binary(content):
        # 4) mimetype from the content, using `is_binary`
        return 'application/octet-stream'
    return mimetype
def content_to_unicode(env, content, mimetype):
    """Return the `content` to be previewed as an `unicode` object.

    If `content` has a `read` method, at most `max_preview_size` (as
    configured for the `Mimeview` component of `env`) is read from it.
    The conversion itself is delegated to `Mimeview.to_unicode`, together
    with the `mimetype`.
    """
    viewer = Mimeview(env)
    data = content
    if hasattr(data, 'read'):
        # file-like object: only fetch what can be previewed
        data = data.read(viewer.max_preview_size)
    return viewer.to_unicode(data, mimetype)
class IHTMLPreviewAnnotator(Interface):
    """Extension point interface for components that can add a column of
    extra information to the XHTML representation of file contents."""

    def get_annotation_type():
        """Describe the annotation provided by this component.

        Return a `(type, label, description)` tuple:
         * `type` identifies the annotation and should be unique to the
           annotator
         * `label` is used as the column heading in the table
         * `description` is the display name offered to the user for
           toggling the appearance of this annotation type
        """

    def annotate_line(number, content):
        """Return the XHTML markup for the table cell holding the
        annotation data of the line `number`, whose text is `content`."""
def get_supported_conversions(self, mimetype):
    """Return the conversions available for content of type `mimetype`.

    The result is a list of tuples in the same form as the ones returned by
    `IContentConverter.get_supported_conversions()`, but with the converter
    component appended:
    `(key, name, extension, in_mimetype, out_mimetype, quality, converter)`.

    Conversions with a quality of 0 (no support) are left out and the
    output is ordered from best to worst quality.
    """
    converters = []
    for converter in self.converters:
        for k, n, e, im, om, q in converter.get_supported_conversions():
            if im == mimetype and q > 0:
                converters.append((k, n, e, im, om, q, converter))
    # The quality is the second to last element; the last one is the
    # converter component itself, which is not a meaningful sort key.
    return sorted(converters, key=lambda conv: conv[-2], reverse=True)

def convert_content(self, req, mimetype, content, key, filename=None,
                    url=None):
    """Convert the given content to the target MIME type represented by
    `key`, which can be either a MIME type or a key.

    If no `mimetype` is given, it is detected from the `filename` and the
    `content` (read up to `max_preview_size` if `content` is an object
    with a `read` method), with `text/plain` as fallback.

    Returns a tuple of (content, output_mime_type, extension). The first
    converter returning a result wins, candidates being tried from best
    to worst quality.

    Raises a `TracError` if no converter is able to do the conversion.
    """
    if not content:
        # Same shape as for a real conversion, so that callers can always
        # unpack three values (see `send_converted`).
        return ('', 'text/plain;charset=utf-8', 'txt')

    # Ensure we have a MIME type for this content
    full_mimetype = mimetype
    if not full_mimetype:
        if hasattr(content, 'read'):
            content = content.read(self.max_preview_size)
        full_mimetype = self.get_mimetype(filename, content)
    if full_mimetype:
        mimetype = full_mimetype.split(';')[0].strip()  # split off charset
    else:
        mimetype = full_mimetype = 'text/plain'  # fallback if not binary

    # Choose best converter: `key` may match the conversion key or the
    # output MIME type
    candidates = [conv for conv in self.get_supported_conversions(mimetype)
                  if key in (conv[0], conv[4])]
    if not candidates:
        raise TracError('No available MIME conversions from %s to %s' %
                        (mimetype, key))

    # First successful conversion wins
    for ck, name, ext, in_type, out_type, quality, converter in candidates:
        output = converter.convert_content(req, mimetype, content, ck)
        if output:
            return (output[0], output[1], ext)
    raise TracError('No available MIME conversions from %s to %s' %
                    (mimetype, key))
+ """ + if not content: + return '' + + # Ensure we have a MIME type for this content + full_mimetype = mimetype + if not full_mimetype: + if hasattr(content, 'read'): + content = content.read(self.max_preview_size) + full_mimetype = self.get_mimetype(filename, content) + if full_mimetype: + mimetype = full_mimetype.split(';')[0].strip() # split off charset + else: + mimetype = full_mimetype = 'text/plain' # fallback if not binary + + # Determine candidate `IHTMLPreviewRenderer`s + candidates = [] + for renderer in self.renderers: + qr = renderer.get_quality_ratio(mimetype) + if qr > 0: + candidates.append((qr, renderer)) + candidates.sort(lambda x,y: cmp(y[0], x[0])) + + # First candidate which renders successfully wins. + # Also, we don't want to expand tabs more than once. + expanded_content = None + for qr, renderer in candidates: + try: + self.log.debug('Trying to render HTML preview using %s' + % renderer.__class__.__name__) + # check if we need to perform a tab expansion + rendered_content = content + if getattr(renderer, 'expand_tabs', False): + if expanded_content is None: + content = content_to_unicode(self.env, content, + full_mimetype) + expanded_content = content.expandtabs(self.tab_width) + rendered_content = expanded_content + result = renderer.render(req, full_mimetype, rendered_content, + filename, url) + if not result: + continue + elif isinstance(result, Fragment): + return result + elif isinstance(result, basestring): + return Markup(to_unicode(result)) + elif annotations: + return Markup(self._annotate(result, annotations)) + else: + buf = StringIO() + buf.write('<div class="code"><pre>') + for line in result: + buf.write(line + '\n') + buf.write('</pre></div>') + return Markup(buf.getvalue()) + except Exception, e: + self.log.warning('HTML preview using %s failed (%s)' + % (renderer, e), exc_info=True) + + def _annotate(self, lines, annotations): + buf = StringIO() + buf.write('<table class="code"><thead><tr>') + annotators = [] + for 
def get_mimetype(self, filename, content=None):
    """Infer the MIME type from the `filename` or the `content`.

    `content` is either a `str` or an `unicode` object.

    The lookup uses the default `MIME_MAP`, extended by the mappings
    configured in the `mime_map` option of the `[mimeviewer]` section.
    The extended map is built on first use and kept in `self._mime_map`.

    Return the detected MIME type, augmented by the
    charset information (i.e. "<mimetype>; charset=..."),
    or `None` if detection failed.
    """
    # Extend default extension to MIME type mappings with configured ones
    if not self._mime_map:
        # Work on a private copy: writing the configured mappings into
        # `MIME_MAP` itself would modify the module-level defaults shared
        # by every other user of that map.
        mime_map = MIME_MAP.copy()
        for mapping in self.config['mimeviewer'].getlist('mime_map'):
            if ':' in mapping:
                associations = mapping.split(':')
                for keyword in associations:  # Note: [0] kept on purpose
                    mime_map[keyword] = associations[0]
        self._mime_map = mime_map

    mimetype = get_mimetype(filename, content, self._mime_map)
    if mimetype and 'charset' not in mimetype:
        charset = self.get_charset(content, mimetype)
        if charset:
            mimetype += '; charset=' + charset
    return mimetype
def _html_splitlines(lines):
    """Tracks open and close tags in lines of HTML text and yields lines that
    have no tags spanning more than one line.

    `lines` is an iterable of strings. Tags left open at the end of a line
    get closed there and are opened again at the start of the next line.
    Self-closing tags (`<br />`, `<img src="..."/>`) are never considered
    to be open.
    """
    # An opening tag ends with a `>` which is not preceded by a `/`. The
    # look-behind is needed because a `/` following attributes or a space
    # would otherwise be swallowed by the attributes part of the pattern.
    open_tag_re = re.compile(r'<(\w+)(\s[^>]*)?(?<!/)>')
    close_tag_re = re.compile(r'</(\w+)>')
    open_tags = []
    for line in lines:
        # Reopen tags still open from the previous line; they are stored
        # innermost first, so prepending one by one restores the nesting
        for tag in open_tags:
            line = tag.group(0) + line

        # Find all tags opened on this line, innermost first
        open_tags = list(open_tag_re.finditer(line))
        open_tags.reverse()

        # Find all tags closed on this line; each closing tag pairs with
        # the innermost open tag having the same name
        for ctag in close_tag_re.finditer(line):
            for idx, otag in enumerate(open_tags):
                if otag.group(1) == ctag.group(1):
                    del open_tags[idx]
                    break

        # Close all tags still open at the end of line, they'll get reopened
        # at the beginning of the next line
        for tag in open_tags:
            line += '</%s>' % tag.group(1)

        yield line
class PlainTextRenderer(Component):
    """HTML preview renderer for plain text, and fallback for any kind of text
    for which no more specific renderer is available.
    """
    implements(IHTMLPreviewRenderer)

    # the content is text, so tabs should be expanded before rendering
    expand_tabs = True

    # MIME types for which this renderer declares no support at all
    TREAT_AS_BINARY = [
        'application/pdf',
        'application/postscript',
        'application/rtf'
    ]

    def get_quality_ratio(self, mimetype):
        """Return 0 for the types in `TREAT_AS_BINARY`, and 1 (the lowest
        level of support) for any other MIME type."""
        if mimetype in self.TREAT_AS_BINARY:
            return 0
        return 1

    def render(self, req, mimetype, content, filename=None, url=None):
        """Return the escaped lines of the `content` as a list, or `None`
        if `is_binary` flags the `content` as binary data.

        This is deliberately not a generator: a generator object is always
        true, so the caller's `if not result` test could never detect that
        no preview was produced for binary data.
        """
        if is_binary(content):
            self.env.log.debug("Binary data; no preview available")
            return None

        self.env.log.debug("Using default plain text mimeviewer")
        content = content_to_unicode(self.env, content, mimetype)
        return [escape(line) for line in content.splitlines()]