changeset 541:4a53763b3948

Merged cspeedups branch into trunk.
author cmlenz
date Thu, 28 Jun 2007 17:43:31 +0000
parents 661715b49c0c
children ecb5729c06b1
files genshi/_speedups.c genshi/core.py setup.py
diffstat 3 files changed, 601 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/genshi/_speedups.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (C) 2006 Edgewall Software
+ * All rights reserved.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://genshi.edgewall.org/wiki/License.
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals. For the exact contribution history, see the revision
+ * history and logs, available at http://genshi.edgewall.org/log/.
+ */
+
+#include <Python.h>
+#include <structmember.h>
+
+/* Unicode constants: each special character (*1) and its escaped form (*2). */
+static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2;
+/* Callables looked up on genshi.util; NULL if they could not be loaded. */
+static PyObject *stripentities, *striptags;
+
+/*
+ * Populate the module-level constants: the `stripentities` and `striptags`
+ * callables from `genshi.util`, and the unicode objects holding each special
+ * character together with its entity replacement.
+ *
+ * If `genshi.util` cannot be imported the two callables stay NULL and the
+ * pending Python exception is left set.
+ */
+static void
+init_constants(void)
+{
+    PyObject *util = PyImport_ImportModule("genshi.util");
+
+    /* A failed import yields NULL; looking attributes up on it, or
+       releasing it, would dereference a null pointer. */
+    if (util != NULL) {
+        stripentities = PyObject_GetAttrString(util, "stripentities");
+        striptags = PyObject_GetAttrString(util, "striptags");
+        Py_DECREF(util);
+    }
+
+    amp1 = PyUnicode_DecodeASCII("&", 1, NULL);
+    amp2 = PyUnicode_DecodeASCII("&amp;", 5, NULL);
+    lt1 = PyUnicode_DecodeASCII("<", 1, NULL);
+    lt2 = PyUnicode_DecodeASCII("&lt;", 4, NULL);
+    gt1 = PyUnicode_DecodeASCII(">", 1, NULL);
+    gt2 = PyUnicode_DecodeASCII("&gt;", 4, NULL);
+    qt1 = PyUnicode_DecodeASCII("\"", 1, NULL);
+    qt2 = PyUnicode_DecodeASCII("&#34;", 5, NULL);
+}
+
+/* Markup class */
+
+/* Forward declaration: the functions below refer to the type object, which
+   is defined further down in this file. */
+PyAPI_DATA(PyTypeObject) MarkupType;
+
+/* Docstring used as the tp_doc slot of MarkupType. */
+PyDoc_STRVAR(Markup__doc__,
+"Marks a string as being safe for inclusion in HTML/XML output without\n\
+needing to be escaped.");
+
+/*
+ * Escape `text` for inclusion in HTML/XML output.
+ *
+ * The object is converted to unicode and the characters &, < and > -- plus
+ * the double quote when `quotes` is non-zero -- are replaced by &amp;, &lt;,
+ * &gt; and &#34; respectively. The result is wrapped in a new Markup object.
+ *
+ * If `text` is already a Markup instance it is returned as is (with a new
+ * reference). Returns NULL on failure.
+ */
+static PyObject *
+escape(PyObject *text, int quotes)
+{
+    PyObject *args, *ret;
+    PyUnicodeObject *in, *out;
+    Py_UNICODE *inp, *outp;
+    int len, inn, outn;
+
+    if (PyObject_TypeCheck(text, &MarkupType)) {
+        Py_INCREF(text);
+        return text;
+    }
+    in = (PyUnicodeObject *) PyObject_Unicode(text);
+    if (in == NULL) {
+        return NULL;
+    }
+    /* First we need to figure out how long the escaped string will be:
+       `len` is the output length, `inn` the number of replacements needed.
+       The length test keeps the scan going past embedded NUL characters. */
+    len = inn = 0;
+    inp = in->str;
+    while (*(inp) || in->length > inp - in->str) {
+        switch (*inp++) {
+            case '&': len += 5; inn++;                                 break;
+            case '"': len += quotes ? 5 : 1; inn += quotes ? 1 : 0;    break;
+            case '<':
+            case '>': len += 4; inn++;                                 break;
+            default:  len++;
+        }
+    }
+
+    /* Do we need to escape anything at all? */
+    if (!inn) {
+        args = PyTuple_New(1);
+        if (args == NULL) {
+            Py_DECREF((PyObject *) in);
+            return NULL;
+        }
+        /* The tuple takes over our reference to `in`. */
+        PyTuple_SET_ITEM(args, 0, (PyObject *) in);
+        ret = MarkupType.tp_new(&MarkupType, args, NULL);
+        Py_DECREF(args);
+        return ret;
+    }
+
+    out = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, len);
+    if (out == NULL) {
+        Py_DECREF((PyObject *) in);
+        return NULL;
+    }
+
+    outn = 0;
+    inp = in->str;
+    outp = out->str;
+    while (*(inp) || in->length > inp - in->str) {
+        if (outn == inn) {
+            /* copy rest of string if we have already replaced everything */
+            Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str));
+            break;
+        }
+        switch (*inp) {
+            case '&':
+                Py_UNICODE_COPY(outp, ((PyUnicodeObject *) amp2)->str, 5);
+                outp += 5;
+                outn++;
+                break;
+            case '"':
+                if (quotes) {
+                    Py_UNICODE_COPY(outp, ((PyUnicodeObject *) qt2)->str, 5);
+                    outp += 5;
+                    outn++;
+                } else {
+                    *outp++ = *inp;
+                }
+                break;
+            case '<':
+                Py_UNICODE_COPY(outp, ((PyUnicodeObject *) lt2)->str, 4);
+                outp += 4;
+                outn++;
+                break;
+            case '>':
+                Py_UNICODE_COPY(outp, ((PyUnicodeObject *) gt2)->str, 4);
+                outp += 4;
+                outn++;
+                break;
+            default:
+                *outp++ = *inp;
+        }
+        inp++;
+    }
+
+    /* The input copy is no longer needed once its contents are in `out`. */
+    Py_DECREF((PyObject *) in);
+
+    args = PyTuple_New(1);
+    if (args == NULL) {
+        Py_DECREF((PyObject *) out);
+        return NULL;
+    }
+    /* The tuple takes over our reference to `out`. */
+    PyTuple_SET_ITEM(args, 0, (PyObject *) out);
+    ret = MarkupType.tp_new(&MarkupType, args, NULL);
+    Py_DECREF(args);
+    return ret;
+}
+
+/*
+ * tp_new for Markup.
+ *
+ * With fewer than two positional arguments construction is delegated
+ * directly to the unicode type. Otherwise the first argument is used as a
+ * format string and the remaining arguments, each escaped with quotes
+ * enabled, are interpolated into it. `kwds` is not used.
+ */
+static PyObject *
+Markup_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *fmt, *escaped, *formatted, *wrapped, *result;
+    int count, pos;
+
+    count = PyTuple_GET_SIZE(args);
+    if (count <= 1) {
+        return PyUnicode_Type.tp_new(type, args, NULL);
+    }
+
+    /* Escape every argument after the format string. */
+    escaped = PyTuple_New(count - 1);
+    if (escaped == NULL) {
+        return NULL;
+    }
+    for (pos = 0; pos < count - 1; pos++) {
+        PyObject *item = escape(PyTuple_GET_ITEM(args, pos + 1), 1);
+        if (item == NULL) {
+            Py_DECREF(escaped);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(escaped, pos, item);
+    }
+
+    fmt = PyTuple_GET_ITEM(args, 0);
+    formatted = PyUnicode_Format(fmt, escaped);
+    Py_DECREF(escaped);
+    if (formatted == NULL) {
+        return NULL;
+    }
+
+    /* Wrap the formatted text in a 1-tuple for the unicode constructor. */
+    wrapped = PyTuple_New(1);
+    if (wrapped == NULL) {
+        Py_DECREF(formatted);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(wrapped, 0, formatted);
+    result = PyUnicode_Type.tp_new(type, wrapped, NULL);
+    Py_DECREF(wrapped);
+    return result;
+}
+
+PyDoc_STRVAR(escape__doc__,
+"Create a Markup instance from a string and escape special characters\n\
+it may contain (<, >, & and \").\n\
+\n\
+If the `quotes` parameter is set to `False`, the \" character is left\n\
+as is. Escaping quotes is generally only required for strings that are\n\
+to be used in attribute values.");
+
+/*
+ * Class method `escape(text, quotes=True)`.
+ *
+ * Falsy `text` is handed to the type's constructor without escaping, an
+ * instance of `type` is returned unchanged, and anything else goes through
+ * escape(). Returns NULL on failure.
+ */
+static PyObject *
+Markup_escape(PyTypeObject* type, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"text", "quotes", 0};
+    PyObject *text = NULL, *ctor_args, *ret;
+    char quotes = 1;
+    int is_false;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &text, &quotes)) {
+        return NULL;
+    }
+    /* PyObject_Not() returns -1 when the truth test itself raised. */
+    is_false = PyObject_Not(text);
+    if (is_false < 0) {
+        return NULL;
+    }
+    if (is_false) {
+        /* Forward at most the first positional argument: a positional
+           `quotes` must not reach the constructor, which would treat it
+           as a string formatting argument. */
+        ctor_args = PyTuple_GetSlice(args, 0, 1);
+        if (ctor_args == NULL) {
+            return NULL;
+        }
+        ret = type->tp_new(type, ctor_args, NULL);
+        Py_DECREF(ctor_args);
+        return ret;
+    }
+    if (PyObject_TypeCheck(text, type)) {
+        Py_INCREF(text);
+        return text;
+    }
+    return escape(text, quotes);
+}
+
+/*
+ * Method `join(seq, escape_quotes=True)`.
+ *
+ * Escapes every item of `seq` (quotes are escaped when `escape_quotes` is
+ * true), joins the escaped items using `self` as the separator and returns
+ * the result as a new Markup object. Returns NULL on failure.
+ */
+static PyObject *
+Markup_join(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"seq", "escape_quotes", 0};
+    PyObject *seq = NULL, *seq2, *item, *tmp;
+    char quotes = 1;
+    int n, i;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &seq, &quotes)) {
+        return NULL;
+    }
+    if (!PySequence_Check(seq)) {
+        /* Returning NULL without an exception set is an error in itself. */
+        PyErr_SetString(PyExc_TypeError,
+                        "join() argument must be a sequence");
+        return NULL;
+    }
+    n = PySequence_Size(seq);
+    if (n < 0) {
+        return NULL;
+    }
+    seq2 = PyTuple_New(n);
+    if (seq2 == NULL) {
+        return NULL;
+    }
+    for (i = 0; i < n; i++) {
+        /* PySequence_GetItem() returns a new reference that we own. */
+        item = PySequence_GetItem(seq, i);
+        if (item == NULL) {
+            Py_DECREF(seq2);
+            return NULL;
+        }
+        tmp = escape(item, quotes);
+        Py_DECREF(item);
+        if (tmp == NULL) {
+            Py_DECREF(seq2);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(seq2, i, tmp);
+    }
+    tmp = PyUnicode_Join(self, seq2);
+    Py_DECREF(seq2);
+    if (tmp == NULL)
+        return NULL;
+    args = PyTuple_New(1);
+    if (args == NULL) {
+        Py_DECREF(tmp);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(args, 0, tmp);
+    tmp = MarkupType.tp_new(&MarkupType, args, NULL);
+    Py_DECREF(args);
+    return tmp;
+}
+
+/*
+ * nb_add slot: concatenation that escapes the non-Markup operand.
+ *
+ * When `self` is a Markup instance, `other` is escaped and appended to it.
+ * Otherwise (the reflected case) `self` is escaped and `other` is appended
+ * to the escaped text. The result is a new Markup object.
+ */
+static PyObject *
+Markup_add(PyObject *self, PyObject *other)
+{
+    PyObject *escaped, *joined, *wrapped, *result;
+    int self_is_markup;
+
+    self_is_markup = PyObject_TypeCheck(self, &MarkupType);
+
+    /* Escape whichever operand is not known to be Markup. */
+    escaped = escape(self_is_markup ? other : self, 1);
+    if (escaped == NULL) {
+        return NULL;
+    }
+
+    if (self_is_markup) {
+        joined = PyUnicode_Concat(self, escaped);
+    } else {
+        /* __radd__ */
+        joined = PyUnicode_Concat(escaped, other);
+    }
+    Py_DECREF(escaped);
+    if (joined == NULL) {
+        return NULL;
+    }
+
+    wrapped = PyTuple_New(1);
+    if (wrapped == NULL) {
+        Py_DECREF(joined);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(wrapped, 0, joined);
+    result = MarkupType.tp_new(&MarkupType, wrapped, NULL);
+    Py_DECREF(wrapped);
+    return result;
+}
+
+/*
+ * nb_remainder slot: string formatting with escaped arguments.
+ *
+ * If `args` is a tuple each element is escaped individually; any other
+ * object is escaped as a whole. The escaped operand is interpolated into
+ * `self` and the result is returned as a new Markup object.
+ */
+static PyObject *
+Markup_mod(PyObject *self, PyObject *args)
+{
+    PyObject *operand, *formatted, *wrapped, *result;
+    int count, pos;
+
+    if (PyTuple_Check(args)) {
+        count = PyTuple_GET_SIZE(args);
+        operand = PyTuple_New(count);
+        if (operand == NULL) {
+            return NULL;
+        }
+        for (pos = 0; pos < count; pos++) {
+            PyObject *item = escape(PyTuple_GET_ITEM(args, pos), 1);
+            if (item == NULL) {
+                Py_DECREF(operand);
+                return NULL;
+            }
+            PyTuple_SET_ITEM(operand, pos, item);
+        }
+    } else {
+        operand = escape(args, 1);
+        if (operand == NULL) {
+            return NULL;
+        }
+    }
+
+    formatted = PyUnicode_Format(self, operand);
+    Py_DECREF(operand);
+    if (formatted == NULL) {
+        return NULL;
+    }
+
+    wrapped = PyTuple_New(1);
+    if (wrapped == NULL) {
+        Py_DECREF(formatted);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(wrapped, 0, formatted);
+    result = PyUnicode_Type.tp_new(&MarkupType, wrapped, NULL);
+    Py_DECREF(wrapped);
+    return result;
+}
+
+/*
+ * nb_multiply slot: repetition.
+ *
+ * The Markup operand (either `self`, or `num` in the reflected case) is
+ * converted to unicode, multiplied by the other operand, and the product is
+ * returned as a new Markup object. Returns NULL on failure.
+ */
+static PyObject *
+Markup_mul(PyObject *self, PyObject *num)
+{
+    PyObject *unicode, *result, *args;
+
+    if (PyObject_TypeCheck(self, &MarkupType)) {
+        unicode = PyObject_Unicode(self);
+        if (unicode == NULL) return NULL;
+        result = PyNumber_Multiply(unicode, num);
+    } else { /* __rmul__ */
+        unicode = PyObject_Unicode(num);
+        if (unicode == NULL) return NULL;
+        result = PyNumber_Multiply(unicode, self);
+    }
+    /* The temporary unicode copy is only needed for the multiplication. */
+    Py_DECREF(unicode);
+
+    if (result == NULL) return NULL;
+    args = PyTuple_New(1);
+    if (args == NULL) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(args, 0, result);
+    result = PyUnicode_Type.tp_new(&MarkupType, args, NULL);
+    Py_DECREF(args);
+
+    return result;
+}
+
+/*
+ * tp_repr slot: returns the string "<Markup %r>" formatted with the unicode
+ * value of `self`. Returns NULL on failure.
+ */
+static PyObject *
+Markup_repr(PyObject *self)
+{
+    PyObject *format, *result, *args;
+
+    format = PyString_FromString("<Markup %r>");
+    if (format == NULL) return NULL;
+    result = PyObject_Unicode(self);
+    if (result == NULL) {
+        Py_DECREF(format);
+        return NULL;
+    }
+    args = PyTuple_New(1);
+    if (args == NULL) {
+        Py_DECREF(format);
+        Py_DECREF(result);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(args, 0, result);
+    result = PyString_Format(format, args);
+    /* Both temporaries are owned by this function and must be released. */
+    Py_DECREF(format);
+    Py_DECREF(args);
+    return result;
+}
+
+PyDoc_STRVAR(unescape__doc__,
+"Reverse-escapes &, <, > and \" and returns a `unicode` object.");
+
+/*
+ * Method `unescape()`: replaces the entities &#34;, &gt;, &lt; and &amp;
+ * (in that order) by the characters they stand for. The ampersand is handled
+ * last, so text such as "&amp;lt;" becomes "&lt;" rather than "<".
+ */
+static PyObject *
+Markup_unescape(PyObject* self)
+{
+    PyObject *from[4], *to[4];
+    PyObject *current, *next;
+    int step;
+
+    from[0] = qt2;  to[0] = qt1;
+    from[1] = gt2;  to[1] = gt1;
+    from[2] = lt2;  to[2] = lt1;
+    from[3] = amp2; to[3] = amp1;
+
+    /* The first replacement reads from `self`, which we do not own; every
+       later one consumes the intermediate result of the previous step. */
+    current = PyUnicode_Replace(self, from[0], to[0], -1);
+    for (step = 1; current != NULL && step < 4; step++) {
+        next = PyUnicode_Replace(current, from[step], to[step], -1);
+        Py_DECREF(current);
+        current = next;
+    }
+    return current;
+}
+
+PyDoc_STRVAR(stripentities__doc__,
+"Return a copy of the text with any character or numeric entities\n\
+replaced by the equivalent UTF-8 characters.\n\
+\n\
+If the `keepxmlentities` parameter is provided and evaluates to `True`,\n\
+the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not\n\
+stripped.");
+
+/*
+ * Method `stripentities(keepxmlentities=False)`: calls the `stripentities`
+ * callable loaded from genshi.util with `self` and the flag, and wraps the
+ * result in a new Markup object. Returns NULL on failure.
+ */
+static PyObject *
+Markup_stripentities(PyObject* self, PyObject *args, PyObject *kwds)
+{
+    static char *kwlist[] = {"keepxmlentities", 0};
+    PyObject *result, *args2;
+    char keepxml = 0;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|b", kwlist, &keepxml)) {
+        return NULL;
+    }
+
+    if (stripentities == NULL) {
+        /* Never return NULL without an exception set. */
+        PyErr_SetString(PyExc_RuntimeError,
+                        "genshi.util.stripentities is not available");
+        return NULL;
+    }
+    result = PyObject_CallFunction(stripentities, "Ob", self, keepxml);
+    if (result == NULL) return NULL;
+    args2 = PyTuple_New(1);
+    if (args2 == NULL) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(args2, 0, result);
+    result = MarkupType.tp_new(&MarkupType, args2, NULL);
+    Py_DECREF(args2);
+    return result;
+}
+
+PyDoc_STRVAR(striptags__doc__,
+"Return a copy of the text with all XML/HTML tags removed.");
+
+/*
+ * Method `striptags()`: calls the `striptags` callable loaded from
+ * genshi.util with `self` and wraps the result in a new Markup object.
+ * Returns NULL on failure.
+ */
+static PyObject *
+Markup_striptags(PyObject* self)
+{
+    PyObject *result, *args;
+
+    if (striptags == NULL) {
+        /* Never return NULL without an exception set. */
+        PyErr_SetString(PyExc_RuntimeError,
+                        "genshi.util.striptags is not available");
+        return NULL;
+    }
+    result = PyObject_CallFunction(striptags, "O", self);
+    if (result == NULL) return NULL;
+    args = PyTuple_New(1);
+    if (args == NULL) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(args, 0, result);
+    result = MarkupType.tp_new(&MarkupType, args, NULL);
+    Py_DECREF(args);
+    return result;
+}
+
+/* Instance layout of Markup: a unicode object with no additional fields. */
+typedef struct {
+    PyUnicodeObject HEAD;
+} MarkupObject;
+
+/* Method table for Markup. `escape` is registered as a class method; the
+   `join` entry supplies no docstring. */
+static PyMethodDef Markup_methods[] = {
+    {"escape", (PyCFunction) Markup_escape,
+     METH_VARARGS|METH_CLASS|METH_KEYWORDS,  escape__doc__},
+    {"join", (PyCFunction)Markup_join, METH_VARARGS|METH_KEYWORDS},
+    {"unescape", (PyCFunction)Markup_unescape, METH_NOARGS, unescape__doc__},
+    {"stripentities", (PyCFunction) Markup_stripentities,
+     METH_VARARGS|METH_KEYWORDS, stripentities__doc__},
+    {"striptags", (PyCFunction) Markup_striptags, METH_NOARGS,
+     striptags__doc__},
+    {NULL}  /* Sentinel */
+};
+
+/* Number protocol slots for Markup: `+`, `*` and `%` are overridden. The
+   initializer is positional and stops after nb_remainder, so the remaining
+   slots are zero-initialized. */
+static PyNumberMethods Markup_as_number = {
+        Markup_add, /*nb_add*/
+        0, /*nb_subtract*/
+        Markup_mul, /*nb_multiply*/
+        0, /*nb_divide*/
+        Markup_mod, /*nb_remainder*/
+};
+
+/* Type object for genshi._speedups.Markup. The initializer is positional,
+   so the order of the entries below must match the PyTypeObject layout.
+   tp_base is left 0 here and assigned in init_speedups(). */
+PyTypeObject MarkupType = {
+    PyObject_HEAD_INIT(NULL)
+    0,
+    "genshi._speedups.Markup",
+    sizeof(MarkupObject),
+    0,
+    0,          /*tp_dealloc*/
+    0,          /*tp_print*/ 
+    0,          /*tp_getattr*/
+    0,          /*tp_setattr*/
+    0,          /*tp_compare*/
+    Markup_repr, /*tp_repr*/
+    &Markup_as_number, /*tp_as_number*/
+    0,          /*tp_as_sequence*/
+    0,          /*tp_as_mapping*/
+    0,          /*tp_hash */
+
+    0,          /*tp_call*/
+    0,          /*tp_str*/
+    0,          /*tp_getattro*/
+    0,          /*tp_setattro*/
+    0,          /*tp_as_buffer*/
+
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
+    Markup__doc__,/*tp_doc*/
+    
+    0,          /*tp_traverse*/
+    0,          /*tp_clear*/
+
+    0,          /*tp_richcompare*/
+    0,          /*tp_weaklistoffset*/
+
+    0,          /*tp_iter*/
+    0,          /*tp_iternext*/
+
+    /* Attribute descriptor and subclassing stuff */
+
+    Markup_methods,/*tp_methods*/
+    0,          /*tp_members*/
+    0,          /*tp_getset*/
+    0,          /*tp_base*/
+    0,          /*tp_dict*/
+    
+    0,          /*tp_descr_get*/
+    0,          /*tp_descr_set*/
+    0,          /*tp_dictoffset*/
+    
+    0,          /*tp_init*/
+    0,          /*tp_alloc  not assigned in this file; presumably inherited from the base type by PyType_Ready -- TODO confirm*/
+    Markup_new, /*tp_new*/
+    0,          /*tp_free  Low-level free-memory routine */
+    0,          /*tp_is_gc For PyObject_IS_GC */
+    0,          /*tp_bases*/
+    0,          /*tp_mro method resolution order */
+    0,          /*tp_cache*/
+    0,          /*tp_subclasses*/
+    0           /*tp_weaklist*/
+};
+
+/*
+ * Module initialization for `_speedups`: finishes setting up MarkupType,
+ * loads the shared constants and exposes the type as `Markup`. On any
+ * failure the function returns early with the Python exception left set.
+ */
+PyMODINIT_FUNC
+init_speedups(void)
+{
+    PyObject *module;
+
+    /* Workaround for quirk in Visual Studio, see
+        <http://www.python.it/faq/faq-3.html#3.24> */
+    MarkupType.tp_base = &PyUnicode_Type;
+
+    if (PyType_Ready(&MarkupType) < 0)
+        return;
+
+    init_constants();
+    /* init_constants() cannot report failure through a return value, so
+       check for a pending exception instead of continuing with NULLs. */
+    if (PyErr_Occurred())
+        return;
+
+    module = Py_InitModule("_speedups", NULL);
+    if (module == NULL)
+        return;
+
+    Py_INCREF(&MarkupType);
+    /* PyModule_AddObject() only takes over the reference on success. */
+    if (PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType) < 0)
+        Py_DECREF(&MarkupType);
+}
--- a/genshi/core.py
+++ b/genshi/core.py
@@ -477,6 +477,11 @@
         return Markup(striptags(self))
 
 
+# Replace Markup with the implementation from the C extension when that
+# module can be imported; otherwise the name keeps its current binding.
+try:
+    from genshi._speedups import Markup
+except ImportError:
+    pass # just use the Python implementation
+
 escape = Markup.escape
 
 def unescape(text):
--- a/setup.py
+++ b/setup.py
@@ -13,13 +13,16 @@
 # history and logs, available at http://genshi.edgewall.org/log/.
 
 from distutils.cmd import Command
+from distutils.command.build_ext import build_ext
+from distutils.errors import CCompilerError
 import doctest
 from glob import glob
 import os
 try:
-    from setuptools import setup
+    from setuptools import setup, Extension, Feature
 except ImportError:
-    from distutils.core import setup
+    from distutils.core import setup, Extension
+    Feature = None
 import sys
 
 
@@ -107,6 +110,30 @@
             doctest.testfile(filename, False, optionflags=doctest.ELLIPSIS)
 
 
+class optional_build_ext(build_ext):
+    # This class allows C extension building to fail.
+    def build_extension(self, ext):
+        try:
+            build_ext.build_extension(self, ext)
+        except CCompilerError, x:
+            print '*' * 70
+            print """WARNING:
+An optional C extension could not be compiled, speedups will not be
+available."""
+            print '*' * 70
+
+
+if Feature:
+    speedups = Feature(
+        "optionial C speed-enhancements",
+        standard = True,
+        ext_modules = [
+            Extension('genshi._speedups', ['genshi/_speedups.c']),
+        ],
+    )
+else:
+    speedups = None
+
 setup(
     name = 'Genshi',
     version = '0.5',
@@ -153,5 +180,7 @@
     genshi-text = genshi.template.plugin:TextTemplateEnginePlugin[plugin]
     """,
 
-    cmdclass = {'build_doc': build_doc, 'test_doc': test_doc}
+    features = {'speedups': speedups},
+    cmdclass = {'build_doc': build_doc, 'test_doc': test_doc,
+                'build_ext': optional_build_ext}
 )
Copyright (C) 2012-2017 Edgewall Software