comparison genshi/_speedups.c @ 541:773d8c470e82 trunk

Merged cspeedups branch into trunk.
author cmlenz
date Thu, 28 Jun 2007 17:43:31 +0000
parents
children 93b2a5792cfc
comparison
equal deleted inserted replaced
540:6b413fbf359a 541:773d8c470e82
1 /*
2 * Copyright (C) 2006 Edgewall Software
3 * All rights reserved.
4 *
5 * This software is licensed as described in the file COPYING, which
6 * you should have received as part of this distribution. The terms
7 * are also available at http://genshi.edgewall.org/wiki/License.
8 *
9 * This software consists of voluntary contributions made by many
10 * individuals. For the exact contribution history, see the revision
11 * history and logs, available at http://genshi.edgewall.org/log/.
12 */
13
14 #include <Python.h>
15 #include <structmember.h>
16
17 static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2;
18 static PyObject *stripentities, *striptags;
19
20 static void
21 init_constants(void)
22 {
23 PyObject *util = PyImport_ImportModule("genshi.util");
24 stripentities = PyObject_GetAttrString(util, "stripentities");
25 striptags = PyObject_GetAttrString(util, "striptags");
26 Py_DECREF(util);
27
28 amp1 = PyUnicode_DecodeASCII("&", 1, NULL);
29 amp2 = PyUnicode_DecodeASCII("&amp;", 5, NULL);
30 lt1 = PyUnicode_DecodeASCII("<", 1, NULL);
31 lt2 = PyUnicode_DecodeASCII("&lt;", 4, NULL);
32 gt1 = PyUnicode_DecodeASCII(">", 1, NULL);
33 gt2 = PyUnicode_DecodeASCII("&gt;", 4, NULL);
34 qt1 = PyUnicode_DecodeASCII("\"", 1, NULL);
35 qt2 = PyUnicode_DecodeASCII("&#34;", 5, NULL);
36 }
37
38 /* Markup class */
39
40 PyAPI_DATA(PyTypeObject) MarkupType;
41
42 PyDoc_STRVAR(Markup__doc__,
43 "Marks a string as being safe for inclusion in HTML/XML output without\n\
44 needing to be escaped.");
45
46 static PyObject *
47 escape(PyObject *text, int quotes)
48 {
49 PyObject *args, *ret;
50 PyUnicodeObject *in, *out;
51 Py_UNICODE *inp, *outp;
52 int len, inn, outn;
53
54 if (PyObject_TypeCheck(text, &MarkupType)) {
55 Py_INCREF(text);
56 return text;
57 }
58 in = (PyUnicodeObject *) PyObject_Unicode(text);
59 if (in == NULL) {
60 return NULL;
61 }
62 /* First we need to figure out how long the escaped string will be */
63 len = inn = 0;
64 inp = in->str;
65 while (*(inp) || in->length > inp - in->str) {
66 switch (*inp++) {
67 case '&': len += 5; inn++; break;
68 case '"': len += quotes ? 5 : 1; inn += quotes ? 1 : 0; break;
69 case '<':
70 case '>': len += 4; inn++; break;
71 default: len++;
72 }
73 }
74
75 /* Do we need to escape anything at all? */
76 if (!inn) {
77 args = PyTuple_New(1);
78 if (args == NULL) {
79 Py_DECREF((PyObject *) in);
80 return NULL;
81 }
82 PyTuple_SET_ITEM(args, 0, (PyObject *) in);
83 ret = MarkupType.tp_new(&MarkupType, args, NULL);
84 Py_DECREF(args);
85 return ret;
86 }
87
88 out = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, len);
89 if (out == NULL) {
90 return NULL;
91 }
92
93 outn = 0;
94 inp = in->str;
95 outp = out->str;
96 while (*(inp) || in->length > inp - in->str) {
97 if (outn == inn) {
98 /* copy rest of string if we have already replaced everything */
99 Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str));
100 break;
101 }
102 switch (*inp) {
103 case '&':
104 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) amp2)->str, 5);
105 outp += 5;
106 outn++;
107 break;
108 case '"':
109 if (quotes) {
110 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) qt2)->str, 5);
111 outp += 5;
112 outn++;
113 } else {
114 *outp++ = *inp;
115 }
116 break;
117 case '<':
118 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) lt2)->str, 4);
119 outp += 4;
120 outn++;
121 break;
122 case '>':
123 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) gt2)->str, 4);
124 outp += 4;
125 outn++;
126 break;
127 default:
128 *outp++ = *inp;
129 }
130 inp++;
131 }
132
133 args = PyTuple_New(1);
134 if (args == NULL) {
135 Py_DECREF((PyObject *) out);
136 return NULL;
137 }
138 PyTuple_SET_ITEM(args, 0, (PyObject *) out);
139 ret = MarkupType.tp_new(&MarkupType, args, NULL);
140 Py_DECREF(args);
141 return ret;
142 }
143
144 static PyObject *
145 Markup_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
146 {
147 PyObject *self, *text, *tmp, *args2;
148 int nargs, i;
149
150 nargs = PyTuple_GET_SIZE(args);
151 if (nargs < 2) {
152 return PyUnicode_Type.tp_new(type, args, NULL);
153 }
154
155 text = PyTuple_GET_ITEM(args, 0);
156 args2 = PyTuple_New(nargs - 1);
157 if (args2 == NULL) {
158 return NULL;
159 }
160 for (i = 1; i < nargs; i++) {
161 tmp = escape(PyTuple_GET_ITEM(args, i), 1);
162 if (tmp == NULL) {
163 Py_DECREF(args2);
164 return NULL;
165 }
166 PyTuple_SET_ITEM(args2, i - 1, tmp);
167 }
168 tmp = PyUnicode_Format(text, args2);
169 Py_DECREF(args2);
170 if (tmp == NULL) {
171 return NULL;
172 }
173 args = PyTuple_New(1);
174 if (args == NULL) {
175 Py_DECREF(tmp);
176 return NULL;
177 }
178 PyTuple_SET_ITEM(args, 0, tmp);
179 self = PyUnicode_Type.tp_new(type, args, NULL);
180 Py_DECREF(args);
181 return self;
182 }
183
184 PyDoc_STRVAR(escape__doc__,
185 "Create a Markup instance from a string and escape special characters\n\
186 it may contain (<, >, & and \").\n\
187 \n\
188 If the `quotes` parameter is set to `False`, the \" character is left\n\
189 as is. Escaping quotes is generally only required for strings that are\n\
190 to be used in attribute values.");
191
192 static PyObject *
193 Markup_escape(PyTypeObject* type, PyObject *args, PyObject *kwds)
194 {
195 static char *kwlist[] = {"text", "quotes", 0};
196 PyObject *text = NULL;
197 char quotes = 1;
198
199 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &text, &quotes)) {
200 return NULL;
201 }
202 if (PyObject_Not(text)) {
203 return type->tp_new(type, args, NULL);
204 }
205 if (PyObject_TypeCheck(text, type)) {
206 Py_INCREF(text);
207 return text;
208 }
209 return escape(text, quotes);
210 }
211
212 static PyObject *
213 Markup_join(PyObject *self, PyObject *args, PyObject *kwds)
214 {
215 static char *kwlist[] = {"seq", "escape_quotes", 0};
216 PyObject *seq = NULL, *seq2, *tmp;
217 char quotes = 1;
218 int n, i;
219
220 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &seq, &quotes)) {
221 return NULL;
222 }
223 if (!PySequence_Check(seq)) {
224 return NULL;
225 }
226 n = PySequence_Size(seq);
227 if (n < 0) {
228 return NULL;
229 }
230 seq2 = PyTuple_New(n);
231 if (seq2 == NULL) {
232 return NULL;
233 }
234 for (i = 0; i < n; i++) {
235 tmp = PySequence_GetItem(seq, i);
236 if (tmp == NULL) {
237 Py_DECREF(seq2);
238 return NULL;
239 }
240 tmp = escape(tmp, quotes);
241 if (tmp == NULL) {
242 Py_DECREF(seq2);
243 return NULL;
244 }
245 PyTuple_SET_ITEM(seq2, i, tmp);
246 }
247 tmp = PyUnicode_Join(self, seq2);
248 Py_DECREF(seq2);
249 if (tmp == NULL)
250 return NULL;
251 args = PyTuple_New(1);
252 if (args == NULL) {
253 Py_DECREF(tmp);
254 return NULL;
255 }
256 PyTuple_SET_ITEM(args, 0, tmp);
257 tmp = MarkupType.tp_new(&MarkupType, args, NULL);
258 Py_DECREF(args);
259 return tmp;
260 }
261
262 static PyObject *
263 Markup_add(PyObject *self, PyObject *other)
264 {
265 PyObject *tmp, *tmp2, *args, *ret;
266 if (PyObject_TypeCheck(self, &MarkupType)) {
267 tmp = escape(other, 1);
268 if (tmp == NULL)
269 return NULL;
270 tmp2 = PyUnicode_Concat(self, tmp);
271 } else { // __radd__
272 tmp = escape(self, 1);
273 if (tmp == NULL)
274 return NULL;
275 tmp2 = PyUnicode_Concat(tmp, other);
276 }
277 if (tmp2 == NULL) {
278 Py_DECREF(tmp);
279 return NULL;
280 }
281 Py_DECREF(tmp);
282 args = PyTuple_New(1);
283 if (args == NULL) {
284 Py_DECREF(tmp2);
285 return NULL;
286 }
287 PyTuple_SET_ITEM(args, 0, tmp2);
288 ret = MarkupType.tp_new(&MarkupType, args, NULL);
289 Py_DECREF(args);
290 return ret;
291 }
292
293 static PyObject *
294 Markup_mod(PyObject *self, PyObject *args)
295 {
296 PyObject *tmp, *tmp2, *ret, *args2;
297 int i, nargs;
298
299 if (PyTuple_Check(args)) {
300 nargs = PyTuple_GET_SIZE(args);
301 args2 = PyTuple_New(nargs);
302 if (args2 == NULL) {
303 return NULL;
304 }
305 for (i = 0; i < nargs; i++) {
306 tmp = escape(PyTuple_GET_ITEM(args, i), 1);
307 if (tmp == NULL) {
308 Py_DECREF(args2);
309 return NULL;
310 }
311 PyTuple_SET_ITEM(args2, i, tmp);
312 }
313 tmp = PyUnicode_Format(self, args2);
314 Py_DECREF(args2);
315 if (tmp == NULL) {
316 return NULL;
317 }
318 } else {
319 tmp2 = escape(args, 1);
320 if (tmp2 == NULL) {
321 return NULL;
322 }
323 tmp = PyUnicode_Format(self, tmp2);
324 Py_DECREF(tmp2);
325 if (tmp == NULL) {
326 return NULL;
327 }
328 }
329 args = PyTuple_New(1);
330 if (args == NULL) {
331 Py_DECREF(tmp);
332 return NULL;
333 }
334 PyTuple_SET_ITEM(args, 0, tmp);
335 ret = PyUnicode_Type.tp_new(&MarkupType, args, NULL);
336 Py_DECREF(args);
337 return ret;
338 }
339
340 static PyObject *
341 Markup_mul(PyObject *self, PyObject *num)
342 {
343 PyObject *unicode, *result, *args;
344
345 if (PyObject_TypeCheck(self, &MarkupType)) {
346 unicode = PyObject_Unicode(self);
347 if (unicode == NULL) return NULL;
348 result = PyNumber_Multiply(unicode, num);
349 } else { // __rmul__
350 unicode = PyObject_Unicode(num);
351 if (unicode == NULL) return NULL;
352 result = PyNumber_Multiply(unicode, self);
353 }
354
355 if (result == NULL) return NULL;
356 args = PyTuple_New(1);
357 if (args == NULL) {
358 Py_DECREF(result);
359 return NULL;
360 }
361 PyTuple_SET_ITEM(args, 0, result);
362 result = PyUnicode_Type.tp_new(&MarkupType, args, NULL);
363 Py_DECREF(args);
364
365 return result;
366 }
367
368 static PyObject *
369 Markup_repr(PyObject *self)
370 {
371 PyObject *format, *result, *args;
372
373 format = PyString_FromString("<Markup %r>");
374 if (format == NULL) return NULL;
375 result = PyObject_Unicode(self);
376 if (result == NULL) return NULL;
377 args = PyTuple_New(1);
378 if (args == NULL) {
379 Py_DECREF(result);
380 return NULL;
381 }
382 PyTuple_SET_ITEM(args, 0, result);
383 result = PyString_Format(format, args);
384 Py_DECREF(args);
385 return result;
386 }
387
388 PyDoc_STRVAR(unescape__doc__,
389 "Reverse-escapes &, <, > and \" and returns a `unicode` object.");
390
391 static PyObject *
392 Markup_unescape(PyObject* self)
393 {
394 PyObject *tmp, *tmp2;
395
396 tmp = PyUnicode_Replace(self, qt2, qt1, -1);
397 if (tmp == NULL) return NULL;
398 tmp2 = PyUnicode_Replace(tmp, gt2, gt1, -1);
399 Py_DECREF(tmp);
400 if (tmp2 == NULL) return NULL;
401 tmp = PyUnicode_Replace(tmp2, lt2, lt1, -1);
402 Py_DECREF(tmp2);
403 if (tmp == NULL) return NULL;
404 tmp2 = PyUnicode_Replace(tmp, amp2, amp1, -1);
405 Py_DECREF(tmp);
406 return tmp2;
407 }
408
409 PyDoc_STRVAR(stripentities__doc__,
410 "Return a copy of the text with any character or numeric entities\n\
411 replaced by the equivalent UTF-8 characters.\n\
412 \n\
413 If the `keepxmlentities` parameter is provided and evaluates to `True`,\n\
414 the core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are not\n\
415 stripped.");
416
417 static PyObject *
418 Markup_stripentities(PyObject* self, PyObject *args, PyObject *kwds)
419 {
420 static char *kwlist[] = {"keepxmlentities", 0};
421 PyObject *result, *args2;
422 char keepxml = 0;
423
424 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|b", kwlist, &keepxml)) {
425 return NULL;
426 }
427
428 if (stripentities == NULL) return NULL;
429 result = PyObject_CallFunction(stripentities, "Ob", self, keepxml);
430 if (result == NULL) return NULL;
431 args2 = PyTuple_New(1);
432 if (args2 == NULL) {
433 Py_DECREF(result);
434 return NULL;
435 }
436 PyTuple_SET_ITEM(args2, 0, result);
437 result = MarkupType.tp_new(&MarkupType, args2, NULL);
438 Py_DECREF(args2);
439 return result;
440 }
441
442 PyDoc_STRVAR(striptags__doc__,
443 "Return a copy of the text with all XML/HTML tags removed.");
444
445 static PyObject *
446 Markup_striptags(PyObject* self)
447 {
448 PyObject *result, *args;
449
450 if (striptags == NULL) return NULL;
451 result = PyObject_CallFunction(striptags, "O", self);
452 if (result == NULL) return NULL;
453 args = PyTuple_New(1);
454 if (args == NULL) {
455 Py_DECREF(result);
456 return NULL;
457 }
458 PyTuple_SET_ITEM(args, 0, result);
459 result = MarkupType.tp_new(&MarkupType, args, NULL);
460 Py_DECREF(args);
461 return result;
462 }
463
464 typedef struct {
465 PyUnicodeObject HEAD;
466 } MarkupObject;
467
468 static PyMethodDef Markup_methods[] = {
469 {"escape", (PyCFunction) Markup_escape,
470 METH_VARARGS|METH_CLASS|METH_KEYWORDS, escape__doc__},
471 {"join", (PyCFunction)Markup_join, METH_VARARGS|METH_KEYWORDS},
472 {"unescape", (PyCFunction)Markup_unescape, METH_NOARGS, unescape__doc__},
473 {"stripentities", (PyCFunction) Markup_stripentities,
474 METH_VARARGS|METH_KEYWORDS, stripentities__doc__},
475 {"striptags", (PyCFunction) Markup_striptags, METH_NOARGS,
476 striptags__doc__},
477 {NULL} /* Sentinel */
478 };
479
480 static PyNumberMethods Markup_as_number = {
481 Markup_add, /*nb_add*/
482 0, /*nb_subtract*/
483 Markup_mul, /*nb_multiply*/
484 0, /*nb_divide*/
485 Markup_mod, /*nb_remainder*/
486 };
487
488 PyTypeObject MarkupType = {
489 PyObject_HEAD_INIT(NULL)
490 0,
491 "genshi._speedups.Markup",
492 sizeof(MarkupObject),
493 0,
494 0, /*tp_dealloc*/
495 0, /*tp_print*/
496 0, /*tp_getattr*/
497 0, /*tp_setattr*/
498 0, /*tp_compare*/
499 Markup_repr, /*tp_repr*/
500 &Markup_as_number, /*tp_as_number*/
501 0, /*tp_as_sequence*/
502 0, /*tp_as_mapping*/
503 0, /*tp_hash */
504
505 0, /*tp_call*/
506 0, /*tp_str*/
507 0, /*tp_getattro*/
508 0, /*tp_setattro*/
509 0, /*tp_as_buffer*/
510
511 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
512 Markup__doc__,/*tp_doc*/
513
514 0, /*tp_traverse*/
515 0, /*tp_clear*/
516
517 0, /*tp_richcompare*/
518 0, /*tp_weaklistoffset*/
519
520 0, /*tp_iter*/
521 0, /*tp_iternext*/
522
523 /* Attribute descriptor and subclassing stuff */
524
525 Markup_methods,/*tp_methods*/
526 0, /*tp_members*/
527 0, /*tp_getset*/
528 0, /*tp_base*/
529 0, /*tp_dict*/
530
531 0, /*tp_descr_get*/
532 0, /*tp_descr_set*/
533 0, /*tp_dictoffset*/
534
535 0, /*tp_init*/
536 0, /*tp_alloc will be set to PyType_GenericAlloc in module init*/
537 Markup_new, /*tp_new*/
538 0, /*tp_free Low-level free-memory routine */
539 0, /*tp_is_gc For PyObject_IS_GC */
540 0, /*tp_bases*/
541 0, /*tp_mro method resolution order */
542 0, /*tp_cache*/
543 0, /*tp_subclasses*/
544 0 /*tp_weaklist*/
545 };
546
547 PyMODINIT_FUNC
548 init_speedups(void)
549 {
550 PyObject *module;
551
552 /* Workaround for quirk in Visual Studio, see
553 <http://www.python.it/faq/faq-3.html#3.24> */
554 MarkupType.tp_base = &PyUnicode_Type;
555
556 if (PyType_Ready(&MarkupType) < 0)
557 return;
558
559 init_constants();
560
561 module = Py_InitModule("_speedups", NULL);
562 Py_INCREF(&MarkupType);
563 PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType);
564 }
Copyright (C) 2012-2017 Edgewall Software