comparison genshi/_speedups.c @ 820:9755836bb396 experimental-inline

Sync (old) experimental inline branch with trunk@1027.
author cmlenz
date Wed, 11 Mar 2009 17:51:06 +0000
parents
children
comparison
equal deleted inserted replaced
500:3eb30e4ece8c 820:9755836bb396
1 /*
2 * Copyright (C) 2006-2008 Edgewall Software
3 * All rights reserved.
4 *
5 * This software is licensed as described in the file COPYING, which
6 * you should have received as part of this distribution. The terms
7 * are also available at http://genshi.edgewall.org/wiki/License.
8 *
9 * This software consists of voluntary contributions made by many
10 * individuals. For the exact contribution history, see the revision
11 * history and logs, available at http://genshi.edgewall.org/log/.
12 */
13
14 #include <Python.h>
15 #include <structmember.h>
16
/* Compatibility shim: when PY_VERSION_HEX is below 0x02050000 (i.e. the
 * headers are older than Python 2.5) and PY_SSIZE_T_MIN is not already
 * defined, provide Py_ssize_t as a plain int together with its limits so
 * the rest of the file can use the type unconditionally. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#endif
22
23 static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2;
24 static PyObject *stripentities, *striptags;
25
26 static void
27 init_constants(void)
28 {
29 PyObject *util = PyImport_ImportModule("genshi.util");
30 stripentities = PyObject_GetAttrString(util, "stripentities");
31 striptags = PyObject_GetAttrString(util, "striptags");
32 Py_DECREF(util);
33
34 amp1 = PyUnicode_DecodeASCII("&", 1, NULL);
35 amp2 = PyUnicode_DecodeASCII("&amp;", 5, NULL);
36 lt1 = PyUnicode_DecodeASCII("<", 1, NULL);
37 lt2 = PyUnicode_DecodeASCII("&lt;", 4, NULL);
38 gt1 = PyUnicode_DecodeASCII(">", 1, NULL);
39 gt2 = PyUnicode_DecodeASCII("&gt;", 4, NULL);
40 qt1 = PyUnicode_DecodeASCII("\"", 1, NULL);
41 qt2 = PyUnicode_DecodeASCII("&#34;", 5, NULL);
42 }
43
44 /* Markup class */
45
46 PyTypeObject MarkupType; /* declared later */
47
48 PyDoc_STRVAR(Markup__doc__,
49 "Marks a string as being safe for inclusion in HTML/XML output without\n\
50 needing to be escaped.");
51
52 static PyObject *
53 escape(PyObject *text, int quotes)
54 {
55 PyObject *args, *ret;
56 PyUnicodeObject *in, *out;
57 Py_UNICODE *inp, *outp;
58 int len, inn, outn;
59
60 if (PyObject_TypeCheck(text, &MarkupType)) {
61 Py_INCREF(text);
62 return text;
63 }
64 if (PyObject_HasAttrString(text, "__html__")) {
65 ret = PyObject_CallMethod(text, "__html__", NULL);
66 args = PyTuple_New(1);
67 if (args == NULL) {
68 Py_DECREF(ret);
69 return NULL;
70 }
71 PyTuple_SET_ITEM(args, 0, ret);
72 ret = MarkupType.tp_new(&MarkupType, args, NULL);
73 Py_DECREF(args);
74 return ret;
75 }
76 in = (PyUnicodeObject *) PyObject_Unicode(text);
77 if (in == NULL) {
78 return NULL;
79 }
80 /* First we need to figure out how long the escaped string will be */
81 len = inn = 0;
82 inp = in->str;
83 while (*(inp) || in->length > inp - in->str) {
84 switch (*inp++) {
85 case '&': len += 5; inn++; break;
86 case '"': len += quotes ? 5 : 1; inn += quotes ? 1 : 0; break;
87 case '<':
88 case '>': len += 4; inn++; break;
89 default: len++;
90 }
91 }
92
93 /* Do we need to escape anything at all? */
94 if (!inn) {
95 args = PyTuple_New(1);
96 if (args == NULL) {
97 Py_DECREF((PyObject *) in);
98 return NULL;
99 }
100 PyTuple_SET_ITEM(args, 0, (PyObject *) in);
101 ret = MarkupType.tp_new(&MarkupType, args, NULL);
102 Py_DECREF(args);
103 return ret;
104 }
105
106 out = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, len);
107 if (out == NULL) {
108 Py_DECREF((PyObject *) in);
109 return NULL;
110 }
111
112 outn = 0;
113 inp = in->str;
114 outp = out->str;
115 while (*(inp) || in->length > inp - in->str) {
116 if (outn == inn) {
117 /* copy rest of string if we have already replaced everything */
118 Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str));
119 break;
120 }
121 switch (*inp) {
122 case '&':
123 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) amp2)->str, 5);
124 outp += 5;
125 outn++;
126 break;
127 case '"':
128 if (quotes) {
129 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) qt2)->str, 5);
130 outp += 5;
131 outn++;
132 } else {
133 *outp++ = *inp;
134 }
135 break;
136 case '<':
137 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) lt2)->str, 4);
138 outp += 4;
139 outn++;
140 break;
141 case '>':
142 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) gt2)->str, 4);
143 outp += 4;
144 outn++;
145 break;
146 default:
147 *outp++ = *inp;
148 }
149 inp++;
150 }
151
152 Py_DECREF((PyObject *) in);
153
154 args = PyTuple_New(1);
155 if (args == NULL) {
156 Py_DECREF((PyObject *) out);
157 return NULL;
158 }
159 PyTuple_SET_ITEM(args, 0, (PyObject *) out);
160 ret = MarkupType.tp_new(&MarkupType, args, NULL);
161 Py_DECREF(args);
162 return ret;
163 }
164
165 PyDoc_STRVAR(escape__doc__,
166 "Create a Markup instance from a string and escape special characters\n\
167 it may contain (<, >, & and \").\n\
168 \n\
169 >>> escape('\"1 < 2\"')\n\
170 <Markup u'&#34;1 &lt; 2&#34;'>\n\
171 \n\
172 If the `quotes` parameter is set to `False`, the \" character is left\n\
173 as is. Escaping quotes is generally only required for strings that are\n\
174 to be used in attribute values.\n\
175 \n\
176 >>> escape('\"1 < 2\"', quotes=False)\n\
177 <Markup u'\"1 &lt; 2\"'>\n\
178 \n\
179 :param text: the text to escape\n\
180 :param quotes: if ``True``, double quote characters are escaped in\n\
181 addition to the other special characters\n\
182 :return: the escaped `Markup` string\n\
183 :rtype: `Markup`\n\
184 ");
185
186 static PyObject *
187 Markup_escape(PyTypeObject* type, PyObject *args, PyObject *kwds)
188 {
189 static char *kwlist[] = {"text", "quotes", 0};
190 PyObject *text = NULL;
191 char quotes = 1;
192
193 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &text, &quotes)) {
194 return NULL;
195 }
196 if (PyObject_Not(text)) {
197 return type->tp_new(type, args, NULL);
198 }
199 if (PyObject_TypeCheck(text, type)) {
200 Py_INCREF(text);
201 return text;
202 }
203 return escape(text, quotes);
204 }
205
206 static PyObject *
207 Markup_html(PyObject *self)
208 {
209 Py_INCREF(self);
210 return self;
211 }
212
213 PyDoc_STRVAR(join__doc__,
214 "Return a `Markup` object which is the concatenation of the strings\n\
215 in the given sequence, where this `Markup` object is the separator\n\
216 between the joined elements.\n\
217 \n\
218 Any element in the sequence that is not a `Markup` instance is\n\
219 automatically escaped.\n\
220 \n\
221 :param seq: the sequence of strings to join\n\
222 :param escape_quotes: whether double quote characters in the elements\n\
223 should be escaped\n\
224 :return: the joined `Markup` object\n\
225 :rtype: `Markup`\n\
226 :see: `escape`\n\
227 ");
228
229 static PyObject *
230 Markup_join(PyObject *self, PyObject *args, PyObject *kwds)
231 {
232 static char *kwlist[] = {"seq", "escape_quotes", 0};
233 PyObject *seq = NULL, *seq2, *tmp, *tmp2;
234 char quotes = 1;
235 Py_ssize_t n;
236 int i;
237
238 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &seq, &quotes)) {
239 return NULL;
240 }
241 if (!PySequence_Check(seq)) {
242 return NULL;
243 }
244 n = PySequence_Size(seq);
245 if (n < 0) {
246 return NULL;
247 }
248 seq2 = PyTuple_New(n);
249 if (seq2 == NULL) {
250 return NULL;
251 }
252 for (i = 0; i < n; i++) {
253 tmp = PySequence_GetItem(seq, i);
254 if (tmp == NULL) {
255 Py_DECREF(seq2);
256 return NULL;
257 }
258 tmp2 = escape(tmp, quotes);
259 if (tmp2 == NULL) {
260 Py_DECREF(seq2);
261 return NULL;
262 }
263 PyTuple_SET_ITEM(seq2, i, tmp2);
264 Py_DECREF(tmp);
265 }
266 tmp = PyUnicode_Join(self, seq2);
267 Py_DECREF(seq2);
268 if (tmp == NULL)
269 return NULL;
270 args = PyTuple_New(1);
271 if (args == NULL) {
272 Py_DECREF(tmp);
273 return NULL;
274 }
275 PyTuple_SET_ITEM(args, 0, tmp);
276 tmp = MarkupType.tp_new(&MarkupType, args, NULL);
277 Py_DECREF(args);
278 return tmp;
279 }
280
281 static PyObject *
282 Markup_add(PyObject *self, PyObject *other)
283 {
284 PyObject *tmp, *tmp2, *args, *ret;
285 if (PyObject_TypeCheck(self, &MarkupType)) {
286 tmp = escape(other, 1);
287 if (tmp == NULL)
288 return NULL;
289 tmp2 = PyUnicode_Concat(self, tmp);
290 } else { // __radd__
291 tmp = escape(self, 1);
292 if (tmp == NULL)
293 return NULL;
294 tmp2 = PyUnicode_Concat(tmp, other);
295 }
296 Py_DECREF(tmp);
297 if (tmp2 == NULL)
298 return NULL;
299 args = PyTuple_New(1);
300 if (args == NULL) {
301 Py_DECREF(tmp2);
302 return NULL;
303 }
304 PyTuple_SET_ITEM(args, 0, tmp2);
305 ret = MarkupType.tp_new(&MarkupType, args, NULL);
306 Py_DECREF(args);
307 return ret;
308 }
309
310 static PyObject *
311 Markup_mod(PyObject *self, PyObject *args)
312 {
313 PyObject *tmp, *tmp2, *ret, *args2;
314 int i;
315 Py_ssize_t nargs = 0;
316 PyObject *kwds = NULL;
317
318 if (PyDict_Check(args)) {
319 kwds = args;
320 }
321 if (kwds && PyDict_Size(kwds)) {
322 PyObject *kwcopy, *key, *value;
323 Py_ssize_t pos = 0;
324
325 kwcopy = PyDict_Copy( kwds );
326 if (kwcopy == NULL) {
327 return NULL;
328 }
329 while (PyDict_Next(kwcopy, &pos, &key, &value)) {
330 tmp = escape(value, 1);
331 if (tmp == NULL) {
332 Py_DECREF(kwcopy);
333 return NULL;
334 }
335 if (PyDict_SetItem(kwcopy, key, tmp) < 0) {
336 Py_DECREF(tmp);
337 Py_DECREF(kwcopy);
338 return NULL;
339 }
340 }
341 tmp = PyUnicode_Format(self, kwcopy);
342 Py_DECREF(kwcopy);
343 if (tmp == NULL) {
344 return NULL;
345 }
346 } else if (PyTuple_Check(args)) {
347 nargs = PyTuple_GET_SIZE(args);
348 args2 = PyTuple_New(nargs);
349 if (args2 == NULL) {
350 return NULL;
351 }
352 for (i = 0; i < nargs; i++) {
353 tmp = escape(PyTuple_GET_ITEM(args, i), 1);
354 if (tmp == NULL) {
355 Py_DECREF(args2);
356 return NULL;
357 }
358 PyTuple_SET_ITEM(args2, i, tmp);
359 }
360 tmp = PyUnicode_Format(self, args2);
361 Py_DECREF(args2);
362 if (tmp == NULL) {
363 return NULL;
364 }
365 } else {
366 tmp2 = escape(args, 1);
367 if (tmp2 == NULL) {
368 return NULL;
369 }
370 tmp = PyUnicode_Format(self, tmp2);
371 Py_DECREF(tmp2);
372 if (tmp == NULL) {
373 return NULL;
374 }
375 }
376 args = PyTuple_New(1);
377 if (args == NULL) {
378 Py_DECREF(tmp);
379 return NULL;
380 }
381 PyTuple_SET_ITEM(args, 0, tmp);
382 ret = PyUnicode_Type.tp_new(&MarkupType, args, NULL);
383 Py_DECREF(args);
384 return ret;
385 }
386
387 static PyObject *
388 Markup_mul(PyObject *self, PyObject *num)
389 {
390 PyObject *unicode, *result, *args;
391
392 if (PyObject_TypeCheck(self, &MarkupType)) {
393 unicode = PyObject_Unicode(self);
394 if (unicode == NULL) return NULL;
395 result = PyNumber_Multiply(unicode, num);
396 } else { // __rmul__
397 unicode = PyObject_Unicode(num);
398 if (unicode == NULL) return NULL;
399 result = PyNumber_Multiply(unicode, self);
400 }
401 Py_DECREF(unicode);
402
403 if (result == NULL) return NULL;
404 args = PyTuple_New(1);
405 if (args == NULL) {
406 Py_DECREF(result);
407 return NULL;
408 }
409 PyTuple_SET_ITEM(args, 0, result);
410 result = PyUnicode_Type.tp_new(&MarkupType, args, NULL);
411 Py_DECREF(args);
412
413 return result;
414 }
415
416 static PyObject *
417 Markup_repr(PyObject *self)
418 {
419 PyObject *format, *result, *args;
420
421 format = PyString_FromString("<Markup %r>");
422 if (format == NULL) return NULL;
423 result = PyObject_Unicode(self);
424 if (result == NULL) {
425 Py_DECREF(format);
426 return NULL;
427 }
428 args = PyTuple_New(1);
429 if (args == NULL) {
430 Py_DECREF(format);
431 Py_DECREF(result);
432 return NULL;
433 }
434 PyTuple_SET_ITEM(args, 0, result);
435 result = PyString_Format(format, args);
436 Py_DECREF(format);
437 Py_DECREF(args);
438 return result;
439 }
440
441 PyDoc_STRVAR(unescape__doc__,
442 "Reverse-escapes &, <, >, and \" and returns a `unicode` object.\n\
443 \n\
444 >>> Markup('1 &lt; 2').unescape()\n\
445 u'1 < 2'\n\
446 \n\
447 :return: the unescaped string\n\
448 :rtype: `unicode`\n\
449 :see: `genshi.core.unescape`\n\
450 ");
451
452 static PyObject *
453 Markup_unescape(PyObject* self)
454 {
455 PyObject *tmp, *tmp2;
456
457 tmp = PyUnicode_Replace(self, qt2, qt1, -1);
458 if (tmp == NULL) return NULL;
459 tmp2 = PyUnicode_Replace(tmp, gt2, gt1, -1);
460 Py_DECREF(tmp);
461 if (tmp2 == NULL) return NULL;
462 tmp = PyUnicode_Replace(tmp2, lt2, lt1, -1);
463 Py_DECREF(tmp2);
464 if (tmp == NULL) return NULL;
465 tmp2 = PyUnicode_Replace(tmp, amp2, amp1, -1);
466 Py_DECREF(tmp);
467 return tmp2;
468 }
469
470 PyDoc_STRVAR(stripentities__doc__,
471 "Return a copy of the text with any character or numeric entities\n\
472 replaced by the equivalent UTF-8 characters.\n\
473 \n\
474 If the `keepxmlentities` parameter is provided and evaluates to `True`,\n\
475 the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and\n\
476 ``&quot;``) are not stripped.\n\
477 \n\
478 :return: a `Markup` instance with entities removed\n\
479 :rtype: `Markup`\n\
480 :see: `genshi.util.stripentities`\n\
481 ");
482
483 static PyObject *
484 Markup_stripentities(PyObject* self, PyObject *args, PyObject *kwds)
485 {
486 static char *kwlist[] = {"keepxmlentities", 0};
487 PyObject *result, *args2;
488 char keepxml = 0;
489
490 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|b", kwlist, &keepxml)) {
491 return NULL;
492 }
493
494 if (stripentities == NULL) return NULL;
495 result = PyObject_CallFunction(stripentities, "Ob", self, keepxml);
496 if (result == NULL) return NULL;
497 args2 = PyTuple_New(1);
498 if (args2 == NULL) {
499 Py_DECREF(result);
500 return NULL;
501 }
502 PyTuple_SET_ITEM(args2, 0, result);
503 result = MarkupType.tp_new(&MarkupType, args2, NULL);
504 Py_DECREF(args2);
505 return result;
506 }
507
508 PyDoc_STRVAR(striptags__doc__,
509 """Return a copy of the text with all XML/HTML tags removed.\n\
510 \n\
511 :return: a `Markup` instance with all tags removed\n\
512 :rtype: `Markup`\n\
513 :see: `genshi.util.striptags`\n\
514 ");
515
516 static PyObject *
517 Markup_striptags(PyObject* self)
518 {
519 PyObject *result, *args;
520
521 if (striptags == NULL) return NULL;
522 result = PyObject_CallFunction(striptags, "O", self);
523 if (result == NULL) return NULL;
524 args = PyTuple_New(1);
525 if (args == NULL) {
526 Py_DECREF(result);
527 return NULL;
528 }
529 PyTuple_SET_ITEM(args, 0, result);
530 result = MarkupType.tp_new(&MarkupType, args, NULL);
531 Py_DECREF(args);
532 return result;
533 }
534
/* Instance layout of Markup: a PyUnicodeObject with no additional fields.
 * Its size is what MarkupType reports via sizeof(MarkupObject). */
typedef struct {
    PyUnicodeObject HEAD;
} MarkupObject;
538
539 static PyMethodDef Markup_methods[] = {
540 {"__html__", (PyCFunction) Markup_html, METH_NOARGS, NULL},
541 {"escape", (PyCFunction) Markup_escape,
542 METH_VARARGS|METH_CLASS|METH_KEYWORDS, escape__doc__},
543 {"join", (PyCFunction)Markup_join, METH_VARARGS|METH_KEYWORDS, join__doc__},
544 {"unescape", (PyCFunction)Markup_unescape, METH_NOARGS, unescape__doc__},
545 {"stripentities", (PyCFunction) Markup_stripentities,
546 METH_VARARGS|METH_KEYWORDS, stripentities__doc__},
547 {"striptags", (PyCFunction) Markup_striptags, METH_NOARGS,
548 striptags__doc__},
549 {NULL} /* Sentinel */
550 };
551
/* Number-protocol slots for Markup.  Only addition, multiplication and
 * remainder (the `%` formatting operator) are provided; the slots that are
 * not listed after nb_remainder are left zero-initialised. */
static PyNumberMethods Markup_as_number = {
    Markup_add, /*nb_add*/
    0, /*nb_subtract*/
    Markup_mul, /*nb_multiply*/
    0, /*nb_divide*/
    Markup_mod, /*nb_remainder*/
};
559
/* Type object for genshi._speedups.Markup.  The initialiser is positional,
 * so the order of the entries below must not change.  tp_base is left 0
 * here and assigned in init_speedups() before PyType_Ready() is called. */
PyTypeObject MarkupType = {
    PyObject_HEAD_INIT(NULL)
    0, /*ob_size*/
    "genshi._speedups.Markup", /*tp_name*/
    sizeof(MarkupObject), /*tp_basicsize*/
    0, /*tp_itemsize*/
    0, /*tp_dealloc*/
    0, /*tp_print*/
    0, /*tp_getattr*/
    0, /*tp_setattr*/
    0, /*tp_compare*/
    Markup_repr, /*tp_repr*/
    &Markup_as_number, /*tp_as_number*/
    0, /*tp_as_sequence*/
    0, /*tp_as_mapping*/
    0, /*tp_hash */

    0, /*tp_call*/
    0, /*tp_str*/
    0, /*tp_getattro*/
    0, /*tp_setattro*/
    0, /*tp_as_buffer*/

    /* Py_TPFLAGS_CHECKTYPES: the number slots receive their operands
       uncoerced, which is why Markup_add and Markup_mul test which operand
       is the Markup instance. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
    Markup__doc__,/*tp_doc*/

    0, /*tp_traverse*/
    0, /*tp_clear*/

    0, /*tp_richcompare*/
    0, /*tp_weaklistoffset*/

    0, /*tp_iter*/
    0, /*tp_iternext*/

    /* Attribute descriptor and subclassing stuff */

    Markup_methods,/*tp_methods*/
    0, /*tp_members*/
    0, /*tp_getset*/
    0, /*tp_base; assigned in init_speedups()*/
    0, /*tp_dict*/

    0, /*tp_descr_get*/
    0, /*tp_descr_set*/
    0, /*tp_dictoffset*/

    0, /*tp_init*/
    0, /*tp_alloc; NOTE(review): init_speedups() does not assign this
         explicitly - presumably filled in by PyType_Ready(), confirm*/
    0, /*tp_new; left 0 here, yet called through MarkupType.tp_new elsewhere
         in this file - presumably filled in by PyType_Ready(), confirm*/
    0, /*tp_free Low-level free-memory routine */
    0, /*tp_is_gc For PyObject_IS_GC */
    0, /*tp_bases*/
    0, /*tp_mro method resolution order */
    0, /*tp_cache*/
    0, /*tp_subclasses*/
    0 /*tp_weaklist*/
};
618
619 PyMODINIT_FUNC
620 init_speedups(void)
621 {
622 PyObject *module;
623
624 /* Workaround for quirk in Visual Studio, see
625 <http://www.python.it/faq/faq-3.html#3.24> */
626 MarkupType.tp_base = &PyUnicode_Type;
627
628 if (PyType_Ready(&MarkupType) < 0)
629 return;
630
631 init_constants();
632
633 module = Py_InitModule("_speedups", NULL);
634 Py_INCREF(&MarkupType);
635 PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType);
636 }
Copyright (C) 2012-2017 Edgewall Software