Mercurial > genshi > mirror
comparison genshi/_speedups.c @ 541:773d8c470e82 trunk
Merged cspeedups branch into trunk.
author | cmlenz |
---|---|
date | Thu, 28 Jun 2007 17:43:31 +0000 |
parents | |
children | 93b2a5792cfc |
comparison
equal
deleted
inserted
replaced
540:6b413fbf359a | 541:773d8c470e82 |
---|---|
1 /* | |
2 * Copyright (C) 2006 Edgewall Software | |
3 * All rights reserved. | |
4 * | |
5 * This software is licensed as described in the file COPYING, which | |
6 * you should have received as part of this distribution. The terms | |
7 * are also available at http://genshi.edgewall.org/wiki/License. | |
8 * | |
9 * This software consists of voluntary contributions made by many | |
10 * individuals. For the exact contribution history, see the revision | |
11 * history and logs, available at http://genshi.edgewall.org/log/. | |
12 */ | |
13 | |
14 #include <Python.h> | |
15 #include <structmember.h> | |
16 | |
17 static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2; | |
18 static PyObject *stripentities, *striptags; | |
19 | |
20 static void | |
21 init_constants(void) | |
22 { | |
23 PyObject *util = PyImport_ImportModule("genshi.util"); | |
24 stripentities = PyObject_GetAttrString(util, "stripentities"); | |
25 striptags = PyObject_GetAttrString(util, "striptags"); | |
26 Py_DECREF(util); | |
27 | |
28 amp1 = PyUnicode_DecodeASCII("&", 1, NULL); | |
29 amp2 = PyUnicode_DecodeASCII("&", 5, NULL); | |
30 lt1 = PyUnicode_DecodeASCII("<", 1, NULL); | |
31 lt2 = PyUnicode_DecodeASCII("<", 4, NULL); | |
32 gt1 = PyUnicode_DecodeASCII(">", 1, NULL); | |
33 gt2 = PyUnicode_DecodeASCII(">", 4, NULL); | |
34 qt1 = PyUnicode_DecodeASCII("\"", 1, NULL); | |
35 qt2 = PyUnicode_DecodeASCII(""", 5, NULL); | |
36 } | |
37 | |
38 /* Markup class */ | |
39 | |
/* Forward declaration of the Markup type object: escape() and the Markup
   methods below refer to MarkupType before its definition further down.
   Markup__doc__ is the docstring installed as the type's tp_doc. */
40 PyAPI_DATA(PyTypeObject) MarkupType; | |
41 | |
42 PyDoc_STRVAR(Markup__doc__, | |
43 "Marks a string as being safe for inclusion in HTML/XML output without\n\ | |
44 needing to be escaped."); | |
45 | |
46 static PyObject * | |
47 escape(PyObject *text, int quotes) | |
48 { | |
49 PyObject *args, *ret; | |
50 PyUnicodeObject *in, *out; | |
51 Py_UNICODE *inp, *outp; | |
52 int len, inn, outn; | |
53 | |
54 if (PyObject_TypeCheck(text, &MarkupType)) { | |
55 Py_INCREF(text); | |
56 return text; | |
57 } | |
58 in = (PyUnicodeObject *) PyObject_Unicode(text); | |
59 if (in == NULL) { | |
60 return NULL; | |
61 } | |
62 /* First we need to figure out how long the escaped string will be */ | |
63 len = inn = 0; | |
64 inp = in->str; | |
65 while (*(inp) || in->length > inp - in->str) { | |
66 switch (*inp++) { | |
67 case '&': len += 5; inn++; break; | |
68 case '"': len += quotes ? 5 : 1; inn += quotes ? 1 : 0; break; | |
69 case '<': | |
70 case '>': len += 4; inn++; break; | |
71 default: len++; | |
72 } | |
73 } | |
74 | |
75 /* Do we need to escape anything at all? */ | |
76 if (!inn) { | |
77 args = PyTuple_New(1); | |
78 if (args == NULL) { | |
79 Py_DECREF((PyObject *) in); | |
80 return NULL; | |
81 } | |
82 PyTuple_SET_ITEM(args, 0, (PyObject *) in); | |
83 ret = MarkupType.tp_new(&MarkupType, args, NULL); | |
84 Py_DECREF(args); | |
85 return ret; | |
86 } | |
87 | |
88 out = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, len); | |
89 if (out == NULL) { | |
90 return NULL; | |
91 } | |
92 | |
93 outn = 0; | |
94 inp = in->str; | |
95 outp = out->str; | |
96 while (*(inp) || in->length > inp - in->str) { | |
97 if (outn == inn) { | |
98 /* copy rest of string if we have already replaced everything */ | |
99 Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str)); | |
100 break; | |
101 } | |
102 switch (*inp) { | |
103 case '&': | |
104 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) amp2)->str, 5); | |
105 outp += 5; | |
106 outn++; | |
107 break; | |
108 case '"': | |
109 if (quotes) { | |
110 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) qt2)->str, 5); | |
111 outp += 5; | |
112 outn++; | |
113 } else { | |
114 *outp++ = *inp; | |
115 } | |
116 break; | |
117 case '<': | |
118 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) lt2)->str, 4); | |
119 outp += 4; | |
120 outn++; | |
121 break; | |
122 case '>': | |
123 Py_UNICODE_COPY(outp, ((PyUnicodeObject *) gt2)->str, 4); | |
124 outp += 4; | |
125 outn++; | |
126 break; | |
127 default: | |
128 *outp++ = *inp; | |
129 } | |
130 inp++; | |
131 } | |
132 | |
133 args = PyTuple_New(1); | |
134 if (args == NULL) { | |
135 Py_DECREF((PyObject *) out); | |
136 return NULL; | |
137 } | |
138 PyTuple_SET_ITEM(args, 0, (PyObject *) out); | |
139 ret = MarkupType.tp_new(&MarkupType, args, NULL); | |
140 Py_DECREF(args); | |
141 return ret; | |
142 } | |
143 | |
144 static PyObject * | |
145 Markup_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
146 { | |
147 PyObject *self, *text, *tmp, *args2; | |
148 int nargs, i; | |
149 | |
150 nargs = PyTuple_GET_SIZE(args); | |
151 if (nargs < 2) { | |
152 return PyUnicode_Type.tp_new(type, args, NULL); | |
153 } | |
154 | |
155 text = PyTuple_GET_ITEM(args, 0); | |
156 args2 = PyTuple_New(nargs - 1); | |
157 if (args2 == NULL) { | |
158 return NULL; | |
159 } | |
160 for (i = 1; i < nargs; i++) { | |
161 tmp = escape(PyTuple_GET_ITEM(args, i), 1); | |
162 if (tmp == NULL) { | |
163 Py_DECREF(args2); | |
164 return NULL; | |
165 } | |
166 PyTuple_SET_ITEM(args2, i - 1, tmp); | |
167 } | |
168 tmp = PyUnicode_Format(text, args2); | |
169 Py_DECREF(args2); | |
170 if (tmp == NULL) { | |
171 return NULL; | |
172 } | |
173 args = PyTuple_New(1); | |
174 if (args == NULL) { | |
175 Py_DECREF(tmp); | |
176 return NULL; | |
177 } | |
178 PyTuple_SET_ITEM(args, 0, tmp); | |
179 self = PyUnicode_Type.tp_new(type, args, NULL); | |
180 Py_DECREF(args); | |
181 return self; | |
182 } | |
183 | |
184 PyDoc_STRVAR(escape__doc__, | |
185 "Create a Markup instance from a string and escape special characters\n\ | |
186 it may contain (<, >, & and \").\n\ | |
187 \n\ | |
188 If the `quotes` parameter is set to `False`, the \" character is left\n\ | |
189 as is. Escaping quotes is generally only required for strings that are\n\ | |
190 to be used in attribute values."); | |
191 | |
192 static PyObject * | |
193 Markup_escape(PyTypeObject* type, PyObject *args, PyObject *kwds) | |
194 { | |
195 static char *kwlist[] = {"text", "quotes", 0}; | |
196 PyObject *text = NULL; | |
197 char quotes = 1; | |
198 | |
199 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &text, "es)) { | |
200 return NULL; | |
201 } | |
202 if (PyObject_Not(text)) { | |
203 return type->tp_new(type, args, NULL); | |
204 } | |
205 if (PyObject_TypeCheck(text, type)) { | |
206 Py_INCREF(text); | |
207 return text; | |
208 } | |
209 return escape(text, quotes); | |
210 } | |
211 | |
212 static PyObject * | |
213 Markup_join(PyObject *self, PyObject *args, PyObject *kwds) | |
214 { | |
215 static char *kwlist[] = {"seq", "escape_quotes", 0}; | |
216 PyObject *seq = NULL, *seq2, *tmp; | |
217 char quotes = 1; | |
218 int n, i; | |
219 | |
220 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|b", kwlist, &seq, "es)) { | |
221 return NULL; | |
222 } | |
223 if (!PySequence_Check(seq)) { | |
224 return NULL; | |
225 } | |
226 n = PySequence_Size(seq); | |
227 if (n < 0) { | |
228 return NULL; | |
229 } | |
230 seq2 = PyTuple_New(n); | |
231 if (seq2 == NULL) { | |
232 return NULL; | |
233 } | |
234 for (i = 0; i < n; i++) { | |
235 tmp = PySequence_GetItem(seq, i); | |
236 if (tmp == NULL) { | |
237 Py_DECREF(seq2); | |
238 return NULL; | |
239 } | |
240 tmp = escape(tmp, quotes); | |
241 if (tmp == NULL) { | |
242 Py_DECREF(seq2); | |
243 return NULL; | |
244 } | |
245 PyTuple_SET_ITEM(seq2, i, tmp); | |
246 } | |
247 tmp = PyUnicode_Join(self, seq2); | |
248 Py_DECREF(seq2); | |
249 if (tmp == NULL) | |
250 return NULL; | |
251 args = PyTuple_New(1); | |
252 if (args == NULL) { | |
253 Py_DECREF(tmp); | |
254 return NULL; | |
255 } | |
256 PyTuple_SET_ITEM(args, 0, tmp); | |
257 tmp = MarkupType.tp_new(&MarkupType, args, NULL); | |
258 Py_DECREF(args); | |
259 return tmp; | |
260 } | |
261 | |
262 static PyObject * | |
263 Markup_add(PyObject *self, PyObject *other) | |
264 { | |
265 PyObject *tmp, *tmp2, *args, *ret; | |
266 if (PyObject_TypeCheck(self, &MarkupType)) { | |
267 tmp = escape(other, 1); | |
268 if (tmp == NULL) | |
269 return NULL; | |
270 tmp2 = PyUnicode_Concat(self, tmp); | |
271 } else { // __radd__ | |
272 tmp = escape(self, 1); | |
273 if (tmp == NULL) | |
274 return NULL; | |
275 tmp2 = PyUnicode_Concat(tmp, other); | |
276 } | |
277 if (tmp2 == NULL) { | |
278 Py_DECREF(tmp); | |
279 return NULL; | |
280 } | |
281 Py_DECREF(tmp); | |
282 args = PyTuple_New(1); | |
283 if (args == NULL) { | |
284 Py_DECREF(tmp2); | |
285 return NULL; | |
286 } | |
287 PyTuple_SET_ITEM(args, 0, tmp2); | |
288 ret = MarkupType.tp_new(&MarkupType, args, NULL); | |
289 Py_DECREF(args); | |
290 return ret; | |
291 } | |
292 | |
293 static PyObject * | |
294 Markup_mod(PyObject *self, PyObject *args) | |
295 { | |
296 PyObject *tmp, *tmp2, *ret, *args2; | |
297 int i, nargs; | |
298 | |
299 if (PyTuple_Check(args)) { | |
300 nargs = PyTuple_GET_SIZE(args); | |
301 args2 = PyTuple_New(nargs); | |
302 if (args2 == NULL) { | |
303 return NULL; | |
304 } | |
305 for (i = 0; i < nargs; i++) { | |
306 tmp = escape(PyTuple_GET_ITEM(args, i), 1); | |
307 if (tmp == NULL) { | |
308 Py_DECREF(args2); | |
309 return NULL; | |
310 } | |
311 PyTuple_SET_ITEM(args2, i, tmp); | |
312 } | |
313 tmp = PyUnicode_Format(self, args2); | |
314 Py_DECREF(args2); | |
315 if (tmp == NULL) { | |
316 return NULL; | |
317 } | |
318 } else { | |
319 tmp2 = escape(args, 1); | |
320 if (tmp2 == NULL) { | |
321 return NULL; | |
322 } | |
323 tmp = PyUnicode_Format(self, tmp2); | |
324 Py_DECREF(tmp2); | |
325 if (tmp == NULL) { | |
326 return NULL; | |
327 } | |
328 } | |
329 args = PyTuple_New(1); | |
330 if (args == NULL) { | |
331 Py_DECREF(tmp); | |
332 return NULL; | |
333 } | |
334 PyTuple_SET_ITEM(args, 0, tmp); | |
335 ret = PyUnicode_Type.tp_new(&MarkupType, args, NULL); | |
336 Py_DECREF(args); | |
337 return ret; | |
338 } | |
339 | |
340 static PyObject * | |
341 Markup_mul(PyObject *self, PyObject *num) | |
342 { | |
343 PyObject *unicode, *result, *args; | |
344 | |
345 if (PyObject_TypeCheck(self, &MarkupType)) { | |
346 unicode = PyObject_Unicode(self); | |
347 if (unicode == NULL) return NULL; | |
348 result = PyNumber_Multiply(unicode, num); | |
349 } else { // __rmul__ | |
350 unicode = PyObject_Unicode(num); | |
351 if (unicode == NULL) return NULL; | |
352 result = PyNumber_Multiply(unicode, self); | |
353 } | |
354 | |
355 if (result == NULL) return NULL; | |
356 args = PyTuple_New(1); | |
357 if (args == NULL) { | |
358 Py_DECREF(result); | |
359 return NULL; | |
360 } | |
361 PyTuple_SET_ITEM(args, 0, result); | |
362 result = PyUnicode_Type.tp_new(&MarkupType, args, NULL); | |
363 Py_DECREF(args); | |
364 | |
365 return result; | |
366 } | |
367 | |
368 static PyObject * | |
369 Markup_repr(PyObject *self) | |
370 { | |
371 PyObject *format, *result, *args; | |
372 | |
373 format = PyString_FromString("<Markup %r>"); | |
374 if (format == NULL) return NULL; | |
375 result = PyObject_Unicode(self); | |
376 if (result == NULL) return NULL; | |
377 args = PyTuple_New(1); | |
378 if (args == NULL) { | |
379 Py_DECREF(result); | |
380 return NULL; | |
381 } | |
382 PyTuple_SET_ITEM(args, 0, result); | |
383 result = PyString_Format(format, args); | |
384 Py_DECREF(args); | |
385 return result; | |
386 } | |
387 | |
388 PyDoc_STRVAR(unescape__doc__, | |
389 "Reverse-escapes &, <, > and \" and returns a `unicode` object."); | |
390 | |
391 static PyObject * | |
392 Markup_unescape(PyObject* self) | |
393 { | |
394 PyObject *tmp, *tmp2; | |
395 | |
396 tmp = PyUnicode_Replace(self, qt2, qt1, -1); | |
397 if (tmp == NULL) return NULL; | |
398 tmp2 = PyUnicode_Replace(tmp, gt2, gt1, -1); | |
399 Py_DECREF(tmp); | |
400 if (tmp2 == NULL) return NULL; | |
401 tmp = PyUnicode_Replace(tmp2, lt2, lt1, -1); | |
402 Py_DECREF(tmp2); | |
403 if (tmp == NULL) return NULL; | |
404 tmp2 = PyUnicode_Replace(tmp, amp2, amp1, -1); | |
405 Py_DECREF(tmp); | |
406 return tmp2; | |
407 } | |
408 | |
409 PyDoc_STRVAR(stripentities__doc__, | |
410 "Return a copy of the text with any character or numeric entities\n\ | |
411 replaced by the equivalent UTF-8 characters.\n\ | |
412 \n\ | |
413 If the `keepxmlentities` parameter is provided and evaluates to `True`,\n\ | |
414 the core XML entities (&, ', >, < and ") are not\n\ | |
415 stripped."); | |
416 | |
417 static PyObject * | |
418 Markup_stripentities(PyObject* self, PyObject *args, PyObject *kwds) | |
419 { | |
420 static char *kwlist[] = {"keepxmlentities", 0}; | |
421 PyObject *result, *args2; | |
422 char keepxml = 0; | |
423 | |
424 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|b", kwlist, &keepxml)) { | |
425 return NULL; | |
426 } | |
427 | |
428 if (stripentities == NULL) return NULL; | |
429 result = PyObject_CallFunction(stripentities, "Ob", self, keepxml); | |
430 if (result == NULL) return NULL; | |
431 args2 = PyTuple_New(1); | |
432 if (args2 == NULL) { | |
433 Py_DECREF(result); | |
434 return NULL; | |
435 } | |
436 PyTuple_SET_ITEM(args2, 0, result); | |
437 result = MarkupType.tp_new(&MarkupType, args2, NULL); | |
438 Py_DECREF(args2); | |
439 return result; | |
440 } | |
441 | |
442 PyDoc_STRVAR(striptags__doc__, | |
443 "Return a copy of the text with all XML/HTML tags removed."); | |
444 | |
445 static PyObject * | |
446 Markup_striptags(PyObject* self) | |
447 { | |
448 PyObject *result, *args; | |
449 | |
450 if (striptags == NULL) return NULL; | |
451 result = PyObject_CallFunction(striptags, "O", self); | |
452 if (result == NULL) return NULL; | |
453 args = PyTuple_New(1); | |
454 if (args == NULL) { | |
455 Py_DECREF(result); | |
456 return NULL; | |
457 } | |
458 PyTuple_SET_ITEM(args, 0, result); | |
459 result = MarkupType.tp_new(&MarkupType, args, NULL); | |
460 Py_DECREF(args); | |
461 return result; | |
462 } | |
463 | |
/* Instance layout of a Markup object: only the PyUnicodeObject base, with
   no additional fields.  Its size is used as tp_basicsize of MarkupType. */
464 typedef struct { | |
465 PyUnicodeObject HEAD; | |
466 } MarkupObject; | |
467 | |
/* Method table installed as tp_methods of MarkupType.
   "escape" is registered as a class method accepting keyword arguments;
   "join" and "stripentities" accept positional and keyword arguments;
   "unescape" and "striptags" take no arguments.
   The "join" entry supplies no docstring. */
468 static PyMethodDef Markup_methods[] = { | |
469 {"escape", (PyCFunction) Markup_escape, | |
470 METH_VARARGS|METH_CLASS|METH_KEYWORDS, escape__doc__}, | |
471 {"join", (PyCFunction)Markup_join, METH_VARARGS|METH_KEYWORDS}, | |
472 {"unescape", (PyCFunction)Markup_unescape, METH_NOARGS, unescape__doc__}, | |
473 {"stripentities", (PyCFunction) Markup_stripentities, | |
474 METH_VARARGS|METH_KEYWORDS, stripentities__doc__}, | |
475 {"striptags", (PyCFunction) Markup_striptags, METH_NOARGS, | |
476 striptags__doc__}, | |
477 {NULL} /* Sentinel */ | |
478 }; | |
479 | |
/* Number protocol table installed as tp_as_number of MarkupType.
   Only +, * and % are provided (Markup_add, Markup_mul, Markup_mod); the
   initializer stops after nb_remainder, so all later slots are zero. */
480 static PyNumberMethods Markup_as_number = { | |
481 Markup_add, /*nb_add*/ | |
482 0, /*nb_subtract*/ | |
483 Markup_mul, /*nb_multiply*/ | |
484 0, /*nb_divide*/ | |
485 Markup_mod, /*nb_remainder*/ | |
486 }; | |
487 | |
/* Type object for genshi._speedups.Markup.
   The initializer is positional, so the order of the entries must follow
   the field order of PyTypeObject; each entry is labelled with its slot.
   Non-zero slots: tp_repr (Markup_repr), tp_as_number (Markup_as_number),
   tp_flags, tp_doc, tp_methods (Markup_methods) and tp_new (Markup_new).
   tp_base is left 0 here and assigned to &PyUnicode_Type in
   init_speedups() before PyType_Ready() is called.
   NOTE(review): the tp_alloc comment says it is set in module init, but
   init_speedups() does not assign it explicitly - confirm that it is
   filled in by PyType_Ready(). */
488 PyTypeObject MarkupType = { | |
489 PyObject_HEAD_INIT(NULL) | |
490 0, | |
491 "genshi._speedups.Markup", | |
492 sizeof(MarkupObject), | |
493 0, | |
494 0, /*tp_dealloc*/ | |
495 0, /*tp_print*/ | |
496 0, /*tp_getattr*/ | |
497 0, /*tp_setattr*/ | |
498 0, /*tp_compare*/ | |
499 Markup_repr, /*tp_repr*/ | |
500 &Markup_as_number, /*tp_as_number*/ | |
501 0, /*tp_as_sequence*/ | |
502 0, /*tp_as_mapping*/ | |
503 0, /*tp_hash */ | |
504 | |
505 0, /*tp_call*/ | |
506 0, /*tp_str*/ | |
507 0, /*tp_getattro*/ | |
508 0, /*tp_setattro*/ | |
509 0, /*tp_as_buffer*/ | |
510 | |
511 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/ | |
512 Markup__doc__,/*tp_doc*/ | |
513 | |
514 0, /*tp_traverse*/ | |
515 0, /*tp_clear*/ | |
516 | |
517 0, /*tp_richcompare*/ | |
518 0, /*tp_weaklistoffset*/ | |
519 | |
520 0, /*tp_iter*/ | |
521 0, /*tp_iternext*/ | |
522 | |
523 /* Attribute descriptor and subclassing stuff */ | |
524 | |
525 Markup_methods,/*tp_methods*/ | |
526 0, /*tp_members*/ | |
527 0, /*tp_getset*/ | |
528 0, /*tp_base*/ | |
529 0, /*tp_dict*/ | |
530 | |
531 0, /*tp_descr_get*/ | |
532 0, /*tp_descr_set*/ | |
533 0, /*tp_dictoffset*/ | |
534 | |
535 0, /*tp_init*/ | |
536 0, /*tp_alloc will be set to PyType_GenericAlloc in module init*/ | |
537 Markup_new, /*tp_new*/ | |
538 0, /*tp_free Low-level free-memory routine */ | |
539 0, /*tp_is_gc For PyObject_IS_GC */ | |
540 0, /*tp_bases*/ | |
541 0, /*tp_mro method resolution order */ | |
542 0, /*tp_cache*/ | |
543 0, /*tp_subclasses*/ | |
544 0 /*tp_weaklist*/ | |
545 }; | |
546 | |
547 PyMODINIT_FUNC | |
548 init_speedups(void) | |
549 { | |
550 PyObject *module; | |
551 | |
552 /* Workaround for quirk in Visual Studio, see | |
553 <http://www.python.it/faq/faq-3.html#3.24> */ | |
554 MarkupType.tp_base = &PyUnicode_Type; | |
555 | |
556 if (PyType_Ready(&MarkupType) < 0) | |
557 return; | |
558 | |
559 init_constants(); | |
560 | |
561 module = Py_InitModule("_speedups", NULL); | |
562 Py_INCREF(&MarkupType); | |
563 PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType); | |
564 } |