1 | #include "Python.h" |
2 | #include <ctype.h> |
3 | |
4 | #include "structmember.h" // PyMemberDef |
5 | #include "frameobject.h" |
6 | #include "expat.h" |
7 | |
8 | #include "pyexpat.h" |
9 | |
10 | /* Do not emit Clinic output to a file as that wreaks havoc with conditionally |
11 | included methods. */ |
12 | /*[clinic input] |
13 | module pyexpat |
14 | [clinic start generated code]*/ |
15 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/ |
16 | |
/* Expat version folded into a single integer (major*10000 + minor*100 +
   micro) so that version checks can compare against one number. */
#define XML_COMBINED_VERSION \
    ((10000 * XML_MAJOR_VERSION) + (100 * XML_MINOR_VERSION) + XML_MICRO_VERSION)
18 | |
19 | static XML_Memory_Handling_Suite ExpatMemoryHandler = { |
20 | PyObject_Malloc, PyObject_Realloc, PyObject_Free}; |
21 | |
/* Identifiers for every callback slot a parser object can hold.  The values
 * are used as indices into the per-parser `handlers` array (see
 * have_handler()), so the order of the enumerators is significant.
 *
 * `Comment` must be present: the Comment trampoline generated by
 * VOID_HANDLER(Comment, ...) looks up handlers[Comment].
 *
 * `SkippedEntity` only exists when building against Expat >= 1.95.4.
 * `_DummyDecl` is a trailing placeholder after the last real slot.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};
49 | |
50 | typedef struct { |
51 | PyTypeObject *xml_parse_type; |
52 | PyObject *error; |
53 | } pyexpat_state; |
54 | |
55 | static inline pyexpat_state* |
56 | pyexpat_get_state(PyObject *module) |
57 | { |
58 | void *state = PyModule_GetState(module); |
59 | assert(state != NULL); |
60 | return (pyexpat_state *)state; |
61 | } |
62 | |
63 | /* ----------------------------------------------------- */ |
64 | |
65 | /* Declarations for objects of type xmlparser */ |
66 | |
67 | typedef struct { |
68 | PyObject_HEAD |
69 | |
70 | XML_Parser itself; |
71 | int ordered_attributes; /* Return attributes as a list. */ |
72 | int specified_attributes; /* Report only specified attributes. */ |
73 | int in_callback; /* Is a callback active? */ |
74 | int ns_prefixes; /* Namespace-triplets mode? */ |
75 | XML_Char *buffer; /* Buffer used when accumulating characters */ |
76 | /* NULL if not enabled */ |
77 | int buffer_size; /* Size of buffer, in XML_Char units */ |
78 | int buffer_used; /* Buffer units in use */ |
79 | PyObject *intern; /* Dictionary to intern strings */ |
80 | PyObject **handlers; |
81 | } xmlparseobject; |
82 | |
83 | #include "clinic/pyexpat.c.h" |
84 | |
/* 8 KiB.  NOTE(review): presumably the default capacity of the character
   data buffer; its use is outside the portion of the file reviewed here. */
#define CHARACTER_DATA_BUFFER_SIZE (8 * 1024)
86 | |
87 | typedef void (*xmlhandlersetter)(XML_Parser self, void *meth); |
88 | typedef void* xmlhandler; |
89 | |
90 | struct HandlerInfo { |
91 | const char *name; |
92 | xmlhandlersetter setter; |
93 | xmlhandler handler; |
94 | PyGetSetDef getset; |
95 | }; |
96 | |
97 | static struct HandlerInfo handler_info[64]; |
98 | |
99 | /* Set an integer attribute on the error object; return true on success, |
100 | * false on an exception. |
101 | */ |
102 | static int |
103 | set_error_attr(PyObject *err, const char *name, int value) |
104 | { |
105 | PyObject *v = PyLong_FromLong(value); |
106 | |
107 | if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { |
108 | Py_XDECREF(v); |
109 | return 0; |
110 | } |
111 | Py_DECREF(v); |
112 | return 1; |
113 | } |
114 | |
115 | /* Build and set an Expat exception, including positioning |
116 | * information. Always returns NULL. |
117 | */ |
118 | static PyObject * |
119 | set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code) |
120 | { |
121 | PyObject *err; |
122 | PyObject *buffer; |
123 | XML_Parser parser = self->itself; |
124 | int lineno = XML_GetErrorLineNumber(parser); |
125 | int column = XML_GetErrorColumnNumber(parser); |
126 | |
127 | buffer = PyUnicode_FromFormat("%s: line %i, column %i" , |
128 | XML_ErrorString(code), lineno, column); |
129 | if (buffer == NULL) |
130 | return NULL; |
131 | err = PyObject_CallOneArg(state->error, buffer); |
132 | Py_DECREF(buffer); |
133 | if ( err != NULL |
134 | && set_error_attr(err, "code" , code) |
135 | && set_error_attr(err, "offset" , column) |
136 | && set_error_attr(err, "lineno" , lineno)) { |
137 | PyErr_SetObject(state->error, err); |
138 | } |
139 | Py_XDECREF(err); |
140 | return NULL; |
141 | } |
142 | |
143 | static int |
144 | have_handler(xmlparseobject *self, int type) |
145 | { |
146 | PyObject *handler = self->handlers[type]; |
147 | return handler != NULL; |
148 | } |
149 | |
150 | /* Convert a string of XML_Chars into a Unicode string. |
151 | Returns None if str is a null pointer. */ |
152 | |
153 | static PyObject * |
154 | conv_string_to_unicode(const XML_Char *str) |
155 | { |
156 | /* XXX currently this code assumes that XML_Char is 8-bit, |
157 | and hence in UTF-8. */ |
158 | /* UTF-8 from Expat, Unicode desired */ |
159 | if (str == NULL) { |
160 | Py_RETURN_NONE; |
161 | } |
162 | return PyUnicode_DecodeUTF8(str, strlen(str), "strict" ); |
163 | } |
164 | |
165 | static PyObject * |
166 | conv_string_len_to_unicode(const XML_Char *str, int len) |
167 | { |
168 | /* XXX currently this code assumes that XML_Char is 8-bit, |
169 | and hence in UTF-8. */ |
170 | /* UTF-8 from Expat, Unicode desired */ |
171 | if (str == NULL) { |
172 | Py_RETURN_NONE; |
173 | } |
174 | return PyUnicode_DecodeUTF8((const char *)str, len, "strict" ); |
175 | } |
176 | |
177 | /* Callback routines */ |
178 | |
179 | static void clear_handlers(xmlparseobject *self, int initial); |
180 | |
181 | /* This handler is used when an error has been detected, in the hope |
182 | that actual parsing can be terminated early. This will only help |
183 | if an external entity reference is encountered. */ |
184 | static int |
185 | error_external_entity_ref_handler(XML_Parser parser, |
186 | const XML_Char *context, |
187 | const XML_Char *base, |
188 | const XML_Char *systemId, |
189 | const XML_Char *publicId) |
190 | { |
191 | return 0; |
192 | } |
193 | |
194 | /* Dummy character data handler used when an error (exception) has |
195 | been detected, and the actual parsing can be terminated early. |
196 | This is needed since character data handler can't be safely removed |
197 | from within the character data handler, but can be replaced. It is |
198 | used only from the character data handler trampoline, and must be |
199 | used right after `flag_error()` is called. */ |
200 | static void |
201 | noop_character_data_handler(void *userData, const XML_Char *data, int len) |
202 | { |
203 | /* Do nothing. */ |
204 | } |
205 | |
206 | static void |
207 | flag_error(xmlparseobject *self) |
208 | { |
209 | clear_handlers(self, 0); |
210 | XML_SetExternalEntityRefHandler(self->itself, |
211 | error_external_entity_ref_handler); |
212 | } |
213 | |
214 | static PyObject* |
215 | call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args, |
216 | xmlparseobject *self) |
217 | { |
218 | PyObject *res; |
219 | |
220 | res = PyObject_Call(func, args, NULL); |
221 | if (res == NULL) { |
222 | _PyTraceback_Add(funcname, __FILE__, lineno); |
223 | XML_StopParser(self->itself, XML_FALSE); |
224 | } |
225 | return res; |
226 | } |
227 | |
228 | static PyObject* |
229 | string_intern(xmlparseobject *self, const char* str) |
230 | { |
231 | PyObject *result = conv_string_to_unicode(str); |
232 | PyObject *value; |
233 | /* result can be NULL if the unicode conversion failed. */ |
234 | if (!result) |
235 | return result; |
236 | if (!self->intern) |
237 | return result; |
238 | value = PyDict_GetItemWithError(self->intern, result); |
239 | if (!value) { |
240 | if (!PyErr_Occurred() && |
241 | PyDict_SetItem(self->intern, result, result) == 0) |
242 | { |
243 | return result; |
244 | } |
245 | else { |
246 | Py_DECREF(result); |
247 | return NULL; |
248 | } |
249 | } |
250 | Py_INCREF(value); |
251 | Py_DECREF(result); |
252 | return value; |
253 | } |
254 | |
255 | /* Return 0 on success, -1 on exception. |
256 | * flag_error() will be called before return if needed. |
257 | */ |
258 | static int |
259 | call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len) |
260 | { |
261 | PyObject *args; |
262 | PyObject *temp; |
263 | |
264 | if (!have_handler(self, CharacterData)) |
265 | return -1; |
266 | |
267 | args = PyTuple_New(1); |
268 | if (args == NULL) |
269 | return -1; |
270 | temp = (conv_string_len_to_unicode(buffer, len)); |
271 | if (temp == NULL) { |
272 | Py_DECREF(args); |
273 | flag_error(self); |
274 | XML_SetCharacterDataHandler(self->itself, |
275 | noop_character_data_handler); |
276 | return -1; |
277 | } |
278 | PyTuple_SET_ITEM(args, 0, temp); |
279 | /* temp is now a borrowed reference; consider it unused. */ |
280 | self->in_callback = 1; |
281 | temp = call_with_frame("CharacterData" , __LINE__, |
282 | self->handlers[CharacterData], args, self); |
283 | /* temp is an owned reference again, or NULL */ |
284 | self->in_callback = 0; |
285 | Py_DECREF(args); |
286 | if (temp == NULL) { |
287 | flag_error(self); |
288 | XML_SetCharacterDataHandler(self->itself, |
289 | noop_character_data_handler); |
290 | return -1; |
291 | } |
292 | Py_DECREF(temp); |
293 | return 0; |
294 | } |
295 | |
296 | static int |
297 | flush_character_buffer(xmlparseobject *self) |
298 | { |
299 | int rc; |
300 | if (self->buffer == NULL || self->buffer_used == 0) |
301 | return 0; |
302 | rc = call_character_handler(self, self->buffer, self->buffer_used); |
303 | self->buffer_used = 0; |
304 | return rc; |
305 | } |
306 | |
307 | static void |
308 | my_CharacterDataHandler(void *userData, const XML_Char *data, int len) |
309 | { |
310 | xmlparseobject *self = (xmlparseobject *) userData; |
311 | |
312 | if (PyErr_Occurred()) |
313 | return; |
314 | |
315 | if (self->buffer == NULL) |
316 | call_character_handler(self, data, len); |
317 | else { |
318 | if ((self->buffer_used + len) > self->buffer_size) { |
319 | if (flush_character_buffer(self) < 0) |
320 | return; |
321 | /* handler might have changed; drop the rest on the floor |
322 | * if there isn't a handler anymore |
323 | */ |
324 | if (!have_handler(self, CharacterData)) |
325 | return; |
326 | } |
327 | if (len > self->buffer_size) { |
328 | call_character_handler(self, data, len); |
329 | self->buffer_used = 0; |
330 | } |
331 | else { |
332 | memcpy(self->buffer + self->buffer_used, |
333 | data, len * sizeof(XML_Char)); |
334 | self->buffer_used += len; |
335 | } |
336 | } |
337 | } |
338 | |
339 | static void |
340 | my_StartElementHandler(void *userData, |
341 | const XML_Char *name, const XML_Char *atts[]) |
342 | { |
343 | xmlparseobject *self = (xmlparseobject *)userData; |
344 | |
345 | if (have_handler(self, StartElement)) { |
346 | PyObject *container, *rv, *args; |
347 | int i, max; |
348 | |
349 | if (PyErr_Occurred()) |
350 | return; |
351 | |
352 | if (flush_character_buffer(self) < 0) |
353 | return; |
354 | /* Set max to the number of slots filled in atts[]; max/2 is |
355 | * the number of attributes we need to process. |
356 | */ |
357 | if (self->specified_attributes) { |
358 | max = XML_GetSpecifiedAttributeCount(self->itself); |
359 | } |
360 | else { |
361 | max = 0; |
362 | while (atts[max] != NULL) |
363 | max += 2; |
364 | } |
365 | /* Build the container. */ |
366 | if (self->ordered_attributes) |
367 | container = PyList_New(max); |
368 | else |
369 | container = PyDict_New(); |
370 | if (container == NULL) { |
371 | flag_error(self); |
372 | return; |
373 | } |
374 | for (i = 0; i < max; i += 2) { |
375 | PyObject *n = string_intern(self, (XML_Char *) atts[i]); |
376 | PyObject *v; |
377 | if (n == NULL) { |
378 | flag_error(self); |
379 | Py_DECREF(container); |
380 | return; |
381 | } |
382 | v = conv_string_to_unicode((XML_Char *) atts[i+1]); |
383 | if (v == NULL) { |
384 | flag_error(self); |
385 | Py_DECREF(container); |
386 | Py_DECREF(n); |
387 | return; |
388 | } |
389 | if (self->ordered_attributes) { |
390 | PyList_SET_ITEM(container, i, n); |
391 | PyList_SET_ITEM(container, i+1, v); |
392 | } |
393 | else if (PyDict_SetItem(container, n, v)) { |
394 | flag_error(self); |
395 | Py_DECREF(n); |
396 | Py_DECREF(v); |
397 | Py_DECREF(container); |
398 | return; |
399 | } |
400 | else { |
401 | Py_DECREF(n); |
402 | Py_DECREF(v); |
403 | } |
404 | } |
405 | args = string_intern(self, name); |
406 | if (args == NULL) { |
407 | Py_DECREF(container); |
408 | return; |
409 | } |
410 | args = Py_BuildValue("(NN)" , args, container); |
411 | if (args == NULL) { |
412 | return; |
413 | } |
414 | /* Container is now a borrowed reference; ignore it. */ |
415 | self->in_callback = 1; |
416 | rv = call_with_frame("StartElement" , __LINE__, |
417 | self->handlers[StartElement], args, self); |
418 | self->in_callback = 0; |
419 | Py_DECREF(args); |
420 | if (rv == NULL) { |
421 | flag_error(self); |
422 | return; |
423 | } |
424 | Py_DECREF(rv); |
425 | } |
426 | } |
427 | |
/* Generate the C trampoline my_<NAME>Handler for the callback slot NAME.
 *
 *   RC            return type of the trampoline
 *   NAME          enum HandlerTypes member; also used for the function name
 *   PARAMS        parenthesised parameter list of the trampoline
 *   INIT          extra declarations/statements placed after the locals
 *   PARAM_FORMAT  parenthesised Py_BuildValue argument list; may use `self`
 *   CONVERSION    statement(s) run on the callback result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when the slot is unset or an
 * exception is pending; it flushes buffered character data before calling
 * the Python callback and calls flag_error() on failure.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (PyErr_Occurred()) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(#NAME, __LINE__, \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
458 | |
/* Trampoline returning void: the callback result is discarded.  The
   parameter list must name the user-data argument `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
462 | |
/* Trampoline returning int: the callback result is converted with
   PyLong_AsLong(); 0 is returned when the callback did not run or failed.
   The parameter list must name the user-data argument `userData`. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
467 | |
468 | VOID_HANDLER(EndElement, |
469 | (void *userData, const XML_Char *name), |
470 | ("(N)" , string_intern(self, name))) |
471 | |
472 | VOID_HANDLER(ProcessingInstruction, |
473 | (void *userData, |
474 | const XML_Char *target, |
475 | const XML_Char *data), |
476 | ("(NO&)" , string_intern(self, target), conv_string_to_unicode ,data)) |
477 | |
478 | VOID_HANDLER(UnparsedEntityDecl, |
479 | (void *userData, |
480 | const XML_Char *entityName, |
481 | const XML_Char *base, |
482 | const XML_Char *systemId, |
483 | const XML_Char *publicId, |
484 | const XML_Char *notationName), |
485 | ("(NNNNN)" , |
486 | string_intern(self, entityName), string_intern(self, base), |
487 | string_intern(self, systemId), string_intern(self, publicId), |
488 | string_intern(self, notationName))) |
489 | |
490 | VOID_HANDLER(EntityDecl, |
491 | (void *userData, |
492 | const XML_Char *entityName, |
493 | int is_parameter_entity, |
494 | const XML_Char *value, |
495 | int value_length, |
496 | const XML_Char *base, |
497 | const XML_Char *systemId, |
498 | const XML_Char *publicId, |
499 | const XML_Char *notationName), |
500 | ("NiNNNNN" , |
501 | string_intern(self, entityName), is_parameter_entity, |
502 | (conv_string_len_to_unicode(value, value_length)), |
503 | string_intern(self, base), string_intern(self, systemId), |
504 | string_intern(self, publicId), |
505 | string_intern(self, notationName))) |
506 | |
507 | VOID_HANDLER(XmlDecl, |
508 | (void *userData, |
509 | const XML_Char *version, |
510 | const XML_Char *encoding, |
511 | int standalone), |
512 | ("(O&O&i)" , |
513 | conv_string_to_unicode ,version, conv_string_to_unicode ,encoding, |
514 | standalone)) |
515 | |
516 | static PyObject * |
517 | conv_content_model(XML_Content * const model, |
518 | PyObject *(*conv_string)(const XML_Char *)) |
519 | { |
520 | PyObject *result = NULL; |
521 | PyObject *children = PyTuple_New(model->numchildren); |
522 | int i; |
523 | |
524 | if (children != NULL) { |
525 | assert(model->numchildren < INT_MAX); |
526 | for (i = 0; i < (int)model->numchildren; ++i) { |
527 | PyObject *child = conv_content_model(&model->children[i], |
528 | conv_string); |
529 | if (child == NULL) { |
530 | Py_XDECREF(children); |
531 | return NULL; |
532 | } |
533 | PyTuple_SET_ITEM(children, i, child); |
534 | } |
535 | result = Py_BuildValue("(iiO&N)" , |
536 | model->type, model->quant, |
537 | conv_string,model->name, children); |
538 | } |
539 | return result; |
540 | } |
541 | |
542 | static void |
543 | my_ElementDeclHandler(void *userData, |
544 | const XML_Char *name, |
545 | XML_Content *model) |
546 | { |
547 | xmlparseobject *self = (xmlparseobject *)userData; |
548 | PyObject *args = NULL; |
549 | |
550 | if (have_handler(self, ElementDecl)) { |
551 | PyObject *rv = NULL; |
552 | PyObject *modelobj, *nameobj; |
553 | |
554 | if (PyErr_Occurred()) |
555 | return; |
556 | |
557 | if (flush_character_buffer(self) < 0) |
558 | goto finally; |
559 | modelobj = conv_content_model(model, (conv_string_to_unicode)); |
560 | if (modelobj == NULL) { |
561 | flag_error(self); |
562 | goto finally; |
563 | } |
564 | nameobj = string_intern(self, name); |
565 | if (nameobj == NULL) { |
566 | Py_DECREF(modelobj); |
567 | flag_error(self); |
568 | goto finally; |
569 | } |
570 | args = Py_BuildValue("NN" , nameobj, modelobj); |
571 | if (args == NULL) { |
572 | flag_error(self); |
573 | goto finally; |
574 | } |
575 | self->in_callback = 1; |
576 | rv = call_with_frame("ElementDecl" , __LINE__, |
577 | self->handlers[ElementDecl], args, self); |
578 | self->in_callback = 0; |
579 | if (rv == NULL) { |
580 | flag_error(self); |
581 | goto finally; |
582 | } |
583 | Py_DECREF(rv); |
584 | } |
585 | finally: |
586 | Py_XDECREF(args); |
587 | XML_FreeContentModel(self->itself, model); |
588 | return; |
589 | } |
590 | |
591 | VOID_HANDLER(AttlistDecl, |
592 | (void *userData, |
593 | const XML_Char *elname, |
594 | const XML_Char *attname, |
595 | const XML_Char *att_type, |
596 | const XML_Char *dflt, |
597 | int isrequired), |
598 | ("(NNO&O&i)" , |
599 | string_intern(self, elname), string_intern(self, attname), |
600 | conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt, |
601 | isrequired)) |
602 | |
603 | #if XML_COMBINED_VERSION >= 19504 |
604 | VOID_HANDLER(SkippedEntity, |
605 | (void *userData, |
606 | const XML_Char *entityName, |
607 | int is_parameter_entity), |
608 | ("Ni" , |
609 | string_intern(self, entityName), is_parameter_entity)) |
610 | #endif |
611 | |
612 | VOID_HANDLER(NotationDecl, |
613 | (void *userData, |
614 | const XML_Char *notationName, |
615 | const XML_Char *base, |
616 | const XML_Char *systemId, |
617 | const XML_Char *publicId), |
618 | ("(NNNN)" , |
619 | string_intern(self, notationName), string_intern(self, base), |
620 | string_intern(self, systemId), string_intern(self, publicId))) |
621 | |
622 | VOID_HANDLER(StartNamespaceDecl, |
623 | (void *userData, |
624 | const XML_Char *prefix, |
625 | const XML_Char *uri), |
626 | ("(NN)" , |
627 | string_intern(self, prefix), string_intern(self, uri))) |
628 | |
629 | VOID_HANDLER(EndNamespaceDecl, |
630 | (void *userData, |
631 | const XML_Char *prefix), |
632 | ("(N)" , string_intern(self, prefix))) |
633 | |
634 | VOID_HANDLER(Comment, |
635 | (void *userData, const XML_Char *data), |
636 | ("(O&)" , conv_string_to_unicode ,data)) |
637 | |
638 | VOID_HANDLER(StartCdataSection, |
639 | (void *userData), |
640 | ("()" )) |
641 | |
642 | VOID_HANDLER(EndCdataSection, |
643 | (void *userData), |
644 | ("()" )) |
645 | |
646 | VOID_HANDLER(Default, |
647 | (void *userData, const XML_Char *s, int len), |
648 | ("(N)" , (conv_string_len_to_unicode(s,len)))) |
649 | |
650 | VOID_HANDLER(DefaultHandlerExpand, |
651 | (void *userData, const XML_Char *s, int len), |
652 | ("(N)" , (conv_string_len_to_unicode(s,len)))) |
653 | #define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler |
654 | |
655 | INT_HANDLER(NotStandalone, |
656 | (void *userData), |
657 | ("()" )) |
658 | |
659 | RC_HANDLER(int, ExternalEntityRef, |
660 | (XML_Parser parser, |
661 | const XML_Char *context, |
662 | const XML_Char *base, |
663 | const XML_Char *systemId, |
664 | const XML_Char *publicId), |
665 | int rc=0;, |
666 | ("(O&NNN)" , |
667 | conv_string_to_unicode ,context, string_intern(self, base), |
668 | string_intern(self, systemId), string_intern(self, publicId)), |
669 | rc = PyLong_AsLong(rv);, rc, |
670 | XML_GetUserData(parser)) |
671 | |
672 | /* XXX UnknownEncodingHandler */ |
673 | |
674 | VOID_HANDLER(StartDoctypeDecl, |
675 | (void *userData, const XML_Char *doctypeName, |
676 | const XML_Char *sysid, const XML_Char *pubid, |
677 | int has_internal_subset), |
678 | ("(NNNi)" , string_intern(self, doctypeName), |
679 | string_intern(self, sysid), string_intern(self, pubid), |
680 | has_internal_subset)) |
681 | |
682 | VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()" )) |
683 | |
684 | /* ---------------------------------------------------------------- */ |
685 | /*[clinic input] |
686 | class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype" |
687 | [clinic start generated code]*/ |
688 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/ |
689 | |
690 | |
691 | static PyObject * |
692 | get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv) |
693 | { |
694 | if (PyErr_Occurred()) { |
695 | return NULL; |
696 | } |
697 | if (rv == 0) { |
698 | return set_error(state, self, XML_GetErrorCode(self->itself)); |
699 | } |
700 | if (flush_character_buffer(self) < 0) { |
701 | return NULL; |
702 | } |
703 | return PyLong_FromLong(rv); |
704 | } |
705 | |
/* Largest number of bytes (1 MiB) passed to a single XML_Parse() call by
   Parse(). */
#define MAX_CHUNK_SIZE (1024 * 1024)
707 | |
708 | /*[clinic input] |
709 | pyexpat.xmlparser.Parse |
710 | |
711 | cls: defining_class |
712 | data: object |
713 | isfinal: bool(accept={int}) = False |
714 | / |
715 | |
716 | Parse XML data. |
717 | |
718 | `isfinal' should be true at end of input. |
719 | [clinic start generated code]*/ |
720 | |
721 | static PyObject * |
722 | pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls, |
723 | PyObject *data, int isfinal) |
724 | /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/ |
725 | { |
726 | const char *s; |
727 | Py_ssize_t slen; |
728 | Py_buffer view; |
729 | int rc; |
730 | pyexpat_state *state = PyType_GetModuleState(cls); |
731 | |
732 | if (PyUnicode_Check(data)) { |
733 | view.buf = NULL; |
734 | s = PyUnicode_AsUTF8AndSize(data, &slen); |
735 | if (s == NULL) |
736 | return NULL; |
737 | /* Explicitly set UTF-8 encoding. Return code ignored. */ |
738 | (void)XML_SetEncoding(self->itself, "utf-8" ); |
739 | } |
740 | else { |
741 | if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) |
742 | return NULL; |
743 | s = view.buf; |
744 | slen = view.len; |
745 | } |
746 | |
747 | while (slen > MAX_CHUNK_SIZE) { |
748 | rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0); |
749 | if (!rc) |
750 | goto done; |
751 | s += MAX_CHUNK_SIZE; |
752 | slen -= MAX_CHUNK_SIZE; |
753 | } |
754 | Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX); |
755 | assert(slen <= INT_MAX); |
756 | rc = XML_Parse(self->itself, s, (int)slen, isfinal); |
757 | |
758 | done: |
759 | if (view.buf != NULL) { |
760 | PyBuffer_Release(&view); |
761 | } |
762 | return get_parse_result(state, self, rc); |
763 | } |
764 | |
765 | /* File reading copied from cPickle */ |
766 | |
/* Number of bytes (2 KiB) requested from the file's read() method per
   iteration of ParseFile(). */
#define BUF_SIZE (2 * 1024)
768 | |
769 | static int |
770 | readinst(char *buf, int buf_size, PyObject *meth) |
771 | { |
772 | PyObject *str; |
773 | Py_ssize_t len; |
774 | const char *ptr; |
775 | |
776 | str = PyObject_CallFunction(meth, "n" , buf_size); |
777 | if (str == NULL) |
778 | goto error; |
779 | |
780 | if (PyBytes_Check(str)) |
781 | ptr = PyBytes_AS_STRING(str); |
782 | else if (PyByteArray_Check(str)) |
783 | ptr = PyByteArray_AS_STRING(str); |
784 | else { |
785 | PyErr_Format(PyExc_TypeError, |
786 | "read() did not return a bytes object (type=%.400s)" , |
787 | Py_TYPE(str)->tp_name); |
788 | goto error; |
789 | } |
790 | len = Py_SIZE(str); |
791 | if (len > buf_size) { |
792 | PyErr_Format(PyExc_ValueError, |
793 | "read() returned too much data: " |
794 | "%i bytes requested, %zd returned" , |
795 | buf_size, len); |
796 | goto error; |
797 | } |
798 | memcpy(buf, ptr, len); |
799 | Py_DECREF(str); |
800 | /* len <= buf_size <= INT_MAX */ |
801 | return (int)len; |
802 | |
803 | error: |
804 | Py_XDECREF(str); |
805 | return -1; |
806 | } |
807 | |
808 | /*[clinic input] |
809 | pyexpat.xmlparser.ParseFile |
810 | |
811 | cls: defining_class |
812 | file: object |
813 | / |
814 | |
815 | Parse XML data from file-like object. |
816 | [clinic start generated code]*/ |
817 | |
818 | static PyObject * |
819 | pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls, |
820 | PyObject *file) |
821 | /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/ |
822 | { |
823 | int rv = 1; |
824 | PyObject *readmethod = NULL; |
825 | _Py_IDENTIFIER(read); |
826 | |
827 | pyexpat_state *state = PyType_GetModuleState(cls); |
828 | |
829 | if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) { |
830 | return NULL; |
831 | } |
832 | if (readmethod == NULL) { |
833 | PyErr_SetString(PyExc_TypeError, |
834 | "argument must have 'read' attribute" ); |
835 | return NULL; |
836 | } |
837 | for (;;) { |
838 | int bytes_read; |
839 | void *buf = XML_GetBuffer(self->itself, BUF_SIZE); |
840 | if (buf == NULL) { |
841 | Py_XDECREF(readmethod); |
842 | return get_parse_result(state, self, 0); |
843 | } |
844 | |
845 | bytes_read = readinst(buf, BUF_SIZE, readmethod); |
846 | if (bytes_read < 0) { |
847 | Py_DECREF(readmethod); |
848 | return NULL; |
849 | } |
850 | rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0); |
851 | if (PyErr_Occurred()) { |
852 | Py_XDECREF(readmethod); |
853 | return NULL; |
854 | } |
855 | |
856 | if (!rv || bytes_read == 0) |
857 | break; |
858 | } |
859 | Py_XDECREF(readmethod); |
860 | return get_parse_result(state, self, rv); |
861 | } |
862 | |
863 | /*[clinic input] |
864 | pyexpat.xmlparser.SetBase |
865 | |
866 | base: str |
867 | / |
868 | |
869 | Set the base URL for the parser. |
870 | [clinic start generated code]*/ |
871 | |
872 | static PyObject * |
873 | pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base) |
874 | /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/ |
875 | { |
876 | if (!XML_SetBase(self->itself, base)) { |
877 | return PyErr_NoMemory(); |
878 | } |
879 | Py_RETURN_NONE; |
880 | } |
881 | |
882 | /*[clinic input] |
883 | pyexpat.xmlparser.GetBase |
884 | |
885 | Return base URL string for the parser. |
886 | [clinic start generated code]*/ |
887 | |
888 | static PyObject * |
889 | pyexpat_xmlparser_GetBase_impl(xmlparseobject *self) |
890 | /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/ |
891 | { |
892 | return Py_BuildValue("z" , XML_GetBase(self->itself)); |
893 | } |
894 | |
895 | /*[clinic input] |
896 | pyexpat.xmlparser.GetInputContext |
897 | |
898 | Return the untranslated text of the input that caused the current event. |
899 | |
900 | If the event was generated by a large amount of text (such as a start tag |
901 | for an element with many attributes), not all of the text may be available. |
902 | [clinic start generated code]*/ |
903 | |
904 | static PyObject * |
905 | pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self) |
906 | /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/ |
907 | { |
908 | if (self->in_callback) { |
909 | int offset, size; |
910 | const char *buffer |
911 | = XML_GetInputContext(self->itself, &offset, &size); |
912 | |
913 | if (buffer != NULL) |
914 | return PyBytes_FromStringAndSize(buffer + offset, |
915 | size - offset); |
916 | else |
917 | Py_RETURN_NONE; |
918 | } |
919 | else |
920 | Py_RETURN_NONE; |
921 | } |
922 | |
923 | /*[clinic input] |
924 | pyexpat.xmlparser.ExternalEntityParserCreate |
925 | |
926 | cls: defining_class |
927 | context: str(accept={str, NoneType}) |
928 | encoding: str = NULL |
929 | / |
930 | |
931 | Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler. |
932 | [clinic start generated code]*/ |
933 | |
934 | static PyObject * |
935 | pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, |
936 | PyTypeObject *cls, |
937 | const char *context, |
938 | const char *encoding) |
939 | /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/ |
940 | { |
941 | xmlparseobject *new_parser; |
942 | int i; |
943 | |
944 | pyexpat_state *state = PyType_GetModuleState(cls); |
945 | |
946 | new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type); |
947 | if (new_parser == NULL) { |
948 | return NULL; |
949 | } |
950 | |
951 | new_parser->buffer_size = self->buffer_size; |
952 | new_parser->buffer_used = 0; |
953 | new_parser->buffer = NULL; |
954 | new_parser->ordered_attributes = self->ordered_attributes; |
955 | new_parser->specified_attributes = self->specified_attributes; |
956 | new_parser->in_callback = 0; |
957 | new_parser->ns_prefixes = self->ns_prefixes; |
958 | new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, |
959 | encoding); |
960 | new_parser->handlers = 0; |
961 | new_parser->intern = self->intern; |
962 | Py_XINCREF(new_parser->intern); |
963 | |
964 | if (self->buffer != NULL) { |
965 | new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); |
966 | if (new_parser->buffer == NULL) { |
967 | Py_DECREF(new_parser); |
968 | return PyErr_NoMemory(); |
969 | } |
970 | } |
971 | if (!new_parser->itself) { |
972 | Py_DECREF(new_parser); |
973 | return PyErr_NoMemory(); |
974 | } |
975 | |
976 | XML_SetUserData(new_parser->itself, (void *)new_parser); |
977 | |
978 | /* allocate and clear handlers first */ |
979 | for (i = 0; handler_info[i].name != NULL; i++) |
980 | /* do nothing */; |
981 | |
982 | new_parser->handlers = PyMem_New(PyObject *, i); |
983 | if (!new_parser->handlers) { |
984 | Py_DECREF(new_parser); |
985 | return PyErr_NoMemory(); |
986 | } |
987 | clear_handlers(new_parser, 1); |
988 | |
989 | /* then copy handlers from self */ |
990 | for (i = 0; handler_info[i].name != NULL; i++) { |
991 | PyObject *handler = self->handlers[i]; |
992 | if (handler != NULL) { |
993 | Py_INCREF(handler); |
994 | new_parser->handlers[i] = handler; |
995 | handler_info[i].setter(new_parser->itself, |
996 | handler_info[i].handler); |
997 | } |
998 | } |
999 | |
1000 | PyObject_GC_Track(new_parser); |
1001 | return (PyObject *)new_parser; |
1002 | } |
1003 | |
1004 | /*[clinic input] |
1005 | pyexpat.xmlparser.SetParamEntityParsing |
1006 | |
1007 | flag: int |
1008 | / |
1009 | |
1010 | Controls parsing of parameter entities (including the external DTD subset). |
1011 | |
1012 | Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER, |
1013 | XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and |
1014 | XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag |
1015 | was successful. |
1016 | [clinic start generated code]*/ |
1017 | |
1018 | static PyObject * |
1019 | pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag) |
1020 | /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/ |
1021 | { |
1022 | flag = XML_SetParamEntityParsing(self->itself, flag); |
1023 | return PyLong_FromLong(flag); |
1024 | } |
1025 | |
1026 | |
1027 | #if XML_COMBINED_VERSION >= 19505 |
1028 | /*[clinic input] |
1029 | pyexpat.xmlparser.UseForeignDTD |
1030 | |
1031 | cls: defining_class |
1032 | flag: bool = True |
1033 | / |
1034 | |
1035 | Allows the application to provide an artificial external subset if one is not specified as part of the document instance. |
1036 | |
1037 | This readily allows the use of a 'default' document type controlled by the |
1038 | application, while still getting the advantage of providing document type |
1039 | information to the parser. 'flag' defaults to True if not provided. |
1040 | [clinic start generated code]*/ |
1041 | |
1042 | static PyObject * |
1043 | pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, |
1044 | int flag) |
1045 | /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/ |
1046 | { |
1047 | pyexpat_state *state = PyType_GetModuleState(cls); |
1048 | enum XML_Error rc; |
1049 | |
1050 | rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE); |
1051 | if (rc != XML_ERROR_NONE) { |
1052 | return set_error(state, self, rc); |
1053 | } |
1054 | Py_RETURN_NONE; |
1055 | } |
1056 | #endif |
1057 | |
/* Method table for xmlparser objects.  Every entry is a *_METHODDEF macro;
   the UseForeignDTD entry is only present when XML_COMBINED_VERSION is at
   least 19505, matching the guard around its implementation. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1071 | |
1072 | /* ---------- */ |
1073 | |
1074 | |
1075 | |
1076 | /* pyexpat international encoding support. |
1077 | Make it as simple as possible. |
1078 | */ |
1079 | |
1080 | static int |
1081 | PyUnknownEncodingHandler(void *encodingHandlerData, |
1082 | const XML_Char *name, |
1083 | XML_Encoding *info) |
1084 | { |
1085 | static unsigned char template_buffer[256] = {0}; |
1086 | PyObject* u; |
1087 | int i; |
1088 | const void *data; |
1089 | unsigned int kind; |
1090 | |
1091 | if (PyErr_Occurred()) |
1092 | return XML_STATUS_ERROR; |
1093 | |
1094 | if (template_buffer[1] == 0) { |
1095 | for (i = 0; i < 256; i++) |
1096 | template_buffer[i] = i; |
1097 | } |
1098 | |
1099 | u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace" ); |
1100 | if (u == NULL || PyUnicode_READY(u)) { |
1101 | Py_XDECREF(u); |
1102 | return XML_STATUS_ERROR; |
1103 | } |
1104 | |
1105 | if (PyUnicode_GET_LENGTH(u) != 256) { |
1106 | Py_DECREF(u); |
1107 | PyErr_SetString(PyExc_ValueError, |
1108 | "multi-byte encodings are not supported" ); |
1109 | return XML_STATUS_ERROR; |
1110 | } |
1111 | |
1112 | kind = PyUnicode_KIND(u); |
1113 | data = PyUnicode_DATA(u); |
1114 | for (i = 0; i < 256; i++) { |
1115 | Py_UCS4 ch = PyUnicode_READ(kind, data, i); |
1116 | if (ch != Py_UNICODE_REPLACEMENT_CHARACTER) |
1117 | info->map[i] = ch; |
1118 | else |
1119 | info->map[i] = -1; |
1120 | } |
1121 | |
1122 | info->data = NULL; |
1123 | info->convert = NULL; |
1124 | info->release = NULL; |
1125 | Py_DECREF(u); |
1126 | |
1127 | return XML_STATUS_OK; |
1128 | } |
1129 | |
1130 | |
1131 | static PyObject * |
1132 | newxmlparseobject(pyexpat_state *state, const char *encoding, |
1133 | const char *namespace_separator, PyObject *intern) |
1134 | { |
1135 | int i; |
1136 | xmlparseobject *self; |
1137 | |
1138 | self = PyObject_GC_New(xmlparseobject, state->xml_parse_type); |
1139 | if (self == NULL) |
1140 | return NULL; |
1141 | |
1142 | self->buffer = NULL; |
1143 | self->buffer_size = CHARACTER_DATA_BUFFER_SIZE; |
1144 | self->buffer_used = 0; |
1145 | self->ordered_attributes = 0; |
1146 | self->specified_attributes = 0; |
1147 | self->in_callback = 0; |
1148 | self->ns_prefixes = 0; |
1149 | self->handlers = NULL; |
1150 | self->intern = intern; |
1151 | Py_XINCREF(self->intern); |
1152 | |
1153 | /* namespace_separator is either NULL or contains one char + \0 */ |
1154 | self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, |
1155 | namespace_separator); |
1156 | if (self->itself == NULL) { |
1157 | PyErr_SetString(PyExc_RuntimeError, |
1158 | "XML_ParserCreate failed" ); |
1159 | Py_DECREF(self); |
1160 | return NULL; |
1161 | } |
1162 | #if XML_COMBINED_VERSION >= 20100 |
1163 | /* This feature was added upstream in libexpat 2.1.0. */ |
1164 | XML_SetHashSalt(self->itself, |
1165 | (unsigned long)_Py_HashSecret.expat.hashsalt); |
1166 | #endif |
1167 | XML_SetUserData(self->itself, (void *)self); |
1168 | XML_SetUnknownEncodingHandler(self->itself, |
1169 | (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL); |
1170 | |
1171 | for (i = 0; handler_info[i].name != NULL; i++) |
1172 | /* do nothing */; |
1173 | |
1174 | self->handlers = PyMem_New(PyObject *, i); |
1175 | if (!self->handlers) { |
1176 | Py_DECREF(self); |
1177 | return PyErr_NoMemory(); |
1178 | } |
1179 | clear_handlers(self, 1); |
1180 | |
1181 | PyObject_GC_Track(self); |
1182 | return (PyObject*)self; |
1183 | } |
1184 | |
1185 | static int |
1186 | xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg) |
1187 | { |
1188 | for (int i = 0; handler_info[i].name != NULL; i++) { |
1189 | Py_VISIT(op->handlers[i]); |
1190 | } |
1191 | Py_VISIT(Py_TYPE(op)); |
1192 | return 0; |
1193 | } |
1194 | |
1195 | static int |
1196 | xmlparse_clear(xmlparseobject *op) |
1197 | { |
1198 | clear_handlers(op, 0); |
1199 | Py_CLEAR(op->intern); |
1200 | return 0; |
1201 | } |
1202 | |
1203 | static void |
1204 | xmlparse_dealloc(xmlparseobject *self) |
1205 | { |
1206 | PyObject_GC_UnTrack(self); |
1207 | (void)xmlparse_clear(self); |
1208 | if (self->itself != NULL) |
1209 | XML_ParserFree(self->itself); |
1210 | self->itself = NULL; |
1211 | |
1212 | if (self->handlers != NULL) { |
1213 | PyMem_Free(self->handlers); |
1214 | self->handlers = NULL; |
1215 | } |
1216 | if (self->buffer != NULL) { |
1217 | PyMem_Free(self->buffer); |
1218 | self->buffer = NULL; |
1219 | } |
1220 | PyTypeObject *tp = Py_TYPE(self); |
1221 | PyObject_GC_Del(self); |
1222 | Py_DECREF(tp); |
1223 | } |
1224 | |
1225 | |
1226 | static PyObject * |
1227 | xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi) |
1228 | { |
1229 | assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info)); |
1230 | int handlernum = (int)(hi - handler_info); |
1231 | PyObject *result = self->handlers[handlernum]; |
1232 | if (result == NULL) |
1233 | result = Py_None; |
1234 | Py_INCREF(result); |
1235 | return result; |
1236 | } |
1237 | |
1238 | static int |
1239 | xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi) |
1240 | { |
1241 | assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info)); |
1242 | int handlernum = (int)(hi - handler_info); |
1243 | if (v == NULL) { |
1244 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1245 | return -1; |
1246 | } |
1247 | if (handlernum == CharacterData) { |
1248 | /* If we're changing the character data handler, flush all |
1249 | * cached data with the old handler. Not sure there's a |
1250 | * "right" thing to do, though, but this probably won't |
1251 | * happen. |
1252 | */ |
1253 | if (flush_character_buffer(self) < 0) |
1254 | return -1; |
1255 | } |
1256 | |
1257 | xmlhandler c_handler = NULL; |
1258 | if (v == Py_None) { |
1259 | /* If this is the character data handler, and a character |
1260 | data handler is already active, we need to be more |
1261 | careful. What we can safely do is replace the existing |
1262 | character data handler callback function with a no-op |
1263 | function that will refuse to call Python. The downside |
1264 | is that this doesn't completely remove the character |
1265 | data handler from the C layer if there's any callback |
1266 | active, so Expat does a little more work than it |
1267 | otherwise would, but that's really an odd case. A more |
1268 | elaborate system of handlers and state could remove the |
1269 | C handler more effectively. */ |
1270 | if (handlernum == CharacterData && self->in_callback) |
1271 | c_handler = noop_character_data_handler; |
1272 | v = NULL; |
1273 | } |
1274 | else if (v != NULL) { |
1275 | Py_INCREF(v); |
1276 | c_handler = handler_info[handlernum].handler; |
1277 | } |
1278 | Py_XSETREF(self->handlers[handlernum], v); |
1279 | handler_info[handlernum].setter(self->itself, c_handler); |
1280 | return 0; |
1281 | } |
1282 | |
/* INT_GETTER(name) defines xmlparse_<name>_getter, an attribute getter that
   returns XML_Get<name>(self->itself), cast to long, as a Python int.  The
   macro is instantiated once per Expat accessor below and then undefined. */
#define INT_GETTER(name) \
    static PyObject * \
    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    { \
        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    }
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
INT_GETTER(ErrorColumnNumber)
INT_GETTER(ErrorByteIndex)
INT_GETTER(CurrentLineNumber)
INT_GETTER(CurrentColumnNumber)
INT_GETTER(CurrentByteIndex)

#undef INT_GETTER
1298 | |
1299 | static PyObject * |
1300 | xmlparse_buffer_text_getter(xmlparseobject *self, void *closure) |
1301 | { |
1302 | return PyBool_FromLong(self->buffer != NULL); |
1303 | } |
1304 | |
1305 | static int |
1306 | xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure) |
1307 | { |
1308 | if (v == NULL) { |
1309 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1310 | return -1; |
1311 | } |
1312 | int b = PyObject_IsTrue(v); |
1313 | if (b < 0) |
1314 | return -1; |
1315 | if (b) { |
1316 | if (self->buffer == NULL) { |
1317 | self->buffer = PyMem_Malloc(self->buffer_size); |
1318 | if (self->buffer == NULL) { |
1319 | PyErr_NoMemory(); |
1320 | return -1; |
1321 | } |
1322 | self->buffer_used = 0; |
1323 | } |
1324 | } |
1325 | else if (self->buffer != NULL) { |
1326 | if (flush_character_buffer(self) < 0) |
1327 | return -1; |
1328 | PyMem_Free(self->buffer); |
1329 | self->buffer = NULL; |
1330 | } |
1331 | return 0; |
1332 | } |
1333 | |
1334 | static PyObject * |
1335 | xmlparse_buffer_size_getter(xmlparseobject *self, void *closure) |
1336 | { |
1337 | return PyLong_FromLong((long) self->buffer_size); |
1338 | } |
1339 | |
1340 | static int |
1341 | xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure) |
1342 | { |
1343 | if (v == NULL) { |
1344 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1345 | return -1; |
1346 | } |
1347 | long new_buffer_size; |
1348 | if (!PyLong_Check(v)) { |
1349 | PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer" ); |
1350 | return -1; |
1351 | } |
1352 | |
1353 | new_buffer_size = PyLong_AsLong(v); |
1354 | if (new_buffer_size <= 0) { |
1355 | if (!PyErr_Occurred()) |
1356 | PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero" ); |
1357 | return -1; |
1358 | } |
1359 | |
1360 | /* trivial case -- no change */ |
1361 | if (new_buffer_size == self->buffer_size) { |
1362 | return 0; |
1363 | } |
1364 | |
1365 | /* check maximum */ |
1366 | if (new_buffer_size > INT_MAX) { |
1367 | char errmsg[100]; |
1368 | sprintf(errmsg, "buffer_size must not be greater than %i" , INT_MAX); |
1369 | PyErr_SetString(PyExc_ValueError, errmsg); |
1370 | return -1; |
1371 | } |
1372 | |
1373 | if (self->buffer != NULL) { |
1374 | /* there is already a buffer */ |
1375 | if (self->buffer_used != 0) { |
1376 | if (flush_character_buffer(self) < 0) { |
1377 | return -1; |
1378 | } |
1379 | } |
1380 | /* free existing buffer */ |
1381 | PyMem_Free(self->buffer); |
1382 | } |
1383 | self->buffer = PyMem_Malloc(new_buffer_size); |
1384 | if (self->buffer == NULL) { |
1385 | PyErr_NoMemory(); |
1386 | return -1; |
1387 | } |
1388 | self->buffer_size = new_buffer_size; |
1389 | return 0; |
1390 | } |
1391 | |
1392 | static PyObject * |
1393 | xmlparse_buffer_used_getter(xmlparseobject *self, void *closure) |
1394 | { |
1395 | return PyLong_FromLong((long) self->buffer_used); |
1396 | } |
1397 | |
1398 | static PyObject * |
1399 | xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure) |
1400 | { |
1401 | return PyBool_FromLong(self->ns_prefixes); |
1402 | } |
1403 | |
1404 | static int |
1405 | xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure) |
1406 | { |
1407 | if (v == NULL) { |
1408 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1409 | return -1; |
1410 | } |
1411 | int b = PyObject_IsTrue(v); |
1412 | if (b < 0) |
1413 | return -1; |
1414 | self->ns_prefixes = b; |
1415 | XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); |
1416 | return 0; |
1417 | } |
1418 | |
1419 | static PyObject * |
1420 | xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure) |
1421 | { |
1422 | return PyBool_FromLong(self->ordered_attributes); |
1423 | } |
1424 | |
1425 | static int |
1426 | xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure) |
1427 | { |
1428 | if (v == NULL) { |
1429 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1430 | return -1; |
1431 | } |
1432 | int b = PyObject_IsTrue(v); |
1433 | if (b < 0) |
1434 | return -1; |
1435 | self->ordered_attributes = b; |
1436 | return 0; |
1437 | } |
1438 | |
1439 | static PyObject * |
1440 | xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure) |
1441 | { |
1442 | return PyBool_FromLong((long) self->specified_attributes); |
1443 | } |
1444 | |
1445 | static int |
1446 | xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure) |
1447 | { |
1448 | if (v == NULL) { |
1449 | PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute" ); |
1450 | return -1; |
1451 | } |
1452 | int b = PyObject_IsTrue(v); |
1453 | if (b < 0) |
1454 | return -1; |
1455 | self->specified_attributes = b; |
1456 | return 0; |
1457 | } |
1458 | |
/* Member table for xmlparser objects: exposes the `intern` field as a
   read-only object attribute. */
static PyMemberDef xmlparse_members[] = {
    {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    {NULL}
};
1463 | |
/* Entry builders for xmlparse_getsetlist.  XMLPARSE_GETTER_DEF(name) makes
   a read-only attribute backed by xmlparse_<name>_getter;
   XMLPARSE_GETTER_SETTER_DEF(name) additionally wires up
   xmlparse_<name>_setter.  Both are undefined again after the table. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of xmlparser objects. */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},
};

#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1489 | |
/* Docstring of the xmlparser type. */
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Slots of the xmlparser heap type: deallocation and GC hooks plus the
   method, member and getset tables defined above. */
static PyType_Slot _xml_parse_type_spec_slots[] = {
    {Py_tp_dealloc, xmlparse_dealloc},
    {Py_tp_doc, (void *)Xmlparsetype__doc__},
    {Py_tp_traverse, xmlparse_traverse},
    {Py_tp_clear, xmlparse_clear},
    {Py_tp_methods, xmlparse_methods},
    {Py_tp_members, xmlparse_members},
    {Py_tp_getset, xmlparse_getsetlist},
    {0, 0}
};

/* Type specification for pyexpat.xmlparser.  The flags make the type
   GC-aware, immutable and not instantiable by calling it from Python. */
static PyType_Spec _xml_parse_type_spec = {
    .name = "pyexpat.xmlparser",
    .basicsize = sizeof(xmlparseobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = _xml_parse_type_spec_slots,
};
1510 | |
1511 | /* End of code for xmlparser objects */ |
1512 | /* -------------------------------------------------------- */ |
1513 | |
1514 | /*[clinic input] |
1515 | pyexpat.ParserCreate |
1516 | |
1517 | encoding: str(accept={str, NoneType}) = None |
1518 | namespace_separator: str(accept={str, NoneType}) = None |
1519 | intern: object = NULL |
1520 | |
1521 | Return a new XML parser object. |
1522 | [clinic start generated code]*/ |
1523 | |
1524 | static PyObject * |
1525 | pyexpat_ParserCreate_impl(PyObject *module, const char *encoding, |
1526 | const char *namespace_separator, PyObject *intern) |
1527 | /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/ |
1528 | { |
1529 | PyObject *result; |
1530 | int intern_decref = 0; |
1531 | |
1532 | if (namespace_separator != NULL |
1533 | && strlen(namespace_separator) > 1) { |
1534 | PyErr_SetString(PyExc_ValueError, |
1535 | "namespace_separator must be at most one" |
1536 | " character, omitted, or None" ); |
1537 | return NULL; |
1538 | } |
1539 | /* Explicitly passing None means no interning is desired. |
1540 | Not passing anything means that a new dictionary is used. */ |
1541 | if (intern == Py_None) |
1542 | intern = NULL; |
1543 | else if (intern == NULL) { |
1544 | intern = PyDict_New(); |
1545 | if (!intern) |
1546 | return NULL; |
1547 | intern_decref = 1; |
1548 | } |
1549 | else if (!PyDict_Check(intern)) { |
1550 | PyErr_SetString(PyExc_TypeError, "intern must be a dictionary" ); |
1551 | return NULL; |
1552 | } |
1553 | |
1554 | pyexpat_state *state = pyexpat_get_state(module); |
1555 | result = newxmlparseobject(state, encoding, namespace_separator, intern); |
1556 | if (intern_decref) { |
1557 | Py_DECREF(intern); |
1558 | } |
1559 | return result; |
1560 | } |
1561 | |
1562 | /*[clinic input] |
1563 | pyexpat.ErrorString |
1564 | |
1565 | code: long |
1566 | / |
1567 | |
1568 | Returns string error for given number. |
1569 | [clinic start generated code]*/ |
1570 | |
1571 | static PyObject * |
1572 | pyexpat_ErrorString_impl(PyObject *module, long code) |
1573 | /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/ |
1574 | { |
1575 | return Py_BuildValue("z" , XML_ErrorString((int)code)); |
1576 | } |
1577 | |
1578 | /* List of methods defined in the module */ |
1579 | |
/* Module-level functions: ParserCreate() and ErrorString(). */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1585 | |
/* Module docstring */

PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");

/* Initialization function for the module */

/* Name used for the module definition and as the prefix of the submodule
   names; it can be predefined elsewhere to override the default. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif
1596 | |
1597 | static int init_handler_descrs(pyexpat_state *state) |
1598 | { |
1599 | int i; |
1600 | assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG)); |
1601 | for (i = 0; handler_info[i].name != NULL; i++) { |
1602 | struct HandlerInfo *hi = &handler_info[i]; |
1603 | hi->getset.name = hi->name; |
1604 | hi->getset.get = (getter)xmlparse_handler_getter; |
1605 | hi->getset.set = (setter)xmlparse_handler_setter; |
1606 | hi->getset.closure = &handler_info[i]; |
1607 | |
1608 | PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset); |
1609 | if (descr == NULL) |
1610 | return -1; |
1611 | |
1612 | if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) { |
1613 | Py_DECREF(descr); |
1614 | return -1; |
1615 | } |
1616 | Py_DECREF(descr); |
1617 | } |
1618 | return 0; |
1619 | } |
1620 | |
1621 | static PyObject * |
1622 | add_submodule(PyObject *mod, const char *fullname) |
1623 | { |
1624 | const char *name = strrchr(fullname, '.') + 1; |
1625 | |
1626 | PyObject *submodule = PyModule_New(fullname); |
1627 | if (submodule == NULL) { |
1628 | return NULL; |
1629 | } |
1630 | |
1631 | PyObject *mod_name = PyUnicode_FromString(fullname); |
1632 | if (mod_name == NULL) { |
1633 | Py_DECREF(submodule); |
1634 | return NULL; |
1635 | } |
1636 | |
1637 | if (_PyImport_SetModule(mod_name, submodule) < 0) { |
1638 | Py_DECREF(submodule); |
1639 | Py_DECREF(mod_name); |
1640 | return NULL; |
1641 | } |
1642 | Py_DECREF(mod_name); |
1643 | |
1644 | /* gives away the reference to the submodule */ |
1645 | if (PyModule_AddObject(mod, name, submodule) < 0) { |
1646 | Py_DECREF(submodule); |
1647 | return NULL; |
1648 | } |
1649 | |
1650 | return submodule; |
1651 | } |
1652 | |
1653 | static int |
1654 | add_error(PyObject *errors_module, PyObject *codes_dict, |
1655 | PyObject *rev_codes_dict, const char *name, int value) |
1656 | { |
1657 | const char *error_string = XML_ErrorString(value); |
1658 | if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) { |
1659 | return -1; |
1660 | } |
1661 | |
1662 | PyObject *num = PyLong_FromLong(value); |
1663 | if (num == NULL) { |
1664 | return -1; |
1665 | } |
1666 | |
1667 | if (PyDict_SetItemString(codes_dict, error_string, num) < 0) { |
1668 | Py_DECREF(num); |
1669 | return -1; |
1670 | } |
1671 | |
1672 | PyObject *str = PyUnicode_FromString(error_string); |
1673 | if (str == NULL) { |
1674 | Py_DECREF(num); |
1675 | return -1; |
1676 | } |
1677 | |
1678 | int res = PyDict_SetItem(rev_codes_dict, num, str); |
1679 | Py_DECREF(str); |
1680 | Py_DECREF(num); |
1681 | if (res < 0) { |
1682 | return -1; |
1683 | } |
1684 | |
1685 | return 0; |
1686 | } |
1687 | |
1688 | static int |
1689 | add_errors_module(PyObject *mod) |
1690 | { |
1691 | PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors" ); |
1692 | if (errors_module == NULL) { |
1693 | return -1; |
1694 | } |
1695 | |
1696 | PyObject *codes_dict = PyDict_New(); |
1697 | PyObject *rev_codes_dict = PyDict_New(); |
1698 | if (codes_dict == NULL || rev_codes_dict == NULL) { |
1699 | goto error; |
1700 | } |
1701 | |
1702 | #define ADD_CONST(name) do { \ |
1703 | if (add_error(errors_module, codes_dict, rev_codes_dict, \ |
1704 | #name, name) < 0) { \ |
1705 | goto error; \ |
1706 | } \ |
1707 | } while(0) |
1708 | |
1709 | ADD_CONST(XML_ERROR_NO_MEMORY); |
1710 | ADD_CONST(XML_ERROR_SYNTAX); |
1711 | ADD_CONST(XML_ERROR_NO_ELEMENTS); |
1712 | ADD_CONST(XML_ERROR_INVALID_TOKEN); |
1713 | ADD_CONST(XML_ERROR_UNCLOSED_TOKEN); |
1714 | ADD_CONST(XML_ERROR_PARTIAL_CHAR); |
1715 | ADD_CONST(XML_ERROR_TAG_MISMATCH); |
1716 | ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE); |
1717 | ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT); |
1718 | ADD_CONST(XML_ERROR_PARAM_ENTITY_REF); |
1719 | ADD_CONST(XML_ERROR_UNDEFINED_ENTITY); |
1720 | ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF); |
1721 | ADD_CONST(XML_ERROR_ASYNC_ENTITY); |
1722 | ADD_CONST(XML_ERROR_BAD_CHAR_REF); |
1723 | ADD_CONST(XML_ERROR_BINARY_ENTITY_REF); |
1724 | ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF); |
1725 | ADD_CONST(XML_ERROR_MISPLACED_XML_PI); |
1726 | ADD_CONST(XML_ERROR_UNKNOWN_ENCODING); |
1727 | ADD_CONST(XML_ERROR_INCORRECT_ENCODING); |
1728 | ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION); |
1729 | ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING); |
1730 | ADD_CONST(XML_ERROR_NOT_STANDALONE); |
1731 | ADD_CONST(XML_ERROR_UNEXPECTED_STATE); |
1732 | ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE); |
1733 | ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD); |
1734 | ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING); |
1735 | /* Added in Expat 1.95.7. */ |
1736 | ADD_CONST(XML_ERROR_UNBOUND_PREFIX); |
1737 | /* Added in Expat 1.95.8. */ |
1738 | ADD_CONST(XML_ERROR_UNDECLARING_PREFIX); |
1739 | ADD_CONST(XML_ERROR_INCOMPLETE_PE); |
1740 | ADD_CONST(XML_ERROR_XML_DECL); |
1741 | ADD_CONST(XML_ERROR_TEXT_DECL); |
1742 | ADD_CONST(XML_ERROR_PUBLICID); |
1743 | ADD_CONST(XML_ERROR_SUSPENDED); |
1744 | ADD_CONST(XML_ERROR_NOT_SUSPENDED); |
1745 | ADD_CONST(XML_ERROR_ABORTED); |
1746 | ADD_CONST(XML_ERROR_FINISHED); |
1747 | ADD_CONST(XML_ERROR_SUSPEND_PE); |
1748 | #undef ADD_CONST |
1749 | |
1750 | if (PyModule_AddStringConstant(errors_module, "__doc__" , |
1751 | "Constants used to describe " |
1752 | "error conditions." ) < 0) { |
1753 | goto error; |
1754 | } |
1755 | |
1756 | Py_INCREF(codes_dict); |
1757 | if (PyModule_AddObject(errors_module, "codes" , codes_dict) < 0) { |
1758 | Py_DECREF(codes_dict); |
1759 | goto error; |
1760 | } |
1761 | Py_CLEAR(codes_dict); |
1762 | |
1763 | Py_INCREF(rev_codes_dict); |
1764 | if (PyModule_AddObject(errors_module, "messages" , rev_codes_dict) < 0) { |
1765 | Py_DECREF(rev_codes_dict); |
1766 | goto error; |
1767 | } |
1768 | Py_CLEAR(rev_codes_dict); |
1769 | |
1770 | return 0; |
1771 | |
1772 | error: |
1773 | Py_XDECREF(codes_dict); |
1774 | Py_XDECREF(rev_codes_dict); |
1775 | return -1; |
1776 | } |
1777 | |
1778 | static int |
1779 | add_model_module(PyObject *mod) |
1780 | { |
1781 | PyObject *model_module = add_submodule(mod, MODULE_NAME ".model" ); |
1782 | if (model_module == NULL) { |
1783 | return -1; |
1784 | } |
1785 | |
1786 | #define MYCONST(c) do { \ |
1787 | if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \ |
1788 | return -1; \ |
1789 | } \ |
1790 | } while(0) |
1791 | |
1792 | if (PyModule_AddStringConstant( |
1793 | model_module, "__doc__" , |
1794 | "Constants used to interpret content model information." ) < 0) { |
1795 | return -1; |
1796 | } |
1797 | |
1798 | MYCONST(XML_CTYPE_EMPTY); |
1799 | MYCONST(XML_CTYPE_ANY); |
1800 | MYCONST(XML_CTYPE_MIXED); |
1801 | MYCONST(XML_CTYPE_NAME); |
1802 | MYCONST(XML_CTYPE_CHOICE); |
1803 | MYCONST(XML_CTYPE_SEQ); |
1804 | |
1805 | MYCONST(XML_CQUANT_NONE); |
1806 | MYCONST(XML_CQUANT_OPT); |
1807 | MYCONST(XML_CQUANT_REP); |
1808 | MYCONST(XML_CQUANT_PLUS); |
1809 | #undef MYCONST |
1810 | return 0; |
1811 | } |
1812 | |
1813 | #if XML_COMBINED_VERSION > 19505 |
1814 | static int |
1815 | add_features(PyObject *mod) |
1816 | { |
1817 | PyObject *list = PyList_New(0); |
1818 | if (list == NULL) { |
1819 | return -1; |
1820 | } |
1821 | |
1822 | const XML_Feature *features = XML_GetFeatureList(); |
1823 | for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) { |
1824 | PyObject *item = Py_BuildValue("si" , features[i].name, |
1825 | features[i].value); |
1826 | if (item == NULL) { |
1827 | goto error; |
1828 | } |
1829 | int ok = PyList_Append(list, item); |
1830 | Py_DECREF(item); |
1831 | if (ok < 0) { |
1832 | goto error; |
1833 | } |
1834 | } |
1835 | if (PyModule_AddObject(mod, "features" , list) < 0) { |
1836 | goto error; |
1837 | } |
1838 | return 0; |
1839 | |
1840 | error: |
1841 | Py_DECREF(list); |
1842 | return -1; |
1843 | } |
1844 | #endif |
1845 | |
1846 | static void |
1847 | pyexpat_destructor(PyObject *op) |
1848 | { |
1849 | void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME); |
1850 | PyMem_Free(p); |
1851 | } |
1852 | |
1853 | static int |
1854 | pyexpat_exec(PyObject *mod) |
1855 | { |
1856 | pyexpat_state *state = pyexpat_get_state(mod); |
1857 | state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec( |
1858 | mod, &_xml_parse_type_spec, NULL); |
1859 | |
1860 | if (state->xml_parse_type == NULL) { |
1861 | return -1; |
1862 | } |
1863 | |
1864 | if (init_handler_descrs(state) < 0) { |
1865 | return -1; |
1866 | } |
1867 | state->error = PyErr_NewException("xml.parsers.expat.ExpatError" , |
1868 | NULL, NULL); |
1869 | if (state->error == NULL) { |
1870 | return -1; |
1871 | } |
1872 | |
1873 | /* Add some symbolic constants to the module */ |
1874 | |
1875 | if (PyModule_AddObjectRef(mod, "error" , state->error) < 0) { |
1876 | return -1; |
1877 | } |
1878 | |
1879 | if (PyModule_AddObjectRef(mod, "ExpatError" , state->error) < 0) { |
1880 | return -1; |
1881 | } |
1882 | |
1883 | if (PyModule_AddObjectRef(mod, "XMLParserType" , |
1884 | (PyObject *) state->xml_parse_type) < 0) { |
1885 | return -1; |
1886 | } |
1887 | |
1888 | if (PyModule_AddStringConstant(mod, "EXPAT_VERSION" , |
1889 | XML_ExpatVersion()) < 0) { |
1890 | return -1; |
1891 | } |
1892 | { |
1893 | XML_Expat_Version info = XML_ExpatVersionInfo(); |
1894 | PyObject *versionInfo = Py_BuildValue("(iii)" , |
1895 | info.major, |
1896 | info.minor, |
1897 | info.micro); |
1898 | if (PyModule_AddObject(mod, "version_info" , versionInfo) < 0) { |
1899 | Py_DECREF(versionInfo); |
1900 | return -1; |
1901 | } |
1902 | } |
1903 | /* XXX When Expat supports some way of figuring out how it was |
1904 | compiled, this should check and set native_encoding |
1905 | appropriately. |
1906 | */ |
1907 | if (PyModule_AddStringConstant(mod, "native_encoding" , "UTF-8" ) < 0) { |
1908 | return -1; |
1909 | } |
1910 | |
1911 | if (add_errors_module(mod) < 0) { |
1912 | return -1; |
1913 | } |
1914 | |
1915 | if (add_model_module(mod) < 0) { |
1916 | return -1; |
1917 | } |
1918 | |
1919 | #if XML_COMBINED_VERSION > 19505 |
1920 | if (add_features(mod) < 0) { |
1921 | return -1; |
1922 | } |
1923 | #endif |
1924 | |
1925 | #define MYCONST(c) do { \ |
1926 | if (PyModule_AddIntConstant(mod, #c, c) < 0) { \ |
1927 | return -1; \ |
1928 | } \ |
1929 | } while(0) |
1930 | |
1931 | MYCONST(XML_PARAM_ENTITY_PARSING_NEVER); |
1932 | MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); |
1933 | MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS); |
1934 | #undef MYCONST |
1935 | |
1936 | struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI)); |
1937 | if (capi == NULL) { |
1938 | PyErr_NoMemory(); |
1939 | return -1; |
1940 | } |
1941 | /* initialize pyexpat dispatch table */ |
1942 | capi->size = sizeof(*capi); |
1943 | capi->magic = PyExpat_CAPI_MAGIC; |
1944 | capi->MAJOR_VERSION = XML_MAJOR_VERSION; |
1945 | capi->MINOR_VERSION = XML_MINOR_VERSION; |
1946 | capi->MICRO_VERSION = XML_MICRO_VERSION; |
1947 | capi->ErrorString = XML_ErrorString; |
1948 | capi->GetErrorCode = XML_GetErrorCode; |
1949 | capi->GetErrorColumnNumber = XML_GetErrorColumnNumber; |
1950 | capi->GetErrorLineNumber = XML_GetErrorLineNumber; |
1951 | capi->Parse = XML_Parse; |
1952 | capi->ParserCreate_MM = XML_ParserCreate_MM; |
1953 | capi->ParserFree = XML_ParserFree; |
1954 | capi->SetCharacterDataHandler = XML_SetCharacterDataHandler; |
1955 | capi->SetCommentHandler = XML_SetCommentHandler; |
1956 | capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand; |
1957 | capi->SetElementHandler = XML_SetElementHandler; |
1958 | capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler; |
1959 | capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler; |
1960 | capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; |
1961 | capi->SetUserData = XML_SetUserData; |
1962 | capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler; |
1963 | capi->SetEncoding = XML_SetEncoding; |
1964 | capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler; |
1965 | #if XML_COMBINED_VERSION >= 20100 |
1966 | capi->SetHashSalt = XML_SetHashSalt; |
1967 | #else |
1968 | capi->SetHashSalt = NULL; |
1969 | #endif |
1970 | |
1971 | /* export using capsule */ |
1972 | PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, |
1973 | pyexpat_destructor); |
1974 | if (capi_object == NULL) { |
1975 | PyMem_Free(capi); |
1976 | return -1; |
1977 | } |
1978 | |
1979 | if (PyModule_AddObject(mod, "expat_CAPI" , capi_object) < 0) { |
1980 | Py_DECREF(capi_object); |
1981 | return -1; |
1982 | } |
1983 | |
1984 | return 0; |
1985 | } |
1986 | |
1987 | static int |
1988 | pyexpat_traverse(PyObject *module, visitproc visit, void *arg) |
1989 | { |
1990 | pyexpat_state *state = pyexpat_get_state(module); |
1991 | Py_VISIT(state->xml_parse_type); |
1992 | Py_VISIT(state->error); |
1993 | return 0; |
1994 | } |
1995 | |
1996 | static int |
1997 | pyexpat_clear(PyObject *module) |
1998 | { |
1999 | pyexpat_state *state = pyexpat_get_state(module); |
2000 | Py_CLEAR(state->xml_parse_type); |
2001 | Py_CLEAR(state->error); |
2002 | return 0; |
2003 | } |
2004 | |
2005 | static void |
2006 | pyexpat_free(void *module) |
2007 | { |
2008 | pyexpat_clear((PyObject *)module); |
2009 | } |
2010 | |
2011 | static PyModuleDef_Slot pyexpat_slots[] = { |
2012 | {Py_mod_exec, pyexpat_exec}, |
2013 | {0, NULL} |
2014 | }; |
2015 | |
2016 | static struct PyModuleDef pyexpatmodule = { |
2017 | PyModuleDef_HEAD_INIT, |
2018 | .m_name = MODULE_NAME, |
2019 | .m_doc = pyexpat_module_documentation, |
2020 | .m_size = sizeof(pyexpat_state), |
2021 | .m_methods = pyexpat_methods, |
2022 | .m_slots = pyexpat_slots, |
2023 | .m_traverse = pyexpat_traverse, |
2024 | .m_clear = pyexpat_clear, |
2025 | .m_free = pyexpat_free |
2026 | }; |
2027 | |
2028 | PyMODINIT_FUNC |
2029 | PyInit_pyexpat(void) |
2030 | { |
2031 | return PyModuleDef_Init(&pyexpatmodule); |
2032 | } |
2033 | |
2034 | static void |
2035 | clear_handlers(xmlparseobject *self, int initial) |
2036 | { |
2037 | int i = 0; |
2038 | |
2039 | for (; handler_info[i].name != NULL; i++) { |
2040 | if (initial) |
2041 | self->handlers[i] = NULL; |
2042 | else { |
2043 | Py_CLEAR(self->handlers[i]); |
2044 | handler_info[i].setter(self->itself, NULL); |
2045 | } |
2046 | } |
2047 | } |
2048 | |
2049 | static struct HandlerInfo handler_info[] = { |
2050 | |
2051 | #define HANDLER_INFO(name) \ |
2052 | {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name}, |
2053 | |
2054 | HANDLER_INFO(StartElementHandler) |
2055 | HANDLER_INFO(EndElementHandler) |
2056 | HANDLER_INFO(ProcessingInstructionHandler) |
2057 | HANDLER_INFO(CharacterDataHandler) |
2058 | HANDLER_INFO(UnparsedEntityDeclHandler) |
2059 | HANDLER_INFO(NotationDeclHandler) |
2060 | HANDLER_INFO(StartNamespaceDeclHandler) |
2061 | HANDLER_INFO(EndNamespaceDeclHandler) |
2062 | HANDLER_INFO(CommentHandler) |
2063 | HANDLER_INFO(StartCdataSectionHandler) |
2064 | HANDLER_INFO(EndCdataSectionHandler) |
2065 | HANDLER_INFO(DefaultHandler) |
2066 | HANDLER_INFO(DefaultHandlerExpand) |
2067 | HANDLER_INFO(NotStandaloneHandler) |
2068 | HANDLER_INFO(ExternalEntityRefHandler) |
2069 | HANDLER_INFO(StartDoctypeDeclHandler) |
2070 | HANDLER_INFO(EndDoctypeDeclHandler) |
2071 | HANDLER_INFO(EntityDeclHandler) |
2072 | HANDLER_INFO(XmlDeclHandler) |
2073 | HANDLER_INFO(ElementDeclHandler) |
2074 | HANDLER_INFO(AttlistDeclHandler) |
2075 | #if XML_COMBINED_VERSION >= 19504 |
2076 | HANDLER_INFO(SkippedEntityHandler) |
2077 | #endif |
2078 | |
2079 | #undef HANDLER_INFO |
2080 | |
2081 | {NULL, NULL, NULL} /* sentinel */ |
2082 | }; |
2083 | |