1#include "Python.h"
2#include <ctype.h>
3
4#include "structmember.h" // PyMemberDef
5#include "frameobject.h"
6#include "expat.h"
7
8#include "pyexpat.h"
9
10/* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12/*[clinic input]
13module pyexpat
14[clinic start generated code]*/
15/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat's version folded into a single integer
   (major*10000 + minor*100 + micro) so that it can be compared in the
   #if checks used in this file. */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Allocation suite built from the Python object allocator functions; it is
   handed to XML_ParserCreate_MM() when a new parser is created. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Indices into xmlparseobject.handlers[] (see have_handler()).
 *
 * xmlparse_handler_setter() compares an index into handler_info[] against
 * CharacterData, so the order of the enumerators is expected to match the
 * order of the handler_info[] entries -- the table's initializer is outside
 * this part of the file; keep both in sync when adding a handler.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when built against Expat >= 1.95.4. */
    SkippedEntity,
#endif
    _DummyDecl
};
49
/* Per-module state of the pyexpat module. */
typedef struct {
    PyTypeObject *xml_parse_type;   /* Type used to allocate parser objects. */
    PyObject *error;                /* Exception class raised by set_error(). */
} pyexpat_state;
54
55static inline pyexpat_state*
56pyexpat_get_state(PyObject *module)
57{
58 void *state = PyModule_GetState(module);
59 assert(state != NULL);
60 return (pyexpat_state *)state;
61}
62
63/* ----------------------------------------------------- */
64
65/* Declarations for objects of type xmlparser */
66
/* Instance layout of a parser object: the wrapped Expat parser plus the
   Python-level configuration and callback state that belongs to it. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser. */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
                                /* (may be NULL; see string_intern()) */
    PyObject **handlers;        /* One slot per handler_info[] entry; */
                                /* a NULL slot means "no handler set" */
} xmlparseobject;
82
83#include "clinic/pyexpat.c.h"
84
/* Initial value of xmlparseobject.buffer_size for a new parser. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Function that installs (or, when meth is NULL, removes) one kind of
   callback on an Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Type-erased pointer to one of the C callbacks registered with Expat. */
typedef void* xmlhandler;

/* Description of one handler attribute of a parser object. */
struct HandlerInfo {
    const char *name;           /* NULL in the entry that ends the table */
    xmlhandlersetter setter;    /* installs `handler` on an XML_Parser */
    xmlhandler handler;         /* C callback passed to `setter` */
    PyGetSetDef getset;
};

/* Table of all handlers; the entries are filled in outside this part of
   the file.  Loops over it stop at the first entry whose name is NULL. */
static struct HandlerInfo handler_info[64];
98
99/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
103set_error_attr(PyObject *err, const char *name, int value)
104{
105 PyObject *v = PyLong_FromLong(value);
106
107 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 Py_XDECREF(v);
109 return 0;
110 }
111 Py_DECREF(v);
112 return 1;
113}
114
115/* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
118static PyObject *
119set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
120{
121 PyObject *err;
122 PyObject *buffer;
123 XML_Parser parser = self->itself;
124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
126
127 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 XML_ErrorString(code), lineno, column);
129 if (buffer == NULL)
130 return NULL;
131 err = PyObject_CallOneArg(state->error, buffer);
132 Py_DECREF(buffer);
133 if ( err != NULL
134 && set_error_attr(err, "code", code)
135 && set_error_attr(err, "offset", column)
136 && set_error_attr(err, "lineno", lineno)) {
137 PyErr_SetObject(state->error, err);
138 }
139 Py_XDECREF(err);
140 return NULL;
141}
142
143static int
144have_handler(xmlparseobject *self, int type)
145{
146 PyObject *handler = self->handlers[type];
147 return handler != NULL;
148}
149
150/* Convert a string of XML_Chars into a Unicode string.
151 Returns None if str is a null pointer. */
152
153static PyObject *
154conv_string_to_unicode(const XML_Char *str)
155{
156 /* XXX currently this code assumes that XML_Char is 8-bit,
157 and hence in UTF-8. */
158 /* UTF-8 from Expat, Unicode desired */
159 if (str == NULL) {
160 Py_RETURN_NONE;
161 }
162 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
163}
164
165static PyObject *
166conv_string_len_to_unicode(const XML_Char *str, int len)
167{
168 /* XXX currently this code assumes that XML_Char is 8-bit,
169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
172 Py_RETURN_NONE;
173 }
174 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
175}
176
177/* Callback routines */
178
179static void clear_handlers(xmlparseobject *self, int initial);
180
/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.

   It is installed by flag_error().  All arguments are ignored and the
   function always returns 0. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
193
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since character data handler can't be safely removed
   from within the character data handler, but can be replaced.  It is
   used only from the character data handler trampoline, and must be
   used right after `flag_error()` is called.

   All three arguments are ignored. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
205
/* Put the parser into its "an error happened" state: clear the registered
   handlers (clear_handlers() with initial == 0) and then install
   error_external_entity_ref_handler() as the external entity reference
   handler. */
static void
flag_error(xmlparseobject *self)
{
    clear_handlers(self, 0);
    /* Installed after the handlers were cleared, so this one stays active. */
    XML_SetExternalEntityRefHandler(self->itself,
                                    error_external_entity_ref_handler);
}
213
214static PyObject*
215call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
216 xmlparseobject *self)
217{
218 PyObject *res;
219
220 res = PyObject_Call(func, args, NULL);
221 if (res == NULL) {
222 _PyTraceback_Add(funcname, __FILE__, lineno);
223 XML_StopParser(self->itself, XML_FALSE);
224 }
225 return res;
226}
227
228static PyObject*
229string_intern(xmlparseobject *self, const char* str)
230{
231 PyObject *result = conv_string_to_unicode(str);
232 PyObject *value;
233 /* result can be NULL if the unicode conversion failed. */
234 if (!result)
235 return result;
236 if (!self->intern)
237 return result;
238 value = PyDict_GetItemWithError(self->intern, result);
239 if (!value) {
240 if (!PyErr_Occurred() &&
241 PyDict_SetItem(self->intern, result, result) == 0)
242 {
243 return result;
244 }
245 else {
246 Py_DECREF(result);
247 return NULL;
248 }
249 }
250 Py_INCREF(value);
251 Py_DECREF(result);
252 return value;
253}
254
255/* Return 0 on success, -1 on exception.
256 * flag_error() will be called before return if needed.
257 */
258static int
259call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260{
261 PyObject *args;
262 PyObject *temp;
263
264 if (!have_handler(self, CharacterData))
265 return -1;
266
267 args = PyTuple_New(1);
268 if (args == NULL)
269 return -1;
270 temp = (conv_string_len_to_unicode(buffer, len));
271 if (temp == NULL) {
272 Py_DECREF(args);
273 flag_error(self);
274 XML_SetCharacterDataHandler(self->itself,
275 noop_character_data_handler);
276 return -1;
277 }
278 PyTuple_SET_ITEM(args, 0, temp);
279 /* temp is now a borrowed reference; consider it unused. */
280 self->in_callback = 1;
281 temp = call_with_frame("CharacterData", __LINE__,
282 self->handlers[CharacterData], args, self);
283 /* temp is an owned reference again, or NULL */
284 self->in_callback = 0;
285 Py_DECREF(args);
286 if (temp == NULL) {
287 flag_error(self);
288 XML_SetCharacterDataHandler(self->itself,
289 noop_character_data_handler);
290 return -1;
291 }
292 Py_DECREF(temp);
293 return 0;
294}
295
296static int
297flush_character_buffer(xmlparseobject *self)
298{
299 int rc;
300 if (self->buffer == NULL || self->buffer_used == 0)
301 return 0;
302 rc = call_character_handler(self, self->buffer, self->buffer_used);
303 self->buffer_used = 0;
304 return rc;
305}
306
307static void
308my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
309{
310 xmlparseobject *self = (xmlparseobject *) userData;
311
312 if (PyErr_Occurred())
313 return;
314
315 if (self->buffer == NULL)
316 call_character_handler(self, data, len);
317 else {
318 if ((self->buffer_used + len) > self->buffer_size) {
319 if (flush_character_buffer(self) < 0)
320 return;
321 /* handler might have changed; drop the rest on the floor
322 * if there isn't a handler anymore
323 */
324 if (!have_handler(self, CharacterData))
325 return;
326 }
327 if (len > self->buffer_size) {
328 call_character_handler(self, data, len);
329 self->buffer_used = 0;
330 }
331 else {
332 memcpy(self->buffer + self->buffer_used,
333 data, len * sizeof(XML_Char));
334 self->buffer_used += len;
335 }
336 }
337}
338
339static void
340my_StartElementHandler(void *userData,
341 const XML_Char *name, const XML_Char *atts[])
342{
343 xmlparseobject *self = (xmlparseobject *)userData;
344
345 if (have_handler(self, StartElement)) {
346 PyObject *container, *rv, *args;
347 int i, max;
348
349 if (PyErr_Occurred())
350 return;
351
352 if (flush_character_buffer(self) < 0)
353 return;
354 /* Set max to the number of slots filled in atts[]; max/2 is
355 * the number of attributes we need to process.
356 */
357 if (self->specified_attributes) {
358 max = XML_GetSpecifiedAttributeCount(self->itself);
359 }
360 else {
361 max = 0;
362 while (atts[max] != NULL)
363 max += 2;
364 }
365 /* Build the container. */
366 if (self->ordered_attributes)
367 container = PyList_New(max);
368 else
369 container = PyDict_New();
370 if (container == NULL) {
371 flag_error(self);
372 return;
373 }
374 for (i = 0; i < max; i += 2) {
375 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
376 PyObject *v;
377 if (n == NULL) {
378 flag_error(self);
379 Py_DECREF(container);
380 return;
381 }
382 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
383 if (v == NULL) {
384 flag_error(self);
385 Py_DECREF(container);
386 Py_DECREF(n);
387 return;
388 }
389 if (self->ordered_attributes) {
390 PyList_SET_ITEM(container, i, n);
391 PyList_SET_ITEM(container, i+1, v);
392 }
393 else if (PyDict_SetItem(container, n, v)) {
394 flag_error(self);
395 Py_DECREF(n);
396 Py_DECREF(v);
397 Py_DECREF(container);
398 return;
399 }
400 else {
401 Py_DECREF(n);
402 Py_DECREF(v);
403 }
404 }
405 args = string_intern(self, name);
406 if (args == NULL) {
407 Py_DECREF(container);
408 return;
409 }
410 args = Py_BuildValue("(NN)", args, container);
411 if (args == NULL) {
412 return;
413 }
414 /* Container is now a borrowed reference; ignore it. */
415 self->in_callback = 1;
416 rv = call_with_frame("StartElement", __LINE__,
417 self->handlers[StartElement], args, self);
418 self->in_callback = 0;
419 Py_DECREF(args);
420 if (rv == NULL) {
421 flag_error(self);
422 return;
423 }
424 Py_DECREF(rv);
425 }
426}
427
/* Generate the C callback my_<NAME>Handler that forwards an Expat event to
 * the Python callable stored in self->handlers[NAME].
 *
 *   RC            return type of the generated function
 *   NAME          HandlerTypes enumerator; also used (stringified) as the
 *                 function name in the traceback entry
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          statement(s) emitted right after the local declarations
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue()
 *   CONVERSION    statement(s) run after a successful call, while `rv`
 *                 is still alive
 *   RETURN        expression returned on every path (empty for void)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * If no handler is set the function only returns RETURN.  Otherwise it
 * gives up early when an exception is already pending or the character
 * buffer cannot be flushed, and it calls flag_error() when building the
 * arguments or the Python call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458
/* RC_HANDLER for callbacks that return nothing and receive the parser
   object through a `void *userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER for callbacks that return an int: `rc` starts at 0 and is set
   from the handler's result with PyLong_AsLong().
   NOTE(review): a failing PyLong_AsLong() is not checked here. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
467
/* The instantiations below use two ways of converting a string argument:
   "N" with string_intern() (goes through the parser's intern dictionary)
   and "O&" with conv_string_to_unicode (plain conversion). */

/* my_EndElementHandler: handler(name) */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: handler(target, data) */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* my_UnparsedEntityDeclHandler:
   handler(entityName, base, systemId, publicId, notationName) */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler:
   handler(entityName, is_parameter_entity, value, base, systemId,
           publicId, notationName)
   `value` is converted using the explicit length value_length. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_XmlDeclHandler: handler(version, encoding, standalone) */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
515
516static PyObject *
517conv_content_model(XML_Content * const model,
518 PyObject *(*conv_string)(const XML_Char *))
519{
520 PyObject *result = NULL;
521 PyObject *children = PyTuple_New(model->numchildren);
522 int i;
523
524 if (children != NULL) {
525 assert(model->numchildren < INT_MAX);
526 for (i = 0; i < (int)model->numchildren; ++i) {
527 PyObject *child = conv_content_model(&model->children[i],
528 conv_string);
529 if (child == NULL) {
530 Py_XDECREF(children);
531 return NULL;
532 }
533 PyTuple_SET_ITEM(children, i, child);
534 }
535 result = Py_BuildValue("(iiO&N)",
536 model->type, model->quant,
537 conv_string,model->name, children);
538 }
539 return result;
540}
541
542static void
543my_ElementDeclHandler(void *userData,
544 const XML_Char *name,
545 XML_Content *model)
546{
547 xmlparseobject *self = (xmlparseobject *)userData;
548 PyObject *args = NULL;
549
550 if (have_handler(self, ElementDecl)) {
551 PyObject *rv = NULL;
552 PyObject *modelobj, *nameobj;
553
554 if (PyErr_Occurred())
555 return;
556
557 if (flush_character_buffer(self) < 0)
558 goto finally;
559 modelobj = conv_content_model(model, (conv_string_to_unicode));
560 if (modelobj == NULL) {
561 flag_error(self);
562 goto finally;
563 }
564 nameobj = string_intern(self, name);
565 if (nameobj == NULL) {
566 Py_DECREF(modelobj);
567 flag_error(self);
568 goto finally;
569 }
570 args = Py_BuildValue("NN", nameobj, modelobj);
571 if (args == NULL) {
572 flag_error(self);
573 goto finally;
574 }
575 self->in_callback = 1;
576 rv = call_with_frame("ElementDecl", __LINE__,
577 self->handlers[ElementDecl], args, self);
578 self->in_callback = 0;
579 if (rv == NULL) {
580 flag_error(self);
581 goto finally;
582 }
583 Py_DECREF(rv);
584 }
585 finally:
586 Py_XDECREF(args);
587 XML_FreeContentModel(self->itself, model);
588 return;
589}
590
/* my_AttlistDeclHandler:
   handler(elname, attname, att_type, dflt, isrequired) */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* my_SkippedEntityHandler: handler(entityName, is_parameter_entity) */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif
611
/* my_NotationDeclHandler:
   handler(notationName, base, systemId, publicId) */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: handler(prefix, uri) */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: handler(prefix) */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: handler(data) */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* my_StartCdataSectionHandler: handler() */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* my_EndCdataSectionHandler: handler() */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* my_DefaultHandler: handler(text), text being the `len` XML_Chars at s */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same arguments as the Default handler.
   The macro below also makes it available as my_DefaultHandlerExpand. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* my_NotStandaloneHandler: handler(); its integer result is returned to
   Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))
658
/* my_ExternalEntityRefHandler: handler(context, base, systemId, publicId);
   its integer result is returned to Expat.  This callback receives the
   XML_Parser instead of the user data pointer, so the parser object is
   fetched with XML_GetUserData().
   NOTE(review): a failing PyLong_AsLong() is not checked here. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))
671
672/* XXX UnknownEncodingHandler */
673
/* my_StartDoctypeDeclHandler:
   handler(doctypeName, sysid, pubid, has_internal_subset) */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: handler() */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
683
684/* ---------------------------------------------------------------- */
685/*[clinic input]
686class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687[clinic start generated code]*/
688/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689
690
691static PyObject *
692get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
693{
694 if (PyErr_Occurred()) {
695 return NULL;
696 }
697 if (rv == 0) {
698 return set_error(state, self, XML_GetErrorCode(self->itself));
699 }
700 if (flush_character_buffer(self) < 0) {
701 return NULL;
702 }
703 return PyLong_FromLong(rv);
704}
705
/* Largest number of bytes Parse() passes to a single XML_Parse() call. */
#define MAX_CHUNK_SIZE (1 << 20)
707
708/*[clinic input]
709pyexpat.xmlparser.Parse
710
711 cls: defining_class
712 data: object
713 isfinal: bool(accept={int}) = False
714 /
715
716Parse XML data.
717
718`isfinal' should be true at end of input.
719[clinic start generated code]*/
720
721static PyObject *
722pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 PyObject *data, int isfinal)
724/*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
725{
726 const char *s;
727 Py_ssize_t slen;
728 Py_buffer view;
729 int rc;
730 pyexpat_state *state = PyType_GetModuleState(cls);
731
732 if (PyUnicode_Check(data)) {
733 view.buf = NULL;
734 s = PyUnicode_AsUTF8AndSize(data, &slen);
735 if (s == NULL)
736 return NULL;
737 /* Explicitly set UTF-8 encoding. Return code ignored. */
738 (void)XML_SetEncoding(self->itself, "utf-8");
739 }
740 else {
741 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 return NULL;
743 s = view.buf;
744 slen = view.len;
745 }
746
747 while (slen > MAX_CHUNK_SIZE) {
748 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
749 if (!rc)
750 goto done;
751 s += MAX_CHUNK_SIZE;
752 slen -= MAX_CHUNK_SIZE;
753 }
754 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
755 assert(slen <= INT_MAX);
756 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
757
758done:
759 if (view.buf != NULL) {
760 PyBuffer_Release(&view);
761 }
762 return get_parse_result(state, self, rc);
763}
764
765/* File reading copied from cPickle */
766
/* Number of bytes ParseFile() requests from read() per iteration. */
#define BUF_SIZE 2048
768
769static int
770readinst(char *buf, int buf_size, PyObject *meth)
771{
772 PyObject *str;
773 Py_ssize_t len;
774 const char *ptr;
775
776 str = PyObject_CallFunction(meth, "n", buf_size);
777 if (str == NULL)
778 goto error;
779
780 if (PyBytes_Check(str))
781 ptr = PyBytes_AS_STRING(str);
782 else if (PyByteArray_Check(str))
783 ptr = PyByteArray_AS_STRING(str);
784 else {
785 PyErr_Format(PyExc_TypeError,
786 "read() did not return a bytes object (type=%.400s)",
787 Py_TYPE(str)->tp_name);
788 goto error;
789 }
790 len = Py_SIZE(str);
791 if (len > buf_size) {
792 PyErr_Format(PyExc_ValueError,
793 "read() returned too much data: "
794 "%i bytes requested, %zd returned",
795 buf_size, len);
796 goto error;
797 }
798 memcpy(buf, ptr, len);
799 Py_DECREF(str);
800 /* len <= buf_size <= INT_MAX */
801 return (int)len;
802
803error:
804 Py_XDECREF(str);
805 return -1;
806}
807
808/*[clinic input]
809pyexpat.xmlparser.ParseFile
810
811 cls: defining_class
812 file: object
813 /
814
815Parse XML data from file-like object.
816[clinic start generated code]*/
817
818static PyObject *
819pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
820 PyObject *file)
821/*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
822{
823 int rv = 1;
824 PyObject *readmethod = NULL;
825 _Py_IDENTIFIER(read);
826
827 pyexpat_state *state = PyType_GetModuleState(cls);
828
829 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
830 return NULL;
831 }
832 if (readmethod == NULL) {
833 PyErr_SetString(PyExc_TypeError,
834 "argument must have 'read' attribute");
835 return NULL;
836 }
837 for (;;) {
838 int bytes_read;
839 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
840 if (buf == NULL) {
841 Py_XDECREF(readmethod);
842 return get_parse_result(state, self, 0);
843 }
844
845 bytes_read = readinst(buf, BUF_SIZE, readmethod);
846 if (bytes_read < 0) {
847 Py_DECREF(readmethod);
848 return NULL;
849 }
850 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
851 if (PyErr_Occurred()) {
852 Py_XDECREF(readmethod);
853 return NULL;
854 }
855
856 if (!rv || bytes_read == 0)
857 break;
858 }
859 Py_XDECREF(readmethod);
860 return get_parse_result(state, self, rv);
861}
862
863/*[clinic input]
864pyexpat.xmlparser.SetBase
865
866 base: str
867 /
868
869Set the base URL for the parser.
870[clinic start generated code]*/
871
872static PyObject *
873pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
874/*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
875{
876 if (!XML_SetBase(self->itself, base)) {
877 return PyErr_NoMemory();
878 }
879 Py_RETURN_NONE;
880}
881
882/*[clinic input]
883pyexpat.xmlparser.GetBase
884
885Return base URL string for the parser.
886[clinic start generated code]*/
887
888static PyObject *
889pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
890/*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
891{
892 return Py_BuildValue("z", XML_GetBase(self->itself));
893}
894
895/*[clinic input]
896pyexpat.xmlparser.GetInputContext
897
898Return the untranslated text of the input that caused the current event.
899
900If the event was generated by a large amount of text (such as a start tag
901for an element with many attributes), not all of the text may be available.
902[clinic start generated code]*/
903
904static PyObject *
905pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
906/*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
907{
908 if (self->in_callback) {
909 int offset, size;
910 const char *buffer
911 = XML_GetInputContext(self->itself, &offset, &size);
912
913 if (buffer != NULL)
914 return PyBytes_FromStringAndSize(buffer + offset,
915 size - offset);
916 else
917 Py_RETURN_NONE;
918 }
919 else
920 Py_RETURN_NONE;
921}
922
923/*[clinic input]
924pyexpat.xmlparser.ExternalEntityParserCreate
925
926 cls: defining_class
927 context: str(accept={str, NoneType})
928 encoding: str = NULL
929 /
930
931Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
932[clinic start generated code]*/
933
static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
                                                  PyTypeObject *cls,
                                                  const char *context,
                                                  const char *encoding)
/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
{
    xmlparseobject *new_parser;
    int i;

    pyexpat_state *state = PyType_GetModuleState(cls);

    new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
    if (new_parser == NULL) {
        return NULL;
    }

    /* The child parser takes over the parent's settings and shares its
       intern dictionary; every field is set before the first error path
       below can release the object. */
    new_parser->buffer_size = self->buffer_size;
    new_parser->buffer_used = 0;
    new_parser->buffer = NULL;
    new_parser->ordered_attributes = self->ordered_attributes;
    new_parser->specified_attributes = self->specified_attributes;
    new_parser->in_callback = 0;
    new_parser->ns_prefixes = self->ns_prefixes;
    new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
                                                        encoding);
    new_parser->handlers = 0;
    new_parser->intern = self->intern;
    Py_XINCREF(new_parser->intern);

    /* Character buffering is enabled in the child only if the parent uses
       it.  The size is passed in bytes, in line with the "XML_Char is
       8-bit" assumption made by the conversion helpers.
       NOTE(review): the two error paths below release new_parser while
       handlers (and possibly itself) is still NULL; clear_handlers() is
       defined outside this view -- confirm the deallocation path copes
       with that. */
    if (self->buffer != NULL) {
        new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
        if (new_parser->buffer == NULL) {
            Py_DECREF(new_parser);
            return PyErr_NoMemory();
        }
    }
    if (!new_parser->itself) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }

    XML_SetUserData(new_parser->itself, (void *)new_parser);

    /* allocate and clear handlers first */
    /* (the loop only counts the entries of handler_info[]) */
    for (i = 0; handler_info[i].name != NULL; i++)
        /* do nothing */;

    new_parser->handlers = PyMem_New(PyObject *, i);
    if (!new_parser->handlers) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }
    clear_handlers(new_parser, 1);

    /* then copy handlers from self */
    for (i = 0; handler_info[i].name != NULL; i++) {
        PyObject *handler = self->handlers[i];
        if (handler != NULL) {
            Py_INCREF(handler);
            new_parser->handlers[i] = handler;
            /* Register the matching C callback with the new Expat parser. */
            handler_info[i].setter(new_parser->itself,
                                   handler_info[i].handler);
        }
    }

    /* Tracked by the GC only once the object is fully set up. */
    PyObject_GC_Track(new_parser);
    return (PyObject *)new_parser;
}
1003
1004/*[clinic input]
1005pyexpat.xmlparser.SetParamEntityParsing
1006
1007 flag: int
1008 /
1009
1010Controls parsing of parameter entities (including the external DTD subset).
1011
1012Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1013XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1014XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1015was successful.
1016[clinic start generated code]*/
1017
1018static PyObject *
1019pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1020/*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1021{
1022 flag = XML_SetParamEntityParsing(self->itself, flag);
1023 return PyLong_FromLong(flag);
1024}
1025
1026
1027#if XML_COMBINED_VERSION >= 19505
1028/*[clinic input]
1029pyexpat.xmlparser.UseForeignDTD
1030
1031 cls: defining_class
1032 flag: bool = True
1033 /
1034
1035Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1036
1037This readily allows the use of a 'default' document type controlled by the
1038application, while still getting the advantage of providing document type
1039information to the parser. 'flag' defaults to True if not provided.
1040[clinic start generated code]*/
1041
1042static PyObject *
1043pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1044 int flag)
1045/*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1046{
1047 pyexpat_state *state = PyType_GetModuleState(cls);
1048 enum XML_Error rc;
1049
1050 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1051 if (rc != XML_ERROR_NONE) {
1052 return set_error(state, self, rc);
1053 }
1054 Py_RETURN_NONE;
1055}
1056#endif
1057
/* Method table of the parser type.  The entries are the *_METHODDEF macros
   that belong to the *_impl functions above; UseForeignDTD is only listed
   when the function itself is compiled in (Expat >= 1.95.5). */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1071
1072/* ---------- */
1073
1074
1075
1076/* pyexpat international encoding support.
1077 Make it as simple as possible.
1078*/
1079
1080static int
1081PyUnknownEncodingHandler(void *encodingHandlerData,
1082 const XML_Char *name,
1083 XML_Encoding *info)
1084{
1085 static unsigned char template_buffer[256] = {0};
1086 PyObject* u;
1087 int i;
1088 const void *data;
1089 unsigned int kind;
1090
1091 if (PyErr_Occurred())
1092 return XML_STATUS_ERROR;
1093
1094 if (template_buffer[1] == 0) {
1095 for (i = 0; i < 256; i++)
1096 template_buffer[i] = i;
1097 }
1098
1099 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1100 if (u == NULL || PyUnicode_READY(u)) {
1101 Py_XDECREF(u);
1102 return XML_STATUS_ERROR;
1103 }
1104
1105 if (PyUnicode_GET_LENGTH(u) != 256) {
1106 Py_DECREF(u);
1107 PyErr_SetString(PyExc_ValueError,
1108 "multi-byte encodings are not supported");
1109 return XML_STATUS_ERROR;
1110 }
1111
1112 kind = PyUnicode_KIND(u);
1113 data = PyUnicode_DATA(u);
1114 for (i = 0; i < 256; i++) {
1115 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1116 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1117 info->map[i] = ch;
1118 else
1119 info->map[i] = -1;
1120 }
1121
1122 info->data = NULL;
1123 info->convert = NULL;
1124 info->release = NULL;
1125 Py_DECREF(u);
1126
1127 return XML_STATUS_OK;
1128}
1129
1130
1131static PyObject *
1132newxmlparseobject(pyexpat_state *state, const char *encoding,
1133 const char *namespace_separator, PyObject *intern)
1134{
1135 int i;
1136 xmlparseobject *self;
1137
1138 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1139 if (self == NULL)
1140 return NULL;
1141
1142 self->buffer = NULL;
1143 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1144 self->buffer_used = 0;
1145 self->ordered_attributes = 0;
1146 self->specified_attributes = 0;
1147 self->in_callback = 0;
1148 self->ns_prefixes = 0;
1149 self->handlers = NULL;
1150 self->intern = intern;
1151 Py_XINCREF(self->intern);
1152
1153 /* namespace_separator is either NULL or contains one char + \0 */
1154 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1155 namespace_separator);
1156 if (self->itself == NULL) {
1157 PyErr_SetString(PyExc_RuntimeError,
1158 "XML_ParserCreate failed");
1159 Py_DECREF(self);
1160 return NULL;
1161 }
1162#if XML_COMBINED_VERSION >= 20100
1163 /* This feature was added upstream in libexpat 2.1.0. */
1164 XML_SetHashSalt(self->itself,
1165 (unsigned long)_Py_HashSecret.expat.hashsalt);
1166#endif
1167 XML_SetUserData(self->itself, (void *)self);
1168 XML_SetUnknownEncodingHandler(self->itself,
1169 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1170
1171 for (i = 0; handler_info[i].name != NULL; i++)
1172 /* do nothing */;
1173
1174 self->handlers = PyMem_New(PyObject *, i);
1175 if (!self->handlers) {
1176 Py_DECREF(self);
1177 return PyErr_NoMemory();
1178 }
1179 clear_handlers(self, 1);
1180
1181 PyObject_GC_Track(self);
1182 return (PyObject*)self;
1183}
1184
1185static int
1186xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1187{
1188 for (int i = 0; handler_info[i].name != NULL; i++) {
1189 Py_VISIT(op->handlers[i]);
1190 }
1191 Py_VISIT(Py_TYPE(op));
1192 return 0;
1193}
1194
1195static int
1196xmlparse_clear(xmlparseobject *op)
1197{
1198 clear_handlers(op, 0);
1199 Py_CLEAR(op->intern);
1200 return 0;
1201}
1202
1203static void
1204xmlparse_dealloc(xmlparseobject *self)
1205{
1206 PyObject_GC_UnTrack(self);
1207 (void)xmlparse_clear(self);
1208 if (self->itself != NULL)
1209 XML_ParserFree(self->itself);
1210 self->itself = NULL;
1211
1212 if (self->handlers != NULL) {
1213 PyMem_Free(self->handlers);
1214 self->handlers = NULL;
1215 }
1216 if (self->buffer != NULL) {
1217 PyMem_Free(self->buffer);
1218 self->buffer = NULL;
1219 }
1220 PyTypeObject *tp = Py_TYPE(self);
1221 PyObject_GC_Del(self);
1222 Py_DECREF(tp);
1223}
1224
1225
1226static PyObject *
1227xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1228{
1229 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1230 int handlernum = (int)(hi - handler_info);
1231 PyObject *result = self->handlers[handlernum];
1232 if (result == NULL)
1233 result = Py_None;
1234 Py_INCREF(result);
1235 return result;
1236}
1237
1238static int
1239xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1240{
1241 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1242 int handlernum = (int)(hi - handler_info);
1243 if (v == NULL) {
1244 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1245 return -1;
1246 }
1247 if (handlernum == CharacterData) {
1248 /* If we're changing the character data handler, flush all
1249 * cached data with the old handler. Not sure there's a
1250 * "right" thing to do, though, but this probably won't
1251 * happen.
1252 */
1253 if (flush_character_buffer(self) < 0)
1254 return -1;
1255 }
1256
1257 xmlhandler c_handler = NULL;
1258 if (v == Py_None) {
1259 /* If this is the character data handler, and a character
1260 data handler is already active, we need to be more
1261 careful. What we can safely do is replace the existing
1262 character data handler callback function with a no-op
1263 function that will refuse to call Python. The downside
1264 is that this doesn't completely remove the character
1265 data handler from the C layer if there's any callback
1266 active, so Expat does a little more work than it
1267 otherwise would, but that's really an odd case. A more
1268 elaborate system of handlers and state could remove the
1269 C handler more effectively. */
1270 if (handlernum == CharacterData && self->in_callback)
1271 c_handler = noop_character_data_handler;
1272 v = NULL;
1273 }
1274 else if (v != NULL) {
1275 Py_INCREF(v);
1276 c_handler = handler_info[handlernum].handler;
1277 }
1278 Py_XSETREF(self->handlers[handlernum], v);
1279 handler_info[handlernum].setter(self->itself, c_handler);
1280 return 0;
1281}
1282
1283#define INT_GETTER(name) \
1284 static PyObject * \
1285 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1286 { \
1287 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1288 }
1289INT_GETTER(ErrorCode)
1290INT_GETTER(ErrorLineNumber)
1291INT_GETTER(ErrorColumnNumber)
1292INT_GETTER(ErrorByteIndex)
1293INT_GETTER(CurrentLineNumber)
1294INT_GETTER(CurrentColumnNumber)
1295INT_GETTER(CurrentByteIndex)
1296
1297#undef INT_GETTER
1298
1299static PyObject *
1300xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1301{
1302 return PyBool_FromLong(self->buffer != NULL);
1303}
1304
1305static int
1306xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1307{
1308 if (v == NULL) {
1309 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1310 return -1;
1311 }
1312 int b = PyObject_IsTrue(v);
1313 if (b < 0)
1314 return -1;
1315 if (b) {
1316 if (self->buffer == NULL) {
1317 self->buffer = PyMem_Malloc(self->buffer_size);
1318 if (self->buffer == NULL) {
1319 PyErr_NoMemory();
1320 return -1;
1321 }
1322 self->buffer_used = 0;
1323 }
1324 }
1325 else if (self->buffer != NULL) {
1326 if (flush_character_buffer(self) < 0)
1327 return -1;
1328 PyMem_Free(self->buffer);
1329 self->buffer = NULL;
1330 }
1331 return 0;
1332}
1333
1334static PyObject *
1335xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1336{
1337 return PyLong_FromLong((long) self->buffer_size);
1338}
1339
1340static int
1341xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1342{
1343 if (v == NULL) {
1344 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1345 return -1;
1346 }
1347 long new_buffer_size;
1348 if (!PyLong_Check(v)) {
1349 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1350 return -1;
1351 }
1352
1353 new_buffer_size = PyLong_AsLong(v);
1354 if (new_buffer_size <= 0) {
1355 if (!PyErr_Occurred())
1356 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1357 return -1;
1358 }
1359
1360 /* trivial case -- no change */
1361 if (new_buffer_size == self->buffer_size) {
1362 return 0;
1363 }
1364
1365 /* check maximum */
1366 if (new_buffer_size > INT_MAX) {
1367 char errmsg[100];
1368 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1369 PyErr_SetString(PyExc_ValueError, errmsg);
1370 return -1;
1371 }
1372
1373 if (self->buffer != NULL) {
1374 /* there is already a buffer */
1375 if (self->buffer_used != 0) {
1376 if (flush_character_buffer(self) < 0) {
1377 return -1;
1378 }
1379 }
1380 /* free existing buffer */
1381 PyMem_Free(self->buffer);
1382 }
1383 self->buffer = PyMem_Malloc(new_buffer_size);
1384 if (self->buffer == NULL) {
1385 PyErr_NoMemory();
1386 return -1;
1387 }
1388 self->buffer_size = new_buffer_size;
1389 return 0;
1390}
1391
1392static PyObject *
1393xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1394{
1395 return PyLong_FromLong((long) self->buffer_used);
1396}
1397
1398static PyObject *
1399xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1400{
1401 return PyBool_FromLong(self->ns_prefixes);
1402}
1403
1404static int
1405xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1406{
1407 if (v == NULL) {
1408 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1409 return -1;
1410 }
1411 int b = PyObject_IsTrue(v);
1412 if (b < 0)
1413 return -1;
1414 self->ns_prefixes = b;
1415 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1416 return 0;
1417}
1418
1419static PyObject *
1420xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1421{
1422 return PyBool_FromLong(self->ordered_attributes);
1423}
1424
1425static int
1426xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1427{
1428 if (v == NULL) {
1429 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1430 return -1;
1431 }
1432 int b = PyObject_IsTrue(v);
1433 if (b < 0)
1434 return -1;
1435 self->ordered_attributes = b;
1436 return 0;
1437}
1438
1439static PyObject *
1440xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1441{
1442 return PyBool_FromLong((long) self->specified_attributes);
1443}
1444
1445static int
1446xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1447{
1448 if (v == NULL) {
1449 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1450 return -1;
1451 }
1452 int b = PyObject_IsTrue(v);
1453 if (b < 0)
1454 return -1;
1455 self->specified_attributes = b;
1456 return 0;
1457}
1458
1459static PyMemberDef xmlparse_members[] = {
1460 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1461 {NULL}
1462};
1463
1464#define XMLPARSE_GETTER_DEF(name) \
1465 {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1466#define XMLPARSE_GETTER_SETTER_DEF(name) \
1467 {#name, (getter)xmlparse_##name##_getter, \
1468 (setter)xmlparse_##name##_setter, NULL},
1469
1470static PyGetSetDef xmlparse_getsetlist[] = {
1471 XMLPARSE_GETTER_DEF(ErrorCode)
1472 XMLPARSE_GETTER_DEF(ErrorLineNumber)
1473 XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1474 XMLPARSE_GETTER_DEF(ErrorByteIndex)
1475 XMLPARSE_GETTER_DEF(CurrentLineNumber)
1476 XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1477 XMLPARSE_GETTER_DEF(CurrentByteIndex)
1478 XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1479 XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1480 XMLPARSE_GETTER_DEF(buffer_used)
1481 XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1482 XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1483 XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1484 {NULL},
1485};
1486
1487#undef XMLPARSE_GETTER_DEF
1488#undef XMLPARSE_GETTER_SETTER_DEF
1489
1490PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1491
1492static PyType_Slot _xml_parse_type_spec_slots[] = {
1493 {Py_tp_dealloc, xmlparse_dealloc},
1494 {Py_tp_doc, (void *)Xmlparsetype__doc__},
1495 {Py_tp_traverse, xmlparse_traverse},
1496 {Py_tp_clear, xmlparse_clear},
1497 {Py_tp_methods, xmlparse_methods},
1498 {Py_tp_members, xmlparse_members},
1499 {Py_tp_getset, xmlparse_getsetlist},
1500 {0, 0}
1501};
1502
1503static PyType_Spec _xml_parse_type_spec = {
1504 .name = "pyexpat.xmlparser",
1505 .basicsize = sizeof(xmlparseobject),
1506 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1507 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
1508 .slots = _xml_parse_type_spec_slots,
1509};
1510
1511/* End of code for xmlparser objects */
1512/* -------------------------------------------------------- */
1513
1514/*[clinic input]
1515pyexpat.ParserCreate
1516
1517 encoding: str(accept={str, NoneType}) = None
1518 namespace_separator: str(accept={str, NoneType}) = None
1519 intern: object = NULL
1520
1521Return a new XML parser object.
1522[clinic start generated code]*/
1523
1524static PyObject *
1525pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1526 const char *namespace_separator, PyObject *intern)
1527/*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1528{
1529 PyObject *result;
1530 int intern_decref = 0;
1531
1532 if (namespace_separator != NULL
1533 && strlen(namespace_separator) > 1) {
1534 PyErr_SetString(PyExc_ValueError,
1535 "namespace_separator must be at most one"
1536 " character, omitted, or None");
1537 return NULL;
1538 }
1539 /* Explicitly passing None means no interning is desired.
1540 Not passing anything means that a new dictionary is used. */
1541 if (intern == Py_None)
1542 intern = NULL;
1543 else if (intern == NULL) {
1544 intern = PyDict_New();
1545 if (!intern)
1546 return NULL;
1547 intern_decref = 1;
1548 }
1549 else if (!PyDict_Check(intern)) {
1550 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1551 return NULL;
1552 }
1553
1554 pyexpat_state *state = pyexpat_get_state(module);
1555 result = newxmlparseobject(state, encoding, namespace_separator, intern);
1556 if (intern_decref) {
1557 Py_DECREF(intern);
1558 }
1559 return result;
1560}
1561
1562/*[clinic input]
1563pyexpat.ErrorString
1564
1565 code: long
1566 /
1567
1568Returns string error for given number.
1569[clinic start generated code]*/
1570
1571static PyObject *
1572pyexpat_ErrorString_impl(PyObject *module, long code)
1573/*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1574{
1575 return Py_BuildValue("z", XML_ErrorString((int)code));
1576}
1577
1578/* List of methods defined in the module */
1579
1580static struct PyMethodDef pyexpat_methods[] = {
1581 PYEXPAT_PARSERCREATE_METHODDEF
1582 PYEXPAT_ERRORSTRING_METHODDEF
1583 {NULL, NULL} /* sentinel */
1584};
1585
1586/* Module docstring */
1587
1588PyDoc_STRVAR(pyexpat_module_documentation,
1589"Python wrapper for Expat parser.");
1590
1591/* Initialization function for the module */
1592
1593#ifndef MODULE_NAME
1594#define MODULE_NAME "pyexpat"
1595#endif
1596
1597static int init_handler_descrs(pyexpat_state *state)
1598{
1599 int i;
1600 assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
1601 for (i = 0; handler_info[i].name != NULL; i++) {
1602 struct HandlerInfo *hi = &handler_info[i];
1603 hi->getset.name = hi->name;
1604 hi->getset.get = (getter)xmlparse_handler_getter;
1605 hi->getset.set = (setter)xmlparse_handler_setter;
1606 hi->getset.closure = &handler_info[i];
1607
1608 PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
1609 if (descr == NULL)
1610 return -1;
1611
1612 if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
1613 Py_DECREF(descr);
1614 return -1;
1615 }
1616 Py_DECREF(descr);
1617 }
1618 return 0;
1619}
1620
1621static PyObject *
1622add_submodule(PyObject *mod, const char *fullname)
1623{
1624 const char *name = strrchr(fullname, '.') + 1;
1625
1626 PyObject *submodule = PyModule_New(fullname);
1627 if (submodule == NULL) {
1628 return NULL;
1629 }
1630
1631 PyObject *mod_name = PyUnicode_FromString(fullname);
1632 if (mod_name == NULL) {
1633 Py_DECREF(submodule);
1634 return NULL;
1635 }
1636
1637 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1638 Py_DECREF(submodule);
1639 Py_DECREF(mod_name);
1640 return NULL;
1641 }
1642 Py_DECREF(mod_name);
1643
1644 /* gives away the reference to the submodule */
1645 if (PyModule_AddObject(mod, name, submodule) < 0) {
1646 Py_DECREF(submodule);
1647 return NULL;
1648 }
1649
1650 return submodule;
1651}
1652
1653static int
1654add_error(PyObject *errors_module, PyObject *codes_dict,
1655 PyObject *rev_codes_dict, const char *name, int value)
1656{
1657 const char *error_string = XML_ErrorString(value);
1658 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1659 return -1;
1660 }
1661
1662 PyObject *num = PyLong_FromLong(value);
1663 if (num == NULL) {
1664 return -1;
1665 }
1666
1667 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1668 Py_DECREF(num);
1669 return -1;
1670 }
1671
1672 PyObject *str = PyUnicode_FromString(error_string);
1673 if (str == NULL) {
1674 Py_DECREF(num);
1675 return -1;
1676 }
1677
1678 int res = PyDict_SetItem(rev_codes_dict, num, str);
1679 Py_DECREF(str);
1680 Py_DECREF(num);
1681 if (res < 0) {
1682 return -1;
1683 }
1684
1685 return 0;
1686}
1687
1688static int
1689add_errors_module(PyObject *mod)
1690{
1691 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1692 if (errors_module == NULL) {
1693 return -1;
1694 }
1695
1696 PyObject *codes_dict = PyDict_New();
1697 PyObject *rev_codes_dict = PyDict_New();
1698 if (codes_dict == NULL || rev_codes_dict == NULL) {
1699 goto error;
1700 }
1701
1702#define ADD_CONST(name) do { \
1703 if (add_error(errors_module, codes_dict, rev_codes_dict, \
1704 #name, name) < 0) { \
1705 goto error; \
1706 } \
1707 } while(0)
1708
1709 ADD_CONST(XML_ERROR_NO_MEMORY);
1710 ADD_CONST(XML_ERROR_SYNTAX);
1711 ADD_CONST(XML_ERROR_NO_ELEMENTS);
1712 ADD_CONST(XML_ERROR_INVALID_TOKEN);
1713 ADD_CONST(XML_ERROR_UNCLOSED_TOKEN);
1714 ADD_CONST(XML_ERROR_PARTIAL_CHAR);
1715 ADD_CONST(XML_ERROR_TAG_MISMATCH);
1716 ADD_CONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1717 ADD_CONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1718 ADD_CONST(XML_ERROR_PARAM_ENTITY_REF);
1719 ADD_CONST(XML_ERROR_UNDEFINED_ENTITY);
1720 ADD_CONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1721 ADD_CONST(XML_ERROR_ASYNC_ENTITY);
1722 ADD_CONST(XML_ERROR_BAD_CHAR_REF);
1723 ADD_CONST(XML_ERROR_BINARY_ENTITY_REF);
1724 ADD_CONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1725 ADD_CONST(XML_ERROR_MISPLACED_XML_PI);
1726 ADD_CONST(XML_ERROR_UNKNOWN_ENCODING);
1727 ADD_CONST(XML_ERROR_INCORRECT_ENCODING);
1728 ADD_CONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1729 ADD_CONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1730 ADD_CONST(XML_ERROR_NOT_STANDALONE);
1731 ADD_CONST(XML_ERROR_UNEXPECTED_STATE);
1732 ADD_CONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1733 ADD_CONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1734 ADD_CONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1735 /* Added in Expat 1.95.7. */
1736 ADD_CONST(XML_ERROR_UNBOUND_PREFIX);
1737 /* Added in Expat 1.95.8. */
1738 ADD_CONST(XML_ERROR_UNDECLARING_PREFIX);
1739 ADD_CONST(XML_ERROR_INCOMPLETE_PE);
1740 ADD_CONST(XML_ERROR_XML_DECL);
1741 ADD_CONST(XML_ERROR_TEXT_DECL);
1742 ADD_CONST(XML_ERROR_PUBLICID);
1743 ADD_CONST(XML_ERROR_SUSPENDED);
1744 ADD_CONST(XML_ERROR_NOT_SUSPENDED);
1745 ADD_CONST(XML_ERROR_ABORTED);
1746 ADD_CONST(XML_ERROR_FINISHED);
1747 ADD_CONST(XML_ERROR_SUSPEND_PE);
1748#undef ADD_CONST
1749
1750 if (PyModule_AddStringConstant(errors_module, "__doc__",
1751 "Constants used to describe "
1752 "error conditions.") < 0) {
1753 goto error;
1754 }
1755
1756 Py_INCREF(codes_dict);
1757 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1758 Py_DECREF(codes_dict);
1759 goto error;
1760 }
1761 Py_CLEAR(codes_dict);
1762
1763 Py_INCREF(rev_codes_dict);
1764 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1765 Py_DECREF(rev_codes_dict);
1766 goto error;
1767 }
1768 Py_CLEAR(rev_codes_dict);
1769
1770 return 0;
1771
1772error:
1773 Py_XDECREF(codes_dict);
1774 Py_XDECREF(rev_codes_dict);
1775 return -1;
1776}
1777
1778static int
1779add_model_module(PyObject *mod)
1780{
1781 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1782 if (model_module == NULL) {
1783 return -1;
1784 }
1785
1786#define MYCONST(c) do { \
1787 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1788 return -1; \
1789 } \
1790 } while(0)
1791
1792 if (PyModule_AddStringConstant(
1793 model_module, "__doc__",
1794 "Constants used to interpret content model information.") < 0) {
1795 return -1;
1796 }
1797
1798 MYCONST(XML_CTYPE_EMPTY);
1799 MYCONST(XML_CTYPE_ANY);
1800 MYCONST(XML_CTYPE_MIXED);
1801 MYCONST(XML_CTYPE_NAME);
1802 MYCONST(XML_CTYPE_CHOICE);
1803 MYCONST(XML_CTYPE_SEQ);
1804
1805 MYCONST(XML_CQUANT_NONE);
1806 MYCONST(XML_CQUANT_OPT);
1807 MYCONST(XML_CQUANT_REP);
1808 MYCONST(XML_CQUANT_PLUS);
1809#undef MYCONST
1810 return 0;
1811}
1812
#if XML_COMBINED_VERSION > 19505
/* Add `features` to `mod`: a list of (name, value) tuples, one per entry
 * returned by XML_GetFeatureList().
 *
 * XML_Feature.value is a `long`, so it is converted with the "l" format
 * unit; the previous "i" unit read it as an `int` through varargs.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       (long)features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* On success the module takes over our reference to the list. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1845
1846static void
1847pyexpat_destructor(PyObject *op)
1848{
1849 void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1850 PyMem_Free(p);
1851}
1852
1853static int
1854pyexpat_exec(PyObject *mod)
1855{
1856 pyexpat_state *state = pyexpat_get_state(mod);
1857 state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1858 mod, &_xml_parse_type_spec, NULL);
1859
1860 if (state->xml_parse_type == NULL) {
1861 return -1;
1862 }
1863
1864 if (init_handler_descrs(state) < 0) {
1865 return -1;
1866 }
1867 state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1868 NULL, NULL);
1869 if (state->error == NULL) {
1870 return -1;
1871 }
1872
1873 /* Add some symbolic constants to the module */
1874
1875 if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
1876 return -1;
1877 }
1878
1879 if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
1880 return -1;
1881 }
1882
1883 if (PyModule_AddObjectRef(mod, "XMLParserType",
1884 (PyObject *) state->xml_parse_type) < 0) {
1885 return -1;
1886 }
1887
1888 if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1889 XML_ExpatVersion()) < 0) {
1890 return -1;
1891 }
1892 {
1893 XML_Expat_Version info = XML_ExpatVersionInfo();
1894 PyObject *versionInfo = Py_BuildValue("(iii)",
1895 info.major,
1896 info.minor,
1897 info.micro);
1898 if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1899 Py_DECREF(versionInfo);
1900 return -1;
1901 }
1902 }
1903 /* XXX When Expat supports some way of figuring out how it was
1904 compiled, this should check and set native_encoding
1905 appropriately.
1906 */
1907 if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1908 return -1;
1909 }
1910
1911 if (add_errors_module(mod) < 0) {
1912 return -1;
1913 }
1914
1915 if (add_model_module(mod) < 0) {
1916 return -1;
1917 }
1918
1919#if XML_COMBINED_VERSION > 19505
1920 if (add_features(mod) < 0) {
1921 return -1;
1922 }
1923#endif
1924
1925#define MYCONST(c) do { \
1926 if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1927 return -1; \
1928 } \
1929 } while(0)
1930
1931 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1932 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1933 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1934#undef MYCONST
1935
1936 struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1937 if (capi == NULL) {
1938 PyErr_NoMemory();
1939 return -1;
1940 }
1941 /* initialize pyexpat dispatch table */
1942 capi->size = sizeof(*capi);
1943 capi->magic = PyExpat_CAPI_MAGIC;
1944 capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1945 capi->MINOR_VERSION = XML_MINOR_VERSION;
1946 capi->MICRO_VERSION = XML_MICRO_VERSION;
1947 capi->ErrorString = XML_ErrorString;
1948 capi->GetErrorCode = XML_GetErrorCode;
1949 capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1950 capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1951 capi->Parse = XML_Parse;
1952 capi->ParserCreate_MM = XML_ParserCreate_MM;
1953 capi->ParserFree = XML_ParserFree;
1954 capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
1955 capi->SetCommentHandler = XML_SetCommentHandler;
1956 capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1957 capi->SetElementHandler = XML_SetElementHandler;
1958 capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1959 capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1960 capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1961 capi->SetUserData = XML_SetUserData;
1962 capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1963 capi->SetEncoding = XML_SetEncoding;
1964 capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
1965#if XML_COMBINED_VERSION >= 20100
1966 capi->SetHashSalt = XML_SetHashSalt;
1967#else
1968 capi->SetHashSalt = NULL;
1969#endif
1970
1971 /* export using capsule */
1972 PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
1973 pyexpat_destructor);
1974 if (capi_object == NULL) {
1975 PyMem_Free(capi);
1976 return -1;
1977 }
1978
1979 if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
1980 Py_DECREF(capi_object);
1981 return -1;
1982 }
1983
1984 return 0;
1985}
1986
1987static int
1988pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
1989{
1990 pyexpat_state *state = pyexpat_get_state(module);
1991 Py_VISIT(state->xml_parse_type);
1992 Py_VISIT(state->error);
1993 return 0;
1994}
1995
1996static int
1997pyexpat_clear(PyObject *module)
1998{
1999 pyexpat_state *state = pyexpat_get_state(module);
2000 Py_CLEAR(state->xml_parse_type);
2001 Py_CLEAR(state->error);
2002 return 0;
2003}
2004
2005static void
2006pyexpat_free(void *module)
2007{
2008 pyexpat_clear((PyObject *)module);
2009}
2010
2011static PyModuleDef_Slot pyexpat_slots[] = {
2012 {Py_mod_exec, pyexpat_exec},
2013 {0, NULL}
2014};
2015
2016static struct PyModuleDef pyexpatmodule = {
2017 PyModuleDef_HEAD_INIT,
2018 .m_name = MODULE_NAME,
2019 .m_doc = pyexpat_module_documentation,
2020 .m_size = sizeof(pyexpat_state),
2021 .m_methods = pyexpat_methods,
2022 .m_slots = pyexpat_slots,
2023 .m_traverse = pyexpat_traverse,
2024 .m_clear = pyexpat_clear,
2025 .m_free = pyexpat_free
2026};
2027
2028PyMODINIT_FUNC
2029PyInit_pyexpat(void)
2030{
2031 return PyModuleDef_Init(&pyexpatmodule);
2032}
2033
2034static void
2035clear_handlers(xmlparseobject *self, int initial)
2036{
2037 int i = 0;
2038
2039 for (; handler_info[i].name != NULL; i++) {
2040 if (initial)
2041 self->handlers[i] = NULL;
2042 else {
2043 Py_CLEAR(self->handlers[i]);
2044 handler_info[i].setter(self->itself, NULL);
2045 }
2046 }
2047}
2048
2049static struct HandlerInfo handler_info[] = {
2050
2051#define HANDLER_INFO(name) \
2052 {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2053
2054 HANDLER_INFO(StartElementHandler)
2055 HANDLER_INFO(EndElementHandler)
2056 HANDLER_INFO(ProcessingInstructionHandler)
2057 HANDLER_INFO(CharacterDataHandler)
2058 HANDLER_INFO(UnparsedEntityDeclHandler)
2059 HANDLER_INFO(NotationDeclHandler)
2060 HANDLER_INFO(StartNamespaceDeclHandler)
2061 HANDLER_INFO(EndNamespaceDeclHandler)
2062 HANDLER_INFO(CommentHandler)
2063 HANDLER_INFO(StartCdataSectionHandler)
2064 HANDLER_INFO(EndCdataSectionHandler)
2065 HANDLER_INFO(DefaultHandler)
2066 HANDLER_INFO(DefaultHandlerExpand)
2067 HANDLER_INFO(NotStandaloneHandler)
2068 HANDLER_INFO(ExternalEntityRefHandler)
2069 HANDLER_INFO(StartDoctypeDeclHandler)
2070 HANDLER_INFO(EndDoctypeDeclHandler)
2071 HANDLER_INFO(EntityDeclHandler)
2072 HANDLER_INFO(XmlDeclHandler)
2073 HANDLER_INFO(ElementDeclHandler)
2074 HANDLER_INFO(AttlistDeclHandler)
2075#if XML_COMBINED_VERSION >= 19504
2076 HANDLER_INFO(SkippedEntityHandler)
2077#endif
2078
2079#undef HANDLER_INFO
2080
2081 {NULL, NULL, NULL} /* sentinel */
2082};
2083