1/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See https://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * [email protected]
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14#define PY_SSIZE_T_CLEAN
15
16#include "Python.h"
17#include "structmember.h" // PyMemberDef
18
19/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in ElementObjectExtra (the
   size of its _children array).  An element can hold this many children
   without extra memory allocations. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
32 eight bytes. For the current C version of ElementTree, this means
33 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE adjust a running byte counter ('memory') and print it together
   with the given comment.  In the branch that is compiled by default both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks */
/* LOCAL(type) introduces a file-local (static) function returning 'type'.
   With MSVC it additionally requests __inline and the __fastcall calling
   convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)        the flag, i.e. bit 0 of the pointer value (0 or 1)
   JOIN_SET(p, flag)  p with bit 0 cleared and then OR-ed with 'flag',
                      yielded as a void*
   JOIN_OBJ(p)        p with bit 0 cleared, yielded as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
63
64/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
73/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
76static void _clear_joined_ptr(PyObject **p)
77{
78 if (*p) {
79 _set_joined_ptr(p, NULL);
80 }
81}
82
/* Types defined by this extension.  Only tentative declarations appear
   here so that code below can refer to the type objects; their
   initializers are not in this part of the file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
 * Every field is an object reference that elementtree_clear() releases and
 * elementtree_traverse() visits.
 */
typedef struct {
    PyObject *parseerror_obj;   /* NOTE(review): not used in this part of the file */
    PyObject *deepcopy_obj;     /* callable invoked by deepcopy() with (object, memo) */
    PyObject *elementpath_obj;  /* object whose find/findtext methods handle path lookups */
    PyObject *comment_factory;  /* NOTE(review): not used in this part of the file */
    PyObject *pi_factory;       /* NOTE(review): not used in this part of the file */
} elementtreestate;
98
/* Tentative declaration of the module definition; ET_STATE_GLOBAL needs
   its address.  The initializer is not in this part of the file. */
static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104static inline elementtreestate*
105get_elementtree_state(PyObject *module)
106{
107 void *state = PyModule_GetState(module);
108 assert(state != NULL);
109 return (elementtreestate *)state;
110}
111
/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 *
 * Expands to PyModule_GetState() applied to the result of
 * PyState_FindModule(&elementtreemodule), cast to elementtreestate*.  The
 * macro itself performs no NULL checks.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
117
118static int
119elementtree_clear(PyObject *m)
120{
121 elementtreestate *st = get_elementtree_state(m);
122 Py_CLEAR(st->parseerror_obj);
123 Py_CLEAR(st->deepcopy_obj);
124 Py_CLEAR(st->elementpath_obj);
125 Py_CLEAR(st->comment_factory);
126 Py_CLEAR(st->pi_factory);
127 return 0;
128}
129
130static int
131elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132{
133 elementtreestate *st = get_elementtree_state(m);
134 Py_VISIT(st->parseerror_obj);
135 Py_VISIT(st->deepcopy_obj);
136 Py_VISIT(st->elementpath_obj);
137 Py_VISIT(st->comment_factory);
138 Py_VISIT(st->pi_factory);
139 return 0;
140}
141
142static void
143elementtree_free(void *m)
144{
145 elementtree_clear((PyObject *)m);
146}
147
148/* helpers */
149
150LOCAL(PyObject*)
151list_join(PyObject* list)
152{
153 /* join list elements */
154 PyObject* joiner;
155 PyObject* result;
156
157 joiner = PyUnicode_FromStringAndSize("", 0);
158 if (!joiner)
159 return NULL;
160 result = PyUnicode_Join(joiner, list);
161 Py_DECREF(joiner);
162 return result;
163}
164
165/* Is the given object an empty dictionary?
166*/
167static int
168is_empty_dict(PyObject *obj)
169{
170 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
171}
172
173
174/* -------------------------------------------------------------------- */
175/* the Element type */
176
177typedef struct {
178
179 /* attributes (a dictionary object), or NULL if no attributes */
180 PyObject* attrib;
181
182 /* child elements */
183 Py_ssize_t length; /* actual number of items */
184 Py_ssize_t allocated; /* allocated items */
185
186 /* this either points to _children or to a malloced buffer */
187 PyObject* *children;
188
189 PyObject* _children[STATIC_CHILDREN];
190
191} ElementObjectExtra;
192
193typedef struct {
194 PyObject_HEAD
195
196 /* element tag (a string). */
197 PyObject* tag;
198
199 /* text before first child. note that this is a tagged pointer;
200 use JOIN_OBJ to get the object pointer. the join flag is used
201 to distinguish lists created by the tree builder from lists
202 assigned to the attribute by application code; the former
203 should be joined before being returned to the user, the latter
204 should be left intact. */
205 PyObject* text;
206
207 /* text after this element, in parent. note that this is a tagged
208 pointer; use JOIN_OBJ to get the object pointer. */
209 PyObject* tail;
210
211 ElementObjectExtra* extra;
212
213 PyObject *weakreflist; /* For tp_weaklistoffset */
214
215} ElementObject;
216
217
/* Element_CheckExact(op): op's type is exactly Element_Type (Py_IS_TYPE).
   Element_Check(op): PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
220
221
222/* -------------------------------------------------------------------- */
223/* Element constructors and destructor */
224
225LOCAL(int)
226create_extra(ElementObject* self, PyObject* attrib)
227{
228 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
229 if (!self->extra) {
230 PyErr_NoMemory();
231 return -1;
232 }
233
234 Py_XINCREF(attrib);
235 self->extra->attrib = attrib;
236
237 self->extra->length = 0;
238 self->extra->allocated = STATIC_CHILDREN;
239 self->extra->children = self->extra->_children;
240
241 return 0;
242}
243
244LOCAL(void)
245dealloc_extra(ElementObjectExtra *extra)
246{
247 Py_ssize_t i;
248
249 if (!extra)
250 return;
251
252 Py_XDECREF(extra->attrib);
253
254 for (i = 0; i < extra->length; i++)
255 Py_DECREF(extra->children[i]);
256
257 if (extra->children != extra->_children)
258 PyObject_Free(extra->children);
259
260 PyObject_Free(extra);
261}
262
263LOCAL(void)
264clear_extra(ElementObject* self)
265{
266 ElementObjectExtra *myextra;
267
268 if (!self->extra)
269 return;
270
271 /* Avoid DECREFs calling into this code again (cycles, etc.)
272 */
273 myextra = self->extra;
274 self->extra = NULL;
275
276 dealloc_extra(myextra);
277}
278
279/* Convenience internal function to create new Element objects with the given
280 * tag and attributes.
281*/
282LOCAL(PyObject*)
283create_new_element(PyObject* tag, PyObject* attrib)
284{
285 ElementObject* self;
286
287 self = PyObject_GC_New(ElementObject, &Element_Type);
288 if (self == NULL)
289 return NULL;
290 self->extra = NULL;
291
292 Py_INCREF(tag);
293 self->tag = tag;
294
295 Py_INCREF(Py_None);
296 self->text = Py_None;
297
298 Py_INCREF(Py_None);
299 self->tail = Py_None;
300
301 self->weakreflist = NULL;
302
303 ALLOC(sizeof(ElementObject), "create element");
304 PyObject_GC_Track(self);
305
306 if (attrib != NULL && !is_empty_dict(attrib)) {
307 if (create_extra(self, attrib) < 0) {
308 Py_DECREF(self);
309 return NULL;
310 }
311 }
312
313 return (PyObject*) self;
314}
315
316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 e->weakreflist = NULL;
332 }
333 return (PyObject *)e;
334}
335
336/* Helper function for extracting the attrib dictionary from a keywords dict.
337 * This is required by some constructors/functions in this module that can
338 * either accept attrib as a keyword argument or all attributes splashed
339 * directly into *kwds.
340 *
341 * Return a dictionary with the content of kwds merged into the content of
342 * attrib. If there is no attrib keyword, return a copy of kwds.
343 */
344static PyObject*
345get_attrib_from_keywords(PyObject *kwds)
346{
347 PyObject *attrib_str = PyUnicode_FromString("attrib");
348 if (attrib_str == NULL) {
349 return NULL;
350 }
351 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
352
353 if (attrib) {
354 /* If attrib was found in kwds, copy its value and remove it from
355 * kwds
356 */
357 if (!PyDict_Check(attrib)) {
358 Py_DECREF(attrib_str);
359 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360 Py_TYPE(attrib)->tp_name);
361 return NULL;
362 }
363 attrib = PyDict_Copy(attrib);
364 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365 Py_DECREF(attrib);
366 attrib = NULL;
367 }
368 }
369 else if (!PyErr_Occurred()) {
370 attrib = PyDict_New();
371 }
372
373 Py_DECREF(attrib_str);
374
375 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376 Py_DECREF(attrib);
377 return NULL;
378 }
379 return attrib;
380}
381
382/*[clinic input]
383module _elementtree
384class _elementtree.Element "ElementObject *" "&Element_Type"
385class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387[clinic start generated code]*/
388/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389
390static int
391element_init(PyObject *self, PyObject *args, PyObject *kwds)
392{
393 PyObject *tag;
394 PyObject *attrib = NULL;
395 ElementObject *self_elem;
396
397 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398 return -1;
399
400 if (attrib) {
401 /* attrib passed as positional arg */
402 attrib = PyDict_Copy(attrib);
403 if (!attrib)
404 return -1;
405 if (kwds) {
406 if (PyDict_Update(attrib, kwds) < 0) {
407 Py_DECREF(attrib);
408 return -1;
409 }
410 }
411 } else if (kwds) {
412 /* have keywords args */
413 attrib = get_attrib_from_keywords(kwds);
414 if (!attrib)
415 return -1;
416 }
417
418 self_elem = (ElementObject *)self;
419
420 if (attrib != NULL && !is_empty_dict(attrib)) {
421 if (create_extra(self_elem, attrib) < 0) {
422 Py_DECREF(attrib);
423 return -1;
424 }
425 }
426
427 /* We own a reference to attrib here and it's no longer needed. */
428 Py_XDECREF(attrib);
429
430 /* Replace the objects already pointed to by tag, text and tail. */
431 Py_INCREF(tag);
432 Py_XSETREF(self_elem->tag, tag);
433
434 Py_INCREF(Py_None);
435 _set_joined_ptr(&self_elem->text, Py_None);
436
437 Py_INCREF(Py_None);
438 _set_joined_ptr(&self_elem->tail, Py_None);
439
440 return 0;
441}
442
443LOCAL(int)
444element_resize(ElementObject* self, Py_ssize_t extra)
445{
446 Py_ssize_t size;
447 PyObject* *children;
448
449 assert(extra >= 0);
450 /* make sure self->children can hold the given number of extra
451 elements. set an exception and return -1 if allocation failed */
452
453 if (!self->extra) {
454 if (create_extra(self, NULL) < 0)
455 return -1;
456 }
457
458 size = self->extra->length + extra; /* never overflows */
459
460 if (size > self->extra->allocated) {
461 /* use Python 2.4's list growth strategy */
462 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
463 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
464 * which needs at least 4 bytes.
465 * Although it's a false alarm always assume at least one child to
466 * be safe.
467 */
468 size = size ? size : 1;
469 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470 goto nomemory;
471 if (self->extra->children != self->extra->_children) {
472 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
473 * "children", which needs at least 4 bytes. Although it's a
474 * false alarm always assume at least one child to be safe.
475 */
476 children = PyObject_Realloc(self->extra->children,
477 size * sizeof(PyObject*));
478 if (!children)
479 goto nomemory;
480 } else {
481 children = PyObject_Malloc(size * sizeof(PyObject*));
482 if (!children)
483 goto nomemory;
484 /* copy existing children from static area to malloc buffer */
485 memcpy(children, self->extra->children,
486 self->extra->length * sizeof(PyObject*));
487 }
488 self->extra->children = children;
489 self->extra->allocated = size;
490 }
491
492 return 0;
493
494 nomemory:
495 PyErr_NoMemory();
496 return -1;
497}
498
499LOCAL(void)
500raise_type_error(PyObject *element)
501{
502 PyErr_Format(PyExc_TypeError,
503 "expected an Element, not \"%.200s\"",
504 Py_TYPE(element)->tp_name);
505}
506
507LOCAL(int)
508element_add_subelement(ElementObject* self, PyObject* element)
509{
510 /* add a child element to a parent */
511
512 if (!Element_Check(element)) {
513 raise_type_error(element);
514 return -1;
515 }
516
517 if (element_resize(self, 1) < 0)
518 return -1;
519
520 Py_INCREF(element);
521 self->extra->children[self->extra->length] = element;
522
523 self->extra->length++;
524
525 return 0;
526}
527
528LOCAL(PyObject*)
529element_get_attrib(ElementObject* self)
530{
531 /* return borrowed reference to attrib dictionary */
532 /* note: this function assumes that the extra section exists */
533
534 PyObject* res = self->extra->attrib;
535
536 if (!res) {
537 /* create missing dictionary */
538 res = self->extra->attrib = PyDict_New();
539 }
540
541 return res;
542}
543
544LOCAL(PyObject*)
545element_get_text(ElementObject* self)
546{
547 /* return borrowed reference to text attribute */
548
549 PyObject *res = self->text;
550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
554 PyObject *tmp = list_join(res);
555 if (!tmp)
556 return NULL;
557 self->text = tmp;
558 Py_DECREF(res);
559 res = tmp;
560 }
561 }
562
563 return res;
564}
565
566LOCAL(PyObject*)
567element_get_tail(ElementObject* self)
568{
569 /* return borrowed reference to text attribute */
570
571 PyObject *res = self->tail;
572
573 if (JOIN_GET(res)) {
574 res = JOIN_OBJ(res);
575 if (PyList_CheckExact(res)) {
576 PyObject *tmp = list_join(res);
577 if (!tmp)
578 return NULL;
579 self->tail = tmp;
580 Py_DECREF(res);
581 res = tmp;
582 }
583 }
584
585 return res;
586}
587
588static PyObject*
589subelement(PyObject *self, PyObject *args, PyObject *kwds)
590{
591 PyObject* elem;
592
593 ElementObject* parent;
594 PyObject* tag;
595 PyObject* attrib = NULL;
596 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597 &Element_Type, &parent, &tag,
598 &PyDict_Type, &attrib)) {
599 return NULL;
600 }
601
602 if (attrib) {
603 /* attrib passed as positional arg */
604 attrib = PyDict_Copy(attrib);
605 if (!attrib)
606 return NULL;
607 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608 Py_DECREF(attrib);
609 return NULL;
610 }
611 } else if (kwds) {
612 /* have keyword args */
613 attrib = get_attrib_from_keywords(kwds);
614 if (!attrib)
615 return NULL;
616 } else {
617 /* no attrib arg, no kwds, so no attribute */
618 }
619
620 elem = create_new_element(tag, attrib);
621 Py_XDECREF(attrib);
622 if (elem == NULL)
623 return NULL;
624
625 if (element_add_subelement(parent, elem) < 0) {
626 Py_DECREF(elem);
627 return NULL;
628 }
629
630 return elem;
631}
632
633static int
634element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635{
636 Py_VISIT(self->tag);
637 Py_VISIT(JOIN_OBJ(self->text));
638 Py_VISIT(JOIN_OBJ(self->tail));
639
640 if (self->extra) {
641 Py_ssize_t i;
642 Py_VISIT(self->extra->attrib);
643
644 for (i = 0; i < self->extra->length; ++i)
645 Py_VISIT(self->extra->children[i]);
646 }
647 return 0;
648}
649
650static int
651element_gc_clear(ElementObject *self)
652{
653 Py_CLEAR(self->tag);
654 _clear_joined_ptr(&self->text);
655 _clear_joined_ptr(&self->tail);
656
657 /* After dropping all references from extra, it's no longer valid anyway,
658 * so fully deallocate it.
659 */
660 clear_extra(self);
661 return 0;
662}
663
664static void
665element_dealloc(ElementObject* self)
666{
667 /* bpo-31095: UnTrack is needed before calling any callbacks */
668 PyObject_GC_UnTrack(self);
669 Py_TRASHCAN_BEGIN(self, element_dealloc)
670
671 if (self->weakreflist != NULL)
672 PyObject_ClearWeakRefs((PyObject *) self);
673
674 /* element_gc_clear clears all references and deallocates extra
675 */
676 element_gc_clear(self);
677
678 RELEASE(sizeof(ElementObject), "destroy element");
679 Py_TYPE(self)->tp_free((PyObject *)self);
680 Py_TRASHCAN_END
681}
682
683/* -------------------------------------------------------------------- */
684
685/*[clinic input]
686_elementtree.Element.append
687
688 subelement: object(subclass_of='&Element_Type')
689 /
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696{
697 if (element_add_subelement(self, subelement) < 0)
698 return NULL;
699
700 Py_RETURN_NONE;
701}
702
703/*[clinic input]
704_elementtree.Element.clear
705
706[clinic start generated code]*/
707
708static PyObject *
709_elementtree_Element_clear_impl(ElementObject *self)
710/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711{
712 clear_extra(self);
713
714 Py_INCREF(Py_None);
715 _set_joined_ptr(&self->text, Py_None);
716
717 Py_INCREF(Py_None);
718 _set_joined_ptr(&self->tail, Py_None);
719
720 Py_RETURN_NONE;
721}
722
723/*[clinic input]
724_elementtree.Element.__copy__
725
726[clinic start generated code]*/
727
728static PyObject *
729_elementtree_Element___copy___impl(ElementObject *self)
730/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
731{
732 Py_ssize_t i;
733 ElementObject* element;
734
735 element = (ElementObject*) create_new_element(
736 self->tag, self->extra ? self->extra->attrib : NULL);
737 if (!element)
738 return NULL;
739
740 Py_INCREF(JOIN_OBJ(self->text));
741 _set_joined_ptr(&element->text, self->text);
742
743 Py_INCREF(JOIN_OBJ(self->tail));
744 _set_joined_ptr(&element->tail, self->tail);
745
746 assert(!element->extra || !element->extra->length);
747 if (self->extra) {
748 if (element_resize(element, self->extra->length) < 0) {
749 Py_DECREF(element);
750 return NULL;
751 }
752
753 for (i = 0; i < self->extra->length; i++) {
754 Py_INCREF(self->extra->children[i]);
755 element->extra->children[i] = self->extra->children[i];
756 }
757
758 assert(!element->extra->length);
759 element->extra->length = self->extra->length;
760 }
761
762 return (PyObject*) element;
763}
764
/* Helper for a deep copy.  Declared ahead of its definition because
   Element.__deepcopy__ (below) and the helper call each other. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767
768/*[clinic input]
769_elementtree.Element.__deepcopy__
770
771 memo: object(subclass_of="&PyDict_Type")
772 /
773
774[clinic start generated code]*/
775
776static PyObject *
777_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
779{
780 Py_ssize_t i;
781 ElementObject* element;
782 PyObject* tag;
783 PyObject* attrib;
784 PyObject* text;
785 PyObject* tail;
786 PyObject* id;
787
788 tag = deepcopy(self->tag, memo);
789 if (!tag)
790 return NULL;
791
792 if (self->extra && self->extra->attrib) {
793 attrib = deepcopy(self->extra->attrib, memo);
794 if (!attrib) {
795 Py_DECREF(tag);
796 return NULL;
797 }
798 } else {
799 attrib = NULL;
800 }
801
802 element = (ElementObject*) create_new_element(tag, attrib);
803
804 Py_DECREF(tag);
805 Py_XDECREF(attrib);
806
807 if (!element)
808 return NULL;
809
810 text = deepcopy(JOIN_OBJ(self->text), memo);
811 if (!text)
812 goto error;
813 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
814
815 tail = deepcopy(JOIN_OBJ(self->tail), memo);
816 if (!tail)
817 goto error;
818 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
819
820 assert(!element->extra || !element->extra->length);
821 if (self->extra) {
822 if (element_resize(element, self->extra->length) < 0)
823 goto error;
824
825 for (i = 0; i < self->extra->length; i++) {
826 PyObject* child = deepcopy(self->extra->children[i], memo);
827 if (!child || !Element_Check(child)) {
828 if (child) {
829 raise_type_error(child);
830 Py_DECREF(child);
831 }
832 element->extra->length = i;
833 goto error;
834 }
835 element->extra->children[i] = child;
836 }
837
838 assert(!element->extra->length);
839 element->extra->length = self->extra->length;
840 }
841
842 /* add object to memo dictionary (so deepcopy won't visit it again) */
843 id = PyLong_FromSsize_t((uintptr_t) self);
844 if (!id)
845 goto error;
846
847 i = PyDict_SetItem(memo, id, (PyObject*) element);
848
849 Py_DECREF(id);
850
851 if (i < 0)
852 goto error;
853
854 return (PyObject*) element;
855
856 error:
857 Py_DECREF(element);
858 return NULL;
859}
860
861LOCAL(PyObject *)
862deepcopy(PyObject *object, PyObject *memo)
863{
864 /* do a deep copy of the given object */
865 elementtreestate *st;
866 PyObject *stack[2];
867
868 /* Fast paths */
869 if (object == Py_None || PyUnicode_CheckExact(object)) {
870 Py_INCREF(object);
871 return object;
872 }
873
874 if (Py_REFCNT(object) == 1) {
875 if (PyDict_CheckExact(object)) {
876 PyObject *key, *value;
877 Py_ssize_t pos = 0;
878 int simple = 1;
879 while (PyDict_Next(object, &pos, &key, &value)) {
880 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881 simple = 0;
882 break;
883 }
884 }
885 if (simple)
886 return PyDict_Copy(object);
887 /* Fall through to general case */
888 }
889 else if (Element_CheckExact(object)) {
890 return _elementtree_Element___deepcopy___impl(
891 (ElementObject *)object, memo);
892 }
893 }
894
895 /* General case */
896 st = ET_STATE_GLOBAL;
897 if (!st->deepcopy_obj) {
898 PyErr_SetString(PyExc_RuntimeError,
899 "deepcopy helper not found");
900 return NULL;
901 }
902
903 stack[0] = object;
904 stack[1] = memo;
905 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
906}
907
908
909/*[clinic input]
910_elementtree.Element.__sizeof__ -> Py_ssize_t
911
912[clinic start generated code]*/
913
914static Py_ssize_t
915_elementtree_Element___sizeof___impl(ElementObject *self)
916/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
917{
918 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
919 if (self->extra) {
920 result += sizeof(ElementObjectExtra);
921 if (self->extra->children != self->extra->_children)
922 result += sizeof(PyObject*) * self->extra->allocated;
923 }
924 return result;
925}
926
/* dict keys for getstate/setstate.  These are the key names of the state
   dictionary built by __getstate__ and the keyword names accepted when a
   state dictionary is unpacked by __setstate__. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
933
934/* __getstate__ returns a fabricated instance dict as in the pure-Python
935 * Element implementation, for interoperability/interchangeability. This
936 * makes the pure-Python implementation details an API, but (a) there aren't
937 * any unnecessary structures there; and (b) it buys compatibility with 3.2
938 * pickles. See issue #16076.
939 */
940/*[clinic input]
941_elementtree.Element.__getstate__
942
943[clinic start generated code]*/
944
945static PyObject *
946_elementtree_Element___getstate___impl(ElementObject *self)
947/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
948{
949 Py_ssize_t i;
950 PyObject *children, *attrib;
951
952 /* Build a list of children. */
953 children = PyList_New(self->extra ? self->extra->length : 0);
954 if (!children)
955 return NULL;
956 for (i = 0; i < PyList_GET_SIZE(children); i++) {
957 PyObject *child = self->extra->children[i];
958 Py_INCREF(child);
959 PyList_SET_ITEM(children, i, child);
960 }
961
962 if (self->extra && self->extra->attrib) {
963 attrib = self->extra->attrib;
964 Py_INCREF(attrib);
965 }
966 else {
967 attrib = PyDict_New();
968 if (!attrib) {
969 Py_DECREF(children);
970 return NULL;
971 }
972 }
973
974 return Py_BuildValue("{sOsNsNsOsO}",
975 PICKLED_TAG, self->tag,
976 PICKLED_CHILDREN, children,
977 PICKLED_ATTRIB, attrib,
978 PICKLED_TEXT, JOIN_OBJ(self->text),
979 PICKLED_TAIL, JOIN_OBJ(self->tail));
980}
981
982static PyObject *
983element_setstate_from_attributes(ElementObject *self,
984 PyObject *tag,
985 PyObject *attrib,
986 PyObject *text,
987 PyObject *tail,
988 PyObject *children)
989{
990 Py_ssize_t i, nchildren;
991 ElementObjectExtra *oldextra = NULL;
992
993 if (!tag) {
994 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995 return NULL;
996 }
997
998 Py_INCREF(tag);
999 Py_XSETREF(self->tag, tag);
1000
1001 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002 Py_INCREF(JOIN_OBJ(text));
1003 _set_joined_ptr(&self->text, text);
1004
1005 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006 Py_INCREF(JOIN_OBJ(tail));
1007 _set_joined_ptr(&self->tail, tail);
1008
1009 /* Handle ATTRIB and CHILDREN. */
1010 if (!children && !attrib) {
1011 Py_RETURN_NONE;
1012 }
1013
1014 /* Compute 'nchildren'. */
1015 if (children) {
1016 if (!PyList_Check(children)) {
1017 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018 return NULL;
1019 }
1020 nchildren = PyList_GET_SIZE(children);
1021
1022 /* (Re-)allocate 'extra'.
1023 Avoid DECREFs calling into this code again (cycles, etc.)
1024 */
1025 oldextra = self->extra;
1026 self->extra = NULL;
1027 if (element_resize(self, nchildren)) {
1028 assert(!self->extra || !self->extra->length);
1029 clear_extra(self);
1030 self->extra = oldextra;
1031 return NULL;
1032 }
1033 assert(self->extra);
1034 assert(self->extra->allocated >= nchildren);
1035 if (oldextra) {
1036 assert(self->extra->attrib == NULL);
1037 self->extra->attrib = oldextra->attrib;
1038 oldextra->attrib = NULL;
1039 }
1040
1041 /* Copy children */
1042 for (i = 0; i < nchildren; i++) {
1043 PyObject *child = PyList_GET_ITEM(children, i);
1044 if (!Element_Check(child)) {
1045 raise_type_error(child);
1046 self->extra->length = i;
1047 dealloc_extra(oldextra);
1048 return NULL;
1049 }
1050 Py_INCREF(child);
1051 self->extra->children[i] = child;
1052 }
1053
1054 assert(!self->extra->length);
1055 self->extra->length = nchildren;
1056 }
1057 else {
1058 if (element_resize(self, 0)) {
1059 return NULL;
1060 }
1061 }
1062
1063 /* Stash attrib. */
1064 Py_XINCREF(attrib);
1065 Py_XSETREF(self->extra->attrib, attrib);
1066 dealloc_extra(oldextra);
1067
1068 Py_RETURN_NONE;
1069}
1070
1071/* __setstate__ for Element instance from the Python implementation.
1072 * 'state' should be the instance dict.
1073 */
1074
1075static PyObject *
1076element_setstate_from_Python(ElementObject *self, PyObject *state)
1077{
1078 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080 PyObject *args;
1081 PyObject *tag, *attrib, *text, *tail, *children;
1082 PyObject *retval;
1083
1084 tag = attrib = text = tail = children = NULL;
1085 args = PyTuple_New(0);
1086 if (!args)
1087 return NULL;
1088
1089 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090 &attrib, &text, &tail, &children))
1091 retval = element_setstate_from_attributes(self, tag, attrib, text,
1092 tail, children);
1093 else
1094 retval = NULL;
1095
1096 Py_DECREF(args);
1097 return retval;
1098}
1099
1100/*[clinic input]
1101_elementtree.Element.__setstate__
1102
1103 state: object
1104 /
1105
1106[clinic start generated code]*/
1107
1108static PyObject *
1109_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1111{
1112 if (!PyDict_CheckExact(state)) {
1113 PyErr_Format(PyExc_TypeError,
1114 "Don't know how to unpickle \"%.200R\" as an Element",
1115 state);
1116 return NULL;
1117 }
1118 else
1119 return element_setstate_from_Python(self, state);
1120}
1121
1122LOCAL(int)
1123checkpath(PyObject* tag)
1124{
1125 Py_ssize_t i;
1126 int check = 1;
1127
1128 /* check if a tag contains an xpath character */
1129
1130#define PATHCHAR(ch) \
1131 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1132
1133 if (PyUnicode_Check(tag)) {
1134 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1135 const void *data = PyUnicode_DATA(tag);
1136 unsigned int kind = PyUnicode_KIND(tag);
1137 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138 PyUnicode_READ(kind, data, 1) == '}' || (
1139 PyUnicode_READ(kind, data, 1) == '*' &&
1140 PyUnicode_READ(kind, data, 2) == '}'))) {
1141 /* wildcard: '{}tag' or '{*}tag' */
1142 return 1;
1143 }
1144 for (i = 0; i < len; i++) {
1145 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146 if (ch == '{')
1147 check = 0;
1148 else if (ch == '}')
1149 check = 1;
1150 else if (check && PATHCHAR(ch))
1151 return 1;
1152 }
1153 return 0;
1154 }
1155 if (PyBytes_Check(tag)) {
1156 const char *p = PyBytes_AS_STRING(tag);
1157 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158 if (len >= 3 && p[0] == '{' && (
1159 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1160 /* wildcard: '{}tag' or '{*}tag' */
1161 return 1;
1162 }
1163 for (i = 0; i < len; i++) {
1164 if (p[i] == '{')
1165 check = 0;
1166 else if (p[i] == '}')
1167 check = 1;
1168 else if (check && PATHCHAR(p[i]))
1169 return 1;
1170 }
1171 return 0;
1172 }
1173
1174 return 1; /* unknown type; might be path expression */
1175}
1176
1177/*[clinic input]
1178_elementtree.Element.extend
1179
1180 elements: object
1181 /
1182
1183[clinic start generated code]*/
1184
1185static PyObject *
1186_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1188{
1189 PyObject* seq;
1190 Py_ssize_t i;
1191
1192 seq = PySequence_Fast(elements, "");
1193 if (!seq) {
1194 PyErr_Format(
1195 PyExc_TypeError,
1196 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1197 );
1198 return NULL;
1199 }
1200
1201 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1202 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1203 Py_INCREF(element);
1204 if (element_add_subelement(self, element) < 0) {
1205 Py_DECREF(seq);
1206 Py_DECREF(element);
1207 return NULL;
1208 }
1209 Py_DECREF(element);
1210 }
1211
1212 Py_DECREF(seq);
1213
1214 Py_RETURN_NONE;
1215}
1216
1217/*[clinic input]
1218_elementtree.Element.find
1219
1220 path: object
1221 namespaces: object = None
1222
1223[clinic start generated code]*/
1224
1225static PyObject *
1226_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227 PyObject *namespaces)
1228/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1229{
1230 Py_ssize_t i;
1231 elementtreestate *st = ET_STATE_GLOBAL;
1232
1233 if (checkpath(path) || namespaces != Py_None) {
1234 _Py_IDENTIFIER(find);
1235 return _PyObject_CallMethodIdObjArgs(
1236 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1237 );
1238 }
1239
1240 if (!self->extra)
1241 Py_RETURN_NONE;
1242
1243 for (i = 0; i < self->extra->length; i++) {
1244 PyObject* item = self->extra->children[i];
1245 int rc;
1246 assert(Element_Check(item));
1247 Py_INCREF(item);
1248 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1249 if (rc > 0)
1250 return item;
1251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
1254 }
1255
1256 Py_RETURN_NONE;
1257}
1258
1259/*[clinic input]
1260_elementtree.Element.findtext
1261
1262 path: object
1263 default: object = None
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270 PyObject *default_value,
1271 PyObject *namespaces)
1272/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1273{
1274 Py_ssize_t i;
1275 _Py_IDENTIFIER(findtext);
1276 elementtreestate *st = ET_STATE_GLOBAL;
1277
1278 if (checkpath(path) || namespaces != Py_None)
1279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findtext,
1281 self, path, default_value, namespaces, NULL
1282 );
1283
1284 if (!self->extra) {
1285 Py_INCREF(default_value);
1286 return default_value;
1287 }
1288
1289 for (i = 0; i < self->extra->length; i++) {
1290 PyObject *item = self->extra->children[i];
1291 int rc;
1292 assert(Element_Check(item));
1293 Py_INCREF(item);
1294 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1295 if (rc > 0) {
1296 PyObject* text = element_get_text((ElementObject*)item);
1297 if (text == Py_None) {
1298 Py_DECREF(item);
1299 return PyUnicode_New(0, 0);
1300 }
1301 Py_XINCREF(text);
1302 Py_DECREF(item);
1303 return text;
1304 }
1305 Py_DECREF(item);
1306 if (rc < 0)
1307 return NULL;
1308 }
1309
1310 Py_INCREF(default_value);
1311 return default_value;
1312}
1313
1314/*[clinic input]
1315_elementtree.Element.findall
1316
1317 path: object
1318 namespaces: object = None
1319
1320[clinic start generated code]*/
1321
1322static PyObject *
1323_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324 PyObject *namespaces)
1325/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1326{
1327 Py_ssize_t i;
1328 PyObject* out;
1329 elementtreestate *st = ET_STATE_GLOBAL;
1330
1331 if (checkpath(path) || namespaces != Py_None) {
1332 _Py_IDENTIFIER(findall);
1333 return _PyObject_CallMethodIdObjArgs(
1334 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1335 );
1336 }
1337
1338 out = PyList_New(0);
1339 if (!out)
1340 return NULL;
1341
1342 if (!self->extra)
1343 return out;
1344
1345 for (i = 0; i < self->extra->length; i++) {
1346 PyObject* item = self->extra->children[i];
1347 int rc;
1348 assert(Element_Check(item));
1349 Py_INCREF(item);
1350 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1351 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352 Py_DECREF(item);
1353 Py_DECREF(out);
1354 return NULL;
1355 }
1356 Py_DECREF(item);
1357 }
1358
1359 return out;
1360}
1361
1362/*[clinic input]
1363_elementtree.Element.iterfind
1364
1365 path: object
1366 namespaces: object = None
1367
1368[clinic start generated code]*/
1369
1370static PyObject *
1371_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372 PyObject *namespaces)
1373/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374{
1375 PyObject* tag = path;
1376 _Py_IDENTIFIER(iterfind);
1377 elementtreestate *st = ET_STATE_GLOBAL;
1378
1379 return _PyObject_CallMethodIdObjArgs(
1380 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1381}
1382
1383/*[clinic input]
1384_elementtree.Element.get
1385
1386 key: object
1387 default: object = None
1388
1389[clinic start generated code]*/
1390
1391static PyObject *
1392_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393 PyObject *default_value)
1394/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1395{
1396 if (self->extra && self->extra->attrib) {
1397 PyObject *attrib = self->extra->attrib;
1398 Py_INCREF(attrib);
1399 PyObject *value = PyDict_GetItemWithError(attrib, key);
1400 Py_XINCREF(value);
1401 Py_DECREF(attrib);
1402 if (value != NULL || PyErr_Occurred()) {
1403 return value;
1404 }
1405 }
1406
1407 Py_INCREF(default_value);
1408 return default_value;
1409}
1410
1411static PyObject *
1412create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1413
1414
1415/*[clinic input]
1416_elementtree.Element.iter
1417
1418 tag: object = None
1419
1420[clinic start generated code]*/
1421
1422static PyObject *
1423_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1424/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1425{
1426 if (PyUnicode_Check(tag)) {
1427 if (PyUnicode_READY(tag) < 0)
1428 return NULL;
1429 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1430 tag = Py_None;
1431 }
1432 else if (PyBytes_Check(tag)) {
1433 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1434 tag = Py_None;
1435 }
1436
1437 return create_elementiter(self, tag, 0);
1438}
1439
1440
1441/*[clinic input]
1442_elementtree.Element.itertext
1443
1444[clinic start generated code]*/
1445
1446static PyObject *
1447_elementtree_Element_itertext_impl(ElementObject *self)
1448/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1449{
1450 return create_elementiter(self, Py_None, 1);
1451}
1452
1453
1454static PyObject*
1455element_getitem(PyObject* self_, Py_ssize_t index)
1456{
1457 ElementObject* self = (ElementObject*) self_;
1458
1459 if (!self->extra || index < 0 || index >= self->extra->length) {
1460 PyErr_SetString(
1461 PyExc_IndexError,
1462 "child index out of range"
1463 );
1464 return NULL;
1465 }
1466
1467 Py_INCREF(self->extra->children[index]);
1468 return self->extra->children[index];
1469}
1470
1471/*[clinic input]
1472_elementtree.Element.insert
1473
1474 index: Py_ssize_t
1475 subelement: object(subclass_of='&Element_Type')
1476 /
1477
1478[clinic start generated code]*/
1479
1480static PyObject *
1481_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1482 PyObject *subelement)
1483/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1484{
1485 Py_ssize_t i;
1486
1487 if (!self->extra) {
1488 if (create_extra(self, NULL) < 0)
1489 return NULL;
1490 }
1491
1492 if (index < 0) {
1493 index += self->extra->length;
1494 if (index < 0)
1495 index = 0;
1496 }
1497 if (index > self->extra->length)
1498 index = self->extra->length;
1499
1500 if (element_resize(self, 1) < 0)
1501 return NULL;
1502
1503 for (i = self->extra->length; i > index; i--)
1504 self->extra->children[i] = self->extra->children[i-1];
1505
1506 Py_INCREF(subelement);
1507 self->extra->children[index] = subelement;
1508
1509 self->extra->length++;
1510
1511 Py_RETURN_NONE;
1512}
1513
1514/*[clinic input]
1515_elementtree.Element.items
1516
1517[clinic start generated code]*/
1518
1519static PyObject *
1520_elementtree_Element_items_impl(ElementObject *self)
1521/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1522{
1523 if (!self->extra || !self->extra->attrib)
1524 return PyList_New(0);
1525
1526 return PyDict_Items(self->extra->attrib);
1527}
1528
1529/*[clinic input]
1530_elementtree.Element.keys
1531
1532[clinic start generated code]*/
1533
1534static PyObject *
1535_elementtree_Element_keys_impl(ElementObject *self)
1536/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1537{
1538 if (!self->extra || !self->extra->attrib)
1539 return PyList_New(0);
1540
1541 return PyDict_Keys(self->extra->attrib);
1542}
1543
1544static Py_ssize_t
1545element_length(ElementObject* self)
1546{
1547 if (!self->extra)
1548 return 0;
1549
1550 return self->extra->length;
1551}
1552
1553/*[clinic input]
1554_elementtree.Element.makeelement
1555
1556 tag: object
1557 attrib: object(subclass_of='&PyDict_Type')
1558 /
1559
1560[clinic start generated code]*/
1561
1562static PyObject *
1563_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1564 PyObject *attrib)
1565/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1566{
1567 PyObject* elem;
1568
1569 attrib = PyDict_Copy(attrib);
1570 if (!attrib)
1571 return NULL;
1572
1573 elem = create_new_element(tag, attrib);
1574
1575 Py_DECREF(attrib);
1576
1577 return elem;
1578}
1579
1580/*[clinic input]
1581_elementtree.Element.remove
1582
1583 subelement: object(subclass_of='&Element_Type')
1584 /
1585
1586[clinic start generated code]*/
1587
1588static PyObject *
1589_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1590/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1591{
1592 Py_ssize_t i;
1593 int rc;
1594 PyObject *found;
1595
1596 if (!self->extra) {
1597 /* element has no children, so raise exception */
1598 PyErr_SetString(
1599 PyExc_ValueError,
1600 "list.remove(x): x not in list"
1601 );
1602 return NULL;
1603 }
1604
1605 for (i = 0; i < self->extra->length; i++) {
1606 if (self->extra->children[i] == subelement)
1607 break;
1608 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1609 if (rc > 0)
1610 break;
1611 if (rc < 0)
1612 return NULL;
1613 }
1614
1615 if (i >= self->extra->length) {
1616 /* subelement is not in children, so raise exception */
1617 PyErr_SetString(
1618 PyExc_ValueError,
1619 "list.remove(x): x not in list"
1620 );
1621 return NULL;
1622 }
1623
1624 found = self->extra->children[i];
1625
1626 self->extra->length--;
1627 for (; i < self->extra->length; i++)
1628 self->extra->children[i] = self->extra->children[i+1];
1629
1630 Py_DECREF(found);
1631 Py_RETURN_NONE;
1632}
1633
1634static PyObject*
1635element_repr(ElementObject* self)
1636{
1637 int status;
1638
1639 if (self->tag == NULL)
1640 return PyUnicode_FromFormat("<Element at %p>", self);
1641
1642 status = Py_ReprEnter((PyObject *)self);
1643 if (status == 0) {
1644 PyObject *res;
1645 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1646 Py_ReprLeave((PyObject *)self);
1647 return res;
1648 }
1649 if (status > 0)
1650 PyErr_Format(PyExc_RuntimeError,
1651 "reentrant call inside %s.__repr__",
1652 Py_TYPE(self)->tp_name);
1653 return NULL;
1654}
1655
1656/*[clinic input]
1657_elementtree.Element.set
1658
1659 key: object
1660 value: object
1661 /
1662
1663[clinic start generated code]*/
1664
1665static PyObject *
1666_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1667 PyObject *value)
1668/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1669{
1670 PyObject* attrib;
1671
1672 if (!self->extra) {
1673 if (create_extra(self, NULL) < 0)
1674 return NULL;
1675 }
1676
1677 attrib = element_get_attrib(self);
1678 if (!attrib)
1679 return NULL;
1680
1681 if (PyDict_SetItem(attrib, key, value) < 0)
1682 return NULL;
1683
1684 Py_RETURN_NONE;
1685}
1686
1687static int
1688element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1689{
1690 ElementObject* self = (ElementObject*) self_;
1691 Py_ssize_t i;
1692 PyObject* old;
1693
1694 if (!self->extra || index < 0 || index >= self->extra->length) {
1695 PyErr_SetString(
1696 PyExc_IndexError,
1697 "child assignment index out of range");
1698 return -1;
1699 }
1700
1701 old = self->extra->children[index];
1702
1703 if (item) {
1704 if (!Element_Check(item)) {
1705 raise_type_error(item);
1706 return -1;
1707 }
1708 Py_INCREF(item);
1709 self->extra->children[index] = item;
1710 } else {
1711 self->extra->length--;
1712 for (i = index; i < self->extra->length; i++)
1713 self->extra->children[i] = self->extra->children[i+1];
1714 }
1715
1716 Py_DECREF(old);
1717
1718 return 0;
1719}
1720
1721static PyObject*
1722element_subscr(PyObject* self_, PyObject* item)
1723{
1724 ElementObject* self = (ElementObject*) self_;
1725
1726 if (PyIndex_Check(item)) {
1727 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1728
1729 if (i == -1 && PyErr_Occurred()) {
1730 return NULL;
1731 }
1732 if (i < 0 && self->extra)
1733 i += self->extra->length;
1734 return element_getitem(self_, i);
1735 }
1736 else if (PySlice_Check(item)) {
1737 Py_ssize_t start, stop, step, slicelen, i;
1738 size_t cur;
1739 PyObject* list;
1740
1741 if (!self->extra)
1742 return PyList_New(0);
1743
1744 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1745 return NULL;
1746 }
1747 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1748 step);
1749
1750 if (slicelen <= 0)
1751 return PyList_New(0);
1752 else {
1753 list = PyList_New(slicelen);
1754 if (!list)
1755 return NULL;
1756
1757 for (cur = start, i = 0; i < slicelen;
1758 cur += step, i++) {
1759 PyObject* item = self->extra->children[cur];
1760 Py_INCREF(item);
1761 PyList_SET_ITEM(list, i, item);
1762 }
1763
1764 return list;
1765 }
1766 }
1767 else {
1768 PyErr_SetString(PyExc_TypeError,
1769 "element indices must be integers");
1770 return NULL;
1771 }
1772}
1773
1774static int
1775element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1776{
1777 ElementObject* self = (ElementObject*) self_;
1778
1779 if (PyIndex_Check(item)) {
1780 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1781
1782 if (i == -1 && PyErr_Occurred()) {
1783 return -1;
1784 }
1785 if (i < 0 && self->extra)
1786 i += self->extra->length;
1787 return element_setitem(self_, i, value);
1788 }
1789 else if (PySlice_Check(item)) {
1790 Py_ssize_t start, stop, step, slicelen, newlen, i;
1791 size_t cur;
1792
1793 PyObject* recycle = NULL;
1794 PyObject* seq;
1795
1796 if (!self->extra) {
1797 if (create_extra(self, NULL) < 0)
1798 return -1;
1799 }
1800
1801 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1802 return -1;
1803 }
1804 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1805 step);
1806
1807 if (value == NULL) {
1808 /* Delete slice */
1809 size_t cur;
1810 Py_ssize_t i;
1811
1812 if (slicelen <= 0)
1813 return 0;
1814
1815 /* Since we're deleting, the direction of the range doesn't matter,
1816 * so for simplicity make it always ascending.
1817 */
1818 if (step < 0) {
1819 stop = start + 1;
1820 start = stop + step * (slicelen - 1) - 1;
1821 step = -step;
1822 }
1823
1824 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1825
1826 /* recycle is a list that will contain all the children
1827 * scheduled for removal.
1828 */
1829 if (!(recycle = PyList_New(slicelen))) {
1830 return -1;
1831 }
1832
1833 /* This loop walks over all the children that have to be deleted,
1834 * with cur pointing at them. num_moved is the amount of children
1835 * until the next deleted child that have to be "shifted down" to
1836 * occupy the deleted's places.
1837 * Note that in the ith iteration, shifting is done i+i places down
1838 * because i children were already removed.
1839 */
1840 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1841 /* Compute how many children have to be moved, clipping at the
1842 * list end.
1843 */
1844 Py_ssize_t num_moved = step - 1;
1845 if (cur + step >= (size_t)self->extra->length) {
1846 num_moved = self->extra->length - cur - 1;
1847 }
1848
1849 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1850
1851 memmove(
1852 self->extra->children + cur - i,
1853 self->extra->children + cur + 1,
1854 num_moved * sizeof(PyObject *));
1855 }
1856
1857 /* Leftover "tail" after the last removed child */
1858 cur = start + (size_t)slicelen * step;
1859 if (cur < (size_t)self->extra->length) {
1860 memmove(
1861 self->extra->children + cur - slicelen,
1862 self->extra->children + cur,
1863 (self->extra->length - cur) * sizeof(PyObject *));
1864 }
1865
1866 self->extra->length -= slicelen;
1867
1868 /* Discard the recycle list with all the deleted sub-elements */
1869 Py_DECREF(recycle);
1870 return 0;
1871 }
1872
1873 /* A new slice is actually being assigned */
1874 seq = PySequence_Fast(value, "");
1875 if (!seq) {
1876 PyErr_Format(
1877 PyExc_TypeError,
1878 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1879 );
1880 return -1;
1881 }
1882 newlen = PySequence_Fast_GET_SIZE(seq);
1883
1884 if (step != 1 && newlen != slicelen)
1885 {
1886 Py_DECREF(seq);
1887 PyErr_Format(PyExc_ValueError,
1888 "attempt to assign sequence of size %zd "
1889 "to extended slice of size %zd",
1890 newlen, slicelen
1891 );
1892 return -1;
1893 }
1894
1895 /* Resize before creating the recycle bin, to prevent refleaks. */
1896 if (newlen > slicelen) {
1897 if (element_resize(self, newlen - slicelen) < 0) {
1898 Py_DECREF(seq);
1899 return -1;
1900 }
1901 }
1902
1903 for (i = 0; i < newlen; i++) {
1904 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1905 if (!Element_Check(element)) {
1906 raise_type_error(element);
1907 Py_DECREF(seq);
1908 return -1;
1909 }
1910 }
1911
1912 if (slicelen > 0) {
1913 /* to avoid recursive calls to this method (via decref), move
1914 old items to the recycle bin here, and get rid of them when
1915 we're done modifying the element */
1916 recycle = PyList_New(slicelen);
1917 if (!recycle) {
1918 Py_DECREF(seq);
1919 return -1;
1920 }
1921 for (cur = start, i = 0; i < slicelen;
1922 cur += step, i++)
1923 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1924 }
1925
1926 if (newlen < slicelen) {
1927 /* delete slice */
1928 for (i = stop; i < self->extra->length; i++)
1929 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1930 } else if (newlen > slicelen) {
1931 /* insert slice */
1932 for (i = self->extra->length-1; i >= stop; i--)
1933 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1934 }
1935
1936 /* replace the slice */
1937 for (cur = start, i = 0; i < newlen;
1938 cur += step, i++) {
1939 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1940 Py_INCREF(element);
1941 self->extra->children[cur] = element;
1942 }
1943
1944 self->extra->length += newlen - slicelen;
1945
1946 Py_DECREF(seq);
1947
1948 /* discard the recycle bin, and everything in it */
1949 Py_XDECREF(recycle);
1950
1951 return 0;
1952 }
1953 else {
1954 PyErr_SetString(PyExc_TypeError,
1955 "element indices must be integers");
1956 return -1;
1957 }
1958}
1959
1960static PyObject*
1961element_tag_getter(ElementObject *self, void *closure)
1962{
1963 PyObject *res = self->tag;
1964 Py_INCREF(res);
1965 return res;
1966}
1967
1968static PyObject*
1969element_text_getter(ElementObject *self, void *closure)
1970{
1971 PyObject *res = element_get_text(self);
1972 Py_XINCREF(res);
1973 return res;
1974}
1975
1976static PyObject*
1977element_tail_getter(ElementObject *self, void *closure)
1978{
1979 PyObject *res = element_get_tail(self);
1980 Py_XINCREF(res);
1981 return res;
1982}
1983
1984static PyObject*
1985element_attrib_getter(ElementObject *self, void *closure)
1986{
1987 PyObject *res;
1988 if (!self->extra) {
1989 if (create_extra(self, NULL) < 0)
1990 return NULL;
1991 }
1992 res = element_get_attrib(self);
1993 Py_XINCREF(res);
1994 return res;
1995}
1996
/* Macro for setter validation.
 *
 * A setter is called with a NULL value when the attribute is being deleted.
 * This macro rejects that case: it sets AttributeError and makes the
 * *enclosing function* return -1.  It expands to a bare `if` statement, so
 * it must be used as a full statement inside a function returning int.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    if ((V) == NULL) {                              \
        PyErr_SetString(                            \
            PyExc_AttributeError,                   \
            "can't delete element attribute");      \
        return -1;                                  \
    }
2005
2006static int
2007element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2008{
2009 _VALIDATE_ATTR_VALUE(value);
2010 Py_INCREF(value);
2011 Py_SETREF(self->tag, value);
2012 return 0;
2013}
2014
2015static int
2016element_text_setter(ElementObject *self, PyObject *value, void *closure)
2017{
2018 _VALIDATE_ATTR_VALUE(value);
2019 Py_INCREF(value);
2020 _set_joined_ptr(&self->text, value);
2021 return 0;
2022}
2023
2024static int
2025element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2026{
2027 _VALIDATE_ATTR_VALUE(value);
2028 Py_INCREF(value);
2029 _set_joined_ptr(&self->tail, value);
2030 return 0;
2031}
2032
2033static int
2034element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2035{
2036 _VALIDATE_ATTR_VALUE(value);
2037 if (!PyDict_Check(value)) {
2038 PyErr_Format(PyExc_TypeError,
2039 "attrib must be dict, not %.200s",
2040 Py_TYPE(value)->tp_name);
2041 return -1;
2042 }
2043 if (!self->extra) {
2044 if (create_extra(self, NULL) < 0)
2045 return -1;
2046 }
2047 Py_INCREF(value);
2048 Py_XSETREF(self->extra->attrib, value);
2049 return 0;
2050}
2051
2052static PySequenceMethods element_as_sequence = {
2053 (lenfunc) element_length,
2054 0, /* sq_concat */
2055 0, /* sq_repeat */
2056 element_getitem,
2057 0,
2058 element_setitem,
2059 0,
2060};
2061
2062/******************************* Element iterator ****************************/
2063
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* strong reference to the saved parent */
    Py_ssize_t child_index;     /* index of the next child of `parent` to visit */
} ParentLocator;
2076
/* State of one iter() / itertext() iteration. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* PyMem-allocated array used as a stack */
    Py_ssize_t parent_stack_used;   /* number of entries currently on the stack */
    Py_ssize_t parent_stack_size;   /* allocated capacity of parent_stack */
    ElementObject *root_element;    /* element to start from; set to NULL by
                                       elementiter_next once it is consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches every element */
    int gettext;                    /* nonzero: yield text (itertext), not elements */
} ElementIterObject;
2086
2087
2088static void
2089elementiter_dealloc(ElementIterObject *it)
2090{
2091 Py_ssize_t i = it->parent_stack_used;
2092 it->parent_stack_used = 0;
2093 /* bpo-31095: UnTrack is needed before calling any callbacks */
2094 PyObject_GC_UnTrack(it);
2095 while (i--)
2096 Py_XDECREF(it->parent_stack[i].parent);
2097 PyMem_Free(it->parent_stack);
2098
2099 Py_XDECREF(it->sought_tag);
2100 Py_XDECREF(it->root_element);
2101
2102 PyObject_GC_Del(it);
2103}
2104
2105static int
2106elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2107{
2108 Py_ssize_t i = it->parent_stack_used;
2109 while (i--)
2110 Py_VISIT(it->parent_stack[i].parent);
2111
2112 Py_VISIT(it->root_element);
2113 Py_VISIT(it->sought_tag);
2114 return 0;
2115}
2116
2117/* Helper function for elementiter_next. Add a new parent to the parent stack.
2118 */
2119static int
2120parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2121{
2122 ParentLocator *item;
2123
2124 if (it->parent_stack_used >= it->parent_stack_size) {
2125 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2126 ParentLocator *parent_stack = it->parent_stack;
2127 PyMem_Resize(parent_stack, ParentLocator, new_size);
2128 if (parent_stack == NULL)
2129 return -1;
2130 it->parent_stack = parent_stack;
2131 it->parent_stack_size = new_size;
2132 }
2133 item = it->parent_stack + it->parent_stack_used++;
2134 Py_INCREF(parent);
2135 item->parent = parent;
2136 item->child_index = 0;
2137 return 0;
2138}
2139
/* tp_iternext for the element iterator.
 *
 * Returns a new reference to the next element (iter()) or the next
 * non-empty text/tail object (itertext()).  Returns NULL with
 * StopIteration set when the traversal is finished, or NULL with another
 * exception set on error.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference the stack held on elem is ours now. */
                it->parent_stack_used--;
                /* Note the extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* elem is now an owned reference; the push takes its own one. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter(): no tag filter, or the tag matches -> hand elem out. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* itertext(): `text` is a borrowed text or tail of elem (NULL when
         * the helper failed).  None and falsy values are skipped. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Own the text before dropping the element it came from. */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2245
2246
2247static PyTypeObject ElementIter_Type = {
2248 PyVarObject_HEAD_INIT(NULL, 0)
2249 /* Using the module's name since the pure-Python implementation does not
2250 have such a type. */
2251 "_elementtree._element_iterator", /* tp_name */
2252 sizeof(ElementIterObject), /* tp_basicsize */
2253 0, /* tp_itemsize */
2254 /* methods */
2255 (destructor)elementiter_dealloc, /* tp_dealloc */
2256 0, /* tp_vectorcall_offset */
2257 0, /* tp_getattr */
2258 0, /* tp_setattr */
2259 0, /* tp_as_async */
2260 0, /* tp_repr */
2261 0, /* tp_as_number */
2262 0, /* tp_as_sequence */
2263 0, /* tp_as_mapping */
2264 0, /* tp_hash */
2265 0, /* tp_call */
2266 0, /* tp_str */
2267 0, /* tp_getattro */
2268 0, /* tp_setattro */
2269 0, /* tp_as_buffer */
2270 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2271 0, /* tp_doc */
2272 (traverseproc)elementiter_traverse, /* tp_traverse */
2273 0, /* tp_clear */
2274 0, /* tp_richcompare */
2275 0, /* tp_weaklistoffset */
2276 PyObject_SelfIter, /* tp_iter */
2277 (iternextfunc)elementiter_next, /* tp_iternext */
2278 0, /* tp_methods */
2279 0, /* tp_members */
2280 0, /* tp_getset */
2281 0, /* tp_base */
2282 0, /* tp_dict */
2283 0, /* tp_descr_get */
2284 0, /* tp_descr_set */
2285 0, /* tp_dictoffset */
2286 0, /* tp_init */
2287 0, /* tp_alloc */
2288 0, /* tp_new */
2289};
2290
2291#define INIT_PARENT_STACK_SIZE 8
2292
2293static PyObject *
2294create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2295{
2296 ElementIterObject *it;
2297
2298 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2299 if (!it)
2300 return NULL;
2301
2302 Py_INCREF(tag);
2303 it->sought_tag = tag;
2304 it->gettext = gettext;
2305 Py_INCREF(self);
2306 it->root_element = self;
2307
2308 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2309 if (it->parent_stack == NULL) {
2310 Py_DECREF(it);
2311 PyErr_NoMemory();
2312 return NULL;
2313 }
2314 it->parent_stack_used = 0;
2315 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2316
2317 PyObject_GC_Track(it);
2318
2319 return (PyObject *)it;
2320}
2321
2322
2323/* ==================================================================== */
2324/* the tree builder type */
2325
/* Instance layout of the TreeBuilder type.  All PyObject* members are
 * visited by treebuilder_gc_traverse and released by treebuilder_gc_clear.
 */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* factories installed by __init__; each may be NULL */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* flags copied from the __init__ arguments of the same name; forced to
       0 when the corresponding factory is not set */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;
2356
/* True when `op` is exactly of type TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2358
2359/* -------------------------------------------------------------------- */
2360/* constructor and destructor */
2361
2362static PyObject *
2363treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2364{
2365 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2366 if (t != NULL) {
2367 t->root = NULL;
2368
2369 Py_INCREF(Py_None);
2370 t->this = Py_None;
2371 Py_INCREF(Py_None);
2372 t->last = Py_None;
2373
2374 t->data = NULL;
2375 t->element_factory = NULL;
2376 t->comment_factory = NULL;
2377 t->pi_factory = NULL;
2378 t->stack = PyList_New(20);
2379 if (!t->stack) {
2380 Py_DECREF(t->this);
2381 Py_DECREF(t->last);
2382 Py_DECREF((PyObject *) t);
2383 return NULL;
2384 }
2385 t->index = 0;
2386
2387 t->events_append = NULL;
2388 t->start_event_obj = t->end_event_obj = NULL;
2389 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2390 t->comment_event_obj = t->pi_event_obj = NULL;
2391 t->insert_comments = t->insert_pis = 0;
2392 }
2393 return (PyObject *)t;
2394}
2395
2396/*[clinic input]
2397_elementtree.TreeBuilder.__init__
2398
2399 element_factory: object = None
2400 *
2401 comment_factory: object = None
2402 pi_factory: object = None
2403 insert_comments: bool = False
2404 insert_pis: bool = False
2405
2406[clinic start generated code]*/
2407
2408static int
2409_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2410 PyObject *element_factory,
2411 PyObject *comment_factory,
2412 PyObject *pi_factory,
2413 int insert_comments, int insert_pis)
2414/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2415{
2416 if (element_factory != Py_None) {
2417 Py_INCREF(element_factory);
2418 Py_XSETREF(self->element_factory, element_factory);
2419 } else {
2420 Py_CLEAR(self->element_factory);
2421 }
2422
2423 if (comment_factory == Py_None) {
2424 elementtreestate *st = ET_STATE_GLOBAL;
2425 comment_factory = st->comment_factory;
2426 }
2427 if (comment_factory) {
2428 Py_INCREF(comment_factory);
2429 Py_XSETREF(self->comment_factory, comment_factory);
2430 self->insert_comments = insert_comments;
2431 } else {
2432 Py_CLEAR(self->comment_factory);
2433 self->insert_comments = 0;
2434 }
2435
2436 if (pi_factory == Py_None) {
2437 elementtreestate *st = ET_STATE_GLOBAL;
2438 pi_factory = st->pi_factory;
2439 }
2440 if (pi_factory) {
2441 Py_INCREF(pi_factory);
2442 Py_XSETREF(self->pi_factory, pi_factory);
2443 self->insert_pis = insert_pis;
2444 } else {
2445 Py_CLEAR(self->pi_factory);
2446 self->insert_pis = 0;
2447 }
2448
2449 return 0;
2450}
2451
/* GC traversal helper: passes every PyObject* member of the builder to
   the visit callback through Py_VISIT.  Returns 0 once all members have
   been visited (Py_VISIT itself returns early on a non-zero visit
   result). */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
2473
/* Drops every object reference held by the builder and leaves the
   corresponding members set to NULL (Py_CLEAR).  Also used by
   treebuilder_dealloc().  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
2495
/* Destructor: stop GC tracking first, release all owned references via
   treebuilder_gc_clear(), then free the object with the type's tp_free. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2503
2504/* -------------------------------------------------------------------- */
2505/* helpers for handling of arbitrary element-like objects */
2506
2507/*[clinic input]
2508_elementtree._set_factories
2509
2510 comment_factory: object
2511 pi_factory: object
2512 /
2513
2514Change the factories used to create comments and processing instructions.
2515
2516For internal use only.
2517[clinic start generated code]*/
2518
2519static PyObject *
2520_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2521 PyObject *pi_factory)
2522/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2523{
2524 elementtreestate *st = ET_STATE_GLOBAL;
2525 PyObject *old;
2526
2527 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2528 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2529 Py_TYPE(comment_factory)->tp_name);
2530 return NULL;
2531 }
2532 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2533 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2534 Py_TYPE(pi_factory)->tp_name);
2535 return NULL;
2536 }
2537
2538 old = PyTuple_Pack(2,
2539 st->comment_factory ? st->comment_factory : Py_None,
2540 st->pi_factory ? st->pi_factory : Py_None);
2541
2542 if (comment_factory == Py_None) {
2543 Py_CLEAR(st->comment_factory);
2544 } else {
2545 Py_INCREF(comment_factory);
2546 Py_XSETREF(st->comment_factory, comment_factory);
2547 }
2548 if (pi_factory == Py_None) {
2549 Py_CLEAR(st->pi_factory);
2550 } else {
2551 Py_INCREF(pi_factory);
2552 Py_XSETREF(st->pi_factory, pi_factory);
2553 }
2554
2555 return old;
2556}
2557
/* Append the pending character data in *data to an element's text or
 * tail.
 *
 * element - the object receiving the data
 * data    - address of the pending data slot; set to NULL on success
 * dest    - address of the text/tail slot inside an ElementObject; only
 *           dereferenced when element is an exact Element
 * name    - identifier of the attribute ("text" or "tail") used on the
 *           generic path
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
                                        PyObject **dest, _Py_Identifier *name)
{
    /* Fast paths for the "almost always" cases. */
    if (Element_CheckExact(element)) {
        PyObject *dest_obj = JOIN_OBJ(*dest);
        if (dest_obj == Py_None) {
            /* Slot is empty: move the reference from *data into the slot,
               setting the join flag when the data is a list. */
            *dest = JOIN_SET(*data, PyList_CheckExact(*data));
            *data = NULL;
            Py_DECREF(dest_obj);
            return 0;
        }
        else if (JOIN_GET(*dest)) {
            /* Slot carries the join flag: extend the stored object in
               place by assigning *data to the empty slice at its end. */
            if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
                return -1;
            }
            Py_CLEAR(*data);
            return 0;
        }
    }

    /* Fallback for the non-Element / non-trivial cases. */
    {
        int r;
        PyObject* joined;
        PyObject* previous = _PyObject_GetAttrId(element, name);
        if (!previous)
            return -1;
        joined = list_join(*data);
        if (!joined) {
            Py_DECREF(previous);
            return -1;
        }
        if (previous != Py_None) {
            /* Existing value: concatenate it with the new data. */
            PyObject *tmp = PyNumber_Add(previous, joined);
            Py_DECREF(joined);
            Py_DECREF(previous);
            if (!tmp)
                return -1;
            joined = tmp;
        } else {
            Py_DECREF(previous);
        }

        r = _PyObject_SetAttrId(element, name, joined);
        Py_DECREF(joined);
        if (r < 0)
            return -1;
        Py_CLEAR(*data);
        return 0;
    }
}
2611
2612LOCAL(int)
2613treebuilder_flush_data(TreeBuilderObject* self)
2614{
2615 if (!self->data) {
2616 return 0;
2617 }
2618
2619 if (!self->last_for_tail) {
2620 PyObject *element = self->last;
2621 _Py_IDENTIFIER(text);
2622 return treebuilder_extend_element_text_or_tail(
2623 element, &self->data,
2624 &((ElementObject *) element)->text, &PyId_text);
2625 }
2626 else {
2627 PyObject *element = self->last_for_tail;
2628 _Py_IDENTIFIER(tail);
2629 return treebuilder_extend_element_text_or_tail(
2630 element, &self->data,
2631 &((ElementObject *) element)->tail, &PyId_tail);
2632 }
2633}
2634
2635static int
2636treebuilder_add_subelement(PyObject *element, PyObject *child)
2637{
2638 _Py_IDENTIFIER(append);
2639 if (Element_CheckExact(element)) {
2640 ElementObject *elem = (ElementObject *) element;
2641 return element_add_subelement(elem, child);
2642 }
2643 else {
2644 PyObject *res;
2645 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2646 if (res == NULL)
2647 return -1;
2648 Py_DECREF(res);
2649 return 0;
2650 }
2651}
2652
2653LOCAL(int)
2654treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2655 PyObject *node)
2656{
2657 if (action != NULL) {
2658 PyObject *res;
2659 PyObject *event = PyTuple_Pack(2, action, node);
2660 if (event == NULL)
2661 return -1;
2662 res = PyObject_CallOneArg(self->events_append, event);
2663 Py_DECREF(event);
2664 if (res == NULL)
2665 return -1;
2666 Py_DECREF(res);
2667 }
2668 return 0;
2669}
2670
2671/* -------------------------------------------------------------------- */
2672/* handlers */
2673
2674LOCAL(PyObject*)
2675treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2676 PyObject* attrib)
2677{
2678 PyObject* node;
2679 PyObject* this;
2680 elementtreestate *st = ET_STATE_GLOBAL;
2681
2682 if (treebuilder_flush_data(self) < 0) {
2683 return NULL;
2684 }
2685
2686 if (!self->element_factory) {
2687 node = create_new_element(tag, attrib);
2688 } else if (attrib == NULL) {
2689 attrib = PyDict_New();
2690 if (!attrib)
2691 return NULL;
2692 node = PyObject_CallFunctionObjArgs(self->element_factory,
2693 tag, attrib, NULL);
2694 Py_DECREF(attrib);
2695 }
2696 else {
2697 node = PyObject_CallFunctionObjArgs(self->element_factory,
2698 tag, attrib, NULL);
2699 }
2700 if (!node) {
2701 return NULL;
2702 }
2703
2704 this = self->this;
2705 Py_CLEAR(self->last_for_tail);
2706
2707 if (this != Py_None) {
2708 if (treebuilder_add_subelement(this, node) < 0)
2709 goto error;
2710 } else {
2711 if (self->root) {
2712 PyErr_SetString(
2713 st->parseerror_obj,
2714 "multiple elements on top level"
2715 );
2716 goto error;
2717 }
2718 Py_INCREF(node);
2719 self->root = node;
2720 }
2721
2722 if (self->index < PyList_GET_SIZE(self->stack)) {
2723 if (PyList_SetItem(self->stack, self->index, this) < 0)
2724 goto error;
2725 Py_INCREF(this);
2726 } else {
2727 if (PyList_Append(self->stack, this) < 0)
2728 goto error;
2729 }
2730 self->index++;
2731
2732 Py_INCREF(node);
2733 Py_SETREF(self->this, node);
2734 Py_INCREF(node);
2735 Py_SETREF(self->last, node);
2736
2737 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2738 goto error;
2739
2740 return node;
2741
2742 error:
2743 Py_DECREF(node);
2744 return NULL;
2745}
2746
/* Buffer a chunk of character data in self->data until the next flush.
 *
 * The first chunk is stored as is; later chunks turn the buffer into a
 * list (or grow a privately owned bytes object by one byte).  Data
 * arriving while self->last is None is ignored.
 *
 * Returns a new reference to None, or NULL with an exception set.
 */
LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
{
    if (!self->data) {
        if (self->last == Py_None) {
            /* ignore calls to data before the first call to start */
            Py_RETURN_NONE;
        }
        /* store the first item as is */
        Py_INCREF(data); self->data = data;
    } else {
        /* more than one item; use a list to collect items */
        if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
            PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
            /* XXX this code path unused in Python 3? */
            /* expat often generates single character data sections; handle
               the most common case by resizing the existing string... */
            Py_ssize_t size = PyBytes_GET_SIZE(self->data);
            if (_PyBytes_Resize(&self->data, size + 1) < 0)
                return NULL;
            PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
        } else if (PyList_CheckExact(self->data)) {
            if (PyList_Append(self->data, data) < 0)
                return NULL;
        } else {
            /* second chunk: wrap the stored item and the new one in a
               list; the list takes over the reference held in self->data */
            PyObject* list = PyList_New(2);
            if (!list)
                return NULL;
            PyList_SET_ITEM(list, 0, self->data);
            Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
            self->data = list;
        }
    }

    Py_RETURN_NONE;
}
2783
/* Handle an end tag.
 *
 * Flushes pending character data, makes the element being closed the
 * "last" element (and the receiver of following tail data), pops its
 * parent off the stack as the new current element and reports an "end"
 * event when requested.  The tag argument is not used here.
 *
 * Returns a new reference to the closed element, or NULL with an
 * exception set (IndexError when the stack is empty).
 */
LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
{
    PyObject* item;

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->index == 0) {
        PyErr_SetString(
            PyExc_IndexError,
            "pop from empty stack"
            );
        return NULL;
    }

    /* The reference owned by self->this moves to self->last; the extra
       INCREF below is the one handed to self->last_for_tail.  The old
       "last" is only released after all slots have been updated. */
    item = self->last;
    self->last = self->this;
    Py_INCREF(self->last);
    Py_XSETREF(self->last_for_tail, self->last);
    self->index--;
    /* PyList_GET_ITEM returns a borrowed reference; take our own. */
    self->this = PyList_GET_ITEM(self->stack, self->index);
    Py_INCREF(self->this);
    Py_DECREF(item);

    if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
        return NULL;

    Py_INCREF(self->last);
    return (PyObject*) self->last;
}
2816
/* Handle a comment.
 *
 * With a comment factory set, the factory is called with the text; the
 * resulting object is appended to the current element when
 * insert_comments is enabled and a current element exists, and it then
 * becomes the receiver of following tail data.  Without a factory the
 * text itself stands in for the comment.  A "comment" event is reported
 * when requested.
 *
 * Returns a new reference to the comment object (or to text), or NULL
 * with an exception set.
 */
LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
{
    PyObject* comment;
    PyObject* this;

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->comment_factory) {
        comment = PyObject_CallOneArg(self->comment_factory, text);
        if (!comment)
            return NULL;

        this = self->this;
        if (self->insert_comments && this != Py_None) {
            if (treebuilder_add_subelement(this, comment) < 0)
                goto error;
            Py_INCREF(comment);
            Py_XSETREF(self->last_for_tail, comment);
        }
    } else {
        Py_INCREF(text);
        comment = text;
    }

    if (self->events_append && self->comment_event_obj) {
        if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
            goto error;
    }

    return comment;

  error:
    Py_DECREF(comment);
    return NULL;
}
2855
/* Handle a processing instruction.
 *
 * With a PI factory set, the factory is called with (target, text); the
 * resulting object is appended to the current element when insert_pis is
 * enabled and a current element exists, and it then becomes the receiver
 * of following tail data.  Without a factory a (target, text) tuple
 * stands in for the PI.  A "pi" event is reported when requested.
 *
 * Returns a new reference to the PI object or tuple, or NULL with an
 * exception set.
 */
LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
{
    PyObject* pi;
    PyObject* this;
    /* positional argument array for the factory call */
    PyObject* stack[2] = {target, text};

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->pi_factory) {
        pi = _PyObject_FastCall(self->pi_factory, stack, 2);
        if (!pi) {
            return NULL;
        }

        this = self->this;
        if (self->insert_pis && this != Py_None) {
            if (treebuilder_add_subelement(this, pi) < 0)
                goto error;
            Py_INCREF(pi);
            Py_XSETREF(self->last_for_tail, pi);
        }
    } else {
        pi = PyTuple_Pack(2, target, text);
        if (!pi) {
            return NULL;
        }
    }

    if (self->events_append && self->pi_event_obj) {
        if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
            goto error;
    }

    return pi;

  error:
    Py_DECREF(pi);
    return NULL;
}
2898
2899LOCAL(PyObject*)
2900treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2901{
2902 PyObject* parcel;
2903
2904 if (self->events_append && self->start_ns_event_obj) {
2905 parcel = PyTuple_Pack(2, prefix, uri);
2906 if (!parcel) {
2907 return NULL;
2908 }
2909
2910 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2911 Py_DECREF(parcel);
2912 return NULL;
2913 }
2914 Py_DECREF(parcel);
2915 }
2916
2917 Py_RETURN_NONE;
2918}
2919
2920LOCAL(PyObject*)
2921treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2922{
2923 if (self->events_append && self->end_ns_event_obj) {
2924 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2925 return NULL;
2926 }
2927 }
2928
2929 Py_RETURN_NONE;
2930}
2931
2932/* -------------------------------------------------------------------- */
2933/* methods (in alphabetical order) */
2934
2935/*[clinic input]
2936_elementtree.TreeBuilder.data
2937
2938 data: object
2939 /
2940
2941[clinic start generated code]*/
2942
/* TreeBuilder.data(data): thin wrapper around treebuilder_handle_data(). */
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    return treebuilder_handle_data(self, data);
}
2949
2950/*[clinic input]
2951_elementtree.TreeBuilder.end
2952
2953 tag: object
2954 /
2955
2956[clinic start generated code]*/
2957
/* TreeBuilder.end(tag): thin wrapper around treebuilder_handle_end(). */
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    return treebuilder_handle_end(self, tag);
}
2964
2965/*[clinic input]
2966_elementtree.TreeBuilder.comment
2967
2968 text: object
2969 /
2970
2971[clinic start generated code]*/
2972
/* TreeBuilder.comment(text): thin wrapper around
   treebuilder_handle_comment(). */
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
    return treebuilder_handle_comment(self, text);
}
2979
2980/*[clinic input]
2981_elementtree.TreeBuilder.pi
2982
2983 target: object
2984 text: object = None
2985 /
2986
2987[clinic start generated code]*/
2988
/* TreeBuilder.pi(target, text=None): thin wrapper around
   treebuilder_handle_pi(). */
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
                                 PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
    return treebuilder_handle_pi(self, target, text);
}
2996
2997LOCAL(PyObject*)
2998treebuilder_done(TreeBuilderObject* self)
2999{
3000 PyObject* res;
3001
3002 /* FIXME: check stack size? */
3003
3004 if (self->root)
3005 res = self->root;
3006 else
3007 res = Py_None;
3008
3009 Py_INCREF(res);
3010 return res;
3011}
3012
3013/*[clinic input]
3014_elementtree.TreeBuilder.close
3015
3016[clinic start generated code]*/
3017
/* TreeBuilder.close(): thin wrapper around treebuilder_done(). */
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    return treebuilder_done(self);
}
3024
3025/*[clinic input]
3026_elementtree.TreeBuilder.start
3027
3028 tag: object
3029 attrs: object(subclass_of='&PyDict_Type')
3030 /
3031
3032[clinic start generated code]*/
3033
/* TreeBuilder.start(tag, attrs): thin wrapper around
   treebuilder_handle_start().  The clinic declaration above restricts
   attrs to dict instances. */
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
{
    return treebuilder_handle_start(self, tag, attrs);
}
3041
3042/* ==================================================================== */
3043/* the expat interface */
3044
3045#include "expat.h"
3046#include "pyexpat.h"
3047
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat entry point through the cached dispatch table. */
#define EXPAT(func) (expat_capi->func)

/* Allocator triple (malloc, realloc, free) handed to expat when a parser
   is created, so parser memory goes through the PyObject_* allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3056
/* Instance layout of the XMLParser type. */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser handle */
    XML_Parser parser;

    /* object receiving the parse events */
    PyObject *target;
    /* dict consulted by expat_default_handler() to resolve entity names */
    PyObject *entity;

    /* dict cache used by makeuniversal(): raw name bytes -> decoded name */
    PyObject *names;

    /* Attributes looked up on the target by name; each is NULL when the
       target does not provide the corresponding attribute. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3080
3081/* helpers */
3082
3083LOCAL(PyObject*)
3084makeuniversal(XMLParserObject* self, const char* string)
3085{
3086 /* convert a UTF-8 tag/attribute name from the expat parser
3087 to a universal name string */
3088
3089 Py_ssize_t size = (Py_ssize_t) strlen(string);
3090 PyObject* key;
3091 PyObject* value;
3092
3093 /* look the 'raw' name up in the names dictionary */
3094 key = PyBytes_FromStringAndSize(string, size);
3095 if (!key)
3096 return NULL;
3097
3098 value = PyDict_GetItemWithError(self->names, key);
3099
3100 if (value) {
3101 Py_INCREF(value);
3102 }
3103 else if (!PyErr_Occurred()) {
3104 /* new name. convert to universal name, and decode as
3105 necessary */
3106
3107 PyObject* tag;
3108 char* p;
3109 Py_ssize_t i;
3110
3111 /* look for namespace separator */
3112 for (i = 0; i < size; i++)
3113 if (string[i] == '}')
3114 break;
3115 if (i != size) {
3116 /* convert to universal name */
3117 tag = PyBytes_FromStringAndSize(NULL, size+1);
3118 if (tag == NULL) {
3119 Py_DECREF(key);
3120 return NULL;
3121 }
3122 p = PyBytes_AS_STRING(tag);
3123 p[0] = '{';
3124 memcpy(p+1, string, size);
3125 size++;
3126 } else {
3127 /* plain name; use key as tag */
3128 Py_INCREF(key);
3129 tag = key;
3130 }
3131
3132 /* decode universal name */
3133 p = PyBytes_AS_STRING(tag);
3134 value = PyUnicode_DecodeUTF8(p, size, "strict");
3135 Py_DECREF(tag);
3136 if (!value) {
3137 Py_DECREF(key);
3138 return NULL;
3139 }
3140
3141 /* add to names dictionary */
3142 if (PyDict_SetItem(self->names, key, value) < 0) {
3143 Py_DECREF(key);
3144 Py_DECREF(value);
3145 return NULL;
3146 }
3147 }
3148
3149 Py_DECREF(key);
3150 return value;
3151}
3152
/* Set the ParseError exception with the given parameters.
 * If message is not NULL, it's used as the error string. Otherwise, the
 * message string is the default for the given error_code.
 *
 * The exception text has the form "<message>: line L, column C".  The
 * exception instance additionally gets a "code" attribute (error_code as
 * an int) and a "position" attribute ((line, column) tuple).
 *
 * If building the exception fails part-way, the function returns early
 * and whatever exception the failing call raised is left set instead.
*/
static void
expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
                const char *message)
{
    PyObject *errmsg, *error, *position, *code;
    elementtreestate *st = ET_STATE_GLOBAL;

    errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
                message ? message : EXPAT(ErrorString)(error_code),
                line, column);
    if (errmsg == NULL)
        return;

    /* Instantiate the exception so attributes can be attached to it. */
    error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
    Py_DECREF(errmsg);
    if (!error)
        return;

    /* Add code and position attributes */
    code = PyLong_FromLong((long)error_code);
    if (!code) {
        Py_DECREF(error);
        return;
    }
    if (PyObject_SetAttrString(error, "code", code) == -1) {
        Py_DECREF(error);
        Py_DECREF(code);
        return;
    }
    Py_DECREF(code);

    position = Py_BuildValue("(nn)", line, column);
    if (!position) {
        Py_DECREF(error);
        return;
    }
    if (PyObject_SetAttrString(error, "position", position) == -1) {
        Py_DECREF(error);
        Py_DECREF(position);
        return;
    }
    Py_DECREF(position);

    PyErr_SetObject(st->parseerror_obj, error);
    Py_DECREF(error);
}
3203
3204/* -------------------------------------------------------------------- */
3205/* handlers */
3206
/* expat default handler, used here to resolve entity references.
 *
 * Only input of at least two characters starting with '&' is considered.
 * The name between the first and the last character is looked up in
 * self->entity; a hit is delivered to the target as character data, a
 * miss raises a ParseError ("undefined entity ...") unless an exception
 * is already pending.
 */
static void
expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
                      int data_len)
{
    PyObject* key;
    PyObject* value;
    PyObject* res;

    if (data_len < 2 || data_in[0] != '&')
        return;

    if (PyErr_Occurred())
        return;

    /* strip the leading '&' and the final character */
    key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
    if (!key)
        return;

    value = PyDict_GetItemWithError(self->entity, key);

    if (value) {
        if (TreeBuilder_CheckExact(self->target))
            res = treebuilder_handle_data(
                (TreeBuilderObject*) self->target, value
                );
        else if (self->handle_data)
            res = PyObject_CallOneArg(self->handle_data, value);
        else
            res = NULL;
        Py_XDECREF(res);
    } else if (!PyErr_Occurred()) {
        /* Report the first error, not the last */
        /* at most 100 characters of the reference are copied, which
           together with the 17-character prefix fits the buffer */
        char message[128] = "undefined entity ";
        strncat(message, data_in, data_len < 100?data_len:100);
        expat_set_error(
            XML_ERROR_UNDEFINED_ENTITY,
            EXPAT(GetErrorLineNumber)(self->parser),
            EXPAT(GetErrorColumnNumber)(self->parser),
            message
            );
    }

    Py_DECREF(key);
}
3251
3252static void
3253expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3254 const XML_Char **attrib_in)
3255{
3256 PyObject* res;
3257 PyObject* tag;
3258 PyObject* attrib;
3259 int ok;
3260
3261 if (PyErr_Occurred())
3262 return;
3263
3264 /* tag name */
3265 tag = makeuniversal(self, tag_in);
3266 if (!tag)
3267 return; /* parser will look for errors */
3268
3269 /* attributes */
3270 if (attrib_in[0]) {
3271 attrib = PyDict_New();
3272 if (!attrib) {
3273 Py_DECREF(tag);
3274 return;
3275 }
3276 while (attrib_in[0] && attrib_in[1]) {
3277 PyObject* key = makeuniversal(self, attrib_in[0]);
3278 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3279 if (!key || !value) {
3280 Py_XDECREF(value);
3281 Py_XDECREF(key);
3282 Py_DECREF(attrib);
3283 Py_DECREF(tag);
3284 return;
3285 }
3286 ok = PyDict_SetItem(attrib, key, value);
3287 Py_DECREF(value);
3288 Py_DECREF(key);
3289 if (ok < 0) {
3290 Py_DECREF(attrib);
3291 Py_DECREF(tag);
3292 return;
3293 }
3294 attrib_in += 2;
3295 }
3296 } else {
3297 attrib = NULL;
3298 }
3299
3300 if (TreeBuilder_CheckExact(self->target)) {
3301 /* shortcut */
3302 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3303 tag, attrib);
3304 }
3305 else if (self->handle_start) {
3306 if (attrib == NULL) {
3307 attrib = PyDict_New();
3308 if (!attrib) {
3309 Py_DECREF(tag);
3310 return;
3311 }
3312 }
3313 res = PyObject_CallFunctionObjArgs(self->handle_start,
3314 tag, attrib, NULL);
3315 } else
3316 res = NULL;
3317
3318 Py_DECREF(tag);
3319 Py_XDECREF(attrib);
3320
3321 Py_XDECREF(res);
3322}
3323
3324static void
3325expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3326 int data_len)
3327{
3328 PyObject* data;
3329 PyObject* res;
3330
3331 if (PyErr_Occurred())
3332 return;
3333
3334 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3335 if (!data)
3336 return; /* parser will look for errors */
3337
3338 if (TreeBuilder_CheckExact(self->target))
3339 /* shortcut */
3340 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3341 else if (self->handle_data)
3342 res = PyObject_CallOneArg(self->handle_data, data);
3343 else
3344 res = NULL;
3345
3346 Py_DECREF(data);
3347
3348 Py_XDECREF(res);
3349}
3350
3351static void
3352expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3353{
3354 PyObject* tag;
3355 PyObject* res = NULL;
3356
3357 if (PyErr_Occurred())
3358 return;
3359
3360 if (TreeBuilder_CheckExact(self->target))
3361 /* shortcut */
3362 /* the standard tree builder doesn't look at the end tag */
3363 res = treebuilder_handle_end(
3364 (TreeBuilderObject*) self->target, Py_None
3365 );
3366 else if (self->handle_end) {
3367 tag = makeuniversal(self, tag_in);
3368 if (tag) {
3369 res = PyObject_CallOneArg(self->handle_end, tag);
3370 Py_DECREF(tag);
3371 }
3372 }
3373
3374 Py_XDECREF(res);
3375}
3376
/* expat namespace-declaration-start callback.
 *
 * NULL prefix/uri inputs are treated as empty strings.  For an exact
 * TreeBuilder the strings are only decoded when it has requested
 * start-ns events; any other target gets its start_ns(prefix, uri)
 * method called when it has one.  Does nothing when an exception is
 * already pending.
 */
static void
expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
                       const XML_Char *uri_in)
{
    PyObject* res = NULL;
    PyObject* uri;
    PyObject* prefix;
    /* positional argument array for the start_ns() call */
    PyObject* stack[2];

    if (PyErr_Occurred())
        return;

    if (!uri_in)
        uri_in = "";
    if (!prefix_in)
        prefix_in = "";

    if (TreeBuilder_CheckExact(self->target)) {
        /* shortcut - TreeBuilder does not actually implement .start_ns() */
        TreeBuilderObject *target = (TreeBuilderObject*) self->target;

        if (target->events_append && target->start_ns_event_obj) {
            prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
            if (!prefix)
                return;
            uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
            if (!uri) {
                Py_DECREF(prefix);
                return;
            }

            res = treebuilder_handle_start_ns(target, prefix, uri);
            Py_DECREF(uri);
            Py_DECREF(prefix);
        }
    } else if (self->handle_start_ns) {
        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
        if (!prefix)
            return;
        uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
        if (!uri) {
            Py_DECREF(prefix);
            return;
        }

        stack[0] = prefix;
        stack[1] = uri;
        res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
        Py_DECREF(uri);
        Py_DECREF(prefix);
    }

    Py_XDECREF(res);
}
3431
/* expat namespace-declaration-end callback.
 *
 * A NULL prefix is treated as an empty string.  An exact TreeBuilder
 * that has requested end-ns events is notified with None in place of the
 * prefix; any other target gets its end_ns(prefix) method called when it
 * has one.  Does nothing when an exception is already pending.
 */
static void
expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
{
    PyObject *res = NULL;
    PyObject* prefix;

    if (PyErr_Occurred())
        return;

    if (!prefix_in)
        prefix_in = "";

    if (TreeBuilder_CheckExact(self->target)) {
        /* shortcut - TreeBuilder does not actually implement .end_ns() */
        TreeBuilderObject *target = (TreeBuilderObject*) self->target;

        if (target->events_append && target->end_ns_event_obj) {
            res = treebuilder_handle_end_ns(target, Py_None);
        }
    } else if (self->handle_end_ns) {
        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
        if (!prefix)
            return;

        res = PyObject_CallOneArg(self->handle_end_ns, prefix);
        Py_DECREF(prefix);
    }

    Py_XDECREF(res);
}
3462
3463static void
3464expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3465{
3466 PyObject* comment;
3467 PyObject* res;
3468
3469 if (PyErr_Occurred())
3470 return;
3471
3472 if (TreeBuilder_CheckExact(self->target)) {
3473 /* shortcut */
3474 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3475
3476 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3477 if (!comment)
3478 return; /* parser will look for errors */
3479
3480 res = treebuilder_handle_comment(target, comment);
3481 Py_XDECREF(res);
3482 Py_DECREF(comment);
3483 } else if (self->handle_comment) {
3484 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3485 if (!comment)
3486 return;
3487
3488 res = PyObject_CallOneArg(self->handle_comment, comment);
3489 Py_XDECREF(res);
3490 Py_DECREF(comment);
3491 }
3492}
3493
/* expat doctype-declaration-start callback.
 *
 * Converts the doctype name, system id and public id to Python objects
 * (None for a missing id) and calls the target's doctype(name, pubid,
 * sysid) method when it has one.  Otherwise, if the parser object itself
 * has a "doctype" attribute, a RuntimeWarning is issued saying that it
 * is ignored.  has_internal_subset is not used.  Does nothing when an
 * exception is already pending.
 */
static void
expat_start_doctype_handler(XMLParserObject *self,
                            const XML_Char *doctype_name,
                            const XML_Char *sysid,
                            const XML_Char *pubid,
                            int has_internal_subset)
{
    _Py_IDENTIFIER(doctype);
    PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
    PyObject *res;

    if (PyErr_Occurred())
        return;

    doctype_name_obj = makeuniversal(self, doctype_name);
    if (!doctype_name_obj)
        return;

    if (sysid) {
        sysid_obj = makeuniversal(self, sysid);
        if (!sysid_obj) {
            Py_DECREF(doctype_name_obj);
            return;
        }
    } else {
        Py_INCREF(Py_None);
        sysid_obj = Py_None;
    }

    if (pubid) {
        pubid_obj = makeuniversal(self, pubid);
        if (!pubid_obj) {
            Py_DECREF(doctype_name_obj);
            Py_DECREF(sysid_obj);
            return;
        }
    } else {
        Py_INCREF(Py_None);
        pubid_obj = Py_None;
    }

    /* If the target has a handler for doctype, call it. */
    if (self->handle_doctype) {
        /* note the argument order: name, public id, system id */
        res = PyObject_CallFunctionObjArgs(self->handle_doctype,
                                           doctype_name_obj, pubid_obj,
                                           sysid_obj, NULL);
        Py_XDECREF(res);
    }
    else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
        /* the result of the warning call is deliberately discarded */
        (void)PyErr_WarnEx(PyExc_RuntimeWarning,
                "The doctype() method of XMLParser is ignored. "
                "Define doctype() method on the TreeBuilder target.",
                1);
        Py_DECREF(res);
    }

    Py_DECREF(doctype_name_obj);
    Py_DECREF(pubid_obj);
    Py_DECREF(sysid_obj);
}
3554
/* expat processing-instruction callback.
 *
 * For an exact TreeBuilder the PI is only decoded and forwarded when the
 * builder has requested pi events or has insert_pis enabled; any other
 * target gets its pi(target, data) method called when it has one.  Does
 * nothing when an exception is already pending.
 */
static void
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
                 const XML_Char* data_in)
{
    PyObject* pi_target;
    PyObject* data;
    PyObject* res;
    /* positional argument array for the pi() call */
    PyObject* stack[2];

    if (PyErr_Occurred())
        return;

    if (TreeBuilder_CheckExact(self->target)) {
        /* shortcut */
        TreeBuilderObject *target = (TreeBuilderObject*) self->target;

        if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
            pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
            if (!pi_target)
                goto error;
            data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
            if (!data)
                goto error;
            res = treebuilder_handle_pi(target, pi_target, data);
            Py_XDECREF(res);
            Py_DECREF(data);
            Py_DECREF(pi_target);
        }
    } else if (self->handle_pi) {
        pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
        if (!pi_target)
            goto error;
        data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
        if (!data)
            goto error;

        stack[0] = pi_target;
        stack[1] = data;
        res = _PyObject_FastCall(self->handle_pi, stack, 2);
        Py_XDECREF(res);
        Py_DECREF(data);
        Py_DECREF(pi_target);
    }

    return;

  error:
    /* only reached after pi_target has been assigned (possibly NULL) */
    Py_XDECREF(pi_target);
    return;
}
3605
3606/* -------------------------------------------------------------------- */
3607
3608static PyObject *
3609xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3610{
3611 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3612 if (self) {
3613 self->parser = NULL;
3614 self->target = self->entity = self->names = NULL;
3615 self->handle_start_ns = self->handle_end_ns = NULL;
3616 self->handle_start = self->handle_data = self->handle_end = NULL;
3617 self->handle_comment = self->handle_pi = self->handle_close = NULL;
3618 self->handle_doctype = NULL;
3619 }
3620 return (PyObject *)self;
3621}
3622
3623static int
3624ignore_attribute_error(PyObject *value)
3625{
3626 if (value == NULL) {
3627 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3628 return -1;
3629 }
3630 PyErr_Clear();
3631 }
3632 return 0;
3633}
3634
3635/*[clinic input]
3636_elementtree.XMLParser.__init__
3637
3638 *
3639 target: object = None
3640 encoding: str(accept={str, NoneType}) = None
3641
3642[clinic start generated code]*/
3643
3644static int
3645_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3646 const char *encoding)
3647/*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/
3648{
3649 self->entity = PyDict_New();
3650 if (!self->entity)
3651 return -1;
3652
3653 self->names = PyDict_New();
3654 if (!self->names) {
3655 Py_CLEAR(self->entity);
3656 return -1;
3657 }
3658
3659 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3660 if (!self->parser) {
3661 Py_CLEAR(self->entity);
3662 Py_CLEAR(self->names);
3663 PyErr_NoMemory();
3664 return -1;
3665 }
3666 /* expat < 2.1.0 has no XML_SetHashSalt() */
3667 if (EXPAT(SetHashSalt) != NULL) {
3668 EXPAT(SetHashSalt)(self->parser,
3669 (unsigned long)_Py_HashSecret.expat.hashsalt);
3670 }
3671
3672 if (target != Py_None) {
3673 Py_INCREF(target);
3674 } else {
3675 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3676 if (!target) {
3677 Py_CLEAR(self->entity);
3678 Py_CLEAR(self->names);
3679 return -1;
3680 }
3681 }
3682 self->target = target;
3683
3684 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3685 if (ignore_attribute_error(self->handle_start_ns)) {
3686 return -1;
3687 }
3688 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3689 if (ignore_attribute_error(self->handle_end_ns)) {
3690 return -1;
3691 }
3692 self->handle_start = PyObject_GetAttrString(target, "start");
3693 if (ignore_attribute_error(self->handle_start)) {
3694 return -1;
3695 }
3696 self->handle_data = PyObject_GetAttrString(target, "data");
3697 if (ignore_attribute_error(self->handle_data)) {
3698 return -1;
3699 }
3700 self->handle_end = PyObject_GetAttrString(target, "end");
3701 if (ignore_attribute_error(self->handle_end)) {
3702 return -1;
3703 }
3704 self->handle_comment = PyObject_GetAttrString(target, "comment");
3705 if (ignore_attribute_error(self->handle_comment)) {
3706 return -1;
3707 }
3708 self->handle_pi = PyObject_GetAttrString(target, "pi");
3709 if (ignore_attribute_error(self->handle_pi)) {
3710 return -1;
3711 }
3712 self->handle_close = PyObject_GetAttrString(target, "close");
3713 if (ignore_attribute_error(self->handle_close)) {
3714 return -1;
3715 }
3716 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3717 if (ignore_attribute_error(self->handle_doctype)) {
3718 return -1;
3719 }
3720
3721 /* configure parser */
3722 EXPAT(SetUserData)(self->parser, self);
3723 if (self->handle_start_ns || self->handle_end_ns)
3724 EXPAT(SetNamespaceDeclHandler)(
3725 self->parser,
3726 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3727 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3728 );
3729 EXPAT(SetElementHandler)(
3730 self->parser,
3731 (XML_StartElementHandler) expat_start_handler,
3732 (XML_EndElementHandler) expat_end_handler
3733 );
3734 EXPAT(SetDefaultHandlerExpand)(
3735 self->parser,
3736 (XML_DefaultHandler) expat_default_handler
3737 );
3738 EXPAT(SetCharacterDataHandler)(
3739 self->parser,
3740 (XML_CharacterDataHandler) expat_data_handler
3741 );
3742 if (self->handle_comment)
3743 EXPAT(SetCommentHandler)(
3744 self->parser,
3745 (XML_CommentHandler) expat_comment_handler
3746 );
3747 if (self->handle_pi)
3748 EXPAT(SetProcessingInstructionHandler)(
3749 self->parser,
3750 (XML_ProcessingInstructionHandler) expat_pi_handler
3751 );
3752 EXPAT(SetStartDoctypeDeclHandler)(
3753 self->parser,
3754 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3755 );
3756 EXPAT(SetUnknownEncodingHandler)(
3757 self->parser,
3758 EXPAT(DefaultUnknownEncodingHandler), NULL
3759 );
3760
3761 return 0;
3762}
3763
3764static int
3765xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3766{
3767 Py_VISIT(self->handle_close);
3768 Py_VISIT(self->handle_pi);
3769 Py_VISIT(self->handle_comment);
3770 Py_VISIT(self->handle_end);
3771 Py_VISIT(self->handle_data);
3772 Py_VISIT(self->handle_start);
3773 Py_VISIT(self->handle_start_ns);
3774 Py_VISIT(self->handle_end_ns);
3775 Py_VISIT(self->handle_doctype);
3776
3777 Py_VISIT(self->target);
3778 Py_VISIT(self->entity);
3779 Py_VISIT(self->names);
3780
3781 return 0;
3782}
3783
3784static int
3785xmlparser_gc_clear(XMLParserObject *self)
3786{
3787 if (self->parser != NULL) {
3788 XML_Parser parser = self->parser;
3789 self->parser = NULL;
3790 EXPAT(ParserFree)(parser);
3791 }
3792
3793 Py_CLEAR(self->handle_close);
3794 Py_CLEAR(self->handle_pi);
3795 Py_CLEAR(self->handle_comment);
3796 Py_CLEAR(self->handle_end);
3797 Py_CLEAR(self->handle_data);
3798 Py_CLEAR(self->handle_start);
3799 Py_CLEAR(self->handle_start_ns);
3800 Py_CLEAR(self->handle_end_ns);
3801 Py_CLEAR(self->handle_doctype);
3802
3803 Py_CLEAR(self->target);
3804 Py_CLEAR(self->entity);
3805 Py_CLEAR(self->names);
3806
3807 return 0;
3808}
3809
3810static void
3811xmlparser_dealloc(XMLParserObject* self)
3812{
3813 PyObject_GC_UnTrack(self);
3814 xmlparser_gc_clear(self);
3815 Py_TYPE(self)->tp_free((PyObject *)self);
3816}
3817
3818Py_LOCAL_INLINE(int)
3819_check_xmlparser(XMLParserObject* self)
3820{
3821 if (self->target == NULL) {
3822 PyErr_SetString(PyExc_ValueError,
3823 "XMLParser.__init__() wasn't called");
3824 return 0;
3825 }
3826 return 1;
3827}
3828
3829LOCAL(PyObject*)
3830expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3831{
3832 int ok;
3833
3834 assert(!PyErr_Occurred());
3835 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3836
3837 if (PyErr_Occurred())
3838 return NULL;
3839
3840 if (!ok) {
3841 expat_set_error(
3842 EXPAT(GetErrorCode)(self->parser),
3843 EXPAT(GetErrorLineNumber)(self->parser),
3844 EXPAT(GetErrorColumnNumber)(self->parser),
3845 NULL
3846 );
3847 return NULL;
3848 }
3849
3850 Py_RETURN_NONE;
3851}
3852
3853/*[clinic input]
3854_elementtree.XMLParser.close
3855
3856[clinic start generated code]*/
3857
3858static PyObject *
3859_elementtree_XMLParser_close_impl(XMLParserObject *self)
3860/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3861{
3862 /* end feeding data to parser */
3863
3864 PyObject* res;
3865
3866 if (!_check_xmlparser(self)) {
3867 return NULL;
3868 }
3869 res = expat_parse(self, "", 0, 1);
3870 if (!res)
3871 return NULL;
3872
3873 if (TreeBuilder_CheckExact(self->target)) {
3874 Py_DECREF(res);
3875 return treebuilder_done((TreeBuilderObject*) self->target);
3876 }
3877 else if (self->handle_close) {
3878 Py_DECREF(res);
3879 return PyObject_CallNoArgs(self->handle_close);
3880 }
3881 else {
3882 return res;
3883 }
3884}
3885
3886/*[clinic input]
3887_elementtree.XMLParser.feed
3888
3889 data: object
3890 /
3891
3892[clinic start generated code]*/
3893
3894static PyObject *
3895_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3896/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3897{
3898 /* feed data to parser */
3899
3900 if (!_check_xmlparser(self)) {
3901 return NULL;
3902 }
3903 if (PyUnicode_Check(data)) {
3904 Py_ssize_t data_len;
3905 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3906 if (data_ptr == NULL)
3907 return NULL;
3908 if (data_len > INT_MAX) {
3909 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3910 return NULL;
3911 }
3912 /* Explicitly set UTF-8 encoding. Return code ignored. */
3913 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3914 return expat_parse(self, data_ptr, (int)data_len, 0);
3915 }
3916 else {
3917 Py_buffer view;
3918 PyObject *res;
3919 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3920 return NULL;
3921 if (view.len > INT_MAX) {
3922 PyBuffer_Release(&view);
3923 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3924 return NULL;
3925 }
3926 res = expat_parse(self, view.buf, (int)view.len, 0);
3927 PyBuffer_Release(&view);
3928 return res;
3929 }
3930}
3931
3932/*[clinic input]
3933_elementtree.XMLParser._parse_whole
3934
3935 file: object
3936 /
3937
3938[clinic start generated code]*/
3939
3940static PyObject *
3941_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3942/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3943{
3944 /* (internal) parse the whole input, until end of stream */
3945 PyObject* reader;
3946 PyObject* buffer;
3947 PyObject* temp;
3948 PyObject* res;
3949
3950 if (!_check_xmlparser(self)) {
3951 return NULL;
3952 }
3953 reader = PyObject_GetAttrString(file, "read");
3954 if (!reader)
3955 return NULL;
3956
3957 /* read from open file object */
3958 for (;;) {
3959
3960 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3961
3962 if (!buffer) {
3963 /* read failed (e.g. due to KeyboardInterrupt) */
3964 Py_DECREF(reader);
3965 return NULL;
3966 }
3967
3968 if (PyUnicode_CheckExact(buffer)) {
3969 /* A unicode object is encoded into bytes using UTF-8 */
3970 if (PyUnicode_GET_LENGTH(buffer) == 0) {
3971 Py_DECREF(buffer);
3972 break;
3973 }
3974 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3975 Py_DECREF(buffer);
3976 if (!temp) {
3977 /* Propagate exception from PyUnicode_AsEncodedString */
3978 Py_DECREF(reader);
3979 return NULL;
3980 }
3981 buffer = temp;
3982 }
3983 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3984 Py_DECREF(buffer);
3985 break;
3986 }
3987
3988 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3989 Py_DECREF(buffer);
3990 Py_DECREF(reader);
3991 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3992 return NULL;
3993 }
3994 res = expat_parse(
3995 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3996 );
3997
3998 Py_DECREF(buffer);
3999
4000 if (!res) {
4001 Py_DECREF(reader);
4002 return NULL;
4003 }
4004 Py_DECREF(res);
4005
4006 }
4007
4008 Py_DECREF(reader);
4009
4010 res = expat_parse(self, "", 0, 1);
4011
4012 if (res && TreeBuilder_CheckExact(self->target)) {
4013 Py_DECREF(res);
4014 return treebuilder_done((TreeBuilderObject*) self->target);
4015 }
4016
4017 return res;
4018}
4019
4020/*[clinic input]
4021_elementtree.XMLParser._setevents
4022
4023 events_queue: object
4024 events_to_report: object = None
4025 /
4026
4027[clinic start generated code]*/
4028
4029static PyObject *
4030_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4031 PyObject *events_queue,
4032 PyObject *events_to_report)
4033/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4034{
4035 /* activate element event reporting */
4036 Py_ssize_t i;
4037 TreeBuilderObject *target;
4038 PyObject *events_append, *events_seq;
4039
4040 if (!_check_xmlparser(self)) {
4041 return NULL;
4042 }
4043 if (!TreeBuilder_CheckExact(self->target)) {
4044 PyErr_SetString(
4045 PyExc_TypeError,
4046 "event handling only supported for ElementTree.TreeBuilder "
4047 "targets"
4048 );
4049 return NULL;
4050 }
4051
4052 target = (TreeBuilderObject*) self->target;
4053
4054 events_append = PyObject_GetAttrString(events_queue, "append");
4055 if (events_append == NULL)
4056 return NULL;
4057 Py_XSETREF(target->events_append, events_append);
4058
4059 /* clear out existing events */
4060 Py_CLEAR(target->start_event_obj);
4061 Py_CLEAR(target->end_event_obj);
4062 Py_CLEAR(target->start_ns_event_obj);
4063 Py_CLEAR(target->end_ns_event_obj);
4064 Py_CLEAR(target->comment_event_obj);
4065 Py_CLEAR(target->pi_event_obj);
4066
4067 if (events_to_report == Py_None) {
4068 /* default is "end" only */
4069 target->end_event_obj = PyUnicode_FromString("end");
4070 Py_RETURN_NONE;
4071 }
4072
4073 if (!(events_seq = PySequence_Fast(events_to_report,
4074 "events must be a sequence"))) {
4075 return NULL;
4076 }
4077
4078 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4079 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4080 const char *event_name = NULL;
4081 if (PyUnicode_Check(event_name_obj)) {
4082 event_name = PyUnicode_AsUTF8(event_name_obj);
4083 } else if (PyBytes_Check(event_name_obj)) {
4084 event_name = PyBytes_AS_STRING(event_name_obj);
4085 }
4086 if (event_name == NULL) {
4087 Py_DECREF(events_seq);
4088 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4089 return NULL;
4090 }
4091
4092 Py_INCREF(event_name_obj);
4093 if (strcmp(event_name, "start") == 0) {
4094 Py_XSETREF(target->start_event_obj, event_name_obj);
4095 } else if (strcmp(event_name, "end") == 0) {
4096 Py_XSETREF(target->end_event_obj, event_name_obj);
4097 } else if (strcmp(event_name, "start-ns") == 0) {
4098 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4099 EXPAT(SetNamespaceDeclHandler)(
4100 self->parser,
4101 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4102 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4103 );
4104 } else if (strcmp(event_name, "end-ns") == 0) {
4105 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4106 EXPAT(SetNamespaceDeclHandler)(
4107 self->parser,
4108 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4109 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4110 );
4111 } else if (strcmp(event_name, "comment") == 0) {
4112 Py_XSETREF(target->comment_event_obj, event_name_obj);
4113 EXPAT(SetCommentHandler)(
4114 self->parser,
4115 (XML_CommentHandler) expat_comment_handler
4116 );
4117 } else if (strcmp(event_name, "pi") == 0) {
4118 Py_XSETREF(target->pi_event_obj, event_name_obj);
4119 EXPAT(SetProcessingInstructionHandler)(
4120 self->parser,
4121 (XML_ProcessingInstructionHandler) expat_pi_handler
4122 );
4123 } else {
4124 Py_DECREF(event_name_obj);
4125 Py_DECREF(events_seq);
4126 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4127 return NULL;
4128 }
4129 }
4130
4131 Py_DECREF(events_seq);
4132 Py_RETURN_NONE;
4133}
4134
4135static PyMemberDef xmlparser_members[] = {
4136 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4137 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4138 {NULL}
4139};
4140
4141static PyObject*
4142xmlparser_version_getter(XMLParserObject *self, void *closure)
4143{
4144 return PyUnicode_FromFormat(
4145 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4146 XML_MINOR_VERSION, XML_MICRO_VERSION);
4147}
4148
4149static PyGetSetDef xmlparser_getsetlist[] = {
4150 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4151 {NULL},
4152};
4153
4154#include "clinic/_elementtree.c.h"
4155
4156static PyMethodDef element_methods[] = {
4157
4158 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4159
4160 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4161 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4162
4163 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4164 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4165 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4166
4167 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4168 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4169 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4170 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4171
4172 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4173 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4174 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4175
4176 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4177 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4178
4179 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4180
4181 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4182 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4183 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4184 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4185 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4186
4187 {NULL, NULL}
4188};
4189
4190static PyMappingMethods element_as_mapping = {
4191 (lenfunc) element_length,
4192 (binaryfunc) element_subscr,
4193 (objobjargproc) element_ass_subscr,
4194};
4195
4196static PyGetSetDef element_getsetlist[] = {
4197 {"tag",
4198 (getter)element_tag_getter,
4199 (setter)element_tag_setter,
4200 "A string identifying what kind of data this element represents"},
4201 {"text",
4202 (getter)element_text_getter,
4203 (setter)element_text_setter,
4204 "A string of text directly after the start tag, or None"},
4205 {"tail",
4206 (getter)element_tail_getter,
4207 (setter)element_tail_setter,
4208 "A string of text directly after the end tag, or None"},
4209 {"attrib",
4210 (getter)element_attrib_getter,
4211 (setter)element_attrib_setter,
4212 "A dictionary containing the element's attributes"},
4213 {NULL},
4214};
4215
4216static PyTypeObject Element_Type = {
4217 PyVarObject_HEAD_INIT(NULL, 0)
4218 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4219 /* methods */
4220 (destructor)element_dealloc, /* tp_dealloc */
4221 0, /* tp_vectorcall_offset */
4222 0, /* tp_getattr */
4223 0, /* tp_setattr */
4224 0, /* tp_as_async */
4225 (reprfunc)element_repr, /* tp_repr */
4226 0, /* tp_as_number */
4227 &element_as_sequence, /* tp_as_sequence */
4228 &element_as_mapping, /* tp_as_mapping */
4229 0, /* tp_hash */
4230 0, /* tp_call */
4231 0, /* tp_str */
4232 PyObject_GenericGetAttr, /* tp_getattro */
4233 0, /* tp_setattro */
4234 0, /* tp_as_buffer */
4235 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4236 /* tp_flags */
4237 0, /* tp_doc */
4238 (traverseproc)element_gc_traverse, /* tp_traverse */
4239 (inquiry)element_gc_clear, /* tp_clear */
4240 0, /* tp_richcompare */
4241 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4242 0, /* tp_iter */
4243 0, /* tp_iternext */
4244 element_methods, /* tp_methods */
4245 0, /* tp_members */
4246 element_getsetlist, /* tp_getset */
4247 0, /* tp_base */
4248 0, /* tp_dict */
4249 0, /* tp_descr_get */
4250 0, /* tp_descr_set */
4251 0, /* tp_dictoffset */
4252 (initproc)element_init, /* tp_init */
4253 PyType_GenericAlloc, /* tp_alloc */
4254 element_new, /* tp_new */
4255 0, /* tp_free */
4256};
4257
4258static PyMethodDef treebuilder_methods[] = {
4259 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4260 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4261 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4262 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4263 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4264 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4265 {NULL, NULL}
4266};
4267
4268static PyTypeObject TreeBuilder_Type = {
4269 PyVarObject_HEAD_INIT(NULL, 0)
4270 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4271 /* methods */
4272 (destructor)treebuilder_dealloc, /* tp_dealloc */
4273 0, /* tp_vectorcall_offset */
4274 0, /* tp_getattr */
4275 0, /* tp_setattr */
4276 0, /* tp_as_async */
4277 0, /* tp_repr */
4278 0, /* tp_as_number */
4279 0, /* tp_as_sequence */
4280 0, /* tp_as_mapping */
4281 0, /* tp_hash */
4282 0, /* tp_call */
4283 0, /* tp_str */
4284 0, /* tp_getattro */
4285 0, /* tp_setattro */
4286 0, /* tp_as_buffer */
4287 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4288 /* tp_flags */
4289 0, /* tp_doc */
4290 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4291 (inquiry)treebuilder_gc_clear, /* tp_clear */
4292 0, /* tp_richcompare */
4293 0, /* tp_weaklistoffset */
4294 0, /* tp_iter */
4295 0, /* tp_iternext */
4296 treebuilder_methods, /* tp_methods */
4297 0, /* tp_members */
4298 0, /* tp_getset */
4299 0, /* tp_base */
4300 0, /* tp_dict */
4301 0, /* tp_descr_get */
4302 0, /* tp_descr_set */
4303 0, /* tp_dictoffset */
4304 _elementtree_TreeBuilder___init__, /* tp_init */
4305 PyType_GenericAlloc, /* tp_alloc */
4306 treebuilder_new, /* tp_new */
4307 0, /* tp_free */
4308};
4309
4310static PyMethodDef xmlparser_methods[] = {
4311 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4312 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4313 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4314 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4315 {NULL, NULL}
4316};
4317
4318static PyTypeObject XMLParser_Type = {
4319 PyVarObject_HEAD_INIT(NULL, 0)
4320 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4321 /* methods */
4322 (destructor)xmlparser_dealloc, /* tp_dealloc */
4323 0, /* tp_vectorcall_offset */
4324 0, /* tp_getattr */
4325 0, /* tp_setattr */
4326 0, /* tp_as_async */
4327 0, /* tp_repr */
4328 0, /* tp_as_number */
4329 0, /* tp_as_sequence */
4330 0, /* tp_as_mapping */
4331 0, /* tp_hash */
4332 0, /* tp_call */
4333 0, /* tp_str */
4334 0, /* tp_getattro */
4335 0, /* tp_setattro */
4336 0, /* tp_as_buffer */
4337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4338 /* tp_flags */
4339 0, /* tp_doc */
4340 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4341 (inquiry)xmlparser_gc_clear, /* tp_clear */
4342 0, /* tp_richcompare */
4343 0, /* tp_weaklistoffset */
4344 0, /* tp_iter */
4345 0, /* tp_iternext */
4346 xmlparser_methods, /* tp_methods */
4347 xmlparser_members, /* tp_members */
4348 xmlparser_getsetlist, /* tp_getset */
4349 0, /* tp_base */
4350 0, /* tp_dict */
4351 0, /* tp_descr_get */
4352 0, /* tp_descr_set */
4353 0, /* tp_dictoffset */
4354 _elementtree_XMLParser___init__, /* tp_init */
4355 PyType_GenericAlloc, /* tp_alloc */
4356 xmlparser_new, /* tp_new */
4357 0, /* tp_free */
4358};
4359
4360/* ==================================================================== */
4361/* python module interface */
4362
4363static PyMethodDef _functions[] = {
4364 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
4365 _ELEMENTTREE__SET_FACTORIES_METHODDEF
4366 {NULL, NULL}
4367};
4368
4369
4370static struct PyModuleDef elementtreemodule = {
4371 PyModuleDef_HEAD_INIT,
4372 "_elementtree",
4373 NULL,
4374 sizeof(elementtreestate),
4375 _functions,
4376 NULL,
4377 elementtree_traverse,
4378 elementtree_clear,
4379 elementtree_free
4380};
4381
4382PyMODINIT_FUNC
4383PyInit__elementtree(void)
4384{
4385 PyObject *m, *temp;
4386 elementtreestate *st;
4387
4388 m = PyState_FindModule(&elementtreemodule);
4389 if (m) {
4390 Py_INCREF(m);
4391 return m;
4392 }
4393
4394 /* Initialize object types */
4395 if (PyType_Ready(&ElementIter_Type) < 0)
4396 return NULL;
4397 if (PyType_Ready(&TreeBuilder_Type) < 0)
4398 return NULL;
4399 if (PyType_Ready(&Element_Type) < 0)
4400 return NULL;
4401 if (PyType_Ready(&XMLParser_Type) < 0)
4402 return NULL;
4403
4404 m = PyModule_Create(&elementtreemodule);
4405 if (!m)
4406 return NULL;
4407 st = get_elementtree_state(m);
4408
4409 if (!(temp = PyImport_ImportModule("copy")))
4410 return NULL;
4411 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4412 Py_XDECREF(temp);
4413
4414 if (st->deepcopy_obj == NULL) {
4415 return NULL;
4416 }
4417
4418 assert(!PyErr_Occurred());
4419 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4420 return NULL;
4421
4422 /* link against pyexpat */
4423 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4424 if (expat_capi) {
4425 /* check that it's usable */
4426 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4427 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4428 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4429 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4430 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4431 PyErr_SetString(PyExc_ImportError,
4432 "pyexpat version is incompatible");
4433 return NULL;
4434 }
4435 } else {
4436 return NULL;
4437 }
4438
4439 st->parseerror_obj = PyErr_NewException(
4440 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4441 );
4442 Py_INCREF(st->parseerror_obj);
4443 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4444 Py_DECREF(st->parseerror_obj);
4445 return NULL;
4446 }
4447
4448 PyTypeObject *types[] = {
4449 &Element_Type,
4450 &TreeBuilder_Type,
4451 &XMLParser_Type
4452 };
4453
4454 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4455 if (PyModule_AddType(m, types[i]) < 0) {
4456 return NULL;
4457 }
4458 }
4459
4460 return m;
4461}
4462