1#include "Python.h"
2
3#include "pycore_bitutils.h" // _Py_popcount32
4#include "pycore_hamt.h"
5#include "pycore_object.h" // _PyObject_GC_TRACK()
6#include <stddef.h> // offsetof()
7
8/*
9This file provides an implementation of an immutable mapping using the
10Hash Array Mapped Trie (or HAMT) datastructure.
11
12This design allows to have:
13
141. Efficient copy: immutable mappings can be copied by reference,
15 making it an O(1) operation.
16
172. Efficient mutations: due to structural sharing, only a portion of
18 the trie needs to be copied when the collection is mutated. The
19 cost of set/delete operations is O(log N).
20
213. Efficient lookups: O(log N).
22
23(where N is number of key/value items in the immutable mapping.)
24
25
26HAMT
27====
28
29The core idea of HAMT is that the shape of the trie is encoded into the
30hashes of keys.
31
32Say we want to store a K/V pair in our mapping. First, we calculate the
33hash of K, let's say it's 19830128, or in binary:
34
35 0b1001011101001010101110000 = 19830128
36
37Now let's partition this bit representation of the hash into blocks of
385 bits each:
39
40 0b00_00000_10010_11101_00101_01011_10000 = 19830128
41 (6) (5) (4) (3) (2) (1)
42
43Each block of 5 bits represents a number between 0 and 31. So if we have
44a tree that consists of nodes, each of which is an array of 32 pointers,
45those 5-bit blocks will encode a position on a single tree level.
46
47For example, storing the key K with hash 19830128, results in the following
48tree structure:
49
50 (array of 32 pointers)
51 +---+ -- +----+----+----+ -- +----+
52 root node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b10000 = 16 (1)
53 (level 1) +---+ -- +----+----+----+ -- +----+
54 |
55 +---+ -- +----+----+----+ -- +----+
56 a 2nd level node | 0 | .. | 10 | 11 | 12 | .. | 31 | 0b01011 = 11 (2)
57 +---+ -- +----+----+----+ -- +----+
58 |
59 +---+ -- +----+----+----+ -- +----+
60 a 3rd level node | 0 | .. | 04 | 05 | 06 | .. | 31 | 0b00101 = 5 (3)
61 +---+ -- +----+----+----+ -- +----+
62 |
63 +---+ -- +----+----+----+----+
64 a 4th level node | 0 | .. | 04 | 29 | 30 | 31 | 0b11101 = 29 (4)
65 +---+ -- +----+----+----+----+
66 |
67 +---+ -- +----+----+----+ -- +----+
68 a 5th level node | 0 | .. | 17 | 18 | 19 | .. | 31 | 0b10010 = 18 (5)
69 +---+ -- +----+----+----+ -- +----+
70 |
71 +--------------+
72 |
73 +---+ -- +----+----+----+ -- +----+
74 a 6th level node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b00000 = 0 (6)
75 +---+ -- +----+----+----+ -- +----+
76 |
77 V -- our value (or collision)
78
79To rehash: for a K/V pair, the hash of K encodes where in the tree V will
80be stored.
81
82To optimize memory footprint and handle hash collisions, our implementation
83uses three different types of nodes:
84
85 * A Bitmap node;
86 * An Array node;
87 * A Collision node.
88
89Because we implement an immutable dictionary, our nodes are also
90immutable. Therefore, when we need to modify a node, we copy it, and
91do that modification to the copy.
92
93
94Array Nodes
95-----------
96
97These nodes are very simple. Essentially they are arrays of 32 pointers
98we used to illustrate the high-level idea in the previous section.
99
100We use Array nodes only when we need to store more than 16 pointers
101in a single node.
102
103Array nodes do not store key objects or value objects. They are used
104only as an indirection level - their pointers point to other nodes in
105the tree.
106
107
108Bitmap Node
109-----------
110
111Allocating a new 32-pointers array for every node of our tree would be
112very expensive. Unless we store millions of keys, most of tree nodes would
113be very sparse.
114
115When we have less than 16 elements in a node, we don't want to use the
116Array node, that would mean that we waste a lot of memory. Instead,
117we can use bitmap compression and can have just as many pointers
118as we need!
119
120Bitmap nodes consist of two fields:
121
1221. An array of pointers. If a Bitmap node holds N elements, the
123 array will be of N pointers.
124
1252. A 32bit integer -- a bitmap field. If an N-th bit is set in the
126 bitmap, it means that the node has an N-th element.
127
128For example, say we need to store a 3 elements sparse array:
129
130 +---+ -- +---+ -- +----+ -- +----+
131 | 0 | .. | 4 | .. | 11 | .. | 17 |
132 +---+ -- +---+ -- +----+ -- +----+
133 | | |
134 o1 o2 o3
135
136We allocate a three-pointer Bitmap node. Its bitmap field will be
137then set to:
138
139 0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
140
141To check if our Bitmap node has an I-th element we can do:
142
143 bitmap & (1 << I)
144
145
146And here's a formula to calculate a position in our pointer array
147which would correspond to an I-th element:
148
149 popcount(bitmap & ((1 << I) - 1))
150
151
152Let's break it down:
153
154 * `popcount` is a function that returns a number of bits set to 1;
155
156 * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
157 set to the *right* of our bit.
158
159
160So for our 17, 11, and 4 indexes:
161
162 * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
163
164 * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
165
166 * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
167
168
169To conclude: Bitmap nodes are just like Array nodes -- they can store
170a number of pointers, but use bitmap compression to eliminate unused
171pointers.
172
173
174Bitmap nodes have two pointers for each item:
175
176 +----+----+----+----+ -- +----+----+
177 | k1 | v1 | k2 | v2 | .. | kN | vN |
178 +----+----+----+----+ -- +----+----+
179
180When kI == NULL, vI points to another tree level.
181
182When kI != NULL, the actual key object is stored in kI, and its
183value is stored in vI.
184
185
186Collision Nodes
187---------------
188
189Collision nodes are simple arrays of pointers -- two pointers per
190key/value. When there's a hash collision, say for k1/v1 and k2/v2
191we have `hash(k1)==hash(k2)`. Then our collision node will be:
192
193 +----+----+----+----+
194 | k1 | v1 | k2 | v2 |
195 +----+----+----+----+
196
197
198Tree Structure
199--------------
200
201All nodes are PyObjects.
202
203The `PyHamtObject` object has a pointer to the root node (h_root),
204and has a length field (h_count).
205
206High-level functions accept a PyHamtObject object and dispatch to
207lower-level functions depending on what kind of node h_root points to.
208
209
210Operations
211==========
212
213There are three fundamental operations on an immutable dictionary:
214
2151. "o.assoc(k, v)" will return a new immutable dictionary, that will be
216 a copy of "o", but with the "k/v" item set.
217
218 Functions in this file:
219
220 hamt_node_assoc, hamt_node_bitmap_assoc,
221 hamt_node_array_assoc, hamt_node_collision_assoc
222
223 `hamt_node_assoc` function accepts a node object, and calls
224 other functions depending on its actual type.
225
2262. "o.find(k)" will lookup key "k" in "o".
227
228 Functions:
229
230 hamt_node_find, hamt_node_bitmap_find,
231 hamt_node_array_find, hamt_node_collision_find
232
2333. "o.without(k)" will return a new immutable dictionary, that will be
234 a copy of "o", buth without the "k" key.
235
236 Functions:
237
238 hamt_node_without, hamt_node_bitmap_without,
239 hamt_node_array_without, hamt_node_collision_without
240
241
242Further Reading
243===============
244
2451. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
246
2472. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
248
2493. Clojure's PersistentHashMap implementation:
250 https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
251
252
253Debug
254=====
255
256The HAMT datatype is accessible for testing purposes under the
257`_testcapi` module:
258
259 >>> from _testcapi import hamt
260 >>> h = hamt()
261 >>> h2 = h.set('a', 2)
262 >>> h3 = h2.set('b', 3)
263 >>> list(h3)
264 ['a', 'b']
265
266When CPython is built in debug mode, a '__dump__()' method is available
267to introspect the tree:
268
269 >>> print(h3.__dump__())
270 HAMT(len=2):
271 BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
272 'a': 2
273 'b': 3
274*/
275
276
277#define IS_ARRAY_NODE(node) Py_IS_TYPE(node, &_PyHamt_ArrayNode_Type)
278#define IS_BITMAP_NODE(node) Py_IS_TYPE(node, &_PyHamt_BitmapNode_Type)
279#define IS_COLLISION_NODE(node) Py_IS_TYPE(node, &_PyHamt_CollisionNode_Type)
280
281
282/* Return type for 'find' (lookup a key) functions.
283
284 * F_ERROR - an error occurred;
285 * F_NOT_FOUND - the key was not found;
286 * F_FOUND - the key was found.
287*/
288typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
289
290
291/* Return type for 'without' (delete a key) functions.
292
293 * W_ERROR - an error occurred;
294 * W_NOT_FOUND - the key was not found: there's nothing to delete;
295 * W_EMPTY - the key was found: the node/tree would be empty
296 if the key is deleted;
297 * W_NEWNODE - the key was found: a new node/tree is returned
298 without that key.
299*/
300typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
301
302
303/* Low-level iterator protocol type.
304
305 * I_ITEM - a new item has been yielded;
306 * I_END - the whole tree was visited (similar to StopIteration).
307*/
308typedef enum {I_ITEM, I_END} hamt_iter_t;
309
310
311#define HAMT_ARRAY_NODE_SIZE 32
312
313
314typedef struct {
315 PyObject_HEAD
316 PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
317 Py_ssize_t a_count;
318} PyHamtNode_Array;
319
320
321typedef struct {
322 PyObject_VAR_HEAD
323 uint32_t b_bitmap;
324 PyObject *b_array[1];
325} PyHamtNode_Bitmap;
326
327
328typedef struct {
329 PyObject_VAR_HEAD
330 int32_t c_hash;
331 PyObject *c_array[1];
332} PyHamtNode_Collision;
333
334
335static PyHamtNode_Bitmap *_empty_bitmap_node;
336static PyHamtObject *_empty_hamt;
337
338
339static PyHamtObject *
340hamt_alloc(void);
341
342static PyHamtNode *
343hamt_node_assoc(PyHamtNode *node,
344 uint32_t shift, int32_t hash,
345 PyObject *key, PyObject *val, int* added_leaf);
346
347static hamt_without_t
348hamt_node_without(PyHamtNode *node,
349 uint32_t shift, int32_t hash,
350 PyObject *key,
351 PyHamtNode **new_node);
352
353static hamt_find_t
354hamt_node_find(PyHamtNode *node,
355 uint32_t shift, int32_t hash,
356 PyObject *key, PyObject **val);
357
358#ifdef Py_DEBUG
359static int
360hamt_node_dump(PyHamtNode *node,
361 _PyUnicodeWriter *writer, int level);
362#endif
363
364static PyHamtNode *
365hamt_node_array_new(Py_ssize_t);
366
367static PyHamtNode *
368hamt_node_collision_new(int32_t hash, Py_ssize_t size);
369
370static inline Py_ssize_t
371hamt_node_collision_count(PyHamtNode_Collision *node);
372
373
374#ifdef Py_DEBUG
375static void
376_hamt_node_array_validate(void *obj_raw)
377{
378 PyObject *obj = _PyObject_CAST(obj_raw);
379 assert(IS_ARRAY_NODE(obj));
380 PyHamtNode_Array *node = (PyHamtNode_Array*)obj;
381 Py_ssize_t i = 0, count = 0;
382 for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
383 if (node->a_array[i] != NULL) {
384 count++;
385 }
386 }
387 assert(count == node->a_count);
388}
389
390#define VALIDATE_ARRAY_NODE(NODE) \
391 do { _hamt_node_array_validate(NODE); } while (0);
392#else
393#define VALIDATE_ARRAY_NODE(NODE)
394#endif
395
396
397/* Returns -1 on error */
398static inline int32_t
399hamt_hash(PyObject *o)
400{
401 Py_hash_t hash = PyObject_Hash(o);
402
403#if SIZEOF_PY_HASH_T <= 4
404 return hash;
405#else
406 if (hash == -1) {
407 /* exception */
408 return -1;
409 }
410
411 /* While it's somewhat suboptimal to reduce Python's 64 bit hash to
412 32 bits via XOR, it seems that the resulting hash function
413 is good enough (this is also how Long type is hashed in Java.)
414 Storing 10, 100, 1000 Python strings results in a relatively
415 shallow and uniform tree structure.
416
417 Also it's worth noting that it would be possible to adapt the tree
418 structure to 64 bit hashes, but that would increase memory pressure
419 and provide little to no performance benefits for collections with
420 fewer than billions of key/value pairs.
421
422 Important: do not change this hash reducing function. There are many
423 tests that need an exact tree shape to cover all code paths and
424 we do that by specifying concrete values for test data's `__hash__`.
425 If this function is changed most of the regression tests would
426 become useless.
427 */
428 int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
429 return xored == -1 ? -2 : xored;
430#endif
431}
432
433static inline uint32_t
434hamt_mask(int32_t hash, uint32_t shift)
435{
436 return (((uint32_t)hash >> shift) & 0x01f);
437}
438
439static inline uint32_t
440hamt_bitpos(int32_t hash, uint32_t shift)
441{
442 return (uint32_t)1 << hamt_mask(hash, shift);
443}
444
445static inline uint32_t
446hamt_bitindex(uint32_t bitmap, uint32_t bit)
447{
448 return (uint32_t)_Py_popcount32(bitmap & (bit - 1));
449}
450
451
452/////////////////////////////////// Dump Helpers
453#ifdef Py_DEBUG
454
455static int
456_hamt_dump_ident(_PyUnicodeWriter *writer, int level)
457{
458 /* Write `' ' * level` to the `writer` */
459 PyObject *str = NULL;
460 PyObject *num = NULL;
461 PyObject *res = NULL;
462 int ret = -1;
463
464 str = PyUnicode_FromString(" ");
465 if (str == NULL) {
466 goto error;
467 }
468
469 num = PyLong_FromLong((long)level);
470 if (num == NULL) {
471 goto error;
472 }
473
474 res = PyNumber_Multiply(str, num);
475 if (res == NULL) {
476 goto error;
477 }
478
479 ret = _PyUnicodeWriter_WriteStr(writer, res);
480
481error:
482 Py_XDECREF(res);
483 Py_XDECREF(str);
484 Py_XDECREF(num);
485 return ret;
486}
487
488static int
489_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
490{
491 /* A convenient helper combining _PyUnicodeWriter_WriteStr and
492 PyUnicode_FromFormatV.
493 */
494 PyObject* msg;
495 int ret;
496
497 va_list vargs;
498#ifdef HAVE_STDARG_PROTOTYPES
499 va_start(vargs, format);
500#else
501 va_start(vargs);
502#endif
503 msg = PyUnicode_FromFormatV(format, vargs);
504 va_end(vargs);
505
506 if (msg == NULL) {
507 return -1;
508 }
509
510 ret = _PyUnicodeWriter_WriteStr(writer, msg);
511 Py_DECREF(msg);
512 return ret;
513}
514
515#endif /* Py_DEBUG */
516/////////////////////////////////// Bitmap Node
517
518
519static PyHamtNode *
520hamt_node_bitmap_new(Py_ssize_t size)
521{
522 /* Create a new bitmap node of size 'size' */
523
524 PyHamtNode_Bitmap *node;
525 Py_ssize_t i;
526
527 assert(size >= 0);
528 assert(size % 2 == 0);
529
530 if (size == 0 && _empty_bitmap_node != NULL) {
531 Py_INCREF(_empty_bitmap_node);
532 return (PyHamtNode *)_empty_bitmap_node;
533 }
534
535 /* No freelist; allocate a new bitmap node */
536 node = PyObject_GC_NewVar(
537 PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
538 if (node == NULL) {
539 return NULL;
540 }
541
542 Py_SET_SIZE(node, size);
543
544 for (i = 0; i < size; i++) {
545 node->b_array[i] = NULL;
546 }
547
548 node->b_bitmap = 0;
549
550 _PyObject_GC_TRACK(node);
551
552 if (size == 0 && _empty_bitmap_node == NULL) {
553 /* Since bitmap nodes are immutable, we can cache the instance
554 for size=0 and reuse it whenever we need an empty bitmap node.
555 */
556 _empty_bitmap_node = node;
557 Py_INCREF(_empty_bitmap_node);
558 }
559
560 return (PyHamtNode *)node;
561}
562
563static inline Py_ssize_t
564hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
565{
566 return Py_SIZE(node) / 2;
567}
568
569static PyHamtNode_Bitmap *
570hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
571{
572 /* Clone a bitmap node; return a new one with the same child notes. */
573
574 PyHamtNode_Bitmap *clone;
575 Py_ssize_t i;
576
577 clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
578 if (clone == NULL) {
579 return NULL;
580 }
581
582 for (i = 0; i < Py_SIZE(node); i++) {
583 Py_XINCREF(node->b_array[i]);
584 clone->b_array[i] = node->b_array[i];
585 }
586
587 clone->b_bitmap = node->b_bitmap;
588 return clone;
589}
590
591static PyHamtNode_Bitmap *
592hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
593{
594 assert(bit & o->b_bitmap);
595 assert(hamt_node_bitmap_count(o) > 1);
596
597 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
598 Py_SIZE(o) - 2);
599 if (new == NULL) {
600 return NULL;
601 }
602
603 uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
604 uint32_t key_idx = 2 * idx;
605 uint32_t val_idx = key_idx + 1;
606 uint32_t i;
607
608 for (i = 0; i < key_idx; i++) {
609 Py_XINCREF(o->b_array[i]);
610 new->b_array[i] = o->b_array[i];
611 }
612
613 assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
614 for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
615 Py_XINCREF(o->b_array[i]);
616 new->b_array[i - 2] = o->b_array[i];
617 }
618
619 new->b_bitmap = o->b_bitmap & ~bit;
620 return new;
621}
622
623static PyHamtNode *
624hamt_node_new_bitmap_or_collision(uint32_t shift,
625 PyObject *key1, PyObject *val1,
626 int32_t key2_hash,
627 PyObject *key2, PyObject *val2)
628{
629 /* Helper method. Creates a new node for key1/val and key2/val2
630 pairs.
631
632 If key1 hash is equal to the hash of key2, a Collision node
633 will be created. If they are not equal, a Bitmap node is
634 created.
635 */
636
637 int32_t key1_hash = hamt_hash(key1);
638 if (key1_hash == -1) {
639 return NULL;
640 }
641
642 if (key1_hash == key2_hash) {
643 PyHamtNode_Collision *n;
644 n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
645 if (n == NULL) {
646 return NULL;
647 }
648
649 Py_INCREF(key1);
650 n->c_array[0] = key1;
651 Py_INCREF(val1);
652 n->c_array[1] = val1;
653
654 Py_INCREF(key2);
655 n->c_array[2] = key2;
656 Py_INCREF(val2);
657 n->c_array[3] = val2;
658
659 return (PyHamtNode *)n;
660 }
661 else {
662 int added_leaf = 0;
663 PyHamtNode *n = hamt_node_bitmap_new(0);
664 if (n == NULL) {
665 return NULL;
666 }
667
668 PyHamtNode *n2 = hamt_node_assoc(
669 n, shift, key1_hash, key1, val1, &added_leaf);
670 Py_DECREF(n);
671 if (n2 == NULL) {
672 return NULL;
673 }
674
675 n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
676 Py_DECREF(n2);
677 if (n == NULL) {
678 return NULL;
679 }
680
681 return n;
682 }
683}
684
685static PyHamtNode *
686hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
687 uint32_t shift, int32_t hash,
688 PyObject *key, PyObject *val, int* added_leaf)
689{
690 /* assoc operation for bitmap nodes.
691
692 Return: a new node, or self if key/val already is in the
693 collection.
694
695 'added_leaf' is later used in '_PyHamt_Assoc' to determine if
696 `hamt.set(key, val)` increased the size of the collection.
697 */
698
699 uint32_t bit = hamt_bitpos(hash, shift);
700 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
701
702 /* Bitmap node layout:
703
704 +------+------+------+------+ --- +------+------+
705 | key1 | val1 | key2 | val2 | ... | keyN | valN |
706 +------+------+------+------+ --- +------+------+
707 where `N < Py_SIZE(node)`.
708
709 The `node->b_bitmap` field is a bitmap. For a given
710 `(shift, hash)` pair we can determine:
711
712 - If this node has the corresponding key/val slots.
713 - The index of key/val slots.
714 */
715
716 if (self->b_bitmap & bit) {
717 /* The key is set in this node */
718
719 uint32_t key_idx = 2 * idx;
720 uint32_t val_idx = key_idx + 1;
721
722 assert(val_idx < (size_t)Py_SIZE(self));
723
724 PyObject *key_or_null = self->b_array[key_idx];
725 PyObject *val_or_node = self->b_array[val_idx];
726
727 if (key_or_null == NULL) {
728 /* key is NULL. This means that we have a few keys
729 that have the same (hash, shift) pair. */
730
731 assert(val_or_node != NULL);
732
733 PyHamtNode *sub_node = hamt_node_assoc(
734 (PyHamtNode *)val_or_node,
735 shift + 5, hash, key, val, added_leaf);
736 if (sub_node == NULL) {
737 return NULL;
738 }
739
740 if (val_or_node == (PyObject *)sub_node) {
741 Py_DECREF(sub_node);
742 Py_INCREF(self);
743 return (PyHamtNode *)self;
744 }
745
746 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
747 if (ret == NULL) {
748 return NULL;
749 }
750 Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
751 return (PyHamtNode *)ret;
752 }
753
754 assert(key != NULL);
755 /* key is not NULL. This means that we have only one other
756 key in this collection that matches our hash for this shift. */
757
758 int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
759 if (comp_err < 0) { /* exception in __eq__ */
760 return NULL;
761 }
762 if (comp_err == 1) { /* key == key_or_null */
763 if (val == val_or_node) {
764 /* we already have the same key/val pair; return self. */
765 Py_INCREF(self);
766 return (PyHamtNode *)self;
767 }
768
769 /* We're setting a new value for the key we had before.
770 Make a new bitmap node with a replaced value, and return it. */
771 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
772 if (ret == NULL) {
773 return NULL;
774 }
775 Py_INCREF(val);
776 Py_SETREF(ret->b_array[val_idx], val);
777 return (PyHamtNode *)ret;
778 }
779
780 /* It's a new key, and it has the same index as *one* another key.
781 We have a collision. We need to create a new node which will
782 combine the existing key and the key we're adding.
783
784 `hamt_node_new_bitmap_or_collision` will either create a new
785 Collision node if the keys have identical hashes, or
786 a new Bitmap node.
787 */
788 PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
789 shift + 5,
790 key_or_null, val_or_node, /* existing key/val */
791 hash,
792 key, val /* new key/val */
793 );
794 if (sub_node == NULL) {
795 return NULL;
796 }
797
798 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
799 if (ret == NULL) {
800 Py_DECREF(sub_node);
801 return NULL;
802 }
803 Py_SETREF(ret->b_array[key_idx], NULL);
804 Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
805
806 *added_leaf = 1;
807 return (PyHamtNode *)ret;
808 }
809 else {
810 /* There was no key before with the same (shift,hash). */
811
812 uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap);
813
814 if (n >= 16) {
815 /* When we have a situation where we want to store more
816 than 16 nodes at one level of the tree, we no longer
817 want to use the Bitmap node with bitmap encoding.
818
819 Instead we start using an Array node, which has
820 simpler (faster) implementation at the expense of
821 having preallocated 32 pointers for its keys/values
822 pairs.
823
824 Small hamt objects (<30 keys) usually don't have any
825 Array nodes at all. Between ~30 and ~400 keys hamt
826 objects usually have one Array node, and usually it's
827 a root node.
828 */
829
830 uint32_t jdx = hamt_mask(hash, shift);
831 /* 'jdx' is the index of where the new key should be added
832 in the new Array node we're about to create. */
833
834 PyHamtNode *empty = NULL;
835 PyHamtNode_Array *new_node = NULL;
836 PyHamtNode *res = NULL;
837
838 /* Create a new Array node. */
839 new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
840 if (new_node == NULL) {
841 goto fin;
842 }
843
844 /* Create an empty bitmap node for the next
845 hamt_node_assoc call. */
846 empty = hamt_node_bitmap_new(0);
847 if (empty == NULL) {
848 goto fin;
849 }
850
851 /* Make a new bitmap node for the key/val we're adding.
852 Set that bitmap node to new-array-node[jdx]. */
853 new_node->a_array[jdx] = hamt_node_assoc(
854 empty, shift + 5, hash, key, val, added_leaf);
855 if (new_node->a_array[jdx] == NULL) {
856 goto fin;
857 }
858
859 /* Copy existing key/value pairs from the current Bitmap
860 node to the new Array node we've just created. */
861 Py_ssize_t i, j;
862 for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
863 if (((self->b_bitmap >> i) & 1) != 0) {
864 /* Ensure we don't accidentally override `jdx` element
865 we set few lines above.
866 */
867 assert(new_node->a_array[i] == NULL);
868
869 if (self->b_array[j] == NULL) {
870 new_node->a_array[i] =
871 (PyHamtNode *)self->b_array[j + 1];
872 Py_INCREF(new_node->a_array[i]);
873 }
874 else {
875 int32_t rehash = hamt_hash(self->b_array[j]);
876 if (rehash == -1) {
877 goto fin;
878 }
879
880 new_node->a_array[i] = hamt_node_assoc(
881 empty, shift + 5,
882 rehash,
883 self->b_array[j],
884 self->b_array[j + 1],
885 added_leaf);
886
887 if (new_node->a_array[i] == NULL) {
888 goto fin;
889 }
890 }
891 j += 2;
892 }
893 }
894
895 VALIDATE_ARRAY_NODE(new_node)
896
897 /* That's it! */
898 res = (PyHamtNode *)new_node;
899
900 fin:
901 Py_XDECREF(empty);
902 if (res == NULL) {
903 Py_XDECREF(new_node);
904 }
905 return res;
906 }
907 else {
908 /* We have less than 16 keys at this level; let's just
909 create a new bitmap node out of this node with the
910 new key/val pair added. */
911
912 uint32_t key_idx = 2 * idx;
913 uint32_t val_idx = key_idx + 1;
914 uint32_t i;
915
916 *added_leaf = 1;
917
918 /* Allocate new Bitmap node which can have one more key/val
919 pair in addition to what we have already. */
920 PyHamtNode_Bitmap *new_node =
921 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
922 if (new_node == NULL) {
923 return NULL;
924 }
925
926 /* Copy all keys/values that will be before the new key/value
927 we are adding. */
928 for (i = 0; i < key_idx; i++) {
929 Py_XINCREF(self->b_array[i]);
930 new_node->b_array[i] = self->b_array[i];
931 }
932
933 /* Set the new key/value to the new Bitmap node. */
934 Py_INCREF(key);
935 new_node->b_array[key_idx] = key;
936 Py_INCREF(val);
937 new_node->b_array[val_idx] = val;
938
939 /* Copy all keys/values that will be after the new key/value
940 we are adding. */
941 assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
942 for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
943 Py_XINCREF(self->b_array[i]);
944 new_node->b_array[i + 2] = self->b_array[i];
945 }
946
947 new_node->b_bitmap = self->b_bitmap | bit;
948 return (PyHamtNode *)new_node;
949 }
950 }
951}
952
953static hamt_without_t
954hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
955 uint32_t shift, int32_t hash,
956 PyObject *key,
957 PyHamtNode **new_node)
958{
959 uint32_t bit = hamt_bitpos(hash, shift);
960 if ((self->b_bitmap & bit) == 0) {
961 return W_NOT_FOUND;
962 }
963
964 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
965
966 uint32_t key_idx = 2 * idx;
967 uint32_t val_idx = key_idx + 1;
968
969 PyObject *key_or_null = self->b_array[key_idx];
970 PyObject *val_or_node = self->b_array[val_idx];
971
972 if (key_or_null == NULL) {
973 /* key == NULL means that 'value' is another tree node. */
974
975 PyHamtNode *sub_node = NULL;
976
977 hamt_without_t res = hamt_node_without(
978 (PyHamtNode *)val_or_node,
979 shift + 5, hash, key, &sub_node);
980
981 switch (res) {
982 case W_EMPTY:
983 /* It's impossible for us to receive a W_EMPTY here:
984
985 - Array nodes are converted to Bitmap nodes when
986 we delete 16th item from them;
987
988 - Collision nodes are converted to Bitmap when
989 there is one item in them;
990
991 - Bitmap node's without() inlines single-item
992 sub-nodes.
993
994 So in no situation we can have a single-item
995 Bitmap child of another Bitmap node.
996 */
997 Py_UNREACHABLE();
998
999 case W_NEWNODE: {
1000 assert(sub_node != NULL);
1001
1002 if (IS_BITMAP_NODE(sub_node)) {
1003 PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1004 if (hamt_node_bitmap_count(sub_tree) == 1 &&
1005 sub_tree->b_array[0] != NULL)
1006 {
1007 /* A bitmap node with one key/value pair. Just
1008 merge it into this node.
1009
1010 Note that we don't inline Bitmap nodes that
1011 have a NULL key -- those nodes point to another
1012 tree level, and we cannot simply move tree levels
1013 up or down.
1014 */
1015
1016 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1017 if (clone == NULL) {
1018 Py_DECREF(sub_node);
1019 return W_ERROR;
1020 }
1021
1022 PyObject *key = sub_tree->b_array[0];
1023 PyObject *val = sub_tree->b_array[1];
1024
1025 Py_INCREF(key);
1026 Py_XSETREF(clone->b_array[key_idx], key);
1027 Py_INCREF(val);
1028 Py_SETREF(clone->b_array[val_idx], val);
1029
1030 Py_DECREF(sub_tree);
1031
1032 *new_node = (PyHamtNode *)clone;
1033 return W_NEWNODE;
1034 }
1035 }
1036
1037#ifdef Py_DEBUG
1038 /* Ensure that Collision.without implementation
1039 converts to Bitmap nodes itself.
1040 */
1041 if (IS_COLLISION_NODE(sub_node)) {
1042 assert(hamt_node_collision_count(
1043 (PyHamtNode_Collision*)sub_node) > 1);
1044 }
1045#endif
1046
1047 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1048 if (clone == NULL) {
1049 return W_ERROR;
1050 }
1051
1052 Py_SETREF(clone->b_array[val_idx],
1053 (PyObject *)sub_node); /* borrow */
1054
1055 *new_node = (PyHamtNode *)clone;
1056 return W_NEWNODE;
1057 }
1058
1059 case W_ERROR:
1060 case W_NOT_FOUND:
1061 assert(sub_node == NULL);
1062 return res;
1063
1064 default:
1065 Py_UNREACHABLE();
1066 }
1067 }
1068 else {
1069 /* We have a regular key/value pair */
1070
1071 int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1072 if (cmp < 0) {
1073 return W_ERROR;
1074 }
1075 if (cmp == 0) {
1076 return W_NOT_FOUND;
1077 }
1078
1079 if (hamt_node_bitmap_count(self) == 1) {
1080 return W_EMPTY;
1081 }
1082
1083 *new_node = (PyHamtNode *)
1084 hamt_node_bitmap_clone_without(self, bit);
1085 if (*new_node == NULL) {
1086 return W_ERROR;
1087 }
1088
1089 return W_NEWNODE;
1090 }
1091}
1092
1093static hamt_find_t
1094hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1095 uint32_t shift, int32_t hash,
1096 PyObject *key, PyObject **val)
1097{
1098 /* Lookup a key in a Bitmap node. */
1099
1100 uint32_t bit = hamt_bitpos(hash, shift);
1101 uint32_t idx;
1102 uint32_t key_idx;
1103 uint32_t val_idx;
1104 PyObject *key_or_null;
1105 PyObject *val_or_node;
1106 int comp_err;
1107
1108 if ((self->b_bitmap & bit) == 0) {
1109 return F_NOT_FOUND;
1110 }
1111
1112 idx = hamt_bitindex(self->b_bitmap, bit);
1113 key_idx = idx * 2;
1114 val_idx = key_idx + 1;
1115
1116 assert(val_idx < (size_t)Py_SIZE(self));
1117
1118 key_or_null = self->b_array[key_idx];
1119 val_or_node = self->b_array[val_idx];
1120
1121 if (key_or_null == NULL) {
1122 /* There are a few keys that have the same hash at the current shift
1123 that match our key. Dispatch the lookup further down the tree. */
1124 assert(val_or_node != NULL);
1125 return hamt_node_find((PyHamtNode *)val_or_node,
1126 shift + 5, hash, key, val);
1127 }
1128
1129 /* We have only one key -- a potential match. Let's compare if the
1130 key we are looking at is equal to the key we are looking for. */
1131 assert(key != NULL);
1132 comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1133 if (comp_err < 0) { /* exception in __eq__ */
1134 return F_ERROR;
1135 }
1136 if (comp_err == 1) { /* key == key_or_null */
1137 *val = val_or_node;
1138 return F_FOUND;
1139 }
1140
1141 return F_NOT_FOUND;
1142}
1143
1144static int
1145hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1146{
1147 /* Bitmap's tp_traverse */
1148
1149 Py_ssize_t i;
1150
1151 for (i = Py_SIZE(self); --i >= 0; ) {
1152 Py_VISIT(self->b_array[i]);
1153 }
1154
1155 return 0;
1156}
1157
1158static void
1159hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1160{
1161 /* Bitmap's tp_dealloc */
1162
1163 Py_ssize_t len = Py_SIZE(self);
1164 Py_ssize_t i;
1165
1166 PyObject_GC_UnTrack(self);
1167 Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc)
1168
1169 if (len > 0) {
1170 i = len;
1171 while (--i >= 0) {
1172 Py_XDECREF(self->b_array[i]);
1173 }
1174 }
1175
1176 Py_TYPE(self)->tp_free((PyObject *)self);
1177 Py_TRASHCAN_END
1178}
1179
1180#ifdef Py_DEBUG
1181static int
1182hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1183 _PyUnicodeWriter *writer, int level)
1184{
1185 /* Debug build: __dump__() method implementation for Bitmap nodes. */
1186
1187 Py_ssize_t i;
1188 PyObject *tmp1;
1189 PyObject *tmp2;
1190
1191 if (_hamt_dump_ident(writer, level + 1)) {
1192 goto error;
1193 }
1194
1195 if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1196 Py_SIZE(node), Py_SIZE(node) / 2))
1197 {
1198 goto error;
1199 }
1200
1201 tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1202 if (tmp1 == NULL) {
1203 goto error;
1204 }
1205 tmp2 = _PyLong_Format(tmp1, 2);
1206 Py_DECREF(tmp1);
1207 if (tmp2 == NULL) {
1208 goto error;
1209 }
1210 if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1211 Py_DECREF(tmp2);
1212 goto error;
1213 }
1214 Py_DECREF(tmp2);
1215
1216 for (i = 0; i < Py_SIZE(node); i += 2) {
1217 PyObject *key_or_null = node->b_array[i];
1218 PyObject *val_or_node = node->b_array[i + 1];
1219
1220 if (_hamt_dump_ident(writer, level + 2)) {
1221 goto error;
1222 }
1223
1224 if (key_or_null == NULL) {
1225 if (_hamt_dump_format(writer, "NULL:\n")) {
1226 goto error;
1227 }
1228
1229 if (hamt_node_dump((PyHamtNode *)val_or_node,
1230 writer, level + 2))
1231 {
1232 goto error;
1233 }
1234 }
1235 else {
1236 if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1237 val_or_node))
1238 {
1239 goto error;
1240 }
1241 }
1242
1243 if (_hamt_dump_format(writer, "\n")) {
1244 goto error;
1245 }
1246 }
1247
1248 return 0;
1249error:
1250 return -1;
1251}
1252#endif /* Py_DEBUG */
1253
1254
1255/////////////////////////////////// Collision Node
1256
1257
1258static PyHamtNode *
1259hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1260{
1261 /* Create a new Collision node. */
1262
1263 PyHamtNode_Collision *node;
1264 Py_ssize_t i;
1265
1266 assert(size >= 4);
1267 assert(size % 2 == 0);
1268
1269 node = PyObject_GC_NewVar(
1270 PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1271 if (node == NULL) {
1272 return NULL;
1273 }
1274
1275 for (i = 0; i < size; i++) {
1276 node->c_array[i] = NULL;
1277 }
1278
1279 Py_SET_SIZE(node, size);
1280 node->c_hash = hash;
1281
1282 _PyObject_GC_TRACK(node);
1283
1284 return (PyHamtNode *)node;
1285}
1286
1287static hamt_find_t
1288hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1289 Py_ssize_t *idx)
1290{
1291 /* Lookup `key` in the Collision node `self`. Set the index of the
1292 found key to 'idx'. */
1293
1294 Py_ssize_t i;
1295 PyObject *el;
1296
1297 for (i = 0; i < Py_SIZE(self); i += 2) {
1298 el = self->c_array[i];
1299
1300 assert(el != NULL);
1301 int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1302 if (cmp < 0) {
1303 return F_ERROR;
1304 }
1305 if (cmp == 1) {
1306 *idx = i;
1307 return F_FOUND;
1308 }
1309 }
1310
1311 return F_NOT_FOUND;
1312}
1313
1314static PyHamtNode *
1315hamt_node_collision_assoc(PyHamtNode_Collision *self,
1316 uint32_t shift, int32_t hash,
1317 PyObject *key, PyObject *val, int* added_leaf)
1318{
1319 /* Set a new key to this level (currently a Collision node)
1320 of the tree. */
1321
1322 if (hash == self->c_hash) {
1323 /* The hash of the 'key' we are adding matches the hash of
1324 other keys in this Collision node. */
1325
1326 Py_ssize_t key_idx = -1;
1327 hamt_find_t found;
1328 PyHamtNode_Collision *new_node;
1329 Py_ssize_t i;
1330
1331 /* Let's try to lookup the new 'key', maybe we already have it. */
1332 found = hamt_node_collision_find_index(self, key, &key_idx);
1333 switch (found) {
1334 case F_ERROR:
1335 /* Exception. */
1336 return NULL;
1337
1338 case F_NOT_FOUND:
1339 /* This is a totally new key. Clone the current node,
1340 add a new key/value to the cloned node. */
1341
1342 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1343 self->c_hash, Py_SIZE(self) + 2);
1344 if (new_node == NULL) {
1345 return NULL;
1346 }
1347
1348 for (i = 0; i < Py_SIZE(self); i++) {
1349 Py_INCREF(self->c_array[i]);
1350 new_node->c_array[i] = self->c_array[i];
1351 }
1352
1353 Py_INCREF(key);
1354 new_node->c_array[i] = key;
1355 Py_INCREF(val);
1356 new_node->c_array[i + 1] = val;
1357
1358 *added_leaf = 1;
1359 return (PyHamtNode *)new_node;
1360
1361 case F_FOUND:
1362 /* There's a key which is equal to the key we are adding. */
1363
1364 assert(key_idx >= 0);
1365 assert(key_idx < Py_SIZE(self));
1366 Py_ssize_t val_idx = key_idx + 1;
1367
1368 if (self->c_array[val_idx] == val) {
1369 /* We're setting a key/value pair that's already set. */
1370 Py_INCREF(self);
1371 return (PyHamtNode *)self;
1372 }
1373
1374 /* We need to replace old value for the key
1375 with a new value. Create a new Collision node.*/
1376 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1377 self->c_hash, Py_SIZE(self));
1378 if (new_node == NULL) {
1379 return NULL;
1380 }
1381
1382 /* Copy all elements of the old node to the new one. */
1383 for (i = 0; i < Py_SIZE(self); i++) {
1384 Py_INCREF(self->c_array[i]);
1385 new_node->c_array[i] = self->c_array[i];
1386 }
1387
1388 /* Replace the old value with the new value for the our key. */
1389 Py_DECREF(new_node->c_array[val_idx]);
1390 Py_INCREF(val);
1391 new_node->c_array[val_idx] = val;
1392
1393 return (PyHamtNode *)new_node;
1394
1395 default:
1396 Py_UNREACHABLE();
1397 }
1398 }
1399 else {
1400 /* The hash of the new key is different from the hash that
1401 all keys of this Collision node have.
1402
1403 Create a Bitmap node inplace with two children:
1404 key/value pair that we're adding, and the Collision node
1405 we're replacing on this tree level.
1406 */
1407
1408 PyHamtNode_Bitmap *new_node;
1409 PyHamtNode *assoc_res;
1410
1411 new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1412 if (new_node == NULL) {
1413 return NULL;
1414 }
1415 new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1416 Py_INCREF(self);
1417 new_node->b_array[1] = (PyObject*) self;
1418
1419 assoc_res = hamt_node_bitmap_assoc(
1420 new_node, shift, hash, key, val, added_leaf);
1421 Py_DECREF(new_node);
1422 return assoc_res;
1423 }
1424}
1425
1426static inline Py_ssize_t
1427hamt_node_collision_count(PyHamtNode_Collision *node)
1428{
1429 return Py_SIZE(node) / 2;
1430}
1431
1432static hamt_without_t
1433hamt_node_collision_without(PyHamtNode_Collision *self,
1434 uint32_t shift, int32_t hash,
1435 PyObject *key,
1436 PyHamtNode **new_node)
1437{
1438 if (hash != self->c_hash) {
1439 return W_NOT_FOUND;
1440 }
1441
1442 Py_ssize_t key_idx = -1;
1443 hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1444
1445 switch (found) {
1446 case F_ERROR:
1447 return W_ERROR;
1448
1449 case F_NOT_FOUND:
1450 return W_NOT_FOUND;
1451
1452 case F_FOUND:
1453 assert(key_idx >= 0);
1454 assert(key_idx < Py_SIZE(self));
1455
1456 Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1457
1458 if (new_count == 0) {
1459 /* The node has only one key/value pair and it's for the
1460 key we're trying to delete. So a new node will be empty
1461 after the removal.
1462 */
1463 return W_EMPTY;
1464 }
1465
1466 if (new_count == 1) {
1467 /* The node has two keys, and after deletion the
1468 new Collision node would have one. Collision nodes
1469 with one key shouldn't exist, so convert it to a
1470 Bitmap node.
1471 */
1472 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1473 hamt_node_bitmap_new(2);
1474 if (node == NULL) {
1475 return W_ERROR;
1476 }
1477
1478 if (key_idx == 0) {
1479 Py_INCREF(self->c_array[2]);
1480 node->b_array[0] = self->c_array[2];
1481 Py_INCREF(self->c_array[3]);
1482 node->b_array[1] = self->c_array[3];
1483 }
1484 else {
1485 assert(key_idx == 2);
1486 Py_INCREF(self->c_array[0]);
1487 node->b_array[0] = self->c_array[0];
1488 Py_INCREF(self->c_array[1]);
1489 node->b_array[1] = self->c_array[1];
1490 }
1491
1492 node->b_bitmap = hamt_bitpos(hash, shift);
1493
1494 *new_node = (PyHamtNode *)node;
1495 return W_NEWNODE;
1496 }
1497
1498 /* Allocate a new Collision node with capacity for one
1499 less key/value pair */
1500 PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1501 hamt_node_collision_new(
1502 self->c_hash, Py_SIZE(self) - 2);
1503 if (new == NULL) {
1504 return W_ERROR;
1505 }
1506
1507 /* Copy all other keys from `self` to `new` */
1508 Py_ssize_t i;
1509 for (i = 0; i < key_idx; i++) {
1510 Py_INCREF(self->c_array[i]);
1511 new->c_array[i] = self->c_array[i];
1512 }
1513 for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1514 Py_INCREF(self->c_array[i]);
1515 new->c_array[i - 2] = self->c_array[i];
1516 }
1517
1518 *new_node = (PyHamtNode*)new;
1519 return W_NEWNODE;
1520
1521 default:
1522 Py_UNREACHABLE();
1523 }
1524}
1525
1526static hamt_find_t
1527hamt_node_collision_find(PyHamtNode_Collision *self,
1528 uint32_t shift, int32_t hash,
1529 PyObject *key, PyObject **val)
1530{
1531 /* Lookup `key` in the Collision node `self`. Set the value
1532 for the found key to 'val'. */
1533
1534 Py_ssize_t idx = -1;
1535 hamt_find_t res;
1536
1537 res = hamt_node_collision_find_index(self, key, &idx);
1538 if (res == F_ERROR || res == F_NOT_FOUND) {
1539 return res;
1540 }
1541
1542 assert(idx >= 0);
1543 assert(idx + 1 < Py_SIZE(self));
1544
1545 *val = self->c_array[idx + 1];
1546 assert(*val != NULL);
1547
1548 return F_FOUND;
1549}
1550
1551
1552static int
1553hamt_node_collision_traverse(PyHamtNode_Collision *self,
1554 visitproc visit, void *arg)
1555{
1556 /* Collision's tp_traverse */
1557
1558 Py_ssize_t i;
1559
1560 for (i = Py_SIZE(self); --i >= 0; ) {
1561 Py_VISIT(self->c_array[i]);
1562 }
1563
1564 return 0;
1565}
1566
1567static void
1568hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1569{
1570 /* Collision's tp_dealloc */
1571
1572 Py_ssize_t len = Py_SIZE(self);
1573
1574 PyObject_GC_UnTrack(self);
1575 Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc)
1576
1577 if (len > 0) {
1578
1579 while (--len >= 0) {
1580 Py_XDECREF(self->c_array[len]);
1581 }
1582 }
1583
1584 Py_TYPE(self)->tp_free((PyObject *)self);
1585 Py_TRASHCAN_END
1586}
1587
1588#ifdef Py_DEBUG
1589static int
1590hamt_node_collision_dump(PyHamtNode_Collision *node,
1591 _PyUnicodeWriter *writer, int level)
1592{
1593 /* Debug build: __dump__() method implementation for Collision nodes. */
1594
1595 Py_ssize_t i;
1596
1597 if (_hamt_dump_ident(writer, level + 1)) {
1598 goto error;
1599 }
1600
1601 if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1602 Py_SIZE(node), node))
1603 {
1604 goto error;
1605 }
1606
1607 for (i = 0; i < Py_SIZE(node); i += 2) {
1608 PyObject *key = node->c_array[i];
1609 PyObject *val = node->c_array[i + 1];
1610
1611 if (_hamt_dump_ident(writer, level + 2)) {
1612 goto error;
1613 }
1614
1615 if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1616 goto error;
1617 }
1618 }
1619
1620 return 0;
1621error:
1622 return -1;
1623}
1624#endif /* Py_DEBUG */
1625
1626
1627/////////////////////////////////// Array Node
1628
1629
1630static PyHamtNode *
1631hamt_node_array_new(Py_ssize_t count)
1632{
1633 Py_ssize_t i;
1634
1635 PyHamtNode_Array *node = PyObject_GC_New(
1636 PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1637 if (node == NULL) {
1638 return NULL;
1639 }
1640
1641 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1642 node->a_array[i] = NULL;
1643 }
1644
1645 node->a_count = count;
1646
1647 _PyObject_GC_TRACK(node);
1648 return (PyHamtNode *)node;
1649}
1650
1651static PyHamtNode_Array *
1652hamt_node_array_clone(PyHamtNode_Array *node)
1653{
1654 PyHamtNode_Array *clone;
1655 Py_ssize_t i;
1656
1657 VALIDATE_ARRAY_NODE(node)
1658
1659 /* Create a new Array node. */
1660 clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1661 if (clone == NULL) {
1662 return NULL;
1663 }
1664
1665 /* Copy all elements from the current Array node to the new one. */
1666 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1667 Py_XINCREF(node->a_array[i]);
1668 clone->a_array[i] = node->a_array[i];
1669 }
1670
1671 VALIDATE_ARRAY_NODE(clone)
1672 return clone;
1673}
1674
1675static PyHamtNode *
1676hamt_node_array_assoc(PyHamtNode_Array *self,
1677 uint32_t shift, int32_t hash,
1678 PyObject *key, PyObject *val, int* added_leaf)
1679{
1680 /* Set a new key to this level (currently a Collision node)
1681 of the tree.
1682
1683 Array nodes don't store values, they can only point to
1684 other nodes. They are simple arrays of 32 BaseNode pointers/
1685 */
1686
1687 uint32_t idx = hamt_mask(hash, shift);
1688 PyHamtNode *node = self->a_array[idx];
1689 PyHamtNode *child_node;
1690 PyHamtNode_Array *new_node;
1691 Py_ssize_t i;
1692
1693 if (node == NULL) {
1694 /* There's no child node for the given hash. Create a new
1695 Bitmap node for this key. */
1696
1697 PyHamtNode_Bitmap *empty = NULL;
1698
1699 /* Get an empty Bitmap node to work with. */
1700 empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1701 if (empty == NULL) {
1702 return NULL;
1703 }
1704
1705 /* Set key/val to the newly created empty Bitmap, thus
1706 creating a new Bitmap node with our key/value pair. */
1707 child_node = hamt_node_bitmap_assoc(
1708 empty,
1709 shift + 5, hash, key, val, added_leaf);
1710 Py_DECREF(empty);
1711 if (child_node == NULL) {
1712 return NULL;
1713 }
1714
1715 /* Create a new Array node. */
1716 new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1717 if (new_node == NULL) {
1718 Py_DECREF(child_node);
1719 return NULL;
1720 }
1721
1722 /* Copy all elements from the current Array node to the
1723 new one. */
1724 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1725 Py_XINCREF(self->a_array[i]);
1726 new_node->a_array[i] = self->a_array[i];
1727 }
1728
1729 assert(new_node->a_array[idx] == NULL);
1730 new_node->a_array[idx] = child_node; /* borrow */
1731 VALIDATE_ARRAY_NODE(new_node)
1732 }
1733 else {
1734 /* There's a child node for the given hash.
1735 Set the key to it./ */
1736 child_node = hamt_node_assoc(
1737 node, shift + 5, hash, key, val, added_leaf);
1738 if (child_node == NULL) {
1739 return NULL;
1740 }
1741 else if (child_node == (PyHamtNode *)self) {
1742 Py_DECREF(child_node);
1743 return (PyHamtNode *)self;
1744 }
1745
1746 new_node = hamt_node_array_clone(self);
1747 if (new_node == NULL) {
1748 Py_DECREF(child_node);
1749 return NULL;
1750 }
1751
1752 Py_SETREF(new_node->a_array[idx], child_node); /* borrow */
1753 VALIDATE_ARRAY_NODE(new_node)
1754 }
1755
1756 return (PyHamtNode *)new_node;
1757}
1758
1759static hamt_without_t
1760hamt_node_array_without(PyHamtNode_Array *self,
1761 uint32_t shift, int32_t hash,
1762 PyObject *key,
1763 PyHamtNode **new_node)
1764{
1765 uint32_t idx = hamt_mask(hash, shift);
1766 PyHamtNode *node = self->a_array[idx];
1767
1768 if (node == NULL) {
1769 return W_NOT_FOUND;
1770 }
1771
1772 PyHamtNode *sub_node = NULL;
1773 hamt_without_t res = hamt_node_without(
1774 (PyHamtNode *)node,
1775 shift + 5, hash, key, &sub_node);
1776
1777 switch (res) {
1778 case W_NOT_FOUND:
1779 case W_ERROR:
1780 assert(sub_node == NULL);
1781 return res;
1782
1783 case W_NEWNODE: {
1784 /* We need to replace a node at the `idx` index.
1785 Clone this node and replace.
1786 */
1787 assert(sub_node != NULL);
1788
1789 PyHamtNode_Array *clone = hamt_node_array_clone(self);
1790 if (clone == NULL) {
1791 Py_DECREF(sub_node);
1792 return W_ERROR;
1793 }
1794
1795 Py_SETREF(clone->a_array[idx], sub_node); /* borrow */
1796 *new_node = (PyHamtNode*)clone; /* borrow */
1797 return W_NEWNODE;
1798 }
1799
1800 case W_EMPTY: {
1801 assert(sub_node == NULL);
1802 /* We need to remove a node at the `idx` index.
1803 Calculate the size of the replacement Array node.
1804 */
1805 Py_ssize_t new_count = self->a_count - 1;
1806
1807 if (new_count == 0) {
1808 return W_EMPTY;
1809 }
1810
1811 if (new_count >= 16) {
1812 /* We convert Bitmap nodes to Array nodes, when a
1813 Bitmap node needs to store more than 15 key/value
1814 pairs. So we will create a new Array node if we
1815 the number of key/values after deletion is still
1816 greater than 15.
1817 */
1818
1819 PyHamtNode_Array *new = hamt_node_array_clone(self);
1820 if (new == NULL) {
1821 return W_ERROR;
1822 }
1823 new->a_count = new_count;
1824 Py_CLEAR(new->a_array[idx]);
1825
1826 *new_node = (PyHamtNode*)new; /* borrow */
1827 return W_NEWNODE;
1828 }
1829
1830 /* New Array node would have less than 16 key/value
1831 pairs. We need to create a replacement Bitmap node. */
1832
1833 Py_ssize_t bitmap_size = new_count * 2;
1834 uint32_t bitmap = 0;
1835
1836 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1837 hamt_node_bitmap_new(bitmap_size);
1838 if (new == NULL) {
1839 return W_ERROR;
1840 }
1841
1842 Py_ssize_t new_i = 0;
1843 for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1844 if (i == idx) {
1845 /* Skip the node we are deleting. */
1846 continue;
1847 }
1848
1849 PyHamtNode *node = self->a_array[i];
1850 if (node == NULL) {
1851 /* Skip any missing nodes. */
1852 continue;
1853 }
1854
1855 bitmap |= 1U << i;
1856
1857 if (IS_BITMAP_NODE(node)) {
1858 PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1859
1860 if (hamt_node_bitmap_count(child) == 1 &&
1861 child->b_array[0] != NULL)
1862 {
1863 /* node is a Bitmap with one key/value pair, just
1864 merge it into the new Bitmap node we're building.
1865
1866 Note that we don't inline Bitmap nodes that
1867 have a NULL key -- those nodes point to another
1868 tree level, and we cannot simply move tree levels
1869 up or down.
1870 */
1871 PyObject *key = child->b_array[0];
1872 PyObject *val = child->b_array[1];
1873
1874 Py_INCREF(key);
1875 new->b_array[new_i] = key;
1876 Py_INCREF(val);
1877 new->b_array[new_i + 1] = val;
1878 }
1879 else {
1880 new->b_array[new_i] = NULL;
1881 Py_INCREF(node);
1882 new->b_array[new_i + 1] = (PyObject*)node;
1883 }
1884 }
1885 else {
1886
1887#ifdef Py_DEBUG
1888 if (IS_COLLISION_NODE(node)) {
1889 Py_ssize_t child_count = hamt_node_collision_count(
1890 (PyHamtNode_Collision*)node);
1891 assert(child_count > 1);
1892 }
1893 else if (IS_ARRAY_NODE(node)) {
1894 assert(((PyHamtNode_Array*)node)->a_count >= 16);
1895 }
1896#endif
1897
1898 /* Just copy the node into our new Bitmap */
1899 new->b_array[new_i] = NULL;
1900 Py_INCREF(node);
1901 new->b_array[new_i + 1] = (PyObject*)node;
1902 }
1903
1904 new_i += 2;
1905 }
1906
1907 new->b_bitmap = bitmap;
1908 *new_node = (PyHamtNode*)new; /* borrow */
1909 return W_NEWNODE;
1910 }
1911
1912 default:
1913 Py_UNREACHABLE();
1914 }
1915}
1916
1917static hamt_find_t
1918hamt_node_array_find(PyHamtNode_Array *self,
1919 uint32_t shift, int32_t hash,
1920 PyObject *key, PyObject **val)
1921{
1922 /* Lookup `key` in the Array node `self`. Set the value
1923 for the found key to 'val'. */
1924
1925 uint32_t idx = hamt_mask(hash, shift);
1926 PyHamtNode *node;
1927
1928 node = self->a_array[idx];
1929 if (node == NULL) {
1930 return F_NOT_FOUND;
1931 }
1932
1933 /* Dispatch to the generic hamt_node_find */
1934 return hamt_node_find(node, shift + 5, hash, key, val);
1935}
1936
1937static int
1938hamt_node_array_traverse(PyHamtNode_Array *self,
1939 visitproc visit, void *arg)
1940{
1941 /* Array's tp_traverse */
1942
1943 Py_ssize_t i;
1944
1945 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1946 Py_VISIT(self->a_array[i]);
1947 }
1948
1949 return 0;
1950}
1951
1952static void
1953hamt_node_array_dealloc(PyHamtNode_Array *self)
1954{
1955 /* Array's tp_dealloc */
1956
1957 Py_ssize_t i;
1958
1959 PyObject_GC_UnTrack(self);
1960 Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc)
1961
1962 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1963 Py_XDECREF(self->a_array[i]);
1964 }
1965
1966 Py_TYPE(self)->tp_free((PyObject *)self);
1967 Py_TRASHCAN_END
1968}
1969
1970#ifdef Py_DEBUG
1971static int
1972hamt_node_array_dump(PyHamtNode_Array *node,
1973 _PyUnicodeWriter *writer, int level)
1974{
1975 /* Debug build: __dump__() method implementation for Array nodes. */
1976
1977 Py_ssize_t i;
1978
1979 if (_hamt_dump_ident(writer, level + 1)) {
1980 goto error;
1981 }
1982
1983 if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1984 goto error;
1985 }
1986
1987 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1988 if (node->a_array[i] == NULL) {
1989 continue;
1990 }
1991
1992 if (_hamt_dump_ident(writer, level + 2)) {
1993 goto error;
1994 }
1995
1996 if (_hamt_dump_format(writer, "%zd::\n", i)) {
1997 goto error;
1998 }
1999
2000 if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2001 goto error;
2002 }
2003
2004 if (_hamt_dump_format(writer, "\n")) {
2005 goto error;
2006 }
2007 }
2008
2009 return 0;
2010error:
2011 return -1;
2012}
2013#endif /* Py_DEBUG */
2014
2015
2016/////////////////////////////////// Node Dispatch
2017
2018
2019static PyHamtNode *
2020hamt_node_assoc(PyHamtNode *node,
2021 uint32_t shift, int32_t hash,
2022 PyObject *key, PyObject *val, int* added_leaf)
2023{
2024 /* Set key/value to the 'node' starting with the given shift/hash.
2025 Return a new node, or the same node if key/value already
2026 set.
2027
2028 added_leaf will be set to 1 if key/value wasn't in the
2029 tree before.
2030
2031 This method automatically dispatches to the suitable
2032 hamt_node_{nodetype}_assoc method.
2033 */
2034
2035 if (IS_BITMAP_NODE(node)) {
2036 return hamt_node_bitmap_assoc(
2037 (PyHamtNode_Bitmap *)node,
2038 shift, hash, key, val, added_leaf);
2039 }
2040 else if (IS_ARRAY_NODE(node)) {
2041 return hamt_node_array_assoc(
2042 (PyHamtNode_Array *)node,
2043 shift, hash, key, val, added_leaf);
2044 }
2045 else {
2046 assert(IS_COLLISION_NODE(node));
2047 return hamt_node_collision_assoc(
2048 (PyHamtNode_Collision *)node,
2049 shift, hash, key, val, added_leaf);
2050 }
2051}
2052
2053static hamt_without_t
2054hamt_node_without(PyHamtNode *node,
2055 uint32_t shift, int32_t hash,
2056 PyObject *key,
2057 PyHamtNode **new_node)
2058{
2059 if (IS_BITMAP_NODE(node)) {
2060 return hamt_node_bitmap_without(
2061 (PyHamtNode_Bitmap *)node,
2062 shift, hash, key,
2063 new_node);
2064 }
2065 else if (IS_ARRAY_NODE(node)) {
2066 return hamt_node_array_without(
2067 (PyHamtNode_Array *)node,
2068 shift, hash, key,
2069 new_node);
2070 }
2071 else {
2072 assert(IS_COLLISION_NODE(node));
2073 return hamt_node_collision_without(
2074 (PyHamtNode_Collision *)node,
2075 shift, hash, key,
2076 new_node);
2077 }
2078}
2079
2080static hamt_find_t
2081hamt_node_find(PyHamtNode *node,
2082 uint32_t shift, int32_t hash,
2083 PyObject *key, PyObject **val)
2084{
2085 /* Find the key in the node starting with the given shift/hash.
2086
2087 If a value is found, the result will be set to F_FOUND, and
2088 *val will point to the found value object.
2089
2090 If a value wasn't found, the result will be set to F_NOT_FOUND.
2091
2092 If an exception occurs during the call, the result will be F_ERROR.
2093
2094 This method automatically dispatches to the suitable
2095 hamt_node_{nodetype}_find method.
2096 */
2097
2098 if (IS_BITMAP_NODE(node)) {
2099 return hamt_node_bitmap_find(
2100 (PyHamtNode_Bitmap *)node,
2101 shift, hash, key, val);
2102
2103 }
2104 else if (IS_ARRAY_NODE(node)) {
2105 return hamt_node_array_find(
2106 (PyHamtNode_Array *)node,
2107 shift, hash, key, val);
2108 }
2109 else {
2110 assert(IS_COLLISION_NODE(node));
2111 return hamt_node_collision_find(
2112 (PyHamtNode_Collision *)node,
2113 shift, hash, key, val);
2114 }
2115}
2116
2117#ifdef Py_DEBUG
2118static int
2119hamt_node_dump(PyHamtNode *node,
2120 _PyUnicodeWriter *writer, int level)
2121{
2122 /* Debug build: __dump__() method implementation for a node.
2123
2124 This method automatically dispatches to the suitable
2125 hamt_node_{nodetype})_dump method.
2126 */
2127
2128 if (IS_BITMAP_NODE(node)) {
2129 return hamt_node_bitmap_dump(
2130 (PyHamtNode_Bitmap *)node, writer, level);
2131 }
2132 else if (IS_ARRAY_NODE(node)) {
2133 return hamt_node_array_dump(
2134 (PyHamtNode_Array *)node, writer, level);
2135 }
2136 else {
2137 assert(IS_COLLISION_NODE(node));
2138 return hamt_node_collision_dump(
2139 (PyHamtNode_Collision *)node, writer, level);
2140 }
2141}
2142#endif /* Py_DEBUG */
2143
2144
2145/////////////////////////////////// Iterators: Machinery
2146
2147
2148static hamt_iter_t
2149hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2150
2151
2152static void
2153hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2154{
2155 for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2156 iter->i_nodes[i] = NULL;
2157 iter->i_pos[i] = 0;
2158 }
2159
2160 iter->i_level = 0;
2161
2162 /* Note: we don't incref/decref nodes in i_nodes. */
2163 iter->i_nodes[0] = root;
2164}
2165
2166static hamt_iter_t
2167hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2168 PyObject **key, PyObject **val)
2169{
2170 int8_t level = iter->i_level;
2171
2172 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2173 Py_ssize_t pos = iter->i_pos[level];
2174
2175 if (pos + 1 >= Py_SIZE(node)) {
2176#ifdef Py_DEBUG
2177 assert(iter->i_level >= 0);
2178 iter->i_nodes[iter->i_level] = NULL;
2179#endif
2180 iter->i_level--;
2181 return hamt_iterator_next(iter, key, val);
2182 }
2183
2184 if (node->b_array[pos] == NULL) {
2185 iter->i_pos[level] = pos + 2;
2186
2187 int8_t next_level = level + 1;
2188 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2189 iter->i_level = next_level;
2190 iter->i_pos[next_level] = 0;
2191 iter->i_nodes[next_level] = (PyHamtNode *)
2192 node->b_array[pos + 1];
2193
2194 return hamt_iterator_next(iter, key, val);
2195 }
2196
2197 *key = node->b_array[pos];
2198 *val = node->b_array[pos + 1];
2199 iter->i_pos[level] = pos + 2;
2200 return I_ITEM;
2201}
2202
2203static hamt_iter_t
2204hamt_iterator_collision_next(PyHamtIteratorState *iter,
2205 PyObject **key, PyObject **val)
2206{
2207 int8_t level = iter->i_level;
2208
2209 PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2210 Py_ssize_t pos = iter->i_pos[level];
2211
2212 if (pos + 1 >= Py_SIZE(node)) {
2213#ifdef Py_DEBUG
2214 assert(iter->i_level >= 0);
2215 iter->i_nodes[iter->i_level] = NULL;
2216#endif
2217 iter->i_level--;
2218 return hamt_iterator_next(iter, key, val);
2219 }
2220
2221 *key = node->c_array[pos];
2222 *val = node->c_array[pos + 1];
2223 iter->i_pos[level] = pos + 2;
2224 return I_ITEM;
2225}
2226
2227static hamt_iter_t
2228hamt_iterator_array_next(PyHamtIteratorState *iter,
2229 PyObject **key, PyObject **val)
2230{
2231 int8_t level = iter->i_level;
2232
2233 PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2234 Py_ssize_t pos = iter->i_pos[level];
2235
2236 if (pos >= HAMT_ARRAY_NODE_SIZE) {
2237#ifdef Py_DEBUG
2238 assert(iter->i_level >= 0);
2239 iter->i_nodes[iter->i_level] = NULL;
2240#endif
2241 iter->i_level--;
2242 return hamt_iterator_next(iter, key, val);
2243 }
2244
2245 for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2246 if (node->a_array[i] != NULL) {
2247 iter->i_pos[level] = i + 1;
2248
2249 int8_t next_level = level + 1;
2250 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2251 iter->i_pos[next_level] = 0;
2252 iter->i_nodes[next_level] = node->a_array[i];
2253 iter->i_level = next_level;
2254
2255 return hamt_iterator_next(iter, key, val);
2256 }
2257 }
2258
2259#ifdef Py_DEBUG
2260 assert(iter->i_level >= 0);
2261 iter->i_nodes[iter->i_level] = NULL;
2262#endif
2263
2264 iter->i_level--;
2265 return hamt_iterator_next(iter, key, val);
2266}
2267
2268static hamt_iter_t
2269hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2270{
2271 if (iter->i_level < 0) {
2272 return I_END;
2273 }
2274
2275 assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2276
2277 PyHamtNode *current = iter->i_nodes[iter->i_level];
2278
2279 if (IS_BITMAP_NODE(current)) {
2280 return hamt_iterator_bitmap_next(iter, key, val);
2281 }
2282 else if (IS_ARRAY_NODE(current)) {
2283 return hamt_iterator_array_next(iter, key, val);
2284 }
2285 else {
2286 assert(IS_COLLISION_NODE(current));
2287 return hamt_iterator_collision_next(iter, key, val);
2288 }
2289}
2290
2291
2292/////////////////////////////////// HAMT high-level functions
2293
2294
2295PyHamtObject *
2296_PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2297{
2298 int32_t key_hash;
2299 int added_leaf = 0;
2300 PyHamtNode *new_root;
2301 PyHamtObject *new_o;
2302
2303 key_hash = hamt_hash(key);
2304 if (key_hash == -1) {
2305 return NULL;
2306 }
2307
2308 new_root = hamt_node_assoc(
2309 (PyHamtNode *)(o->h_root),
2310 0, key_hash, key, val, &added_leaf);
2311 if (new_root == NULL) {
2312 return NULL;
2313 }
2314
2315 if (new_root == o->h_root) {
2316 Py_DECREF(new_root);
2317 Py_INCREF(o);
2318 return o;
2319 }
2320
2321 new_o = hamt_alloc();
2322 if (new_o == NULL) {
2323 Py_DECREF(new_root);
2324 return NULL;
2325 }
2326
2327 new_o->h_root = new_root; /* borrow */
2328 new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2329
2330 return new_o;
2331}
2332
2333PyHamtObject *
2334_PyHamt_Without(PyHamtObject *o, PyObject *key)
2335{
2336 int32_t key_hash = hamt_hash(key);
2337 if (key_hash == -1) {
2338 return NULL;
2339 }
2340
2341 PyHamtNode *new_root = NULL;
2342
2343 hamt_without_t res = hamt_node_without(
2344 (PyHamtNode *)(o->h_root),
2345 0, key_hash, key,
2346 &new_root);
2347
2348 switch (res) {
2349 case W_ERROR:
2350 return NULL;
2351 case W_EMPTY:
2352 return _PyHamt_New();
2353 case W_NOT_FOUND:
2354 Py_INCREF(o);
2355 return o;
2356 case W_NEWNODE: {
2357 assert(new_root != NULL);
2358
2359 PyHamtObject *new_o = hamt_alloc();
2360 if (new_o == NULL) {
2361 Py_DECREF(new_root);
2362 return NULL;
2363 }
2364
2365 new_o->h_root = new_root; /* borrow */
2366 new_o->h_count = o->h_count - 1;
2367 assert(new_o->h_count >= 0);
2368 return new_o;
2369 }
2370 default:
2371 Py_UNREACHABLE();
2372 }
2373}
2374
2375static hamt_find_t
2376hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2377{
2378 if (o->h_count == 0) {
2379 return F_NOT_FOUND;
2380 }
2381
2382 int32_t key_hash = hamt_hash(key);
2383 if (key_hash == -1) {
2384 return F_ERROR;
2385 }
2386
2387 return hamt_node_find(o->h_root, 0, key_hash, key, val);
2388}
2389
2390
2391int
2392_PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2393{
2394 hamt_find_t res = hamt_find(o, key, val);
2395 switch (res) {
2396 case F_ERROR:
2397 return -1;
2398 case F_NOT_FOUND:
2399 return 0;
2400 case F_FOUND:
2401 return 1;
2402 default:
2403 Py_UNREACHABLE();
2404 }
2405}
2406
2407
2408int
2409_PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2410{
2411 if (v == w) {
2412 return 1;
2413 }
2414
2415 if (v->h_count != w->h_count) {
2416 return 0;
2417 }
2418
2419 PyHamtIteratorState iter;
2420 hamt_iter_t iter_res;
2421 hamt_find_t find_res;
2422 PyObject *v_key;
2423 PyObject *v_val;
2424 PyObject *w_val;
2425
2426 hamt_iterator_init(&iter, v->h_root);
2427
2428 do {
2429 iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2430 if (iter_res == I_ITEM) {
2431 find_res = hamt_find(w, v_key, &w_val);
2432 switch (find_res) {
2433 case F_ERROR:
2434 return -1;
2435
2436 case F_NOT_FOUND:
2437 return 0;
2438
2439 case F_FOUND: {
2440 int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2441 if (cmp < 0) {
2442 return -1;
2443 }
2444 if (cmp == 0) {
2445 return 0;
2446 }
2447 }
2448 }
2449 }
2450 } while (iter_res != I_END);
2451
2452 return 1;
2453}
2454
2455Py_ssize_t
2456_PyHamt_Len(PyHamtObject *o)
2457{
2458 return o->h_count;
2459}
2460
2461static PyHamtObject *
2462hamt_alloc(void)
2463{
2464 PyHamtObject *o;
2465 o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2466 if (o == NULL) {
2467 return NULL;
2468 }
2469 o->h_count = 0;
2470 o->h_root = NULL;
2471 o->h_weakreflist = NULL;
2472 PyObject_GC_Track(o);
2473 return o;
2474}
2475
2476PyHamtObject *
2477_PyHamt_New(void)
2478{
2479 if (_empty_hamt != NULL) {
2480 /* HAMT is an immutable object so we can easily cache an
2481 empty instance. */
2482 Py_INCREF(_empty_hamt);
2483 return _empty_hamt;
2484 }
2485
2486 PyHamtObject *o = hamt_alloc();
2487 if (o == NULL) {
2488 return NULL;
2489 }
2490
2491 o->h_root = hamt_node_bitmap_new(0);
2492 if (o->h_root == NULL) {
2493 Py_DECREF(o);
2494 return NULL;
2495 }
2496
2497 o->h_count = 0;
2498
2499 if (_empty_hamt == NULL) {
2500 Py_INCREF(o);
2501 _empty_hamt = o;
2502 }
2503
2504 return o;
2505}
2506
2507#ifdef Py_DEBUG
2508static PyObject *
2509hamt_dump(PyHamtObject *self)
2510{
2511 _PyUnicodeWriter writer;
2512
2513 _PyUnicodeWriter_Init(&writer);
2514
2515 if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2516 goto error;
2517 }
2518
2519 if (hamt_node_dump(self->h_root, &writer, 0)) {
2520 goto error;
2521 }
2522
2523 return _PyUnicodeWriter_Finish(&writer);
2524
2525error:
2526 _PyUnicodeWriter_Dealloc(&writer);
2527 return NULL;
2528}
2529#endif /* Py_DEBUG */
2530
2531
2532/////////////////////////////////// Iterators: Shared Iterator Implementation
2533
2534
2535static int
2536hamt_baseiter_tp_clear(PyHamtIterator *it)
2537{
2538 Py_CLEAR(it->hi_obj);
2539 return 0;
2540}
2541
2542static void
2543hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2544{
2545 PyObject_GC_UnTrack(it);
2546 (void)hamt_baseiter_tp_clear(it);
2547 PyObject_GC_Del(it);
2548}
2549
2550static int
2551hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2552{
2553 Py_VISIT(it->hi_obj);
2554 return 0;
2555}
2556
2557static PyObject *
2558hamt_baseiter_tp_iternext(PyHamtIterator *it)
2559{
2560 PyObject *key;
2561 PyObject *val;
2562 hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2563
2564 switch (res) {
2565 case I_END:
2566 PyErr_SetNone(PyExc_StopIteration);
2567 return NULL;
2568
2569 case I_ITEM: {
2570 return (*(it->hi_yield))(key, val);
2571 }
2572
2573 default: {
2574 Py_UNREACHABLE();
2575 }
2576 }
2577}
2578
2579static Py_ssize_t
2580hamt_baseiter_tp_len(PyHamtIterator *it)
2581{
2582 return it->hi_obj->h_count;
2583}
2584
2585static PyMappingMethods PyHamtIterator_as_mapping = {
2586 (lenfunc)hamt_baseiter_tp_len,
2587};
2588
2589static PyObject *
2590hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2591{
2592 PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2593 if (it == NULL) {
2594 return NULL;
2595 }
2596
2597 Py_INCREF(o);
2598 it->hi_obj = o;
2599 it->hi_yield = yield;
2600
2601 hamt_iterator_init(&it->hi_iter, o->h_root);
2602
2603 return (PyObject*)it;
2604}
2605
2606#define ITERATOR_TYPE_SHARED_SLOTS \
2607 .tp_basicsize = sizeof(PyHamtIterator), \
2608 .tp_itemsize = 0, \
2609 .tp_as_mapping = &PyHamtIterator_as_mapping, \
2610 .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc, \
2611 .tp_getattro = PyObject_GenericGetAttr, \
2612 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, \
2613 .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse, \
2614 .tp_clear = (inquiry)hamt_baseiter_tp_clear, \
2615 .tp_iter = PyObject_SelfIter, \
2616 .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2617
2618
2619/////////////////////////////////// _PyHamtItems_Type
2620
2621
2622PyTypeObject _PyHamtItems_Type = {
2623 PyVarObject_HEAD_INIT(NULL, 0)
2624 "items",
2625 ITERATOR_TYPE_SHARED_SLOTS
2626};
2627
2628static PyObject *
2629hamt_iter_yield_items(PyObject *key, PyObject *val)
2630{
2631 return PyTuple_Pack(2, key, val);
2632}
2633
2634PyObject *
2635_PyHamt_NewIterItems(PyHamtObject *o)
2636{
2637 return hamt_baseiter_new(
2638 &_PyHamtItems_Type, hamt_iter_yield_items, o);
2639}
2640
2641
2642/////////////////////////////////// _PyHamtKeys_Type
2643
2644
2645PyTypeObject _PyHamtKeys_Type = {
2646 PyVarObject_HEAD_INIT(NULL, 0)
2647 "keys",
2648 ITERATOR_TYPE_SHARED_SLOTS
2649};
2650
2651static PyObject *
2652hamt_iter_yield_keys(PyObject *key, PyObject *val)
2653{
2654 Py_INCREF(key);
2655 return key;
2656}
2657
2658PyObject *
2659_PyHamt_NewIterKeys(PyHamtObject *o)
2660{
2661 return hamt_baseiter_new(
2662 &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2663}
2664
2665
2666/////////////////////////////////// _PyHamtValues_Type
2667
2668
2669PyTypeObject _PyHamtValues_Type = {
2670 PyVarObject_HEAD_INIT(NULL, 0)
2671 "values",
2672 ITERATOR_TYPE_SHARED_SLOTS
2673};
2674
2675static PyObject *
2676hamt_iter_yield_values(PyObject *key, PyObject *val)
2677{
2678 Py_INCREF(val);
2679 return val;
2680}
2681
2682PyObject *
2683_PyHamt_NewIterValues(PyHamtObject *o)
2684{
2685 return hamt_baseiter_new(
2686 &_PyHamtValues_Type, hamt_iter_yield_values, o);
2687}
2688
2689
2690/////////////////////////////////// _PyHamt_Type
2691
2692
2693#ifdef Py_DEBUG
2694static PyObject *
2695hamt_dump(PyHamtObject *self);
2696#endif
2697
2698
2699static PyObject *
2700hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2701{
2702 return (PyObject*)_PyHamt_New();
2703}
2704
2705static int
2706hamt_tp_clear(PyHamtObject *self)
2707{
2708 Py_CLEAR(self->h_root);
2709 return 0;
2710}
2711
2712
2713static int
2714hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2715{
2716 Py_VISIT(self->h_root);
2717 return 0;
2718}
2719
2720static void
2721hamt_tp_dealloc(PyHamtObject *self)
2722{
2723 PyObject_GC_UnTrack(self);
2724 if (self->h_weakreflist != NULL) {
2725 PyObject_ClearWeakRefs((PyObject*)self);
2726 }
2727 (void)hamt_tp_clear(self);
2728 Py_TYPE(self)->tp_free(self);
2729}
2730
2731
2732static PyObject *
2733hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2734{
2735 if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2736 Py_RETURN_NOTIMPLEMENTED;
2737 }
2738
2739 int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2740 if (res < 0) {
2741 return NULL;
2742 }
2743
2744 if (op == Py_NE) {
2745 res = !res;
2746 }
2747
2748 if (res) {
2749 Py_RETURN_TRUE;
2750 }
2751 else {
2752 Py_RETURN_FALSE;
2753 }
2754}
2755
2756static int
2757hamt_tp_contains(PyHamtObject *self, PyObject *key)
2758{
2759 PyObject *val;
2760 return _PyHamt_Find(self, key, &val);
2761}
2762
2763static PyObject *
2764hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2765{
2766 PyObject *val;
2767 hamt_find_t res = hamt_find(self, key, &val);
2768 switch (res) {
2769 case F_ERROR:
2770 return NULL;
2771 case F_FOUND:
2772 Py_INCREF(val);
2773 return val;
2774 case F_NOT_FOUND:
2775 PyErr_SetObject(PyExc_KeyError, key);
2776 return NULL;
2777 default:
2778 Py_UNREACHABLE();
2779 }
2780}
2781
2782static Py_ssize_t
2783hamt_tp_len(PyHamtObject *self)
2784{
2785 return _PyHamt_Len(self);
2786}
2787
2788static PyObject *
2789hamt_tp_iter(PyHamtObject *self)
2790{
2791 return _PyHamt_NewIterKeys(self);
2792}
2793
2794static PyObject *
2795hamt_py_set(PyHamtObject *self, PyObject *args)
2796{
2797 PyObject *key;
2798 PyObject *val;
2799
2800 if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2801 return NULL;
2802 }
2803
2804 return (PyObject *)_PyHamt_Assoc(self, key, val);
2805}
2806
2807static PyObject *
2808hamt_py_get(PyHamtObject *self, PyObject *args)
2809{
2810 PyObject *key;
2811 PyObject *def = NULL;
2812
2813 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2814 return NULL;
2815 }
2816
2817 PyObject *val = NULL;
2818 hamt_find_t res = hamt_find(self, key, &val);
2819 switch (res) {
2820 case F_ERROR:
2821 return NULL;
2822 case F_FOUND:
2823 Py_INCREF(val);
2824 return val;
2825 case F_NOT_FOUND:
2826 if (def == NULL) {
2827 Py_RETURN_NONE;
2828 }
2829 Py_INCREF(def);
2830 return def;
2831 default:
2832 Py_UNREACHABLE();
2833 }
2834}
2835
2836static PyObject *
2837hamt_py_delete(PyHamtObject *self, PyObject *key)
2838{
2839 return (PyObject *)_PyHamt_Without(self, key);
2840}
2841
2842static PyObject *
2843hamt_py_items(PyHamtObject *self, PyObject *args)
2844{
2845 return _PyHamt_NewIterItems(self);
2846}
2847
2848static PyObject *
2849hamt_py_values(PyHamtObject *self, PyObject *args)
2850{
2851 return _PyHamt_NewIterValues(self);
2852}
2853
2854static PyObject *
2855hamt_py_keys(PyHamtObject *self, PyObject *args)
2856{
2857 return _PyHamt_NewIterKeys(self);
2858}
2859
2860#ifdef Py_DEBUG
2861static PyObject *
2862hamt_py_dump(PyHamtObject *self, PyObject *args)
2863{
2864 return hamt_dump(self);
2865}
2866#endif
2867
2868
2869static PyMethodDef PyHamt_methods[] = {
2870 {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2871 {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2872 {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2873 {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2874 {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2875 {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2876#ifdef Py_DEBUG
2877 {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2878#endif
2879 {NULL, NULL}
2880};
2881
2882static PySequenceMethods PyHamt_as_sequence = {
2883 0, /* sq_length */
2884 0, /* sq_concat */
2885 0, /* sq_repeat */
2886 0, /* sq_item */
2887 0, /* sq_slice */
2888 0, /* sq_ass_item */
2889 0, /* sq_ass_slice */
2890 (objobjproc)hamt_tp_contains, /* sq_contains */
2891 0, /* sq_inplace_concat */
2892 0, /* sq_inplace_repeat */
2893};
2894
2895static PyMappingMethods PyHamt_as_mapping = {
2896 (lenfunc)hamt_tp_len, /* mp_length */
2897 (binaryfunc)hamt_tp_subscript, /* mp_subscript */
2898};
2899
2900PyTypeObject _PyHamt_Type = {
2901 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2902 "hamt",
2903 sizeof(PyHamtObject),
2904 .tp_methods = PyHamt_methods,
2905 .tp_as_mapping = &PyHamt_as_mapping,
2906 .tp_as_sequence = &PyHamt_as_sequence,
2907 .tp_iter = (getiterfunc)hamt_tp_iter,
2908 .tp_dealloc = (destructor)hamt_tp_dealloc,
2909 .tp_getattro = PyObject_GenericGetAttr,
2910 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2911 .tp_richcompare = hamt_tp_richcompare,
2912 .tp_traverse = (traverseproc)hamt_tp_traverse,
2913 .tp_clear = (inquiry)hamt_tp_clear,
2914 .tp_new = hamt_tp_new,
2915 .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2916 .tp_hash = PyObject_HashNotImplemented,
2917};
2918
2919
2920/////////////////////////////////// Tree Node Types
2921
2922
2923PyTypeObject _PyHamt_ArrayNode_Type = {
2924 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2925 "hamt_array_node",
2926 sizeof(PyHamtNode_Array),
2927 0,
2928 .tp_dealloc = (destructor)hamt_node_array_dealloc,
2929 .tp_getattro = PyObject_GenericGetAttr,
2930 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2931 .tp_traverse = (traverseproc)hamt_node_array_traverse,
2932 .tp_free = PyObject_GC_Del,
2933 .tp_hash = PyObject_HashNotImplemented,
2934};
2935
2936PyTypeObject _PyHamt_BitmapNode_Type = {
2937 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2938 "hamt_bitmap_node",
2939 sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2940 sizeof(PyObject *),
2941 .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2942 .tp_getattro = PyObject_GenericGetAttr,
2943 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2944 .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2945 .tp_free = PyObject_GC_Del,
2946 .tp_hash = PyObject_HashNotImplemented,
2947};
2948
2949PyTypeObject _PyHamt_CollisionNode_Type = {
2950 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2951 "hamt_collision_node",
2952 sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2953 sizeof(PyObject *),
2954 .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2955 .tp_getattro = PyObject_GenericGetAttr,
2956 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2957 .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2958 .tp_free = PyObject_GC_Del,
2959 .tp_hash = PyObject_HashNotImplemented,
2960};
2961
2962
2963int
2964_PyHamt_Init(void)
2965{
2966 if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2967 (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2968 (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2969 (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2970 (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2971 (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2972 (PyType_Ready(&_PyHamtItems_Type) < 0))
2973 {
2974 return 0;
2975 }
2976
2977 return 1;
2978}
2979
2980void
2981_PyHamt_Fini(void)
2982{
2983 Py_CLEAR(_empty_hamt);
2984 Py_CLEAR(_empty_bitmap_node);
2985}
2986