1/* Rax -- A radix tree implementation.
2 *
3 * Version 1.2 -- 7 February 2019
4 *
5 * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Redis nor the names of its contributors may be used
17 * to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <stdlib.h>
34#include <string.h>
35#include <assert.h>
36#include <stdio.h>
37#include <errno.h>
38#include <math.h>
39#include "rax.h"
40
41#ifndef RAX_MALLOC_INCLUDE
42#define RAX_MALLOC_INCLUDE "rax_malloc.h"
43#endif
44
45#include RAX_MALLOC_INCLUDE
46
47/* This is a special pointer that is guaranteed to never have the same value
48 * of a radix tree node. It's used in order to report "not found" error without
49 * requiring the function to have multiple return values. */
50void *raxNotFound = (void*)"rax-not-found-pointer";
51
52/* -------------------------------- Debugging ------------------------------ */
53
54void raxDebugShowNode(const char *msg, raxNode *n);
55
56/* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.
57 * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot
58 * of debugging info to the standard output, however you can still turn
59 * debugging on/off in order to enable it only when you suspect there is an
60 * operation causing a bug using the function raxSetDebugMsg(). */
61#ifdef RAX_DEBUG_MSG
62#define debugf(...) \
63 if (raxDebugMsg) { \
64 printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
65 printf(__VA_ARGS__); \
66 fflush(stdout); \
67 }
68
69#define debugnode(msg,n) raxDebugShowNode(msg,n)
70#else
71#define debugf(...)
72#define debugnode(msg,n)
73#endif
74
75/* By default log debug info if RAX_DEBUG_MSG is defined. */
76static int raxDebugMsg = 1;
77
78/* When debug messages are enabled, turn them on/off dynamically. By
79 * default they are enabled. Set the state to 0 to disable, and 1 to
80 * re-enable. */
81void raxSetDebugMsg(int onoff) {
82 raxDebugMsg = onoff;
83}
84
85/* ------------------------- raxStack functions --------------------------
86 * The raxStack is a simple stack of pointers that is capable of switching
87 * from using a stack-allocated array to dynamic heap once a given number of
88 * items are reached. It is used in order to retain the list of parent nodes
89 * while walking the radix tree in order to implement certain operations that
90 * need to navigate the tree upward.
91 * ------------------------------------------------------------------------- */
92
93/* Initialize the stack. */
94static inline void raxStackInit(raxStack *ts) {
95 ts->stack = ts->static_items;
96 ts->items = 0;
97 ts->maxitems = RAX_STACK_STATIC_ITEMS;
98 ts->oom = 0;
99}
100
101/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
102static inline int raxStackPush(raxStack *ts, void *ptr) {
103 if (ts->items == ts->maxitems) {
104 if (ts->stack == ts->static_items) {
105 ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
106 if (ts->stack == NULL) {
107 ts->stack = ts->static_items;
108 ts->oom = 1;
109 errno = ENOMEM;
110 return 0;
111 }
112 memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
113 } else {
114 void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
115 if (newalloc == NULL) {
116 ts->oom = 1;
117 errno = ENOMEM;
118 return 0;
119 }
120 ts->stack = newalloc;
121 }
122 ts->maxitems *= 2;
123 }
124 ts->stack[ts->items] = ptr;
125 ts->items++;
126 return 1;
127}
128
129/* Pop an item from the stack, the function returns NULL if there are no
130 * items to pop. */
131static inline void *raxStackPop(raxStack *ts) {
132 if (ts->items == 0) return NULL;
133 ts->items--;
134 return ts->stack[ts->items];
135}
136
137/* Return the stack item at the top of the stack without actually consuming
138 * it. */
139static inline void *raxStackPeek(raxStack *ts) {
140 if (ts->items == 0) return NULL;
141 return ts->stack[ts->items-1];
142}
143
144/* Free the stack in case we used heap allocation. */
145static inline void raxStackFree(raxStack *ts) {
146 if (ts->stack != ts->static_items) rax_free(ts->stack);
147}
148
149/* ----------------------------------------------------------------------------
150 * Radix tree implementation
151 * --------------------------------------------------------------------------*/
152
153/* Return the padding needed in the characters section of a node having size
154 * 'nodesize'. The padding is needed to store the child pointers to aligned
155 * addresses. Note that we add 4 to the node size because the node has a four
156 * bytes header. */
157#define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1))
158
159/* Return the pointer to the last child pointer in a node. For the compressed
160 * nodes this is the only child pointer. */
161#define raxNodeLastChildPtr(n) ((raxNode**) ( \
162 ((char*)(n)) + \
163 raxNodeCurrentLength(n) - \
164 sizeof(raxNode*) - \
165 (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
166))
167
168/* Return the pointer to the first child pointer. */
169#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
170 (n)->data + \
171 (n)->size + \
172 raxPadding((n)->size)))
173
174/* Return the current total size of the node. Note that the second line
175 * computes the padding after the string of characters, needed in order to
176 * save pointers to aligned addresses. */
177#define raxNodeCurrentLength(n) ( \
178 sizeof(raxNode)+(n)->size+ \
179 raxPadding((n)->size)+ \
180 ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
181 (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
182)
183
184/* Allocate a new non compressed node with the specified number of children.
185 * If datafield is true, the allocation is made large enough to hold the
186 * associated data pointer.
187 * Returns the new node pointer. On out of memory NULL is returned. */
188raxNode *raxNewNode(size_t children, int datafield) {
189 size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
190 sizeof(raxNode*)*children;
191 if (datafield) nodesize += sizeof(void*);
192 raxNode *node = rax_malloc(nodesize);
193 if (node == NULL) return NULL;
194 node->iskey = 0;
195 node->isnull = 0;
196 node->iscompr = 0;
197 node->size = children;
198 return node;
199}
200
201/* Allocate a new rax and return its pointer. On out of memory the function
202 * returns NULL. */
203rax *raxNew(void) {
204 rax *rax = rax_malloc(sizeof(*rax));
205 if (rax == NULL) return NULL;
206 rax->numele = 0;
207 rax->numnodes = 1;
208 rax->head = raxNewNode(0,0);
209 if (rax->head == NULL) {
210 rax_free(rax);
211 return NULL;
212 } else {
213 return rax;
214 }
215}
216
217/* realloc the node to make room for auxiliary data in order
218 * to store an item in that node. On out of memory NULL is returned. */
219raxNode *raxReallocForData(raxNode *n, void *data) {
220 if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
221 size_t curlen = raxNodeCurrentLength(n);
222 return rax_realloc(n,curlen+sizeof(void*));
223}
224
225/* Set the node auxiliary data to the specified pointer. */
226void raxSetData(raxNode *n, void *data) {
227 n->iskey = 1;
228 if (data != NULL) {
229 n->isnull = 0;
230 void **ndata = (void**)
231 ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
232 memcpy(ndata,&data,sizeof(data));
233 } else {
234 n->isnull = 1;
235 }
236}
237
238/* Get the node auxiliary data. */
239void *raxGetData(raxNode *n) {
240 if (n->isnull) return NULL;
241 void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
242 void *data;
243 memcpy(&data,ndata,sizeof(data));
244 return data;
245}
246
247/* Add a new child to the node 'n' representing the character 'c' and return
248 * its new pointer, as well as the child pointer by reference. Additionally
249 * '***parentlink' is populated with the raxNode pointer-to-pointer of where
250 * the new child was stored, which is useful for the caller to replace the
251 * child pointer if it gets reallocated.
252 *
253 * On success the new parent node pointer is returned (it may change because
254 * of the realloc, so the caller should discard 'n' and use the new value).
255 * On out of memory NULL is returned, and the old node is still valid. */
256raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
257 assert(n->iscompr == 0);
258
259 size_t curlen = raxNodeCurrentLength(n);
260 n->size++;
261 size_t newlen = raxNodeCurrentLength(n);
262 n->size--; /* For now restore the original size. We'll update it only on
263 success at the end. */
264
265 /* Alloc the new child we will link to 'n'. */
266 raxNode *child = raxNewNode(0,0);
267 if (child == NULL) return NULL;
268
269 /* Make space in the original node. */
270 raxNode *newn = rax_realloc(n,newlen);
271 if (newn == NULL) {
272 rax_free(child);
273 return NULL;
274 }
275 n = newn;
276
277 /* After the reallocation, we have up to 8/16 (depending on the system
278 * pointer size, and the required node padding) bytes at the end, that is,
279 * the additional char in the 'data' section, plus one pointer to the new
280 * child, plus the padding needed in order to store addresses into aligned
281 * locations.
282 *
283 * So if we start with the following node, having "abde" edges.
284 *
285 * Note:
286 * - We assume 4 bytes pointer for simplicity.
287 * - Each space below corresponds to one byte
288 *
289 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
290 *
291 * After the reallocation we need: 1 byte for the new edge character
292 * plus 4 bytes for a new child pointer (assuming 32 bit machine).
293 * However after adding 1 byte to the edge char, the header + the edge
294 * characters are no longer aligned, so we also need 3 bytes of padding.
295 * In total the reallocation will add 1+4+3 bytes = 8 bytes:
296 *
297 * (Blank bytes are represented by ".")
298 *
299 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
300 *
301 * Let's find where to insert the new child in order to make sure
302 * it is inserted in-place lexicographically. Assuming we are adding
303 * a child "c" in our case pos will be = 2 after the end of the following
304 * loop. */
305 int pos;
306 for (pos = 0; pos < n->size; pos++) {
307 if (n->data[pos] > c) break;
308 }
309
310 /* Now, if present, move auxiliary data pointer at the end
311 * so that we can mess with the other data without overwriting it.
312 * We will obtain something like that:
313 *
314 * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
315 */
316 unsigned char *src, *dst;
317 if (n->iskey && !n->isnull) {
318 src = ((unsigned char*)n+curlen-sizeof(void*));
319 dst = ((unsigned char*)n+newlen-sizeof(void*));
320 memmove(dst,src,sizeof(void*));
321 }
322
323 /* Compute the "shift", that is, how many bytes we need to move the
324 * pointers section forward because of the addition of the new child
325 * byte in the string section. Note that if we had no padding, that
326 * would be always "1", since we are adding a single byte in the string
327 * section of the node (where now there is "abde" basically).
328 *
329 * However we have padding, so it could be zero, or up to 8.
330 *
331 * Another way to think at the shift is, how many bytes we need to
332 * move child pointers forward *other than* the obvious sizeof(void*)
333 * needed for the additional pointer itself. */
334 size_t shift = newlen - curlen - sizeof(void*);
335
336 /* We said we are adding a node with edge 'c'. The insertion
337 * point is between 'b' and 'd', so the 'pos' variable value is
338 * the index of the first child pointer that we need to move forward
339 * to make space for our new pointer.
340 *
341 * To start, move all the child pointers after the insertion point
342 * of shift+sizeof(pointer) bytes on the right, to obtain:
343 *
344 * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
345 */
346 src = n->data+n->size+
347 raxPadding(n->size)+
348 sizeof(raxNode*)*pos;
349 memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
350
351 /* Move the pointers to the left of the insertion position as well. Often
352 * we don't need to do anything if there was already some padding to use. In
353 * that case the final destination of the pointers will be the same, however
354 * in our example there was no pre-existing padding, so we added one byte
355 * plus there bytes of padding. After the next memmove() things will look
356 * like that:
357 *
358 * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
359 */
360 if (shift) {
361 src = (unsigned char*) raxNodeFirstChildPtr(n);
362 memmove(src+shift,src,sizeof(raxNode*)*pos);
363 }
364
365 /* Now make the space for the additional char in the data section,
366 * but also move the pointers before the insertion point to the right
367 * by shift bytes, in order to obtain the following:
368 *
369 * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
370 */
371 src = n->data+pos;
372 memmove(src+1,src,n->size-pos);
373
374 /* We can now set the character and its child node pointer to get:
375 *
376 * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
377 * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
378 */
379 n->data[pos] = c;
380 n->size++;
381 src = (unsigned char*) raxNodeFirstChildPtr(n);
382 raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos);
383 memcpy(childfield,&child,sizeof(child));
384 *childptr = child;
385 *parentlink = childfield;
386 return n;
387}
388
389/* Turn the node 'n', that must be a node without any children, into a
390 * compressed node representing a set of nodes linked one after the other
391 * and having exactly one child each. The node can be a key or not: this
392 * property and the associated value if any will be preserved.
393 *
394 * The function also returns a child node, since the last node of the
395 * compressed chain cannot be part of the chain: it has zero children while
396 * we can only compress inner nodes with exactly one child each. */
397raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
398 assert(n->size == 0 && n->iscompr == 0);
399 void *data = NULL; /* Initialized only to avoid warnings. */
400 size_t newsize;
401
402 debugf("Compress node: %.*s\n", (int)len,s);
403
404 /* Allocate the child to link to this node. */
405 *child = raxNewNode(0,0);
406 if (*child == NULL) return NULL;
407
408 /* Make space in the parent node. */
409 newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
410 if (n->iskey) {
411 data = raxGetData(n); /* To restore it later. */
412 if (!n->isnull) newsize += sizeof(void*);
413 }
414 raxNode *newn = rax_realloc(n,newsize);
415 if (newn == NULL) {
416 rax_free(*child);
417 return NULL;
418 }
419 n = newn;
420
421 n->iscompr = 1;
422 n->size = len;
423 memcpy(n->data,s,len);
424 if (n->iskey) raxSetData(n,data);
425 raxNode **childfield = raxNodeLastChildPtr(n);
426 memcpy(childfield,child,sizeof(*child));
427 return n;
428}
429
430/* Low level function that walks the tree looking for the string
431 * 's' of 'len' bytes. The function returns the number of characters
432 * of the key that was possible to process: if the returned integer
433 * is the same as 'len', then it means that the node corresponding to the
434 * string was found (however it may not be a key in case the node->iskey is
435 * zero or if simply we stopped in the middle of a compressed node, so that
436 * 'splitpos' is non zero).
437 *
438 * Otherwise if the returned integer is not the same as 'len', there was an
439 * early stop during the tree walk because of a character mismatch.
440 *
441 * The node where the search ended (because the full string was processed
442 * or because there was an early stop) is returned by reference as
443 * '*stopnode' if the passed pointer is not NULL. This node link in the
444 * parent's node is returned as '*plink' if not NULL. Finally, if the
445 * search stopped in a compressed node, '*splitpos' returns the index
446 * inside the compressed node where the search ended. This is useful to
447 * know where to split the node for insertion.
448 *
449 * Note that when we stop in the middle of a compressed node with
450 * a perfect match, this function will return a length equal to the
451 * 'len' argument (all the key matched), and will return a *splitpos which is
452 * always positive (that will represent the index of the character immediately
453 * *after* the last match in the current compressed node).
454 *
455 * When instead we stop at a compressed node and *splitpos is zero, it
456 * means that the current node represents the key (that is, none of the
457 * compressed node characters are needed to represent the key, just all
458 * its parents nodes). */
459static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
460 raxNode *h = rax->head;
461 raxNode **parentlink = &rax->head;
462
463 size_t i = 0; /* Position in the string. */
464 size_t j = 0; /* Position in the node children (or bytes if compressed).*/
465 while(h->size && i < len) {
466 debugnode("Lookup current node",h);
467 unsigned char *v = h->data;
468
469 if (h->iscompr) {
470 for (j = 0; j < h->size && i < len; j++, i++) {
471 if (v[j] != s[i]) break;
472 }
473 if (j != h->size) break;
474 } else {
475 /* Even when h->size is large, linear scan provides good
476 * performances compared to other approaches that are in theory
477 * more sounding, like performing a binary search. */
478 for (j = 0; j < h->size; j++) {
479 if (v[j] == s[i]) break;
480 }
481 if (j == h->size) break;
482 i++;
483 }
484
485 if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
486 raxNode **children = raxNodeFirstChildPtr(h);
487 if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
488 memcpy(&h,children+j,sizeof(h));
489 parentlink = children+j;
490 j = 0; /* If the new node is non compressed and we do not
491 iterate again (since i == len) set the split
492 position to 0 to signal this node represents
493 the searched key. */
494 }
495 debugnode("Lookup stop node is",h);
496 if (stopnode) *stopnode = h;
497 if (plink) *plink = parentlink;
498 if (splitpos && h->iscompr) *splitpos = j;
499 return i;
500}
501
502/* Insert the element 's' of size 'len', setting as auxiliary data
503 * the pointer 'data'. If the element is already present, the associated
504 * data is updated (only if 'overwrite' is set to 1), and 0 is returned,
505 * otherwise the element is inserted and 1 is returned. On out of memory the
506 * function returns 0 as well but sets errno to ENOMEM, otherwise errno will
507 * be set to 0.
508 */
509int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
510 size_t i;
511 int j = 0; /* Split position. If raxLowWalk() stops in a compressed
512 node, the index 'j' represents the char we stopped within the
513 compressed node, that is, the position where to split the
514 node for insertion. */
515 raxNode *h, **parentlink;
516
517 debugf("### Insert %.*s with value %p\n", (int)len, s, data);
518 i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
519
520 /* If i == len we walked following the whole string. If we are not
521 * in the middle of a compressed node, the string is either already
522 * inserted or this middle node is currently not a key, but can represent
523 * our key. We have just to reallocate the node and make space for the
524 * data pointer. */
525 if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
526 debugf("### Insert: node representing key exists\n");
527 /* Make space for the value pointer if needed. */
528 if (!h->iskey || (h->isnull && overwrite)) {
529 h = raxReallocForData(h,data);
530 if (h) memcpy(parentlink,&h,sizeof(h));
531 }
532 if (h == NULL) {
533 errno = ENOMEM;
534 return 0;
535 }
536
537 /* Update the existing key if there is already one. */
538 if (h->iskey) {
539 if (old) *old = raxGetData(h);
540 if (overwrite) raxSetData(h,data);
541 errno = 0;
542 return 0; /* Element already exists. */
543 }
544
545 /* Otherwise set the node as a key. Note that raxSetData()
546 * will set h->iskey. */
547 raxSetData(h,data);
548 rax->numele++;
549 return 1; /* Element inserted. */
550 }
551
552 /* If the node we stopped at is a compressed node, we need to
553 * split it before to continue.
554 *
555 * Splitting a compressed node have a few possible cases.
556 * Imagine that the node 'h' we are currently at is a compressed
557 * node containing the string "ANNIBALE" (it means that it represents
558 * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
559 * pointer of this node pointing at the 'E' node, because remember that
560 * we have characters at the edges of the graph, not inside the nodes
561 * themselves.
562 *
563 * In order to show a real case imagine our node to also point to
564 * another compressed node, that finally points at the node without
565 * children, representing 'O':
566 *
567 * "ANNIBALE" -> "SCO" -> []
568 *
569 * When inserting we may face the following cases. Note that all the cases
570 * require the insertion of a non compressed node with exactly two
571 * children, except for the last case which just requires splitting a
572 * compressed node.
573 *
574 * 1) Inserting "ANNIENTARE"
575 *
576 * |B| -> "ALE" -> "SCO" -> []
577 * "ANNI" -> |-|
578 * |E| -> (... continue algo ...) "NTARE" -> []
579 *
580 * 2) Inserting "ANNIBALI"
581 *
582 * |E| -> "SCO" -> []
583 * "ANNIBAL" -> |-|
584 * |I| -> (... continue algo ...) []
585 *
586 * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
587 *
588 * |N| -> "NIBALE" -> "SCO" -> []
589 * |A| -> |-|
590 * |G| -> (... continue algo ...) |O| -> []
591 *
592 * 4) Inserting "CIAO"
593 *
594 * |A| -> "NNIBALE" -> "SCO" -> []
595 * |-|
596 * |C| -> (... continue algo ...) "IAO" -> []
597 *
598 * 5) Inserting "ANNI"
599 *
600 * "ANNI" -> "BALE" -> "SCO" -> []
601 *
602 * The final algorithm for insertion covering all the above cases is as
603 * follows.
604 *
605 * ============================= ALGO 1 =============================
606 *
607 * For the above cases 1 to 4, that is, all cases where we stopped in
608 * the middle of a compressed node for a character mismatch, do:
609 *
610 * Let $SPLITPOS be the zero-based index at which, in the
611 * compressed node array of characters, we found the mismatching
612 * character. For example if the node contains "ANNIBALE" and we add
613 * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
614 * mismatching character is found.
615 *
616 * 1. Save the current compressed node $NEXT pointer (the pointer to the
617 * child element, that is always present in compressed nodes).
618 *
619 * 2. Create "split node" having as child the non common letter
620 * at the compressed node. The other non common letter (at the key)
621 * will be added later as we continue the normal insertion algorithm
622 * at step "6".
623 *
624 * 3a. IF $SPLITPOS == 0:
625 * Replace the old node with the split node, by copying the auxiliary
626 * data if any. Fix parent's reference. Free old node eventually
627 * (we still need its data for the next steps of the algorithm).
628 *
629 * 3b. IF $SPLITPOS != 0:
630 * Trim the compressed node (reallocating it as well) in order to
631 * contain $splitpos characters. Change child pointer in order to link
632 * to the split node. If new compressed node len is just 1, set
633 * iscompr to 0 (layout is the same). Fix parent's reference.
634 *
635 * 4a. IF the postfix len (the length of the remaining string of the
636 * original compressed node after the split character) is non zero,
637 * create a "postfix node". If the postfix node has just one character
638 * set iscompr to 0, otherwise iscompr to 1. Set the postfix node
639 * child pointer to $NEXT.
640 *
641 * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
642 *
643 * 5. Set child[0] of split node to postfix node.
644 *
645 * 6. Set the split node as the current node, set current index at child[1]
646 * and continue insertion algorithm as usually.
647 *
648 * ============================= ALGO 2 =============================
649 *
650 * For case 5, that is, if we stopped in the middle of a compressed
651 * node but no mismatch was found, do:
652 *
653 * Let $SPLITPOS be the zero-based index at which, in the
654 * compressed node array of characters, we stopped iterating because
655 * there were no more keys character to match. So in the example of
656 * the node "ANNIBALE", adding the string "ANNI", the $SPLITPOS is 4.
657 *
658 * 1. Save the current compressed node $NEXT pointer (the pointer to the
659 * child element, that is always present in compressed nodes).
660 *
661 * 2. Create a "postfix node" containing all the characters from $SPLITPOS
662 * to the end. Use $NEXT as the postfix node child pointer.
663 * If the postfix node length is 1, set iscompr to 0.
664 * Set the node as a key with the associated value of the new
665 * inserted key.
666 *
667 * 3. Trim the current node to contain the first $SPLITPOS characters.
668 * As usually if the new node length is just 1, set iscompr to 0.
669 * Take the iskey / associated value as it was in the original node.
670 * Fix the parent's reference.
671 *
672 * 4. Set the postfix node as the only child pointer of the trimmed
673 * node created at step 1.
674 */
675
676 /* ------------------------- ALGORITHM 1 --------------------------- */
677 if (h->iscompr && i != len) {
678 debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
679 h->size, h->data, (void*)h);
680 debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
681 debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
682 debugf("Other (key) letter is '%c'\n", s[i]);
683
684 /* 1: Save next pointer. */
685 raxNode **childfield = raxNodeLastChildPtr(h);
686 raxNode *next;
687 memcpy(&next,childfield,sizeof(next));
688 debugf("Next is %p\n", (void*)next);
689 debugf("iskey %d\n", h->iskey);
690 if (h->iskey) {
691 debugf("key value is %p\n", raxGetData(h));
692 }
693
694 /* Set the length of the additional nodes we will need. */
695 size_t trimmedlen = j;
696 size_t postfixlen = h->size - j - 1;
697 int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
698 size_t nodesize;
699
700 /* 2: Create the split node. Also allocate the other nodes we'll need
701 * ASAP, so that it will be simpler to handle OOM. */
702 raxNode *splitnode = raxNewNode(1, split_node_is_key);
703 raxNode *trimmed = NULL;
704 raxNode *postfix = NULL;
705
706 if (trimmedlen) {
707 nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
708 sizeof(raxNode*);
709 if (h->iskey && !h->isnull) nodesize += sizeof(void*);
710 trimmed = rax_malloc(nodesize);
711 }
712
713 if (postfixlen) {
714 nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
715 sizeof(raxNode*);
716 postfix = rax_malloc(nodesize);
717 }
718
719 /* OOM? Abort now that the tree is untouched. */
720 if (splitnode == NULL ||
721 (trimmedlen && trimmed == NULL) ||
722 (postfixlen && postfix == NULL))
723 {
724 rax_free(splitnode);
725 rax_free(trimmed);
726 rax_free(postfix);
727 errno = ENOMEM;
728 return 0;
729 }
730 splitnode->data[0] = h->data[j];
731
732 if (j == 0) {
733 /* 3a: Replace the old node with the split node. */
734 if (h->iskey) {
735 void *ndata = raxGetData(h);
736 raxSetData(splitnode,ndata);
737 }
738 memcpy(parentlink,&splitnode,sizeof(splitnode));
739 } else {
740 /* 3b: Trim the compressed node. */
741 trimmed->size = j;
742 memcpy(trimmed->data,h->data,j);
743 trimmed->iscompr = j > 1 ? 1 : 0;
744 trimmed->iskey = h->iskey;
745 trimmed->isnull = h->isnull;
746 if (h->iskey && !h->isnull) {
747 void *ndata = raxGetData(h);
748 raxSetData(trimmed,ndata);
749 }
750 raxNode **cp = raxNodeLastChildPtr(trimmed);
751 memcpy(cp,&splitnode,sizeof(splitnode));
752 memcpy(parentlink,&trimmed,sizeof(trimmed));
753 parentlink = cp; /* Set parentlink to splitnode parent. */
754 rax->numnodes++;
755 }
756
757 /* 4: Create the postfix node: what remains of the original
758 * compressed node after the split. */
759 if (postfixlen) {
760 /* 4a: create a postfix node. */
761 postfix->iskey = 0;
762 postfix->isnull = 0;
763 postfix->size = postfixlen;
764 postfix->iscompr = postfixlen > 1;
765 memcpy(postfix->data,h->data+j+1,postfixlen);
766 raxNode **cp = raxNodeLastChildPtr(postfix);
767 memcpy(cp,&next,sizeof(next));
768 rax->numnodes++;
769 } else {
770 /* 4b: just use next as postfix node. */
771 postfix = next;
772 }
773
774 /* 5: Set splitnode first child as the postfix node. */
775 raxNode **splitchild = raxNodeLastChildPtr(splitnode);
776 memcpy(splitchild,&postfix,sizeof(postfix));
777
778 /* 6. Continue insertion: this will cause the splitnode to
779 * get a new child (the non common character at the currently
780 * inserted key). */
781 rax_free(h);
782 h = splitnode;
783 } else if (h->iscompr && i == len) {
784 /* ------------------------- ALGORITHM 2 --------------------------- */
785 debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
786 h->size, h->data, (void*)h, j);
787
788 /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
789 size_t postfixlen = h->size - j;
790 size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
791 sizeof(raxNode*);
792 if (data != NULL) nodesize += sizeof(void*);
793 raxNode *postfix = rax_malloc(nodesize);
794
795 nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*);
796 if (h->iskey && !h->isnull) nodesize += sizeof(void*);
797 raxNode *trimmed = rax_malloc(nodesize);
798
799 if (postfix == NULL || trimmed == NULL) {
800 rax_free(postfix);
801 rax_free(trimmed);
802 errno = ENOMEM;
803 return 0;
804 }
805
806 /* 1: Save next pointer. */
807 raxNode **childfield = raxNodeLastChildPtr(h);
808 raxNode *next;
809 memcpy(&next,childfield,sizeof(next));
810
811 /* 2: Create the postfix node. */
812 postfix->size = postfixlen;
813 postfix->iscompr = postfixlen > 1;
814 postfix->iskey = 1;
815 postfix->isnull = 0;
816 memcpy(postfix->data,h->data+j,postfixlen);
817 raxSetData(postfix,data);
818 raxNode **cp = raxNodeLastChildPtr(postfix);
819 memcpy(cp,&next,sizeof(next));
820 rax->numnodes++;
821
822 /* 3: Trim the compressed node. */
823 trimmed->size = j;
824 trimmed->iscompr = j > 1;
825 trimmed->iskey = 0;
826 trimmed->isnull = 0;
827 memcpy(trimmed->data,h->data,j);
828 memcpy(parentlink,&trimmed,sizeof(trimmed));
829 if (h->iskey) {
830 void *aux = raxGetData(h);
831 raxSetData(trimmed,aux);
832 }
833
834 /* Fix the trimmed node child pointer to point to
835 * the postfix node. */
836 cp = raxNodeLastChildPtr(trimmed);
837 memcpy(cp,&postfix,sizeof(postfix));
838
839 /* Finish! We don't need to continue with the insertion
840 * algorithm for ALGO 2. The key is already inserted. */
841 rax->numele++;
842 rax_free(h);
843 return 1; /* Key inserted. */
844 }
845
846 /* We walked the radix tree as far as we could, but still there are left
847 * chars in our string. We need to insert the missing nodes. */
848 while(i < len) {
849 raxNode *child;
850
851 /* If this node is going to have a single child, and there
852 * are other characters, so that that would result in a chain
853 * of single-childed nodes, turn it into a compressed node. */
854 if (h->size == 0 && len-i > 1) {
855 debugf("Inserting compressed node\n");
856 size_t comprsize = len-i;
857 if (comprsize > RAX_NODE_MAX_SIZE)
858 comprsize = RAX_NODE_MAX_SIZE;
859 raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
860 if (newh == NULL) goto oom;
861 h = newh;
862 memcpy(parentlink,&h,sizeof(h));
863 parentlink = raxNodeLastChildPtr(h);
864 i += comprsize;
865 } else {
866 debugf("Inserting normal node\n");
867 raxNode **new_parentlink;
868 raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
869 if (newh == NULL) goto oom;
870 h = newh;
871 memcpy(parentlink,&h,sizeof(h));
872 parentlink = new_parentlink;
873 i++;
874 }
875 rax->numnodes++;
876 h = child;
877 }
878 raxNode *newh = raxReallocForData(h,data);
879 if (newh == NULL) goto oom;
880 h = newh;
881 if (!h->iskey) rax->numele++;
882 raxSetData(h,data);
883 memcpy(parentlink,&h,sizeof(h));
884 return 1; /* Element inserted. */
885
886oom:
887 /* This code path handles out of memory after part of the sub-tree was
888 * already modified. Set the node as a key, and then remove it. However we
889 * do that only if the node is a terminal node, otherwise if the OOM
890 * happened reallocating a node in the middle, we don't need to free
891 * anything. */
892 if (h->size == 0) {
893 h->isnull = 1;
894 h->iskey = 1;
895 rax->numele++; /* Compensate the next remove. */
896 assert(raxRemove(rax,s,i,NULL) != 0);
897 }
898 errno = ENOMEM;
899 return 0;
900}
901
902/* Overwriting insert. Just a wrapper for raxGenericInsert() that will
903 * update the element if there is already one for the same key. */
904int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
905 return raxGenericInsert(rax,s,len,data,old,1);
906}
907
908/* Non overwriting insert function: if an element with the same key
909 * exists, the value is not updated and the function returns 0.
910 * This is just a wrapper for raxGenericInsert(). */
911int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
912 return raxGenericInsert(rax,s,len,data,old,0);
913}
914
915/* Find a key in the rax, returns raxNotFound special void pointer value
916 * if the item was not found, otherwise the value associated with the
917 * item is returned. */
918void *raxFind(rax *rax, unsigned char *s, size_t len) {
919 raxNode *h;
920
921 debugf("### Lookup: %.*s\n", (int)len, s);
922 int splitpos = 0;
923 size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
924 if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
925 return raxNotFound;
926 return raxGetData(h);
927}
928
929/* Return the memory address where the 'parent' node stores the specified
930 * 'child' pointer, so that the caller can update the pointer with another
931 * one if needed. The function assumes it will find a match, otherwise the
932 * operation is an undefined behavior (it will continue scanning the
933 * memory without any bound checking). */
934raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
935 raxNode **cp = raxNodeFirstChildPtr(parent);
936 raxNode *c;
937 while(1) {
938 memcpy(&c,cp,sizeof(c));
939 if (c == child) break;
940 cp++;
941 }
942 return cp;
943}
944
945/* Low level child removal from node. The new node pointer (after the child
946 * removal) is returned. Note that this function does not fix the pointer
947 * of the parent node in its parent, so this task is up to the caller.
948 * The function never fails for out of memory. */
949raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
950 debugnode("raxRemoveChild before", parent);
951 /* If parent is a compressed node (having a single child, as for definition
952 * of the data structure), the removal of the child consists into turning
953 * it into a normal node without children. */
954 if (parent->iscompr) {
955 void *data = NULL;
956 if (parent->iskey) data = raxGetData(parent);
957 parent->isnull = 0;
958 parent->iscompr = 0;
959 parent->size = 0;
960 if (parent->iskey) raxSetData(parent,data);
961 debugnode("raxRemoveChild after", parent);
962 return parent;
963 }
964
965 /* Otherwise we need to scan for the child pointer and memmove()
966 * accordingly.
967 *
968 * 1. To start we seek the first element in both the children
969 * pointers and edge bytes in the node. */
970 raxNode **cp = raxNodeFirstChildPtr(parent);
971 raxNode **c = cp;
972 unsigned char *e = parent->data;
973
974 /* 2. Search the child pointer to remove inside the array of children
975 * pointers. */
976 while(1) {
977 raxNode *aux;
978 memcpy(&aux,c,sizeof(aux));
979 if (aux == child) break;
980 c++;
981 e++;
982 }
983
984 /* 3. Remove the edge and the pointer by memmoving the remaining children
985 * pointer and edge bytes one position before. */
986 int taillen = parent->size - (e - parent->data) - 1;
987 debugf("raxRemoveChild tail len: %d\n", taillen);
988 memmove(e,e+1,taillen);
989
990 /* Compute the shift, that is the amount of bytes we should move our
991 * child pointers to the left, since the removal of one edge character
992 * and the corresponding padding change, may change the layout.
993 * We just check if in the old version of the node there was at the
994 * end just a single byte and all padding: in that case removing one char
995 * will remove a whole sizeof(void*) word. */
996 size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0;
997
998 /* Move the children pointers before the deletion point. */
999 if (shift)
1000 memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**));
1001
1002 /* Move the remaining "tail" pointers at the right position as well. */
1003 size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0;
1004 memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen);
1005
1006 /* 4. Update size. */
1007 parent->size--;
1008
1009 /* realloc the node according to the theoretical memory usage, to free
1010 * data if we are over-allocating right now. */
1011 raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
1012 if (newnode) {
1013 debugnode("raxRemoveChild after", newnode);
1014 }
1015 /* Note: if rax_realloc() fails we just return the old address, which
1016 * is valid. */
1017 return newnode ? newnode : parent;
1018}
1019
1020/* Remove the specified item. Returns 1 if the item was found and
1021 * deleted, 0 otherwise. */
1022int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
1023 raxNode *h;
1024 raxStack ts;
1025
1026 debugf("### Delete: %.*s\n", (int)len, s);
1027 raxStackInit(&ts);
1028 int splitpos = 0;
1029 size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
1030 if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
1031 raxStackFree(&ts);
1032 return 0;
1033 }
1034 if (old) *old = raxGetData(h);
1035 h->iskey = 0;
1036 rax->numele--;
1037
1038 /* If this node has no children, the deletion needs to reclaim the
1039 * no longer used nodes. This is an iterative process that needs to
1040 * walk the three upward, deleting all the nodes with just one child
1041 * that are not keys, until the head of the rax is reached or the first
1042 * node with more than one child is found. */
1043
1044 int trycompress = 0; /* Will be set to 1 if we should try to optimize the
1045 tree resulting from the deletion. */
1046
1047 if (h->size == 0) {
1048 debugf("Key deleted in node without children. Cleanup needed.\n");
1049 raxNode *child = NULL;
1050 while(h != rax->head) {
1051 child = h;
1052 debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
1053 (int)child->size, (char*)child->data, child->iskey);
1054 rax_free(child);
1055 rax->numnodes--;
1056 h = raxStackPop(&ts);
1057 /* If this node has more then one child, or actually holds
1058 * a key, stop here. */
1059 if (h->iskey || (!h->iscompr && h->size != 1)) break;
1060 }
1061 if (child) {
1062 debugf("Unlinking child %p from parent %p\n",
1063 (void*)child, (void*)h);
1064 raxNode *new = raxRemoveChild(h,child);
1065 if (new != h) {
1066 raxNode *parent = raxStackPeek(&ts);
1067 raxNode **parentlink;
1068 if (parent == NULL) {
1069 parentlink = &rax->head;
1070 } else {
1071 parentlink = raxFindParentLink(parent,h);
1072 }
1073 memcpy(parentlink,&new,sizeof(new));
1074 }
1075
1076 /* If after the removal the node has just a single child
1077 * and is not a key, we need to try to compress it. */
1078 if (new->size == 1 && new->iskey == 0) {
1079 trycompress = 1;
1080 h = new;
1081 }
1082 }
1083 } else if (h->size == 1) {
1084 /* If the node had just one child, after the removal of the key
1085 * further compression with adjacent nodes is potentially possible. */
1086 trycompress = 1;
1087 }
1088
1089 /* Don't try node compression if our nodes pointers stack is not
1090 * complete because of OOM while executing raxLowWalk() */
1091 if (trycompress && ts.oom) trycompress = 0;
1092
1093 /* Recompression: if trycompress is true, 'h' points to a radix tree node
1094 * that changed in a way that could allow to compress nodes in this
1095 * sub-branch. Compressed nodes represent chains of nodes that are not
1096 * keys and have a single child, so there are two deletion events that
1097 * may alter the tree so that further compression is needed:
1098 *
1099 * 1) A node with a single child was a key and now no longer is a key.
1100 * 2) A node with two children now has just one child.
1101 *
1102 * We try to navigate upward till there are other nodes that can be
1103 * compressed, when we reach the upper node which is not a key and has
1104 * a single child, we scan the chain of children to collect the
1105 * compressible part of the tree, and replace the current node with the
1106 * new one, fixing the child pointer to reference the first non
1107 * compressible node.
1108 *
1109 * Example of case "1". A tree stores the keys "FOO" = 1 and
1110 * "FOOBAR" = 2:
1111 *
1112 *
1113 * "FOO" -> "BAR" -> [] (2)
1114 * (1)
1115 *
1116 * After the removal of "FOO" the tree can be compressed as:
1117 *
1118 * "FOOBAR" -> [] (2)
1119 *
1120 *
1121 * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
1122 * "FOOTER" = 2:
1123 *
1124 * |B| -> "AR" -> [] (1)
1125 * "FOO" -> |-|
1126 * |T| -> "ER" -> [] (2)
1127 *
1128 * After the removal of "FOOTER" the resulting tree is:
1129 *
1130 * "FOO" -> |B| -> "AR" -> [] (1)
1131 *
1132 * That can be compressed into:
1133 *
1134 * "FOOBAR" -> [] (1)
1135 */
1136 if (trycompress) {
1137 debugf("After removing %.*s:\n", (int)len, s);
1138 debugnode("Compression may be needed",h);
1139 debugf("Seek start node\n");
1140
1141 /* Try to reach the upper node that is compressible.
1142 * At the end of the loop 'h' will point to the first node we
1143 * can try to compress and 'parent' to its parent. */
1144 raxNode *parent;
1145 while(1) {
1146 parent = raxStackPop(&ts);
1147 if (!parent || parent->iskey ||
1148 (!parent->iscompr && parent->size != 1)) break;
1149 h = parent;
1150 debugnode("Going up to",h);
1151 }
1152 raxNode *start = h; /* Compression starting node. */
1153
1154 /* Scan chain of nodes we can compress. */
1155 size_t comprsize = h->size;
1156 int nodes = 1;
1157 while(h->size != 0) {
1158 raxNode **cp = raxNodeLastChildPtr(h);
1159 memcpy(&h,cp,sizeof(h));
1160 if (h->iskey || (!h->iscompr && h->size != 1)) break;
1161 /* Stop here if going to the next node would result into
1162 * a compressed node larger than h->size can hold. */
1163 if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
1164 nodes++;
1165 comprsize += h->size;
1166 }
1167 if (nodes > 1) {
1168 /* If we can compress, create the new node and populate it. */
1169 size_t nodesize =
1170 sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*);
1171 raxNode *new = rax_malloc(nodesize);
1172 /* An out of memory here just means we cannot optimize this
1173 * node, but the tree is left in a consistent state. */
1174 if (new == NULL) {
1175 raxStackFree(&ts);
1176 return 1;
1177 }
1178 new->iskey = 0;
1179 new->isnull = 0;
1180 new->iscompr = 1;
1181 new->size = comprsize;
1182 rax->numnodes++;
1183
1184 /* Scan again, this time to populate the new node content and
1185 * to fix the new node child pointer. At the same time we free
1186 * all the nodes that we'll no longer use. */
1187 comprsize = 0;
1188 h = start;
1189 while(h->size != 0) {
1190 memcpy(new->data+comprsize,h->data,h->size);
1191 comprsize += h->size;
1192 raxNode **cp = raxNodeLastChildPtr(h);
1193 raxNode *tofree = h;
1194 memcpy(&h,cp,sizeof(h));
1195 rax_free(tofree); rax->numnodes--;
1196 if (h->iskey || (!h->iscompr && h->size != 1)) break;
1197 }
1198 debugnode("New node",new);
1199
1200 /* Now 'h' points to the first node that we still need to use,
1201 * so our new node child pointer will point to it. */
1202 raxNode **cp = raxNodeLastChildPtr(new);
1203 memcpy(cp,&h,sizeof(h));
1204
1205 /* Fix parent link. */
1206 if (parent) {
1207 raxNode **parentlink = raxFindParentLink(parent,start);
1208 memcpy(parentlink,&new,sizeof(new));
1209 } else {
1210 rax->head = new;
1211 }
1212
1213 debugf("Compressed %d nodes, %d total bytes\n",
1214 nodes, (int)comprsize);
1215 }
1216 }
1217 raxStackFree(&ts);
1218 return 1;
1219}
1220
1221/* This is the core of raxFree(): performs a depth-first scan of the
1222 * tree and releases all the nodes found. */
1223void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
1224 debugnode("free traversing",n);
1225 int numchildren = n->iscompr ? 1 : n->size;
1226 raxNode **cp = raxNodeLastChildPtr(n);
1227 while(numchildren--) {
1228 raxNode *child;
1229 memcpy(&child,cp,sizeof(child));
1230 raxRecursiveFree(rax,child,free_callback);
1231 cp--;
1232 }
1233 debugnode("free depth-first",n);
1234 if (free_callback && n->iskey && !n->isnull)
1235 free_callback(raxGetData(n));
1236 rax_free(n);
1237 rax->numnodes--;
1238}
1239
1240/* Free a whole radix tree, calling the specified callback in order to
1241 * free the auxiliary data. */
1242void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
1243 raxRecursiveFree(rax,rax->head,free_callback);
1244 assert(rax->numnodes == 0);
1245 rax_free(rax);
1246}
1247
1248/* Free a whole radix tree. */
1249void raxFree(rax *rax) {
1250 raxFreeWithCallback(rax,NULL);
1251}
1252
1253/* ------------------------------- Iterator --------------------------------- */
1254
1255/* Initialize a Rax iterator. This call should be performed a single time
1256 * to initialize the iterator, and must be followed by a raxSeek() call,
1257 * otherwise the raxPrev()/raxNext() functions will just return EOF. */
1258void raxStart(raxIterator *it, rax *rt) {
1259 it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
1260 it->rt = rt;
1261 it->key_len = 0;
1262 it->key = it->key_static_string;
1263 it->key_max = RAX_ITER_STATIC_LEN;
1264 it->data = NULL;
1265 it->node_cb = NULL;
1266 raxStackInit(&it->stack);
1267}
1268
1269/* Append characters at the current key string of the iterator 'it'. This
1270 * is a low level function used to implement the iterator, not callable by
1271 * the user. Returns 0 on out of memory, otherwise 1 is returned. */
1272int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
1273 if (len == 0) return 1;
1274 if (it->key_max < it->key_len+len) {
1275 unsigned char *old = (it->key == it->key_static_string) ? NULL :
1276 it->key;
1277 size_t new_max = (it->key_len+len)*2;
1278 it->key = rax_realloc(old,new_max);
1279 if (it->key == NULL) {
1280 it->key = (!old) ? it->key_static_string : old;
1281 errno = ENOMEM;
1282 return 0;
1283 }
1284 if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
1285 it->key_max = new_max;
1286 }
1287 /* Use memmove since there could be an overlap between 's' and
1288 * it->key when we use the current key in order to re-seek. */
1289 memmove(it->key+it->key_len,s,len);
1290 it->key_len += len;
1291 return 1;
1292}
1293
1294/* Remove the specified number of chars from the right of the current
1295 * iterator key. */
1296void raxIteratorDelChars(raxIterator *it, size_t count) {
1297 it->key_len -= count;
1298}
1299
1300/* Do an iteration step towards the next element. At the end of the step the
1301 * iterator key will represent the (new) current key. If it is not possible
1302 * to step in the specified direction since there are no longer elements, the
1303 * iterator is flagged with RAX_ITER_EOF.
1304 *
1305 * If 'noup' is true the function starts directly scanning for the next
1306 * lexicographically smaller children, and the current node is already assumed
1307 * to be the parent of the last key node, so the first operation to go back to
1308 * the parent will be skipped. This option is used by raxSeek() when
1309 * implementing seeking a non existing element with the ">" or "<" options:
1310 * the starting node is not a key in that particular case, so we start the scan
1311 * from a node that does not represent the key set.
1312 *
1313 * The function returns 1 on success or 0 on out of memory. */
1314int raxIteratorNextStep(raxIterator *it, int noup) {
1315 if (it->flags & RAX_ITER_EOF) {
1316 return 1;
1317 } else if (it->flags & RAX_ITER_JUST_SEEKED) {
1318 it->flags &= ~RAX_ITER_JUST_SEEKED;
1319 return 1;
1320 }
1321
1322 /* Save key len, stack items and the node where we are currently
1323 * so that on iterator EOF we can restore the current key and state. */
1324 size_t orig_key_len = it->key_len;
1325 size_t orig_stack_items = it->stack.items;
1326 raxNode *orig_node = it->node;
1327
1328 while(1) {
1329 int children = it->node->iscompr ? 1 : it->node->size;
1330 if (!noup && children) {
1331 debugf("GO DEEPER\n");
1332 /* Seek the lexicographically smaller key in this subtree, which
1333 * is the first one found always going towards the first child
1334 * of every successive node. */
1335 if (!raxStackPush(&it->stack,it->node)) return 0;
1336 raxNode **cp = raxNodeFirstChildPtr(it->node);
1337 if (!raxIteratorAddChars(it,it->node->data,
1338 it->node->iscompr ? it->node->size : 1)) return 0;
1339 memcpy(&it->node,cp,sizeof(it->node));
1340 /* Call the node callback if any, and replace the node pointer
1341 * if the callback returns true. */
1342 if (it->node_cb && it->node_cb(&it->node))
1343 memcpy(cp,&it->node,sizeof(it->node));
1344 /* For "next" step, stop every time we find a key along the
1345 * way, since the key is lexicographically smaller compared to
1346 * what follows in the sub-children. */
1347 if (it->node->iskey) {
1348 it->data = raxGetData(it->node);
1349 return 1;
1350 }
1351 } else {
1352 /* If we finished exploring the previous sub-tree, switch to the
1353 * new one: go upper until a node is found where there are
1354 * children representing keys lexicographically greater than the
1355 * current key. */
1356 while(1) {
1357 int old_noup = noup;
1358
1359 /* Already on head? Can't go up, iteration finished. */
1360 if (!noup && it->node == it->rt->head) {
1361 it->flags |= RAX_ITER_EOF;
1362 it->stack.items = orig_stack_items;
1363 it->key_len = orig_key_len;
1364 it->node = orig_node;
1365 return 1;
1366 }
1367 /* If there are no children at the current node, try parent's
1368 * next child. */
1369 unsigned char prevchild = it->key[it->key_len-1];
1370 if (!noup) {
1371 it->node = raxStackPop(&it->stack);
1372 } else {
1373 noup = 0;
1374 }
1375 /* Adjust the current key to represent the node we are
1376 * at. */
1377 int todel = it->node->iscompr ? it->node->size : 1;
1378 raxIteratorDelChars(it,todel);
1379
1380 /* Try visiting the next child if there was at least one
1381 * additional child. */
1382 if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
1383 raxNode **cp = raxNodeFirstChildPtr(it->node);
1384 int i = 0;
1385 while (i < it->node->size) {
1386 debugf("SCAN NEXT %c\n", it->node->data[i]);
1387 if (it->node->data[i] > prevchild) break;
1388 i++;
1389 cp++;
1390 }
1391 if (i != it->node->size) {
1392 debugf("SCAN found a new node\n");
1393 raxIteratorAddChars(it,it->node->data+i,1);
1394 if (!raxStackPush(&it->stack,it->node)) return 0;
1395 memcpy(&it->node,cp,sizeof(it->node));
1396 /* Call the node callback if any, and replace the node
1397 * pointer if the callback returns true. */
1398 if (it->node_cb && it->node_cb(&it->node))
1399 memcpy(cp,&it->node,sizeof(it->node));
1400 if (it->node->iskey) {
1401 it->data = raxGetData(it->node);
1402 return 1;
1403 }
1404 break;
1405 }
1406 }
1407 }
1408 }
1409 }
1410}
1411
1412/* Seek the greatest key in the subtree at the current node. Return 0 on
1413 * out of memory, otherwise 1. This is a helper function for different
1414 * iteration functions below. */
1415int raxSeekGreatest(raxIterator *it) {
1416 while(it->node->size) {
1417 if (it->node->iscompr) {
1418 if (!raxIteratorAddChars(it,it->node->data,
1419 it->node->size)) return 0;
1420 } else {
1421 if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
1422 return 0;
1423 }
1424 raxNode **cp = raxNodeLastChildPtr(it->node);
1425 if (!raxStackPush(&it->stack,it->node)) return 0;
1426 memcpy(&it->node,cp,sizeof(it->node));
1427 }
1428 return 1;
1429}
1430
1431/* Like raxIteratorNextStep() but implements an iteration step moving
1432 * to the lexicographically previous element. The 'noup' option has a similar
1433 * effect to the one of raxIteratorNextStep(). */
1434int raxIteratorPrevStep(raxIterator *it, int noup) {
1435 if (it->flags & RAX_ITER_EOF) {
1436 return 1;
1437 } else if (it->flags & RAX_ITER_JUST_SEEKED) {
1438 it->flags &= ~RAX_ITER_JUST_SEEKED;
1439 return 1;
1440 }
1441
1442 /* Save key len, stack items and the node where we are currently
1443 * so that on iterator EOF we can restore the current key and state. */
1444 size_t orig_key_len = it->key_len;
1445 size_t orig_stack_items = it->stack.items;
1446 raxNode *orig_node = it->node;
1447
1448 while(1) {
1449 int old_noup = noup;
1450
1451 /* Already on head? Can't go up, iteration finished. */
1452 if (!noup && it->node == it->rt->head) {
1453 it->flags |= RAX_ITER_EOF;
1454 it->stack.items = orig_stack_items;
1455 it->key_len = orig_key_len;
1456 it->node = orig_node;
1457 return 1;
1458 }
1459
1460 unsigned char prevchild = it->key[it->key_len-1];
1461 if (!noup) {
1462 it->node = raxStackPop(&it->stack);
1463 } else {
1464 noup = 0;
1465 }
1466
1467 /* Adjust the current key to represent the node we are
1468 * at. */
1469 int todel = it->node->iscompr ? it->node->size : 1;
1470 raxIteratorDelChars(it,todel);
1471
1472 /* Try visiting the prev child if there is at least one
1473 * child. */
1474 if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
1475 raxNode **cp = raxNodeLastChildPtr(it->node);
1476 int i = it->node->size-1;
1477 while (i >= 0) {
1478 debugf("SCAN PREV %c\n", it->node->data[i]);
1479 if (it->node->data[i] < prevchild) break;
1480 i--;
1481 cp--;
1482 }
1483 /* If we found a new subtree to explore in this node,
1484 * go deeper following all the last children in order to
1485 * find the key lexicographically greater. */
1486 if (i != -1) {
1487 debugf("SCAN found a new node\n");
1488 /* Enter the node we just found. */
1489 if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
1490 if (!raxStackPush(&it->stack,it->node)) return 0;
1491 memcpy(&it->node,cp,sizeof(it->node));
1492 /* Seek sub-tree max. */
1493 if (!raxSeekGreatest(it)) return 0;
1494 }
1495 }
1496
1497 /* Return the key: this could be the key we found scanning a new
1498 * subtree, or if we did not find a new subtree to explore here,
1499 * before giving up with this node, check if it's a key itself. */
1500 if (it->node->iskey) {
1501 it->data = raxGetData(it->node);
1502 return 1;
1503 }
1504 }
1505}
1506
1507/* Seek an iterator at the specified element.
1508 * Return 0 if the seek failed for syntax error or out of memory. Otherwise
1509 * 1 is returned. When 0 is returned for out of memory, errno is set to
1510 * the ENOMEM value. */
1511int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
1512 int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
1513
1514 it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */
1515 it->flags |= RAX_ITER_JUST_SEEKED;
1516 it->flags &= ~RAX_ITER_EOF;
1517 it->key_len = 0;
1518 it->node = NULL;
1519
1520 /* Set flags according to the operator used to perform the seek. */
1521 if (op[0] == '>') {
1522 gt = 1;
1523 if (op[1] == '=') eq = 1;
1524 } else if (op[0] == '<') {
1525 lt = 1;
1526 if (op[1] == '=') eq = 1;
1527 } else if (op[0] == '=') {
1528 eq = 1;
1529 } else if (op[0] == '^') {
1530 first = 1;
1531 } else if (op[0] == '$') {
1532 last = 1;
1533 } else {
1534 errno = 0;
1535 return 0; /* Error. */
1536 }
1537
1538 /* If there are no elements, set the EOF condition immediately and
1539 * return. */
1540 if (it->rt->numele == 0) {
1541 it->flags |= RAX_ITER_EOF;
1542 return 1;
1543 }
1544
1545 if (first) {
1546 /* Seeking the first key greater or equal to the empty string
1547 * is equivalent to seeking the smaller key available. */
1548 return raxSeek(it,">=",NULL,0);
1549 }
1550
1551 if (last) {
1552 /* Find the greatest key taking always the last child till a
1553 * final node is found. */
1554 it->node = it->rt->head;
1555 if (!raxSeekGreatest(it)) return 0;
1556 assert(it->node->iskey);
1557 it->data = raxGetData(it->node);
1558 return 1;
1559 }
1560
1561 /* We need to seek the specified key. What we do here is to actually
1562 * perform a lookup, and later invoke the prev/next key code that
1563 * we already use for iteration. */
1564 int splitpos = 0;
1565 size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
1566
1567 /* Return OOM on incomplete stack info. */
1568 if (it->stack.oom) return 0;
1569
1570 if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
1571 it->node->iskey)
1572 {
1573 /* We found our node, since the key matches and we have an
1574 * "equal" condition. */
1575 if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
1576 it->data = raxGetData(it->node);
1577 } else if (lt || gt) {
1578 /* Exact key not found or eq flag not set. We have to set as current
1579 * key the one represented by the node we stopped at, and perform
1580 * a next/prev operation to seek. */
1581 raxIteratorAddChars(it, ele, i-splitpos);
1582
1583 /* We need to set the iterator in the correct state to call next/prev
1584 * step in order to seek the desired element. */
1585 debugf("After initial seek: i=%d len=%d key=%.*s\n",
1586 (int)i, (int)len, (int)it->key_len, it->key);
1587 if (i != len && !it->node->iscompr) {
1588 /* If we stopped in the middle of a normal node because of a
1589 * mismatch, add the mismatching character to the current key
1590 * and call the iterator with the 'noup' flag so that it will try
1591 * to seek the next/prev child in the current node directly based
1592 * on the mismatching character. */
1593 if (!raxIteratorAddChars(it,ele+i,1)) return 0;
1594 debugf("Seek normal node on mismatch: %.*s\n",
1595 (int)it->key_len, (char*)it->key);
1596
1597 it->flags &= ~RAX_ITER_JUST_SEEKED;
1598 if (lt && !raxIteratorPrevStep(it,1)) return 0;
1599 if (gt && !raxIteratorNextStep(it,1)) return 0;
1600 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
1601 } else if (i != len && it->node->iscompr) {
1602 debugf("Compressed mismatch: %.*s\n",
1603 (int)it->key_len, (char*)it->key);
1604 /* In case of a mismatch within a compressed node. */
1605 int nodechar = it->node->data[splitpos];
1606 int keychar = ele[i];
1607 it->flags &= ~RAX_ITER_JUST_SEEKED;
1608 if (gt) {
1609 /* If the key the compressed node represents is greater
1610 * than our seek element, continue forward, otherwise set the
1611 * state in order to go back to the next sub-tree. */
1612 if (nodechar > keychar) {
1613 if (!raxIteratorNextStep(it,0)) return 0;
1614 } else {
1615 if (!raxIteratorAddChars(it,it->node->data,it->node->size))
1616 return 0;
1617 if (!raxIteratorNextStep(it,1)) return 0;
1618 }
1619 }
1620 if (lt) {
1621 /* If the key the compressed node represents is smaller
1622 * than our seek element, seek the greater key in this
1623 * subtree, otherwise set the state in order to go back to
1624 * the previous sub-tree. */
1625 if (nodechar < keychar) {
1626 if (!raxSeekGreatest(it)) return 0;
1627 it->data = raxGetData(it->node);
1628 } else {
1629 if (!raxIteratorAddChars(it,it->node->data,it->node->size))
1630 return 0;
1631 if (!raxIteratorPrevStep(it,1)) return 0;
1632 }
1633 }
1634 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
1635 } else {
1636 debugf("No mismatch: %.*s\n",
1637 (int)it->key_len, (char*)it->key);
1638 /* If there was no mismatch we are into a node representing the
1639 * key, (but which is not a key or the seek operator does not
1640 * include 'eq'), or we stopped in the middle of a compressed node
1641 * after processing all the key. Continue iterating as this was
1642 * a legitimate key we stopped at. */
1643 it->flags &= ~RAX_ITER_JUST_SEEKED;
1644 if (it->node->iscompr && it->node->iskey && splitpos && lt) {
1645 /* If we stopped in the middle of a compressed node with
1646 * perfect match, and the condition is to seek a key "<" than
1647 * the specified one, then if this node is a key it already
1648 * represents our match. For instance we may have nodes:
1649 *
1650 * "f" -> "oobar" = 1 -> "" = 2
1651 *
1652 * Representing keys "f" = 1, "foobar" = 2. A seek for
1653 * the key < "foo" will stop in the middle of the "oobar"
1654 * node, but will be our match, representing the key "f".
1655 *
1656 * So in that case, we don't seek backward. */
1657 it->data = raxGetData(it->node);
1658 } else {
1659 if (gt && !raxIteratorNextStep(it,0)) return 0;
1660 if (lt && !raxIteratorPrevStep(it,0)) return 0;
1661 }
1662 it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
1663 }
1664 } else {
1665 /* If we are here just eq was set but no match was found. */
1666 it->flags |= RAX_ITER_EOF;
1667 return 1;
1668 }
1669 return 1;
1670}
1671
1672/* Go to the next element in the scope of the iterator 'it'.
1673 * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
1674 * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
1675int raxNext(raxIterator *it) {
1676 if (!raxIteratorNextStep(it,0)) {
1677 errno = ENOMEM;
1678 return 0;
1679 }
1680 if (it->flags & RAX_ITER_EOF) {
1681 errno = 0;
1682 return 0;
1683 }
1684 return 1;
1685}
1686
1687/* Go to the previous element in the scope of the iterator 'it'.
1688 * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
1689 * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
1690int raxPrev(raxIterator *it) {
1691 if (!raxIteratorPrevStep(it,0)) {
1692 errno = ENOMEM;
1693 return 0;
1694 }
1695 if (it->flags & RAX_ITER_EOF) {
1696 errno = 0;
1697 return 0;
1698 }
1699 return 1;
1700}
1701
1702/* Perform a random walk starting in the current position of the iterator.
1703 * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
1704 * and the iterator is set to the node reached after doing a random walk
1705 * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
1706 * using a random number of steps between 1 and two times the logarithm of
1707 * the number of elements.
1708 *
1709 * NOTE: if you use this function to generate random elements from the radix
1710 * tree, expect a disappointing distribution. A random walk produces good
1711 * random elements if the tree is not sparse, however in the case of a radix
1712 * tree certain keys will be reported much more often than others. At least
1713 * this function should be able to explore every possible element eventually. */
1714int raxRandomWalk(raxIterator *it, size_t steps) {
1715 if (it->rt->numele == 0) {
1716 it->flags |= RAX_ITER_EOF;
1717 return 0;
1718 }
1719
1720 if (steps == 0) {
1721 size_t fle = 1+floor(log(it->rt->numele));
1722 fle *= 2;
1723 steps = 1 + rand() % fle;
1724 }
1725
1726 raxNode *n = it->node;
1727 while(steps > 0 || !n->iskey) {
1728 int numchildren = n->iscompr ? 1 : n->size;
1729 int r = rand() % (numchildren+(n != it->rt->head));
1730
1731 if (r == numchildren) {
1732 /* Go up to parent. */
1733 n = raxStackPop(&it->stack);
1734 int todel = n->iscompr ? n->size : 1;
1735 raxIteratorDelChars(it,todel);
1736 } else {
1737 /* Select a random child. */
1738 if (n->iscompr) {
1739 if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
1740 } else {
1741 if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
1742 }
1743 raxNode **cp = raxNodeFirstChildPtr(n)+r;
1744 if (!raxStackPush(&it->stack,n)) return 0;
1745 memcpy(&n,cp,sizeof(n));
1746 }
1747 if (n->iskey) steps--;
1748 }
1749 it->node = n;
1750 it->data = raxGetData(it->node);
1751 return 1;
1752}
1753
1754/* Compare the key currently pointed by the iterator to the specified
1755 * key according to the specified operator. Returns 1 if the comparison is
1756 * true, otherwise 0 is returned. */
1757int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
1758 int eq = 0, lt = 0, gt = 0;
1759
1760 if (op[0] == '=' || op[1] == '=') eq = 1;
1761 if (op[0] == '>') gt = 1;
1762 else if (op[0] == '<') lt = 1;
1763 else if (op[1] != '=') return 0; /* Syntax error. */
1764
1765 size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;
1766 int cmp = memcmp(iter->key,key,minlen);
1767
1768 /* Handle == */
1769 if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;
1770
1771 /* Handle >, >=, <, <= */
1772 if (cmp == 0) {
1773 /* Same prefix: longer wins. */
1774 if (eq && key_len == iter->key_len) return 1;
1775 else if (lt) return iter->key_len < key_len;
1776 else if (gt) return iter->key_len > key_len;
1777 else return 0; /* Avoid warning, just 'eq' is handled before. */
1778 } else if (cmp > 0) {
1779 return gt ? 1 : 0;
1780 } else /* (cmp < 0) */ {
1781 return lt ? 1 : 0;
1782 }
1783}
1784
1785/* Free the iterator. */
1786void raxStop(raxIterator *it) {
1787 if (it->key != it->key_static_string) rax_free(it->key);
1788 raxStackFree(&it->stack);
1789}
1790
1791/* Return if the iterator is in an EOF state. This happens when raxSeek()
1792 * failed to seek an appropriate element, so that raxNext() or raxPrev()
1793 * will return zero, or when an EOF condition was reached while iterating
1794 * with raxNext() and raxPrev(). */
1795int raxEOF(raxIterator *it) {
1796 return it->flags & RAX_ITER_EOF;
1797}
1798
1799/* Return the number of elements inside the radix tree. */
1800uint64_t raxSize(rax *rax) {
1801 return rax->numele;
1802}
1803
1804/* ----------------------------- Introspection ------------------------------ */
1805
1806/* This function is mostly used for debugging and learning purposes.
1807 * It shows an ASCII representation of a tree on standard output, outline
1808 * all the nodes and the contained keys.
1809 *
1810 * The representation is as follow:
1811 *
1812 * "foobar" (compressed node)
1813 * [abc] (normal node with three children)
1814 * [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
1815 * [] (a normal empty node)
1816 *
1817 * Children are represented in new indented lines, each children prefixed by
1818 * the "`-(x)" string, where "x" is the edge byte.
1819 *
1820 * [abc]
1821 * `-(a) "ladin"
1822 * `-(b) [kj]
1823 * `-(c) []
1824 *
1825 * However when a node has a single child the following representation
1826 * is used instead:
1827 *
1828 * [abc] -> "ladin" -> []
1829 */
1830
1831/* The actual implementation of raxShow(). */
1832void raxRecursiveShow(int level, int lpad, raxNode *n) {
1833 char s = n->iscompr ? '"' : '[';
1834 char e = n->iscompr ? '"' : ']';
1835
1836 int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
1837 if (n->iskey) {
1838 numchars += printf("=%p",raxGetData(n));
1839 }
1840
1841 int numchildren = n->iscompr ? 1 : n->size;
1842 /* Note that 7 and 4 magic constants are the string length
1843 * of " `-(x) " and " -> " respectively. */
1844 if (level) {
1845 lpad += (numchildren > 1) ? 7 : 4;
1846 if (numchildren == 1) lpad += numchars;
1847 }
1848 raxNode **cp = raxNodeFirstChildPtr(n);
1849 for (int i = 0; i < numchildren; i++) {
1850 char *branch = " `-(%c) ";
1851 if (numchildren > 1) {
1852 printf("\n");
1853 for (int j = 0; j < lpad; j++) putchar(' ');
1854 printf(branch,n->data[i]);
1855 } else {
1856 printf(" -> ");
1857 }
1858 raxNode *child;
1859 memcpy(&child,cp,sizeof(child));
1860 raxRecursiveShow(level+1,lpad,child);
1861 cp++;
1862 }
1863}
1864
1865/* Show a tree, as outlined in the comment above. */
1866void raxShow(rax *rax) {
1867 raxRecursiveShow(0,0,rax->head);
1868 putchar('\n');
1869}
1870
1871/* Used by debugnode() macro to show info about a given node. */
1872void raxDebugShowNode(const char *msg, raxNode *n) {
1873 if (raxDebugMsg == 0) return;
1874 printf("%s: %p [%.*s] key:%u size:%u children:",
1875 msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
1876 int numcld = n->iscompr ? 1 : n->size;
1877 raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
1878 while(numcld--) {
1879 raxNode *child;
1880 memcpy(&child,cldptr,sizeof(child));
1881 cldptr++;
1882 printf("%p ", (void*)child);
1883 }
1884 printf("\n");
1885 fflush(stdout);
1886}
1887
1888/* Touch all the nodes of a tree returning a check sum. This is useful
1889 * in order to make Valgrind detect if there is something wrong while
1890 * reading the data structure.
1891 *
1892 * This function was used in order to identify Rax bugs after a big refactoring
1893 * using this technique:
1894 *
1895 * 1. The rax-test is executed using Valgrind, adding a printf() so that for
1896 * the fuzz tester we see what iteration in the loop we are in.
1897 * 2. After every modification of the radix tree made by the fuzz tester
1898 * in rax-test.c, we add a call to raxTouch().
1899 * 3. Now as soon as an operation will corrupt the tree, raxTouch() will
1900 * detect it (via Valgrind) immediately. We can add more calls to narrow
1901 * the state.
1902 * 4. At this point a good idea is to enable Rax debugging messages immediately
1903 * before the moment the tree is corrupted, to see what happens.
1904 */
1905unsigned long raxTouch(raxNode *n) {
1906 debugf("Touching %p\n", (void*)n);
1907 unsigned long sum = 0;
1908 if (n->iskey) {
1909 sum += (unsigned long)raxGetData(n);
1910 }
1911
1912 int numchildren = n->iscompr ? 1 : n->size;
1913 raxNode **cp = raxNodeFirstChildPtr(n);
1914 int count = 0;
1915 for (int i = 0; i < numchildren; i++) {
1916 if (numchildren > 1) {
1917 sum += (long)n->data[i];
1918 }
1919 raxNode *child;
1920 memcpy(&child,cp,sizeof(child));
1921 if (child == (void*)0x65d1760) count++;
1922 if (count > 1) exit(1);
1923 sum += raxTouch(child);
1924 cp++;
1925 }
1926 return sum;
1927}
1928