1 | /* Rax -- A radix tree implementation. |
2 | * |
3 | * Version 1.2 -- 7 February 2019 |
4 | * |
5 | * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com> |
6 | * All rights reserved. |
7 | * |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions are met: |
10 | * |
11 | * * Redistributions of source code must retain the above copyright notice, |
12 | * this list of conditions and the following disclaimer. |
13 | * * Redistributions in binary form must reproduce the above copyright |
14 | * notice, this list of conditions and the following disclaimer in the |
15 | * documentation and/or other materials provided with the distribution. |
16 | * * Neither the name of Redis nor the names of its contributors may be used |
17 | * to endorse or promote products derived from this software without |
18 | * specific prior written permission. |
19 | * |
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
24 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
30 | * POSSIBILITY OF SUCH DAMAGE. |
31 | */ |
32 | |
33 | #include <stdlib.h> |
34 | #include <string.h> |
35 | #include <assert.h> |
36 | #include <stdio.h> |
37 | #include <errno.h> |
38 | #include <math.h> |
39 | #include "rax.h" |
40 | |
41 | #ifndef RAX_MALLOC_INCLUDE |
42 | #define RAX_MALLOC_INCLUDE "rax_malloc.h" |
43 | #endif |
44 | |
45 | #include RAX_MALLOC_INCLUDE |
46 | |
47 | /* This is a special pointer that is guaranteed to never have the same value |
48 | * of a radix tree node. It's used in order to report "not found" error without |
49 | * requiring the function to have multiple return values. */ |
50 | void *raxNotFound = (void*)"rax-not-found-pointer" ; |
51 | |
52 | /* -------------------------------- Debugging ------------------------------ */ |
53 | |
54 | void raxDebugShowNode(const char *msg, raxNode *n); |
55 | |
56 | /* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on. |
57 | * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot |
58 | * of debugging info to the standard output, however you can still turn |
59 | * debugging on/off in order to enable it only when you suspect there is an |
60 | * operation causing a bug using the function raxSetDebugMsg(). */ |
61 | #ifdef RAX_DEBUG_MSG |
62 | #define debugf(...) \ |
63 | if (raxDebugMsg) { \ |
64 | printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \ |
65 | printf(__VA_ARGS__); \ |
66 | fflush(stdout); \ |
67 | } |
68 | |
69 | #define debugnode(msg,n) raxDebugShowNode(msg,n) |
70 | #else |
71 | #define debugf(...) |
72 | #define debugnode(msg,n) |
73 | #endif |
74 | |
75 | /* By default log debug info if RAX_DEBUG_MSG is defined. */ |
76 | static int raxDebugMsg = 1; |
77 | |
78 | /* When debug messages are enabled, turn them on/off dynamically. By |
79 | * default they are enabled. Set the state to 0 to disable, and 1 to |
80 | * re-enable. */ |
81 | void raxSetDebugMsg(int onoff) { |
82 | raxDebugMsg = onoff; |
83 | } |
84 | |
85 | /* ------------------------- raxStack functions -------------------------- |
86 | * The raxStack is a simple stack of pointers that is capable of switching |
87 | * from using a stack-allocated array to dynamic heap once a given number of |
88 | * items are reached. It is used in order to retain the list of parent nodes |
89 | * while walking the radix tree in order to implement certain operations that |
90 | * need to navigate the tree upward. |
91 | * ------------------------------------------------------------------------- */ |
92 | |
93 | /* Initialize the stack. */ |
94 | static inline void raxStackInit(raxStack *ts) { |
95 | ts->stack = ts->static_items; |
96 | ts->items = 0; |
97 | ts->maxitems = RAX_STACK_STATIC_ITEMS; |
98 | ts->oom = 0; |
99 | } |
100 | |
101 | /* Push an item into the stack, returns 1 on success, 0 on out of memory. */ |
102 | static inline int raxStackPush(raxStack *ts, void *ptr) { |
103 | if (ts->items == ts->maxitems) { |
104 | if (ts->stack == ts->static_items) { |
105 | ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2); |
106 | if (ts->stack == NULL) { |
107 | ts->stack = ts->static_items; |
108 | ts->oom = 1; |
109 | errno = ENOMEM; |
110 | return 0; |
111 | } |
112 | memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems); |
113 | } else { |
114 | void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2); |
115 | if (newalloc == NULL) { |
116 | ts->oom = 1; |
117 | errno = ENOMEM; |
118 | return 0; |
119 | } |
120 | ts->stack = newalloc; |
121 | } |
122 | ts->maxitems *= 2; |
123 | } |
124 | ts->stack[ts->items] = ptr; |
125 | ts->items++; |
126 | return 1; |
127 | } |
128 | |
129 | /* Pop an item from the stack, the function returns NULL if there are no |
130 | * items to pop. */ |
131 | static inline void *raxStackPop(raxStack *ts) { |
132 | if (ts->items == 0) return NULL; |
133 | ts->items--; |
134 | return ts->stack[ts->items]; |
135 | } |
136 | |
137 | /* Return the stack item at the top of the stack without actually consuming |
138 | * it. */ |
139 | static inline void *raxStackPeek(raxStack *ts) { |
140 | if (ts->items == 0) return NULL; |
141 | return ts->stack[ts->items-1]; |
142 | } |
143 | |
144 | /* Free the stack in case we used heap allocation. */ |
145 | static inline void raxStackFree(raxStack *ts) { |
146 | if (ts->stack != ts->static_items) rax_free(ts->stack); |
147 | } |
148 | |
149 | /* ---------------------------------------------------------------------------- |
150 | * Radix tree implementation |
151 | * --------------------------------------------------------------------------*/ |
152 | |
153 | /* Return the padding needed in the characters section of a node having size |
154 | * 'nodesize'. The padding is needed to store the child pointers to aligned |
155 | * addresses. Note that we add 4 to the node size because the node has a four |
156 | * bytes header. */ |
157 | #define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1)) |
158 | |
159 | /* Return the pointer to the last child pointer in a node. For the compressed |
160 | * nodes this is the only child pointer. */ |
161 | #define raxNodeLastChildPtr(n) ((raxNode**) ( \ |
162 | ((char*)(n)) + \ |
163 | raxNodeCurrentLength(n) - \ |
164 | sizeof(raxNode*) - \ |
165 | (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ |
166 | )) |
167 | |
168 | /* Return the pointer to the first child pointer. */ |
169 | #define raxNodeFirstChildPtr(n) ((raxNode**) ( \ |
170 | (n)->data + \ |
171 | (n)->size + \ |
172 | raxPadding((n)->size))) |
173 | |
174 | /* Return the current total size of the node. Note that the second line |
175 | * computes the padding after the string of characters, needed in order to |
176 | * save pointers to aligned addresses. */ |
177 | #define raxNodeCurrentLength(n) ( \ |
178 | sizeof(raxNode)+(n)->size+ \ |
179 | raxPadding((n)->size)+ \ |
180 | ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \ |
181 | (((n)->iskey && !(n)->isnull)*sizeof(void*)) \ |
182 | ) |
183 | |
184 | /* Allocate a new non compressed node with the specified number of children. |
185 | * If datafield is true, the allocation is made large enough to hold the |
186 | * associated data pointer. |
187 | * Returns the new node pointer. On out of memory NULL is returned. */ |
188 | raxNode *raxNewNode(size_t children, int datafield) { |
189 | size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+ |
190 | sizeof(raxNode*)*children; |
191 | if (datafield) nodesize += sizeof(void*); |
192 | raxNode *node = rax_malloc(nodesize); |
193 | if (node == NULL) return NULL; |
194 | node->iskey = 0; |
195 | node->isnull = 0; |
196 | node->iscompr = 0; |
197 | node->size = children; |
198 | return node; |
199 | } |
200 | |
201 | /* Allocate a new rax and return its pointer. On out of memory the function |
202 | * returns NULL. */ |
203 | rax *raxNew(void) { |
204 | rax *rax = rax_malloc(sizeof(*rax)); |
205 | if (rax == NULL) return NULL; |
206 | rax->numele = 0; |
207 | rax->numnodes = 1; |
208 | rax->head = raxNewNode(0,0); |
209 | if (rax->head == NULL) { |
210 | rax_free(rax); |
211 | return NULL; |
212 | } else { |
213 | return rax; |
214 | } |
215 | } |
216 | |
217 | /* realloc the node to make room for auxiliary data in order |
218 | * to store an item in that node. On out of memory NULL is returned. */ |
219 | raxNode *raxReallocForData(raxNode *n, void *data) { |
220 | if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */ |
221 | size_t curlen = raxNodeCurrentLength(n); |
222 | return rax_realloc(n,curlen+sizeof(void*)); |
223 | } |
224 | |
225 | /* Set the node auxiliary data to the specified pointer. */ |
226 | void raxSetData(raxNode *n, void *data) { |
227 | n->iskey = 1; |
228 | if (data != NULL) { |
229 | n->isnull = 0; |
230 | void **ndata = (void**) |
231 | ((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); |
232 | memcpy(ndata,&data,sizeof(data)); |
233 | } else { |
234 | n->isnull = 1; |
235 | } |
236 | } |
237 | |
238 | /* Get the node auxiliary data. */ |
239 | void *raxGetData(raxNode *n) { |
240 | if (n->isnull) return NULL; |
241 | void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*)); |
242 | void *data; |
243 | memcpy(&data,ndata,sizeof(data)); |
244 | return data; |
245 | } |
246 | |
247 | /* Add a new child to the node 'n' representing the character 'c' and return |
248 | * its new pointer, as well as the child pointer by reference. Additionally |
249 | * '***parentlink' is populated with the raxNode pointer-to-pointer of where |
250 | * the new child was stored, which is useful for the caller to replace the |
251 | * child pointer if it gets reallocated. |
252 | * |
253 | * On success the new parent node pointer is returned (it may change because |
254 | * of the realloc, so the caller should discard 'n' and use the new value). |
255 | * On out of memory NULL is returned, and the old node is still valid. */ |
256 | raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) { |
257 | assert(n->iscompr == 0); |
258 | |
259 | size_t curlen = raxNodeCurrentLength(n); |
260 | n->size++; |
261 | size_t newlen = raxNodeCurrentLength(n); |
262 | n->size--; /* For now restore the original size. We'll update it only on |
263 | success at the end. */ |
264 | |
265 | /* Alloc the new child we will link to 'n'. */ |
266 | raxNode *child = raxNewNode(0,0); |
267 | if (child == NULL) return NULL; |
268 | |
269 | /* Make space in the original node. */ |
270 | raxNode *newn = rax_realloc(n,newlen); |
271 | if (newn == NULL) { |
272 | rax_free(child); |
273 | return NULL; |
274 | } |
275 | n = newn; |
276 | |
277 | /* After the reallocation, we have up to 8/16 (depending on the system |
278 | * pointer size, and the required node padding) bytes at the end, that is, |
279 | * the additional char in the 'data' section, plus one pointer to the new |
280 | * child, plus the padding needed in order to store addresses into aligned |
281 | * locations. |
282 | * |
283 | * So if we start with the following node, having "abde" edges. |
284 | * |
285 | * Note: |
286 | * - We assume 4 bytes pointer for simplicity. |
287 | * - Each space below corresponds to one byte |
288 | * |
289 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP| |
290 | * |
291 | * After the reallocation we need: 1 byte for the new edge character |
292 | * plus 4 bytes for a new child pointer (assuming 32 bit machine). |
293 | * However after adding 1 byte to the edge char, the header + the edge |
294 | * characters are no longer aligned, so we also need 3 bytes of padding. |
295 | * In total the reallocation will add 1+4+3 bytes = 8 bytes: |
296 | * |
297 | * (Blank bytes are represented by ".") |
298 | * |
299 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....] |
300 | * |
301 | * Let's find where to insert the new child in order to make sure |
302 | * it is inserted in-place lexicographically. Assuming we are adding |
303 | * a child "c" in our case pos will be = 2 after the end of the following |
304 | * loop. */ |
305 | int pos; |
306 | for (pos = 0; pos < n->size; pos++) { |
307 | if (n->data[pos] > c) break; |
308 | } |
309 | |
310 | /* Now, if present, move auxiliary data pointer at the end |
311 | * so that we can mess with the other data without overwriting it. |
312 | * We will obtain something like that: |
313 | * |
314 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP| |
315 | */ |
316 | unsigned char *src, *dst; |
317 | if (n->iskey && !n->isnull) { |
318 | src = ((unsigned char*)n+curlen-sizeof(void*)); |
319 | dst = ((unsigned char*)n+newlen-sizeof(void*)); |
320 | memmove(dst,src,sizeof(void*)); |
321 | } |
322 | |
323 | /* Compute the "shift", that is, how many bytes we need to move the |
324 | * pointers section forward because of the addition of the new child |
325 | * byte in the string section. Note that if we had no padding, that |
326 | * would be always "1", since we are adding a single byte in the string |
327 | * section of the node (where now there is "abde" basically). |
328 | * |
329 | * However we have padding, so it could be zero, or up to 8. |
330 | * |
331 | * Another way to think at the shift is, how many bytes we need to |
332 | * move child pointers forward *other than* the obvious sizeof(void*) |
333 | * needed for the additional pointer itself. */ |
334 | size_t shift = newlen - curlen - sizeof(void*); |
335 | |
336 | /* We said we are adding a node with edge 'c'. The insertion |
337 | * point is between 'b' and 'd', so the 'pos' variable value is |
338 | * the index of the first child pointer that we need to move forward |
339 | * to make space for our new pointer. |
340 | * |
341 | * To start, move all the child pointers after the insertion point |
342 | * of shift+sizeof(pointer) bytes on the right, to obtain: |
343 | * |
344 | * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP| |
345 | */ |
346 | src = n->data+n->size+ |
347 | raxPadding(n->size)+ |
348 | sizeof(raxNode*)*pos; |
349 | memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos)); |
350 | |
351 | /* Move the pointers to the left of the insertion position as well. Often |
352 | * we don't need to do anything if there was already some padding to use. In |
353 | * that case the final destination of the pointers will be the same, however |
354 | * in our example there was no pre-existing padding, so we added one byte |
355 | * plus there bytes of padding. After the next memmove() things will look |
356 | * like that: |
357 | * |
358 | * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP| |
359 | */ |
360 | if (shift) { |
361 | src = (unsigned char*) raxNodeFirstChildPtr(n); |
362 | memmove(src+shift,src,sizeof(raxNode*)*pos); |
363 | } |
364 | |
365 | /* Now make the space for the additional char in the data section, |
366 | * but also move the pointers before the insertion point to the right |
367 | * by shift bytes, in order to obtain the following: |
368 | * |
369 | * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| |
370 | */ |
371 | src = n->data+pos; |
372 | memmove(src+1,src,n->size-pos); |
373 | |
374 | /* We can now set the character and its child node pointer to get: |
375 | * |
376 | * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| |
377 | * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP| |
378 | */ |
379 | n->data[pos] = c; |
380 | n->size++; |
381 | src = (unsigned char*) raxNodeFirstChildPtr(n); |
382 | raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos); |
383 | memcpy(childfield,&child,sizeof(child)); |
384 | *childptr = child; |
385 | *parentlink = childfield; |
386 | return n; |
387 | } |
388 | |
389 | /* Turn the node 'n', that must be a node without any children, into a |
390 | * compressed node representing a set of nodes linked one after the other |
391 | * and having exactly one child each. The node can be a key or not: this |
392 | * property and the associated value if any will be preserved. |
393 | * |
394 | * The function also returns a child node, since the last node of the |
395 | * compressed chain cannot be part of the chain: it has zero children while |
396 | * we can only compress inner nodes with exactly one child each. */ |
397 | raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) { |
398 | assert(n->size == 0 && n->iscompr == 0); |
399 | void *data = NULL; /* Initialized only to avoid warnings. */ |
400 | size_t newsize; |
401 | |
402 | debugf("Compress node: %.*s\n" , (int)len,s); |
403 | |
404 | /* Allocate the child to link to this node. */ |
405 | *child = raxNewNode(0,0); |
406 | if (*child == NULL) return NULL; |
407 | |
408 | /* Make space in the parent node. */ |
409 | newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*); |
410 | if (n->iskey) { |
411 | data = raxGetData(n); /* To restore it later. */ |
412 | if (!n->isnull) newsize += sizeof(void*); |
413 | } |
414 | raxNode *newn = rax_realloc(n,newsize); |
415 | if (newn == NULL) { |
416 | rax_free(*child); |
417 | return NULL; |
418 | } |
419 | n = newn; |
420 | |
421 | n->iscompr = 1; |
422 | n->size = len; |
423 | memcpy(n->data,s,len); |
424 | if (n->iskey) raxSetData(n,data); |
425 | raxNode **childfield = raxNodeLastChildPtr(n); |
426 | memcpy(childfield,child,sizeof(*child)); |
427 | return n; |
428 | } |
429 | |
430 | /* Low level function that walks the tree looking for the string |
431 | * 's' of 'len' bytes. The function returns the number of characters |
432 | * of the key that was possible to process: if the returned integer |
433 | * is the same as 'len', then it means that the node corresponding to the |
434 | * string was found (however it may not be a key in case the node->iskey is |
435 | * zero or if simply we stopped in the middle of a compressed node, so that |
436 | * 'splitpos' is non zero). |
437 | * |
438 | * Otherwise if the returned integer is not the same as 'len', there was an |
439 | * early stop during the tree walk because of a character mismatch. |
440 | * |
441 | * The node where the search ended (because the full string was processed |
442 | * or because there was an early stop) is returned by reference as |
443 | * '*stopnode' if the passed pointer is not NULL. This node link in the |
444 | * parent's node is returned as '*plink' if not NULL. Finally, if the |
445 | * search stopped in a compressed node, '*splitpos' returns the index |
446 | * inside the compressed node where the search ended. This is useful to |
447 | * know where to split the node for insertion. |
448 | * |
449 | * Note that when we stop in the middle of a compressed node with |
450 | * a perfect match, this function will return a length equal to the |
451 | * 'len' argument (all the key matched), and will return a *splitpos which is |
452 | * always positive (that will represent the index of the character immediately |
453 | * *after* the last match in the current compressed node). |
454 | * |
455 | * When instead we stop at a compressed node and *splitpos is zero, it |
456 | * means that the current node represents the key (that is, none of the |
457 | * compressed node characters are needed to represent the key, just all |
458 | * its parents nodes). */ |
459 | static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) { |
460 | raxNode *h = rax->head; |
461 | raxNode **parentlink = &rax->head; |
462 | |
463 | size_t i = 0; /* Position in the string. */ |
464 | size_t j = 0; /* Position in the node children (or bytes if compressed).*/ |
465 | while(h->size && i < len) { |
466 | debugnode("Lookup current node" ,h); |
467 | unsigned char *v = h->data; |
468 | |
469 | if (h->iscompr) { |
470 | for (j = 0; j < h->size && i < len; j++, i++) { |
471 | if (v[j] != s[i]) break; |
472 | } |
473 | if (j != h->size) break; |
474 | } else { |
475 | /* Even when h->size is large, linear scan provides good |
476 | * performances compared to other approaches that are in theory |
477 | * more sounding, like performing a binary search. */ |
478 | for (j = 0; j < h->size; j++) { |
479 | if (v[j] == s[i]) break; |
480 | } |
481 | if (j == h->size) break; |
482 | i++; |
483 | } |
484 | |
485 | if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */ |
486 | raxNode **children = raxNodeFirstChildPtr(h); |
487 | if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */ |
488 | memcpy(&h,children+j,sizeof(h)); |
489 | parentlink = children+j; |
490 | j = 0; /* If the new node is non compressed and we do not |
491 | iterate again (since i == len) set the split |
492 | position to 0 to signal this node represents |
493 | the searched key. */ |
494 | } |
495 | debugnode("Lookup stop node is" ,h); |
496 | if (stopnode) *stopnode = h; |
497 | if (plink) *plink = parentlink; |
498 | if (splitpos && h->iscompr) *splitpos = j; |
499 | return i; |
500 | } |
501 | |
502 | /* Insert the element 's' of size 'len', setting as auxiliary data |
503 | * the pointer 'data'. If the element is already present, the associated |
504 | * data is updated (only if 'overwrite' is set to 1), and 0 is returned, |
505 | * otherwise the element is inserted and 1 is returned. On out of memory the |
506 | * function returns 0 as well but sets errno to ENOMEM, otherwise errno will |
507 | * be set to 0. |
508 | */ |
509 | int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) { |
510 | size_t i; |
511 | int j = 0; /* Split position. If raxLowWalk() stops in a compressed |
512 | node, the index 'j' represents the char we stopped within the |
513 | compressed node, that is, the position where to split the |
514 | node for insertion. */ |
515 | raxNode *h, **parentlink; |
516 | |
517 | debugf("### Insert %.*s with value %p\n" , (int)len, s, data); |
518 | i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL); |
519 | |
520 | /* If i == len we walked following the whole string. If we are not |
521 | * in the middle of a compressed node, the string is either already |
522 | * inserted or this middle node is currently not a key, but can represent |
523 | * our key. We have just to reallocate the node and make space for the |
524 | * data pointer. */ |
525 | if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { |
526 | debugf("### Insert: node representing key exists\n" ); |
527 | /* Make space for the value pointer if needed. */ |
528 | if (!h->iskey || (h->isnull && overwrite)) { |
529 | h = raxReallocForData(h,data); |
530 | if (h) memcpy(parentlink,&h,sizeof(h)); |
531 | } |
532 | if (h == NULL) { |
533 | errno = ENOMEM; |
534 | return 0; |
535 | } |
536 | |
537 | /* Update the existing key if there is already one. */ |
538 | if (h->iskey) { |
539 | if (old) *old = raxGetData(h); |
540 | if (overwrite) raxSetData(h,data); |
541 | errno = 0; |
542 | return 0; /* Element already exists. */ |
543 | } |
544 | |
545 | /* Otherwise set the node as a key. Note that raxSetData() |
546 | * will set h->iskey. */ |
547 | raxSetData(h,data); |
548 | rax->numele++; |
549 | return 1; /* Element inserted. */ |
550 | } |
551 | |
552 | /* If the node we stopped at is a compressed node, we need to |
553 | * split it before to continue. |
554 | * |
555 | * Splitting a compressed node have a few possible cases. |
556 | * Imagine that the node 'h' we are currently at is a compressed |
557 | * node containing the string "ANNIBALE" (it means that it represents |
558 | * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child |
559 | * pointer of this node pointing at the 'E' node, because remember that |
560 | * we have characters at the edges of the graph, not inside the nodes |
561 | * themselves. |
562 | * |
563 | * In order to show a real case imagine our node to also point to |
564 | * another compressed node, that finally points at the node without |
565 | * children, representing 'O': |
566 | * |
567 | * "ANNIBALE" -> "SCO" -> [] |
568 | * |
569 | * When inserting we may face the following cases. Note that all the cases |
570 | * require the insertion of a non compressed node with exactly two |
571 | * children, except for the last case which just requires splitting a |
572 | * compressed node. |
573 | * |
574 | * 1) Inserting "ANNIENTARE" |
575 | * |
576 | * |B| -> "ALE" -> "SCO" -> [] |
577 | * "ANNI" -> |-| |
578 | * |E| -> (... continue algo ...) "NTARE" -> [] |
579 | * |
580 | * 2) Inserting "ANNIBALI" |
581 | * |
582 | * |E| -> "SCO" -> [] |
583 | * "ANNIBAL" -> |-| |
584 | * |I| -> (... continue algo ...) [] |
585 | * |
586 | * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node) |
587 | * |
588 | * |N| -> "NIBALE" -> "SCO" -> [] |
589 | * |A| -> |-| |
590 | * |G| -> (... continue algo ...) |O| -> [] |
591 | * |
592 | * 4) Inserting "CIAO" |
593 | * |
594 | * |A| -> "NNIBALE" -> "SCO" -> [] |
595 | * |-| |
596 | * |C| -> (... continue algo ...) "IAO" -> [] |
597 | * |
598 | * 5) Inserting "ANNI" |
599 | * |
600 | * "ANNI" -> "BALE" -> "SCO" -> [] |
601 | * |
602 | * The final algorithm for insertion covering all the above cases is as |
603 | * follows. |
604 | * |
605 | * ============================= ALGO 1 ============================= |
606 | * |
607 | * For the above cases 1 to 4, that is, all cases where we stopped in |
608 | * the middle of a compressed node for a character mismatch, do: |
609 | * |
610 | * Let $SPLITPOS be the zero-based index at which, in the |
611 | * compressed node array of characters, we found the mismatching |
612 | * character. For example if the node contains "ANNIBALE" and we add |
613 | * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the |
614 | * mismatching character is found. |
615 | * |
616 | * 1. Save the current compressed node $NEXT pointer (the pointer to the |
617 | * child element, that is always present in compressed nodes). |
618 | * |
619 | * 2. Create "split node" having as child the non common letter |
620 | * at the compressed node. The other non common letter (at the key) |
621 | * will be added later as we continue the normal insertion algorithm |
622 | * at step "6". |
623 | * |
624 | * 3a. IF $SPLITPOS == 0: |
625 | * Replace the old node with the split node, by copying the auxiliary |
626 | * data if any. Fix parent's reference. Free old node eventually |
627 | * (we still need its data for the next steps of the algorithm). |
628 | * |
629 | * 3b. IF $SPLITPOS != 0: |
630 | * Trim the compressed node (reallocating it as well) in order to |
631 | * contain $splitpos characters. Change child pointer in order to link |
632 | * to the split node. If new compressed node len is just 1, set |
633 | * iscompr to 0 (layout is the same). Fix parent's reference. |
634 | * |
635 | * 4a. IF the postfix len (the length of the remaining string of the |
636 | * original compressed node after the split character) is non zero, |
637 | * create a "postfix node". If the postfix node has just one character |
638 | * set iscompr to 0, otherwise iscompr to 1. Set the postfix node |
639 | * child pointer to $NEXT. |
640 | * |
641 | * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer. |
642 | * |
643 | * 5. Set child[0] of split node to postfix node. |
644 | * |
645 | * 6. Set the split node as the current node, set current index at child[1] |
646 | * and continue insertion algorithm as usually. |
647 | * |
648 | * ============================= ALGO 2 ============================= |
649 | * |
650 | * For case 5, that is, if we stopped in the middle of a compressed |
651 | * node but no mismatch was found, do: |
652 | * |
653 | * Let $SPLITPOS be the zero-based index at which, in the |
654 | * compressed node array of characters, we stopped iterating because |
655 | * there were no more keys character to match. So in the example of |
656 | * the node "ANNIBALE", adding the string "ANNI", the $SPLITPOS is 4. |
657 | * |
658 | * 1. Save the current compressed node $NEXT pointer (the pointer to the |
659 | * child element, that is always present in compressed nodes). |
660 | * |
661 | * 2. Create a "postfix node" containing all the characters from $SPLITPOS |
662 | * to the end. Use $NEXT as the postfix node child pointer. |
663 | * If the postfix node length is 1, set iscompr to 0. |
664 | * Set the node as a key with the associated value of the new |
665 | * inserted key. |
666 | * |
667 | * 3. Trim the current node to contain the first $SPLITPOS characters. |
668 | * As usually if the new node length is just 1, set iscompr to 0. |
669 | * Take the iskey / associated value as it was in the original node. |
670 | * Fix the parent's reference. |
671 | * |
672 | * 4. Set the postfix node as the only child pointer of the trimmed |
673 | * node created at step 1. |
674 | */ |
675 | |
676 | /* ------------------------- ALGORITHM 1 --------------------------- */ |
677 | if (h->iscompr && i != len) { |
678 | debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n" , |
679 | h->size, h->data, (void*)h); |
680 | debugf("Still to insert: %.*s\n" , (int)(len-i), s+i); |
681 | debugf("Splitting at %d: '%c'\n" , j, ((char*)h->data)[j]); |
682 | debugf("Other (key) letter is '%c'\n" , s[i]); |
683 | |
684 | /* 1: Save next pointer. */ |
685 | raxNode **childfield = raxNodeLastChildPtr(h); |
686 | raxNode *next; |
687 | memcpy(&next,childfield,sizeof(next)); |
688 | debugf("Next is %p\n" , (void*)next); |
689 | debugf("iskey %d\n" , h->iskey); |
690 | if (h->iskey) { |
691 | debugf("key value is %p\n" , raxGetData(h)); |
692 | } |
693 | |
694 | /* Set the length of the additional nodes we will need. */ |
695 | size_t trimmedlen = j; |
696 | size_t postfixlen = h->size - j - 1; |
697 | int split_node_is_key = !trimmedlen && h->iskey && !h->isnull; |
698 | size_t nodesize; |
699 | |
700 | /* 2: Create the split node. Also allocate the other nodes we'll need |
701 | * ASAP, so that it will be simpler to handle OOM. */ |
702 | raxNode *splitnode = raxNewNode(1, split_node_is_key); |
703 | raxNode *trimmed = NULL; |
704 | raxNode *postfix = NULL; |
705 | |
706 | if (trimmedlen) { |
707 | nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+ |
708 | sizeof(raxNode*); |
709 | if (h->iskey && !h->isnull) nodesize += sizeof(void*); |
710 | trimmed = rax_malloc(nodesize); |
711 | } |
712 | |
713 | if (postfixlen) { |
714 | nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+ |
715 | sizeof(raxNode*); |
716 | postfix = rax_malloc(nodesize); |
717 | } |
718 | |
719 | /* OOM? Abort now that the tree is untouched. */ |
720 | if (splitnode == NULL || |
721 | (trimmedlen && trimmed == NULL) || |
722 | (postfixlen && postfix == NULL)) |
723 | { |
724 | rax_free(splitnode); |
725 | rax_free(trimmed); |
726 | rax_free(postfix); |
727 | errno = ENOMEM; |
728 | return 0; |
729 | } |
730 | splitnode->data[0] = h->data[j]; |
731 | |
732 | if (j == 0) { |
733 | /* 3a: Replace the old node with the split node. */ |
734 | if (h->iskey) { |
735 | void *ndata = raxGetData(h); |
736 | raxSetData(splitnode,ndata); |
737 | } |
738 | memcpy(parentlink,&splitnode,sizeof(splitnode)); |
739 | } else { |
740 | /* 3b: Trim the compressed node. */ |
741 | trimmed->size = j; |
742 | memcpy(trimmed->data,h->data,j); |
743 | trimmed->iscompr = j > 1 ? 1 : 0; |
744 | trimmed->iskey = h->iskey; |
745 | trimmed->isnull = h->isnull; |
746 | if (h->iskey && !h->isnull) { |
747 | void *ndata = raxGetData(h); |
748 | raxSetData(trimmed,ndata); |
749 | } |
750 | raxNode **cp = raxNodeLastChildPtr(trimmed); |
751 | memcpy(cp,&splitnode,sizeof(splitnode)); |
752 | memcpy(parentlink,&trimmed,sizeof(trimmed)); |
753 | parentlink = cp; /* Set parentlink to splitnode parent. */ |
754 | rax->numnodes++; |
755 | } |
756 | |
757 | /* 4: Create the postfix node: what remains of the original |
758 | * compressed node after the split. */ |
759 | if (postfixlen) { |
760 | /* 4a: create a postfix node. */ |
761 | postfix->iskey = 0; |
762 | postfix->isnull = 0; |
763 | postfix->size = postfixlen; |
764 | postfix->iscompr = postfixlen > 1; |
765 | memcpy(postfix->data,h->data+j+1,postfixlen); |
766 | raxNode **cp = raxNodeLastChildPtr(postfix); |
767 | memcpy(cp,&next,sizeof(next)); |
768 | rax->numnodes++; |
769 | } else { |
770 | /* 4b: just use next as postfix node. */ |
771 | postfix = next; |
772 | } |
773 | |
774 | /* 5: Set splitnode first child as the postfix node. */ |
775 | raxNode **splitchild = raxNodeLastChildPtr(splitnode); |
776 | memcpy(splitchild,&postfix,sizeof(postfix)); |
777 | |
778 | /* 6. Continue insertion: this will cause the splitnode to |
779 | * get a new child (the non common character at the currently |
780 | * inserted key). */ |
781 | rax_free(h); |
782 | h = splitnode; |
783 | } else if (h->iscompr && i == len) { |
784 | /* ------------------------- ALGORITHM 2 --------------------------- */ |
785 | debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n" , |
786 | h->size, h->data, (void*)h, j); |
787 | |
788 | /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */ |
789 | size_t postfixlen = h->size - j; |
790 | size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+ |
791 | sizeof(raxNode*); |
792 | if (data != NULL) nodesize += sizeof(void*); |
793 | raxNode *postfix = rax_malloc(nodesize); |
794 | |
795 | nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*); |
796 | if (h->iskey && !h->isnull) nodesize += sizeof(void*); |
797 | raxNode *trimmed = rax_malloc(nodesize); |
798 | |
799 | if (postfix == NULL || trimmed == NULL) { |
800 | rax_free(postfix); |
801 | rax_free(trimmed); |
802 | errno = ENOMEM; |
803 | return 0; |
804 | } |
805 | |
806 | /* 1: Save next pointer. */ |
807 | raxNode **childfield = raxNodeLastChildPtr(h); |
808 | raxNode *next; |
809 | memcpy(&next,childfield,sizeof(next)); |
810 | |
811 | /* 2: Create the postfix node. */ |
812 | postfix->size = postfixlen; |
813 | postfix->iscompr = postfixlen > 1; |
814 | postfix->iskey = 1; |
815 | postfix->isnull = 0; |
816 | memcpy(postfix->data,h->data+j,postfixlen); |
817 | raxSetData(postfix,data); |
818 | raxNode **cp = raxNodeLastChildPtr(postfix); |
819 | memcpy(cp,&next,sizeof(next)); |
820 | rax->numnodes++; |
821 | |
822 | /* 3: Trim the compressed node. */ |
823 | trimmed->size = j; |
824 | trimmed->iscompr = j > 1; |
825 | trimmed->iskey = 0; |
826 | trimmed->isnull = 0; |
827 | memcpy(trimmed->data,h->data,j); |
828 | memcpy(parentlink,&trimmed,sizeof(trimmed)); |
829 | if (h->iskey) { |
830 | void *aux = raxGetData(h); |
831 | raxSetData(trimmed,aux); |
832 | } |
833 | |
834 | /* Fix the trimmed node child pointer to point to |
835 | * the postfix node. */ |
836 | cp = raxNodeLastChildPtr(trimmed); |
837 | memcpy(cp,&postfix,sizeof(postfix)); |
838 | |
839 | /* Finish! We don't need to continue with the insertion |
840 | * algorithm for ALGO 2. The key is already inserted. */ |
841 | rax->numele++; |
842 | rax_free(h); |
843 | return 1; /* Key inserted. */ |
844 | } |
845 | |
846 | /* We walked the radix tree as far as we could, but still there are left |
847 | * chars in our string. We need to insert the missing nodes. */ |
848 | while(i < len) { |
849 | raxNode *child; |
850 | |
851 | /* If this node is going to have a single child, and there |
852 | * are other characters, so that that would result in a chain |
853 | * of single-childed nodes, turn it into a compressed node. */ |
854 | if (h->size == 0 && len-i > 1) { |
855 | debugf("Inserting compressed node\n" ); |
856 | size_t comprsize = len-i; |
857 | if (comprsize > RAX_NODE_MAX_SIZE) |
858 | comprsize = RAX_NODE_MAX_SIZE; |
859 | raxNode *newh = raxCompressNode(h,s+i,comprsize,&child); |
860 | if (newh == NULL) goto oom; |
861 | h = newh; |
862 | memcpy(parentlink,&h,sizeof(h)); |
863 | parentlink = raxNodeLastChildPtr(h); |
864 | i += comprsize; |
865 | } else { |
866 | debugf("Inserting normal node\n" ); |
867 | raxNode **new_parentlink; |
868 | raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink); |
869 | if (newh == NULL) goto oom; |
870 | h = newh; |
871 | memcpy(parentlink,&h,sizeof(h)); |
872 | parentlink = new_parentlink; |
873 | i++; |
874 | } |
875 | rax->numnodes++; |
876 | h = child; |
877 | } |
878 | raxNode *newh = raxReallocForData(h,data); |
879 | if (newh == NULL) goto oom; |
880 | h = newh; |
881 | if (!h->iskey) rax->numele++; |
882 | raxSetData(h,data); |
883 | memcpy(parentlink,&h,sizeof(h)); |
884 | return 1; /* Element inserted. */ |
885 | |
886 | oom: |
887 | /* This code path handles out of memory after part of the sub-tree was |
888 | * already modified. Set the node as a key, and then remove it. However we |
889 | * do that only if the node is a terminal node, otherwise if the OOM |
890 | * happened reallocating a node in the middle, we don't need to free |
891 | * anything. */ |
892 | if (h->size == 0) { |
893 | h->isnull = 1; |
894 | h->iskey = 1; |
895 | rax->numele++; /* Compensate the next remove. */ |
896 | assert(raxRemove(rax,s,i,NULL) != 0); |
897 | } |
898 | errno = ENOMEM; |
899 | return 0; |
900 | } |
901 | |
902 | /* Overwriting insert. Just a wrapper for raxGenericInsert() that will |
903 | * update the element if there is already one for the same key. */ |
904 | int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { |
905 | return raxGenericInsert(rax,s,len,data,old,1); |
906 | } |
907 | |
908 | /* Non overwriting insert function: if an element with the same key |
909 | * exists, the value is not updated and the function returns 0. |
910 | * This is just a wrapper for raxGenericInsert(). */ |
911 | int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { |
912 | return raxGenericInsert(rax,s,len,data,old,0); |
913 | } |
914 | |
915 | /* Find a key in the rax, returns raxNotFound special void pointer value |
916 | * if the item was not found, otherwise the value associated with the |
917 | * item is returned. */ |
918 | void *raxFind(rax *rax, unsigned char *s, size_t len) { |
919 | raxNode *h; |
920 | |
921 | debugf("### Lookup: %.*s\n" , (int)len, s); |
922 | int splitpos = 0; |
923 | size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL); |
924 | if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) |
925 | return raxNotFound; |
926 | return raxGetData(h); |
927 | } |
928 | |
929 | /* Return the memory address where the 'parent' node stores the specified |
930 | * 'child' pointer, so that the caller can update the pointer with another |
931 | * one if needed. The function assumes it will find a match, otherwise the |
932 | * operation is an undefined behavior (it will continue scanning the |
933 | * memory without any bound checking). */ |
934 | raxNode **raxFindParentLink(raxNode *parent, raxNode *child) { |
935 | raxNode **cp = raxNodeFirstChildPtr(parent); |
936 | raxNode *c; |
937 | while(1) { |
938 | memcpy(&c,cp,sizeof(c)); |
939 | if (c == child) break; |
940 | cp++; |
941 | } |
942 | return cp; |
943 | } |
944 | |
945 | /* Low level child removal from node. The new node pointer (after the child |
946 | * removal) is returned. Note that this function does not fix the pointer |
947 | * of the parent node in its parent, so this task is up to the caller. |
948 | * The function never fails for out of memory. */ |
949 | raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { |
950 | debugnode("raxRemoveChild before" , parent); |
951 | /* If parent is a compressed node (having a single child, as for definition |
952 | * of the data structure), the removal of the child consists into turning |
953 | * it into a normal node without children. */ |
954 | if (parent->iscompr) { |
955 | void *data = NULL; |
956 | if (parent->iskey) data = raxGetData(parent); |
957 | parent->isnull = 0; |
958 | parent->iscompr = 0; |
959 | parent->size = 0; |
960 | if (parent->iskey) raxSetData(parent,data); |
961 | debugnode("raxRemoveChild after" , parent); |
962 | return parent; |
963 | } |
964 | |
965 | /* Otherwise we need to scan for the child pointer and memmove() |
966 | * accordingly. |
967 | * |
968 | * 1. To start we seek the first element in both the children |
969 | * pointers and edge bytes in the node. */ |
970 | raxNode **cp = raxNodeFirstChildPtr(parent); |
971 | raxNode **c = cp; |
972 | unsigned char *e = parent->data; |
973 | |
974 | /* 2. Search the child pointer to remove inside the array of children |
975 | * pointers. */ |
976 | while(1) { |
977 | raxNode *aux; |
978 | memcpy(&aux,c,sizeof(aux)); |
979 | if (aux == child) break; |
980 | c++; |
981 | e++; |
982 | } |
983 | |
984 | /* 3. Remove the edge and the pointer by memmoving the remaining children |
985 | * pointer and edge bytes one position before. */ |
986 | int taillen = parent->size - (e - parent->data) - 1; |
987 | debugf("raxRemoveChild tail len: %d\n" , taillen); |
988 | memmove(e,e+1,taillen); |
989 | |
990 | /* Compute the shift, that is the amount of bytes we should move our |
991 | * child pointers to the left, since the removal of one edge character |
992 | * and the corresponding padding change, may change the layout. |
993 | * We just check if in the old version of the node there was at the |
994 | * end just a single byte and all padding: in that case removing one char |
995 | * will remove a whole sizeof(void*) word. */ |
996 | size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0; |
997 | |
998 | /* Move the children pointers before the deletion point. */ |
999 | if (shift) |
1000 | memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**)); |
1001 | |
1002 | /* Move the remaining "tail" pointers at the right position as well. */ |
1003 | size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0; |
1004 | memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen); |
1005 | |
1006 | /* 4. Update size. */ |
1007 | parent->size--; |
1008 | |
1009 | /* realloc the node according to the theoretical memory usage, to free |
1010 | * data if we are over-allocating right now. */ |
1011 | raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent)); |
1012 | if (newnode) { |
1013 | debugnode("raxRemoveChild after" , newnode); |
1014 | } |
1015 | /* Note: if rax_realloc() fails we just return the old address, which |
1016 | * is valid. */ |
1017 | return newnode ? newnode : parent; |
1018 | } |
1019 | |
1020 | /* Remove the specified item. Returns 1 if the item was found and |
1021 | * deleted, 0 otherwise. */ |
1022 | int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { |
1023 | raxNode *h; |
1024 | raxStack ts; |
1025 | |
1026 | debugf("### Delete: %.*s\n" , (int)len, s); |
1027 | raxStackInit(&ts); |
1028 | int splitpos = 0; |
1029 | size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts); |
1030 | if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { |
1031 | raxStackFree(&ts); |
1032 | return 0; |
1033 | } |
1034 | if (old) *old = raxGetData(h); |
1035 | h->iskey = 0; |
1036 | rax->numele--; |
1037 | |
1038 | /* If this node has no children, the deletion needs to reclaim the |
1039 | * no longer used nodes. This is an iterative process that needs to |
1040 | * walk the three upward, deleting all the nodes with just one child |
1041 | * that are not keys, until the head of the rax is reached or the first |
1042 | * node with more than one child is found. */ |
1043 | |
1044 | int trycompress = 0; /* Will be set to 1 if we should try to optimize the |
1045 | tree resulting from the deletion. */ |
1046 | |
1047 | if (h->size == 0) { |
1048 | debugf("Key deleted in node without children. Cleanup needed.\n" ); |
1049 | raxNode *child = NULL; |
1050 | while(h != rax->head) { |
1051 | child = h; |
1052 | debugf("Freeing child %p [%.*s] key:%d\n" , (void*)child, |
1053 | (int)child->size, (char*)child->data, child->iskey); |
1054 | rax_free(child); |
1055 | rax->numnodes--; |
1056 | h = raxStackPop(&ts); |
1057 | /* If this node has more then one child, or actually holds |
1058 | * a key, stop here. */ |
1059 | if (h->iskey || (!h->iscompr && h->size != 1)) break; |
1060 | } |
1061 | if (child) { |
1062 | debugf("Unlinking child %p from parent %p\n" , |
1063 | (void*)child, (void*)h); |
1064 | raxNode *new = raxRemoveChild(h,child); |
1065 | if (new != h) { |
1066 | raxNode *parent = raxStackPeek(&ts); |
1067 | raxNode **parentlink; |
1068 | if (parent == NULL) { |
1069 | parentlink = &rax->head; |
1070 | } else { |
1071 | parentlink = raxFindParentLink(parent,h); |
1072 | } |
1073 | memcpy(parentlink,&new,sizeof(new)); |
1074 | } |
1075 | |
1076 | /* If after the removal the node has just a single child |
1077 | * and is not a key, we need to try to compress it. */ |
1078 | if (new->size == 1 && new->iskey == 0) { |
1079 | trycompress = 1; |
1080 | h = new; |
1081 | } |
1082 | } |
1083 | } else if (h->size == 1) { |
1084 | /* If the node had just one child, after the removal of the key |
1085 | * further compression with adjacent nodes is potentially possible. */ |
1086 | trycompress = 1; |
1087 | } |
1088 | |
1089 | /* Don't try node compression if our nodes pointers stack is not |
1090 | * complete because of OOM while executing raxLowWalk() */ |
1091 | if (trycompress && ts.oom) trycompress = 0; |
1092 | |
1093 | /* Recompression: if trycompress is true, 'h' points to a radix tree node |
1094 | * that changed in a way that could allow to compress nodes in this |
1095 | * sub-branch. Compressed nodes represent chains of nodes that are not |
1096 | * keys and have a single child, so there are two deletion events that |
1097 | * may alter the tree so that further compression is needed: |
1098 | * |
1099 | * 1) A node with a single child was a key and now no longer is a key. |
1100 | * 2) A node with two children now has just one child. |
1101 | * |
1102 | * We try to navigate upward till there are other nodes that can be |
1103 | * compressed, when we reach the upper node which is not a key and has |
1104 | * a single child, we scan the chain of children to collect the |
1105 | * compressible part of the tree, and replace the current node with the |
1106 | * new one, fixing the child pointer to reference the first non |
1107 | * compressible node. |
1108 | * |
1109 | * Example of case "1". A tree stores the keys "FOO" = 1 and |
1110 | * "FOOBAR" = 2: |
1111 | * |
1112 | * |
1113 | * "FOO" -> "BAR" -> [] (2) |
1114 | * (1) |
1115 | * |
1116 | * After the removal of "FOO" the tree can be compressed as: |
1117 | * |
1118 | * "FOOBAR" -> [] (2) |
1119 | * |
1120 | * |
1121 | * Example of case "2". A tree stores the keys "FOOBAR" = 1 and |
1122 | * "FOOTER" = 2: |
1123 | * |
1124 | * |B| -> "AR" -> [] (1) |
1125 | * "FOO" -> |-| |
1126 | * |T| -> "ER" -> [] (2) |
1127 | * |
1128 | * After the removal of "FOOTER" the resulting tree is: |
1129 | * |
1130 | * "FOO" -> |B| -> "AR" -> [] (1) |
1131 | * |
1132 | * That can be compressed into: |
1133 | * |
1134 | * "FOOBAR" -> [] (1) |
1135 | */ |
1136 | if (trycompress) { |
1137 | debugf("After removing %.*s:\n" , (int)len, s); |
1138 | debugnode("Compression may be needed" ,h); |
1139 | debugf("Seek start node\n" ); |
1140 | |
1141 | /* Try to reach the upper node that is compressible. |
1142 | * At the end of the loop 'h' will point to the first node we |
1143 | * can try to compress and 'parent' to its parent. */ |
1144 | raxNode *parent; |
1145 | while(1) { |
1146 | parent = raxStackPop(&ts); |
1147 | if (!parent || parent->iskey || |
1148 | (!parent->iscompr && parent->size != 1)) break; |
1149 | h = parent; |
1150 | debugnode("Going up to" ,h); |
1151 | } |
1152 | raxNode *start = h; /* Compression starting node. */ |
1153 | |
1154 | /* Scan chain of nodes we can compress. */ |
1155 | size_t comprsize = h->size; |
1156 | int nodes = 1; |
1157 | while(h->size != 0) { |
1158 | raxNode **cp = raxNodeLastChildPtr(h); |
1159 | memcpy(&h,cp,sizeof(h)); |
1160 | if (h->iskey || (!h->iscompr && h->size != 1)) break; |
1161 | /* Stop here if going to the next node would result into |
1162 | * a compressed node larger than h->size can hold. */ |
1163 | if (comprsize + h->size > RAX_NODE_MAX_SIZE) break; |
1164 | nodes++; |
1165 | comprsize += h->size; |
1166 | } |
1167 | if (nodes > 1) { |
1168 | /* If we can compress, create the new node and populate it. */ |
1169 | size_t nodesize = |
1170 | sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*); |
1171 | raxNode *new = rax_malloc(nodesize); |
1172 | /* An out of memory here just means we cannot optimize this |
1173 | * node, but the tree is left in a consistent state. */ |
1174 | if (new == NULL) { |
1175 | raxStackFree(&ts); |
1176 | return 1; |
1177 | } |
1178 | new->iskey = 0; |
1179 | new->isnull = 0; |
1180 | new->iscompr = 1; |
1181 | new->size = comprsize; |
1182 | rax->numnodes++; |
1183 | |
1184 | /* Scan again, this time to populate the new node content and |
1185 | * to fix the new node child pointer. At the same time we free |
1186 | * all the nodes that we'll no longer use. */ |
1187 | comprsize = 0; |
1188 | h = start; |
1189 | while(h->size != 0) { |
1190 | memcpy(new->data+comprsize,h->data,h->size); |
1191 | comprsize += h->size; |
1192 | raxNode **cp = raxNodeLastChildPtr(h); |
1193 | raxNode *tofree = h; |
1194 | memcpy(&h,cp,sizeof(h)); |
1195 | rax_free(tofree); rax->numnodes--; |
1196 | if (h->iskey || (!h->iscompr && h->size != 1)) break; |
1197 | } |
1198 | debugnode("New node" ,new); |
1199 | |
1200 | /* Now 'h' points to the first node that we still need to use, |
1201 | * so our new node child pointer will point to it. */ |
1202 | raxNode **cp = raxNodeLastChildPtr(new); |
1203 | memcpy(cp,&h,sizeof(h)); |
1204 | |
1205 | /* Fix parent link. */ |
1206 | if (parent) { |
1207 | raxNode **parentlink = raxFindParentLink(parent,start); |
1208 | memcpy(parentlink,&new,sizeof(new)); |
1209 | } else { |
1210 | rax->head = new; |
1211 | } |
1212 | |
1213 | debugf("Compressed %d nodes, %d total bytes\n" , |
1214 | nodes, (int)comprsize); |
1215 | } |
1216 | } |
1217 | raxStackFree(&ts); |
1218 | return 1; |
1219 | } |
1220 | |
1221 | /* This is the core of raxFree(): performs a depth-first scan of the |
1222 | * tree and releases all the nodes found. */ |
1223 | void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { |
1224 | debugnode("free traversing" ,n); |
1225 | int numchildren = n->iscompr ? 1 : n->size; |
1226 | raxNode **cp = raxNodeLastChildPtr(n); |
1227 | while(numchildren--) { |
1228 | raxNode *child; |
1229 | memcpy(&child,cp,sizeof(child)); |
1230 | raxRecursiveFree(rax,child,free_callback); |
1231 | cp--; |
1232 | } |
1233 | debugnode("free depth-first" ,n); |
1234 | if (free_callback && n->iskey && !n->isnull) |
1235 | free_callback(raxGetData(n)); |
1236 | rax_free(n); |
1237 | rax->numnodes--; |
1238 | } |
1239 | |
1240 | /* Free a whole radix tree, calling the specified callback in order to |
1241 | * free the auxiliary data. */ |
1242 | void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { |
1243 | raxRecursiveFree(rax,rax->head,free_callback); |
1244 | assert(rax->numnodes == 0); |
1245 | rax_free(rax); |
1246 | } |
1247 | |
1248 | /* Free a whole radix tree. */ |
1249 | void raxFree(rax *rax) { |
1250 | raxFreeWithCallback(rax,NULL); |
1251 | } |
1252 | |
1253 | /* ------------------------------- Iterator --------------------------------- */ |
1254 | |
1255 | /* Initialize a Rax iterator. This call should be performed a single time |
1256 | * to initialize the iterator, and must be followed by a raxSeek() call, |
1257 | * otherwise the raxPrev()/raxNext() functions will just return EOF. */ |
1258 | void raxStart(raxIterator *it, rax *rt) { |
1259 | it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */ |
1260 | it->rt = rt; |
1261 | it->key_len = 0; |
1262 | it->key = it->key_static_string; |
1263 | it->key_max = RAX_ITER_STATIC_LEN; |
1264 | it->data = NULL; |
1265 | it->node_cb = NULL; |
1266 | raxStackInit(&it->stack); |
1267 | } |
1268 | |
1269 | /* Append characters at the current key string of the iterator 'it'. This |
1270 | * is a low level function used to implement the iterator, not callable by |
1271 | * the user. Returns 0 on out of memory, otherwise 1 is returned. */ |
1272 | int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { |
1273 | if (len == 0) return 1; |
1274 | if (it->key_max < it->key_len+len) { |
1275 | unsigned char *old = (it->key == it->key_static_string) ? NULL : |
1276 | it->key; |
1277 | size_t new_max = (it->key_len+len)*2; |
1278 | it->key = rax_realloc(old,new_max); |
1279 | if (it->key == NULL) { |
1280 | it->key = (!old) ? it->key_static_string : old; |
1281 | errno = ENOMEM; |
1282 | return 0; |
1283 | } |
1284 | if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len); |
1285 | it->key_max = new_max; |
1286 | } |
1287 | /* Use memmove since there could be an overlap between 's' and |
1288 | * it->key when we use the current key in order to re-seek. */ |
1289 | memmove(it->key+it->key_len,s,len); |
1290 | it->key_len += len; |
1291 | return 1; |
1292 | } |
1293 | |
1294 | /* Remove the specified number of chars from the right of the current |
1295 | * iterator key. */ |
1296 | void raxIteratorDelChars(raxIterator *it, size_t count) { |
1297 | it->key_len -= count; |
1298 | } |
1299 | |
1300 | /* Do an iteration step towards the next element. At the end of the step the |
1301 | * iterator key will represent the (new) current key. If it is not possible |
1302 | * to step in the specified direction since there are no longer elements, the |
1303 | * iterator is flagged with RAX_ITER_EOF. |
1304 | * |
1305 | * If 'noup' is true the function starts directly scanning for the next |
1306 | * lexicographically smaller children, and the current node is already assumed |
1307 | * to be the parent of the last key node, so the first operation to go back to |
1308 | * the parent will be skipped. This option is used by raxSeek() when |
1309 | * implementing seeking a non existing element with the ">" or "<" options: |
1310 | * the starting node is not a key in that particular case, so we start the scan |
1311 | * from a node that does not represent the key set. |
1312 | * |
1313 | * The function returns 1 on success or 0 on out of memory. */ |
1314 | int raxIteratorNextStep(raxIterator *it, int noup) { |
1315 | if (it->flags & RAX_ITER_EOF) { |
1316 | return 1; |
1317 | } else if (it->flags & RAX_ITER_JUST_SEEKED) { |
1318 | it->flags &= ~RAX_ITER_JUST_SEEKED; |
1319 | return 1; |
1320 | } |
1321 | |
1322 | /* Save key len, stack items and the node where we are currently |
1323 | * so that on iterator EOF we can restore the current key and state. */ |
1324 | size_t orig_key_len = it->key_len; |
1325 | size_t orig_stack_items = it->stack.items; |
1326 | raxNode *orig_node = it->node; |
1327 | |
1328 | while(1) { |
1329 | int children = it->node->iscompr ? 1 : it->node->size; |
1330 | if (!noup && children) { |
1331 | debugf("GO DEEPER\n" ); |
1332 | /* Seek the lexicographically smaller key in this subtree, which |
1333 | * is the first one found always going towards the first child |
1334 | * of every successive node. */ |
1335 | if (!raxStackPush(&it->stack,it->node)) return 0; |
1336 | raxNode **cp = raxNodeFirstChildPtr(it->node); |
1337 | if (!raxIteratorAddChars(it,it->node->data, |
1338 | it->node->iscompr ? it->node->size : 1)) return 0; |
1339 | memcpy(&it->node,cp,sizeof(it->node)); |
1340 | /* Call the node callback if any, and replace the node pointer |
1341 | * if the callback returns true. */ |
1342 | if (it->node_cb && it->node_cb(&it->node)) |
1343 | memcpy(cp,&it->node,sizeof(it->node)); |
1344 | /* For "next" step, stop every time we find a key along the |
1345 | * way, since the key is lexicographically smaller compared to |
1346 | * what follows in the sub-children. */ |
1347 | if (it->node->iskey) { |
1348 | it->data = raxGetData(it->node); |
1349 | return 1; |
1350 | } |
1351 | } else { |
1352 | /* If we finished exploring the previous sub-tree, switch to the |
1353 | * new one: go upper until a node is found where there are |
1354 | * children representing keys lexicographically greater than the |
1355 | * current key. */ |
1356 | while(1) { |
1357 | int old_noup = noup; |
1358 | |
1359 | /* Already on head? Can't go up, iteration finished. */ |
1360 | if (!noup && it->node == it->rt->head) { |
1361 | it->flags |= RAX_ITER_EOF; |
1362 | it->stack.items = orig_stack_items; |
1363 | it->key_len = orig_key_len; |
1364 | it->node = orig_node; |
1365 | return 1; |
1366 | } |
1367 | /* If there are no children at the current node, try parent's |
1368 | * next child. */ |
1369 | unsigned char prevchild = it->key[it->key_len-1]; |
1370 | if (!noup) { |
1371 | it->node = raxStackPop(&it->stack); |
1372 | } else { |
1373 | noup = 0; |
1374 | } |
1375 | /* Adjust the current key to represent the node we are |
1376 | * at. */ |
1377 | int todel = it->node->iscompr ? it->node->size : 1; |
1378 | raxIteratorDelChars(it,todel); |
1379 | |
1380 | /* Try visiting the next child if there was at least one |
1381 | * additional child. */ |
1382 | if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { |
1383 | raxNode **cp = raxNodeFirstChildPtr(it->node); |
1384 | int i = 0; |
1385 | while (i < it->node->size) { |
1386 | debugf("SCAN NEXT %c\n" , it->node->data[i]); |
1387 | if (it->node->data[i] > prevchild) break; |
1388 | i++; |
1389 | cp++; |
1390 | } |
1391 | if (i != it->node->size) { |
1392 | debugf("SCAN found a new node\n" ); |
1393 | raxIteratorAddChars(it,it->node->data+i,1); |
1394 | if (!raxStackPush(&it->stack,it->node)) return 0; |
1395 | memcpy(&it->node,cp,sizeof(it->node)); |
1396 | /* Call the node callback if any, and replace the node |
1397 | * pointer if the callback returns true. */ |
1398 | if (it->node_cb && it->node_cb(&it->node)) |
1399 | memcpy(cp,&it->node,sizeof(it->node)); |
1400 | if (it->node->iskey) { |
1401 | it->data = raxGetData(it->node); |
1402 | return 1; |
1403 | } |
1404 | break; |
1405 | } |
1406 | } |
1407 | } |
1408 | } |
1409 | } |
1410 | } |
1411 | |
1412 | /* Seek the greatest key in the subtree at the current node. Return 0 on |
1413 | * out of memory, otherwise 1. This is a helper function for different |
1414 | * iteration functions below. */ |
1415 | int raxSeekGreatest(raxIterator *it) { |
1416 | while(it->node->size) { |
1417 | if (it->node->iscompr) { |
1418 | if (!raxIteratorAddChars(it,it->node->data, |
1419 | it->node->size)) return 0; |
1420 | } else { |
1421 | if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1)) |
1422 | return 0; |
1423 | } |
1424 | raxNode **cp = raxNodeLastChildPtr(it->node); |
1425 | if (!raxStackPush(&it->stack,it->node)) return 0; |
1426 | memcpy(&it->node,cp,sizeof(it->node)); |
1427 | } |
1428 | return 1; |
1429 | } |
1430 | |
1431 | /* Like raxIteratorNextStep() but implements an iteration step moving |
1432 | * to the lexicographically previous element. The 'noup' option has a similar |
1433 | * effect to the one of raxIteratorNextStep(). */ |
1434 | int raxIteratorPrevStep(raxIterator *it, int noup) { |
1435 | if (it->flags & RAX_ITER_EOF) { |
1436 | return 1; |
1437 | } else if (it->flags & RAX_ITER_JUST_SEEKED) { |
1438 | it->flags &= ~RAX_ITER_JUST_SEEKED; |
1439 | return 1; |
1440 | } |
1441 | |
1442 | /* Save key len, stack items and the node where we are currently |
1443 | * so that on iterator EOF we can restore the current key and state. */ |
1444 | size_t orig_key_len = it->key_len; |
1445 | size_t orig_stack_items = it->stack.items; |
1446 | raxNode *orig_node = it->node; |
1447 | |
1448 | while(1) { |
1449 | int old_noup = noup; |
1450 | |
1451 | /* Already on head? Can't go up, iteration finished. */ |
1452 | if (!noup && it->node == it->rt->head) { |
1453 | it->flags |= RAX_ITER_EOF; |
1454 | it->stack.items = orig_stack_items; |
1455 | it->key_len = orig_key_len; |
1456 | it->node = orig_node; |
1457 | return 1; |
1458 | } |
1459 | |
1460 | unsigned char prevchild = it->key[it->key_len-1]; |
1461 | if (!noup) { |
1462 | it->node = raxStackPop(&it->stack); |
1463 | } else { |
1464 | noup = 0; |
1465 | } |
1466 | |
1467 | /* Adjust the current key to represent the node we are |
1468 | * at. */ |
1469 | int todel = it->node->iscompr ? it->node->size : 1; |
1470 | raxIteratorDelChars(it,todel); |
1471 | |
1472 | /* Try visiting the prev child if there is at least one |
1473 | * child. */ |
1474 | if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { |
1475 | raxNode **cp = raxNodeLastChildPtr(it->node); |
1476 | int i = it->node->size-1; |
1477 | while (i >= 0) { |
1478 | debugf("SCAN PREV %c\n" , it->node->data[i]); |
1479 | if (it->node->data[i] < prevchild) break; |
1480 | i--; |
1481 | cp--; |
1482 | } |
1483 | /* If we found a new subtree to explore in this node, |
1484 | * go deeper following all the last children in order to |
1485 | * find the key lexicographically greater. */ |
1486 | if (i != -1) { |
1487 | debugf("SCAN found a new node\n" ); |
1488 | /* Enter the node we just found. */ |
1489 | if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0; |
1490 | if (!raxStackPush(&it->stack,it->node)) return 0; |
1491 | memcpy(&it->node,cp,sizeof(it->node)); |
1492 | /* Seek sub-tree max. */ |
1493 | if (!raxSeekGreatest(it)) return 0; |
1494 | } |
1495 | } |
1496 | |
1497 | /* Return the key: this could be the key we found scanning a new |
1498 | * subtree, or if we did not find a new subtree to explore here, |
1499 | * before giving up with this node, check if it's a key itself. */ |
1500 | if (it->node->iskey) { |
1501 | it->data = raxGetData(it->node); |
1502 | return 1; |
1503 | } |
1504 | } |
1505 | } |
1506 | |
1507 | /* Seek an iterator at the specified element. |
1508 | * Return 0 if the seek failed for syntax error or out of memory. Otherwise |
1509 | * 1 is returned. When 0 is returned for out of memory, errno is set to |
1510 | * the ENOMEM value. */ |
1511 | int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { |
1512 | int eq = 0, lt = 0, gt = 0, first = 0, last = 0; |
1513 | |
1514 | it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */ |
1515 | it->flags |= RAX_ITER_JUST_SEEKED; |
1516 | it->flags &= ~RAX_ITER_EOF; |
1517 | it->key_len = 0; |
1518 | it->node = NULL; |
1519 | |
1520 | /* Set flags according to the operator used to perform the seek. */ |
1521 | if (op[0] == '>') { |
1522 | gt = 1; |
1523 | if (op[1] == '=') eq = 1; |
1524 | } else if (op[0] == '<') { |
1525 | lt = 1; |
1526 | if (op[1] == '=') eq = 1; |
1527 | } else if (op[0] == '=') { |
1528 | eq = 1; |
1529 | } else if (op[0] == '^') { |
1530 | first = 1; |
1531 | } else if (op[0] == '$') { |
1532 | last = 1; |
1533 | } else { |
1534 | errno = 0; |
1535 | return 0; /* Error. */ |
1536 | } |
1537 | |
1538 | /* If there are no elements, set the EOF condition immediately and |
1539 | * return. */ |
1540 | if (it->rt->numele == 0) { |
1541 | it->flags |= RAX_ITER_EOF; |
1542 | return 1; |
1543 | } |
1544 | |
1545 | if (first) { |
1546 | /* Seeking the first key greater or equal to the empty string |
1547 | * is equivalent to seeking the smaller key available. */ |
1548 | return raxSeek(it,">=" ,NULL,0); |
1549 | } |
1550 | |
1551 | if (last) { |
1552 | /* Find the greatest key taking always the last child till a |
1553 | * final node is found. */ |
1554 | it->node = it->rt->head; |
1555 | if (!raxSeekGreatest(it)) return 0; |
1556 | assert(it->node->iskey); |
1557 | it->data = raxGetData(it->node); |
1558 | return 1; |
1559 | } |
1560 | |
1561 | /* We need to seek the specified key. What we do here is to actually |
1562 | * perform a lookup, and later invoke the prev/next key code that |
1563 | * we already use for iteration. */ |
1564 | int splitpos = 0; |
1565 | size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack); |
1566 | |
1567 | /* Return OOM on incomplete stack info. */ |
1568 | if (it->stack.oom) return 0; |
1569 | |
1570 | if (eq && i == len && (!it->node->iscompr || splitpos == 0) && |
1571 | it->node->iskey) |
1572 | { |
1573 | /* We found our node, since the key matches and we have an |
1574 | * "equal" condition. */ |
1575 | if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ |
1576 | it->data = raxGetData(it->node); |
1577 | } else if (lt || gt) { |
1578 | /* Exact key not found or eq flag not set. We have to set as current |
1579 | * key the one represented by the node we stopped at, and perform |
1580 | * a next/prev operation to seek. */ |
1581 | raxIteratorAddChars(it, ele, i-splitpos); |
1582 | |
1583 | /* We need to set the iterator in the correct state to call next/prev |
1584 | * step in order to seek the desired element. */ |
1585 | debugf("After initial seek: i=%d len=%d key=%.*s\n" , |
1586 | (int)i, (int)len, (int)it->key_len, it->key); |
1587 | if (i != len && !it->node->iscompr) { |
1588 | /* If we stopped in the middle of a normal node because of a |
1589 | * mismatch, add the mismatching character to the current key |
1590 | * and call the iterator with the 'noup' flag so that it will try |
1591 | * to seek the next/prev child in the current node directly based |
1592 | * on the mismatching character. */ |
1593 | if (!raxIteratorAddChars(it,ele+i,1)) return 0; |
1594 | debugf("Seek normal node on mismatch: %.*s\n" , |
1595 | (int)it->key_len, (char*)it->key); |
1596 | |
1597 | it->flags &= ~RAX_ITER_JUST_SEEKED; |
1598 | if (lt && !raxIteratorPrevStep(it,1)) return 0; |
1599 | if (gt && !raxIteratorNextStep(it,1)) return 0; |
1600 | it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ |
1601 | } else if (i != len && it->node->iscompr) { |
1602 | debugf("Compressed mismatch: %.*s\n" , |
1603 | (int)it->key_len, (char*)it->key); |
1604 | /* In case of a mismatch within a compressed node. */ |
1605 | int nodechar = it->node->data[splitpos]; |
1606 | int keychar = ele[i]; |
1607 | it->flags &= ~RAX_ITER_JUST_SEEKED; |
1608 | if (gt) { |
1609 | /* If the key the compressed node represents is greater |
1610 | * than our seek element, continue forward, otherwise set the |
1611 | * state in order to go back to the next sub-tree. */ |
1612 | if (nodechar > keychar) { |
1613 | if (!raxIteratorNextStep(it,0)) return 0; |
1614 | } else { |
1615 | if (!raxIteratorAddChars(it,it->node->data,it->node->size)) |
1616 | return 0; |
1617 | if (!raxIteratorNextStep(it,1)) return 0; |
1618 | } |
1619 | } |
1620 | if (lt) { |
1621 | /* If the key the compressed node represents is smaller |
1622 | * than our seek element, seek the greater key in this |
1623 | * subtree, otherwise set the state in order to go back to |
1624 | * the previous sub-tree. */ |
1625 | if (nodechar < keychar) { |
1626 | if (!raxSeekGreatest(it)) return 0; |
1627 | it->data = raxGetData(it->node); |
1628 | } else { |
1629 | if (!raxIteratorAddChars(it,it->node->data,it->node->size)) |
1630 | return 0; |
1631 | if (!raxIteratorPrevStep(it,1)) return 0; |
1632 | } |
1633 | } |
1634 | it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ |
1635 | } else { |
1636 | debugf("No mismatch: %.*s\n" , |
1637 | (int)it->key_len, (char*)it->key); |
1638 | /* If there was no mismatch we are into a node representing the |
1639 | * key, (but which is not a key or the seek operator does not |
1640 | * include 'eq'), or we stopped in the middle of a compressed node |
1641 | * after processing all the key. Continue iterating as this was |
1642 | * a legitimate key we stopped at. */ |
1643 | it->flags &= ~RAX_ITER_JUST_SEEKED; |
1644 | if (it->node->iscompr && it->node->iskey && splitpos && lt) { |
1645 | /* If we stopped in the middle of a compressed node with |
1646 | * perfect match, and the condition is to seek a key "<" than |
1647 | * the specified one, then if this node is a key it already |
1648 | * represents our match. For instance we may have nodes: |
1649 | * |
1650 | * "f" -> "oobar" = 1 -> "" = 2 |
1651 | * |
1652 | * Representing keys "f" = 1, "foobar" = 2. A seek for |
1653 | * the key < "foo" will stop in the middle of the "oobar" |
1654 | * node, but will be our match, representing the key "f". |
1655 | * |
1656 | * So in that case, we don't seek backward. */ |
1657 | it->data = raxGetData(it->node); |
1658 | } else { |
1659 | if (gt && !raxIteratorNextStep(it,0)) return 0; |
1660 | if (lt && !raxIteratorPrevStep(it,0)) return 0; |
1661 | } |
1662 | it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ |
1663 | } |
1664 | } else { |
1665 | /* If we are here just eq was set but no match was found. */ |
1666 | it->flags |= RAX_ITER_EOF; |
1667 | return 1; |
1668 | } |
1669 | return 1; |
1670 | } |
1671 | |
1672 | /* Go to the next element in the scope of the iterator 'it'. |
1673 | * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is |
1674 | * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ |
1675 | int raxNext(raxIterator *it) { |
1676 | if (!raxIteratorNextStep(it,0)) { |
1677 | errno = ENOMEM; |
1678 | return 0; |
1679 | } |
1680 | if (it->flags & RAX_ITER_EOF) { |
1681 | errno = 0; |
1682 | return 0; |
1683 | } |
1684 | return 1; |
1685 | } |
1686 | |
1687 | /* Go to the previous element in the scope of the iterator 'it'. |
1688 | * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is |
1689 | * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ |
1690 | int raxPrev(raxIterator *it) { |
1691 | if (!raxIteratorPrevStep(it,0)) { |
1692 | errno = ENOMEM; |
1693 | return 0; |
1694 | } |
1695 | if (it->flags & RAX_ITER_EOF) { |
1696 | errno = 0; |
1697 | return 0; |
1698 | } |
1699 | return 1; |
1700 | } |
1701 | |
1702 | /* Perform a random walk starting in the current position of the iterator. |
1703 | * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned |
1704 | * and the iterator is set to the node reached after doing a random walk |
1705 | * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed |
1706 | * using a random number of steps between 1 and two times the logarithm of |
1707 | * the number of elements. |
1708 | * |
1709 | * NOTE: if you use this function to generate random elements from the radix |
1710 | * tree, expect a disappointing distribution. A random walk produces good |
1711 | * random elements if the tree is not sparse, however in the case of a radix |
1712 | * tree certain keys will be reported much more often than others. At least |
1713 | * this function should be able to explore every possible element eventually. */ |
1714 | int raxRandomWalk(raxIterator *it, size_t steps) { |
1715 | if (it->rt->numele == 0) { |
1716 | it->flags |= RAX_ITER_EOF; |
1717 | return 0; |
1718 | } |
1719 | |
1720 | if (steps == 0) { |
1721 | size_t fle = 1+floor(log(it->rt->numele)); |
1722 | fle *= 2; |
1723 | steps = 1 + rand() % fle; |
1724 | } |
1725 | |
1726 | raxNode *n = it->node; |
1727 | while(steps > 0 || !n->iskey) { |
1728 | int numchildren = n->iscompr ? 1 : n->size; |
1729 | int r = rand() % (numchildren+(n != it->rt->head)); |
1730 | |
1731 | if (r == numchildren) { |
1732 | /* Go up to parent. */ |
1733 | n = raxStackPop(&it->stack); |
1734 | int todel = n->iscompr ? n->size : 1; |
1735 | raxIteratorDelChars(it,todel); |
1736 | } else { |
1737 | /* Select a random child. */ |
1738 | if (n->iscompr) { |
1739 | if (!raxIteratorAddChars(it,n->data,n->size)) return 0; |
1740 | } else { |
1741 | if (!raxIteratorAddChars(it,n->data+r,1)) return 0; |
1742 | } |
1743 | raxNode **cp = raxNodeFirstChildPtr(n)+r; |
1744 | if (!raxStackPush(&it->stack,n)) return 0; |
1745 | memcpy(&n,cp,sizeof(n)); |
1746 | } |
1747 | if (n->iskey) steps--; |
1748 | } |
1749 | it->node = n; |
1750 | it->data = raxGetData(it->node); |
1751 | return 1; |
1752 | } |
1753 | |
1754 | /* Compare the key currently pointed by the iterator to the specified |
1755 | * key according to the specified operator. Returns 1 if the comparison is |
1756 | * true, otherwise 0 is returned. */ |
1757 | int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) { |
1758 | int eq = 0, lt = 0, gt = 0; |
1759 | |
1760 | if (op[0] == '=' || op[1] == '=') eq = 1; |
1761 | if (op[0] == '>') gt = 1; |
1762 | else if (op[0] == '<') lt = 1; |
1763 | else if (op[1] != '=') return 0; /* Syntax error. */ |
1764 | |
1765 | size_t minlen = key_len < iter->key_len ? key_len : iter->key_len; |
1766 | int cmp = memcmp(iter->key,key,minlen); |
1767 | |
1768 | /* Handle == */ |
1769 | if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len; |
1770 | |
1771 | /* Handle >, >=, <, <= */ |
1772 | if (cmp == 0) { |
1773 | /* Same prefix: longer wins. */ |
1774 | if (eq && key_len == iter->key_len) return 1; |
1775 | else if (lt) return iter->key_len < key_len; |
1776 | else if (gt) return iter->key_len > key_len; |
1777 | else return 0; /* Avoid warning, just 'eq' is handled before. */ |
1778 | } else if (cmp > 0) { |
1779 | return gt ? 1 : 0; |
1780 | } else /* (cmp < 0) */ { |
1781 | return lt ? 1 : 0; |
1782 | } |
1783 | } |
1784 | |
1785 | /* Free the iterator. */ |
1786 | void raxStop(raxIterator *it) { |
1787 | if (it->key != it->key_static_string) rax_free(it->key); |
1788 | raxStackFree(&it->stack); |
1789 | } |
1790 | |
1791 | /* Return if the iterator is in an EOF state. This happens when raxSeek() |
1792 | * failed to seek an appropriate element, so that raxNext() or raxPrev() |
1793 | * will return zero, or when an EOF condition was reached while iterating |
1794 | * with raxNext() and raxPrev(). */ |
1795 | int raxEOF(raxIterator *it) { |
1796 | return it->flags & RAX_ITER_EOF; |
1797 | } |
1798 | |
1799 | /* Return the number of elements inside the radix tree. */ |
1800 | uint64_t raxSize(rax *rax) { |
1801 | return rax->numele; |
1802 | } |
1803 | |
1804 | /* ----------------------------- Introspection ------------------------------ */ |
1805 | |
1806 | /* This function is mostly used for debugging and learning purposes. |
1807 | * It shows an ASCII representation of a tree on standard output, outline |
1808 | * all the nodes and the contained keys. |
1809 | * |
1810 | * The representation is as follow: |
1811 | * |
1812 | * "foobar" (compressed node) |
1813 | * [abc] (normal node with three children) |
1814 | * [abc]=0x12345678 (node is a key, pointing to value 0x12345678) |
1815 | * [] (a normal empty node) |
1816 | * |
1817 | * Children are represented in new indented lines, each children prefixed by |
1818 | * the "`-(x)" string, where "x" is the edge byte. |
1819 | * |
1820 | * [abc] |
1821 | * `-(a) "ladin" |
1822 | * `-(b) [kj] |
1823 | * `-(c) [] |
1824 | * |
1825 | * However when a node has a single child the following representation |
1826 | * is used instead: |
1827 | * |
1828 | * [abc] -> "ladin" -> [] |
1829 | */ |
1830 | |
1831 | /* The actual implementation of raxShow(). */ |
1832 | void raxRecursiveShow(int level, int lpad, raxNode *n) { |
1833 | char s = n->iscompr ? '"' : '['; |
1834 | char e = n->iscompr ? '"' : ']'; |
1835 | |
1836 | int numchars = printf("%c%.*s%c" , s, n->size, n->data, e); |
1837 | if (n->iskey) { |
1838 | numchars += printf("=%p" ,raxGetData(n)); |
1839 | } |
1840 | |
1841 | int numchildren = n->iscompr ? 1 : n->size; |
1842 | /* Note that 7 and 4 magic constants are the string length |
1843 | * of " `-(x) " and " -> " respectively. */ |
1844 | if (level) { |
1845 | lpad += (numchildren > 1) ? 7 : 4; |
1846 | if (numchildren == 1) lpad += numchars; |
1847 | } |
1848 | raxNode **cp = raxNodeFirstChildPtr(n); |
1849 | for (int i = 0; i < numchildren; i++) { |
1850 | char *branch = " `-(%c) " ; |
1851 | if (numchildren > 1) { |
1852 | printf("\n" ); |
1853 | for (int j = 0; j < lpad; j++) putchar(' '); |
1854 | printf(branch,n->data[i]); |
1855 | } else { |
1856 | printf(" -> " ); |
1857 | } |
1858 | raxNode *child; |
1859 | memcpy(&child,cp,sizeof(child)); |
1860 | raxRecursiveShow(level+1,lpad,child); |
1861 | cp++; |
1862 | } |
1863 | } |
1864 | |
1865 | /* Show a tree, as outlined in the comment above. */ |
1866 | void raxShow(rax *rax) { |
1867 | raxRecursiveShow(0,0,rax->head); |
1868 | putchar('\n'); |
1869 | } |
1870 | |
1871 | /* Used by debugnode() macro to show info about a given node. */ |
1872 | void raxDebugShowNode(const char *msg, raxNode *n) { |
1873 | if (raxDebugMsg == 0) return; |
1874 | printf("%s: %p [%.*s] key:%u size:%u children:" , |
1875 | msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size); |
1876 | int numcld = n->iscompr ? 1 : n->size; |
1877 | raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1); |
1878 | while(numcld--) { |
1879 | raxNode *child; |
1880 | memcpy(&child,cldptr,sizeof(child)); |
1881 | cldptr++; |
1882 | printf("%p " , (void*)child); |
1883 | } |
1884 | printf("\n" ); |
1885 | fflush(stdout); |
1886 | } |
1887 | |
1888 | /* Touch all the nodes of a tree returning a check sum. This is useful |
1889 | * in order to make Valgrind detect if there is something wrong while |
1890 | * reading the data structure. |
1891 | * |
1892 | * This function was used in order to identify Rax bugs after a big refactoring |
1893 | * using this technique: |
1894 | * |
1895 | * 1. The rax-test is executed using Valgrind, adding a printf() so that for |
1896 | * the fuzz tester we see what iteration in the loop we are in. |
1897 | * 2. After every modification of the radix tree made by the fuzz tester |
1898 | * in rax-test.c, we add a call to raxTouch(). |
1899 | * 3. Now as soon as an operation will corrupt the tree, raxTouch() will |
1900 | * detect it (via Valgrind) immediately. We can add more calls to narrow |
1901 | * the state. |
1902 | * 4. At this point a good idea is to enable Rax debugging messages immediately |
1903 | * before the moment the tree is corrupted, to see what happens. |
1904 | */ |
1905 | unsigned long raxTouch(raxNode *n) { |
1906 | debugf("Touching %p\n" , (void*)n); |
1907 | unsigned long sum = 0; |
1908 | if (n->iskey) { |
1909 | sum += (unsigned long)raxGetData(n); |
1910 | } |
1911 | |
1912 | int numchildren = n->iscompr ? 1 : n->size; |
1913 | raxNode **cp = raxNodeFirstChildPtr(n); |
1914 | int count = 0; |
1915 | for (int i = 0; i < numchildren; i++) { |
1916 | if (numchildren > 1) { |
1917 | sum += (long)n->data[i]; |
1918 | } |
1919 | raxNode *child; |
1920 | memcpy(&child,cp,sizeof(child)); |
1921 | if (child == (void*)0x65d1760) count++; |
1922 | if (count > 1) exit(1); |
1923 | sum += raxTouch(child); |
1924 | cp++; |
1925 | } |
1926 | return sum; |
1927 | } |
1928 | |