1 | /* Rax -- A radix tree implementation. |
2 | * |
3 | * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com> |
4 | * All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions are met: |
8 | * |
9 | * * Redistributions of source code must retain the above copyright notice, |
10 | * this list of conditions and the following disclaimer. |
11 | * * Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * * Neither the name of Redis nor the names of its contributors may be used |
15 | * to endorse or promote products derived from this software without |
16 | * specific prior written permission. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
19 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
22 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
23 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
24 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
25 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
26 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
28 | * POSSIBILITY OF SUCH DAMAGE. |
29 | */ |
30 | |
31 | #ifndef RAX_H |
32 | #define RAX_H |
33 | |
34 | #include <stdint.h> |
35 | |
36 | /* Representation of a radix tree as implemented in this file, that contains |
37 | * the strings "foo", "foobar" and "footer" after the insertion of each |
38 | * word. When the node represents a key inside the radix tree, we write it |
39 | * between [], otherwise it is written between (). |
40 | * |
41 | * This is the vanilla representation: |
42 | * |
43 | * (f) "" |
44 | * \ |
45 | * (o) "f" |
46 | * \ |
47 | * (o) "fo" |
48 | * \ |
49 | * [t b] "foo" |
50 | * / \ |
51 | * "foot" (e) (a) "foob" |
52 | * / \ |
53 | * "foote" (r) (r) "fooba" |
54 | * / \ |
55 | * "footer" [] [] "foobar" |
56 | * |
57 | * However, this implementation implements a very common optimization where |
58 | * successive nodes having a single child are "compressed" into the node |
59 | * itself as a string of characters, each representing a next-level child, |
60 | * and only the link to the node representing the last character node is |
61 | * provided inside the representation. So the above representation is turned |
62 | * into: |
63 | * |
64 | * ["foo"] "" |
65 | * | |
66 | * [t b] "foo" |
67 | * / \ |
68 | * "foot" ("er") ("ar") "foob" |
69 | * / \ |
70 | * "footer" [] [] "foobar" |
71 | * |
72 | * However this optimization makes the implementation a bit more complex. |
73 | * For instance if a key "first" is added in the above radix tree, a |
74 | * "node splitting" operation is needed, since the "foo" prefix is no longer |
75 | * composed of nodes having a single child one after the other. This is the |
76 | * above tree and the resulting node splitting after this event happens: |
77 | * |
78 | * |
79 | * (f) "" |
80 | * / |
81 | * (i o) "f" |
82 | * / \ |
83 | * "firs" ("rst") (o) "fo" |
84 | * / \ |
85 | * "first" [] [t b] "foo" |
86 | * / \ |
87 | * "foot" ("er") ("ar") "foob" |
88 | * / \ |
89 | * "footer" [] [] "foobar" |
90 | * |
91 | * Similarly after deletion, if a new chain of nodes having a single child |
92 | * is created (the chain must also not include nodes that represent keys), |
93 | * it must be compressed back into a single node. |
94 | * |
95 | */ |
96 | |
97 | #define RAX_NODE_MAX_SIZE ((1<<29)-1) |
98 | typedef struct raxNode { |
99 | uint32_t iskey:1; /* Does this node contain a key? */ |
100 | uint32_t isnull:1; /* Associated value is NULL (don't store it). */ |
101 | uint32_t iscompr:1; /* Node is compressed. */ |
102 | uint32_t size:29; /* Number of children, or compressed string len. */ |
103 | /* Data layout is as follows: |
104 | * |
105 | * If node is not compressed we have 'size' bytes, one for each children |
106 | * character, and 'size' raxNode pointers, point to each child node. |
107 | * Note how the character is not stored in the children but in the |
108 | * edge of the parents: |
109 | * |
110 | * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?) |
111 | * |
112 | * if node is compressed (iscompr bit is 1) the node has 1 children. |
113 | * In that case the 'size' bytes of the string stored immediately at |
114 | * the start of the data section, represent a sequence of successive |
115 | * nodes linked one after the other, for which only the last one in |
116 | * the sequence is actually represented as a node, and pointed to by |
117 | * the current compressed node. |
118 | * |
119 | * [header iscompr=1][xyz][z-ptr](value-ptr?) |
120 | * |
121 | * Both compressed and not compressed nodes can represent a key |
122 | * with associated data in the radix tree at any level (not just terminal |
123 | * nodes). |
124 | * |
125 | * If the node has an associated key (iskey=1) and is not NULL |
126 | * (isnull=0), then after the raxNode pointers pointing to the |
127 | * children, an additional value pointer is present (as you can see |
128 | * in the representation above as "value-ptr" field). |
129 | */ |
130 | unsigned char data[]; |
131 | } raxNode; |
132 | |
133 | typedef struct rax { |
134 | raxNode *head; |
135 | uint64_t numele; |
136 | uint64_t numnodes; |
137 | } rax; |
138 | |
139 | /* Stack data structure used by raxLowWalk() in order to, optionally, return |
140 | * a list of parent nodes to the caller. The nodes do not have a "parent" |
141 | * field for space concerns, so we use the auxiliary stack when needed. */ |
142 | #define RAX_STACK_STATIC_ITEMS 32 |
143 | typedef struct raxStack { |
144 | void **stack; /* Points to static_items or an heap allocated array. */ |
145 | size_t items, maxitems; /* Number of items contained and total space. */ |
146 | /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap |
147 | * and use this static array of pointers instead. */ |
148 | void *static_items[RAX_STACK_STATIC_ITEMS]; |
149 | int oom; /* True if pushing into this stack failed for OOM at some point. */ |
150 | } raxStack; |
151 | |
152 | /* Optional callback used for iterators and be notified on each rax node, |
153 | * including nodes not representing keys. If the callback returns true |
154 | * the callback changed the node pointer in the iterator structure, and the |
155 | * iterator implementation will have to replace the pointer in the radix tree |
156 | * internals. This allows the callback to reallocate the node to perform |
157 | * very special operations, normally not needed by normal applications. |
158 | * |
159 | * This callback is used to perform very low level analysis of the radix tree |
160 | * structure, scanning each possible node (but the root node), or in order to |
161 | * reallocate the nodes to reduce the allocation fragmentation (this is the |
162 | * Redis application for this callback). |
163 | * |
164 | * This is currently only supported in forward iterations (raxNext) */ |
165 | typedef int (*raxNodeCallback)(raxNode **noderef); |
166 | |
167 | /* Radix tree iterator state is encapsulated into this data structure. */ |
168 | #define RAX_ITER_STATIC_LEN 128 |
169 | #define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current |
170 | element for the first iteration and |
171 | clear the flag. */ |
172 | #define RAX_ITER_EOF (1<<1) /* End of iteration reached. */ |
173 | #define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while |
174 | iterating. But it is slower. */ |
175 | typedef struct raxIterator { |
176 | int flags; |
177 | rax *rt; /* Radix tree we are iterating. */ |
178 | unsigned char *key; /* The current string. */ |
179 | void *data; /* Data associated to this key. */ |
180 | size_t key_len; /* Current key length. */ |
181 | size_t key_max; /* Max key len the current key buffer can hold. */ |
182 | unsigned char key_static_string[RAX_ITER_STATIC_LEN]; |
183 | raxNode *node; /* Current node. Only for unsafe iteration. */ |
184 | raxStack stack; /* Stack used for unsafe iteration. */ |
185 | raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */ |
186 | } raxIterator; |
187 | |
188 | /* A special pointer returned for not found items. */ |
189 | extern void *raxNotFound; |
190 | |
191 | /* Exported API. */ |
192 | rax *raxNew(void); |
193 | int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); |
194 | int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); |
195 | int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); |
196 | void *raxFind(rax *rax, unsigned char *s, size_t len); |
197 | void raxFree(rax *rax); |
198 | void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)); |
199 | void raxStart(raxIterator *it, rax *rt); |
200 | int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); |
201 | int raxNext(raxIterator *it); |
202 | int raxPrev(raxIterator *it); |
203 | int raxRandomWalk(raxIterator *it, size_t steps); |
204 | int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len); |
205 | void raxStop(raxIterator *it); |
206 | int raxEOF(raxIterator *it); |
207 | void raxShow(rax *rax); |
208 | uint64_t raxSize(rax *rax); |
209 | unsigned long raxTouch(raxNode *n); |
210 | void raxSetDebugMsg(int onoff); |
211 | |
212 | /* Internal API. May be used by the node callback in order to access rax nodes |
213 | * in a low level way, so this function is exported as well. */ |
214 | void raxSetData(raxNode *n, void *data); |
215 | |
216 | #endif |
217 | |