1 | /* Hash Tables Implementation. |
2 | * |
3 | * This file implements in memory hash tables with insert/del/replace/find/ |
 * get-random-element operations. Hash tables will auto-resize as needed;
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
7 | * |
8 | * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com> |
9 | * All rights reserved. |
10 | * |
11 | * Redistribution and use in source and binary forms, with or without |
12 | * modification, are permitted provided that the following conditions are met: |
13 | * |
14 | * * Redistributions of source code must retain the above copyright notice, |
15 | * this list of conditions and the following disclaimer. |
16 | * * Redistributions in binary form must reproduce the above copyright |
17 | * notice, this list of conditions and the following disclaimer in the |
18 | * documentation and/or other materials provided with the distribution. |
19 | * * Neither the name of Redis nor the names of its contributors may be used |
20 | * to endorse or promote products derived from this software without |
21 | * specific prior written permission. |
22 | * |
23 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
24 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
27 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | */ |
35 | |
36 | #include "fmacros.h" |
37 | |
38 | #include <stdio.h> |
39 | #include <stdlib.h> |
40 | #include <stdint.h> |
41 | #include <string.h> |
42 | #include <stdarg.h> |
43 | #include <limits.h> |
44 | #include <sys/time.h> |
45 | |
46 | #include "dict.h" |
47 | #include "zmalloc.h" |
48 | #include "redisassert.h" |
49 | |
50 | /* Using dictEnableResize() / dictDisableResize() we make possible to |
51 | * enable/disable resizing of the hash table as needed. This is very important |
52 | * for Redis, as we use copy-on-write and don't want to move too much memory |
53 | * around when there is a child performing saving operations. |
54 | * |
55 | * Note that even when dict_can_resize is set to 0, not all resizes are |
56 | * prevented: a hash table is still allowed to grow if the ratio between |
57 | * the number of elements and the buckets > dict_force_resize_ratio. */ |
static int dict_can_resize = 1;                  /* Toggled via dictEnableResize()/dictDisableResize(). */
static unsigned int dict_force_resize_ratio = 5; /* used/buckets ratio that forces growth even when resizing is disabled. */
60 | |
61 | /* -------------------------- private prototypes ---------------------------- */ |
62 | |
63 | static int _dictExpandIfNeeded(dict *d); |
64 | static signed char _dictNextExp(unsigned long size); |
65 | static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing); |
66 | static int _dictInit(dict *d, dictType *type); |
67 | |
68 | /* -------------------------- hash functions -------------------------------- */ |
69 | |
70 | static uint8_t dict_hash_function_seed[16]; |
71 | |
72 | void dictSetHashFunctionSeed(uint8_t *seed) { |
73 | memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed)); |
74 | } |
75 | |
76 | uint8_t *dictGetHashFunctionSeed(void) { |
77 | return dict_hash_function_seed; |
78 | } |
79 | |
80 | /* The default hashing function uses SipHash implementation |
81 | * in siphash.c. */ |
82 | |
83 | uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k); |
84 | uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k); |
85 | |
86 | uint64_t dictGenHashFunction(const void *key, size_t len) { |
87 | return siphash(key,len,dict_hash_function_seed); |
88 | } |
89 | |
90 | uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) { |
91 | return siphash_nocase(buf,len,dict_hash_function_seed); |
92 | } |
93 | |
94 | /* ----------------------------- API implementation ------------------------- */ |
95 | |
96 | /* Reset hash table parameters already initialized with _dictInit()*/ |
97 | static void _dictReset(dict *d, int htidx) |
98 | { |
99 | d->ht_table[htidx] = NULL; |
100 | d->ht_size_exp[htidx] = -1; |
101 | d->ht_used[htidx] = 0; |
102 | } |
103 | |
104 | /* Create a new hash table */ |
105 | dict *dictCreate(dictType *type) |
106 | { |
107 | dict *d = zmalloc(sizeof(*d)); |
108 | |
109 | _dictInit(d,type); |
110 | return d; |
111 | } |
112 | |
113 | /* Initialize the hash table */ |
114 | int _dictInit(dict *d, dictType *type) |
115 | { |
116 | _dictReset(d, 0); |
117 | _dictReset(d, 1); |
118 | d->type = type; |
119 | d->rehashidx = -1; |
120 | d->pauserehash = 0; |
121 | return DICT_OK; |
122 | } |
123 | |
124 | /* Resize the table to the minimal size that contains all the elements, |
125 | * but with the invariant of a USED/BUCKETS ratio near to <= 1 */ |
126 | int dictResize(dict *d) |
127 | { |
128 | unsigned long minimal; |
129 | |
130 | if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR; |
131 | minimal = d->ht_used[0]; |
132 | if (minimal < DICT_HT_INITIAL_SIZE) |
133 | minimal = DICT_HT_INITIAL_SIZE; |
134 | return dictExpand(d, minimal); |
135 | } |
136 | |
/* Expand or create the hash table,
 * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
 * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht_used[0] > size)
        return DICT_ERR;

    /* the new hash table */
    dictEntry **new_ht_table;
    unsigned long new_ht_used;
    /* _dictNextExp() presumably yields the exponent of the smallest power of
     * two >= size (defined later in this file) -- TODO confirm at its body. */
    signed char new_ht_size_exp = _dictNextExp(size);

    /* Detect overflows */
    size_t newsize = 1ul<<new_ht_size_exp;
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    if (malloc_failed) {
        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_ht_table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));

    new_ht_used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht_table[0] == NULL) {
        d->ht_size_exp[0] = new_ht_size_exp;
        d->ht_used[0] = new_ht_used;
        d->ht_table[0] = new_ht_table;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht_size_exp[1] = new_ht_size_exp;
    d->ht_used[1] = new_ht_used;
    d->ht_table[1] = new_ht_table;
    d->rehashidx = 0; /* From here on dictIsRehashing(d) is true. */
    return DICT_OK;
}
189 | |
190 | /* return DICT_ERR if expand was not performed */ |
191 | int dictExpand(dict *d, unsigned long size) { |
192 | return _dictExpand(d, size, NULL); |
193 | } |
194 | |
195 | /* return DICT_ERR if expand failed due to memory allocation failure */ |
196 | int dictTryExpand(dict *d, unsigned long size) { |
197 | int malloc_failed; |
198 | _dictExpand(d, size, &malloc_failed); |
199 | return malloc_failed? DICT_ERR : DICT_OK; |
200 | } |
201 | |
202 | /* Performs N steps of incremental rehashing. Returns 1 if there are still |
203 | * keys to move from the old to the new hash table, otherwise 0 is returned. |
204 | * |
205 | * Note that a rehashing step consists in moving a bucket (that may have more |
206 | * than one key as we use chaining) from the old to the new hash table, however |
207 | * since part of the hash table may be composed of empty spaces, it is not |
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at most N*10 empty buckets in total; otherwise the amount of
 * work it does would be unbounded and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    /* Move up to n non-empty buckets from ht[0] to ht[1]. */
    while(n-- && d->ht_used[0] != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
        /* Skip empty buckets, bounded by empty_visits so a sparse table
         * cannot make one call arbitrarily expensive. */
        while(d->ht_table[0][d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht_table[0][d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            de->next = d->ht_table[1][h]; /* Prepend into the destination bucket. */
            d->ht_table[1][h] = de;
            d->ht_used[0]--;
            d->ht_used[1]++;
            de = nextde;
        }
        d->ht_table[0][d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht_used[0] == 0) {
        zfree(d->ht_table[0]);
        /* Copy the new ht onto the old one */
        d->ht_table[0] = d->ht_table[1];
        d->ht_used[0] = d->ht_used[1];
        d->ht_size_exp[0] = d->ht_size_exp[1];
        _dictReset(d, 1);
        d->rehashidx = -1; /* Rehashing finished. */
        return 0;
    }

    /* More to rehash... */
    return 1;
}
258 | |
259 | long long timeInMilliseconds(void) { |
260 | struct timeval tv; |
261 | |
262 | gettimeofday(&tv,NULL); |
263 | return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000); |
264 | } |
265 | |
266 | /* Rehash in ms+"delta" milliseconds. The value of "delta" is larger |
267 | * than 0, and is smaller than 1 in most cases. The exact upper bound |
268 | * depends on the running time of dictRehash(d,100).*/ |
269 | int dictRehashMilliseconds(dict *d, int ms) { |
270 | if (d->pauserehash > 0) return 0; |
271 | |
272 | long long start = timeInMilliseconds(); |
273 | int rehashes = 0; |
274 | |
275 | while(dictRehash(d,100)) { |
276 | rehashes += 100; |
277 | if (timeInMilliseconds()-start > ms) break; |
278 | } |
279 | return rehashes; |
280 | } |
281 | |
282 | /* This function performs just a step of rehashing, and only if hashing has |
283 | * not been paused for our hash table. When we have iterators in the |
284 | * middle of a rehashing we can't mess with the two hash tables otherwise |
285 | * some elements can be missed or duplicated. |
286 | * |
287 | * This function is called by common lookup or update operations in the |
288 | * dictionary so that the hash table automatically migrates from H1 to H2 |
289 | * while it is actively used. */ |
290 | static void _dictRehashStep(dict *d) { |
291 | if (d->pauserehash == 0) dictRehash(d,1); |
292 | } |
293 | |
294 | /* Add an element to the target hash table */ |
295 | int dictAdd(dict *d, void *key, void *val) |
296 | { |
297 | dictEntry *entry = dictAddRaw(d,key,NULL); |
298 | |
299 | if (!entry) return DICT_ERR; |
300 | dictSetVal(d, entry, val); |
301 | return DICT_OK; |
302 | } |
303 | |
/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as they wish.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    int htidx;

    /* Every write does a bit of the pending migration work. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    htidx = dictIsRehashing(d) ? 1 : 0; /* While rehashing, new keys go into ht[1]. */
    size_t metasize = dictMetadataSize(d); /* Optional per-entry metadata defined by the dict type. */
    entry = zmalloc(sizeof(*entry) + metasize);
    if (metasize > 0) {
        memset(dictMetadata(entry), 0, metasize);
    }
    entry->next = d->ht_table[htidx][index]; /* Prepend into the bucket's chain. */
    d->ht_table[htidx][index] = entry;
    d->ht_used[htidx]++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}
353 | |
/* Add or Overwrite:
 * Add an element, discarding the old value if the key already exists.
 * Return 1 if the key was added from scratch, 0 if there was already an
 * element with such key and dictReplace() just performed a value update
 * operation. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *entry, *existing, auxentry;

    /* Try to add the element. If the key
     * does not exists dictAdd will succeed. */
    entry = dictAddRaw(d,key,&existing);
    if (entry) {
        dictSetVal(d, entry, val);
        return 1;
    }

    /* 'existing' is only populated by dictAddRaw() when it returned NULL,
     * i.e. exactly the path we are on here. */
    /* Set the new value and free the old one. Note that it is important
     * to do that in this order, as the value may just be exactly the same
     * as the previous one. In this context, think to reference counting,
     * you want to increment (set), and then decrement (free), and not the
     * reverse. */
    auxentry = *existing; /* Shallow copy keeps the old value reachable for the free below. */
    dictSetVal(d, existing, val);
    dictFreeVal(d, &auxentry);
    return 0;
}
381 | |
382 | /* Add or Find: |
383 | * dictAddOrFind() is simply a version of dictAddRaw() that always |
384 | * returns the hash entry of the specified key, even if the key already |
385 | * exists and can't be added (in that case the entry of the already |
386 | * existing key is returned.) |
387 | * |
388 | * See dictAddRaw() for more information. */ |
389 | dictEntry *dictAddOrFind(dict *d, void *key) { |
390 | dictEntry *entry, *existing; |
391 | entry = dictAddRaw(d,key,&existing); |
392 | return entry ? entry : existing; |
393 | } |
394 | |
/* Search and remove an element. This is a helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    /* dict is empty */
    if (dictSize(d) == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    /* While rehashing the key may live in either table, so scan both. */
    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        he = d->ht_table[table][idx];
        prevHe = NULL;
        while(he) {
            /* Pointer identity short-circuits the (possibly costly) compare. */
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht_table[table][idx] = he->next;
                if (!nofree) {
                    dictFreeUnlinkedEntry(d, he);
                }
                d->ht_used[table]--;
                return he; /* NOTE: dangling when !nofree; callers only test for NULL then. */
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break; /* ht[1] is unused when not rehashing. */
    }
    return NULL; /* not found */
}
433 | |
434 | /* Remove an element, returning DICT_OK on success or DICT_ERR if the |
435 | * element was not found. */ |
436 | int dictDelete(dict *ht, const void *key) { |
437 | return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR; |
438 | } |
439 | |
440 | /* Remove an element from the table, but without actually releasing |
441 | * the key, value and dictionary entry. The dictionary entry is returned |
442 | * if the element was found (and unlinked from the table), and the user |
443 | * should later call `dictFreeUnlinkedEntry()` with it in order to release it. |
444 | * Otherwise if the key is not found, NULL is returned. |
445 | * |
446 | * This function is useful when we want to remove something from the hash |
447 | * table but want to use its value before actually deleting the entry. |
448 | * Without this function the pattern would require two lookups: |
449 | * |
450 | * entry = dictFind(...); |
451 | * // Do something with entry |
452 | * dictDelete(dictionary,entry); |
453 | * |
454 | * Thanks to this function it is possible to avoid this, and use |
455 | * instead: |
456 | * |
457 | * entry = dictUnlink(dictionary,entry); |
458 | * // Do something with entry |
459 | * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again. |
460 | */ |
461 | dictEntry *dictUnlink(dict *d, const void *key) { |
462 | return dictGenericDelete(d,key,1); |
463 | } |
464 | |
465 | /* You need to call this function to really free the entry after a call |
466 | * to dictUnlink(). It's safe to call this function with 'he' = NULL. */ |
467 | void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { |
468 | if (he == NULL) return; |
469 | dictFreeKey(d, he); |
470 | dictFreeVal(d, he); |
471 | zfree(he); |
472 | } |
473 | |
/* Destroy one of the two hash tables of the dict ('htidx' is 0 or 1):
 * free every entry's key/value and the entry itself, then the bucket array,
 * and reset the table to its empty state. 'callback' may be NULL; when set
 * it is invoked once every 65536 buckets (e.g. so callers can keep serving
 * events during a long teardown). */
int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
    unsigned long i;

    /* Free all the elements */
    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
        dictEntry *he, *nextHe;

        if (callback && (i & 65535) == 0) callback(d); /* Periodic callback. */

        if ((he = d->ht_table[htidx][i]) == NULL) continue;
        while(he) {
            nextHe = he->next; /* Save before freeing the node. */
            dictFreeKey(d, he);
            dictFreeVal(d, he);
            zfree(he);
            d->ht_used[htidx]--;
            he = nextHe;
        }
    }
    /* Free the table and the allocated cache structure */
    zfree(d->ht_table[htidx]);
    /* Re-initialize the table */
    _dictReset(d, htidx);
    return DICT_OK; /* never fails */
}
500 | |
501 | /* Clear & Release the hash table */ |
502 | void dictRelease(dict *d) |
503 | { |
504 | _dictClear(d,0,NULL); |
505 | _dictClear(d,1,NULL); |
506 | zfree(d); |
507 | } |
508 | |
509 | dictEntry *dictFind(dict *d, const void *key) |
510 | { |
511 | dictEntry *he; |
512 | uint64_t h, idx, table; |
513 | |
514 | if (dictSize(d) == 0) return NULL; /* dict is empty */ |
515 | if (dictIsRehashing(d)) _dictRehashStep(d); |
516 | h = dictHashKey(d, key); |
517 | for (table = 0; table <= 1; table++) { |
518 | idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); |
519 | he = d->ht_table[table][idx]; |
520 | while(he) { |
521 | if (key==he->key || dictCompareKeys(d, key, he->key)) |
522 | return he; |
523 | he = he->next; |
524 | } |
525 | if (!dictIsRehashing(d)) return NULL; |
526 | } |
527 | return NULL; |
528 | } |
529 | |
530 | void *dictFetchValue(dict *d, const void *key) { |
531 | dictEntry *he; |
532 | |
533 | he = dictFind(d,key); |
534 | return he ? dictGetVal(he) : NULL; |
535 | } |
536 | |
537 | /* A fingerprint is a 64 bit number that represents the state of the dictionary |
538 | * at a given time, it's just a few dict properties xored together. |
539 | * When an unsafe iterator is initialized, we get the dict fingerprint, and check |
540 | * the fingerprint again when the iterator is released. |
541 | * If the two fingerprints are different it means that the user of the iterator |
542 | * performed forbidden operations against the dictionary while iterating. */ |
543 | unsigned long long dictFingerprint(dict *d) { |
544 | unsigned long long integers[6], hash = 0; |
545 | int j; |
546 | |
547 | integers[0] = (long) d->ht_table[0]; |
548 | integers[1] = d->ht_size_exp[0]; |
549 | integers[2] = d->ht_used[0]; |
550 | integers[3] = (long) d->ht_table[1]; |
551 | integers[4] = d->ht_size_exp[1]; |
552 | integers[5] = d->ht_used[1]; |
553 | |
554 | /* We hash N integers by summing every successive integer with the integer |
555 | * hashing of the previous sum. Basically: |
556 | * |
557 | * Result = hash(hash(hash(int1)+int2)+int3) ... |
558 | * |
559 | * This way the same set of integers in a different order will (likely) hash |
560 | * to a different number. */ |
561 | for (j = 0; j < 6; j++) { |
562 | hash += integers[j]; |
563 | /* For the hashing step we use Tomas Wang's 64 bit integer hash. */ |
564 | hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1; |
565 | hash = hash ^ (hash >> 24); |
566 | hash = (hash + (hash << 3)) + (hash << 8); // hash * 265 |
567 | hash = hash ^ (hash >> 14); |
568 | hash = (hash + (hash << 2)) + (hash << 4); // hash * 21 |
569 | hash = hash ^ (hash >> 28); |
570 | hash = hash + (hash << 31); |
571 | } |
572 | return hash; |
573 | } |
574 | |
575 | dictIterator *dictGetIterator(dict *d) |
576 | { |
577 | dictIterator *iter = zmalloc(sizeof(*iter)); |
578 | |
579 | iter->d = d; |
580 | iter->table = 0; |
581 | iter->index = -1; |
582 | iter->safe = 0; |
583 | iter->entry = NULL; |
584 | iter->nextEntry = NULL; |
585 | return iter; |
586 | } |
587 | |
588 | dictIterator *dictGetSafeIterator(dict *d) { |
589 | dictIterator *i = dictGetIterator(d); |
590 | |
591 | i->safe = 1; |
592 | return i; |
593 | } |
594 | |
/* Return the next entry of the iteration, or NULL when all entries have
 * been visited. On the very first call it "pins" the dict state: safe
 * iterators pause rehashing, unsafe ones record a fingerprint that is
 * verified on release. */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
            /* Current bucket exhausted (or first call): move to next bucket. */
            if (iter->index == -1 && iter->table == 0) {
                /* First call ever for this iterator. */
                if (iter->safe)
                    dictPauseRehashing(iter->d);
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            iter->index++;
            if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) {
                /* Past the end of this table: continue into ht[1] only while
                 * a rehash is in progress, otherwise iteration is complete. */
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                } else {
                    break;
                }
            }
            iter->entry = iter->d->ht_table[iter->table][iter->index];
        } else {
            /* Keep walking the chain of the current bucket. */
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}
627 | |
628 | void dictReleaseIterator(dictIterator *iter) |
629 | { |
630 | if (!(iter->index == -1 && iter->table == 0)) { |
631 | if (iter->safe) |
632 | dictResumeRehashing(iter->d); |
633 | else |
634 | assert(iter->fingerprint == dictFingerprint(iter->d)); |
635 | } |
636 | zfree(iter); |
637 | } |
638 | |
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned long h;
    int listlen, listele;

    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    if (dictIsRehashing(d)) {
        unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
        /* Treat ht[0] followed by ht[1] as one big virtual array of buckets
         * and sample from the part that can still contain elements. */
        do {
            /* We are sure there are no elements in indexes from 0
             * to rehashidx-1 */
            h = d->rehashidx + (randomULong() % (dictSlots(d) - d->rehashidx));
            he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h];
        } while(he == NULL);
    } else {
        unsigned long m = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
        /* Rejection sampling: retry until a non-empty bucket is hit.
         * Terminates because dictSize(d) != 0 was checked above. */
        do {
            h = randomULong() & m;
            he = d->ht_table[0][h];
        } while(he == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    orighe = he;
    while(he) {
        he = he->next;
        listlen++;
    }
    listele = random() % listlen;
    he = orighe;
    while(listele--) he = he->next;
    return he;
}
680 | |
681 | /* This function samples the dictionary to return a few keys from random |
682 | * locations. |
683 | * |
 * It does not guarantee to return all the keys specified in 'count', nor
 * does it guarantee to return non-duplicated elements; however, it will make
 * some effort to do both things.
687 | * |
688 | * Returned pointers to hash table entries are stored into 'des' that |
689 | * points to an array of dictEntry pointers. The array must have room for |
690 | * at least 'count' elements, that is the argument we pass to the function |
691 | * to tell how many random elements we need. |
692 | * |
693 | * The function returns the number of items stored into 'des', that may |
694 | * be less than 'count' if the hash table has less than 'count' elements |
695 | * inside, or if not enough elements were found in a reasonable amount of |
696 | * steps. |
697 | * |
698 | * Note that this function is not suitable when you need a good distribution |
699 | * of the returned items, but only when you need to "sample" a given number |
700 | * of continuous elements to run some kind of algorithm or to produce |
701 | * statistics. However the function is much faster than dictGetRandomKey() |
702 | * at producing N elements. */ |
/* Sample up to 'count' entries from random, contiguous locations of the
 * table into 'des' (which must have room for 'count' pointers). May return
 * fewer entries and may return duplicates; 'maxsteps' bounds the total work.
 * Returns the number of entries stored into 'des'. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    unsigned long j; /* internal hash table id, 0 or 1. */
    unsigned long tables; /* 1 or 2 tables? */
    unsigned long stored = 0, maxsizemask;
    unsigned long maxsteps;

    if (dictSize(d) < count) count = dictSize(d); /* Can't return more than we hold. */
    maxsteps = count*10;

    /* Try to do a rehashing work proportional to 'count'. */
    for (j = 0; j < count; j++) {
        if (dictIsRehashing(d))
            _dictRehashStep(d);
        else
            break;
    }

    tables = dictIsRehashing(d) ? 2 : 1;
    /* Index with the mask of the larger of the two tables so every bucket
     * of both tables is reachable. */
    maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
    if (tables > 1 && maxsizemask < DICTHT_SIZE_MASK(d->ht_size_exp[1]))
        maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[1]);

    /* Pick a random point inside the larger table. */
    unsigned long i = randomULong() & maxsizemask;
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                if (i >= DICTHT_SIZE(d->ht_size_exp[1]))
                    i = d->rehashidx;
                else
                    continue;
            }
            if (i >= DICTHT_SIZE(d->ht_size_exp[j])) continue; /* Out of range for this table. */
            dictEntry *he = d->ht_table[j][i];

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                if (emptylen >= 5 && emptylen > count) {
                    i = randomULong() & maxsizemask;
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non
                     * empty while iterating. */
                    *des = he;
                    des++;
                    he = he->next;
                    stored++;
                    if (stored == count) return stored;
                }
            }
        }
        i = (i+1) & maxsizemask; /* Advance linearly; wrap at the larger table size. */
    }
    return stored;
}
771 | |
772 | /* This is like dictGetRandomKey() from the POV of the API, but will do more |
773 | * work to ensure a better distribution of the returned element. |
774 | * |
775 | * This function improves the distribution because the dictGetRandomKey() |
776 | * problem is that it selects a random bucket, then it selects a random |
777 | * element from the chain in the bucket. However elements being in different |
778 | * chain lengths will have different probabilities of being reported. With |
779 | * this function instead what we do is to consider a "linear" range of the table |
780 | * that may be constituted of N buckets with chains of different lengths |
781 | * appearing one after the other. Then we report a random element in the range. |
782 | * In this way we smooth away the problem of different chain lengths. */ |
783 | #define GETFAIR_NUM_ENTRIES 15 |
784 | dictEntry *dictGetFairRandomKey(dict *d) { |
785 | dictEntry *entries[GETFAIR_NUM_ENTRIES]; |
786 | unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES); |
787 | /* Note that dictGetSomeKeys() may return zero elements in an unlucky |
788 | * run() even if there are actually elements inside the hash table. So |
789 | * when we get zero, we call the true dictGetRandomKey() that will always |
790 | * yield the element if the hash table has at least one. */ |
791 | if (count == 0) return dictGetRandomKey(d); |
792 | unsigned int idx = rand() % count; |
793 | return entries[idx]; |
794 | } |
795 | |
/* Reverse the bit order of 'v' (MSB becomes LSB and vice versa), in
 * O(log bits) halving swaps. Algorithm from:
 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {
    unsigned long bits = CHAR_BIT * sizeof(v); /* word width; power of 2 */
    unsigned long mask = ~0UL;
    for (unsigned long shift = bits >> 1; shift > 0; shift >>= 1) {
        mask ^= mask << shift;
        v = ((v >> shift) & mask) | ((v << shift) & ~mask);
    }
    return v;
}
807 | |
808 | /* dictScan() is used to iterate over the elements of a dictionary. |
809 | * |
810 | * Iterating works the following way: |
811 | * |
812 | * 1) Initially you call the function using a cursor (v) value of 0. |
813 | * 2) The function performs one step of the iteration, and returns the |
814 | * new cursor value you must use in the next call. |
815 | * 3) When the returned cursor is 0, the iteration is complete. |
816 | * |
817 | * The function guarantees all elements present in the |
818 | * dictionary get returned between the start and end of the iteration. |
819 | * However it is possible some elements get returned multiple times. |
820 | * |
821 | * For every element returned, the callback argument 'fn' is |
822 | * called with 'privdata' as first argument and the dictionary entry |
823 | * 'de' as second argument. |
824 | * |
825 | * HOW IT WORKS. |
826 | * |
827 | * The iteration algorithm was designed by Pieter Noordhuis. |
828 | * The main idea is to increment a cursor starting from the higher order |
829 | * bits. That is, instead of incrementing the cursor normally, the bits |
830 | * of the cursor are reversed, then the cursor is incremented, and finally |
831 | * the bits are reversed again. |
832 | * |
833 | * This strategy is needed because the hash table may be resized between |
834 | * iteration calls. |
835 | * |
836 | * dict.c hash tables are always power of two in size, and they |
837 | * use chaining, so the position of an element in a given table is given |
838 | * by computing the bitwise AND between Hash(key) and SIZE-1 |
839 | * (where SIZE-1 is always the mask that is equivalent to taking the rest |
840 | * of the division between the Hash of the key and SIZE). |
841 | * |
842 | * For example if the current hash table size is 16, the mask is |
843 | * (in binary) 1111. The position of a key in the hash table will always be |
844 | * the last four bits of the hash output, and so forth. |
845 | * |
846 | * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE? |
847 | * |
848 | * If the hash table grows, elements can go anywhere in one multiple of |
849 | * the old bucket: for example let's say we already iterated with |
850 | * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16). |
851 | * |
852 | * If the hash table will be resized to 64 elements, then the new mask will |
853 | * be 111111. The new buckets you obtain by substituting in ??1100 |
854 | * with either 0 or 1 can be targeted only by keys we already visited |
855 | * when scanning the bucket 1100 in the smaller hash table. |
856 | * |
857 | * By iterating the higher bits first, because of the inverted counter, the |
858 | * cursor does not need to restart if the table size gets bigger. It will |
859 | * continue iterating using cursors without '1100' at the end, and also |
860 | * without any other combination of the final 4 bits already explored. |
861 | * |
862 | * Similarly when the table size shrinks over time, for example going from |
863 | * 16 to 8, if a combination of the lower three bits (the mask for size 8 |
864 | * is 111) were already completely explored, it would not be visited again |
865 | * because we are sure we tried, for example, both 0111 and 1111 (all the |
866 | * variations of the higher bit) so we don't need to test it again. |
867 | * |
868 | * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING! |
869 | * |
870 | * Yes, this is true, but we always iterate the smaller table first, then |
871 | * we test all the expansions of the current cursor into the larger |
872 | * table. For example if the current cursor is 101 and we also have a |
873 | * larger table of size 16, we also test (0)101 and (1)101 inside the larger |
874 | * table. This reduces the problem back to having only one table, where |
875 | * the larger one, if it exists, is just an expansion of the smaller one. |
876 | * |
877 | * LIMITATIONS |
878 | * |
879 | * This iterator is completely stateless, and this is a huge advantage, |
880 | * including no additional memory used. |
881 | * |
882 | * The disadvantages resulting from this design are: |
883 | * |
884 | * 1) It is possible we return elements more than once. However this is usually |
885 | * easy to deal with in the application level. |
886 | * 2) The iterator must return multiple elements per call, as it needs to always |
887 | * return all the keys chained in a given bucket, and all the expansions, so |
888 | * we are sure we don't miss keys moving during rehashing. |
889 | * 3) The reverse cursor is somewhat hard to understand at first, but this |
890 | * comment is supposed to help. |
891 | */ |
unsigned long dictScan(dict *d,
                       unsigned long v,
                       dictScanFunction *fn,
                       dictScanBucketFunction* bucketfn,
                       void *privdata)
{
    int htidx0, htidx1;
    const dictEntry *de, *next;
    unsigned long m0, m1;

    /* Empty dict: return 0, which also signals "iteration complete". */
    if (dictSize(d) == 0) return 0;

    /* This is needed in case the scan callback tries to do dictFind or alike. */
    dictPauseRehashing(d);

    if (!dictIsRehashing(d)) {
        /* Single-table case: scan one bucket of table 0. */
        htidx0 = 0;
        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);

        /* Emit entries at cursor */
        if (bucketfn) bucketfn(d, &d->ht_table[htidx0][v & m0]);
        de = d->ht_table[htidx0][v & m0];
        while (de) {
            /* Save 'next' first: fn() is allowed to delete 'de'. */
            next = de->next;
            fn(privdata, de);
            de = next;
        }

        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~m0;

        /* Increment the reverse cursor */
        v = rev(v);
        v++;
        v = rev(v);

    } else {
        htidx0 = 0;
        htidx1 = 1;

        /* Make sure t0 is the smaller and t1 is the bigger table */
        if (DICTHT_SIZE(d->ht_size_exp[htidx0]) > DICTHT_SIZE(d->ht_size_exp[htidx1])) {
            htidx0 = 1;
            htidx1 = 0;
        }

        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);
        m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]);

        /* Emit entries at cursor */
        if (bucketfn) bucketfn(d, &d->ht_table[htidx0][v & m0]);
        de = d->ht_table[htidx0][v & m0];
        while (de) {
            /* Save 'next' first: fn() is allowed to delete 'de'. */
            next = de->next;
            fn(privdata, de);
            de = next;
        }

        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            /* Emit entries at cursor */
            if (bucketfn) bucketfn(d, &d->ht_table[htidx1][v & m1]);
            de = d->ht_table[htidx1][v & m1];
            while (de) {
                next = de->next;
                fn(privdata, de);
                de = next;
            }

            /* Increment the reverse cursor not covered by the smaller mask.*/
            v |= ~m1;
            v = rev(v);
            v++;
            v = rev(v);

            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }

    dictResumeRehashing(d);

    return v;
}
977 | |
978 | /* ------------------------- private functions ------------------------------ */ |
979 | |
/* Because we may need to allocate a huge memory chunk at once when the dict
 * expands, we check whether that allocation is allowed, via the dict
 * type's optional expandAllowed member function, before expanding. */
983 | static int dictTypeExpandAllowed(dict *d) { |
984 | if (d->type->expandAllowed == NULL) return 1; |
985 | return d->type->expandAllowed( |
986 | DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*), |
987 | (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0])); |
988 | } |
989 | |
990 | /* Expand the hash table if needed */ |
991 | static int _dictExpandIfNeeded(dict *d) |
992 | { |
993 | /* Incremental rehashing already in progress. Return. */ |
994 | if (dictIsRehashing(d)) return DICT_OK; |
995 | |
996 | /* If the hash table is empty expand it to the initial size. */ |
997 | if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); |
998 | |
999 | /* If we reached the 1:1 ratio, and we are allowed to resize the hash |
1000 | * table (global setting) or we should avoid it but the ratio between |
1001 | * elements/buckets is over the "safe" threshold, we resize doubling |
1002 | * the number of buckets. */ |
1003 | if (d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0]) && |
1004 | (dict_can_resize || |
1005 | d->ht_used[0]/ DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio) && |
1006 | dictTypeExpandAllowed(d)) |
1007 | { |
1008 | return dictExpand(d, d->ht_used[0] + 1); |
1009 | } |
1010 | return DICT_OK; |
1011 | } |
1012 | |
1013 | /* TODO: clz optimization */ |
1014 | /* Our hash table capability is a power of two */ |
1015 | static signed char _dictNextExp(unsigned long size) |
1016 | { |
1017 | unsigned char e = DICT_HT_INITIAL_EXP; |
1018 | |
1019 | if (size >= LONG_MAX) return (8*sizeof(long)-1); |
1020 | while(1) { |
1021 | if (((unsigned long)1<<e) >= size) |
1022 | return e; |
1023 | e++; |
1024 | } |
1025 | } |
1026 | |
1027 | /* Returns the index of a free slot that can be populated with |
1028 | * a hash entry for the given 'key'. |
1029 | * If the key already exists, -1 is returned |
1030 | * and the optional output parameter may be filled. |
1031 | * |
1032 | * Note that if we are in the process of rehashing the hash table, the |
1033 | * index is always returned in the context of the second (new) hash table. */ |
1034 | static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing) |
1035 | { |
1036 | unsigned long idx, table; |
1037 | dictEntry *he; |
1038 | if (existing) *existing = NULL; |
1039 | |
1040 | /* Expand the hash table if needed */ |
1041 | if (_dictExpandIfNeeded(d) == DICT_ERR) |
1042 | return -1; |
1043 | for (table = 0; table <= 1; table++) { |
1044 | idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); |
1045 | /* Search if this slot does not already contain the given key */ |
1046 | he = d->ht_table[table][idx]; |
1047 | while(he) { |
1048 | if (key==he->key || dictCompareKeys(d, key, he->key)) { |
1049 | if (existing) *existing = he; |
1050 | return -1; |
1051 | } |
1052 | he = he->next; |
1053 | } |
1054 | if (!dictIsRehashing(d)) break; |
1055 | } |
1056 | return idx; |
1057 | } |
1058 | |
1059 | void dictEmpty(dict *d, void(callback)(dict*)) { |
1060 | _dictClear(d,0,callback); |
1061 | _dictClear(d,1,callback); |
1062 | d->rehashidx = -1; |
1063 | d->pauserehash = 0; |
1064 | } |
1065 | |
/* Allow automatic hash table resizing (see _dictExpandIfNeeded()). */
void dictEnableResize(void) {
    dict_can_resize = 1;
}
1069 | |
/* Forbid automatic resizing; _dictExpandIfNeeded() will still grow the
 * table if the elements/buckets ratio exceeds dict_force_resize_ratio. */
void dictDisableResize(void) {
    dict_can_resize = 0;
}
1073 | |
/* Return the hash of 'key' computed with this dict's hash function.
 * The result can be fed to dictFindEntryRefByPtrAndHash(). */
uint64_t dictGetHash(dict *d, const void *key) {
    return dictHashKey(d, key);
}
1077 | |
/* Finds the dictEntry reference by using the key pointer and a pre-calculated
 * hash. 'oldptr' is a dead pointer and must not be dereferenced.
 * The hash value should be provided using dictGetHash().
 * No string / key comparison is performed, only pointer identity.
 * The return value is the reference to the dictEntry if found, or NULL if not found. */
1083 | dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) { |
1084 | dictEntry *he, **heref; |
1085 | unsigned long idx, table; |
1086 | |
1087 | if (dictSize(d) == 0) return NULL; /* dict is empty */ |
1088 | for (table = 0; table <= 1; table++) { |
1089 | idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); |
1090 | heref = &d->ht_table[table][idx]; |
1091 | he = *heref; |
1092 | while(he) { |
1093 | if (oldptr==he->key) |
1094 | return heref; |
1095 | heref = &he->next; |
1096 | he = *heref; |
1097 | } |
1098 | if (!dictIsRehashing(d)) return NULL; |
1099 | } |
1100 | return NULL; |
1101 | } |
1102 | |
1103 | /* ------------------------------- Debugging ---------------------------------*/ |
1104 | |
1105 | #define DICT_STATS_VECTLEN 50 |
1106 | size_t _dictGetStatsHt(char *buf, size_t bufsize, dict *d, int htidx) { |
1107 | unsigned long i, slots = 0, chainlen, maxchainlen = 0; |
1108 | unsigned long totchainlen = 0; |
1109 | unsigned long clvector[DICT_STATS_VECTLEN]; |
1110 | size_t l = 0; |
1111 | |
1112 | if (d->ht_used[htidx] == 0) { |
1113 | return snprintf(buf,bufsize, |
1114 | "No stats available for empty dictionaries\n" ); |
1115 | } |
1116 | |
1117 | /* Compute stats. */ |
1118 | for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0; |
1119 | for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) { |
1120 | dictEntry *he; |
1121 | |
1122 | if (d->ht_table[htidx][i] == NULL) { |
1123 | clvector[0]++; |
1124 | continue; |
1125 | } |
1126 | slots++; |
1127 | /* For each hash entry on this slot... */ |
1128 | chainlen = 0; |
1129 | he = d->ht_table[htidx][i]; |
1130 | while(he) { |
1131 | chainlen++; |
1132 | he = he->next; |
1133 | } |
1134 | clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++; |
1135 | if (chainlen > maxchainlen) maxchainlen = chainlen; |
1136 | totchainlen += chainlen; |
1137 | } |
1138 | |
1139 | /* Generate human readable stats. */ |
1140 | l += snprintf(buf+l,bufsize-l, |
1141 | "Hash table %d stats (%s):\n" |
1142 | " table size: %lu\n" |
1143 | " number of elements: %lu\n" |
1144 | " different slots: %lu\n" |
1145 | " max chain length: %lu\n" |
1146 | " avg chain length (counted): %.02f\n" |
1147 | " avg chain length (computed): %.02f\n" |
1148 | " Chain length distribution:\n" , |
1149 | htidx, (htidx == 0) ? "main hash table" : "rehashing target" , |
1150 | DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx], slots, maxchainlen, |
1151 | (float)totchainlen/slots, (float)d->ht_used[htidx]/slots); |
1152 | |
1153 | for (i = 0; i < DICT_STATS_VECTLEN-1; i++) { |
1154 | if (clvector[i] == 0) continue; |
1155 | if (l >= bufsize) break; |
1156 | l += snprintf(buf+l,bufsize-l, |
1157 | " %ld: %ld (%.02f%%)\n" , |
1158 | i, clvector[i], ((float)clvector[i]/DICTHT_SIZE(d->ht_size_exp[htidx]))*100); |
1159 | } |
1160 | |
1161 | /* Unlike snprintf(), return the number of characters actually written. */ |
1162 | if (bufsize) buf[bufsize-1] = '\0'; |
1163 | return strlen(buf); |
1164 | } |
1165 | |
1166 | void dictGetStats(char *buf, size_t bufsize, dict *d) { |
1167 | size_t l; |
1168 | char *orig_buf = buf; |
1169 | size_t orig_bufsize = bufsize; |
1170 | |
1171 | l = _dictGetStatsHt(buf,bufsize,d,0); |
1172 | buf += l; |
1173 | bufsize -= l; |
1174 | if (dictIsRehashing(d) && bufsize > 0) { |
1175 | _dictGetStatsHt(buf,bufsize,d,1); |
1176 | } |
1177 | /* Make sure there is a NULL term at the end. */ |
1178 | if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0'; |
1179 | } |
1180 | |
1181 | /* ------------------------------- Benchmark ---------------------------------*/ |
1182 | |
1183 | #ifdef REDIS_TEST |
1184 | #include "testhelp.h" |
1185 | |
1186 | #define UNUSED(V) ((void) V) |
1187 | |
/* Benchmark hash callback: hashes the key as a NUL-terminated C string. */
uint64_t hashCallback(const void *key) {
    return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
}
1191 | |
1192 | int compareCallback(dict *d, const void *key1, const void *key2) { |
1193 | int l1,l2; |
1194 | UNUSED(d); |
1195 | |
1196 | l1 = strlen((char*)key1); |
1197 | l2 = strlen((char*)key2); |
1198 | if (l1 != l2) return 0; |
1199 | return memcmp(key1, key2, l1) == 0; |
1200 | } |
1201 | |
/* Benchmark destructor callback: releases a zmalloc'd key/value. */
void freeCallback(dict *d, void *val) {
    UNUSED(d);

    zfree(val);
}
1207 | |
/* Return a heap-allocated (zmalloc'd) decimal string representation of
 * 'value'. Ownership transfers to the caller, who must zfree() it. */
char *stringFromLongLong(long long value) {
    char buf[32];
    int len;
    char *s;

    /* 32 bytes always fits a long long, but bound the write anyway so a
     * future format change cannot overflow the stack buffer. */
    len = snprintf(buf,sizeof(buf),"%lld",value);
    s = zmalloc(len+1);
    memcpy(s, buf, len);
    s[len] = '\0';
    return s;
}
1219 | |
/* dictType used by the benchmark below: keys are zmalloc'd C strings,
 * values are plain integers cast to pointers (nothing to free).
 * NOTE(review): field roles below are inferred from the callback names;
 * confirm against the dictType declaration in the header. */
dictType BenchmarkDictType = {
    hashCallback,       /* hash function */
    NULL,
    NULL,
    compareCallback,    /* key comparison */
    freeCallback,       /* key destructor (frees the zmalloc'd strings) */
    NULL,
    NULL                /* NULL = expansion always allowed (see dictTypeExpandAllowed()) */
};
1229 | |
/* Record the start time of a benchmark run; a 'start' variable of type
 * long long must be in scope. */
#define start_benchmark() start = timeInMilliseconds()
/* Print the time elapsed since start_benchmark() labeled with 'msg';
 * 'count', 'start' and 'elapsed' must be in scope. */
#define end_benchmark(msg) do { \
    elapsed = timeInMilliseconds()-start; \
    printf(msg ": %ld items in %lld ms\n", count, elapsed); \
} while(0)
1235 | |
1236 | /* ./redis-server test dict [<count> | --accurate] */ |
/* ./redis-server test dict [<count> | --accurate] */
/* Benchmark driver: times insert, linear/random lookup, random key access,
 * missing-key lookup and delete+re-add over 'count' string keys.
 * Always returns 0; failures abort via assert(). */
int dictTest(int argc, char **argv, int flags) {
    long j;
    long long start, elapsed;
    dict *dict = dictCreate(&BenchmarkDictType);
    long count = 0;
    int accurate = (flags & REDIS_TEST_ACCURATE);

    /* Pick iteration count: --accurate preset, explicit argv[3], or a
     * small default. */
    if (argc == 4) {
        if (accurate) {
            count = 5000000;
        } else {
            count = strtol(argv[3],NULL,10);
        }
    } else {
        count = 5000;
    }

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* The dict takes ownership of the key string; the value is the
         * integer itself stored as a pointer. */
        int retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Inserting" );
    assert((long)dictSize(dict) == count);

    /* Wait for rehashing. */
    while (dictIsRehashing(dict)) {
        dictRehashMilliseconds(dict,100);
    }

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements" );

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements (2nd round)" );

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(rand() % count);
        dictEntry *de = dictFind(dict,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Random access of existing elements" );

    start_benchmark();
    for (j = 0; j < count; j++) {
        dictEntry *de = dictGetRandomKey(dict);
        assert(de != NULL);
    }
    end_benchmark("Accessing random keys" );

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Overwriting the first digit with 'X' guarantees a miss. */
        char *key = stringFromLongLong(rand() % count);
        key[0] = 'X';
        dictEntry *de = dictFind(dict,key);
        assert(de == NULL);
        zfree(key);
    }
    end_benchmark("Accessing missing" );

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        int retval = dictDelete(dict,key);
        assert(retval == DICT_OK);
        key[0] += 17; /* Change first number to letter. */
        retval = dictAdd(dict,key,(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Removing and adding" );
    dictRelease(dict);
    return 0;
}
1324 | #endif |
1325 | |