// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Date: Wed Nov 27 12:59:20 CST 2013

#ifndef BUTIL_FLAT_MAP_INL_H
#define BUTIL_FLAT_MAP_INL_H

namespace butil {

// Return the smallest tabulated prime >= nbucket, or nbucket itself when it
// exceeds the largest prime in the table.
inline uint32_t find_next_prime(uint32_t nbucket) {
    static const unsigned long prime_list[] = {
        29ul,
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    const size_t nprimes = sizeof(prime_list) / sizeof(prime_list[0]);
    for (size_t i = 0; i < nprimes; i++) {
        if (nbucket <= prime_list[i]) {
            return prime_list[i];
        }
    }
    return nbucket;
}

// Round |b| up to the nearest power of 2 by smearing the highest set bit
// into all lower positions, then adding 1.
inline uint64_t find_power2(uint64_t b) {
    b -= 1;
    b |= (b >> 1);
    b |= (b >> 2);
    b |= (b >> 4);
    b |= (b >> 8);
    b |= (b >> 16);
    b |= (b >> 32);
    return b + 1;
}
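
// A worked trace of the function above (illustrative):
//   find_power2(37): b = 36 = 0b100100
//     after the OR-folding steps, b = 0b111111 = 63
//     return 64
// Note that find_power2(x) returns x itself when x is already a power of 2,
// because b = x - 1 only smears the bits below the highest one.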

// Using the next prime is slower by ~10ns on average (due to the %). If the
// quality of the hash code is good enough, primeness of nbucket is not
// important. We choose to trust the hash code (or the user should switch to
// a better hash algorithm when collisions are significant) and stick to the
// round-to-power-of-2 solution for now.
inline size_t flatmap_round(size_t nbucket) {
#ifdef FLAT_MAP_ROUND_BUCKET_BY_USE_NEXT_PRIME
    return find_next_prime(nbucket);
#else
    return find_power2(nbucket);
#endif
}

inline size_t flatmap_mod(size_t hash_code, size_t nbucket) {
#ifdef FLAT_MAP_ROUND_BUCKET_BY_USE_NEXT_PRIME
    return hash_code % nbucket;
#else
    return hash_code & (nbucket - 1);
#endif
}
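
// A quick sanity sketch (illustrative only): under the default power-of-2
// configuration, masking with nbucket - 1 is exactly the modulo, which is
// why flatmap_round() and flatmap_mod() must agree on the rounding policy.
//
//   size_t n = butil::flatmap_round(100);     // 128
//   size_t i = butil::flatmap_mod(12345, n);  // 12345 & 127 == 57
//   assert(i == 12345 % n);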

// Iterate FlatMap
template <typename Map, typename Value> class FlatMapIterator {
public:
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;
    typedef typename add_const<Value>::type ConstValue;
    typedef ConstValue& const_reference;
    typedef ConstValue* const_pointer;
    typedef std::forward_iterator_tag iterator_category;
    typedef ptrdiff_t difference_type;
    typedef typename remove_const<Value>::type NonConstValue;

    FlatMapIterator() : _node(NULL), _entry(NULL) {}
    FlatMapIterator(const Map* map, size_t pos) {
        if (map->initialized()) {
            _entry = map->_buckets + pos;
            find_and_set_valid_node();
        } else {
            _node = NULL;
            _entry = NULL;
        }
    }
    FlatMapIterator(const FlatMapIterator<Map, NonConstValue>& rhs)
        : _node(rhs._node), _entry(rhs._entry) {}
    ~FlatMapIterator() {}  // required by style-checker

    // *this == rhs
    bool operator==(const FlatMapIterator& rhs) const
    { return _node == rhs._node; }

    // *this != rhs
    bool operator!=(const FlatMapIterator& rhs) const
    { return _node != rhs._node; }

    // ++it
    FlatMapIterator& operator++() {
        if (NULL == _node->next) {
            ++_entry;
            find_and_set_valid_node();
        } else {
            _node = _node->next;
        }
        return *this;
    }

    // it++
    FlatMapIterator operator++(int) {
        FlatMapIterator tmp = *this;
        this->operator++();
        return tmp;
    }

    reference operator*() { return _node->element().value_ref(); }
    pointer operator->() { return &_node->element().value_ref(); }
    const_reference operator*() const { return _node->element().value_ref(); }
    const_pointer operator->() const { return &_node->element().value_ref(); }

private:
friend class FlatMapIterator<Map, ConstValue>;
friend class FlatMap<typename Map::key_type, typename Map::mapped_type,
                     typename Map::hasher, typename Map::key_equal>;

    void find_and_set_valid_node() {
        // The scan stops at the extra bucket past the end, which is kept
        // "valid" by init()/operator=, so no bound check is needed here.
        for (; !_entry->is_valid(); ++_entry);
        _node = _entry;
    }

    typename Map::Bucket* _node;
    typename Map::Bucket* _entry;
};
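
// A minimal iteration sketch (illustrative only; assumes the default
// template arguments declared in flat_map.h):
//
//   butil::FlatMap<int, int> m;
//   if (m.init(32, 80) == 0) {
//       m[1] = 10;
//       for (butil::FlatMap<int, int>::const_iterator it = m.begin();
//            it != m.end(); ++it) {
//           // it->first is the key, it->second the mapped value
//       }
//   }
//
// Iterators are forward-only and are invalidated by insertions that trigger
// a resize, so do not insert while iterating.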

// Iterate SparseFlatMap
template <typename Map, typename Value> class SparseFlatMapIterator {
public:
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;
    typedef typename add_const<Value>::type ConstValue;
    typedef ConstValue& const_reference;
    typedef ConstValue* const_pointer;
    typedef std::forward_iterator_tag iterator_category;
    typedef ptrdiff_t difference_type;
    typedef typename remove_const<Value>::type NonConstValue;

    SparseFlatMapIterator() : _node(NULL), _pos(0), _map(NULL) {}
    SparseFlatMapIterator(const Map* map, size_t pos) {
        if (map->initialized()) {
            _map = map;
            _pos = pos;
            find_and_set_valid_node();
        } else {
            _node = NULL;
            _map = NULL;
            _pos = 0;
        }
    }
    SparseFlatMapIterator(const SparseFlatMapIterator<Map, NonConstValue>& rhs)
        : _node(rhs._node), _pos(rhs._pos), _map(rhs._map)
    {}
    ~SparseFlatMapIterator() {}  // required by style-checker

    // *this == rhs
    bool operator==(const SparseFlatMapIterator& rhs) const
    { return _node == rhs._node; }

    // *this != rhs
    bool operator!=(const SparseFlatMapIterator& rhs) const
    { return _node != rhs._node; }

    // ++it
    SparseFlatMapIterator& operator++() {
        if (NULL == _node->next) {
            ++_pos;
            find_and_set_valid_node();
        } else {
            _node = _node->next;
        }
        return *this;
    }

    // it++
    SparseFlatMapIterator operator++(int) {
        SparseFlatMapIterator tmp = *this;
        this->operator++();
        return tmp;
    }

    reference operator*() { return _node->element().value_ref(); }
    pointer operator->() { return &_node->element().value_ref(); }
    const_reference operator*() const { return _node->element().value_ref(); }
    const_pointer operator->() const { return &_node->element().value_ref(); }

private:
friend class SparseFlatMapIterator<Map, ConstValue>;

    void find_and_set_valid_node() {
        // _thumbnail keeps one bit per bucket; jump straight to the next set
        // bit instead of probing invalid buckets one by one.
        if (!_map->_buckets[_pos].is_valid()) {
            _pos = bit_array_first1(_map->_thumbnail, _pos + 1, _map->_nbucket);
        }
        _node = _map->_buckets + _pos;
    }

    typename Map::Bucket* _node;
    size_t _pos;
    const Map* _map;
};


template <typename _K, typename _T, typename _H, typename _E, bool _S>
FlatMap<_K, _T, _H, _E, _S>::FlatMap(const hasher& hashfn, const key_equal& eql)
    : _size(0)
    , _nbucket(0)
    , _buckets(NULL)
    , _thumbnail(NULL)
    , _load_factor(0)
    , _hashfn(hashfn)
    , _eql(eql)
{}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
FlatMap<_K, _T, _H, _E, _S>::~FlatMap() {
    clear();
    free(_buckets);
    _buckets = NULL;
    free(_thumbnail);
    _thumbnail = NULL;
    _nbucket = 0;
    _load_factor = 0;
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
FlatMap<_K, _T, _H, _E, _S>::FlatMap(const FlatMap& rhs)
    : _size(0)
    , _nbucket(0)
    , _buckets(NULL)
    , _thumbnail(NULL)
    , _load_factor(rhs._load_factor)
    , _hashfn(rhs._hashfn)
    , _eql(rhs._eql) {
    operator=(rhs);
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
void
FlatMap<_K, _T, _H, _E, _S>::operator=(const FlatMap<_K, _T, _H, _E, _S>& rhs) {
    if (this == &rhs) {
        return;
    }
    // NOTE: assignment does not change _load_factor/_hashfn/_eql if |this|
    // is initialized.
    clear();
    if (rhs.empty()) {
        return;
    }
    if (!initialized()) {
        _load_factor = rhs._load_factor;
    }
    if (_buckets == NULL || is_too_crowded(rhs._size)) {
        free(_buckets);
        _nbucket = rhs._nbucket;
        // note: need an extra bucket to let iterators know where buckets end
        _buckets = (Bucket*)malloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
        if (NULL == _buckets) {
            LOG(ERROR) << "Fail to malloc _buckets";
            return;
        }
        if (_S) {
            free(_thumbnail);
            _thumbnail = bit_array_malloc(_nbucket);
            if (NULL == _thumbnail) {
                LOG(ERROR) << "Fail to malloc _thumbnail";
                return;
            }
            bit_array_clear(_thumbnail, _nbucket);
        }
    }
    if (_nbucket == rhs._nbucket) {
        // For equal _nbucket, walking through _buckets instead of using
        // iterators is more efficient.
        for (size_t i = 0; i < rhs._nbucket; ++i) {
            if (!rhs._buckets[i].is_valid()) {
                _buckets[i].set_invalid();
            } else {
                if (_S) {
                    bit_array_set(_thumbnail, i);
                }
                new (&_buckets[i]) Bucket(rhs._buckets[i]);
                Bucket* p1 = &_buckets[i];
                Bucket* p2 = rhs._buckets[i].next;
                while (p2) {
                    p1->next = new (_pool.get()) Bucket(*p2);
                    p1 = p1->next;
                    p2 = p2->next;
                }
            }
        }
        _buckets[rhs._nbucket].next = NULL;
        _size = rhs._size;
    } else {
        for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
            operator[](Element::first_ref_from_value(*it)) =
                Element::second_ref_from_value(*it);
        }
    }
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
int FlatMap<_K, _T, _H, _E, _S>::init(size_t nbucket, u_int load_factor) {
    if (initialized()) {
        LOG(ERROR) << "Already initialized";
        return -1;
    }
    if (nbucket == 0) {
        // flatmap_round(0) would yield 0 and flatmap_mod would then index
        // out of bounds; reject an empty bucket count up front.
        LOG(ERROR) << "Invalid nbucket=" << nbucket;
        return -1;
    }
    if (load_factor < 10 || load_factor > 100) {
        LOG(ERROR) << "Invalid load_factor=" << load_factor;
        return -1;
    }
    _size = 0;
    _nbucket = flatmap_round(nbucket);
    _load_factor = load_factor;

    _buckets = (Bucket*)malloc(sizeof(Bucket) * (_nbucket + 1));
    if (NULL == _buckets) {
        LOG(ERROR) << "Fail to malloc _buckets";
        return -1;
    }
    for (size_t i = 0; i < _nbucket; ++i) {
        _buckets[i].set_invalid();
    }
    // The extra bucket lets iterators know where buckets end (see the note
    // in operator=).
    _buckets[_nbucket].next = NULL;

    if (_S) {
        _thumbnail = bit_array_malloc(_nbucket);
        if (NULL == _thumbnail) {
            LOG(ERROR) << "Fail to malloc _thumbnail";
            return -1;
        }
        bit_array_clear(_thumbnail, _nbucket);
    }
    return 0;
}
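
// A minimal initialization sketch (illustrative only; the alias below is an
// assumption, any key/value types with a suitable hasher work):
//
//   typedef butil::FlatMap<uint64_t, int> CountMap;
//   CountMap m;
//   // Pre-size for ~1000 keys; load_factor is a percentage in [10, 100].
//   if (m.init(1024, 80) != 0) {
//       // handle the error; the map is unusable until init() succeeds
//   }
//
// nbucket is rounded up by flatmap_round(), so init(1000, 80) also yields
// 1024 buckets under the default power-of-2 configuration.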

template <typename _K, typename _T, typename _H, typename _E, bool _S>
void FlatMap<_K, _T, _H, _E, _S>::swap(FlatMap<_K, _T, _H, _E, _S>& rhs) {
    std::swap(rhs._size, _size);
    std::swap(rhs._nbucket, _nbucket);
    std::swap(rhs._buckets, _buckets);
    std::swap(rhs._thumbnail, _thumbnail);
    std::swap(rhs._load_factor, _load_factor);
    std::swap(rhs._hashfn, _hashfn);
    std::swap(rhs._eql, _eql);
    rhs._pool.swap(_pool);
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
_T* FlatMap<_K, _T, _H, _E, _S>::insert(const key_type& key,
                                        const mapped_type& value) {
    mapped_type* p = &operator[](key);
    *p = value;
    return p;
}
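
// Note that insert() is built on operator[], so it overwrites the value of
// an existing key and returns a pointer to the stored mapped value
// (illustrative sketch, assuming default template arguments):
//
//   butil::FlatMap<int, int> m;
//   m.init(16, 80);
//   int* v = m.insert(1, 10);  // *v == 10
//   m.insert(1, 20);           // overwritten, *v == 20
//
// Such pointers remain valid only until the map resizes.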

template <typename _K, typename _T, typename _H, typename _E, bool _S>
template <typename K2>
size_t FlatMap<_K, _T, _H, _E, _S>::erase(const K2& key, _T* old_value) {
    if (!initialized()) {
        return 0;
    }
    // TODO: Do we need auto collapsing here?
    const size_t index = flatmap_mod(_hashfn(key), _nbucket);
    Bucket& first_node = _buckets[index];
    if (!first_node.is_valid()) {
        return 0;
    }
    if (_eql(first_node.element().first_ref(), key)) {
        if (old_value) {
            *old_value = first_node.element().second_ref();
        }
        if (first_node.next == NULL) {
            first_node.element().~Element();
            first_node.set_invalid();
            if (_S) {
                bit_array_unset(_thumbnail, index);
            }
        } else {
            // A seemingly correct solution is to copy the memory of *p to
            // first_node directly like this:
            //   first_node.element().~Element();
            //   first_node = *p;
            // It works most of the time, but is wrong in general: if _T
            // holds a pointer into itself like this:
            //   struct Value {
            //       Value() : num(0), num_ptr(&num) {}
            //       int num;
            //       int* num_ptr;
            //   };
            // num_ptr would still point into *p after the copy. Calling
            // operator= is the price we have to pay.
            Bucket* p = first_node.next;
            first_node.next = p->next;
            const_cast<_K&>(first_node.element().first_ref()) =
                p->element().first_ref();
            first_node.element().second_ref() = p->element().second_ref();
            p->element().~Element();
            _pool.back(p);
        }
        --_size;
        return 1UL;
    }
    Bucket* p = first_node.next;
    Bucket* last_p = &first_node;
    while (p) {
        if (_eql(p->element().first_ref(), key)) {
            if (old_value) {
                *old_value = p->element().second_ref();
            }
            last_p->next = p->next;
            p->element().~Element();
            _pool.back(p);
            --_size;
            return 1UL;
        }
        last_p = p;
        p = p->next;
    }
    return 0;
}
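
// A minimal erase sketch (illustrative only): erase() returns the number of
// removed elements (0 or 1) and can optionally hand back the removed value.
//
//   butil::FlatMap<int, int> m;
//   m.init(16, 80);
//   m[1] = 10;
//   int old = 0;
//   if (m.erase(1, &old)) {
//       // old == 10, and m.seek(1) == NULL now
//   }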

template <typename _K, typename _T, typename _H, typename _E, bool _S>
void FlatMap<_K, _T, _H, _E, _S>::clear() {
    if (0 == _size) {
        return;
    }
    _size = 0;
    if (NULL != _buckets) {
        for (size_t i = 0; i < _nbucket; ++i) {
            Bucket& first_node = _buckets[i];
            if (first_node.is_valid()) {
                first_node.element().~Element();
                Bucket* p = first_node.next;
                while (p) {
                    Bucket* next_p = p->next;
                    p->element().~Element();
                    _pool.back(p);
                    p = next_p;
                }
                first_node.set_invalid();
            }
        }
    }
    if (NULL != _thumbnail) {
        bit_array_clear(_thumbnail, _nbucket);
    }
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
void FlatMap<_K, _T, _H, _E, _S>::clear_and_reset_pool() {
    clear();
    _pool.reset();
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
template <typename K2>
_T* FlatMap<_K, _T, _H, _E, _S>::seek(const K2& key) const {
    if (!initialized()) {
        return NULL;
    }
    Bucket& first_node = _buckets[flatmap_mod(_hashfn(key), _nbucket)];
    if (!first_node.is_valid()) {
        return NULL;
    }
    if (_eql(first_node.element().first_ref(), key)) {
        return &first_node.element().second_ref();
    }
    Bucket* p = first_node.next;
    while (p) {
        if (_eql(p->element().first_ref(), key)) {
            return &p->element().second_ref();
        }
        p = p->next;
    }
    return NULL;
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
_T& FlatMap<_K, _T, _H, _E, _S>::operator[](const key_type& key) {
    const size_t index = flatmap_mod(_hashfn(key), _nbucket);
    Bucket& first_node = _buckets[index];
    if (!first_node.is_valid()) {
        ++_size;
        if (_S) {
            bit_array_set(_thumbnail, index);
        }
        new (&first_node) Bucket(key);
        return first_node.element().second_ref();
    }
    if (_eql(first_node.element().first_ref(), key)) {
        return first_node.element().second_ref();
    }
    Bucket* p = first_node.next;
    if (NULL == p) {
        if (is_too_crowded(_size)) {
            if (resize(_nbucket + 1)) {
                return operator[](key);
            }
            // failing to resize is OK, we just chain the new node
        }
        ++_size;
        Bucket* newp = new (_pool.get()) Bucket(key);
        first_node.next = newp;
        return newp->element().second_ref();
    }
    while (1) {
        if (_eql(p->element().first_ref(), key)) {
            return p->element().second_ref();
        }
        if (NULL == p->next) {
            if (is_too_crowded(_size)) {
                if (resize(_nbucket + 1)) {
                    return operator[](key);
                }
                // failing to resize is OK, we just chain the new node
            }
            ++_size;
            Bucket* newp = new (_pool.get()) Bucket(key);
            p->next = newp;
            return newp->element().second_ref();
        }
        p = p->next;
    }
}
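
// Like std::map, operator[] inserts a key on a miss while seek() never does
// (a minimal sketch, assuming default template arguments):
//
//   butil::FlatMap<int, int> m;
//   m.init(64, 80);
//   m[42] = 1;                  // inserts key 42 if absent
//   if (m.seek(7) == NULL) {
//       // key 7 was never inserted; seek() is read-only
//   }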

template <typename _K, typename _T, typename _H, typename _E, bool _S>
void FlatMap<_K, _T, _H, _E, _S>::save_iterator(
    const const_iterator& it, PositionHint* hint) const {
    hint->nbucket = _nbucket;
    hint->offset = it._entry - _buckets;
    if (it != end()) {
        hint->at_entry = (it._entry == it._node);
        hint->key = it->first;
    } else {
        hint->at_entry = false;
        hint->key = key_type();
    }
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
typename FlatMap<_K, _T, _H, _E, _S>::const_iterator
FlatMap<_K, _T, _H, _E, _S>::restore_iterator(const PositionHint& hint) const {
    if (hint.nbucket != _nbucket/*resized*/ ||
        hint.offset >= _nbucket/*invalid hint*/) {
        return begin();  // restart
    }
    Bucket& first_node = _buckets[hint.offset];
    if (hint.at_entry) {
        return const_iterator(this, hint.offset);
    }
    if (!first_node.is_valid()) {
        // All elements hashed to the entry were removed, try the next entry.
        return const_iterator(this, hint.offset + 1);
    }
    Bucket* p = &first_node;
    do {
        if (_eql(p->element().first_ref(), hint.key)) {
            const_iterator it;
            it._node = p;
            it._entry = &first_node;
            return it;
        }
        p = p->next;
    } while (p);
    // The last element we iterated (and saved in PositionHint) was removed
    // and we don't know where to resume, so restart at the beginning of the
    // entry. Some elements in the entry may be revisited, which should not
    // happen often.
    return const_iterator(this, hint.offset);
}
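
// A minimal save/restore sketch (illustrative only; assumes PositionHint is
// the hint type declared alongside FlatMap in flat_map.h and that m is an
// initialized FlatMap<int, int>). This is useful when iteration must pause
// and the map may be modified in between:
//
//   butil::FlatMap<int, int>::PositionHint hint;
//   butil::FlatMap<int, int>::const_iterator it = m.begin();
//   m.save_iterator(it, &hint);
//   // ... later, possibly after erasures ...
//   it = m.restore_iterator(hint);  // restarts from begin() if m resized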

template <typename _K, typename _T, typename _H, typename _E, bool _S>
bool FlatMap<_K, _T, _H, _E, _S>::resize(size_t nbucket2) {
    nbucket2 = flatmap_round(nbucket2);
    if (_nbucket == nbucket2) {
        return false;
    }

    // Pass the functors along so that stateful hashers/comparers survive the
    // resize (new_map is swapped into *this below).
    FlatMap new_map(_hashfn, _eql);
    if (new_map.init(nbucket2, _load_factor) != 0) {
        LOG(ERROR) << "Fail to init new_map, nbucket=" << nbucket2;
        return false;
    }
    for (iterator it = begin(); it != end(); ++it) {
        new_map[Element::first_ref_from_value(*it)] =
            Element::second_ref_from_value(*it);
    }
    new_map.swap(*this);
    return true;
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
BucketInfo FlatMap<_K, _T, _H, _E, _S>::bucket_info() const {
    size_t max_n = 0;
    size_t nentry = 0;
    for (size_t i = 0; i < _nbucket; ++i) {
        if (_buckets[i].is_valid()) {
            size_t n = 1;
            for (Bucket* p = _buckets[i].next; p; p = p->next, ++n);
            max_n = std::max(max_n, n);
            ++nentry;
        }
    }
    // Guard against an empty map: 0/0 would yield NaN.
    const BucketInfo info = { max_n, nentry ? size() / (double)nentry : 0 };
    return info;
}

inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
    return os << "{maxb=" << info.longest_length
              << " avgb=" << info.average_length << '}';
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
typename FlatMap<_K, _T, _H, _E, _S>::iterator FlatMap<_K, _T, _H, _E, _S>::begin() {
    return iterator(this, 0);
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
typename FlatMap<_K, _T, _H, _E, _S>::iterator FlatMap<_K, _T, _H, _E, _S>::end() {
    return iterator(this, _nbucket);
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
typename FlatMap<_K, _T, _H, _E, _S>::const_iterator FlatMap<_K, _T, _H, _E, _S>::begin() const {
    return const_iterator(this, 0);
}

template <typename _K, typename _T, typename _H, typename _E, bool _S>
typename FlatMap<_K, _T, _H, _E, _S>::const_iterator FlatMap<_K, _T, _H, _E, _S>::end() const {
    return const_iterator(this, _nbucket);
}

}  // namespace butil

#endif  // BUTIL_FLAT_MAP_INL_H