1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Date: Wed Nov 27 12:59:20 CST 2013
19
20// This closed addressing hash-map puts first linked node in bucket array
21// directly to save an extra memory indirection. As a result, this map yields
22// close performance to raw array on nearly all operations, probably being the
23// fastest hashmap for small-sized key/value ever.
24//
25// Performance comparisons between several maps:
26// [ value = 8 bytes ]
27// Sequentially inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 11/108/55/58
28// Sequentially erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 7/123/55/37
29// Sequentially inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 10/92/55/54
30// Sequentially erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/67/51/35
31// Sequentially inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 10/100/66/54
32// Sequentially erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/72/55/35
33// [ value = 32 bytes ]
34// Sequentially inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 14/108/56/56
35// Sequentially erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/77/53/38
36// Sequentially inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 14/94/54/53
37// Sequentially erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/66/49/36
38// Sequentially inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 13/106/62/54
39// Sequentially erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/69/53/36
40// [ value = 128 bytes ]
41// Sequentially inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 31/182/96/96
42// Sequentially erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 8/117/51/44
43// Sequentially inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 29/191/100/97
44// Sequentially erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/100/49/44
45// Sequentially inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 30/184/113/114
46// Sequentially erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/99/52/43
47// [ value = 8 bytes ]
48// Randomly inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 11/171/108/60
49// Randomly erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 8/158/126/37
50// Randomly inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 10/159/117/54
51// Randomly erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/153/135/36
52// Randomly inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 12/223/180/55
53// Randomly erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 7/237/210/48
54// [ value = 32 bytes ]
55// Randomly inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 16/179/108/57
56// Randomly erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 5/157/120/38
57// Randomly inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 15/168/127/54
58// Randomly erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 5/164/135/39
59// Randomly inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 19/241/201/56
60// Randomly erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 5/235/218/54
61// [ value = 128 bytes ]
62// Randomly inserting 100 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 35/242/154/97
63// Randomly erasing 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 7/185/119/56
64// Randomly inserting 1000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 35/262/182/99
65// Randomly erasing 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/215/157/66
66// Randomly inserting 10000 into FlatMap/std::map/butil::PooledMap/butil::hash_map takes 44/330/278/114
67// Randomly erasing 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/307/242/90
68// [ value = 8 bytes ]
69// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 6/51/52/13
70// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/98/82/14
71// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/175/170/14
72// [ value = 32 bytes ]
73// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/52/52/14
74// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/84/82/13
75// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/164/156/14
76// [ value = 128 bytes ]
77// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/54/53/14
78// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/88/90/13
79// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/178/185/14
80// [ value = 8 bytes ]
81// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 5/51/49/14
82// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/86/94/14
83// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/177/171/14
84// [ value = 32 bytes ]
85// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/51/53/14
86// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/98/82/13
87// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/163/156/14
88// [ value = 128 bytes ]
89// Seeking 100 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 3/55/53/14
90// Seeking 1000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/88/89/13
91// Seeking 10000 from FlatMap/std::map/butil::PooledMap/butil::hash_map takes 4/177/185/14
92
93#ifndef BUTIL_FLAT_MAP_H
94#define BUTIL_FLAT_MAP_H
95
96#include <stdint.h>
97#include <functional>
98#include <iostream> // std::ostream
99#include "butil/type_traits.h"
100#include "butil/logging.h"
101#include "butil/find_cstr.h"
102#include "butil/single_threaded_pool.h" // SingleThreadedPool
103#include "butil/containers/hash_tables.h" // hash<>
104#include "butil/bit_array.h" // bit_array_*
105#include "butil/strings/string_piece.h" // StringPiece
106
107namespace butil {
108
109template <typename _Map, typename _Element> class FlatMapIterator;
110template <typename _Map, typename _Element> class SparseFlatMapIterator;
111template <typename K, typename T> class FlatMapElement;
112struct FlatMapVoid {}; // Replace void which is not constructible.
113template <typename K> struct DefaultHasher;
114template <typename K> struct DefaultEqualTo;
115
116struct BucketInfo {
117 size_t longest_length;
118 double average_length;
119};
120
121// NOTE: Objects stored in FlatMap MUST be copyable.
122template <typename _K, typename _T,
123 // Compute hash code from key.
124 // Use src/butil/third_party/murmurhash3 to make better distributions.
125 typename _Hash = DefaultHasher<_K>,
126 // Test equivalence between stored-key and passed-key.
127 // stored-key is always on LHS, passed-key is always on RHS.
128 typename _Equal = DefaultEqualTo<_K>,
129 bool _Sparse = false>
130class FlatMap {
131public:
132 typedef _K key_type;
133 typedef _T mapped_type;
134 typedef FlatMapElement<_K, _T> Element;
135 typedef typename Element::value_type value_type;
136 typedef typename conditional<
137 _Sparse, SparseFlatMapIterator<FlatMap, value_type>,
138 FlatMapIterator<FlatMap, value_type> >::type iterator;
139 typedef typename conditional<
140 _Sparse, SparseFlatMapIterator<FlatMap, const value_type>,
141 FlatMapIterator<FlatMap, const value_type> >::type const_iterator;
142 typedef _Hash hasher;
143 typedef _Equal key_equal;
144 struct PositionHint {
145 size_t nbucket;
146 size_t offset;
147 bool at_entry;
148 key_type key;
149 };
150
151 FlatMap(const hasher& hashfn = hasher(), const key_equal& eql = key_equal());
152 ~FlatMap();
153 FlatMap(const FlatMap& rhs);
154 void operator=(const FlatMap& rhs);
155 void swap(FlatMap & rhs);
156
157 // Must be called to initialize this map, otherwise insert/operator[]
158 // crashes, and seek/erase fails.
159 // `nbucket' is the initial number of buckets. `load_factor' is the
160 // maximum value of size()*100/nbucket, if the value is reached, nbucket
161 // will be doubled and all items stored will be rehashed which is costly.
162 // Choosing proper values for these 2 parameters reduces costs.
163 int init(size_t nbucket, u_int load_factor = 80);
164
165 // Insert a pair of |key| and |value|. If size()*100/bucket_count() is
166 // more than load_factor(), a resize() will be done.
167 // Returns address of the inserted value, NULL on error.
168 mapped_type* insert(const key_type& key, const mapped_type& value);
169
170 // Remove |key| and the associated value
171 // Returns: 1 on erased, 0 otherwise.
172 // Remove all items. Allocated spaces are NOT returned by system.
173 template <typename K2>
174 size_t erase(const K2& key, mapped_type* old_value = NULL);
175
176 void clear();
177
178 // Remove all items and return all allocated spaces to system.
179 void clear_and_reset_pool();
180
181 // Search for the value associated with |key|
182 // Returns: address of the value
183 template <typename K2> mapped_type* seek(const K2& key) const;
184
185 // Get the value associated with |key|. If |key| does not exist,
186 // insert with a default-constructed value. If size()*100/bucket_count()
187 // is more than load_factor, a resize will be done.
188 // Returns reference of the value
189 mapped_type& operator[](const key_type& key);
190
191 // Resize this map. This is optional because resizing will be triggered by
192 // insert() or operator[] if there're too many items.
193 // Returns successful or not.
194 bool resize(size_t nbucket);
195
196 // Iterators
197 iterator begin();
198 iterator end();
199 const_iterator begin() const;
200 const_iterator end() const;
201
202 // Iterate FlatMap inconsistently in more-than-one passes. This is used
203 // in multi-threaded environment to divide the critical sections of
204 // iterating big maps into smaller ones. "inconsistently" means that:
205 // * elements added during iteration may be missed.
206 // * some elements may be iterated more than once.
207 // * iteration is restarted at beginning when the map is resized.
208 // Example: (copying all keys in multi-threaded environment)
209 // LOCK;
210 // size_t n = 0;
211 // for (Map::const_iterator it = map.begin(); it != map.end(); ++it) {
212 // if (++n >= 256/*max iterated one pass*/) {
213 // Map::PositionHint hint;
214 // map.save_iterator(it, &hint);
215 // n = 0;
216 // UNLOCK;
217 // LOCK;
218 // it = map.restore_iterator(hint);
219 // if (it == map.begin()) { // resized
220 // keys->clear();
221 // }
222 // if (it == map.end()) {
223 // break;
224 // }
225 // }
226 // keys->push_back(it->first);
227 // }
228 // UNLOCK;
229 void save_iterator(const const_iterator&, PositionHint*) const;
230 const_iterator restore_iterator(const PositionHint&) const;
231
232 // True if init() was successfully called.
233 bool initialized() const { return _buckets != NULL; }
234
235 bool empty() const { return _size == 0; }
236 size_t size() const { return _size; }
237 size_t bucket_count() const { return _nbucket; }
238 u_int load_factor () const { return _load_factor; }
239
240 // Returns #nodes of longest bucket in this map. This scans all buckets.
241 BucketInfo bucket_info() const;
242
243 struct Bucket {
244 explicit Bucket(const _K& k) : next(NULL)
245 { new (element_spaces) Element(k); }
246 Bucket(const Bucket& other) : next(NULL)
247 { new (element_spaces) Element(other.element()); }
248 bool is_valid() const { return next != (const Bucket*)-1UL; }
249 void set_invalid() { next = (Bucket*)-1UL; }
250 // NOTE: Only be called when in_valid() is true.
251 Element& element() {
252 void* spaces = element_spaces; // Suppress strict-aliasing
253 return *reinterpret_cast<Element*>(spaces);
254 }
255 const Element& element() const {
256 const void* spaces = element_spaces;
257 return *reinterpret_cast<const Element*>(spaces);
258 }
259 Bucket* next;
260 char element_spaces[sizeof(Element)];
261 };
262
263private:
264template <typename _Map, typename _Element> friend class FlatMapIterator;
265template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
266 // True if buckets need to be resized before holding `size' elements.
267 inline bool is_too_crowded(size_t size) const
268 { return size * 100 >= _nbucket * _load_factor; }
269
270 size_t _size;
271 size_t _nbucket;
272 Bucket* _buckets;
273 uint64_t* _thumbnail;
274 u_int _load_factor;
275 hasher _hashfn;
276 key_equal _eql;
277 SingleThreadedPool<sizeof(Bucket), 1024, 16> _pool;
278};
279
280template <typename _K,
281 typename _Hash = DefaultHasher<_K>,
282 typename _Equal = DefaultEqualTo<_K>,
283 bool _Sparse = false>
284class FlatSet {
285public:
286 typedef FlatMap<_K, FlatMapVoid, _Hash, _Equal, _Sparse> Map;
287 typedef typename Map::key_type key_type;
288 typedef typename Map::value_type value_type;
289 typedef typename Map::Bucket Bucket;
290 typedef typename Map::iterator iterator;
291 typedef typename Map::const_iterator const_iterator;
292 typedef typename Map::hasher hasher;
293 typedef typename Map::key_equal key_equal;
294
295 FlatSet(const hasher& hashfn = hasher(), const key_equal& eql = key_equal())
296 : _map(hashfn, eql) {}
297 void swap(FlatSet & rhs) { _map.swap(rhs._map); }
298
299 int init(size_t nbucket, u_int load_factor = 80)
300 { return _map.init(nbucket, load_factor); }
301
302 const void* insert(const key_type& key)
303 { return _map.insert(key, FlatMapVoid()); }
304
305 template <typename K2>
306 size_t erase(const K2& key) { return _map.erase(key, NULL); }
307
308 void clear() { return _map.clear(); }
309 void clear_and_reset_pool() { return _map.clear_and_reset_pool(); }
310
311 template <typename K2>
312 const void* seek(const K2& key) const { return _map.seek(key); }
313
314 bool resize(size_t nbucket) { return _map.resize(nbucket); }
315
316 iterator begin() { return _map.begin(); }
317 iterator end() { return _map.end(); }
318 const_iterator begin() const { return _map.begin(); }
319 const_iterator end() const { return _map.end(); }
320
321 bool initialized() const { return _map.initialized(); }
322 bool empty() const { return _map.empty(); }
323 size_t size() const { return _map.size(); }
324 size_t bucket_count() const { return _map.bucket_count(); }
325 u_int load_factor () const { return _map.load_factor(); }
326 BucketInfo bucket_info() const { return _map.bucket_info(); }
327
328private:
329 Map _map;
330};
331
332template <typename _K, typename _T,
333 typename _Hash = DefaultHasher<_K>,
334 typename _Equal = DefaultEqualTo<_K> >
335class SparseFlatMap : public FlatMap<_K, _T, _Hash, _Equal, true> {
336};
337
338template <typename _K,
339 typename _Hash = DefaultHasher<_K>,
340 typename _Equal = DefaultEqualTo<_K> >
341class SparseFlatSet : public FlatSet<_K, _Hash, _Equal, true> {
342};
343
344// Implement FlatMapElement
345template <typename K, typename T>
346class FlatMapElement {
347public:
348 typedef std::pair<const K, T> value_type;
349 // NOTE: Have to initialize _value in this way which is treated by GCC
350 // specially that _value is zeroized(POD) or constructed(non-POD). Other
351 // methods do not work. For example, if we put _value into the std::pair
352 // and do initialization by calling _pair(k, T()), _value will be copy
353 // constructed from the defaultly constructed instance(not zeroized for
354 // POD) which is wrong generally.
355 explicit FlatMapElement(const K& k) : _key(k), _value(T()) {}
356 // ^^^^^^^^^^^
357 const K& first_ref() const { return _key; }
358 T& second_ref() { return _value; }
359 value_type& value_ref() { return *reinterpret_cast<value_type*>(this); }
360 inline static const K& first_ref_from_value(const value_type& v)
361 { return v.first; }
362 inline static const T& second_ref_from_value(const value_type& v)
363 { return v.second; }
364private:
365 const K _key;
366 T _value;
367};
368
369template <typename K>
370class FlatMapElement<K, FlatMapVoid> {
371public:
372 typedef const K value_type;
373 explicit FlatMapElement(const K& k) : _key(k) {}
374 const K& first_ref() const { return _key; }
375 FlatMapVoid& second_ref() { return second_ref_from_value(_key); }
376 value_type& value_ref() { return _key; }
377 inline static const K& first_ref_from_value(value_type& v) { return v; }
378 inline static FlatMapVoid& second_ref_from_value(value_type&) {
379 static FlatMapVoid dummy;
380 return dummy;
381 }
382private:
383 K _key;
384};
385
386// Implement DefaultHasher and DefaultEqualTo
387template <typename K>
388struct DefaultHasher : public BUTIL_HASH_NAMESPACE::hash<K> {
389};
390
391template <>
392struct DefaultHasher<std::string> {
393 std::size_t operator()(const butil::StringPiece& s) const {
394 std::size_t result = 0;
395 for (butil::StringPiece::const_iterator
396 i = s.begin(); i != s.end(); ++i) {
397 result = result * 101 + *i;
398 }
399 return result;
400 }
401 std::size_t operator()(const char* s) const {
402 std::size_t result = 0;
403 for (; *s; ++s) {
404 result = result * 101 + *s;
405 }
406 return result;
407 }
408 std::size_t operator()(const std::string& s) const {
409 std::size_t result = 0;
410 for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) {
411 result = result * 101 + *i;
412 }
413 return result;
414 }
415};
416
417template <typename K>
418struct DefaultEqualTo : public std::equal_to<K> {
419};
420
421template <>
422struct DefaultEqualTo<std::string> {
423 bool operator()(const std::string& s1, const std::string& s2) const
424 { return s1 == s2; }
425 bool operator()(const std::string& s1, const butil::StringPiece& s2) const
426 { return s1 == s2; }
427 bool operator()(const std::string& s1, const char* s2) const
428 { return s1 == s2; }
429};
430
431// find_cstr and find_lowered_cstr
432template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
433const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
434 const char* key) {
435 return m.seek(key);
436}
437
438template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
439_T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
440 const char* key) {
441 return m.seek(key);
442}
443
444template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
445const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
446 const char* key, size_t length) {
447 return m.seek(butil::StringPiece(key, length));
448}
449
450template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
451_T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
452 const char* key, size_t length) {
453 return m.seek(butil::StringPiece(key, length));
454}
455
456template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
457const _T* find_lowered_cstr(
458 const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
459 const char* key) {
460 return m.seek(*tls_stringmap_temp.get_lowered_string(key));
461}
462
463template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
464_T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
465 const char* key) {
466 return m.seek(*tls_stringmap_temp.get_lowered_string(key));
467}
468
469template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
470const _T* find_lowered_cstr(
471 const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
472 const char* key, size_t length) {
473 return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
474}
475
476template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
477_T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
478 const char* key, size_t length) {
479 return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
480}
481
482} // namespace butil
483
484#include "butil/containers/flat_map_inl.h"
485
486#endif //BUTIL_FLAT_MAP_H
487