1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | |
5 | #include "leveldb/filter_policy.h" |
6 | |
7 | #include "leveldb/slice.h" |
8 | #include "util/hash.h" |
9 | |
10 | namespace leveldb { |
11 | |
12 | namespace { |
13 | static uint32_t BloomHash(const Slice& key) { |
14 | return Hash(key.data(), key.size(), 0xbc9f1d34); |
15 | } |
16 | |
17 | class BloomFilterPolicy : public FilterPolicy { |
18 | public: |
19 | explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) { |
20 | // We intentionally round down to reduce probing cost a little bit |
21 | k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2) |
22 | if (k_ < 1) k_ = 1; |
23 | if (k_ > 30) k_ = 30; |
24 | } |
25 | |
26 | const char* Name() const override { return "leveldb.BuiltinBloomFilter2" ; } |
27 | |
28 | void CreateFilter(const Slice* keys, int n, std::string* dst) const override { |
29 | // Compute bloom filter size (in both bits and bytes) |
30 | size_t bits = n * bits_per_key_; |
31 | |
32 | // For small n, we can see a very high false positive rate. Fix it |
33 | // by enforcing a minimum bloom filter length. |
34 | if (bits < 64) bits = 64; |
35 | |
36 | size_t bytes = (bits + 7) / 8; |
37 | bits = bytes * 8; |
38 | |
39 | const size_t init_size = dst->size(); |
40 | dst->resize(init_size + bytes, 0); |
41 | dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter |
42 | char* array = &(*dst)[init_size]; |
43 | for (int i = 0; i < n; i++) { |
44 | // Use double-hashing to generate a sequence of hash values. |
45 | // See analysis in [Kirsch,Mitzenmacher 2006]. |
46 | uint32_t h = BloomHash(keys[i]); |
47 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
48 | for (size_t j = 0; j < k_; j++) { |
49 | const uint32_t bitpos = h % bits; |
50 | array[bitpos / 8] |= (1 << (bitpos % 8)); |
51 | h += delta; |
52 | } |
53 | } |
54 | } |
55 | |
56 | bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override { |
57 | const size_t len = bloom_filter.size(); |
58 | if (len < 2) return false; |
59 | |
60 | const char* array = bloom_filter.data(); |
61 | const size_t bits = (len - 1) * 8; |
62 | |
63 | // Use the encoded k so that we can read filters generated by |
64 | // bloom filters created using different parameters. |
65 | const size_t k = array[len - 1]; |
66 | if (k > 30) { |
67 | // Reserved for potentially new encodings for short bloom filters. |
68 | // Consider it a match. |
69 | return true; |
70 | } |
71 | |
72 | uint32_t h = BloomHash(key); |
73 | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
74 | for (size_t j = 0; j < k; j++) { |
75 | const uint32_t bitpos = h % bits; |
76 | if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false; |
77 | h += delta; |
78 | } |
79 | return true; |
80 | } |
81 | |
82 | private: |
83 | size_t bits_per_key_; |
84 | size_t k_; |
85 | }; |
86 | } // namespace |
87 | |
88 | const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { |
89 | return new BloomFilterPolicy(bits_per_key); |
90 | } |
91 | |
92 | } // namespace leveldb |
93 | |