1// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5#include "leveldb/filter_policy.h"
6
7#include "leveldb/slice.h"
8#include "util/hash.h"
9
10namespace leveldb {
11
12namespace {
13static uint32_t BloomHash(const Slice& key) {
14 return Hash(key.data(), key.size(), 0xbc9f1d34);
15}
16
17class BloomFilterPolicy : public FilterPolicy {
18 public:
19 explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
20 // We intentionally round down to reduce probing cost a little bit
21 k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
22 if (k_ < 1) k_ = 1;
23 if (k_ > 30) k_ = 30;
24 }
25
26 const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
27
28 void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
29 // Compute bloom filter size (in both bits and bytes)
30 size_t bits = n * bits_per_key_;
31
32 // For small n, we can see a very high false positive rate. Fix it
33 // by enforcing a minimum bloom filter length.
34 if (bits < 64) bits = 64;
35
36 size_t bytes = (bits + 7) / 8;
37 bits = bytes * 8;
38
39 const size_t init_size = dst->size();
40 dst->resize(init_size + bytes, 0);
41 dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
42 char* array = &(*dst)[init_size];
43 for (int i = 0; i < n; i++) {
44 // Use double-hashing to generate a sequence of hash values.
45 // See analysis in [Kirsch,Mitzenmacher 2006].
46 uint32_t h = BloomHash(keys[i]);
47 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
48 for (size_t j = 0; j < k_; j++) {
49 const uint32_t bitpos = h % bits;
50 array[bitpos / 8] |= (1 << (bitpos % 8));
51 h += delta;
52 }
53 }
54 }
55
56 bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
57 const size_t len = bloom_filter.size();
58 if (len < 2) return false;
59
60 const char* array = bloom_filter.data();
61 const size_t bits = (len - 1) * 8;
62
63 // Use the encoded k so that we can read filters generated by
64 // bloom filters created using different parameters.
65 const size_t k = array[len - 1];
66 if (k > 30) {
67 // Reserved for potentially new encodings for short bloom filters.
68 // Consider it a match.
69 return true;
70 }
71
72 uint32_t h = BloomHash(key);
73 const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
74 for (size_t j = 0; j < k; j++) {
75 const uint32_t bitpos = h % bits;
76 if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
77 h += delta;
78 }
79 return true;
80 }
81
82 private:
83 size_t bits_per_key_;
84 size_t k_;
85};
86} // namespace
87
88const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
89 return new BloomFilterPolicy(bits_per_key);
90}
91
92} // namespace leveldb
93