bloom.cc source code [leveldb/util/bloom.cc]

1	// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style license that can be
3	// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5	#include "leveldb/filter_policy.h"
6
7	#include "leveldb/slice.h"
8	#include "util/hash.h"
9
10	namespace leveldb {
11
12	namespace {
13	static uint32_t BloomHash(const Slice& key) {
14	return Hash(key.data(), key.size(), `0xbc9f1d34`);
15	}
16
17	class BloomFilterPolicy : public FilterPolicy {
18	public:
19	explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
20	// We intentionally round down to reduce probing cost a little bit
21	k_ = static_cast<size_t>(bits_per_key * `0.69`); // 0.69 =~ ln(2)
22	if (k_ < `1`) k_ = `1`;
23	if (k_ > `30`) k_ = `30`;
24	}
25
26	const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
27
28	void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
29	// Compute bloom filter size (in both bits and bytes)
30	size_t bits = n * bits_per_key_;
31
32	// For small n, we can see a very high false positive rate. Fix it
33	// by enforcing a minimum bloom filter length.
34	if (bits < `64`) bits = `64`;
35
36	size_t bytes = (bits + `7`) / `8`;
37	bits = bytes * `8`;
38
39	const size_t init_size = dst->size();
40	dst->resize(init_size + bytes, `0`);
41	dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
42	char* array = &(*dst)[init_size];
43	for (int i = `0`; i < n; i++) {
44	// Use double-hashing to generate a sequence of hash values.
45	// See analysis in [Kirsch,Mitzenmacher 2006].
46	uint32_t h = BloomHash(keys[i]);
47	const uint32_t delta = (h >> `17`) \| (h << `15`); // Rotate right 17 bits
48	for (size_t j = `0`; j < k_; j++) {
49	const uint32_t bitpos = h % bits;
50	array[bitpos / `8`] \|= (`1` << (bitpos % `8`));
51	h += delta;
52	}
53	}
54	}
55
56	bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
57	const size_t len = bloom_filter.size();
58	if (len < `2`) return false;
59
60	const char* array = bloom_filter.data();
61	const size_t bits = (len - `1`) * `8`;
62
63	// Use the encoded k so that we can read filters generated by
64	// bloom filters created using different parameters.
65	const size_t k = array[len - `1`];
66	if (k > `30`) {
67	// Reserved for potentially new encodings for short bloom filters.
68	// Consider it a match.
69	return true;
70	}
71
72	uint32_t h = BloomHash(key);
73	const uint32_t delta = (h >> `17`) \| (h << `15`); // Rotate right 17 bits
74	for (size_t j = `0`; j < k; j++) {
75	const uint32_t bitpos = h % bits;
76	if ((array[bitpos / `8`] & (`1` << (bitpos % `8`))) == `0`) return false;
77	h += delta;
78	}
79	return true;
80	}
81
82	private:
83	size_t bits_per_key_;
84	size_t k_;
85	};
86	} // namespace
87
88	const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
89	return new BloomFilterPolicy (bits_per_key);
90	}
91
92	} // namespace leveldb
93

Browse the source code of leveldb/util/bloom.cc