1 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | |
5 | #include "gtest/gtest.h" |
6 | #include "leveldb/filter_policy.h" |
7 | #include "util/coding.h" |
8 | #include "util/logging.h" |
9 | #include "util/testutil.h" |
10 | |
11 | namespace leveldb { |
12 | |
13 | static const int kVerbose = 1; |
14 | |
15 | static Slice Key(int i, char* buffer) { |
16 | EncodeFixed32(buffer, i); |
17 | return Slice(buffer, sizeof(uint32_t)); |
18 | } |
19 | |
20 | class BloomTest : public testing::Test { |
21 | public: |
22 | BloomTest() : policy_(NewBloomFilterPolicy(10)) {} |
23 | |
24 | ~BloomTest() { delete policy_; } |
25 | |
26 | void Reset() { |
27 | keys_.clear(); |
28 | filter_.clear(); |
29 | } |
30 | |
31 | void Add(const Slice& s) { keys_.push_back(s.ToString()); } |
32 | |
33 | void Build() { |
34 | std::vector<Slice> key_slices; |
35 | for (size_t i = 0; i < keys_.size(); i++) { |
36 | key_slices.push_back(Slice(keys_[i])); |
37 | } |
38 | filter_.clear(); |
39 | policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()), |
40 | &filter_); |
41 | keys_.clear(); |
42 | if (kVerbose >= 2) DumpFilter(); |
43 | } |
44 | |
45 | size_t FilterSize() const { return filter_.size(); } |
46 | |
47 | void DumpFilter() { |
48 | std::fprintf(stderr, "F(" ); |
49 | for (size_t i = 0; i + 1 < filter_.size(); i++) { |
50 | const unsigned int c = static_cast<unsigned int>(filter_[i]); |
51 | for (int j = 0; j < 8; j++) { |
52 | std::fprintf(stderr, "%c" , (c & (1 << j)) ? '1' : '.'); |
53 | } |
54 | } |
55 | std::fprintf(stderr, ")\n" ); |
56 | } |
57 | |
58 | bool Matches(const Slice& s) { |
59 | if (!keys_.empty()) { |
60 | Build(); |
61 | } |
62 | return policy_->KeyMayMatch(s, filter_); |
63 | } |
64 | |
65 | double FalsePositiveRate() { |
66 | char buffer[sizeof(int)]; |
67 | int result = 0; |
68 | for (int i = 0; i < 10000; i++) { |
69 | if (Matches(Key(i + 1000000000, buffer))) { |
70 | result++; |
71 | } |
72 | } |
73 | return result / 10000.0; |
74 | } |
75 | |
76 | private: |
77 | const FilterPolicy* policy_; |
78 | std::string filter_; |
79 | std::vector<std::string> keys_; |
80 | }; |
81 | |
82 | TEST_F(BloomTest, EmptyFilter) { |
83 | ASSERT_TRUE(!Matches("hello" )); |
84 | ASSERT_TRUE(!Matches("world" )); |
85 | } |
86 | |
87 | TEST_F(BloomTest, Small) { |
88 | Add("hello" ); |
89 | Add("world" ); |
90 | ASSERT_TRUE(Matches("hello" )); |
91 | ASSERT_TRUE(Matches("world" )); |
92 | ASSERT_TRUE(!Matches("x" )); |
93 | ASSERT_TRUE(!Matches("foo" )); |
94 | } |
95 | |
96 | static int NextLength(int length) { |
97 | if (length < 10) { |
98 | length += 1; |
99 | } else if (length < 100) { |
100 | length += 10; |
101 | } else if (length < 1000) { |
102 | length += 100; |
103 | } else { |
104 | length += 1000; |
105 | } |
106 | return length; |
107 | } |
108 | |
109 | TEST_F(BloomTest, VaryingLengths) { |
110 | char buffer[sizeof(int)]; |
111 | |
112 | // Count number of filters that significantly exceed the false positive rate |
113 | int mediocre_filters = 0; |
114 | int good_filters = 0; |
115 | |
116 | for (int length = 1; length <= 10000; length = NextLength(length)) { |
117 | Reset(); |
118 | for (int i = 0; i < length; i++) { |
119 | Add(Key(i, buffer)); |
120 | } |
121 | Build(); |
122 | |
123 | ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40)) |
124 | << length; |
125 | |
126 | // All added keys must match |
127 | for (int i = 0; i < length; i++) { |
128 | ASSERT_TRUE(Matches(Key(i, buffer))) |
129 | << "Length " << length << "; key " << i; |
130 | } |
131 | |
132 | // Check false positive rate |
133 | double rate = FalsePositiveRate(); |
134 | if (kVerbose >= 1) { |
135 | std::fprintf(stderr, |
136 | "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n" , |
137 | rate * 100.0, length, static_cast<int>(FilterSize())); |
138 | } |
139 | ASSERT_LE(rate, 0.02); // Must not be over 2% |
140 | if (rate > 0.0125) |
141 | mediocre_filters++; // Allowed, but not too often |
142 | else |
143 | good_filters++; |
144 | } |
145 | if (kVerbose >= 1) { |
146 | std::fprintf(stderr, "Filters: %d good, %d mediocre\n" , good_filters, |
147 | mediocre_filters); |
148 | } |
149 | ASSERT_LE(mediocre_filters, good_filters / 5); |
150 | } |
151 | |
152 | // Different bits-per-byte |
153 | |
154 | } // namespace leveldb |
155 | |