1// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5#include "gtest/gtest.h"
6#include "leveldb/filter_policy.h"
7#include "util/coding.h"
8#include "util/logging.h"
9#include "util/testutil.h"
10
11namespace leveldb {
12
13static const int kVerbose = 1;
14
15static Slice Key(int i, char* buffer) {
16 EncodeFixed32(buffer, i);
17 return Slice(buffer, sizeof(uint32_t));
18}
19
20class BloomTest : public testing::Test {
21 public:
22 BloomTest() : policy_(NewBloomFilterPolicy(10)) {}
23
24 ~BloomTest() { delete policy_; }
25
26 void Reset() {
27 keys_.clear();
28 filter_.clear();
29 }
30
31 void Add(const Slice& s) { keys_.push_back(s.ToString()); }
32
33 void Build() {
34 std::vector<Slice> key_slices;
35 for (size_t i = 0; i < keys_.size(); i++) {
36 key_slices.push_back(Slice(keys_[i]));
37 }
38 filter_.clear();
39 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
40 &filter_);
41 keys_.clear();
42 if (kVerbose >= 2) DumpFilter();
43 }
44
45 size_t FilterSize() const { return filter_.size(); }
46
47 void DumpFilter() {
48 std::fprintf(stderr, "F(");
49 for (size_t i = 0; i + 1 < filter_.size(); i++) {
50 const unsigned int c = static_cast<unsigned int>(filter_[i]);
51 for (int j = 0; j < 8; j++) {
52 std::fprintf(stderr, "%c", (c & (1 << j)) ? '1' : '.');
53 }
54 }
55 std::fprintf(stderr, ")\n");
56 }
57
58 bool Matches(const Slice& s) {
59 if (!keys_.empty()) {
60 Build();
61 }
62 return policy_->KeyMayMatch(s, filter_);
63 }
64
65 double FalsePositiveRate() {
66 char buffer[sizeof(int)];
67 int result = 0;
68 for (int i = 0; i < 10000; i++) {
69 if (Matches(Key(i + 1000000000, buffer))) {
70 result++;
71 }
72 }
73 return result / 10000.0;
74 }
75
76 private:
77 const FilterPolicy* policy_;
78 std::string filter_;
79 std::vector<std::string> keys_;
80};
81
82TEST_F(BloomTest, EmptyFilter) {
83 ASSERT_TRUE(!Matches("hello"));
84 ASSERT_TRUE(!Matches("world"));
85}
86
87TEST_F(BloomTest, Small) {
88 Add("hello");
89 Add("world");
90 ASSERT_TRUE(Matches("hello"));
91 ASSERT_TRUE(Matches("world"));
92 ASSERT_TRUE(!Matches("x"));
93 ASSERT_TRUE(!Matches("foo"));
94}
95
96static int NextLength(int length) {
97 if (length < 10) {
98 length += 1;
99 } else if (length < 100) {
100 length += 10;
101 } else if (length < 1000) {
102 length += 100;
103 } else {
104 length += 1000;
105 }
106 return length;
107}
108
109TEST_F(BloomTest, VaryingLengths) {
110 char buffer[sizeof(int)];
111
112 // Count number of filters that significantly exceed the false positive rate
113 int mediocre_filters = 0;
114 int good_filters = 0;
115
116 for (int length = 1; length <= 10000; length = NextLength(length)) {
117 Reset();
118 for (int i = 0; i < length; i++) {
119 Add(Key(i, buffer));
120 }
121 Build();
122
123 ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
124 << length;
125
126 // All added keys must match
127 for (int i = 0; i < length; i++) {
128 ASSERT_TRUE(Matches(Key(i, buffer)))
129 << "Length " << length << "; key " << i;
130 }
131
132 // Check false positive rate
133 double rate = FalsePositiveRate();
134 if (kVerbose >= 1) {
135 std::fprintf(stderr,
136 "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
137 rate * 100.0, length, static_cast<int>(FilterSize()));
138 }
139 ASSERT_LE(rate, 0.02); // Must not be over 2%
140 if (rate > 0.0125)
141 mediocre_filters++; // Allowed, but not too often
142 else
143 good_filters++;
144 }
145 if (kVerbose >= 1) {
146 std::fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
147 mediocre_filters);
148 }
149 ASSERT_LE(mediocre_filters, good_filters / 5);
150}
151
152// Different bits-per-byte
153
154} // namespace leveldb
155