1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5#include "leveldb/cache.h"
6
7#include <cassert>
8#include <cstdio>
9#include <cstdlib>
10
11#include "port/port.h"
12#include "port/thread_annotations.h"
13#include "util/hash.h"
14#include "util/mutexlock.h"
15
16namespace leveldb {
17
18Cache::~Cache() {}
19
20namespace {
21
22// LRU cache implementation
23//
24// Cache entries have an "in_cache" boolean indicating whether the cache has a
25// reference on the entry. The only ways that this can become false without the
26// entry being passed to its "deleter" are via Erase(), via Insert() when
27// an element with a duplicate key is inserted, or on destruction of the cache.
28//
29// The cache keeps two linked lists of items in the cache. All items in the
30// cache are in one list or the other, and never both. Items still referenced
31// by clients but erased from the cache are in neither list. The lists are:
32// - in-use: contains the items currently referenced by clients, in no
33// particular order. (This list is used for invariant checking. If we
34// removed the check, elements that would otherwise be on this list could be
35// left as disconnected singleton lists.)
36// - LRU: contains the items not currently referenced by clients, in LRU order
37// Elements are moved between these lists by the Ref() and Unref() methods,
38// when they detect an element in the cache acquiring or losing its only
39// external reference.
40
41// An entry is a variable length heap-allocated structure. Entries
42// are kept in a circular doubly linked list ordered by access time.
43struct LRUHandle {
44 void* value;
45 void (*deleter)(const Slice&, void* value);
46 LRUHandle* next_hash;
47 LRUHandle* next;
48 LRUHandle* prev;
49 size_t charge; // TODO(opt): Only allow uint32_t?
50 size_t key_length;
51 bool in_cache; // Whether entry is in the cache.
52 uint32_t refs; // References, including cache reference, if present.
53 uint32_t hash; // Hash of key(); used for fast sharding and comparisons
54 char key_data[1]; // Beginning of key
55
56 Slice key() const {
57 // next is only equal to this if the LRU handle is the list head of an
58 // empty list. List heads never have meaningful keys.
59 assert(next != this);
60
61 return Slice(key_data, key_length);
62 }
63};
64
65// We provide our own simple hash table since it removes a whole bunch
66// of porting hacks and is also faster than some of the built-in hash
67// table implementations in some of the compiler/runtime combinations
68// we have tested. E.g., readrandom speeds up by ~5% over the g++
69// 4.4.3's builtin hashtable.
70class HandleTable {
71 public:
72 HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
73 ~HandleTable() { delete[] list_; }
74
75 LRUHandle* Lookup(const Slice& key, uint32_t hash) {
76 return *FindPointer(key, hash);
77 }
78
79 LRUHandle* Insert(LRUHandle* h) {
80 LRUHandle** ptr = FindPointer(h->key(), h->hash);
81 LRUHandle* old = *ptr;
82 h->next_hash = (old == nullptr ? nullptr : old->next_hash);
83 *ptr = h;
84 if (old == nullptr) {
85 ++elems_;
86 if (elems_ > length_) {
87 // Since each cache entry is fairly large, we aim for a small
88 // average linked list length (<= 1).
89 Resize();
90 }
91 }
92 return old;
93 }
94
95 LRUHandle* Remove(const Slice& key, uint32_t hash) {
96 LRUHandle** ptr = FindPointer(key, hash);
97 LRUHandle* result = *ptr;
98 if (result != nullptr) {
99 *ptr = result->next_hash;
100 --elems_;
101 }
102 return result;
103 }
104
105 private:
106 // The table consists of an array of buckets where each bucket is
107 // a linked list of cache entries that hash into the bucket.
108 uint32_t length_;
109 uint32_t elems_;
110 LRUHandle** list_;
111
112 // Return a pointer to slot that points to a cache entry that
113 // matches key/hash. If there is no such cache entry, return a
114 // pointer to the trailing slot in the corresponding linked list.
115 LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
116 LRUHandle** ptr = &list_[hash & (length_ - 1)];
117 while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
118 ptr = &(*ptr)->next_hash;
119 }
120 return ptr;
121 }
122
123 void Resize() {
124 uint32_t new_length = 4;
125 while (new_length < elems_) {
126 new_length *= 2;
127 }
128 LRUHandle** new_list = new LRUHandle*[new_length];
129 memset(new_list, 0, sizeof(new_list[0]) * new_length);
130 uint32_t count = 0;
131 for (uint32_t i = 0; i < length_; i++) {
132 LRUHandle* h = list_[i];
133 while (h != nullptr) {
134 LRUHandle* next = h->next_hash;
135 uint32_t hash = h->hash;
136 LRUHandle** ptr = &new_list[hash & (new_length - 1)];
137 h->next_hash = *ptr;
138 *ptr = h;
139 h = next;
140 count++;
141 }
142 }
143 assert(elems_ == count);
144 delete[] list_;
145 list_ = new_list;
146 length_ = new_length;
147 }
148};
149
150// A single shard of sharded cache.
151class LRUCache {
152 public:
153 LRUCache();
154 ~LRUCache();
155
156 // Separate from constructor so caller can easily make an array of LRUCache
157 void SetCapacity(size_t capacity) { capacity_ = capacity; }
158
159 // Like Cache methods, but with an extra "hash" parameter.
160 Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value,
161 size_t charge,
162 void (*deleter)(const Slice& key, void* value));
163 Cache::Handle* Lookup(const Slice& key, uint32_t hash);
164 void Release(Cache::Handle* handle);
165 void Erase(const Slice& key, uint32_t hash);
166 void Prune();
167 size_t TotalCharge() const {
168 MutexLock l(&mutex_);
169 return usage_;
170 }
171
172 private:
173 void LRU_Remove(LRUHandle* e);
174 void LRU_Append(LRUHandle* list, LRUHandle* e);
175 void Ref(LRUHandle* e);
176 void Unref(LRUHandle* e);
177 bool FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
178
179 // Initialized before use.
180 size_t capacity_;
181
182 // mutex_ protects the following state.
183 mutable port::Mutex mutex_;
184 size_t usage_ GUARDED_BY(mutex_);
185
186 // Dummy head of LRU list.
187 // lru.prev is newest entry, lru.next is oldest entry.
188 // Entries have refs==1 and in_cache==true.
189 LRUHandle lru_ GUARDED_BY(mutex_);
190
191 // Dummy head of in-use list.
192 // Entries are in use by clients, and have refs >= 2 and in_cache==true.
193 LRUHandle in_use_ GUARDED_BY(mutex_);
194
195 HandleTable table_ GUARDED_BY(mutex_);
196};
197
198LRUCache::LRUCache() : capacity_(0), usage_(0) {
199 // Make empty circular linked lists.
200 lru_.next = &lru_;
201 lru_.prev = &lru_;
202 in_use_.next = &in_use_;
203 in_use_.prev = &in_use_;
204}
205
206LRUCache::~LRUCache() {
207 assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle
208 for (LRUHandle* e = lru_.next; e != &lru_;) {
209 LRUHandle* next = e->next;
210 assert(e->in_cache);
211 e->in_cache = false;
212 assert(e->refs == 1); // Invariant of lru_ list.
213 Unref(e);
214 e = next;
215 }
216}
217
218void LRUCache::Ref(LRUHandle* e) {
219 if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list.
220 LRU_Remove(e);
221 LRU_Append(&in_use_, e);
222 }
223 e->refs++;
224}
225
226void LRUCache::Unref(LRUHandle* e) {
227 assert(e->refs > 0);
228 e->refs--;
229 if (e->refs == 0) { // Deallocate.
230 assert(!e->in_cache);
231 (*e->deleter)(e->key(), e->value);
232 free(e);
233 } else if (e->in_cache && e->refs == 1) {
234 // No longer in use; move to lru_ list.
235 LRU_Remove(e);
236 LRU_Append(&lru_, e);
237 }
238}
239
240void LRUCache::LRU_Remove(LRUHandle* e) {
241 e->next->prev = e->prev;
242 e->prev->next = e->next;
243}
244
245void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
246 // Make "e" newest entry by inserting just before *list
247 e->next = list;
248 e->prev = list->prev;
249 e->prev->next = e;
250 e->next->prev = e;
251}
252
253Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
254 MutexLock l(&mutex_);
255 LRUHandle* e = table_.Lookup(key, hash);
256 if (e != nullptr) {
257 Ref(e);
258 }
259 return reinterpret_cast<Cache::Handle*>(e);
260}
261
262void LRUCache::Release(Cache::Handle* handle) {
263 MutexLock l(&mutex_);
264 Unref(reinterpret_cast<LRUHandle*>(handle));
265}
266
267Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
268 size_t charge,
269 void (*deleter)(const Slice& key,
270 void* value)) {
271 MutexLock l(&mutex_);
272
273 LRUHandle* e =
274 reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
275 e->value = value;
276 e->deleter = deleter;
277 e->charge = charge;
278 e->key_length = key.size();
279 e->hash = hash;
280 e->in_cache = false;
281 e->refs = 1; // for the returned handle.
282 std::memcpy(e->key_data, key.data(), key.size());
283
284 if (capacity_ > 0) {
285 e->refs++; // for the cache's reference.
286 e->in_cache = true;
287 LRU_Append(&in_use_, e);
288 usage_ += charge;
289 FinishErase(table_.Insert(e));
290 } else { // don't cache. (capacity_==0 is supported and turns off caching.)
291 // next is read by key() in an assert, so it must be initialized
292 e->next = nullptr;
293 }
294 while (usage_ > capacity_ && lru_.next != &lru_) {
295 LRUHandle* old = lru_.next;
296 assert(old->refs == 1);
297 bool erased = FinishErase(table_.Remove(old->key(), old->hash));
298 if (!erased) { // to avoid unused variable when compiled NDEBUG
299 assert(erased);
300 }
301 }
302
303 return reinterpret_cast<Cache::Handle*>(e);
304}
305
306// If e != nullptr, finish removing *e from the cache; it has already been
307// removed from the hash table. Return whether e != nullptr.
308bool LRUCache::FinishErase(LRUHandle* e) {
309 if (e != nullptr) {
310 assert(e->in_cache);
311 LRU_Remove(e);
312 e->in_cache = false;
313 usage_ -= e->charge;
314 Unref(e);
315 }
316 return e != nullptr;
317}
318
319void LRUCache::Erase(const Slice& key, uint32_t hash) {
320 MutexLock l(&mutex_);
321 FinishErase(table_.Remove(key, hash));
322}
323
324void LRUCache::Prune() {
325 MutexLock l(&mutex_);
326 while (lru_.next != &lru_) {
327 LRUHandle* e = lru_.next;
328 assert(e->refs == 1);
329 bool erased = FinishErase(table_.Remove(e->key(), e->hash));
330 if (!erased) { // to avoid unused variable when compiled NDEBUG
331 assert(erased);
332 }
333 }
334}
335
336static const int kNumShardBits = 4;
337static const int kNumShards = 1 << kNumShardBits;
338
339class ShardedLRUCache : public Cache {
340 private:
341 LRUCache shard_[kNumShards];
342 port::Mutex id_mutex_;
343 uint64_t last_id_;
344
345 static inline uint32_t HashSlice(const Slice& s) {
346 return Hash(s.data(), s.size(), 0);
347 }
348
349 static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); }
350
351 public:
352 explicit ShardedLRUCache(size_t capacity) : last_id_(0) {
353 const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
354 for (int s = 0; s < kNumShards; s++) {
355 shard_[s].SetCapacity(per_shard);
356 }
357 }
358 ~ShardedLRUCache() override {}
359 Handle* Insert(const Slice& key, void* value, size_t charge,
360 void (*deleter)(const Slice& key, void* value)) override {
361 const uint32_t hash = HashSlice(key);
362 return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
363 }
364 Handle* Lookup(const Slice& key) override {
365 const uint32_t hash = HashSlice(key);
366 return shard_[Shard(hash)].Lookup(key, hash);
367 }
368 void Release(Handle* handle) override {
369 LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
370 shard_[Shard(h->hash)].Release(handle);
371 }
372 void Erase(const Slice& key) override {
373 const uint32_t hash = HashSlice(key);
374 shard_[Shard(hash)].Erase(key, hash);
375 }
376 void* Value(Handle* handle) override {
377 return reinterpret_cast<LRUHandle*>(handle)->value;
378 }
379 uint64_t NewId() override {
380 MutexLock l(&id_mutex_);
381 return ++(last_id_);
382 }
383 void Prune() override {
384 for (int s = 0; s < kNumShards; s++) {
385 shard_[s].Prune();
386 }
387 }
388 size_t TotalCharge() const override {
389 size_t total = 0;
390 for (int s = 0; s < kNumShards; s++) {
391 total += shard_[s].TotalCharge();
392 }
393 return total;
394 }
395};
396
397} // end anonymous namespace
398
399Cache* NewLRUCache(size_t capacity) { return new ShardedLRUCache(capacity); }
400
401} // namespace leveldb
402