1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | |
#include "leveldb/cache.h"

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "port/port.h"
#include "port/thread_annotations.h"
#include "util/hash.h"
#include "util/mutexlock.h"
15 | |
16 | namespace leveldb { |
17 | |
18 | Cache::~Cache() {} |
19 | |
20 | namespace { |
21 | |
22 | // LRU cache implementation |
23 | // |
24 | // Cache entries have an "in_cache" boolean indicating whether the cache has a |
25 | // reference on the entry. The only ways that this can become false without the |
26 | // entry being passed to its "deleter" are via Erase(), via Insert() when |
27 | // an element with a duplicate key is inserted, or on destruction of the cache. |
28 | // |
29 | // The cache keeps two linked lists of items in the cache. All items in the |
30 | // cache are in one list or the other, and never both. Items still referenced |
31 | // by clients but erased from the cache are in neither list. The lists are: |
32 | // - in-use: contains the items currently referenced by clients, in no |
33 | // particular order. (This list is used for invariant checking. If we |
34 | // removed the check, elements that would otherwise be on this list could be |
35 | // left as disconnected singleton lists.) |
36 | // - LRU: contains the items not currently referenced by clients, in LRU order |
37 | // Elements are moved between these lists by the Ref() and Unref() methods, |
38 | // when they detect an element in the cache acquiring or losing its only |
39 | // external reference. |
40 | |
41 | // An entry is a variable length heap-allocated structure. Entries |
42 | // are kept in a circular doubly linked list ordered by access time. |
// NOTE: handles are allocated with malloc(sizeof(LRUHandle) - 1 + key_length)
// (see LRUCache::Insert), so key_data extends past the end of the struct and
// must remain the last member.
struct LRUHandle {
  void* value;
  void (*deleter)(const Slice&, void* value);
  LRUHandle* next_hash;  // Next entry in the same HandleTable bucket chain.
  LRUHandle* next;       // Neighbors in the circular lru_/in_use_ list.
  LRUHandle* prev;
  size_t charge;      // TODO(opt): Only allow uint32_t?
  size_t key_length;
  bool in_cache;     // Whether entry is in the cache.
  uint32_t refs;     // References, including cache reference, if present.
  uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
  char key_data[1];  // Beginning of key

  Slice key() const {
    // next is only equal to this if the LRU handle is the list head of an
    // empty list. List heads never have meaningful keys.
    assert(next != this);

    return Slice(key_data, key_length);
  }
};
64 | |
65 | // We provide our own simple hash table since it removes a whole bunch |
66 | // of porting hacks and is also faster than some of the built-in hash |
67 | // table implementations in some of the compiler/runtime combinations |
68 | // we have tested. E.g., readrandom speeds up by ~5% over the g++ |
69 | // 4.4.3's builtin hashtable. |
70 | class HandleTable { |
71 | public: |
72 | HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); } |
73 | ~HandleTable() { delete[] list_; } |
74 | |
75 | LRUHandle* Lookup(const Slice& key, uint32_t hash) { |
76 | return *FindPointer(key, hash); |
77 | } |
78 | |
79 | LRUHandle* Insert(LRUHandle* h) { |
80 | LRUHandle** ptr = FindPointer(h->key(), h->hash); |
81 | LRUHandle* old = *ptr; |
82 | h->next_hash = (old == nullptr ? nullptr : old->next_hash); |
83 | *ptr = h; |
84 | if (old == nullptr) { |
85 | ++elems_; |
86 | if (elems_ > length_) { |
87 | // Since each cache entry is fairly large, we aim for a small |
88 | // average linked list length (<= 1). |
89 | Resize(); |
90 | } |
91 | } |
92 | return old; |
93 | } |
94 | |
95 | LRUHandle* Remove(const Slice& key, uint32_t hash) { |
96 | LRUHandle** ptr = FindPointer(key, hash); |
97 | LRUHandle* result = *ptr; |
98 | if (result != nullptr) { |
99 | *ptr = result->next_hash; |
100 | --elems_; |
101 | } |
102 | return result; |
103 | } |
104 | |
105 | private: |
106 | // The table consists of an array of buckets where each bucket is |
107 | // a linked list of cache entries that hash into the bucket. |
108 | uint32_t length_; |
109 | uint32_t elems_; |
110 | LRUHandle** list_; |
111 | |
112 | // Return a pointer to slot that points to a cache entry that |
113 | // matches key/hash. If there is no such cache entry, return a |
114 | // pointer to the trailing slot in the corresponding linked list. |
115 | LRUHandle** FindPointer(const Slice& key, uint32_t hash) { |
116 | LRUHandle** ptr = &list_[hash & (length_ - 1)]; |
117 | while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) { |
118 | ptr = &(*ptr)->next_hash; |
119 | } |
120 | return ptr; |
121 | } |
122 | |
123 | void Resize() { |
124 | uint32_t new_length = 4; |
125 | while (new_length < elems_) { |
126 | new_length *= 2; |
127 | } |
128 | LRUHandle** new_list = new LRUHandle*[new_length]; |
129 | memset(new_list, 0, sizeof(new_list[0]) * new_length); |
130 | uint32_t count = 0; |
131 | for (uint32_t i = 0; i < length_; i++) { |
132 | LRUHandle* h = list_[i]; |
133 | while (h != nullptr) { |
134 | LRUHandle* next = h->next_hash; |
135 | uint32_t hash = h->hash; |
136 | LRUHandle** ptr = &new_list[hash & (new_length - 1)]; |
137 | h->next_hash = *ptr; |
138 | *ptr = h; |
139 | h = next; |
140 | count++; |
141 | } |
142 | } |
143 | assert(elems_ == count); |
144 | delete[] list_; |
145 | list_ = new_list; |
146 | length_ = new_length; |
147 | } |
148 | }; |
149 | |
// A single shard of sharded cache.
class LRUCache {
 public:
  LRUCache();
  ~LRUCache();

  // Separate from constructor so caller can easily make an array of LRUCache
  void SetCapacity(size_t capacity) { capacity_ = capacity; }

  // Like Cache methods, but with an extra "hash" parameter.
  // Insert/Lookup return a handle carrying a reference owned by the caller,
  // which must be returned via Release().
  Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value));
  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
  void Release(Cache::Handle* handle);
  void Erase(const Slice& key, uint32_t hash);
  // Evicts all entries not currently referenced by clients.
  void Prune();
  size_t TotalCharge() const {
    MutexLock l(&mutex_);
    return usage_;
  }

 private:
  // Unlink e from whichever circular list it is currently on.
  void LRU_Remove(LRUHandle* e);
  // Append e as the newest entry of the given circular list.
  void LRU_Append(LRUHandle* list, LRUHandle* e);
  // Acquire/release a reference, moving e between lru_ and in_use_ as its
  // external reference count crosses zero.
  void Ref(LRUHandle* e);
  void Unref(LRUHandle* e);
  bool FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Initialized before use.
  size_t capacity_;

  // mutex_ protects the following state.
  mutable port::Mutex mutex_;
  size_t usage_ GUARDED_BY(mutex_);  // Sum of charges of all cached entries.

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // Entries have refs==1 and in_cache==true.
  LRUHandle lru_ GUARDED_BY(mutex_);

  // Dummy head of in-use list.
  // Entries are in use by clients, and have refs >= 2 and in_cache==true.
  LRUHandle in_use_ GUARDED_BY(mutex_);

  HandleTable table_ GUARDED_BY(mutex_);
};
197 | |
198 | LRUCache::LRUCache() : capacity_(0), usage_(0) { |
199 | // Make empty circular linked lists. |
200 | lru_.next = &lru_; |
201 | lru_.prev = &lru_; |
202 | in_use_.next = &in_use_; |
203 | in_use_.prev = &in_use_; |
204 | } |
205 | |
206 | LRUCache::~LRUCache() { |
207 | assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle |
208 | for (LRUHandle* e = lru_.next; e != &lru_;) { |
209 | LRUHandle* next = e->next; |
210 | assert(e->in_cache); |
211 | e->in_cache = false; |
212 | assert(e->refs == 1); // Invariant of lru_ list. |
213 | Unref(e); |
214 | e = next; |
215 | } |
216 | } |
217 | |
218 | void LRUCache::Ref(LRUHandle* e) { |
219 | if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list. |
220 | LRU_Remove(e); |
221 | LRU_Append(&in_use_, e); |
222 | } |
223 | e->refs++; |
224 | } |
225 | |
226 | void LRUCache::Unref(LRUHandle* e) { |
227 | assert(e->refs > 0); |
228 | e->refs--; |
229 | if (e->refs == 0) { // Deallocate. |
230 | assert(!e->in_cache); |
231 | (*e->deleter)(e->key(), e->value); |
232 | free(e); |
233 | } else if (e->in_cache && e->refs == 1) { |
234 | // No longer in use; move to lru_ list. |
235 | LRU_Remove(e); |
236 | LRU_Append(&lru_, e); |
237 | } |
238 | } |
239 | |
240 | void LRUCache::LRU_Remove(LRUHandle* e) { |
241 | e->next->prev = e->prev; |
242 | e->prev->next = e->next; |
243 | } |
244 | |
245 | void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) { |
246 | // Make "e" newest entry by inserting just before *list |
247 | e->next = list; |
248 | e->prev = list->prev; |
249 | e->prev->next = e; |
250 | e->next->prev = e; |
251 | } |
252 | |
253 | Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) { |
254 | MutexLock l(&mutex_); |
255 | LRUHandle* e = table_.Lookup(key, hash); |
256 | if (e != nullptr) { |
257 | Ref(e); |
258 | } |
259 | return reinterpret_cast<Cache::Handle*>(e); |
260 | } |
261 | |
262 | void LRUCache::Release(Cache::Handle* handle) { |
263 | MutexLock l(&mutex_); |
264 | Unref(reinterpret_cast<LRUHandle*>(handle)); |
265 | } |
266 | |
Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
                                size_t charge,
                                void (*deleter)(const Slice& key,
                                                void* value)) {
  MutexLock l(&mutex_);

  // Allocate handle and key in one block: LRUHandle ends in key_data[1], so
  // "- 1 + key.size()" leaves exactly key.size() bytes for the key copy.
  LRUHandle* e =
      reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->hash = hash;
  e->in_cache = false;
  e->refs = 1;  // for the returned handle.
  std::memcpy(e->key_data, key.data(), key.size());

  if (capacity_ > 0) {
    e->refs++;  // for the cache's reference.
    e->in_cache = true;
    LRU_Append(&in_use_, e);
    usage_ += charge;
    // table_.Insert returns any previously-cached entry with the same key;
    // FinishErase detaches it and drops the cache's reference to it.
    FinishErase(table_.Insert(e));
  } else {  // don't cache. (capacity_==0 is supported and turns off caching.)
    // next is read by key() in an assert, so it must be initialized
    e->next = nullptr;
  }
  // Evict from the cold end of lru_ until usage fits capacity. Entries on
  // in_use_ are never evicted, so usage_ can remain above capacity_.
  while (usage_ > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    assert(old->refs == 1);
    bool erased = FinishErase(table_.Remove(old->key(), old->hash));
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }

  return reinterpret_cast<Cache::Handle*>(e);
}
305 | |
306 | // If e != nullptr, finish removing *e from the cache; it has already been |
307 | // removed from the hash table. Return whether e != nullptr. |
308 | bool LRUCache::FinishErase(LRUHandle* e) { |
309 | if (e != nullptr) { |
310 | assert(e->in_cache); |
311 | LRU_Remove(e); |
312 | e->in_cache = false; |
313 | usage_ -= e->charge; |
314 | Unref(e); |
315 | } |
316 | return e != nullptr; |
317 | } |
318 | |
319 | void LRUCache::Erase(const Slice& key, uint32_t hash) { |
320 | MutexLock l(&mutex_); |
321 | FinishErase(table_.Remove(key, hash)); |
322 | } |
323 | |
324 | void LRUCache::Prune() { |
325 | MutexLock l(&mutex_); |
326 | while (lru_.next != &lru_) { |
327 | LRUHandle* e = lru_.next; |
328 | assert(e->refs == 1); |
329 | bool erased = FinishErase(table_.Remove(e->key(), e->hash)); |
330 | if (!erased) { // to avoid unused variable when compiled NDEBUG |
331 | assert(erased); |
332 | } |
333 | } |
334 | } |
335 | |
// The cache is split into 1 << kNumShardBits independent LRUCache shards,
// each protected by its own mutex; Shard() below picks a shard from the top
// bits of the key hash.
static const int kNumShardBits = 4;
static const int kNumShards = 1 << kNumShardBits;
338 | |
339 | class ShardedLRUCache : public Cache { |
340 | private: |
341 | LRUCache shard_[kNumShards]; |
342 | port::Mutex id_mutex_; |
343 | uint64_t last_id_; |
344 | |
345 | static inline uint32_t HashSlice(const Slice& s) { |
346 | return Hash(s.data(), s.size(), 0); |
347 | } |
348 | |
349 | static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); } |
350 | |
351 | public: |
352 | explicit ShardedLRUCache(size_t capacity) : last_id_(0) { |
353 | const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards; |
354 | for (int s = 0; s < kNumShards; s++) { |
355 | shard_[s].SetCapacity(per_shard); |
356 | } |
357 | } |
358 | ~ShardedLRUCache() override {} |
359 | Handle* Insert(const Slice& key, void* value, size_t charge, |
360 | void (*deleter)(const Slice& key, void* value)) override { |
361 | const uint32_t hash = HashSlice(key); |
362 | return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter); |
363 | } |
364 | Handle* Lookup(const Slice& key) override { |
365 | const uint32_t hash = HashSlice(key); |
366 | return shard_[Shard(hash)].Lookup(key, hash); |
367 | } |
368 | void Release(Handle* handle) override { |
369 | LRUHandle* h = reinterpret_cast<LRUHandle*>(handle); |
370 | shard_[Shard(h->hash)].Release(handle); |
371 | } |
372 | void Erase(const Slice& key) override { |
373 | const uint32_t hash = HashSlice(key); |
374 | shard_[Shard(hash)].Erase(key, hash); |
375 | } |
376 | void* Value(Handle* handle) override { |
377 | return reinterpret_cast<LRUHandle*>(handle)->value; |
378 | } |
379 | uint64_t NewId() override { |
380 | MutexLock l(&id_mutex_); |
381 | return ++(last_id_); |
382 | } |
383 | void Prune() override { |
384 | for (int s = 0; s < kNumShards; s++) { |
385 | shard_[s].Prune(); |
386 | } |
387 | } |
388 | size_t TotalCharge() const override { |
389 | size_t total = 0; |
390 | for (int s = 0; s < kNumShards; s++) { |
391 | total += shard_[s].TotalCharge(); |
392 | } |
393 | return total; |
394 | } |
395 | }; |
396 | |
397 | } // end anonymous namespace |
398 | |
// Public factory for the default Cache implementation defined above.
Cache* NewLRUCache(size_t capacity) { return new ShardedLRUCache(capacity); }
400 | |
401 | } // namespace leveldb |
402 | |