1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | // |
5 | // The representation of a DBImpl consists of a set of Versions. The |
6 | // newest version is called "current". Older versions may be kept |
7 | // around to provide a consistent view to live iterators. |
8 | // |
9 | // Each Version keeps track of a set of Table files per level. The |
10 | // entire set of versions is maintained in a VersionSet. |
11 | // |
12 | // Version,VersionSet are thread-compatible, but require external |
13 | // synchronization on all accesses. |
14 | |
15 | #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ |
16 | #define STORAGE_LEVELDB_DB_VERSION_SET_H_ |
17 | |
18 | #include <map> |
19 | #include <set> |
20 | #include <vector> |
21 | |
22 | #include "db/dbformat.h" |
23 | #include "db/version_edit.h" |
24 | #include "port/port.h" |
25 | #include "port/thread_annotations.h" |
26 | |
27 | namespace leveldb { |
28 | |
29 | namespace log { |
30 | class Writer; |
31 | } |
32 | |
33 | class Compaction; |
34 | class Iterator; |
35 | class MemTable; |
36 | class TableBuilder; |
37 | class TableCache; |
38 | class Version; |
39 | class VersionSet; |
40 | class WritableFile; |
41 | |
42 | // Return the smallest index i such that files[i]->largest >= key. |
43 | // Return files.size() if there is no such file. |
44 | // REQUIRES: "files" contains a sorted list of non-overlapping files. |
45 | int FindFile(const InternalKeyComparator& icmp, |
46 | const std::vector<FileMetaData*>& files, const Slice& key); |
47 | |
48 | // Returns true iff some file in "files" overlaps the user key range |
49 | // [*smallest,*largest]. |
50 | // smallest==nullptr represents a key smaller than all keys in the DB. |
51 | // largest==nullptr represents a key largest than all keys in the DB. |
52 | // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges |
53 | // in sorted order. |
54 | bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, |
55 | bool disjoint_sorted_files, |
56 | const std::vector<FileMetaData*>& files, |
57 | const Slice* smallest_user_key, |
58 | const Slice* largest_user_key); |
59 | |
60 | class Version { |
61 | public: |
62 | struct GetStats { |
63 | FileMetaData* seek_file; |
64 | int seek_file_level; |
65 | }; |
66 | |
67 | // Append to *iters a sequence of iterators that will |
68 | // yield the contents of this Version when merged together. |
69 | // REQUIRES: This version has been saved (see VersionSet::SaveTo) |
70 | void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); |
71 | |
72 | // Lookup the value for key. If found, store it in *val and |
73 | // return OK. Else return a non-OK status. Fills *stats. |
74 | // REQUIRES: lock is not held |
75 | Status Get(const ReadOptions&, const LookupKey& key, std::string* val, |
76 | GetStats* stats); |
77 | |
78 | // Adds "stats" into the current state. Returns true if a new |
79 | // compaction may need to be triggered, false otherwise. |
80 | // REQUIRES: lock is held |
81 | bool UpdateStats(const GetStats& stats); |
82 | |
83 | // Record a sample of bytes read at the specified internal key. |
84 | // Samples are taken approximately once every config::kReadBytesPeriod |
85 | // bytes. Returns true if a new compaction may need to be triggered. |
86 | // REQUIRES: lock is held |
87 | bool RecordReadSample(Slice key); |
88 | |
89 | // Reference count management (so Versions do not disappear out from |
90 | // under live iterators) |
91 | void Ref(); |
92 | void Unref(); |
93 | |
94 | void GetOverlappingInputs( |
95 | int level, |
96 | const InternalKey* begin, // nullptr means before all keys |
97 | const InternalKey* end, // nullptr means after all keys |
98 | std::vector<FileMetaData*>* inputs); |
99 | |
100 | // Returns true iff some file in the specified level overlaps |
101 | // some part of [*smallest_user_key,*largest_user_key]. |
102 | // smallest_user_key==nullptr represents a key smaller than all the DB's keys. |
103 | // largest_user_key==nullptr represents a key largest than all the DB's keys. |
104 | bool OverlapInLevel(int level, const Slice* smallest_user_key, |
105 | const Slice* largest_user_key); |
106 | |
107 | // Return the level at which we should place a new memtable compaction |
108 | // result that covers the range [smallest_user_key,largest_user_key]. |
109 | int PickLevelForMemTableOutput(const Slice& smallest_user_key, |
110 | const Slice& largest_user_key); |
111 | |
112 | int NumFiles(int level) const { return files_[level].size(); } |
113 | |
114 | // Return a human readable string that describes this version's contents. |
115 | std::string DebugString() const; |
116 | |
117 | private: |
118 | friend class Compaction; |
119 | friend class VersionSet; |
120 | |
121 | class LevelFileNumIterator; |
122 | |
123 | explicit Version(VersionSet* vset) |
124 | : vset_(vset), |
125 | next_(this), |
126 | prev_(this), |
127 | refs_(0), |
128 | file_to_compact_(nullptr), |
129 | file_to_compact_level_(-1), |
130 | compaction_score_(-1), |
131 | compaction_level_(-1) {} |
132 | |
133 | Version(const Version&) = delete; |
134 | Version& operator=(const Version&) = delete; |
135 | |
136 | ~Version(); |
137 | |
138 | Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; |
139 | |
140 | // Call func(arg, level, f) for every file that overlaps user_key in |
141 | // order from newest to oldest. If an invocation of func returns |
142 | // false, makes no more calls. |
143 | // |
144 | // REQUIRES: user portion of internal_key == user_key. |
145 | void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg, |
146 | bool (*func)(void*, int, FileMetaData*)); |
147 | |
148 | VersionSet* vset_; // VersionSet to which this Version belongs |
149 | Version* next_; // Next version in linked list |
150 | Version* prev_; // Previous version in linked list |
151 | int refs_; // Number of live refs to this version |
152 | |
153 | // List of files per level |
154 | std::vector<FileMetaData*> files_[config::kNumLevels]; |
155 | |
156 | // Next file to compact based on seek stats. |
157 | FileMetaData* file_to_compact_; |
158 | int file_to_compact_level_; |
159 | |
160 | // Level that should be compacted next and its compaction score. |
161 | // Score < 1 means compaction is not strictly needed. These fields |
162 | // are initialized by Finalize(). |
163 | double compaction_score_; |
164 | int compaction_level_; |
165 | }; |
166 | |
167 | class VersionSet { |
168 | public: |
169 | VersionSet(const std::string& dbname, const Options* options, |
170 | TableCache* table_cache, const InternalKeyComparator*); |
171 | VersionSet(const VersionSet&) = delete; |
172 | VersionSet& operator=(const VersionSet&) = delete; |
173 | |
174 | ~VersionSet(); |
175 | |
176 | // Apply *edit to the current version to form a new descriptor that |
177 | // is both saved to persistent state and installed as the new |
178 | // current version. Will release *mu while actually writing to the file. |
179 | // REQUIRES: *mu is held on entry. |
180 | // REQUIRES: no other thread concurrently calls LogAndApply() |
181 | Status LogAndApply(VersionEdit* edit, port::Mutex* mu) |
182 | EXCLUSIVE_LOCKS_REQUIRED(mu); |
183 | |
184 | // Recover the last saved descriptor from persistent storage. |
185 | Status Recover(bool* save_manifest); |
186 | |
187 | // Return the current version. |
188 | Version* current() const { return current_; } |
189 | |
190 | // Return the current manifest file number |
191 | uint64_t ManifestFileNumber() const { return manifest_file_number_; } |
192 | |
193 | // Allocate and return a new file number |
194 | uint64_t NewFileNumber() { return next_file_number_++; } |
195 | |
196 | // Arrange to reuse "file_number" unless a newer file number has |
197 | // already been allocated. |
198 | // REQUIRES: "file_number" was returned by a call to NewFileNumber(). |
199 | void ReuseFileNumber(uint64_t file_number) { |
200 | if (next_file_number_ == file_number + 1) { |
201 | next_file_number_ = file_number; |
202 | } |
203 | } |
204 | |
205 | // Return the number of Table files at the specified level. |
206 | int NumLevelFiles(int level) const; |
207 | |
208 | // Return the combined file size of all files at the specified level. |
209 | int64_t NumLevelBytes(int level) const; |
210 | |
211 | // Return the last sequence number. |
212 | uint64_t LastSequence() const { return last_sequence_; } |
213 | |
214 | // Set the last sequence number to s. |
215 | void SetLastSequence(uint64_t s) { |
216 | assert(s >= last_sequence_); |
217 | last_sequence_ = s; |
218 | } |
219 | |
220 | // Mark the specified file number as used. |
221 | void MarkFileNumberUsed(uint64_t number); |
222 | |
223 | // Return the current log file number. |
224 | uint64_t LogNumber() const { return log_number_; } |
225 | |
226 | // Return the log file number for the log file that is currently |
227 | // being compacted, or zero if there is no such log file. |
228 | uint64_t PrevLogNumber() const { return prev_log_number_; } |
229 | |
230 | // Pick level and inputs for a new compaction. |
231 | // Returns nullptr if there is no compaction to be done. |
232 | // Otherwise returns a pointer to a heap-allocated object that |
233 | // describes the compaction. Caller should delete the result. |
234 | Compaction* PickCompaction(); |
235 | |
236 | // Return a compaction object for compacting the range [begin,end] in |
237 | // the specified level. Returns nullptr if there is nothing in that |
238 | // level that overlaps the specified range. Caller should delete |
239 | // the result. |
240 | Compaction* CompactRange(int level, const InternalKey* begin, |
241 | const InternalKey* end); |
242 | |
243 | // Return the maximum overlapping data (in bytes) at next level for any |
244 | // file at a level >= 1. |
245 | int64_t MaxNextLevelOverlappingBytes(); |
246 | |
247 | // Create an iterator that reads over the compaction inputs for "*c". |
248 | // The caller should delete the iterator when no longer needed. |
249 | Iterator* MakeInputIterator(Compaction* c); |
250 | |
251 | // Returns true iff some level needs a compaction. |
252 | bool NeedsCompaction() const { |
253 | Version* v = current_; |
254 | return (v->compaction_score_ >= 1) || (v->file_to_compact_ != nullptr); |
255 | } |
256 | |
257 | // Add all files listed in any live version to *live. |
258 | // May also mutate some internal state. |
259 | void AddLiveFiles(std::set<uint64_t>* live); |
260 | |
261 | // Return the approximate offset in the database of the data for |
262 | // "key" as of version "v". |
263 | uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); |
264 | |
265 | // Return a human-readable short (single-line) summary of the number |
266 | // of files per level. Uses *scratch as backing store. |
267 | struct LevelSummaryStorage { |
268 | char buffer[100]; |
269 | }; |
270 | const char* LevelSummary(LevelSummaryStorage* scratch) const; |
271 | |
272 | private: |
273 | class Builder; |
274 | |
275 | friend class Compaction; |
276 | friend class Version; |
277 | |
278 | bool ReuseManifest(const std::string& dscname, const std::string& dscbase); |
279 | |
280 | void Finalize(Version* v); |
281 | |
282 | void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest, |
283 | InternalKey* largest); |
284 | |
285 | void GetRange2(const std::vector<FileMetaData*>& inputs1, |
286 | const std::vector<FileMetaData*>& inputs2, |
287 | InternalKey* smallest, InternalKey* largest); |
288 | |
289 | void SetupOtherInputs(Compaction* c); |
290 | |
291 | // Save current contents to *log |
292 | Status WriteSnapshot(log::Writer* log); |
293 | |
294 | void AppendVersion(Version* v); |
295 | |
296 | Env* const env_; |
297 | const std::string dbname_; |
298 | const Options* const options_; |
299 | TableCache* const table_cache_; |
300 | const InternalKeyComparator icmp_; |
301 | uint64_t next_file_number_; |
302 | uint64_t manifest_file_number_; |
303 | uint64_t last_sequence_; |
304 | uint64_t log_number_; |
305 | uint64_t prev_log_number_; // 0 or backing store for memtable being compacted |
306 | |
307 | // Opened lazily |
308 | WritableFile* descriptor_file_; |
309 | log::Writer* descriptor_log_; |
310 | Version dummy_versions_; // Head of circular doubly-linked list of versions. |
311 | Version* current_; // == dummy_versions_.prev_ |
312 | |
313 | // Per-level key at which the next compaction at that level should start. |
314 | // Either an empty string, or a valid InternalKey. |
315 | std::string compact_pointer_[config::kNumLevels]; |
316 | }; |
317 | |
318 | // A Compaction encapsulates information about a compaction. |
319 | class Compaction { |
320 | public: |
321 | ~Compaction(); |
322 | |
323 | // Return the level that is being compacted. Inputs from "level" |
324 | // and "level+1" will be merged to produce a set of "level+1" files. |
325 | int level() const { return level_; } |
326 | |
327 | // Return the object that holds the edits to the descriptor done |
328 | // by this compaction. |
329 | VersionEdit* edit() { return &edit_; } |
330 | |
331 | // "which" must be either 0 or 1 |
332 | int num_input_files(int which) const { return inputs_[which].size(); } |
333 | |
334 | // Return the ith input file at "level()+which" ("which" must be 0 or 1). |
335 | FileMetaData* input(int which, int i) const { return inputs_[which][i]; } |
336 | |
337 | // Maximum size of files to build during this compaction. |
338 | uint64_t MaxOutputFileSize() const { return max_output_file_size_; } |
339 | |
340 | // Is this a trivial compaction that can be implemented by just |
341 | // moving a single input file to the next level (no merging or splitting) |
342 | bool IsTrivialMove() const; |
343 | |
344 | // Add all inputs to this compaction as delete operations to *edit. |
345 | void AddInputDeletions(VersionEdit* edit); |
346 | |
347 | // Returns true if the information we have available guarantees that |
348 | // the compaction is producing data in "level+1" for which no data exists |
349 | // in levels greater than "level+1". |
350 | bool IsBaseLevelForKey(const Slice& user_key); |
351 | |
352 | // Returns true iff we should stop building the current output |
353 | // before processing "internal_key". |
354 | bool ShouldStopBefore(const Slice& internal_key); |
355 | |
356 | // Release the input version for the compaction, once the compaction |
357 | // is successful. |
358 | void ReleaseInputs(); |
359 | |
360 | private: |
361 | friend class Version; |
362 | friend class VersionSet; |
363 | |
364 | Compaction(const Options* options, int level); |
365 | |
366 | int level_; |
367 | uint64_t max_output_file_size_; |
368 | Version* input_version_; |
369 | VersionEdit edit_; |
370 | |
371 | // Each compaction reads inputs from "level_" and "level_+1" |
372 | std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs |
373 | |
374 | // State used to check for number of overlapping grandparent files |
375 | // (parent == level_ + 1, grandparent == level_ + 2) |
376 | std::vector<FileMetaData*> grandparents_; |
377 | size_t grandparent_index_; // Index in grandparent_starts_ |
378 | bool seen_key_; // Some output key has been seen |
379 | int64_t overlapped_bytes_; // Bytes of overlap between current output |
380 | // and grandparent files |
381 | |
382 | // State for implementing IsBaseLevelForKey |
383 | |
384 | // level_ptrs_ holds indices into input_version_->levels_: our state |
385 | // is that we are positioned at one of the file ranges for each |
386 | // higher level than the ones involved in this compaction (i.e. for |
387 | // all L >= level_ + 2). |
388 | size_t level_ptrs_[config::kNumLevels]; |
389 | }; |
390 | |
391 | } // namespace leveldb |
392 | |
393 | #endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ |
394 | |