1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | |
5 | #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ |
6 | #define STORAGE_LEVELDB_DB_DB_IMPL_H_ |
7 | |
8 | #include <atomic> |
9 | #include <deque> |
10 | #include <set> |
11 | #include <string> |
12 | |
13 | #include "db/dbformat.h" |
14 | #include "db/log_writer.h" |
15 | #include "db/snapshot.h" |
16 | #include "leveldb/db.h" |
17 | #include "leveldb/env.h" |
18 | #include "port/port.h" |
19 | #include "port/thread_annotations.h" |
20 | |
21 | namespace leveldb { |
22 | |
23 | class MemTable; |
24 | class TableCache; |
25 | class Version; |
26 | class VersionEdit; |
27 | class VersionSet; |
28 | |
// DBImpl is the concrete implementation of the abstract DB interface.
// It owns the in-memory write buffer (memtable), the write-ahead log,
// the version/manifest state, and the background-compaction machinery.
// Most mutable state is protected by mutex_ (see GUARDED_BY annotations).
class DBImpl : public DB {
 public:
  DBImpl(const Options& options, const std::string& dbname);

  // Not copyable: DBImpl owns process-wide resources (file lock, log file,
  // background work) that must have exactly one owner.
  DBImpl(const DBImpl&) = delete;
  DBImpl& operator=(const DBImpl&) = delete;

  ~DBImpl() override;

  // Implementations of the DB interface
  Status Put(const WriteOptions&, const Slice& key,
             const Slice& value) override;
  Status Delete(const WriteOptions&, const Slice& key) override;
  Status Write(const WriteOptions& options, WriteBatch* updates) override;
  Status Get(const ReadOptions& options, const Slice& key,
             std::string* value) override;
  Iterator* NewIterator(const ReadOptions&) override;
  const Snapshot* GetSnapshot() override;
  void ReleaseSnapshot(const Snapshot* snapshot) override;
  bool GetProperty(const Slice& property, std::string* value) override;
  void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) override;
  void CompactRange(const Slice* begin, const Slice* end) override;

  // Extra methods (for testing) that are not in the public DB interface

  // Compact any files in the named level that overlap [*begin,*end]
  void TEST_CompactRange(int level, const Slice* begin, const Slice* end);

  // Force current memtable contents to be compacted.
  Status TEST_CompactMemTable();

  // Return an internal iterator over the current state of the database.
  // The keys of this iterator are internal keys (see format.h).
  // The returned iterator should be deleted when no longer needed.
  Iterator* TEST_NewInternalIterator();

  // Return the maximum overlapping data (in bytes) at next level for any
  // file at a level >= 1.
  int64_t TEST_MaxNextLevelOverlappingBytes();

  // Record a sample of bytes read at the specified internal key.
  // Samples are taken approximately once every config::kReadBytesPeriod
  // bytes.
  void RecordReadSample(Slice key);

 private:
  friend class DB;
  struct CompactionState;
  struct Writer;

  // Information for a manual compaction
  struct ManualCompaction {
    int level;
    bool done;
    const InternalKey* begin;  // null means beginning of key range
    const InternalKey* end;    // null means end of key range
    InternalKey tmp_storage;   // Used to keep track of compaction progress
  };

  // Per level compaction stats. stats_[level] stores the stats for
  // compactions that produced data for the specified "level".
  struct CompactionStats {
    CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {}

    // Accumulate another stats record into this one (used to merge the
    // stats of each finished compaction into the per-level totals).
    void Add(const CompactionStats& c) {
      this->micros += c.micros;
      this->bytes_read += c.bytes_read;
      this->bytes_written += c.bytes_written;
    }

    int64_t micros;
    int64_t bytes_read;
    int64_t bytes_written;
  };

  // Build a merged iterator over memtables and table files.  On return,
  // *latest_snapshot holds the sequence number to read at and *seed a
  // per-iterator value used by RecordReadSample-style sampling.
  Iterator* NewInternalIterator(const ReadOptions&,
                                SequenceNumber* latest_snapshot,
                                uint32_t* seed);

  // Create a brand-new database (fresh descriptor/manifest) on disk.
  Status NewDB();

  // Recover the descriptor from persistent storage. May do a significant
  // amount of work to recover recently logged updates. Any changes to
  // be made to the descriptor are added to *edit.
  Status Recover(VersionEdit* edit, bool* save_manifest)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Clear *s if options_ permit ignoring this class of error
  // (e.g. paranoid_checks disabled — confirm against db_impl.cc).
  void MaybeIgnoreError(Status* s) const;

  // Delete any unneeded files and stale in-memory entries.
  void RemoveObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Compact the in-memory write buffer to disk. Switches to a new
  // log-file/memtable and writes a new descriptor iff successful.
  // Errors are recorded in bg_error_.
  void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Replay the writes in the given log file into a memtable, flushing to
  // level-0 tables as needed.  Updates *max_sequence with the largest
  // sequence number seen; sets *save_manifest if *edit must be persisted.
  Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
                        VersionEdit* edit, SequenceNumber* max_sequence)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Write the contents of *mem to a new table file and record the file
  // in *edit, choosing a level relative to *base.
  Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Ensure there is room in the memtable for an upcoming write; may
  // switch memtables and schedule a compaction, or delay/stop the writer.
  Status MakeRoomForWrite(bool force /* compact even if there is room? */)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  // Combine the batches of queued writers_ into a single batch; on return
  // *last_writer points at the last writer whose batch was included.
  WriteBatch* BuildBatchGroup(Writer** last_writer)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Record a failure from a background thread in bg_error_ so foreground
  // operations can observe it.
  void RecordBackgroundError(const Status& s);

  // Background compaction pipeline: MaybeScheduleCompaction enqueues
  // BGWork on the env thread, which trampolines into BackgroundCall and
  // then BackgroundCompaction for the actual work.
  void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  static void BGWork(void* db);  // Env::Schedule trampoline; db is a DBImpl*.
  void BackgroundCall();
  void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  void CleanupCompaction(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  Status DoCompactionWork(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Helpers for DoCompactionWork: manage the current output table file and
  // install the finished outputs into a new version.
  Status OpenCompactionOutputFile(CompactionState* compact);
  Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
  Status InstallCompactionResults(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Convenience accessor for the user-supplied key comparator wrapped by
  // internal_comparator_.
  const Comparator* user_comparator() const {
    return internal_comparator_.user_comparator();
  }

  // Constant after construction
  Env* const env_;
  const InternalKeyComparator internal_comparator_;
  const InternalFilterPolicy internal_filter_policy_;
  const Options options_;  // options_.comparator == &internal_comparator_
  const bool owns_info_log_;   // true iff we must delete options_.info_log
  const bool owns_cache_;      // true iff we must delete the block cache
  const std::string dbname_;

  // table_cache_ provides its own synchronization
  TableCache* const table_cache_;

  // Lock over the persistent DB state. Non-null iff successfully acquired.
  FileLock* db_lock_;

  // State below is protected by mutex_
  port::Mutex mutex_;
  std::atomic<bool> shutting_down_;  // set during destruction to stop bg work
  port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_);
  MemTable* mem_;                    // Active memtable receiving writes
  MemTable* imm_ GUARDED_BY(mutex_);  // Memtable being compacted
  std::atomic<bool> has_imm_;         // So bg thread can detect non-null imm_
  WritableFile* logfile_;             // Current write-ahead log file
  uint64_t logfile_number_ GUARDED_BY(mutex_);
  log::Writer* log_;                  // Log writer layered over logfile_
  uint32_t seed_ GUARDED_BY(mutex_);  // For sampling.

  // Queue of writers.
  std::deque<Writer*> writers_ GUARDED_BY(mutex_);
  WriteBatch* tmp_batch_ GUARDED_BY(mutex_);  // Scratch batch for BuildBatchGroup

  // Live snapshots, oldest first.
  SnapshotList snapshots_ GUARDED_BY(mutex_);

  // Set of table files to protect from deletion because they are
  // part of ongoing compactions.
  std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_);

  // Has a background compaction been scheduled or is running?
  bool background_compaction_scheduled_ GUARDED_BY(mutex_);

  // Non-null while a manual (CompactRange-initiated) compaction is pending.
  ManualCompaction* manual_compaction_ GUARDED_BY(mutex_);

  // Owns the set of versions/manifest state for the database.
  VersionSet* const versions_ GUARDED_BY(mutex_);

  // Have we encountered a background error in paranoid mode?
  Status bg_error_ GUARDED_BY(mutex_);

  CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_);
};
207 | |
// Sanitize db options. The caller should delete result.info_log if
// it is not equal to src.info_log.
//
// "db" is the database path; "icmp"/"ipolicy" are the internal wrappers
// that the returned Options will reference in place of the user-supplied
// comparator/filter policy.  "src" is copied and clamped/filled in;
// the source object itself is not modified.
Options SanitizeOptions(const std::string& db,
                        const InternalKeyComparator* icmp,
                        const InternalFilterPolicy* ipolicy,
                        const Options& src);
214 | |
215 | } // namespace leveldb |
216 | |
217 | #endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ |
218 | |