1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | // |
5 | // Decodes the blocks generated by block_builder.cc. |
6 | |
7 | #include "table/block.h" |
8 | |
9 | #include <algorithm> |
10 | #include <cstdint> |
11 | #include <vector> |
12 | |
13 | #include "leveldb/comparator.h" |
14 | #include "table/format.h" |
15 | #include "util/coding.h" |
16 | #include "util/logging.h" |
17 | |
18 | namespace leveldb { |
19 | |
20 | inline uint32_t Block::NumRestarts() const { |
21 | assert(size_ >= sizeof(uint32_t)); |
22 | return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); |
23 | } |
24 | |
25 | Block::Block(const BlockContents& contents) |
26 | : data_(contents.data.data()), |
27 | size_(contents.data.size()), |
28 | owned_(contents.heap_allocated) { |
29 | if (size_ < sizeof(uint32_t)) { |
30 | size_ = 0; // Error marker |
31 | } else { |
32 | size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); |
33 | if (NumRestarts() > max_restarts_allowed) { |
34 | // The size is too small for NumRestarts() |
35 | size_ = 0; |
36 | } else { |
37 | restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); |
38 | } |
39 | } |
40 | } |
41 | |
42 | Block::~Block() { |
43 | if (owned_) { |
44 | delete[] data_; |
45 | } |
46 | } |
47 | |
48 | // Helper routine: decode the next block entry starting at "p", |
49 | // storing the number of shared key bytes, non_shared key bytes, |
50 | // and the length of the value in "*shared", "*non_shared", and |
51 | // "*value_length", respectively. Will not dereference past "limit". |
52 | // |
53 | // If any errors are detected, returns nullptr. Otherwise, returns a |
54 | // pointer to the key delta (just past the three decoded values). |
55 | static inline const char* DecodeEntry(const char* p, const char* limit, |
56 | uint32_t* shared, uint32_t* non_shared, |
57 | uint32_t* value_length) { |
58 | if (limit - p < 3) return nullptr; |
59 | *shared = reinterpret_cast<const uint8_t*>(p)[0]; |
60 | *non_shared = reinterpret_cast<const uint8_t*>(p)[1]; |
61 | *value_length = reinterpret_cast<const uint8_t*>(p)[2]; |
62 | if ((*shared | *non_shared | *value_length) < 128) { |
63 | // Fast path: all three values are encoded in one byte each |
64 | p += 3; |
65 | } else { |
66 | if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; |
67 | if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; |
68 | if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr; |
69 | } |
70 | |
71 | if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) { |
72 | return nullptr; |
73 | } |
74 | return p; |
75 | } |
76 | |
77 | class Block::Iter : public Iterator { |
78 | private: |
79 | const Comparator* const comparator_; |
80 | const char* const data_; // underlying block contents |
81 | uint32_t const restarts_; // Offset of restart array (list of fixed32) |
82 | uint32_t const num_restarts_; // Number of uint32_t entries in restart array |
83 | |
84 | // current_ is offset in data_ of current entry. >= restarts_ if !Valid |
85 | uint32_t current_; |
86 | uint32_t restart_index_; // Index of restart block in which current_ falls |
87 | std::string key_; |
88 | Slice value_; |
89 | Status status_; |
90 | |
91 | inline int Compare(const Slice& a, const Slice& b) const { |
92 | return comparator_->Compare(a, b); |
93 | } |
94 | |
95 | // Return the offset in data_ just past the end of the current entry. |
96 | inline uint32_t NextEntryOffset() const { |
97 | return (value_.data() + value_.size()) - data_; |
98 | } |
99 | |
100 | uint32_t GetRestartPoint(uint32_t index) { |
101 | assert(index < num_restarts_); |
102 | return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); |
103 | } |
104 | |
105 | void SeekToRestartPoint(uint32_t index) { |
106 | key_.clear(); |
107 | restart_index_ = index; |
108 | // current_ will be fixed by ParseNextKey(); |
109 | |
110 | // ParseNextKey() starts at the end of value_, so set value_ accordingly |
111 | uint32_t offset = GetRestartPoint(index); |
112 | value_ = Slice(data_ + offset, 0); |
113 | } |
114 | |
115 | public: |
116 | Iter(const Comparator* comparator, const char* data, uint32_t restarts, |
117 | uint32_t num_restarts) |
118 | : comparator_(comparator), |
119 | data_(data), |
120 | restarts_(restarts), |
121 | num_restarts_(num_restarts), |
122 | current_(restarts_), |
123 | restart_index_(num_restarts_) { |
124 | assert(num_restarts_ > 0); |
125 | } |
126 | |
127 | bool Valid() const override { return current_ < restarts_; } |
128 | Status status() const override { return status_; } |
129 | Slice key() const override { |
130 | assert(Valid()); |
131 | return key_; |
132 | } |
133 | Slice value() const override { |
134 | assert(Valid()); |
135 | return value_; |
136 | } |
137 | |
138 | void Next() override { |
139 | assert(Valid()); |
140 | ParseNextKey(); |
141 | } |
142 | |
143 | void Prev() override { |
144 | assert(Valid()); |
145 | |
146 | // Scan backwards to a restart point before current_ |
147 | const uint32_t original = current_; |
148 | while (GetRestartPoint(restart_index_) >= original) { |
149 | if (restart_index_ == 0) { |
150 | // No more entries |
151 | current_ = restarts_; |
152 | restart_index_ = num_restarts_; |
153 | return; |
154 | } |
155 | restart_index_--; |
156 | } |
157 | |
158 | SeekToRestartPoint(restart_index_); |
159 | do { |
160 | // Loop until end of current entry hits the start of original entry |
161 | } while (ParseNextKey() && NextEntryOffset() < original); |
162 | } |
163 | |
164 | void Seek(const Slice& target) override { |
165 | // Binary search in restart array to find the last restart point |
166 | // with a key < target |
167 | uint32_t left = 0; |
168 | uint32_t right = num_restarts_ - 1; |
169 | int current_key_compare = 0; |
170 | |
171 | if (Valid()) { |
172 | // If we're already scanning, use the current position as a starting |
173 | // point. This is beneficial if the key we're seeking to is ahead of the |
174 | // current position. |
175 | current_key_compare = Compare(key_, target); |
176 | if (current_key_compare < 0) { |
177 | // key_ is smaller than target |
178 | left = restart_index_; |
179 | } else if (current_key_compare > 0) { |
180 | right = restart_index_; |
181 | } else { |
182 | // We're seeking to the key we're already at. |
183 | return; |
184 | } |
185 | } |
186 | |
187 | while (left < right) { |
188 | uint32_t mid = (left + right + 1) / 2; |
189 | uint32_t region_offset = GetRestartPoint(mid); |
190 | uint32_t shared, non_shared, value_length; |
191 | const char* key_ptr = |
192 | DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, |
193 | &non_shared, &value_length); |
194 | if (key_ptr == nullptr || (shared != 0)) { |
195 | CorruptionError(); |
196 | return; |
197 | } |
198 | Slice mid_key(key_ptr, non_shared); |
199 | if (Compare(mid_key, target) < 0) { |
200 | // Key at "mid" is smaller than "target". Therefore all |
201 | // blocks before "mid" are uninteresting. |
202 | left = mid; |
203 | } else { |
204 | // Key at "mid" is >= "target". Therefore all blocks at or |
205 | // after "mid" are uninteresting. |
206 | right = mid - 1; |
207 | } |
208 | } |
209 | |
210 | // We might be able to use our current position within the restart block. |
211 | // This is true if we determined the key we desire is in the current block |
212 | // and is after than the current key. |
213 | assert(current_key_compare == 0 || Valid()); |
214 | bool skip_seek = left == restart_index_ && current_key_compare < 0; |
215 | if (!skip_seek) { |
216 | SeekToRestartPoint(left); |
217 | } |
218 | // Linear search (within restart block) for first key >= target |
219 | while (true) { |
220 | if (!ParseNextKey()) { |
221 | return; |
222 | } |
223 | if (Compare(key_, target) >= 0) { |
224 | return; |
225 | } |
226 | } |
227 | } |
228 | |
229 | void SeekToFirst() override { |
230 | SeekToRestartPoint(0); |
231 | ParseNextKey(); |
232 | } |
233 | |
234 | void SeekToLast() override { |
235 | SeekToRestartPoint(num_restarts_ - 1); |
236 | while (ParseNextKey() && NextEntryOffset() < restarts_) { |
237 | // Keep skipping |
238 | } |
239 | } |
240 | |
241 | private: |
242 | void CorruptionError() { |
243 | current_ = restarts_; |
244 | restart_index_ = num_restarts_; |
245 | status_ = Status::Corruption("bad entry in block" ); |
246 | key_.clear(); |
247 | value_.clear(); |
248 | } |
249 | |
250 | bool ParseNextKey() { |
251 | current_ = NextEntryOffset(); |
252 | const char* p = data_ + current_; |
253 | const char* limit = data_ + restarts_; // Restarts come right after data |
254 | if (p >= limit) { |
255 | // No more entries to return. Mark as invalid. |
256 | current_ = restarts_; |
257 | restart_index_ = num_restarts_; |
258 | return false; |
259 | } |
260 | |
261 | // Decode next entry |
262 | uint32_t shared, non_shared, value_length; |
263 | p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); |
264 | if (p == nullptr || key_.size() < shared) { |
265 | CorruptionError(); |
266 | return false; |
267 | } else { |
268 | key_.resize(shared); |
269 | key_.append(p, non_shared); |
270 | value_ = Slice(p + non_shared, value_length); |
271 | while (restart_index_ + 1 < num_restarts_ && |
272 | GetRestartPoint(restart_index_ + 1) < current_) { |
273 | ++restart_index_; |
274 | } |
275 | return true; |
276 | } |
277 | } |
278 | }; |
279 | |
280 | Iterator* Block::NewIterator(const Comparator* comparator) { |
281 | if (size_ < sizeof(uint32_t)) { |
282 | return NewErrorIterator(Status::Corruption("bad block contents" )); |
283 | } |
284 | const uint32_t num_restarts = NumRestarts(); |
285 | if (num_restarts == 0) { |
286 | return NewEmptyIterator(); |
287 | } else { |
288 | return new Iter(comparator, data_, restart_offset_, num_restarts); |
289 | } |
290 | } |
291 | |
292 | } // namespace leveldb |
293 | |