1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4//
5// Decodes the blocks generated by block_builder.cc.
6
7#include "table/block.h"
8
9#include <algorithm>
10#include <cstdint>
11#include <vector>
12
13#include "leveldb/comparator.h"
14#include "table/format.h"
15#include "util/coding.h"
16#include "util/logging.h"
17
18namespace leveldb {
19
20inline uint32_t Block::NumRestarts() const {
21 assert(size_ >= sizeof(uint32_t));
22 return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
23}
24
25Block::Block(const BlockContents& contents)
26 : data_(contents.data.data()),
27 size_(contents.data.size()),
28 owned_(contents.heap_allocated) {
29 if (size_ < sizeof(uint32_t)) {
30 size_ = 0; // Error marker
31 } else {
32 size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t);
33 if (NumRestarts() > max_restarts_allowed) {
34 // The size is too small for NumRestarts()
35 size_ = 0;
36 } else {
37 restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);
38 }
39 }
40}
41
42Block::~Block() {
43 if (owned_) {
44 delete[] data_;
45 }
46}
47
48// Helper routine: decode the next block entry starting at "p",
49// storing the number of shared key bytes, non_shared key bytes,
50// and the length of the value in "*shared", "*non_shared", and
51// "*value_length", respectively. Will not dereference past "limit".
52//
53// If any errors are detected, returns nullptr. Otherwise, returns a
54// pointer to the key delta (just past the three decoded values).
55static inline const char* DecodeEntry(const char* p, const char* limit,
56 uint32_t* shared, uint32_t* non_shared,
57 uint32_t* value_length) {
58 if (limit - p < 3) return nullptr;
59 *shared = reinterpret_cast<const uint8_t*>(p)[0];
60 *non_shared = reinterpret_cast<const uint8_t*>(p)[1];
61 *value_length = reinterpret_cast<const uint8_t*>(p)[2];
62 if ((*shared | *non_shared | *value_length) < 128) {
63 // Fast path: all three values are encoded in one byte each
64 p += 3;
65 } else {
66 if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr;
67 if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr;
68 if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr;
69 }
70
71 if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
72 return nullptr;
73 }
74 return p;
75}
76
77class Block::Iter : public Iterator {
78 private:
79 const Comparator* const comparator_;
80 const char* const data_; // underlying block contents
81 uint32_t const restarts_; // Offset of restart array (list of fixed32)
82 uint32_t const num_restarts_; // Number of uint32_t entries in restart array
83
84 // current_ is offset in data_ of current entry. >= restarts_ if !Valid
85 uint32_t current_;
86 uint32_t restart_index_; // Index of restart block in which current_ falls
87 std::string key_;
88 Slice value_;
89 Status status_;
90
91 inline int Compare(const Slice& a, const Slice& b) const {
92 return comparator_->Compare(a, b);
93 }
94
95 // Return the offset in data_ just past the end of the current entry.
96 inline uint32_t NextEntryOffset() const {
97 return (value_.data() + value_.size()) - data_;
98 }
99
100 uint32_t GetRestartPoint(uint32_t index) {
101 assert(index < num_restarts_);
102 return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
103 }
104
105 void SeekToRestartPoint(uint32_t index) {
106 key_.clear();
107 restart_index_ = index;
108 // current_ will be fixed by ParseNextKey();
109
110 // ParseNextKey() starts at the end of value_, so set value_ accordingly
111 uint32_t offset = GetRestartPoint(index);
112 value_ = Slice(data_ + offset, 0);
113 }
114
115 public:
116 Iter(const Comparator* comparator, const char* data, uint32_t restarts,
117 uint32_t num_restarts)
118 : comparator_(comparator),
119 data_(data),
120 restarts_(restarts),
121 num_restarts_(num_restarts),
122 current_(restarts_),
123 restart_index_(num_restarts_) {
124 assert(num_restarts_ > 0);
125 }
126
127 bool Valid() const override { return current_ < restarts_; }
128 Status status() const override { return status_; }
129 Slice key() const override {
130 assert(Valid());
131 return key_;
132 }
133 Slice value() const override {
134 assert(Valid());
135 return value_;
136 }
137
138 void Next() override {
139 assert(Valid());
140 ParseNextKey();
141 }
142
143 void Prev() override {
144 assert(Valid());
145
146 // Scan backwards to a restart point before current_
147 const uint32_t original = current_;
148 while (GetRestartPoint(restart_index_) >= original) {
149 if (restart_index_ == 0) {
150 // No more entries
151 current_ = restarts_;
152 restart_index_ = num_restarts_;
153 return;
154 }
155 restart_index_--;
156 }
157
158 SeekToRestartPoint(restart_index_);
159 do {
160 // Loop until end of current entry hits the start of original entry
161 } while (ParseNextKey() && NextEntryOffset() < original);
162 }
163
164 void Seek(const Slice& target) override {
165 // Binary search in restart array to find the last restart point
166 // with a key < target
167 uint32_t left = 0;
168 uint32_t right = num_restarts_ - 1;
169 int current_key_compare = 0;
170
171 if (Valid()) {
172 // If we're already scanning, use the current position as a starting
173 // point. This is beneficial if the key we're seeking to is ahead of the
174 // current position.
175 current_key_compare = Compare(key_, target);
176 if (current_key_compare < 0) {
177 // key_ is smaller than target
178 left = restart_index_;
179 } else if (current_key_compare > 0) {
180 right = restart_index_;
181 } else {
182 // We're seeking to the key we're already at.
183 return;
184 }
185 }
186
187 while (left < right) {
188 uint32_t mid = (left + right + 1) / 2;
189 uint32_t region_offset = GetRestartPoint(mid);
190 uint32_t shared, non_shared, value_length;
191 const char* key_ptr =
192 DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
193 &non_shared, &value_length);
194 if (key_ptr == nullptr || (shared != 0)) {
195 CorruptionError();
196 return;
197 }
198 Slice mid_key(key_ptr, non_shared);
199 if (Compare(mid_key, target) < 0) {
200 // Key at "mid" is smaller than "target". Therefore all
201 // blocks before "mid" are uninteresting.
202 left = mid;
203 } else {
204 // Key at "mid" is >= "target". Therefore all blocks at or
205 // after "mid" are uninteresting.
206 right = mid - 1;
207 }
208 }
209
210 // We might be able to use our current position within the restart block.
211 // This is true if we determined the key we desire is in the current block
212 // and is after than the current key.
213 assert(current_key_compare == 0 || Valid());
214 bool skip_seek = left == restart_index_ && current_key_compare < 0;
215 if (!skip_seek) {
216 SeekToRestartPoint(left);
217 }
218 // Linear search (within restart block) for first key >= target
219 while (true) {
220 if (!ParseNextKey()) {
221 return;
222 }
223 if (Compare(key_, target) >= 0) {
224 return;
225 }
226 }
227 }
228
229 void SeekToFirst() override {
230 SeekToRestartPoint(0);
231 ParseNextKey();
232 }
233
234 void SeekToLast() override {
235 SeekToRestartPoint(num_restarts_ - 1);
236 while (ParseNextKey() && NextEntryOffset() < restarts_) {
237 // Keep skipping
238 }
239 }
240
241 private:
242 void CorruptionError() {
243 current_ = restarts_;
244 restart_index_ = num_restarts_;
245 status_ = Status::Corruption("bad entry in block");
246 key_.clear();
247 value_.clear();
248 }
249
250 bool ParseNextKey() {
251 current_ = NextEntryOffset();
252 const char* p = data_ + current_;
253 const char* limit = data_ + restarts_; // Restarts come right after data
254 if (p >= limit) {
255 // No more entries to return. Mark as invalid.
256 current_ = restarts_;
257 restart_index_ = num_restarts_;
258 return false;
259 }
260
261 // Decode next entry
262 uint32_t shared, non_shared, value_length;
263 p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
264 if (p == nullptr || key_.size() < shared) {
265 CorruptionError();
266 return false;
267 } else {
268 key_.resize(shared);
269 key_.append(p, non_shared);
270 value_ = Slice(p + non_shared, value_length);
271 while (restart_index_ + 1 < num_restarts_ &&
272 GetRestartPoint(restart_index_ + 1) < current_) {
273 ++restart_index_;
274 }
275 return true;
276 }
277 }
278};
279
280Iterator* Block::NewIterator(const Comparator* comparator) {
281 if (size_ < sizeof(uint32_t)) {
282 return NewErrorIterator(Status::Corruption("bad block contents"));
283 }
284 const uint32_t num_restarts = NumRestarts();
285 if (num_restarts == 0) {
286 return NewEmptyIterator();
287 } else {
288 return new Iter(comparator, data_, restart_offset_, num_restarts);
289 }
290}
291
292} // namespace leveldb
293