1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | // |
5 | // BlockBuilder generates blocks where keys are prefix-compressed: |
6 | // |
7 | // When we store a key, we drop the prefix shared with the previous |
8 | // string. This helps reduce the space requirement significantly. |
9 | // Furthermore, once every K keys, we do not apply the prefix |
10 | // compression and store the entire key. We call this a "restart |
11 | // point". The tail end of the block stores the offsets of all of the |
12 | // restart points, and can be used to do a binary search when looking |
13 | // for a particular key. Values are stored as-is (without compression) |
14 | // immediately following the corresponding key. |
15 | // |
16 | // An entry for a particular key-value pair has the form: |
17 | // shared_bytes: varint32 |
18 | // unshared_bytes: varint32 |
19 | // value_length: varint32 |
20 | // key_delta: char[unshared_bytes] |
21 | // value: char[value_length] |
22 | // shared_bytes == 0 for restart points. |
23 | // |
24 | // The trailer of the block has the form: |
25 | // restarts: uint32[num_restarts] |
26 | // num_restarts: uint32 |
27 | // restarts[i] contains the offset within the block of the ith restart point. |
28 | |
29 | #include "table/block_builder.h" |
30 | |
31 | #include <algorithm> |
32 | #include <cassert> |
33 | |
34 | #include "leveldb/comparator.h" |
35 | #include "leveldb/options.h" |
36 | #include "util/coding.h" |
37 | |
38 | namespace leveldb { |
39 | |
40 | BlockBuilder::BlockBuilder(const Options* options) |
41 | : options_(options), restarts_(), counter_(0), finished_(false) { |
42 | assert(options->block_restart_interval >= 1); |
43 | restarts_.push_back(0); // First restart point is at offset 0 |
44 | } |
45 | |
46 | void BlockBuilder::Reset() { |
47 | buffer_.clear(); |
48 | restarts_.clear(); |
49 | restarts_.push_back(0); // First restart point is at offset 0 |
50 | counter_ = 0; |
51 | finished_ = false; |
52 | last_key_.clear(); |
53 | } |
54 | |
55 | size_t BlockBuilder::CurrentSizeEstimate() const { |
56 | return (buffer_.size() + // Raw data buffer |
57 | restarts_.size() * sizeof(uint32_t) + // Restart array |
58 | sizeof(uint32_t)); // Restart array length |
59 | } |
60 | |
61 | Slice BlockBuilder::Finish() { |
62 | // Append restart array |
63 | for (size_t i = 0; i < restarts_.size(); i++) { |
64 | PutFixed32(&buffer_, restarts_[i]); |
65 | } |
66 | PutFixed32(&buffer_, restarts_.size()); |
67 | finished_ = true; |
68 | return Slice(buffer_); |
69 | } |
70 | |
71 | void BlockBuilder::Add(const Slice& key, const Slice& value) { |
72 | Slice last_key_piece(last_key_); |
73 | assert(!finished_); |
74 | assert(counter_ <= options_->block_restart_interval); |
75 | assert(buffer_.empty() // No values yet? |
76 | || options_->comparator->Compare(key, last_key_piece) > 0); |
77 | size_t shared = 0; |
78 | if (counter_ < options_->block_restart_interval) { |
79 | // See how much sharing to do with previous string |
80 | const size_t min_length = std::min(last_key_piece.size(), key.size()); |
81 | while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { |
82 | shared++; |
83 | } |
84 | } else { |
85 | // Restart compression |
86 | restarts_.push_back(buffer_.size()); |
87 | counter_ = 0; |
88 | } |
89 | const size_t non_shared = key.size() - shared; |
90 | |
91 | // Add "<shared><non_shared><value_size>" to buffer_ |
92 | PutVarint32(&buffer_, shared); |
93 | PutVarint32(&buffer_, non_shared); |
94 | PutVarint32(&buffer_, value.size()); |
95 | |
96 | // Add string delta to buffer_ followed by value |
97 | buffer_.append(key.data() + shared, non_shared); |
98 | buffer_.append(value.data(), value.size()); |
99 | |
100 | // Update state |
101 | last_key_.resize(shared); |
102 | last_key_.append(key.data() + shared, non_shared); |
103 | assert(Slice(last_key_) == key); |
104 | counter_++; |
105 | } |
106 | |
107 | } // namespace leveldb |
108 | |