1// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4//
5// BlockBuilder generates blocks where keys are prefix-compressed:
6//
7// When we store a key, we drop the prefix shared with the previous
8// string. This helps reduce the space requirement significantly.
9// Furthermore, once every K keys, we do not apply the prefix
10// compression and store the entire key. We call this a "restart
11// point". The tail end of the block stores the offsets of all of the
12// restart points, and can be used to do a binary search when looking
13// for a particular key. Values are stored as-is (without compression)
14// immediately following the corresponding key.
15//
16// An entry for a particular key-value pair has the form:
17// shared_bytes: varint32
18// unshared_bytes: varint32
19// value_length: varint32
20// key_delta: char[unshared_bytes]
21// value: char[value_length]
22// shared_bytes == 0 for restart points.
23//
24// The trailer of the block has the form:
25// restarts: uint32[num_restarts]
26// num_restarts: uint32
27// restarts[i] contains the offset within the block of the ith restart point.
28
29#include "table/block_builder.h"
30
31#include <algorithm>
32#include <cassert>
33
34#include "leveldb/comparator.h"
35#include "leveldb/options.h"
36#include "util/coding.h"
37
38namespace leveldb {
39
40BlockBuilder::BlockBuilder(const Options* options)
41 : options_(options), restarts_(), counter_(0), finished_(false) {
42 assert(options->block_restart_interval >= 1);
43 restarts_.push_back(0); // First restart point is at offset 0
44}
45
46void BlockBuilder::Reset() {
47 buffer_.clear();
48 restarts_.clear();
49 restarts_.push_back(0); // First restart point is at offset 0
50 counter_ = 0;
51 finished_ = false;
52 last_key_.clear();
53}
54
55size_t BlockBuilder::CurrentSizeEstimate() const {
56 return (buffer_.size() + // Raw data buffer
57 restarts_.size() * sizeof(uint32_t) + // Restart array
58 sizeof(uint32_t)); // Restart array length
59}
60
61Slice BlockBuilder::Finish() {
62 // Append restart array
63 for (size_t i = 0; i < restarts_.size(); i++) {
64 PutFixed32(&buffer_, restarts_[i]);
65 }
66 PutFixed32(&buffer_, restarts_.size());
67 finished_ = true;
68 return Slice(buffer_);
69}
70
71void BlockBuilder::Add(const Slice& key, const Slice& value) {
72 Slice last_key_piece(last_key_);
73 assert(!finished_);
74 assert(counter_ <= options_->block_restart_interval);
75 assert(buffer_.empty() // No values yet?
76 || options_->comparator->Compare(key, last_key_piece) > 0);
77 size_t shared = 0;
78 if (counter_ < options_->block_restart_interval) {
79 // See how much sharing to do with previous string
80 const size_t min_length = std::min(last_key_piece.size(), key.size());
81 while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
82 shared++;
83 }
84 } else {
85 // Restart compression
86 restarts_.push_back(buffer_.size());
87 counter_ = 0;
88 }
89 const size_t non_shared = key.size() - shared;
90
91 // Add "<shared><non_shared><value_size>" to buffer_
92 PutVarint32(&buffer_, shared);
93 PutVarint32(&buffer_, non_shared);
94 PutVarint32(&buffer_, value.size());
95
96 // Add string delta to buffer_ followed by value
97 buffer_.append(key.data() + shared, non_shared);
98 buffer_.append(value.data(), value.size());
99
100 // Update state
101 last_key_.resize(shared);
102 last_key_.append(key.data() + shared, non_shared);
103 assert(Slice(last_key_) == key);
104 counter_++;
105}
106
107} // namespace leveldb
108