1// Copyright 2020 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>

#include "snappy-test.h"

#include "snappy-internal.h"
#include "snappy-sinksource.h"
#include "snappy.h"
#include "snappy_test_data.h"
45SNAPPY_FLAG(int32_t, start_len, -1,
46 "Starting prefix size for testing (-1: just full file contents)");
47SNAPPY_FLAG(int32_t, end_len, -1,
48 "Starting prefix size for testing (-1: just full file contents)");
49SNAPPY_FLAG(int32_t, bytes, 10485760,
50 "How many bytes to compress/uncompress per file for timing");
51
52SNAPPY_FLAG(bool, zlib, true,
53 "Run zlib compression (http://www.zlib.net)");
54SNAPPY_FLAG(bool, lzo, true,
55 "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
56SNAPPY_FLAG(bool, lz4, true,
57 "Run LZ4 compression (https://github.com/lz4/lz4)");
58SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
59
60SNAPPY_FLAG(bool, write_compressed, false,
61 "Write compressed versions of each file to <file>.comp");
62SNAPPY_FLAG(bool, write_uncompressed, false,
63 "Write uncompressed versions of each file to <file>.uncomp");
64
65namespace snappy {
66
67namespace {
68
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  // Copies `s` so that its last byte sits immediately before a PROT_NONE
  // guard page; any read past data()+size() faults instead of succeeding.
  explicit DataEndingAtUnreadablePage(const std::string& s) {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    alloc_size_ = space_for_string + page_size;  // plus one guard page
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the copy flush against the start of the guard page.
    char* dst = protected_page_ - size;
    std::memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  // Restores read/write access to the guard page and unmaps the region.
  ~DataEndingAtUnreadablePage() {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    // Undo the mprotect.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;     // total bytes mapped (string pages + guard page)
  void* mem_;             // base address returned by mmap()
  char* protected_page_;  // first byte of the PROT_NONE guard page
  const char* data_;      // start of the copied string within the mapping
  size_t size_;           // length of the copied string
};

#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// Fallback for systems without mmap: a plain string offers the same
// data()/size() interface, just without the unreadable guard page.
using DataEndingAtUnreadablePage = std::string;

#endif
121
// Codecs this tool can benchmark. The enumerator order must match `names`
// below, which is indexed by CompressorType.
enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };

// Human-readable codec names, indexed by CompressorType.
const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
125
// Returns the smallest output-buffer size guaranteed to hold the compressed
// form of `input_size` bytes under codec `comp`. Dies (LOG(FATAL)) if the
// codec value is unknown; cases for codecs not compiled in fall through to
// that same default.
size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB:
      return ZLib::MinCompressbufSize(input_size);
#endif  // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO:
      // Hand-computed worst-case expansion bound (cf. the LZO FAQ's
      // recommended output-size formula) — TODO confirm against LZO docs.
      return input_size + input_size/64 + 16 + 3;
#endif  // LZO_VERSION

#ifdef LZ4_VERSION_NUMBER
    case LZ4:
      return LZ4_compressBound(input_size);
#endif  // LZ4_VERSION_NUMBER

    case SNAPPY:
      return snappy::MaxCompressedLength(input_size);

    default:
      LOG(FATAL) << "Unknown compression type number " << comp;
      return 0;
  }
}
151
152// Returns true if we successfully compressed, false otherwise.
153//
154// If compressed_is_preallocated is set, do not resize the compressed buffer.
155// This is typically what you want for a benchmark, in order to not spend
156// time in the memory allocator. If you do set this flag, however,
157// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
158// number of bytes, and may contain junk bytes at the end after return.
159bool Compress(const char* input, size_t input_size, CompressorType comp,
160 std::string* compressed, bool compressed_is_preallocated) {
161 if (!compressed_is_preallocated) {
162 compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
163 }
164
165 switch (comp) {
166#ifdef ZLIB_VERSION
167 case ZLIB: {
168 ZLib zlib;
169 uLongf destlen = compressed->size();
170 int ret = zlib.Compress(
171 reinterpret_cast<Bytef*>(string_as_array(compressed)),
172 &destlen,
173 reinterpret_cast<const Bytef*>(input),
174 input_size);
175 CHECK_EQ(Z_OK, ret);
176 if (!compressed_is_preallocated) {
177 compressed->resize(destlen);
178 }
179 return true;
180 }
181#endif // ZLIB_VERSION
182
183#ifdef LZO_VERSION
184 case LZO: {
185 unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
186 lzo_uint destlen;
187 int ret = lzo1x_1_15_compress(
188 reinterpret_cast<const uint8_t*>(input),
189 input_size,
190 reinterpret_cast<uint8_t*>(string_as_array(compressed)),
191 &destlen,
192 mem);
193 CHECK_EQ(LZO_E_OK, ret);
194 delete[] mem;
195 if (!compressed_is_preallocated) {
196 compressed->resize(destlen);
197 }
198 break;
199 }
200#endif // LZO_VERSION
201
202#ifdef LZ4_VERSION_NUMBER
203 case LZ4: {
204 int destlen = compressed->size();
205 destlen = LZ4_compress_default(input, string_as_array(compressed),
206 input_size, destlen);
207 CHECK_NE(destlen, 0);
208 if (!compressed_is_preallocated) {
209 compressed->resize(destlen);
210 }
211 break;
212 }
213#endif // LZ4_VERSION_NUMBER
214
215 case SNAPPY: {
216 size_t destlen;
217 snappy::RawCompress(input, input_size,
218 string_as_array(compressed),
219 &destlen);
220 CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
221 if (!compressed_is_preallocated) {
222 compressed->resize(destlen);
223 }
224 break;
225 }
226
227 default: {
228 return false; // the asked-for library wasn't compiled in
229 }
230 }
231 return true;
232}
233
234bool Uncompress(const std::string& compressed, CompressorType comp, int size,
235 std::string* output) {
236 // TODO: Switch to [[maybe_unused]] when we can assume C++17.
237 (void)size;
238 switch (comp) {
239#ifdef ZLIB_VERSION
240 case ZLIB: {
241 output->resize(size);
242 ZLib zlib;
243 uLongf destlen = output->size();
244 int ret = zlib.Uncompress(
245 reinterpret_cast<Bytef*>(string_as_array(output)),
246 &destlen,
247 reinterpret_cast<const Bytef*>(compressed.data()),
248 compressed.size());
249 CHECK_EQ(Z_OK, ret);
250 CHECK_EQ(static_cast<uLongf>(size), destlen);
251 break;
252 }
253#endif // ZLIB_VERSION
254
255#ifdef LZO_VERSION
256 case LZO: {
257 output->resize(size);
258 lzo_uint destlen;
259 int ret = lzo1x_decompress(
260 reinterpret_cast<const uint8_t*>(compressed.data()),
261 compressed.size(),
262 reinterpret_cast<uint8_t*>(string_as_array(output)),
263 &destlen,
264 NULL);
265 CHECK_EQ(LZO_E_OK, ret);
266 CHECK_EQ(static_cast<lzo_uint>(size), destlen);
267 break;
268 }
269#endif // LZO_VERSION
270
271#ifdef LZ4_VERSION_NUMBER
272 case LZ4: {
273 output->resize(size);
274 int destlen = output->size();
275 destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output),
276 compressed.size(), destlen);
277 CHECK_NE(destlen, 0);
278 CHECK_EQ(size, destlen);
279 break;
280 }
281#endif // LZ4_VERSION_NUMBER
282 case SNAPPY: {
283 snappy::RawUncompress(compressed.data(), compressed.size(),
284 string_as_array(output));
285 break;
286 }
287
288 default: {
289 return false; // the asked-for library wasn't compiled in
290 }
291 }
292 return true;
293}
294
// Benchmarks codec `comp` over data[0, length), chopped into blocks of
// `block_size` bytes. Compression and decompression are each timed over
// `repeats` iterations per block, repeated kRuns times; the median run's
// throughput and the compression ratio are printed to stdout. Returns
// silently (with a warning) if the codec is not compiled in.
void Measure(const char* data, size_t length, CompressorType comp, int repeats,
             int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];
  double utime[kRuns];
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    std::vector<const char*> input(num_blocks);
    std::vector<size_t> input_length(num_blocks);
    std::vector<std::string> compressed(num_blocks);
    std::vector<std::string> output(num_blocks);
    for (int b = 0; b < num_blocks; ++b) {
      int input_start = b * block_size;
      int input_limit = std::min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;
    }

    // Pre-grow the output buffers so we don't measure string append time.
    // (Needed here, before the timed runs, so the trial compression below
    // can use the preallocated-buffer path.)
    for (std::string& compressed_block : compressed) {
      compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; ++run) {
      CycleTimer ctimer, utimer;

      // Pre-grow the output buffers so we don't measure string append time.
      // (Re-grow each run: the resizing Compress at the end of the previous
      // run shrank them to the actual compressed size.)
      for (std::string& compressed_block : compressed) {
        compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      ctimer.Start();
      for (int b = 0; b < num_blocks; ++b) {
        for (int i = 0; i < repeats; ++i)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      }
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; ++b) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      // Size the decompression targets outside the timed region.
      for (int b = 0; b < num_blocks; ++b) {
        output[b].resize(input_length[b]);
      }

      utimer.Start();
      for (int i = 0; i < repeats; ++i) {
        for (int b = 0; b < num_blocks; ++b)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      }
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    compressed_size = 0;
    for (const std::string& compressed_item : compressed) {
      compressed_size += compressed_item.size();
    }
  }

  // Report median throughput (MB/s, 1 MB = 1048576 bytes) over the runs.
  std::sort(ctime, ctime + kRuns);
  std::sort(utime, utime + kRuns);
  const int med = kRuns/2;

  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  std::string x = names[comp];
  x += ":";
  std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
                                         : std::string("?");
  std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
              "comp %5.1f MB/s uncomp %5s MB/s\n",
              x.c_str(),
              block_size/(1<<20),
              static_cast<int>(length), static_cast<uint32_t>(compressed_size),
              (compressed_size * 100.0) / std::max<int>(1, length),
              comp_rate,
              urate.c_str());
}
390
391void CompressFile(const char* fname) {
392 std::string fullinput;
393 CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
394
395 std::string compressed;
396 Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
397
398 CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
399 file::Defaults()));
400}
401
402void UncompressFile(const char* fname) {
403 std::string fullinput;
404 CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
405
406 size_t uncompLength;
407 CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(),
408 &uncompLength));
409
410 std::string uncompressed;
411 uncompressed.resize(uncompLength);
412 CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
413
414 CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
415 file::Defaults()));
416}
417
418void MeasureFile(const char* fname) {
419 std::string fullinput;
420 CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
421 std::printf("%-40s :\n", fname);
422
423 int start_len = (snappy::GetFlag(FLAGS_start_len) < 0)
424 ? fullinput.size()
425 : snappy::GetFlag(FLAGS_start_len);
426 int end_len = fullinput.size();
427 if (snappy::GetFlag(FLAGS_end_len) >= 0) {
428 end_len = std::min<int>(fullinput.size(), snappy::GetFlag(FLAGS_end_len));
429 }
430 for (int len = start_len; len <= end_len; ++len) {
431 const char* const input = fullinput.data();
432 int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1);
433 if (snappy::GetFlag(FLAGS_zlib))
434 Measure(input, len, ZLIB, repeats, 1024 << 10);
435 if (snappy::GetFlag(FLAGS_lzo))
436 Measure(input, len, LZO, repeats, 1024 << 10);
437 if (snappy::GetFlag(FLAGS_lz4))
438 Measure(input, len, LZ4, repeats, 1024 << 10);
439 if (snappy::GetFlag(FLAGS_snappy))
440 Measure(input, len, SNAPPY, repeats, 4096 << 10);
441
442 // For block-size based measurements
443 if (0 && snappy::GetFlag(FLAGS_snappy)) {
444 Measure(input, len, SNAPPY, repeats, 8<<10);
445 Measure(input, len, SNAPPY, repeats, 16<<10);
446 Measure(input, len, SNAPPY, repeats, 32<<10);
447 Measure(input, len, SNAPPY, repeats, 64<<10);
448 Measure(input, len, SNAPPY, repeats, 256<<10);
449 Measure(input, len, SNAPPY, repeats, 1024<<10);
450 }
451 }
452}
453
454} // namespace
455
456} // namespace snappy
457
458int main(int argc, char** argv) {
459 InitGoogle(argv[0], &argc, &argv, true);
460
461 for (int arg = 1; arg < argc; ++arg) {
462 if (snappy::GetFlag(FLAGS_write_compressed)) {
463 snappy::CompressFile(argv[arg]);
464 } else if (snappy::GetFlag(FLAGS_write_uncompressed)) {
465 snappy::UncompressFile(argv[arg]);
466 } else {
467 snappy::MeasureFile(argv[arg]);
468 }
469 }
470 return 0;
471}
472