1 | // Copyright 2020 Google Inc. All Rights Reserved. |
2 | // |
3 | // Redistribution and use in source and binary forms, with or without |
4 | // modification, are permitted provided that the following conditions are |
5 | // met: |
6 | // |
7 | // * Redistributions of source code must retain the above copyright |
8 | // notice, this list of conditions and the following disclaimer. |
9 | // * Redistributions in binary form must reproduce the above |
10 | // copyright notice, this list of conditions and the following disclaimer |
11 | // in the documentation and/or other materials provided with the |
12 | // distribution. |
13 | // * Neither the name of Google Inc. nor the names of its |
14 | // contributors may be used to endorse or promote products derived from |
15 | // this software without specific prior written permission. |
16 | // |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | |
29 | #include <algorithm> |
30 | #include <cmath> |
31 | #include <cstdint> |
32 | #include <cstdlib> |
33 | #include <random> |
34 | #include <string> |
35 | #include <utility> |
36 | #include <vector> |
37 | |
38 | #include "snappy-test.h" |
39 | |
40 | #include "snappy-internal.h" |
41 | #include "snappy-sinksource.h" |
42 | #include "snappy.h" |
43 | #include "snappy_test_data.h" |
44 | |
45 | SNAPPY_FLAG(int32_t, start_len, -1, |
46 | "Starting prefix size for testing (-1: just full file contents)" ); |
47 | SNAPPY_FLAG(int32_t, end_len, -1, |
48 | "Starting prefix size for testing (-1: just full file contents)" ); |
49 | SNAPPY_FLAG(int32_t, bytes, 10485760, |
50 | "How many bytes to compress/uncompress per file for timing" ); |
51 | |
52 | SNAPPY_FLAG(bool, zlib, true, |
53 | "Run zlib compression (http://www.zlib.net)" ); |
54 | SNAPPY_FLAG(bool, lzo, true, |
55 | "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)" ); |
56 | SNAPPY_FLAG(bool, lz4, true, |
57 | "Run LZ4 compression (https://github.com/lz4/lz4)" ); |
58 | SNAPPY_FLAG(bool, snappy, true, "Run snappy compression" ); |
59 | |
60 | SNAPPY_FLAG(bool, write_compressed, false, |
61 | "Write compressed versions of each file to <file>.comp" ); |
62 | SNAPPY_FLAG(bool, write_uncompressed, false, |
63 | "Write uncompressed versions of each file to <file>.uncomp" ); |
64 | |
65 | namespace snappy { |
66 | |
67 | namespace { |
68 | |
69 | #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF |
70 | |
71 | // To test against code that reads beyond its input, this class copies a |
72 | // string to a newly allocated group of pages, the last of which |
73 | // is made unreadable via mprotect. Note that we need to allocate the |
74 | // memory with mmap(), as POSIX allows mprotect() only on memory allocated |
75 | // with mmap(), and some malloc/posix_memalign implementations expect to |
76 | // be able to read previously allocated memory while doing heap allocations. |
77 | class DataEndingAtUnreadablePage { |
78 | public: |
79 | explicit DataEndingAtUnreadablePage(const std::string& s) { |
80 | const size_t page_size = sysconf(_SC_PAGESIZE); |
81 | const size_t size = s.size(); |
82 | // Round up space for string to a multiple of page_size. |
83 | size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); |
84 | alloc_size_ = space_for_string + page_size; |
85 | mem_ = mmap(NULL, alloc_size_, |
86 | PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
87 | CHECK_NE(MAP_FAILED, mem_); |
88 | protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string; |
89 | char* dst = protected_page_ - size; |
90 | std::memcpy(dst, s.data(), size); |
91 | data_ = dst; |
92 | size_ = size; |
93 | // Make guard page unreadable. |
94 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); |
95 | } |
96 | |
97 | ~DataEndingAtUnreadablePage() { |
98 | const size_t page_size = sysconf(_SC_PAGESIZE); |
99 | // Undo the mprotect. |
100 | CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); |
101 | CHECK_EQ(0, munmap(mem_, alloc_size_)); |
102 | } |
103 | |
104 | const char* data() const { return data_; } |
105 | size_t size() const { return size_; } |
106 | |
107 | private: |
108 | size_t alloc_size_; |
109 | void* mem_; |
110 | char* protected_page_; |
111 | const char* data_; |
112 | size_t size_; |
113 | }; |
114 | |
115 | #else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF |
116 | |
117 | // Fallback for systems without mmap. |
118 | using DataEndingAtUnreadablePage = std::string; |
119 | |
120 | #endif |
121 | |
// Compressors this tool can benchmark. The order here must stay in sync with
// the `names` table below, which is indexed by CompressorType.
enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };

// Human-readable compressor names, indexed by CompressorType.
const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
125 | |
// Returns a worst-case output buffer size sufficient to compress `input_size`
// bytes with compressor `comp`.
//
// NOTE(review): if `comp` names a library that was not compiled in, this
// LOG(FATAL)s rather than returning — callers that want a graceful skip
// (e.g. Measure()) rely on the trial Compress() call being reached; verify
// no caller hits this before that check.
size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
  switch (comp) {
#ifdef ZLIB_VERSION
    case ZLIB:
      return ZLib::MinCompressbufSize(input_size);
#endif // ZLIB_VERSION

#ifdef LZO_VERSION
    case LZO:
      // Worst-case expansion bound for LZO1X.
      return input_size + input_size/64 + 16 + 3;
#endif // LZO_VERSION

#ifdef LZ4_VERSION_NUMBER
    case LZ4:
      return LZ4_compressBound(input_size);
#endif // LZ4_VERSION_NUMBER

    case SNAPPY:
      return snappy::MaxCompressedLength(input_size);

    default:
      LOG(FATAL) << "Unknown compression type number " << comp;
      return 0;
  }
}
151 | |
152 | // Returns true if we successfully compressed, false otherwise. |
153 | // |
154 | // If compressed_is_preallocated is set, do not resize the compressed buffer. |
155 | // This is typically what you want for a benchmark, in order to not spend |
156 | // time in the memory allocator. If you do set this flag, however, |
157 | // "compressed" must be preinitialized to at least MinCompressbufSize(comp) |
158 | // number of bytes, and may contain junk bytes at the end after return. |
159 | bool Compress(const char* input, size_t input_size, CompressorType comp, |
160 | std::string* compressed, bool compressed_is_preallocated) { |
161 | if (!compressed_is_preallocated) { |
162 | compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); |
163 | } |
164 | |
165 | switch (comp) { |
166 | #ifdef ZLIB_VERSION |
167 | case ZLIB: { |
168 | ZLib zlib; |
169 | uLongf destlen = compressed->size(); |
170 | int ret = zlib.Compress( |
171 | reinterpret_cast<Bytef*>(string_as_array(compressed)), |
172 | &destlen, |
173 | reinterpret_cast<const Bytef*>(input), |
174 | input_size); |
175 | CHECK_EQ(Z_OK, ret); |
176 | if (!compressed_is_preallocated) { |
177 | compressed->resize(destlen); |
178 | } |
179 | return true; |
180 | } |
181 | #endif // ZLIB_VERSION |
182 | |
183 | #ifdef LZO_VERSION |
184 | case LZO: { |
185 | unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; |
186 | lzo_uint destlen; |
187 | int ret = lzo1x_1_15_compress( |
188 | reinterpret_cast<const uint8_t*>(input), |
189 | input_size, |
190 | reinterpret_cast<uint8_t*>(string_as_array(compressed)), |
191 | &destlen, |
192 | mem); |
193 | CHECK_EQ(LZO_E_OK, ret); |
194 | delete[] mem; |
195 | if (!compressed_is_preallocated) { |
196 | compressed->resize(destlen); |
197 | } |
198 | break; |
199 | } |
200 | #endif // LZO_VERSION |
201 | |
202 | #ifdef LZ4_VERSION_NUMBER |
203 | case LZ4: { |
204 | int destlen = compressed->size(); |
205 | destlen = LZ4_compress_default(input, string_as_array(compressed), |
206 | input_size, destlen); |
207 | CHECK_NE(destlen, 0); |
208 | if (!compressed_is_preallocated) { |
209 | compressed->resize(destlen); |
210 | } |
211 | break; |
212 | } |
213 | #endif // LZ4_VERSION_NUMBER |
214 | |
215 | case SNAPPY: { |
216 | size_t destlen; |
217 | snappy::RawCompress(input, input_size, |
218 | string_as_array(compressed), |
219 | &destlen); |
220 | CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); |
221 | if (!compressed_is_preallocated) { |
222 | compressed->resize(destlen); |
223 | } |
224 | break; |
225 | } |
226 | |
227 | default: { |
228 | return false; // the asked-for library wasn't compiled in |
229 | } |
230 | } |
231 | return true; |
232 | } |
233 | |
234 | bool Uncompress(const std::string& compressed, CompressorType comp, int size, |
235 | std::string* output) { |
236 | // TODO: Switch to [[maybe_unused]] when we can assume C++17. |
237 | (void)size; |
238 | switch (comp) { |
239 | #ifdef ZLIB_VERSION |
240 | case ZLIB: { |
241 | output->resize(size); |
242 | ZLib zlib; |
243 | uLongf destlen = output->size(); |
244 | int ret = zlib.Uncompress( |
245 | reinterpret_cast<Bytef*>(string_as_array(output)), |
246 | &destlen, |
247 | reinterpret_cast<const Bytef*>(compressed.data()), |
248 | compressed.size()); |
249 | CHECK_EQ(Z_OK, ret); |
250 | CHECK_EQ(static_cast<uLongf>(size), destlen); |
251 | break; |
252 | } |
253 | #endif // ZLIB_VERSION |
254 | |
255 | #ifdef LZO_VERSION |
256 | case LZO: { |
257 | output->resize(size); |
258 | lzo_uint destlen; |
259 | int ret = lzo1x_decompress( |
260 | reinterpret_cast<const uint8_t*>(compressed.data()), |
261 | compressed.size(), |
262 | reinterpret_cast<uint8_t*>(string_as_array(output)), |
263 | &destlen, |
264 | NULL); |
265 | CHECK_EQ(LZO_E_OK, ret); |
266 | CHECK_EQ(static_cast<lzo_uint>(size), destlen); |
267 | break; |
268 | } |
269 | #endif // LZO_VERSION |
270 | |
271 | #ifdef LZ4_VERSION_NUMBER |
272 | case LZ4: { |
273 | output->resize(size); |
274 | int destlen = output->size(); |
275 | destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), |
276 | compressed.size(), destlen); |
277 | CHECK_NE(destlen, 0); |
278 | CHECK_EQ(size, destlen); |
279 | break; |
280 | } |
281 | #endif // LZ4_VERSION_NUMBER |
282 | case SNAPPY: { |
283 | snappy::RawUncompress(compressed.data(), compressed.size(), |
284 | string_as_array(output)); |
285 | break; |
286 | } |
287 | |
288 | default: { |
289 | return false; // the asked-for library wasn't compiled in |
290 | } |
291 | } |
292 | return true; |
293 | } |
294 | |
// Benchmarks compressor `comp` over data[0, length), chopped into
// block_size-byte blocks. Each timed run compresses/uncompresses every block
// `repeats` times; the median of kRuns runs is reported, along with the
// compression ratio, via std::printf. Silently skips (with a WARNING) if the
// requested library wasn't compiled in.
void Measure(const char* data, size_t length, CompressorType comp, int repeats,
             int block_size) {
  // Run tests a few time and pick median running times
  static const int kRuns = 5;
  double ctime[kRuns];  // per-run compression wall time
  double utime[kRuns];  // per-run uncompression wall time
  int compressed_size = 0;

  {
    // Chop the input into blocks
    int num_blocks = (length + block_size - 1) / block_size;
    std::vector<const char*> input(num_blocks);
    std::vector<size_t> input_length(num_blocks);
    std::vector<std::string> compressed(num_blocks);
    std::vector<std::string> output(num_blocks);
    for (int b = 0; b < num_blocks; ++b) {
      int input_start = b * block_size;
      // The final block may be shorter than block_size.
      int input_limit = std::min<int>((b+1)*block_size, length);
      input[b] = data+input_start;
      input_length[b] = input_limit-input_start;
    }

    // Pre-grow the output buffers so we don't measure string append time.
    for (std::string& compressed_block : compressed) {
      compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
    }

    // First, try one trial compression to make sure the code is compiled in
    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
      LOG(WARNING) << "Skipping " << names[comp] << ": "
                   << "library not compiled in";
      return;
    }

    for (int run = 0; run < kRuns; ++run) {
      CycleTimer ctimer, utimer;

      // Pre-grow the output buffers so we don't measure string append time.
      // (Redone every run because the final resizing pass below shrinks them.)
      for (std::string& compressed_block : compressed) {
        compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
      }

      // Timed compression pass: preallocated buffers, no resizing.
      ctimer.Start();
      for (int b = 0; b < num_blocks; ++b) {
        for (int i = 0; i < repeats; ++i)
          Compress(input[b], input_length[b], comp, &compressed[b], true);
      }
      ctimer.Stop();

      // Compress once more, with resizing, so we don't leave junk
      // at the end that will confuse the decompressor.
      for (int b = 0; b < num_blocks; ++b) {
        Compress(input[b], input_length[b], comp, &compressed[b], false);
      }

      // Pre-size the uncompression destinations outside the timed region.
      for (int b = 0; b < num_blocks; ++b) {
        output[b].resize(input_length[b]);
      }

      // Timed uncompression pass.
      utimer.Start();
      for (int i = 0; i < repeats; ++i) {
        for (int b = 0; b < num_blocks; ++b)
          Uncompress(compressed[b], comp, input_length[b], &output[b]);
      }
      utimer.Stop();

      ctime[run] = ctimer.Get();
      utime[run] = utimer.Get();
    }

    // Total compressed size across blocks (from the final resized pass),
    // used for the ratio printed below.
    compressed_size = 0;
    for (const std::string& compressed_item : compressed) {
      compressed_size += compressed_item.size();
    }
  }

  // Report the median of the kRuns timings.
  std::sort(ctime, ctime + kRuns);
  std::sort(utime, utime + kRuns);
  const int med = kRuns/2;

  // Throughput in MB/s; each of the `repeats` passes processed `length` bytes.
  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
  std::string x = names[comp];
  x += ":";
  std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
                                         : std::string("?");
  std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
              "comp %5.1f MB/s uncomp %5s MB/s\n",
              x.c_str(),
              block_size/(1<<20),
              static_cast<int>(length), static_cast<uint32_t>(compressed_size),
              (compressed_size * 100.0) / std::max<int>(1, length),
              comp_rate,
              urate.c_str());
}
390 | |
391 | void CompressFile(const char* fname) { |
392 | std::string fullinput; |
393 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); |
394 | |
395 | std::string compressed; |
396 | Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); |
397 | |
398 | CHECK_OK(file::SetContents(std::string(fname).append(".comp" ), compressed, |
399 | file::Defaults())); |
400 | } |
401 | |
402 | void UncompressFile(const char* fname) { |
403 | std::string fullinput; |
404 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); |
405 | |
406 | size_t uncompLength; |
407 | CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), |
408 | &uncompLength)); |
409 | |
410 | std::string uncompressed; |
411 | uncompressed.resize(uncompLength); |
412 | CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); |
413 | |
414 | CHECK_OK(file::SetContents(std::string(fname).append(".uncomp" ), uncompressed, |
415 | file::Defaults())); |
416 | } |
417 | |
418 | void MeasureFile(const char* fname) { |
419 | std::string fullinput; |
420 | CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); |
421 | std::printf("%-40s :\n" , fname); |
422 | |
423 | int start_len = (snappy::GetFlag(FLAGS_start_len) < 0) |
424 | ? fullinput.size() |
425 | : snappy::GetFlag(FLAGS_start_len); |
426 | int end_len = fullinput.size(); |
427 | if (snappy::GetFlag(FLAGS_end_len) >= 0) { |
428 | end_len = std::min<int>(fullinput.size(), snappy::GetFlag(FLAGS_end_len)); |
429 | } |
430 | for (int len = start_len; len <= end_len; ++len) { |
431 | const char* const input = fullinput.data(); |
432 | int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1); |
433 | if (snappy::GetFlag(FLAGS_zlib)) |
434 | Measure(input, len, ZLIB, repeats, 1024 << 10); |
435 | if (snappy::GetFlag(FLAGS_lzo)) |
436 | Measure(input, len, LZO, repeats, 1024 << 10); |
437 | if (snappy::GetFlag(FLAGS_lz4)) |
438 | Measure(input, len, LZ4, repeats, 1024 << 10); |
439 | if (snappy::GetFlag(FLAGS_snappy)) |
440 | Measure(input, len, SNAPPY, repeats, 4096 << 10); |
441 | |
442 | // For block-size based measurements |
443 | if (0 && snappy::GetFlag(FLAGS_snappy)) { |
444 | Measure(input, len, SNAPPY, repeats, 8<<10); |
445 | Measure(input, len, SNAPPY, repeats, 16<<10); |
446 | Measure(input, len, SNAPPY, repeats, 32<<10); |
447 | Measure(input, len, SNAPPY, repeats, 64<<10); |
448 | Measure(input, len, SNAPPY, repeats, 256<<10); |
449 | Measure(input, len, SNAPPY, repeats, 1024<<10); |
450 | } |
451 | } |
452 | } |
453 | |
454 | } // namespace |
455 | |
456 | } // namespace snappy |
457 | |
458 | int main(int argc, char** argv) { |
459 | InitGoogle(argv[0], &argc, &argv, true); |
460 | |
461 | for (int arg = 1; arg < argc; ++arg) { |
462 | if (snappy::GetFlag(FLAGS_write_compressed)) { |
463 | snappy::CompressFile(argv[arg]); |
464 | } else if (snappy::GetFlag(FLAGS_write_uncompressed)) { |
465 | snappy::UncompressFile(argv[arg]); |
466 | } else { |
467 | snappy::MeasureFile(argv[arg]); |
468 | } |
469 | } |
470 | return 0; |
471 | } |
472 | |