1// Copyright 2005 and onwards Google Inc.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include <algorithm>
30#include <cmath>
31#include <cstdlib>
32#include <random>
33#include <string>
34#include <utility>
35#include <vector>
36
37#include "snappy-test.h"
38
39#include "gtest/gtest.h"
40
41#include "snappy-internal.h"
42#include "snappy-sinksource.h"
43#include "snappy.h"
44#include "snappy_test_data.h"
45
// Test-only flag controlling optional debug output (see its help string);
// presumably consumed by the char-table verification test below — confirm.
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
            "If true, we print the decompression table during tests.");
48
49namespace snappy {
50
51namespace {
52
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  explicit DataEndingAtUnreadablePage(const std::string& s) {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the string flush against the guard page so any read past its
    // end faults immediately.
    char* dst = protected_page_ - size;
    std::memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  // This class exclusively owns its mapping; copying would cause a
  // double munmap() in the destructor, so copies are disallowed.
  DataEndingAtUnreadablePage(const DataEndingAtUnreadablePage&) = delete;
  DataEndingAtUnreadablePage& operator=(const DataEndingAtUnreadablePage&) =
      delete;

  ~DataEndingAtUnreadablePage() {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    // Undo the mprotect so the whole region can be unmapped.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;     // Total bytes mapped (string pages + guard page).
  void* mem_;             // Base address returned by mmap().
  char* protected_page_;  // Start of the PROT_NONE guard page.
  const char* data_;      // Start of the copied string.
  size_t size_;           // Length of the copied string.
};

#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// Fallback for systems without mmap.
using DataEndingAtUnreadablePage = std::string;

#endif
105
106int VerifyString(const std::string& input) {
107 std::string compressed;
108 DataEndingAtUnreadablePage i(input);
109 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
110 CHECK_EQ(written, compressed.size());
111 CHECK_LE(compressed.size(),
112 snappy::MaxCompressedLength(input.size()));
113 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
114
115 std::string uncompressed;
116 DataEndingAtUnreadablePage c(compressed);
117 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
118 CHECK_EQ(uncompressed, input);
119 return uncompressed.size();
120}
121
122void VerifyStringSink(const std::string& input) {
123 std::string compressed;
124 DataEndingAtUnreadablePage i(input);
125 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
126 CHECK_EQ(written, compressed.size());
127 CHECK_LE(compressed.size(),
128 snappy::MaxCompressedLength(input.size()));
129 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
130
131 std::string uncompressed;
132 uncompressed.resize(input.size());
133 snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
134 DataEndingAtUnreadablePage c(compressed);
135 snappy::ByteArraySource source(c.data(), c.size());
136 CHECK(snappy::Uncompress(&source, &sink));
137 CHECK_EQ(uncompressed, input);
138}
139
// Splits `buf` (which must hold a copy of `input`) into a deterministic
// pseudo-random set of iovec entries. The entry count is 1..10 (capped at
// input.size()), zero-length entries are inserted with probability 1/5, and
// the final entry absorbs the remainder so the pieces always cover the
// whole input. The RNG is seeded from input.size(), so the split is
// reproducible. The caller owns the returned array (delete[]).
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
  std::minstd_rand0 rng(input.size());
  std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
  num = uniform_1_to_10(rng);
  if (input.size() < num) num = input.size();

  struct iovec* iov = new iovec[num];
  std::bernoulli_distribution one_in_five(1.0 / 5);
  size_t consumed = 0;
  for (size_t idx = 0; idx < num; ++idx) {
    // Every non-last entry leaves at least one byte, so there is always
    // something left to point at.
    assert(consumed < input.size());
    iov[idx].iov_base = buf + consumed;
    if (idx + 1 == num) {
      // Last entry takes whatever remains.
      iov[idx].iov_len = input.size() - consumed;
    } else if (one_in_five(rng)) {
      // Randomly chosen zero-byte entry.
      iov[idx].iov_len = 0;
    } else {
      // Consume a random amount, always leaving at least one byte over.
      std::uniform_int_distribution<size_t> remaining_len(
          0, input.size() - consumed - 1);
      iov[idx].iov_len = remaining_len(rng);
    }
    consumed += iov[idx].iov_len;
  }
  return iov;
}
169
170int VerifyIOVecSource(const std::string& input) {
171 std::string compressed;
172 std::string copy = input;
173 char* buf = const_cast<char*>(copy.data());
174 size_t num = 0;
175 struct iovec* iov = GetIOVec(input, buf, num);
176 const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
177 CHECK_EQ(written, compressed.size());
178 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
179 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
180
181 std::string uncompressed;
182 DataEndingAtUnreadablePage c(compressed);
183 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
184 CHECK_EQ(uncompressed, input);
185 delete[] iov;
186 return uncompressed.size();
187}
188
189void VerifyIOVecSink(const std::string& input) {
190 std::string compressed;
191 DataEndingAtUnreadablePage i(input);
192 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
193 CHECK_EQ(written, compressed.size());
194 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
195 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
196 char* buf = new char[input.size()];
197 size_t num = 0;
198 struct iovec* iov = GetIOVec(input, buf, num);
199 CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
200 num));
201 CHECK(!memcmp(buf, input.data(), input.size()));
202 delete[] iov;
203 delete[] buf;
204}
205
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
//
// Builds the stream by hand: a varint length prefix followed by a single
// CompressFragment() call spanning the whole input (bypassing the usual
// per-block chopping), then decompresses through the string, Source/Sink,
// and iovec output APIs and compares each result against `input`.
void VerifyNonBlockedCompression(const std::string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  std::string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem(input.size());
  int table_size;
  uint16_t* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  std::string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                 dest, table, table_size);
  // Trim to the bytes CompressFragment actually wrote.
  compressed.resize(end - compressed.data());

  // Uncompress into std::string
  std::string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

  // Uncompress using source/sink
  std::string uncomp_str2;
  uncomp_str2.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncomp_str2, input);

  // Uncompress into iovec
  {
    static const int kNumBlocks = 10;
    struct iovec vec[kNumBlocks];
    // Rounded up so the ten blocks always cover the whole input.
    const int block_size = 1 + input.size() / kNumBlocks;
    std::string iovec_data(block_size * kNumBlocks, 'x');
    for (int i = 0; i < kNumBlocks; ++i) {
      vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
      vec[i].iov_len = block_size;
    }
    CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
                                       vec, kNumBlocks));
    CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
  }
}
259
260// Expand the input so that it is at least K times as big as block size
261std::string Expand(const std::string& input) {
262 static const int K = 3;
263 std::string data = input;
264 while (data.size() < K * snappy::kBlockSize) {
265 data += input;
266 }
267 return data;
268}
269
270int Verify(const std::string& input) {
271 VLOG(1) << "Verifying input of size " << input.size();
272
273 // Compress using string based routines
274 const int result = VerifyString(input);
275
276 // Compress using `iovec`-based routines.
277 CHECK_EQ(VerifyIOVecSource(input), result);
278
279 // Verify using sink based routines
280 VerifyStringSink(input);
281
282 VerifyNonBlockedCompression(input);
283 VerifyIOVecSink(input);
284 if (!input.empty()) {
285 const std::string expanded = Expand(input);
286 VerifyNonBlockedCompression(expanded);
287 VerifyIOVecSink(input);
288 }
289
290 return result;
291}
292
293bool IsValidCompressedBuffer(const std::string& c) {
294 return snappy::IsValidCompressedBuffer(c.data(), c.size());
295}
296bool Uncompress(const std::string& c, std::string* u) {
297 return snappy::Uncompress(c.data(), c.size(), u);
298}
299
// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.
TEST(CorruptedTest, VerifyCorrupted) {
  std::string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  std::string dest;
  std::string uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (char& source_char : source) {
    source_char = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero the varint length prefix so the stream claims 0 output bytes.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3 GB might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
    // This decodes to a really large size, i.e., about 3 GB.
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(dest));
    CHECK(!Uncompress(dest, &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // This decodes to about 2 MB; much smaller, but should still fail.
  dest[0] = dest[1] = dest[2] = '\xff';
  dest[3] = 0x00;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    std::string data =
        ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
    // NOTE(review): this `uncmp` shadows the outer one, giving each bad
    // file a fresh output buffer.
    std::string uncmp;
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32_t ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(data));
    CHECK(!Uncompress(data, &uncmp));
  }
}
368
// Helper routines to construct arbitrary compressed strings.
// These mirror the compression code in snappy.cc, but are copied
// here so that we can bypass some limitations in the how snappy.cc
// invokes these routines.
//
// Emits a snappy LITERAL element onto *dst: the tag byte carries (len-1)
// directly when it fits in 6 bits; otherwise a 60..63 tag is followed by
// that many little-endian length bytes. The literal bytes come last.
void AppendLiteral(std::string* dst, const std::string& literal) {
  if (literal.empty()) return;
  int len_minus_one = literal.size() - 1;
  if (len_minus_one < 60) {
    // Short form: length packed into the tag byte itself.
    dst->push_back(0 | (len_minus_one << 2));
  } else {
    // Long form: little-endian length bytes follow the tag.
    char length_bytes[4];
    int num_length_bytes = 0;
    for (int rest = len_minus_one; rest > 0; rest >>= 8) {
      length_bytes[num_length_bytes++] = rest & 0xff;
    }
    dst->push_back(0 | ((59 + num_length_bytes) << 2));
    dst->append(length_bytes, num_length_bytes);
  }
  dst->append(literal);
}
392
// Emits snappy COPY elements for (offset, length) onto *dst, splitting long
// copies into chunks of at most 64 bytes and picking the narrowest offset
// encoding (1-, 2-, or 4-byte) each chunk allows.
void AppendCopy(std::string* dst, int offset, int length) {
  while (length > 0) {
    // Figure out how much to copy in one shot.
    int chunk;
    if (length >= 68) {
      chunk = 64;
    } else if (length > 64) {
      // Emit 60 now so the final chunk is at least 4 bytes.
      chunk = 60;
    } else {
      chunk = length;
    }
    length -= chunk;

    if (chunk >= 4 && chunk < 12 && offset < 2048) {
      // 1-byte-offset form: length and the offset's high bits share the tag.
      assert(chunk - 4 < 8);  // Must fit in 3 bits
      dst->push_back(1 | ((chunk - 4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
    } else if (offset < 65536) {
      // 2-byte-offset form.
      dst->push_back(2 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
    } else {
      // 4-byte-offset form.
      dst->push_back(3 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back((offset >> 8) & 0xff);
      dst->push_back((offset >> 16) & 0xff);
      dst->push_back((offset >> 24) & 0xff);
    }
  }
}
423
424TEST(Snappy, SimpleTests) {
425 Verify("");
426 Verify("a");
427 Verify("ab");
428 Verify("abc");
429
430 Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
431 Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
432 Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
433 Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
434 Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
435}
436
437// Regression test for cr/345340892.
438TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
439 Verify("abcabcabcabcabcabcab");
440 Verify("abcabcabcabcabcabcab0123456789ABCDEF");
441
442 Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
443 Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
444}
445
446// Regression test for cr/345340892.
447TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
448 std::mt19937 rng;
449 std::uniform_int_distribution<int> uniform_byte(0, 255);
450 for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
451 for (int length = 1; length <= 64; ++length) {
452 for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
453 const int size = pattern_size + length + extra_bytes_after_pattern;
454 std::string input;
455 input.resize(size);
456 for (int i = 0; i < pattern_size; ++i) {
457 input[i] = 'a' + i;
458 }
459 for (int i = 0; i < length; ++i) {
460 input[pattern_size + i] = input[i];
461 }
462 for (int i = 0; i < extra_bytes_after_pattern; ++i) {
463 input[pattern_size + length + i] =
464 static_cast<char>(uniform_byte(rng));
465 }
466 Verify(input);
467 }
468 }
469 }
470}
471
472// Verify max blowup (lots of four-byte copies)
473TEST(Snappy, MaxBlowup) {
474 std::mt19937 rng;
475 std::uniform_int_distribution<int> uniform_byte(0, 255);
476 std::string input;
477 for (int i = 0; i < 80000; ++i)
478 input.push_back(static_cast<char>(uniform_byte(rng)));
479
480 for (int i = 0; i < 80000; i += 4) {
481 std::string four_bytes(input.end() - i - 4, input.end() - i);
482 input.append(four_bytes);
483 }
484 Verify(input);
485}
486
// Feeds Verify() with pseudo-random inputs of varying size and
// compressibility: occasional multi-byte runs (one_in_ten/run_len) and,
// after the first 100 ops, a byte alphabet skewed toward small values.
TEST(Snappy, RandomData) {
  std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
  std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
  std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
  std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
  std::bernoulli_distribution one_in_ten(1.0 / 10);

  constexpr int num_ops = 20000;
  for (int i = 0; i < num_ops; ++i) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    std::string x;
    size_t len = uniform_4k(rng);
    // The first 100 ops use large (>64KB) inputs instead of <=4KB ones.
    if (i < 100) {
      len = 65536 + uniform_64k(rng);
    }
    while (x.size() < len) {
      int run_len = 1;
      // With probability 1/10, emit a run of up to 255 identical bytes.
      if (one_in_ten(rng)) {
        int skewed_bits = uniform_0_to_8(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        run_len = skewed_low(rng);
      }
      char c = static_cast<char>(uniform_byte(rng));
      // After the first 100 ops, restrict bytes to a small skewed range so
      // the data compresses well.
      if (i >= 100) {
        int skewed_bits = uniform_0_to_3(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        c = static_cast<char>(skewed_low(rng));
      }
      while (run_len-- > 0 && x.size() < len) {
        x.push_back(c);
      }
    }

    Verify(x);
  }
}
532
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  std::string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const size_t length = n1 * fragment1.size() + n2 * fragment2.size();

  std::string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  // `src` mirrors what the decompressor should produce.
  std::string src = fragment1;
  for (int i = 0; i < n2; ++i) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  // src.size() is now ~100035 (> 65536), so this copy back to the very
  // start must use the four-byte-offset encoding.
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  std::string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, src);
}
566
567TEST(Snappy, IOVecSourceEdgeCases) {
568 // Validate that empty leading, trailing, and in-between iovecs are handled:
569 // [] [] ['a'] [] ['b'] [].
570 std::string data = "ab";
571 char* buf = const_cast<char*>(data.data());
572 size_t used_so_far = 0;
573 static const int kLengths[] = {0, 0, 1, 0, 1, 0};
574 struct iovec iov[ARRAYSIZE(kLengths)];
575 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
576 iov[i].iov_base = buf + used_so_far;
577 iov[i].iov_len = kLengths[i];
578 used_so_far += kLengths[i];
579 }
580 std::string compressed;
581 snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
582 std::string uncompressed;
583 snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
584 CHECK_EQ(data, uncompressed);
585}
586
TEST(Snappy, IOVecSinkEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [ ] [ ] [ ] [ ] [ ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  // Heap-allocated output blocks, freed at the end of the test.
  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  // Hand-build the compressed stream: 22 uncompressed bytes total.
  std::string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [ ] [ ]
  AppendLiteral(&compressed, "abc123");

  // A copy whose output crosses two blocks (source and destination
  // segments marked).
  // [ab] [c] [1231] [23 ] [ ]
  // ^--^ --
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  //
  // [ab] [c] [1231] [231231 ] [ ]
  // ^--- ^---
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  //
  // [ab] [c] [1231] [23123123] [ ]
  // ^- ^-
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123 ]
  // ^-- ^--
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks:
  //
  // [ab] [c] [1231] [23123123] [123b ]
  // ^ ^
  // [ab] [c] [1231] [23123123] [123bc ]
  // ^ ^
  // [ab] [c] [1231] [23123123] [123bc12 ]
  // ^- ^-
  AppendCopy(&compressed, 17, 4);

  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  // Each block must hold exactly the bytes depicted in the diagrams above.
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
653
654TEST(Snappy, IOVecLiteralOverflow) {
655 static const int kLengths[] = { 3, 4 };
656
657 struct iovec iov[ARRAYSIZE(kLengths)];
658 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
659 iov[i].iov_base = new char[kLengths[i]];
660 iov[i].iov_len = kLengths[i];
661 }
662
663 std::string compressed;
664 Varint::Append32(&compressed, 8);
665
666 AppendLiteral(&compressed, "12345678");
667
668 CHECK(!snappy::RawUncompressToIOVec(
669 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
670
671 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
672 delete[] reinterpret_cast<char *>(iov[i].iov_base);
673 }
674}
675
676TEST(Snappy, IOVecCopyOverflow) {
677 static const int kLengths[] = { 3, 4 };
678
679 struct iovec iov[ARRAYSIZE(kLengths)];
680 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
681 iov[i].iov_base = new char[kLengths[i]];
682 iov[i].iov_len = kLengths[i];
683 }
684
685 std::string compressed;
686 Varint::Append32(&compressed, 8);
687
688 AppendLiteral(&compressed, "123");
689 AppendCopy(&compressed, 3, 5);
690
691 CHECK(!snappy::RawUncompressToIOVec(
692 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
693
694 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
695 delete[] reinterpret_cast<char *>(iov[i].iov_base);
696 }
697}
698
699bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
700 const bool result1 = snappy::GetUncompressedLength(compressed.data(),
701 compressed.size(),
702 ulength);
703
704 snappy::ByteArraySource source(compressed.data(), compressed.size());
705 uint32_t length;
706 const bool result2 = snappy::GetUncompressedLength(&source, &length);
707 CHECK_EQ(result1, result2);
708 return result1;
709}
710
711TEST(SnappyCorruption, TruncatedVarint) {
712 std::string compressed, uncompressed;
713 size_t ulength;
714 compressed.push_back('\xf0');
715 CHECK(!CheckUncompressedLength(compressed, &ulength));
716 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
717 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
718 &uncompressed));
719}
720
721TEST(SnappyCorruption, UnterminatedVarint) {
722 std::string compressed, uncompressed;
723 size_t ulength;
724 compressed.push_back('\x80');
725 compressed.push_back('\x80');
726 compressed.push_back('\x80');
727 compressed.push_back('\x80');
728 compressed.push_back('\x80');
729 compressed.push_back(10);
730 CHECK(!CheckUncompressedLength(compressed, &ulength));
731 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
732 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
733 &uncompressed));
734}
735
736TEST(SnappyCorruption, OverflowingVarint) {
737 std::string compressed, uncompressed;
738 size_t ulength;
739 compressed.push_back('\xfb');
740 compressed.push_back('\xff');
741 compressed.push_back('\xff');
742 compressed.push_back('\xff');
743 compressed.push_back('\x7f');
744 CHECK(!CheckUncompressedLength(compressed, &ulength));
745 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
746 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
747 &uncompressed));
748}
749
750TEST(Snappy, ReadPastEndOfBuffer) {
751 // Check that we do not read past end of input
752
753 // Make a compressed string that ends with a single-byte literal
754 std::string compressed;
755 Varint::Append32(&compressed, 1);
756 AppendLiteral(&compressed, "x");
757
758 std::string uncompressed;
759 DataEndingAtUnreadablePage c(compressed);
760 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
761 CHECK_EQ(uncompressed, std::string("x"));
762}
763
764// Check for an infinite loop caused by a copy with offset==0
765TEST(Snappy, ZeroOffsetCopy) {
766 const char* compressed = "\x40\x12\x00\x00";
767 // \x40 Length (must be > kMaxIncrementCopyOverflow)
768 // \x12\x00\x00 Copy with offset==0, length==5
769 char uncompressed[100];
770 EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
771}
772
773TEST(Snappy, ZeroOffsetCopyValidation) {
774 const char* compressed = "\x05\x12\x00\x00";
775 // \x05 Length
776 // \x12\x00\x00 Copy with offset==0, length==5
777 EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
778}
779
780int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
781 uint64_t data;
782 std::pair<size_t, bool> p =
783 snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
784 CHECK_EQ(p.first < 8, p.second);
785 return p.first;
786}
787
// Each case below lists the expected match length, the two strings, and the
// comparison limit; the prefix padding ("xxxxxx", "abcdefgh") steers which
// internal loop of FindMatchLength handles the mismatch.
TEST(Snappy, FindMatchLength) {
  // Exercise all different code paths through the function.
  // 64-bit version:

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh0123456?xxxxxxxx", 24));

  // 32-bit version:

  // Short matches.
  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
}
877
// Randomized cross-check of FindMatchLength: builds two strings over a
// two-letter alphabet (so shared prefixes of varied length arise), places
// both before unreadable guard pages, and verifies the reported match
// length against a character-by-character comparison.
TEST(Snappy, FindMatchLengthRandom) {
  constexpr int kNumTrials = 10000;
  constexpr int kTypicalLength = 10;
  std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::bernoulli_distribution one_in_two(1.0 / 2);
  std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);

  for (int i = 0; i < kNumTrials; ++i) {
    std::string s, t;
    char a = static_cast<char>(uniform_byte(rng));
    char b = static_cast<char>(uniform_byte(rng));
    // Grow both strings until the geometric stop fires; expected length is
    // kTypicalLength.
    while (!one_in_typical_length(rng)) {
      s.push_back(one_in_two(rng) ? a : b);
      t.push_back(one_in_two(rng) ? a : b);
    }
    DataEndingAtUnreadablePage u(s);
    DataEndingAtUnreadablePage v(t);
    size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
    if (matched == t.size()) {
      EXPECT_EQ(s, t);
    } else {
      // The first mismatch must be exactly at `matched`.
      EXPECT_NE(s[matched], t[matched]);
      for (size_t j = 0; j < matched; ++j) {
        EXPECT_EQ(s[j], t[j]);
      }
    }
  }
}
907
// Packs one decompression-table entry: bits 0..6 hold the length,
// bits 8..10 the high bits of the copy offset, and bits 11..13 the
// number of extra bytes that follow the tag byte.
uint16_t MakeEntry(unsigned int extra, unsigned int len,
                   unsigned int copy_offset) {
  // Each field must fit in its allotted bit range.
  assert((extra & ~0x7u) == 0);        // At most 3 bits
  assert((copy_offset & ~0x7u) == 0);  // At most 3 bits
  assert((len & ~0x7fu) == 0);         // At most 7 bits
  const unsigned int packed = (extra << 11) | (copy_offset << 8) | len;
  return static_cast<uint16_t>(packed);
}
916
// Check that the decompression table is correct, and optionally print out
// the computed one. Rebuilds the 256-entry tag-byte lookup table from the
// format specification and compares it against the recorded
// snappy::internal::char_table.
TEST(Snappy, VerifyCharTable) {
  using snappy::internal::LITERAL;
  using snappy::internal::COPY_1_BYTE_OFFSET;
  using snappy::internal::COPY_2_BYTE_OFFSET;
  using snappy::internal::COPY_4_BYTE_OFFSET;
  using snappy::internal::char_table;

  uint16_t dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; ++i) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries. We store (len-1) in the top 6 bits.
  for (uint8_t len = 1; len <= 60; ++len) {
    dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries. We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 3 bits. offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (uint8_t len = 4; len < 12; ++len) {
    for (uint16_t offset = 0; offset < 2048; offset += 256) {
      // Only the high 3 bits of the offset live in the tag byte; the
      // table entry records them so the decoder can recombine them
      // with the following byte.
      uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
      dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
          MakeEntry(1, len, offset_high);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (uint8_t len = 1; len <= 64; ++len) {
    dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (uint8_t len = 1; len <= 64; ++len) {
    dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  // 60 + 4 + 8*8 + 64 + 64 == 256, so every slot must have been hit.
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
  for (int i = 0; i < 256; ++i) {
    EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
  }

  // Optionally dump the computed table as C source, for regenerating
  // the recorded char_table after a format change.
  if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
    std::printf("static const uint16_t char_table[256] = {\n  ");
    for (int i = 0; i < 256; ++i) {
      std::printf("0x%04x%s",
                  dst[i],
                  ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n  " : ", ")));
    }
    std::printf("};\n");
  }

  // Check that computed table matched recorded table.
  for (int i = 0; i < 256; ++i) {
    EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
  }
}
1000
1001TEST(Snappy, TestBenchmarkFiles) {
1002 for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
1003 Verify(ReadTestDataFile(kTestDataFiles[i].filename,
1004 kTestDataFiles[i].size_limit));
1005 }
1006}
1007
1008} // namespace
1009
1010} // namespace snappy
1011