1// Copyright 2005 and onwards Google Inc.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include <algorithm>
30#include <cmath>
31#include <cstdlib>
32#include <random>
33#include <string>
34#include <utility>
35#include <vector>
36
37#include "snappy-test.h"
38
39#include "gtest/gtest.h"
40
41#include "snappy-internal.h"
42#include "snappy-sinksource.h"
43#include "snappy.h"
44#include "snappy_test_data.h"
45
// Test-only flag controlling optional debug output (see its help string);
// presumably consumed by the char-table verification test below — confirm.
SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
            "If true, we print the decompression table during tests.");
48
49namespace snappy {
50
51namespace {
52
#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
// is made unreadable via mprotect. Note that we need to allocate the
// memory with mmap(), as POSIX allows mprotect() only on memory allocated
// with mmap(), and some malloc/posix_memalign implementations expect to
// be able to read previously allocated memory while doing heap allocations.
class DataEndingAtUnreadablePage {
 public:
  explicit DataEndingAtUnreadablePage(const std::string& s) {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t size = s.size();
    // Round up space for string to a multiple of page_size.
    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
    alloc_size_ = space_for_string + page_size;
    mem_ = mmap(NULL, alloc_size_,
                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    CHECK_NE(MAP_FAILED, mem_);
    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
    // Place the string flush against the guard page so any read past its
    // end faults immediately.
    char* dst = protected_page_ - size;
    std::memcpy(dst, s.data(), size);
    data_ = dst;
    size_ = size;
    // Make guard page unreadable.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
  }

  // This class exclusively owns its mapping; copying would cause a
  // double munmap() in the destructor, so copies are disallowed.
  DataEndingAtUnreadablePage(const DataEndingAtUnreadablePage&) = delete;
  DataEndingAtUnreadablePage& operator=(const DataEndingAtUnreadablePage&) =
      delete;

  ~DataEndingAtUnreadablePage() {
    const size_t page_size = sysconf(_SC_PAGESIZE);
    // Undo the mprotect so the whole region can be unmapped.
    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
    CHECK_EQ(0, munmap(mem_, alloc_size_));
  }

  const char* data() const { return data_; }
  size_t size() const { return size_; }

 private:
  size_t alloc_size_;     // Total bytes mapped (string pages + guard page).
  void* mem_;             // Base address returned by mmap().
  char* protected_page_;  // Start of the PROT_NONE guard page.
  const char* data_;      // Start of the copied string.
  size_t size_;           // Length of the copied string.
};

#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF

// Fallback for systems without mmap.
using DataEndingAtUnreadablePage = std::string;

#endif
105
106int VerifyString(const std::string& input) {
107 std::string compressed;
108 DataEndingAtUnreadablePage i(input);
109 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
110 CHECK_EQ(written, compressed.size());
111 CHECK_LE(compressed.size(),
112 snappy::MaxCompressedLength(input.size()));
113 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
114
115 std::string uncompressed;
116 DataEndingAtUnreadablePage c(compressed);
117 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
118 CHECK_EQ(uncompressed, input);
119 return uncompressed.size();
120}
121
122void VerifyStringSink(const std::string& input) {
123 std::string compressed;
124 DataEndingAtUnreadablePage i(input);
125 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
126 CHECK_EQ(written, compressed.size());
127 CHECK_LE(compressed.size(),
128 snappy::MaxCompressedLength(input.size()));
129 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
130
131 std::string uncompressed;
132 uncompressed.resize(input.size());
133 snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
134 DataEndingAtUnreadablePage c(compressed);
135 snappy::ByteArraySource source(c.data(), c.size());
136 CHECK(snappy::Uncompress(&source, &sink));
137 CHECK_EQ(uncompressed, input);
138}
139
// Splits `buf` (which must hold a copy of `input`) into a deterministic
// pseudo-random set of iovec entries. The entry count is 1..10 (capped at
// input.size()), zero-length entries are inserted with probability 1/5, and
// the final entry absorbs the remainder so the pieces always cover the
// whole input. The RNG is seeded from input.size(), so the split is
// reproducible. The caller owns the returned array (delete[]).
struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
  std::minstd_rand0 rng(input.size());
  std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
  num = uniform_1_to_10(rng);
  if (input.size() < num) num = input.size();

  struct iovec* iov = new iovec[num];
  std::bernoulli_distribution one_in_five(1.0 / 5);
  size_t consumed = 0;
  for (size_t idx = 0; idx < num; ++idx) {
    // Every non-last entry leaves at least one byte, so there is always
    // something left to point at.
    assert(consumed < input.size());
    iov[idx].iov_base = buf + consumed;
    if (idx + 1 == num) {
      // Last entry takes whatever remains.
      iov[idx].iov_len = input.size() - consumed;
    } else if (one_in_five(rng)) {
      // Randomly chosen zero-byte entry.
      iov[idx].iov_len = 0;
    } else {
      // Consume a random amount, always leaving at least one byte over.
      std::uniform_int_distribution<size_t> remaining_len(
          0, input.size() - consumed - 1);
      iov[idx].iov_len = remaining_len(rng);
    }
    consumed += iov[idx].iov_len;
  }
  return iov;
}
169
170int VerifyIOVecSource(const std::string& input) {
171 std::string compressed;
172 std::string copy = input;
173 char* buf = const_cast<char*>(copy.data());
174 size_t num = 0;
175 struct iovec* iov = GetIOVec(input, buf, num);
176 const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
177 CHECK_EQ(written, compressed.size());
178 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
179 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
180
181 std::string uncompressed;
182 DataEndingAtUnreadablePage c(compressed);
183 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
184 CHECK_EQ(uncompressed, input);
185 delete[] iov;
186 return uncompressed.size();
187}
188
189void VerifyIOVecSink(const std::string& input) {
190 std::string compressed;
191 DataEndingAtUnreadablePage i(input);
192 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
193 CHECK_EQ(written, compressed.size());
194 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
195 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
196 char* buf = new char[input.size()];
197 size_t num = 0;
198 struct iovec* iov = GetIOVec(input, buf, num);
199 CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
200 num));
201 CHECK(!memcmp(buf, input.data(), input.size()));
202 delete[] iov;
203 delete[] buf;
204}
205
// Test that data compressed by a compressor that does not
// obey block sizes is uncompressed properly.
//
// Builds the stream by hand: a varint length prefix followed by a single
// CompressFragment() call spanning the whole input (bypassing the usual
// per-block chopping), then decompresses through the string, Source/Sink,
// and iovec output APIs and compares each result against `input`.
void VerifyNonBlockedCompression(const std::string& input) {
  if (input.length() > snappy::kBlockSize) {
    // We cannot test larger blocks than the maximum block size, obviously.
    return;
  }

  std::string prefix;
  Varint::Append32(&prefix, input.size());

  // Setup compression table
  snappy::internal::WorkingMemory wmem(input.size());
  int table_size;
  uint16_t* table = wmem.GetHashTable(input.size(), &table_size);

  // Compress entire input in one shot
  std::string compressed;
  compressed += prefix;
  compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
  char* dest = string_as_array(&compressed) + prefix.size();
  char* end = snappy::internal::CompressFragment(input.data(), input.size(),
                                                 dest, table, table_size);
  // Trim to the bytes CompressFragment actually wrote.
  compressed.resize(end - compressed.data());

  // Uncompress into std::string
  std::string uncomp_str;
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
  CHECK_EQ(uncomp_str, input);

  // Uncompress using source/sink
  std::string uncomp_str2;
  uncomp_str2.resize(input.size());
  snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
  snappy::ByteArraySource source(compressed.data(), compressed.size());
  CHECK(snappy::Uncompress(&source, &sink));
  CHECK_EQ(uncomp_str2, input);

  // Uncompress into iovec
  {
    static const int kNumBlocks = 10;
    struct iovec vec[kNumBlocks];
    // Rounded up so the ten blocks always cover the whole input.
    const int block_size = 1 + input.size() / kNumBlocks;
    std::string iovec_data(block_size * kNumBlocks, 'x');
    for (int i = 0; i < kNumBlocks; ++i) {
      vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
      vec[i].iov_len = block_size;
    }
    CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
                                       vec, kNumBlocks));
    CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
  }
}
259
260// Expand the input so that it is at least K times as big as block size
261std::string Expand(const std::string& input) {
262 static const int K = 3;
263 std::string data = input;
264 while (data.size() < K * snappy::kBlockSize) {
265 data += input;
266 }
267 return data;
268}
269
270int Verify(const std::string& input) {
271 VLOG(1) << "Verifying input of size " << input.size();
272
273 // Compress using string based routines
274 const int result = VerifyString(input);
275
276 // Compress using `iovec`-based routines.
277 CHECK_EQ(VerifyIOVecSource(input), result);
278
279 // Verify using sink based routines
280 VerifyStringSink(input);
281
282 VerifyNonBlockedCompression(input);
283 VerifyIOVecSink(input);
284 if (!input.empty()) {
285 const std::string expanded = Expand(input);
286 VerifyNonBlockedCompression(expanded);
287 VerifyIOVecSink(input);
288 }
289
290 return result;
291}
292
293bool IsValidCompressedBuffer(const std::string& c) {
294 return snappy::IsValidCompressedBuffer(c.data(), c.size());
295}
296bool Uncompress(const std::string& c, std::string* u) {
297 return snappy::Uncompress(c.data(), c.size(), u);
298}
299
// This test checks to ensure that snappy doesn't coredump if it gets
// corrupted data.
TEST(CorruptedTest, VerifyCorrupted) {
  std::string source = "making sure we don't crash with corrupted input";
  VLOG(1) << source;
  std::string dest;
  std::string uncmp;
  snappy::Compress(source.data(), source.size(), &dest);

  // Mess around with the data. It's hard to simulate all possible
  // corruptions; this is just one example ...
  CHECK_GT(dest.size(), 3);
  dest[1]--;
  dest[3]++;
  // this really ought to fail.
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // This is testing for a security bug - a buffer that decompresses to 100k
  // but we lie in the snappy header and only reserve 0 bytes of memory :)
  source.resize(100000);
  for (char& source_char : source) {
    source_char = 'A';
  }
  snappy::Compress(source.data(), source.size(), &dest);
  // Zero the varint length prefix so the stream claims 0 output bytes.
  dest[0] = dest[1] = dest[2] = dest[3] = 0;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  if (sizeof(void *) == 4) {
    // Another security check; check a crazy big length can't DoS us with an
    // over-allocation.
    // Currently this is done only for 32-bit builds. On 64-bit builds,
    // where 3 GB might be an acceptable allocation size, Uncompress()
    // attempts to decompress, and sometimes causes the test to run out of
    // memory.
    dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
    // This decodes to a really large size, i.e., about 3 GB.
    dest[4] = 'k';
    CHECK(!IsValidCompressedBuffer(dest));
    CHECK(!Uncompress(dest, &uncmp));
  } else {
    LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
  }

  // This decodes to about 2 MB; much smaller, but should still fail.
  dest[0] = dest[1] = dest[2] = '\xff';
  dest[3] = 0x00;
  CHECK(!IsValidCompressedBuffer(dest));
  CHECK(!Uncompress(dest, &uncmp));

  // try reading stuff in from a bad file.
  for (int i = 1; i <= 3; ++i) {
    std::string data =
        ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
    // NOTE(review): this `uncmp` shadows the outer one, giving each bad
    // file a fresh output buffer.
    std::string uncmp;
    // check that we don't return a crazy length
    size_t ulen;
    CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
          || (ulen < (1<<20)));
    uint32_t ulen2;
    snappy::ByteArraySource source(data.data(), data.size());
    CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
          (ulen2 < (1<<20)));
    CHECK(!IsValidCompressedBuffer(data));
    CHECK(!Uncompress(data, &uncmp));
  }
}
368
// Helper routines to construct arbitrary compressed strings.
// These mirror the compression code in snappy.cc, but are copied
// here so that we can bypass some limitations in the how snappy.cc
// invokes these routines.
//
// Emits a snappy LITERAL element onto *dst: the tag byte carries (len-1)
// directly when it fits in 6 bits; otherwise a 60..63 tag is followed by
// that many little-endian length bytes. The literal bytes come last.
void AppendLiteral(std::string* dst, const std::string& literal) {
  if (literal.empty()) return;
  int len_minus_one = literal.size() - 1;
  if (len_minus_one < 60) {
    // Short form: length packed into the tag byte itself.
    dst->push_back(0 | (len_minus_one << 2));
  } else {
    // Long form: little-endian length bytes follow the tag.
    char length_bytes[4];
    int num_length_bytes = 0;
    for (int rest = len_minus_one; rest > 0; rest >>= 8) {
      length_bytes[num_length_bytes++] = rest & 0xff;
    }
    dst->push_back(0 | ((59 + num_length_bytes) << 2));
    dst->append(length_bytes, num_length_bytes);
  }
  dst->append(literal);
}
392
// Emits snappy COPY elements for (offset, length) onto *dst, splitting long
// copies into chunks of at most 64 bytes and picking the narrowest offset
// encoding (1-, 2-, or 4-byte) each chunk allows.
void AppendCopy(std::string* dst, int offset, int length) {
  while (length > 0) {
    // Figure out how much to copy in one shot.
    int chunk;
    if (length >= 68) {
      chunk = 64;
    } else if (length > 64) {
      // Emit 60 now so the final chunk is at least 4 bytes.
      chunk = 60;
    } else {
      chunk = length;
    }
    length -= chunk;

    if (chunk >= 4 && chunk < 12 && offset < 2048) {
      // 1-byte-offset form: length and the offset's high bits share the tag.
      assert(chunk - 4 < 8);  // Must fit in 3 bits
      dst->push_back(1 | ((chunk - 4) << 2) | ((offset >> 8) << 5));
      dst->push_back(offset & 0xff);
    } else if (offset < 65536) {
      // 2-byte-offset form.
      dst->push_back(2 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back(offset >> 8);
    } else {
      // 4-byte-offset form.
      dst->push_back(3 | ((chunk - 1) << 2));
      dst->push_back(offset & 0xff);
      dst->push_back((offset >> 8) & 0xff);
      dst->push_back((offset >> 16) & 0xff);
      dst->push_back((offset >> 24) & 0xff);
    }
  }
}
423
424TEST(Snappy, SimpleTests) {
425 Verify("");
426 Verify("a");
427 Verify("ab");
428 Verify("abc");
429
430 Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
431 Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
432 Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
433 Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
434 Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
435}
436
437// Regression test for cr/345340892.
438TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
439 Verify("abcabcabcabcabcabcab");
440 Verify("abcabcabcabcabcabcab0123456789ABCDEF");
441
442 Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
443 Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
444}
445
446// Regression test for cr/345340892.
447TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
448 std::mt19937 rng;
449 std::uniform_int_distribution<int> uniform_byte(0, 255);
450 for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
451 for (int length = 1; length <= 64; ++length) {
452 for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
453 const int size = pattern_size + length + extra_bytes_after_pattern;
454 std::string input;
455 input.resize(size);
456 for (int i = 0; i < pattern_size; ++i) {
457 input[i] = 'a' + i;
458 }
459 for (int i = 0; i < length; ++i) {
460 input[pattern_size + i] = input[i];
461 }
462 for (int i = 0; i < extra_bytes_after_pattern; ++i) {
463 input[pattern_size + length + i] =
464 static_cast<char>(uniform_byte(rng));
465 }
466 Verify(input);
467 }
468 }
469 }
470}
471
472// Verify max blowup (lots of four-byte copies)
473TEST(Snappy, MaxBlowup) {
474 std::mt19937 rng;
475 std::uniform_int_distribution<int> uniform_byte(0, 255);
476 std::string input;
477 for (int i = 0; i < 80000; ++i)
478 input.push_back(static_cast<char>(uniform_byte(rng)));
479
480 for (int i = 0; i < 80000; i += 4) {
481 std::string four_bytes(input.end() - i - 4, input.end() - i);
482 input.append(four_bytes);
483 }
484 Verify(input);
485}
486
// Feeds Verify() with pseudo-random inputs of varying size and
// compressibility: occasional multi-byte runs (one_in_ten/run_len) and,
// after the first 100 ops, a byte alphabet skewed toward small values.
TEST(Snappy, RandomData) {
  std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
  std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
  std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
  std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
  std::bernoulli_distribution one_in_ten(1.0 / 10);

  constexpr int num_ops = 20000;
  for (int i = 0; i < num_ops; ++i) {
    if ((i % 1000) == 0) {
      VLOG(0) << "Random op " << i << " of " << num_ops;
    }

    std::string x;
    size_t len = uniform_4k(rng);
    // The first 100 ops use large (>64KB) inputs instead of <=4KB ones.
    if (i < 100) {
      len = 65536 + uniform_64k(rng);
    }
    while (x.size() < len) {
      int run_len = 1;
      // With probability 1/10, emit a run of up to 255 identical bytes.
      if (one_in_ten(rng)) {
        int skewed_bits = uniform_0_to_8(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        run_len = skewed_low(rng);
      }
      char c = static_cast<char>(uniform_byte(rng));
      // After the first 100 ops, restrict bytes to a small skewed range so
      // the data compresses well.
      if (i >= 100) {
        int skewed_bits = uniform_0_to_3(rng);
        // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
        std::uniform_int_distribution<int> skewed_low(0,
                                                      (1 << skewed_bits) - 1);
        c = static_cast<char>(skewed_low(rng));
      }
      while (run_len-- > 0 && x.size() < len) {
        x.push_back(c);
      }
    }

    Verify(x);
  }
}
532
TEST(Snappy, FourByteOffset) {
  // The new compressor cannot generate four-byte offsets since
  // it chops up the input into 32KB pieces. So we hand-emit the
  // copy manually.

  // The two fragments that make up the input string.
  std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
  std::string fragment2 = "some other string";

  // How many times each fragment is emitted.
  const int n1 = 2;
  const int n2 = 100000 / fragment2.size();
  const size_t length = n1 * fragment1.size() + n2 * fragment2.size();

  std::string compressed;
  Varint::Append32(&compressed, length);

  AppendLiteral(&compressed, fragment1);
  // `src` mirrors what the decompressor should produce.
  std::string src = fragment1;
  for (int i = 0; i < n2; ++i) {
    AppendLiteral(&compressed, fragment2);
    src += fragment2;
  }
  // src.size() is now ~100035 (> 65536), so this copy back to the very
  // start must use the four-byte-offset encoding.
  AppendCopy(&compressed, src.size(), fragment1.size());
  src += fragment1;
  CHECK_EQ(length, src.size());

  std::string uncompressed;
  CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
                           &uncompressed));
  CHECK_EQ(uncompressed, src);
}
566
567TEST(Snappy, IOVecSourceEdgeCases) {
568 // Validate that empty leading, trailing, and in-between iovecs are handled:
569 // [] [] ['a'] [] ['b'] [].
570 std::string data = "ab";
571 char* buf = const_cast<char*>(data.data());
572 size_t used_so_far = 0;
573 static const int kLengths[] = {0, 0, 1, 0, 1, 0};
574 struct iovec iov[ARRAYSIZE(kLengths)];
575 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
576 iov[i].iov_base = buf + used_so_far;
577 iov[i].iov_len = kLengths[i];
578 used_so_far += kLengths[i];
579 }
580 std::string compressed;
581 snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
582 std::string uncompressed;
583 snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
584 CHECK_EQ(data, uncompressed);
585}
586
TEST(Snappy, IOVecSinkEdgeCases) {
  // Test some tricky edge cases in the iovec output that are not necessarily
  // exercised by random tests.

  // Our output blocks look like this initially (the last iovec is bigger
  // than depicted):
  // [ ] [ ] [ ] [ ] [ ]
  static const int kLengths[] = { 2, 1, 4, 8, 128 };

  // Heap-allocated output blocks, freed at the end of the test.
  struct iovec iov[ARRAYSIZE(kLengths)];
  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    iov[i].iov_base = new char[kLengths[i]];
    iov[i].iov_len = kLengths[i];
  }

  // Hand-build the compressed stream: 22 uncompressed bytes total.
  std::string compressed;
  Varint::Append32(&compressed, 22);

  // A literal whose output crosses three blocks.
  // [ab] [c] [123 ] [ ] [ ]
  AppendLiteral(&compressed, "abc123");

  // A copy whose output crosses two blocks (source and destination
  // segments marked).
  // [ab] [c] [1231] [23 ] [ ]
  // ^--^ --
  AppendCopy(&compressed, 3, 3);

  // A copy where the input is, at first, in the block before the output:
  //
  // [ab] [c] [1231] [231231 ] [ ]
  // ^--- ^---
  // Then during the copy, the pointers move such that the input and
  // output pointers are in the same block:
  //
  // [ab] [c] [1231] [23123123] [ ]
  // ^- ^-
  // And then they move again, so that the output pointer is no longer
  // in the same block as the input pointer:
  // [ab] [c] [1231] [23123123] [123 ]
  // ^-- ^--
  AppendCopy(&compressed, 6, 9);

  // Finally, a copy where the input is from several blocks back,
  // and it also crosses three blocks:
  //
  // [ab] [c] [1231] [23123123] [123b ]
  // ^ ^
  // [ab] [c] [1231] [23123123] [123bc ]
  // ^ ^
  // [ab] [c] [1231] [23123123] [123bc12 ]
  // ^- ^-
  AppendCopy(&compressed, 17, 4);

  CHECK(snappy::RawUncompressToIOVec(
      compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
  // Each block must hold exactly the bytes depicted in the diagrams above.
  CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
  CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
  CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
  CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
  CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));

  for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
    delete[] reinterpret_cast<char *>(iov[i].iov_base);
  }
}
653
654TEST(Snappy, IOVecLiteralOverflow) {
655 static const int kLengths[] = { 3, 4 };
656
657 struct iovec iov[ARRAYSIZE(kLengths)];
658 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
659 iov[i].iov_base = new char[kLengths[i]];
660 iov[i].iov_len = kLengths[i];
661 }
662
663 std::string compressed;
664 Varint::Append32(&compressed, 8);
665
666 AppendLiteral(&compressed, "12345678");
667
668 CHECK(!snappy::RawUncompressToIOVec(
669 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
670
671 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
672 delete[] reinterpret_cast<char *>(iov[i].iov_base);
673 }
674}
675
676TEST(Snappy, IOVecCopyOverflow) {
677 static const int kLengths[] = { 3, 4 };
678
679 struct iovec iov[ARRAYSIZE(kLengths)];
680 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
681 iov[i].iov_base = new char[kLengths[i]];
682 iov[i].iov_len = kLengths[i];
683 }
684
685 std::string compressed;
686 Varint::Append32(&compressed, 8);
687
688 AppendLiteral(&compressed, "123");
689 AppendCopy(&compressed, 3, 5);
690
691 CHECK(!snappy::RawUncompressToIOVec(
692 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
693
694 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
695 delete[] reinterpret_cast<char *>(iov[i].iov_base);
696 }
697}
698
699bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
700 const bool result1 = snappy::GetUncompressedLength(compressed.data(),
701 compressed.size(),
702 ulength);
703
704 snappy::ByteArraySource source(compressed.data(), compressed.size());
705 uint32_t length;
706 const bool result2 = snappy::GetUncompressedLength(&source, &length);
707 CHECK_EQ(result1, result2);
708 return result1;
709}
710
711TEST(SnappyCorruption, TruncatedVarint) {
712 std::string compressed, uncompressed;
713 size_t ulength;
714 compressed.push_back('\xf0');
715 CHECK(!CheckUncompressedLength(compressed, &ulength));
716 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
717 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
718 &uncompressed));
719}
720
721TEST(SnappyCorruption, UnterminatedVarint) {
722 std::string compressed, uncompressed;
723 size_t ulength;
724 compressed.push_back('\x80');
725 compressed.push_back('\x80');
726 compressed.push_back('\x80');
727 compressed.push_back('\x80');
728 compressed.push_back('\x80');
729 compressed.push_back(10);
730 CHECK(!CheckUncompressedLength(compressed, &ulength));
731 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
732 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
733 &uncompressed));
734}
735
736TEST(SnappyCorruption, OverflowingVarint) {
737 std::string compressed, uncompressed;
738 size_t ulength;
739 compressed.push_back('\xfb');
740 compressed.push_back('\xff');
741 compressed.push_back('\xff');
742 compressed.push_back('\xff');
743 compressed.push_back('\x7f');
744 CHECK(!CheckUncompressedLength(compressed, &ulength));
745 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
746 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
747 &uncompressed));
748}
749
750TEST(Snappy, ReadPastEndOfBuffer) {
751 // Check that we do not read past end of input
752
753 // Make a compressed string that ends with a single-byte literal
754 std::string compressed;
755 Varint::Append32(&compressed, 1);
756 AppendLiteral(&compressed, "x");
757
758 std::string uncompressed;
759 DataEndingAtUnreadablePage c(compressed);
760 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
761 CHECK_EQ(uncompressed, std::string("x"));
762}
763
764// Check for an infinite loop caused by a copy with offset==0
765TEST(Snappy, ZeroOffsetCopy) {
766 const char* compressed = "\x40\x12\x00\x00";
767 // \x40 Length (must be > kMaxIncrementCopyOverflow)
768 // \x12\x00\x00 Copy with offset==0, length==5
769 char uncompressed[100];
770 EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
771}
772
773TEST(Snappy, ZeroOffsetCopyValidation) {
774 const char* compressed = "\x05\x12\x00\x00";
775 // \x05 Length
776 // \x12\x00\x00 Copy with offset==0, length==5
777 EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
778}
779
780int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
781 uint64_t data;
782 std::pair<size_t, bool> p =
783 snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
784 CHECK_EQ(p.first < 8, p.second);
785 return p.first;
786}
787
// Each case below lists the expected match length, the two strings, and the
// comparison limit; the prefix padding ("xxxxxx", "abcdefgh") steers which
// internal loop of FindMatchLength handles the mismatch.
TEST(Snappy, FindMatchLength) {
  // Exercise all different code paths through the function.
  // 64-bit version:

  // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));

  // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));

  // Same, but edge cases.
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
  EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));

  // Find non-match at once in first loop.
  EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
  EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
  EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
  EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));

  // Find non-match in first loop after one block.
  EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh?1234567xxxxxxxx", 24));
  EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                   "abcdefgh0?234567xxxxxxxx", 24));
  EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh01237654xxxxxxxx", 24));
  EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
                                    "abcdefgh0123456?xxxxxxxx", 24));

  // 32-bit version:

  // Short matches.
  EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
  EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
  EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
  EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
  EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
  EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
  EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
  EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
  EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));

  // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));

  // Same, but edge cases.
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));

  // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
  EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));

  // Find non-match at once in first loop.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx?123xxxxxxxx", 18));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0?23xxxxxxxx", 18));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx0132xxxxxxxx", 18));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
                                   "xxxxxx012?xxxxxxxx", 18));

  // Same, but edge cases.
  EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
  EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
  EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
  EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));

  // Find non-match in first loop after one block.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd?123xx", 16));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0?23xx", 16));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd0132xx", 16));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
                                    "xxxxxxabcd012?xx", 16));

  // Same, but edge cases.
  EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
  EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
  EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
  EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
}
877
// Randomized cross-check of FindMatchLength: builds two strings over a
// two-letter alphabet (so shared prefixes of varied length arise), places
// both before unreadable guard pages, and verifies the reported match
// length against a character-by-character comparison.
TEST(Snappy, FindMatchLengthRandom) {
  constexpr int kNumTrials = 10000;
  constexpr int kTypicalLength = 10;
  std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
  std::uniform_int_distribution<int> uniform_byte(0, 255);
  std::bernoulli_distribution one_in_two(1.0 / 2);
  std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);

  for (int i = 0; i < kNumTrials; ++i) {
    std::string s, t;
    char a = static_cast<char>(uniform_byte(rng));
    char b = static_cast<char>(uniform_byte(rng));
    // Grow both strings until the geometric stop fires; expected length is
    // kTypicalLength.
    while (!one_in_typical_length(rng)) {
      s.push_back(one_in_two(rng) ? a : b);
      t.push_back(one_in_two(rng) ? a : b);
    }
    DataEndingAtUnreadablePage u(s);
    DataEndingAtUnreadablePage v(t);
    size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
    if (matched == t.size()) {
      EXPECT_EQ(s, t);
    } else {
      // The first mismatch must be exactly at `matched`.
      EXPECT_NE(s[matched], t[matched]);
      for (size_t j = 0; j < matched; ++j) {
        EXPECT_EQ(s[j], t[j]);
      }
    }
  }
}
907
// Packs one decompression-table entry: bits 0..6 hold the length,
// bits 8..10 the high bits of the copy offset, and bits 11..13 the
// number of extra bytes that follow the tag byte.
uint16_t MakeEntry(unsigned int extra, unsigned int len,
                   unsigned int copy_offset) {
  // Each field must fit in its allotted bit range.
  assert((extra & ~0x7u) == 0);        // At most 3 bits
  assert((copy_offset & ~0x7u) == 0);  // At most 3 bits
  assert((len & ~0x7fu) == 0);         // At most 7 bits
  const unsigned int packed = (extra << 11) | (copy_offset << 8) | len;
  return static_cast<uint16_t>(packed);
}
916
// Check that the decompression table is correct, and optionally print out
// the computed one. Rebuilds the 256-entry tag-byte lookup table from the
// format specification and compares it against the recorded
// snappy::internal::char_table.
TEST(Snappy, VerifyCharTable) {
  using snappy::internal::LITERAL;
  using snappy::internal::COPY_1_BYTE_OFFSET;
  using snappy::internal::COPY_2_BYTE_OFFSET;
  using snappy::internal::COPY_4_BYTE_OFFSET;
  using snappy::internal::char_table;

  uint16_t dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; ++i) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries. We store (len-1) in the top 6 bits.
  for (uint8_t len = 1; len <= 60; ++len) {
    dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries. We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 3 bits. offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (uint8_t len = 4; len < 12; ++len) {
    for (uint16_t offset = 0; offset < 2048; offset += 256) {
      // Only the high 3 bits of the offset live in the tag byte; the
      // table entry records them so the decoder can recombine them
      // with the following byte.
      uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
      dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
          MakeEntry(1, len, offset_high);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (uint8_t len = 1; len <= 64; ++len) {
    dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (uint8_t len = 1; len <= 64; ++len) {
    dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  // 60 + 4 + 8*8 + 64 + 64 == 256, so every slot must have been hit.
  EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
  for (int i = 0; i < 256; ++i) {
    EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
  }

  // Optionally dump the computed table as C source, for regenerating
  // the recorded char_table after a format change.
  if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
    std::printf("static const uint16_t char_table[256] = {\n  ");
    for (int i = 0; i < 256; ++i) {
      std::printf("0x%04x%s",
                  dst[i],
                  ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n  " : ", ")));
    }
    std::printf("};\n");
  }

  // Check that computed table matched recorded table.
  for (int i = 0; i < 256; ++i) {
    EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
  }
}
1000
1001TEST(Snappy, TestBenchmarkFiles) {
1002 for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
1003 Verify(ReadTestDataFile(kTestDataFiles[i].filename,
1004 kTestDataFiles[i].size_limit));
1005 }
1006}
1007
1008} // namespace
1009
1010} // namespace snappy
1011