1// Copyright 2011 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Various stubs for the open-source version of Snappy.
30
31#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
32#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
33
34#if HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include <stdint.h>
39
40#include <cassert>
41#include <cstdlib>
42#include <cstring>
43#include <limits>
44#include <string>
45
46#if HAVE_SYS_MMAN_H
47#include <sys/mman.h>
48#endif
49
50#if HAVE_UNISTD_H
51#include <unistd.h>
52#endif
53
54#if defined(_MSC_VER)
55#include <intrin.h>
56#endif // defined(_MSC_VER)
57
58#ifndef __has_feature
59#define __has_feature(x) 0
60#endif
61
62#if __has_feature(memory_sanitizer)
63#include <sanitizer/msan_interface.h>
64#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
65 __msan_unpoison((address), (size))
66#else
67#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
68#endif // __has_feature(memory_sanitizer)
69
70#include "snappy-stubs-public.h"
71
72// Used to enable 64-bit optimized versions of some routines.
73#if defined(__PPC64__) || defined(__powerpc64__)
74#define ARCH_PPC 1
75#elif defined(__aarch64__) || defined(_M_ARM64)
76#define ARCH_ARM 1
77#endif
78
79// Needed by OS X, among others.
80#ifndef MAP_ANONYMOUS
81#define MAP_ANONYMOUS MAP_ANON
82#endif
83
84// The size of an array, if known at compile-time.
85// Will give unexpected results if used on a pointer.
86// We undefine it first, since some compilers already have a definition.
87#ifdef ARRAYSIZE
88#undef ARRAYSIZE
89#endif
90#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
91
92// Static prediction hints.
93#if HAVE_BUILTIN_EXPECT
94#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
95#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
96#else
97#define SNAPPY_PREDICT_FALSE(x) x
98#define SNAPPY_PREDICT_TRUE(x) x
99#endif // HAVE_BUILTIN_EXPECT
100
101// Inlining hints.
102#if HAVE_ATTRIBUTE_ALWAYS_INLINE
103#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
104#else
105#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
106#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE
107
108// Stubbed version of ABSL_FLAG.
109//
110// In the open source version, flags can only be changed at compile time.
111#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \
112 flag_type FLAGS_ ## flag_name = default_value
113
114namespace snappy {
115
116// Stubbed version of absl::GetFlag().
template <typename T>
inline T GetFlag(T flag) {
  // Flags are plain variables in the open-source build; reading one is
  // simply returning its current value.
  return flag;
}
119
// Convenience limit constants. constexpr (the file already relies on C++11
// constexpr, see LittleEndian::IsLittleEndian) guarantees compile-time
// values with no static-initialization-order concerns.
static constexpr uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
static constexpr int64_t kint64max = std::numeric_limits<int64_t>::max();
122
123// Potentially unaligned loads and stores.
124
inline uint16_t UNALIGNED_LOAD16(const void *p) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // movzx/ldrh by clang/gcc/msvc.
  uint16_t result;
  std::memcpy(&result, p, sizeof(result));
  return result;
}
131
inline uint32_t UNALIGNED_LOAD32(const void *p) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // mov/ldr by clang/gcc/msvc.
  uint32_t result;
  std::memcpy(&result, p, sizeof(result));
  return result;
}
138
inline uint64_t UNALIGNED_LOAD64(const void *p) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // mov/ldr by clang/gcc/msvc.
  uint64_t result;
  std::memcpy(&result, p, sizeof(result));
  return result;
}
145
inline void UNALIGNED_STORE16(void *p, uint16_t v) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // mov/strh by clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}
150
inline void UNALIGNED_STORE32(void *p, uint32_t v) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // mov/str by clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}
155
inline void UNALIGNED_STORE64(void *p, uint64_t v) {
  // p may be arbitrarily aligned; the memcpy is folded into a single
  // mov/str by clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}
160
161// Convert to little-endian storage, opposite of network format.
162// Convert x from host to little endian: x = LittleEndian.FromHost(x);
163// convert x from little endian to host: x = LittleEndian.ToHost(x);
164//
165// Store values into unaligned memory converting to little endian order:
166// LittleEndian.Store16(p, x);
167//
168// Load unaligned values stored in little endian converting to host order:
169// x = LittleEndian.Load16(p);
class LittleEndian {
 public:
  // Functions to do unaligned loads and stores in little-endian order.

  // Reads a little-endian uint16_t from `ptr` (any alignment) into host order.
  static inline uint16_t Load16(const void *ptr) {
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(ptr);

    // Assembling the value byte-by-byte is correct on any host byte order;
    // compiles to a single mov/str on recent clang and gcc.
    return static_cast<uint16_t>(bytes[0]) |
           (static_cast<uint16_t>(bytes[1]) << 8);
  }

  // Reads a little-endian uint32_t from `ptr` (any alignment) into host order.
  static inline uint32_t Load32(const void *ptr) {
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(ptr);

    // Compiles to a single mov/str on recent clang and gcc.
    return static_cast<uint32_t>(bytes[0]) |
           (static_cast<uint32_t>(bytes[1]) << 8) |
           (static_cast<uint32_t>(bytes[2]) << 16) |
           (static_cast<uint32_t>(bytes[3]) << 24);
  }

  // Reads a little-endian uint64_t from `ptr` (any alignment) into host order.
  static inline uint64_t Load64(const void *ptr) {
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(ptr);

    // Compiles to a single mov/str on recent clang and gcc.
    return static_cast<uint64_t>(bytes[0]) |
           (static_cast<uint64_t>(bytes[1]) << 8) |
           (static_cast<uint64_t>(bytes[2]) << 16) |
           (static_cast<uint64_t>(bytes[3]) << 24) |
           (static_cast<uint64_t>(bytes[4]) << 32) |
           (static_cast<uint64_t>(bytes[5]) << 40) |
           (static_cast<uint64_t>(bytes[6]) << 48) |
           (static_cast<uint64_t>(bytes[7]) << 56);
  }

  // Writes `value` to `dst` (any alignment) in little-endian byte order.
  static inline void Store16(void *dst, uint16_t value) {
    uint8_t* const bytes = reinterpret_cast<uint8_t*>(dst);

    // Compiles to a single mov/str on recent clang and gcc.
    bytes[0] = static_cast<uint8_t>(value);
    bytes[1] = static_cast<uint8_t>(value >> 8);
  }

  // Writes `value` to `dst` (any alignment) in little-endian byte order.
  static void Store32(void *dst, uint32_t value) {
    uint8_t* const bytes = reinterpret_cast<uint8_t*>(dst);

    // Compiles to a single mov/str on recent clang and gcc.
    bytes[0] = static_cast<uint8_t>(value);
    bytes[1] = static_cast<uint8_t>(value >> 8);
    bytes[2] = static_cast<uint8_t>(value >> 16);
    bytes[3] = static_cast<uint8_t>(value >> 24);
  }

  // Writes `value` to `dst` (any alignment) in little-endian byte order.
  static void Store64(void* dst, uint64_t value) {
    uint8_t* const bytes = reinterpret_cast<uint8_t*>(dst);

    // Compiles to a single mov/str on recent clang and gcc.
    bytes[0] = static_cast<uint8_t>(value);
    bytes[1] = static_cast<uint8_t>(value >> 8);
    bytes[2] = static_cast<uint8_t>(value >> 16);
    bytes[3] = static_cast<uint8_t>(value >> 24);
    bytes[4] = static_cast<uint8_t>(value >> 32);
    bytes[5] = static_cast<uint8_t>(value >> 40);
    bytes[6] = static_cast<uint8_t>(value >> 48);
    bytes[7] = static_cast<uint8_t>(value >> 56);
  }

  // True iff the host stores integers least-significant byte first, as
  // configured at build time via SNAPPY_IS_BIG_ENDIAN.
  static inline constexpr bool IsLittleEndian() {
#if SNAPPY_IS_BIG_ENDIAN
    return false;
#else
    return true;
#endif  // SNAPPY_IS_BIG_ENDIAN
  }
};
245
246// Some bit-manipulation functions.
// Some bit-manipulation functions.
class Bits {
 public:
  // Return floor(log2(n)) for positive integer n.
  static int Log2FloorNonZero(uint32_t n);

  // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
  static int Log2Floor(uint32_t n);

  // Return the first set least / most significant bit, 0-indexed.  Returns an
  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
  static int FindLSBSetNonZero(uint32_t n);

  static int FindLSBSetNonZero64(uint64_t n);

 private:
  // Bits is a stateless utility class: no copying (and, because a constructor
  // is declared, no instantiation either).  Deleted special members (C++11,
  // which this file already requires) give a clearer compile-time error than
  // the old private-and-undefined idiom.
  Bits(const Bits&) = delete;
  void operator=(const Bits&) = delete;
};
267
268#if HAVE_BUILTIN_CTZ
269
// Returns floor(log2(n)), i.e. the index of the most significant set bit.
// REQUIRES: n != 0 (__builtin_clz(0) is undefined).
inline int Bits::Log2FloorNonZero(uint32_t n) {
  assert(n != 0);
  // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
  // represents subtraction in base 2 and observes that there's no carry.
  //
  // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x).
  // Using "31 ^" here instead of "31 -" allows the optimizer to strip the
  // function body down to _bit_scan_reverse(x).
  return 31 ^ __builtin_clz(n);
}
280
281inline int Bits::Log2Floor(uint32_t n) {
282 return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
283}
284
// Returns the 0-based index of the least significant set bit (e.g. 8 -> 3).
// REQUIRES: n != 0 (__builtin_ctz(0) is undefined).
inline int Bits::FindLSBSetNonZero(uint32_t n) {
  assert(n != 0);
  return __builtin_ctz(n);
}
289
290#elif defined(_MSC_VER)
291
292inline int Bits::Log2FloorNonZero(uint32_t n) {
293 assert(n != 0);
294 // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
295 unsigned long where;
296 _BitScanReverse(&where, n);
297 return static_cast<int>(where);
298}
299
300inline int Bits::Log2Floor(uint32_t n) {
301 // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
302 unsigned long where;
303 if (_BitScanReverse(&where, n))
304 return static_cast<int>(where);
305 return -1;
306}
307
308inline int Bits::FindLSBSetNonZero(uint32_t n) {
309 assert(n != 0);
310 // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
311 unsigned long where;
312 if (_BitScanForward(&where, n))
313 return static_cast<int>(where);
314 return 32;
315}
316
317#else // Portable versions.
318
319inline int Bits::Log2FloorNonZero(uint32_t n) {
320 assert(n != 0);
321
322 int log = 0;
323 uint32_t value = n;
324 for (int i = 4; i >= 0; --i) {
325 int shift = (1 << i);
326 uint32_t x = value >> shift;
327 if (x != 0) {
328 value = x;
329 log += shift;
330 }
331 }
332 assert(value == 1);
333 return log;
334}
335
336inline int Bits::Log2Floor(uint32_t n) {
337 return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
338}
339
340inline int Bits::FindLSBSetNonZero(uint32_t n) {
341 assert(n != 0);
342
343 int rc = 31;
344 for (int i = 4, shift = 1 << 4; i >= 0; --i) {
345 const uint32_t x = n << shift;
346 if (x != 0) {
347 n = x;
348 rc -= shift;
349 }
350 shift >>= 1;
351 }
352 return rc;
353}
354
355#endif // End portable versions.
356
357#if HAVE_BUILTIN_CTZ
358
// 64-bit variant of FindLSBSetNonZero().
// REQUIRES: n != 0 (__builtin_ctzll(0) is undefined).
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
  assert(n != 0);
  return __builtin_ctzll(n);
}
363
364#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
365// _BitScanForward64() is only available on x64 and ARM64.
366
367inline int Bits::FindLSBSetNonZero64(uint64_t n) {
368 assert(n != 0);
369 // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
370 unsigned long where;
371 if (_BitScanForward64(&where, n))
372 return static_cast<int>(where);
373 return 64;
374}
375
376#else // Portable version.
377
378// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
379inline int Bits::FindLSBSetNonZero64(uint64_t n) {
380 assert(n != 0);
381
382 const uint32_t bottombits = static_cast<uint32_t>(n);
383 if (bottombits == 0) {
384 // Bottom bits are zero, so scan the top bits.
385 return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32));
386 } else {
387 return FindLSBSetNonZero(bottombits);
388 }
389}
390
391#endif // HAVE_BUILTIN_CTZ
392
393// Variable-length integer encoding.
class Varint {
 public:
  // Maximum number of bytes a varint-encoded uint32_t can occupy.
  static const int kMax32 = 5;

  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
  // Never reads a character at or beyond limit.  If a valid/terminated
  // varint32 was found in the range, stores it in *OUTPUT and returns a
  // pointer just past the last byte of the varint32.  Else returns NULL.
  // On success, "result <= limit".
  static const char* Parse32WithLimit(const char* ptr, const char* limit,
                                      uint32_t* OUTPUT);

  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
  //            byte just past the last encoded byte.
  static char* Encode32(char* ptr, uint32_t v);

  // EFFECTS    Appends the varint representation of "value" to "*s".
  static void Append32(std::string* s, uint32_t value);
};

inline const char* Varint::Parse32WithLimit(const char* p,
                                            const char* l,
                                            uint32_t* OUTPUT) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
  const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
  uint32_t result = 0;
  // Consume up to kMax32 groups of 7 bits, least-significant group first.
  // A clear high (continuation) bit marks the final byte.
  for (int shift = 0; shift <= 28; shift += 7) {
    if (ptr >= limit) return NULL;  // Ran out of input mid-varint.
    const uint32_t byte = *(ptr++);
    result |= (byte & 127) << shift;
    if (shift == 28) {
      // Fifth byte: only 4 payload bits fit in a uint32_t, and its
      // continuation bit must be clear, so anything >= 16 is invalid.
      if (byte >= 16) return NULL;
      break;
    }
    if (byte < 128) break;  // Continuation bit clear: varint is complete.
  }
  *OUTPUT = result;
  return reinterpret_cast<const char*>(ptr);
}

inline char* Varint::Encode32(char* sptr, uint32_t v) {
  // Emit 7 bits at a time, least-significant group first, setting the high
  // (continuation) bit on every byte except the last.
  uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr);
  while (v >= 128) {
    *(ptr++) = static_cast<uint8_t>(v) | 128;
    v >>= 7;
  }
  *(ptr++) = static_cast<uint8_t>(v);
  return reinterpret_cast<char*>(ptr);
}
465
466// If you know the internal layout of the std::string in use, you can
467// replace this function with one that resizes the string without
468// filling the new space with zeros (if applicable) --
469// it will be non-portable but faster.
inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
  // The portable implementation simply delegates to resize(), which
  // value-initializes (zero-fills) any bytes beyond the old size.
  s->resize(new_size);
}
473
474// Return a mutable char* pointing to a string's internal buffer,
475// which may not be null-terminated. Writing through this pointer will
476// modify the string.
477//
478// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
479// next call to a string method that invalidates iterators.
480//
// Historically (pre-C++11) there was no standard-blessed way of getting a
// mutable reference to a string's internal buffer. LWG issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// blessed this method, and since C++11 strings are required to store their
// characters contiguously, so it is guaranteed to work on all conforming
// implementations.
inline char* string_as_array(std::string* str) {
  // An empty string may hold no buffer at all, so hand back NULL for it.
  if (str->empty()) return NULL;
  return &*str->begin();
}
489
490} // namespace snappy
491
492#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
493