1 | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | // |
3 | // Redistribution and use in source and binary forms, with or without |
4 | // modification, are permitted provided that the following conditions are |
5 | // met: |
6 | // |
7 | // * Redistributions of source code must retain the above copyright |
8 | // notice, this list of conditions and the following disclaimer. |
9 | // * Redistributions in binary form must reproduce the above |
10 | // copyright notice, this list of conditions and the following disclaimer |
11 | // in the documentation and/or other materials provided with the |
12 | // distribution. |
13 | // * Neither the name of Google Inc. nor the names of its |
14 | // contributors may be used to endorse or promote products derived from |
15 | // this software without specific prior written permission. |
16 | // |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | // |
29 | // Various stubs for the open-source version of Snappy. |
30 | |
31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
33 | |
34 | #if HAVE_CONFIG_H |
35 | #include "config.h" |
36 | #endif |
37 | |
38 | #include <stdint.h> |
39 | |
40 | #include <cassert> |
41 | #include <cstdlib> |
42 | #include <cstring> |
43 | #include <limits> |
44 | #include <string> |
45 | |
46 | #if HAVE_SYS_MMAN_H |
47 | #include <sys/mman.h> |
48 | #endif |
49 | |
50 | #if HAVE_UNISTD_H |
51 | #include <unistd.h> |
52 | #endif |
53 | |
54 | #if defined(_MSC_VER) |
55 | #include <intrin.h> |
56 | #endif // defined(_MSC_VER) |
57 | |
58 | #ifndef __has_feature |
59 | #define __has_feature(x) 0 |
60 | #endif |
61 | |
62 | #if __has_feature(memory_sanitizer) |
63 | #include <sanitizer/msan_interface.h> |
64 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ |
65 | __msan_unpoison((address), (size)) |
66 | #else |
67 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ |
68 | #endif // __has_feature(memory_sanitizer) |
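
// Usage sketch (illustrative only; FillViaAssembly is a hypothetical
// helper): after filling a buffer through a mechanism MSan cannot see,
// mark its contents as initialized:
//   char buf[64];
//   FillViaAssembly(buf, sizeof(buf));  // Hypothetical opaque writer.
//   SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(buf, sizeof(buf));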
69 | |
70 | #include "snappy-stubs-public.h" |
71 | |
72 | // Used to enable 64-bit optimized versions of some routines. |
73 | #if defined(__PPC64__) || defined(__powerpc64__) |
74 | #define ARCH_PPC 1 |
75 | #elif defined(__aarch64__) || defined(_M_ARM64) |
76 | #define ARCH_ARM 1 |
77 | #endif |
78 | |
79 | // Needed by OS X, among others. |
80 | #ifndef MAP_ANONYMOUS |
81 | #define MAP_ANONYMOUS MAP_ANON |
82 | #endif |
83 | |
84 | // The size of an array, if known at compile-time. |
85 | // Will give unexpected results if used on a pointer. |
86 | // We undefine it first, since some compilers already have a definition. |
87 | #ifdef ARRAYSIZE |
88 | #undef ARRAYSIZE |
89 | #endif |
90 | #define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))} |
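
// Example (illustrative only):
//   static const char kTable[] = {1, 2, 3};
//   static_assert(ARRAYSIZE(kTable) == 3, "three entries");
// The int{...} braced initializer rejects narrowing, so a size that does
// not fit in int fails to compile instead of silently truncating.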
91 | |
92 | // Static prediction hints. |
93 | #if HAVE_BUILTIN_EXPECT |
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
95 | #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
96 | #else |
97 | #define SNAPPY_PREDICT_FALSE(x) x |
98 | #define SNAPPY_PREDICT_TRUE(x) x |
99 | #endif // HAVE_BUILTIN_EXPECT |
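
// Usage sketch (illustrative only): annotate branches whose direction is
// heavily skewed, such as error paths:
//   if (SNAPPY_PREDICT_FALSE(bytes_remaining == 0)) return false;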
100 | |
101 | // Inlining hints. |
102 | #if HAVE_ATTRIBUTE_ALWAYS_INLINE |
103 | #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) |
104 | #else |
105 | #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE |
106 | #endif // HAVE_ATTRIBUTE_ALWAYS_INLINE |
107 | |
108 | // Stubbed version of ABSL_FLAG. |
109 | // |
110 | // In the open source version, flags can only be changed at compile time. |
111 | #define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \ |
112 | flag_type FLAGS_ ## flag_name = default_value |
113 | |
114 | namespace snappy { |
115 | |
116 | // Stubbed version of absl::GetFlag(). |
117 | template <typename T> |
118 | inline T GetFlag(T flag) { return flag; } |
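
// Usage sketch (illustrative only; the flag name is hypothetical):
//   SNAPPY_FLAG(int32_t, snappy_example_level, 2, "Example tuning knob.");
//   ...
//   int32_t level = snappy::GetFlag(FLAGS_snappy_example_level);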
119 | |
120 | static const uint32_t kuint32max = std::numeric_limits<uint32_t>::max(); |
121 | static const int64_t kint64max = std::numeric_limits<int64_t>::max(); |
122 | |
123 | // Potentially unaligned loads and stores. |
124 | |
125 | inline uint16_t UNALIGNED_LOAD16(const void *p) { |
126 | // Compiles to a single movzx/ldrh on clang/gcc/msvc. |
127 | uint16_t v; |
128 | std::memcpy(&v, p, sizeof(v)); |
129 | return v; |
130 | } |
131 | |
132 | inline uint32_t UNALIGNED_LOAD32(const void *p) { |
133 | // Compiles to a single mov/ldr on clang/gcc/msvc. |
134 | uint32_t v; |
135 | std::memcpy(&v, p, sizeof(v)); |
136 | return v; |
137 | } |
138 | |
139 | inline uint64_t UNALIGNED_LOAD64(const void *p) { |
140 | // Compiles to a single mov/ldr on clang/gcc/msvc. |
141 | uint64_t v; |
142 | std::memcpy(&v, p, sizeof(v)); |
143 | return v; |
144 | } |
145 | |
146 | inline void UNALIGNED_STORE16(void *p, uint16_t v) { |
147 | // Compiles to a single mov/strh on clang/gcc/msvc. |
148 | std::memcpy(p, &v, sizeof(v)); |
149 | } |
150 | |
151 | inline void UNALIGNED_STORE32(void *p, uint32_t v) { |
152 | // Compiles to a single mov/str on clang/gcc/msvc. |
153 | std::memcpy(p, &v, sizeof(v)); |
154 | } |
155 | |
156 | inline void UNALIGNED_STORE64(void *p, uint64_t v) { |
157 | // Compiles to a single mov/str on clang/gcc/msvc. |
158 | std::memcpy(p, &v, sizeof(v)); |
159 | } |
160 | |
// Conversions to/from little-endian storage (the opposite of network byte
// order). Only unaligned load/store routines are provided here.
//
// Store values into unaligned memory, converting to little-endian order:
//   LittleEndian::Store16(p, x);
//
// Load unaligned values stored in little-endian order, converting to host
// order:
//   x = LittleEndian::Load16(p);
170 | class LittleEndian { |
171 | public: |
172 | // Functions to do unaligned loads and stores in little-endian order. |
173 | static inline uint16_t Load16(const void *ptr) { |
174 | const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr); |
175 | |
    // Compiles to a single movzx/ldrh on recent clang and gcc.
177 | return (static_cast<uint16_t>(buffer[0])) | |
178 | (static_cast<uint16_t>(buffer[1]) << 8); |
179 | } |
180 | |
181 | static inline uint32_t Load32(const void *ptr) { |
182 | const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr); |
183 | |
    // Compiles to a single mov/ldr on recent clang and gcc.
185 | return (static_cast<uint32_t>(buffer[0])) | |
186 | (static_cast<uint32_t>(buffer[1]) << 8) | |
187 | (static_cast<uint32_t>(buffer[2]) << 16) | |
188 | (static_cast<uint32_t>(buffer[3]) << 24); |
189 | } |
190 | |
191 | static inline uint64_t Load64(const void *ptr) { |
192 | const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr); |
193 | |
    // Compiles to a single mov/ldr on recent clang and gcc.
195 | return (static_cast<uint64_t>(buffer[0])) | |
196 | (static_cast<uint64_t>(buffer[1]) << 8) | |
197 | (static_cast<uint64_t>(buffer[2]) << 16) | |
198 | (static_cast<uint64_t>(buffer[3]) << 24) | |
199 | (static_cast<uint64_t>(buffer[4]) << 32) | |
200 | (static_cast<uint64_t>(buffer[5]) << 40) | |
201 | (static_cast<uint64_t>(buffer[6]) << 48) | |
202 | (static_cast<uint64_t>(buffer[7]) << 56); |
203 | } |
204 | |
205 | static inline void Store16(void *dst, uint16_t value) { |
206 | uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst); |
207 | |
    // Compiles to a single mov/strh on recent clang and gcc.
209 | buffer[0] = static_cast<uint8_t>(value); |
210 | buffer[1] = static_cast<uint8_t>(value >> 8); |
211 | } |
212 | |
  static inline void Store32(void *dst, uint32_t value) {
214 | uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst); |
215 | |
216 | // Compiles to a single mov/str on recent clang and gcc. |
217 | buffer[0] = static_cast<uint8_t>(value); |
218 | buffer[1] = static_cast<uint8_t>(value >> 8); |
219 | buffer[2] = static_cast<uint8_t>(value >> 16); |
220 | buffer[3] = static_cast<uint8_t>(value >> 24); |
221 | } |
222 | |
  static inline void Store64(void* dst, uint64_t value) {
224 | uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst); |
225 | |
226 | // Compiles to a single mov/str on recent clang and gcc. |
227 | buffer[0] = static_cast<uint8_t>(value); |
228 | buffer[1] = static_cast<uint8_t>(value >> 8); |
229 | buffer[2] = static_cast<uint8_t>(value >> 16); |
230 | buffer[3] = static_cast<uint8_t>(value >> 24); |
231 | buffer[4] = static_cast<uint8_t>(value >> 32); |
232 | buffer[5] = static_cast<uint8_t>(value >> 40); |
233 | buffer[6] = static_cast<uint8_t>(value >> 48); |
234 | buffer[7] = static_cast<uint8_t>(value >> 56); |
235 | } |
236 | |
237 | static inline constexpr bool IsLittleEndian() { |
238 | #if SNAPPY_IS_BIG_ENDIAN |
239 | return false; |
240 | #else |
241 | return true; |
242 | #endif // SNAPPY_IS_BIG_ENDIAN |
243 | } |
244 | }; |
245 | |
246 | // Some bit-manipulation functions. |
247 | class Bits { |
248 | public: |
249 | // Return floor(log2(n)) for positive integer n. |
250 | static int Log2FloorNonZero(uint32_t n); |
251 | |
  // Return floor(log2(n)) if n > 0; returns -1 if n == 0.
253 | static int Log2Floor(uint32_t n); |
254 | |
  // Return the index of the least significant set bit, 0-indexed. The result
  // is undefined if n == 0. FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
258 | static int FindLSBSetNonZero(uint32_t n); |
259 | |
260 | static int FindLSBSetNonZero64(uint64_t n); |
261 | |
262 | private: |
  // Disallow copying.
  Bits(const Bits&) = delete;
  Bits& operator=(const Bits&) = delete;
266 | }; |
267 | |
268 | #if HAVE_BUILTIN_CTZ |
269 | |
270 | inline int Bits::Log2FloorNonZero(uint32_t n) { |
271 | assert(n != 0); |
  // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
  // writes the subtraction out in base 2 and observes that no borrows occur.
  //
  // GCC and Clang implement __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x).
276 | // Using "31 ^" here instead of "31 -" allows the optimizer to strip the |
277 | // function body down to _bit_scan_reverse(x). |
278 | return 31 ^ __builtin_clz(n); |
279 | } |
280 | |
281 | inline int Bits::Log2Floor(uint32_t n) { |
282 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); |
283 | } |
284 | |
285 | inline int Bits::FindLSBSetNonZero(uint32_t n) { |
286 | assert(n != 0); |
287 | return __builtin_ctz(n); |
288 | } |
289 | |
290 | #elif defined(_MSC_VER) |
291 | |
292 | inline int Bits::Log2FloorNonZero(uint32_t n) { |
293 | assert(n != 0); |
294 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. |
295 | unsigned long where; |
296 | _BitScanReverse(&where, n); |
297 | return static_cast<int>(where); |
298 | } |
299 | |
300 | inline int Bits::Log2Floor(uint32_t n) { |
301 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. |
302 | unsigned long where; |
303 | if (_BitScanReverse(&where, n)) |
304 | return static_cast<int>(where); |
305 | return -1; |
306 | } |
307 | |
308 | inline int Bits::FindLSBSetNonZero(uint32_t n) { |
309 | assert(n != 0); |
310 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. |
311 | unsigned long where; |
312 | if (_BitScanForward(&where, n)) |
313 | return static_cast<int>(where); |
314 | return 32; |
315 | } |
316 | |
317 | #else // Portable versions. |
318 | |
319 | inline int Bits::Log2FloorNonZero(uint32_t n) { |
320 | assert(n != 0); |
321 | |
  // Binary search for the highest set bit: if any bits survive a right
  // shift by 2^i, the result is at least 2^i, so keep the shifted value.
  int log = 0;
  uint32_t value = n;
324 | for (int i = 4; i >= 0; --i) { |
325 | int shift = (1 << i); |
326 | uint32_t x = value >> shift; |
327 | if (x != 0) { |
328 | value = x; |
329 | log += shift; |
330 | } |
331 | } |
332 | assert(value == 1); |
333 | return log; |
334 | } |
335 | |
336 | inline int Bits::Log2Floor(uint32_t n) { |
337 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); |
338 | } |
339 | |
340 | inline int Bits::FindLSBSetNonZero(uint32_t n) { |
341 | assert(n != 0); |
342 | |
  // Binary search for the lowest set bit: if any bits survive a left shift
  // by `shift`, the lowest set bit lies within the low `shift` positions,
  // so keep the shifted value and lower the index bound.
  int rc = 31;
  for (int i = 4, shift = 1 << 4; i >= 0; --i) {
345 | const uint32_t x = n << shift; |
346 | if (x != 0) { |
347 | n = x; |
348 | rc -= shift; |
349 | } |
350 | shift >>= 1; |
351 | } |
352 | return rc; |
353 | } |
354 | |
355 | #endif // End portable versions. |
356 | |
357 | #if HAVE_BUILTIN_CTZ |
358 | |
359 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { |
360 | assert(n != 0); |
361 | return __builtin_ctzll(n); |
362 | } |
363 | |
364 | #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) |
365 | // _BitScanForward64() is only available on x64 and ARM64. |
366 | |
367 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { |
368 | assert(n != 0); |
369 | // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. |
370 | unsigned long where; |
371 | if (_BitScanForward64(&where, n)) |
372 | return static_cast<int>(where); |
373 | return 64; |
374 | } |
375 | |
376 | #else // Portable version. |
377 | |
378 | // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). |
379 | inline int Bits::FindLSBSetNonZero64(uint64_t n) { |
380 | assert(n != 0); |
381 | |
382 | const uint32_t bottombits = static_cast<uint32_t>(n); |
383 | if (bottombits == 0) { |
384 | // Bottom bits are zero, so scan the top bits. |
385 | return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32)); |
386 | } else { |
387 | return FindLSBSetNonZero(bottombits); |
388 | } |
389 | } |
390 | |
391 | #endif // HAVE_BUILTIN_CTZ |
392 | |
393 | // Variable-length integer encoding. |
394 | class Varint { |
395 | public: |
396 | // Maximum lengths of varint encoding of uint32_t. |
397 | static const int kMax32 = 5; |
398 | |
399 | // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. |
400 | // Never reads a character at or beyond limit. If a valid/terminated varint32 |
401 | // was found in the range, stores it in *OUTPUT and returns a pointer just |
402 | // past the last byte of the varint32. Else returns NULL. On success, |
403 | // "result <= limit". |
404 | static const char* Parse32WithLimit(const char* ptr, const char* limit, |
405 | uint32_t* OUTPUT); |
406 | |
407 | // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". |
408 | // EFFECTS Encodes "v" into "ptr" and returns a pointer to the |
409 | // byte just past the last encoded byte. |
410 | static char* Encode32(char* ptr, uint32_t v); |
411 | |
412 | // EFFECTS Appends the varint representation of "value" to "*s". |
413 | static void Append32(std::string* s, uint32_t value); |
414 | }; |
415 | |
416 | inline const char* Varint::Parse32WithLimit(const char* p, |
417 | const char* l, |
418 | uint32_t* OUTPUT) { |
419 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
420 | const unsigned char* limit = reinterpret_cast<const unsigned char*>(l); |
421 | uint32_t b, result; |
422 | if (ptr >= limit) return NULL; |
423 | b = *(ptr++); result = b & 127; if (b < 128) goto done; |
424 | if (ptr >= limit) return NULL; |
425 | b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; |
426 | if (ptr >= limit) return NULL; |
427 | b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; |
428 | if (ptr >= limit) return NULL; |
429 | b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; |
430 | if (ptr >= limit) return NULL; |
431 | b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; |
432 | return NULL; // Value is too long to be a varint32 |
433 | done: |
434 | *OUTPUT = result; |
435 | return reinterpret_cast<const char*>(ptr); |
436 | } |
437 | |
438 | inline char* Varint::Encode32(char* sptr, uint32_t v) { |
  // Operate on bytes as unsigned values.
440 | uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr); |
441 | static const uint8_t B = 128; |
442 | if (v < (1 << 7)) { |
443 | *(ptr++) = static_cast<uint8_t>(v); |
444 | } else if (v < (1 << 14)) { |
445 | *(ptr++) = static_cast<uint8_t>(v | B); |
446 | *(ptr++) = static_cast<uint8_t>(v >> 7); |
447 | } else if (v < (1 << 21)) { |
448 | *(ptr++) = static_cast<uint8_t>(v | B); |
449 | *(ptr++) = static_cast<uint8_t>((v >> 7) | B); |
450 | *(ptr++) = static_cast<uint8_t>(v >> 14); |
451 | } else if (v < (1 << 28)) { |
452 | *(ptr++) = static_cast<uint8_t>(v | B); |
453 | *(ptr++) = static_cast<uint8_t>((v >> 7) | B); |
454 | *(ptr++) = static_cast<uint8_t>((v >> 14) | B); |
455 | *(ptr++) = static_cast<uint8_t>(v >> 21); |
456 | } else { |
457 | *(ptr++) = static_cast<uint8_t>(v | B); |
458 | *(ptr++) = static_cast<uint8_t>((v>>7) | B); |
459 | *(ptr++) = static_cast<uint8_t>((v>>14) | B); |
460 | *(ptr++) = static_cast<uint8_t>((v>>21) | B); |
461 | *(ptr++) = static_cast<uint8_t>(v >> 28); |
462 | } |
463 | return reinterpret_cast<char*>(ptr); |
464 | } |
465 | |
466 | // If you know the internal layout of the std::string in use, you can |
467 | // replace this function with one that resizes the string without |
468 | // filling the new space with zeros (if applicable) -- |
469 | // it will be non-portable but faster. |
470 | inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { |
471 | s->resize(new_size); |
472 | } |
473 | |
474 | // Return a mutable char* pointing to a string's internal buffer, |
475 | // which may not be null-terminated. Writing through this pointer will |
476 | // modify the string. |
477 | // |
478 | // string_as_array(&str)[i] is valid for 0 <= i < str.size() until the |
479 | // next call to a string method that invalidates iterators. |
480 | // |
// Prior to C++11 there was no standard-blessed way of getting a mutable
// pointer to a string's internal buffer. LWG issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposed this as the method, and C++11 made string storage contiguous,
// so this is guaranteed to work; C++17 additionally provides a non-const
// data() member.
486 | inline char* string_as_array(std::string* str) { |
487 | return str->empty() ? NULL : &*str->begin(); |
488 | } |
489 | |
490 | } // namespace snappy |
491 | |
492 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
493 | |