1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#pragma once
14
15#include <algorithm>
16#include <cassert>
17#include <climits>
18#include <cmath>
19#include <cstdint>
20#include <cstring>
21#include <limits>
22#include <type_traits>
23
24#ifdef __ANDROID_NDK__
25#include <android/api-level.h>
26#endif
27
28#ifndef __has_builtin
29#define __has_builtin(x) 0
30#endif
31
32#ifndef LLVM_GNUC_PREREQ
33#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
34#define LLVM_GNUC_PREREQ(maj, min, patch) \
35 ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \
36 ((maj) << 20) + ((min) << 10) + (patch))
37#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
38#define LLVM_GNUC_PREREQ(maj, min, patch) \
39 ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
40#else
41#define LLVM_GNUC_PREREQ(maj, min, patch) 0
42#endif
43#endif
44
45#ifdef _MSC_VER
46// Declare these intrinsics manually rather including intrin.h. It's very
47// expensive, and MathExtras.h is popular.
48// #include <intrin.h>
49extern "C" {
50unsigned char _BitScanForward(unsigned long* _Index, unsigned long _Mask);
51unsigned char _BitScanForward64(unsigned long* _Index, unsigned __int64 _Mask);
52unsigned char _BitScanReverse(unsigned long* _Index, unsigned long _Mask);
53unsigned char _BitScanReverse64(unsigned long* _Index, unsigned __int64 _Mask);
54}
55#endif
56
57namespace c10 {
58namespace llvm {
59/// The behavior an operation has on an input of 0.
60enum ZeroBehavior {
61 /// The returned value is undefined.
62 ZB_Undefined,
63 /// The returned value is numeric_limits<T>::max()
64 ZB_Max,
65 /// The returned value is numeric_limits<T>::digits
66 ZB_Width
67};
68
69namespace detail {
70template <typename T, std::size_t SizeOfT>
71struct TrailingZerosCounter {
72 static std::size_t count(T Val, ZeroBehavior) {
73 if (!Val)
74 return std::numeric_limits<T>::digits;
75 if (Val & 0x1)
76 return 0;
77
78 // Bisection method.
79 std::size_t ZeroBits = 0;
80 T Shift = std::numeric_limits<T>::digits >> 1;
81 T Mask = std::numeric_limits<T>::max() >> Shift;
82 while (Shift) {
83 if ((Val & Mask) == 0) {
84 Val >>= Shift;
85 ZeroBits |= Shift;
86 }
87 Shift >>= 1;
88 Mask >>= Shift;
89 }
90 return ZeroBits;
91 }
92};
93
94#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
95template <typename T>
96struct TrailingZerosCounter<T, 4> {
97 static std::size_t count(T Val, ZeroBehavior ZB) {
98 if (ZB != ZB_Undefined && Val == 0)
99 return 32;
100
101#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
102 return __builtin_ctz(Val);
103#elif defined(_MSC_VER)
104 unsigned long Index;
105 _BitScanForward(&Index, Val);
106 return Index;
107#endif
108 }
109};
110
111#if !defined(_MSC_VER) || defined(_M_X64)
112template <typename T>
113struct TrailingZerosCounter<T, 8> {
114 static std::size_t count(T Val, ZeroBehavior ZB) {
115 if (ZB != ZB_Undefined && Val == 0)
116 return 64;
117
118#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
119 return __builtin_ctzll(Val);
120#elif defined(_MSC_VER)
121 unsigned long Index;
122 _BitScanForward64(&Index, Val);
123 return Index;
124#endif
125 }
126};
127#endif
128#endif
129} // namespace detail
130
131/// Count number of 0's from the least significant bit to the most
132/// stopping at the first 1.
133///
134/// Only unsigned integral types are allowed.
135///
136/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
137/// valid arguments.
138template <typename T>
139std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
140 static_assert(
141 std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
142 "Only unsigned integral types are allowed.");
143 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
144}
145
146namespace detail {
147template <typename T, std::size_t SizeOfT>
148struct LeadingZerosCounter {
149 static std::size_t count(T Val, ZeroBehavior) {
150 if (!Val)
151 return std::numeric_limits<T>::digits;
152
153 // Bisection method.
154 std::size_t ZeroBits = 0;
155 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
156 T Tmp = Val >> Shift;
157 if (Tmp)
158 Val = Tmp;
159 else
160 ZeroBits |= Shift;
161 }
162 return ZeroBits;
163 }
164};
165
166#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
167template <typename T>
168struct LeadingZerosCounter<T, 4> {
169 static std::size_t count(T Val, ZeroBehavior ZB) {
170 if (ZB != ZB_Undefined && Val == 0)
171 return 32;
172
173#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
174 return __builtin_clz(Val);
175#elif defined(_MSC_VER)
176 unsigned long Index;
177 _BitScanReverse(&Index, Val);
178 return Index ^ 31;
179#endif
180 }
181};
182
183#if !defined(_MSC_VER) || defined(_M_X64)
184template <typename T>
185struct LeadingZerosCounter<T, 8> {
186 static std::size_t count(T Val, ZeroBehavior ZB) {
187 if (ZB != ZB_Undefined && Val == 0)
188 return 64;
189
190#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
191 return __builtin_clzll(Val);
192#elif defined(_MSC_VER)
193 unsigned long Index;
194 _BitScanReverse64(&Index, Val);
195 return Index ^ 63;
196#endif
197 }
198};
199#endif
200#endif
201} // namespace detail
202
203/// Count number of 0's from the most significant bit to the least
204/// stopping at the first 1.
205///
206/// Only unsigned integral types are allowed.
207///
208/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
209/// valid arguments.
210template <typename T>
211std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
212 static_assert(
213 std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
214 "Only unsigned integral types are allowed.");
215 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
216}
217
218/// Get the index of the first set bit starting from the least
219/// significant bit.
220///
221/// Only unsigned integral types are allowed.
222///
223/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
224/// valid arguments.
225template <typename T>
226T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
227 if (ZB == ZB_Max && Val == 0)
228 return std::numeric_limits<T>::max();
229
230 return countTrailingZeros(Val, ZB_Undefined);
231}
232
233/// Create a bitmask with the N right-most bits set to 1, and all other
234/// bits set to 0. Only unsigned types are allowed.
235template <typename T>
236T maskTrailingOnes(unsigned N) {
237 static_assert(std::is_unsigned<T>::value, "Invalid type!");
238 const unsigned Bits = CHAR_BIT * sizeof(T);
239 assert(N <= Bits && "Invalid bit index");
240 return N == 0 ? 0 : (T(-1) >> (Bits - N));
241}
242
243/// Create a bitmask with the N left-most bits set to 1, and all other
244/// bits set to 0. Only unsigned types are allowed.
245template <typename T>
246T maskLeadingOnes(unsigned N) {
247 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
248}
249
250/// Create a bitmask with the N right-most bits set to 0, and all other
251/// bits set to 1. Only unsigned types are allowed.
252template <typename T>
253T maskTrailingZeros(unsigned N) {
254 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
255}
256
257/// Create a bitmask with the N left-most bits set to 0, and all other
258/// bits set to 1. Only unsigned types are allowed.
259template <typename T>
260T maskLeadingZeros(unsigned N) {
261 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
262}
263
264/// Get the index of the last set bit starting from the least
265/// significant bit.
266///
267/// Only unsigned integral types are allowed.
268///
269/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
270/// valid arguments.
271template <typename T>
272T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
273 if (ZB == ZB_Max && Val == 0)
274 return std::numeric_limits<T>::max();
275
276 // Use ^ instead of - because both gcc and llvm can remove the associated ^
277 // in the __builtin_clz intrinsic on x86.
278 return countLeadingZeros(Val, ZB_Undefined) ^
279 (std::numeric_limits<T>::digits - 1);
280}
281
282/// Macro compressed bit reversal table for 256 bits.
283///
284/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
285static const unsigned char BitReverseTable256[256] = {
286#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
287#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
288#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
289 R6(0),
290 R6(2),
291 R6(1),
292 R6(3)
293#undef R2
294#undef R4
295#undef R6
296};
297
298/// Reverse the bits in \p Val.
299template <typename T>
300T reverseBits(T Val) {
301 unsigned char in[sizeof(Val)];
302 unsigned char out[sizeof(Val)];
303 std::memcpy(in, &Val, sizeof(Val));
304 for (unsigned i = 0; i < sizeof(Val); ++i)
305 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
306 std::memcpy(&Val, out, sizeof(Val));
307 return Val;
308}
309
310// NOTE: The following support functions use the _32/_64 extensions instead of
311// type overloading so that signed and unsigned integers can be used without
312// ambiguity.
313
314/// Return the high 32 bits of a 64 bit value.
315constexpr inline uint32_t Hi_32(uint64_t Value) {
316 return static_cast<uint32_t>(Value >> 32);
317}
318
319/// Return the low 32 bits of a 64 bit value.
320constexpr inline uint32_t Lo_32(uint64_t Value) {
321 return static_cast<uint32_t>(Value);
322}
323
324/// Make a 64-bit integer from a high / low pair of 32-bit integers.
325constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
326 return ((uint64_t)High << 32) | (uint64_t)Low;
327}
328
329/// Checks if an integer fits into the given bit width.
330template <unsigned N>
331constexpr inline bool isInt(int64_t x) {
332 return N >= 64 ||
333 (-(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1)));
334}
335// Template specializations to get better code for common cases.
336template <>
337constexpr inline bool isInt<8>(int64_t x) {
338 return static_cast<int8_t>(x) == x;
339}
340template <>
341constexpr inline bool isInt<16>(int64_t x) {
342 return static_cast<int16_t>(x) == x;
343}
344template <>
345constexpr inline bool isInt<32>(int64_t x) {
346 return static_cast<int32_t>(x) == x;
347}
348
349/// Checks if a signed integer is an N bit number shifted left by S.
350template <unsigned N, unsigned S>
351constexpr inline bool isShiftedInt(int64_t x) {
352 static_assert(
353 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
354 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
355 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
356}
357
358/// Checks if an unsigned integer fits into the given bit width.
359///
360/// This is written as two functions rather than as simply
361///
362/// return N >= 64 || X < (UINT64_C(1) << N);
363///
364/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
365/// left too many places.
366template <unsigned N>
367constexpr inline typename std::enable_if<(N < 64), bool>::type isUInt(
368 uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 return X < (UINT64_C(1) << (N));
371}
372template <unsigned N>
373constexpr inline typename std::enable_if<N >= 64, bool>::type isUInt(
374 uint64_t /*X*/) {
375 return true;
376}
377
378// Template specializations to get better code for common cases.
379template <>
380constexpr inline bool isUInt<8>(uint64_t x) {
381 return static_cast<uint8_t>(x) == x;
382}
383template <>
384constexpr inline bool isUInt<16>(uint64_t x) {
385 return static_cast<uint16_t>(x) == x;
386}
387template <>
388constexpr inline bool isUInt<32>(uint64_t x) {
389 return static_cast<uint32_t>(x) == x;
390}
391
392/// Checks if a unsigned integer is an N bit number shifted left by S.
393template <unsigned N, unsigned S>
394constexpr inline bool isShiftedUInt(uint64_t x) {
395 static_assert(
396 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
397 static_assert(
398 N + S <= 64, "isShiftedUInt<N, S> with N + S > 64 is too wide.");
399 // Per the two static_asserts above, S must be strictly less than 64. So
400 // 1 << S is not undefined behavior.
401 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
402}
403
404/// Gets the maximum value for a N-bit unsigned integer.
405inline uint64_t maxUIntN(uint64_t N) {
406 assert(N > 0 && N <= 64 && "integer width out of range");
407
408 // uint64_t(1) << 64 is undefined behavior, so we can't do
409 // (uint64_t(1) << N) - 1
410 // without checking first that N != 64. But this works and doesn't have a
411 // branch.
412 return UINT64_MAX >> (64 - N);
413}
414
415// Ignore the false warning "Arithmetic overflow" for MSVC
416#ifdef _MSC_VER
417#pragma warning(push)
418#pragma warning(disable : 4146)
419#endif
420
421/// Gets the minimum value for a N-bit signed integer.
422inline int64_t minIntN(int64_t N) {
423 assert(N > 0 && N <= 64 && "integer width out of range");
424
425 return -(UINT64_C(1) << (N - 1));
426}
427
428#ifdef _MSC_VER
429#pragma warning(pop)
430#endif
431
432/// Gets the maximum value for a N-bit signed integer.
433inline int64_t maxIntN(int64_t N) {
434 assert(N > 0 && N <= 64 && "integer width out of range");
435
436 // This relies on two's complement wraparound when N == 64, so we convert to
437 // int64_t only at the very end to avoid UB.
438 return (UINT64_C(1) << (N - 1)) - 1;
439}
440
441/// Checks if an unsigned integer fits into the given (dynamic) bit width.
442inline bool isUIntN(unsigned N, uint64_t x) {
443 return N >= 64 || x <= maxUIntN(N);
444}
445
446/// Checks if an signed integer fits into the given (dynamic) bit width.
447inline bool isIntN(unsigned N, int64_t x) {
448 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
449}
450
451/// Return true if the argument is a non-empty sequence of ones starting at the
452/// least significant bit with the remainder zero (32 bit version).
453/// Ex. isMask_32(0x0000FFFFU) == true.
454constexpr inline bool isMask_32(uint32_t Value) {
455 return Value && ((Value + 1) & Value) == 0;
456}
457
458/// Return true if the argument is a non-empty sequence of ones starting at the
459/// least significant bit with the remainder zero (64 bit version).
460constexpr inline bool isMask_64(uint64_t Value) {
461 return Value && ((Value + 1) & Value) == 0;
462}
463
464/// Return true if the argument contains a non-empty sequence of ones with the
465/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
466constexpr inline bool isShiftedMask_32(uint32_t Value) {
467 return Value && isMask_32((Value - 1) | Value);
468}
469
470/// Return true if the argument contains a non-empty sequence of ones with the
471/// remainder zero (64 bit version.)
472constexpr inline bool isShiftedMask_64(uint64_t Value) {
473 return Value && isMask_64((Value - 1) | Value);
474}
475
476/// Return true if the argument is a power of two > 0.
477/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
478constexpr inline bool isPowerOf2_32(uint32_t Value) {
479 return Value && !(Value & (Value - 1));
480}
481
482/// Return true if the argument is a power of two > 0 (64 bit edition.)
483constexpr inline bool isPowerOf2_64(uint64_t Value) {
484 return Value && !(Value & (Value - 1));
485}
486
487/// Count the number of ones from the most significant bit to the first
488/// zero bit.
489///
490/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
491/// Only unsigned integral types are allowed.
492///
493/// \param ZB the behavior on an input of all ones. Only ZB_Width and
494/// ZB_Undefined are valid arguments.
495template <typename T>
496std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
497 static_assert(
498 std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
499 "Only unsigned integral types are allowed.");
500 return countLeadingZeros<T>(~Value, ZB);
501}
502
503/// Count the number of ones from the least significant bit to the first
504/// zero bit.
505///
506/// Ex. countTrailingOnes(0x00FF00FF) == 8.
507/// Only unsigned integral types are allowed.
508///
509/// \param ZB the behavior on an input of all ones. Only ZB_Width and
510/// ZB_Undefined are valid arguments.
511template <typename T>
512std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
513 static_assert(
514 std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
515 "Only unsigned integral types are allowed.");
516 return countTrailingZeros<T>(~Value, ZB);
517}
518
519namespace detail {
520template <typename T, std::size_t SizeOfT>
521struct PopulationCounter {
522 static unsigned count(T Value) {
523 // Generic version, forward to 32 bits.
524 static_assert(SizeOfT <= 4, "Not implemented!");
525#if defined(__GNUC__) && __GNUC__ >= 4
526 return __builtin_popcount(Value);
527#else
528 uint32_t v = Value;
529 v = v - ((v >> 1) & 0x55555555);
530 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
531 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
532#endif
533 }
534};
535
536template <typename T>
537struct PopulationCounter<T, 8> {
538 static unsigned count(T Value) {
539#if defined(__GNUC__) && __GNUC__ >= 4
540 return __builtin_popcountll(Value);
541#else
542 uint64_t v = Value;
543 v = v - ((v >> 1) & 0x5555555555555555ULL);
544 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
545 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
546 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
547#endif
548 }
549};
550} // namespace detail
551
552/// Count the number of set bits in a value.
553/// Ex. countPopulation(0xF000F000) = 8
554/// Returns 0 if the word is zero.
555template <typename T>
556inline unsigned countPopulation(T Value) {
557 static_assert(
558 std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
559 "Only unsigned integral types are allowed.");
560 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
561}
562
563/// Return the log base 2 of the specified value.
564inline double Log2(double Value) {
565#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
566 return __builtin_log(Value) / __builtin_log(2.0);
567#else
568 return log2(Value);
569#endif
570}
571
572/// Return the floor log base 2 of the specified value, -1 if the value is zero.
573/// (32 bit edition.)
574/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
575inline unsigned Log2_32(uint32_t Value) {
576 return static_cast<unsigned>(31 - countLeadingZeros(Value));
577}
578
579/// Return the floor log base 2 of the specified value, -1 if the value is zero.
580/// (64 bit edition.)
581inline unsigned Log2_64(uint64_t Value) {
582 return static_cast<unsigned>(63 - countLeadingZeros(Value));
583}
584
585/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
586/// (32 bit edition).
587/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
588inline unsigned Log2_32_Ceil(uint32_t Value) {
589 return static_cast<unsigned>(32 - countLeadingZeros(Value - 1));
590}
591
592/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
593/// (64 bit edition.)
594inline unsigned Log2_64_Ceil(uint64_t Value) {
595 return static_cast<unsigned>(64 - countLeadingZeros(Value - 1));
596}
597
598/// Return the greatest common divisor of the values using Euclid's algorithm.
599inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
600 while (B) {
601 uint64_t T = B;
602 B = A % B;
603 A = T;
604 }
605 return A;
606}
607
608/// This function takes a 64-bit integer and returns the bit equivalent double.
609inline double BitsToDouble(uint64_t Bits) {
610 double D;
611 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
612 memcpy(&D, &Bits, sizeof(Bits));
613 return D;
614}
615
616/// This function takes a 32-bit integer and returns the bit equivalent float.
617inline float BitsToFloat(uint32_t Bits) {
618 // TODO: Use bit_cast once C++20 becomes available.
619 float F;
620 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
621 memcpy(&F, &Bits, sizeof(Bits));
622 return F;
623}
624
625/// This function takes a double and returns the bit equivalent 64-bit integer.
626/// Note that copying doubles around changes the bits of NaNs on some hosts,
627/// notably x86, so this routine cannot be used if these bits are needed.
628inline uint64_t DoubleToBits(double Double) {
629 uint64_t Bits;
630 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
631 memcpy(&Bits, &Double, sizeof(Double));
632 return Bits;
633}
634
635/// This function takes a float and returns the bit equivalent 32-bit integer.
636/// Note that copying floats around changes the bits of NaNs on some hosts,
637/// notably x86, so this routine cannot be used if these bits are needed.
638inline uint32_t FloatToBits(float Float) {
639 uint32_t Bits;
640 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
641 memcpy(&Bits, &Float, sizeof(Float));
642 return Bits;
643}
644
645/// A and B are either alignments or offsets. Return the minimum alignment that
646/// may be assumed after adding the two together.
647constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
648 // The largest power of 2 that divides both A and B.
649 //
650 // Replace "-Value" by "1+~Value" in the following commented code to avoid
651 // MSVC warning C4146
652 // return (A | B) & -(A | B);
653 return (A | B) & (1 + ~(A | B));
654}
655
656/// Aligns \c Addr to \c Alignment bytes, rounding up.
657///
658/// Alignment should be a power of two. This method rounds up, so
659/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
660inline uintptr_t alignAddr(const void* Addr, size_t Alignment) {
661 assert(
662 Alignment && isPowerOf2_64((uint64_t)Alignment) &&
663 "Alignment is not a power of two!");
664
665 assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
666
667 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
668}
669
670/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
671/// bytes, rounding up.
672inline size_t alignmentAdjustment(const void* Ptr, size_t Alignment) {
673 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
674}
675
676/// Returns the next power of two (in 64-bits) that is strictly greater than A.
677/// Returns zero on overflow.
678inline uint64_t NextPowerOf2(uint64_t A) {
679 A |= (A >> 1);
680 A |= (A >> 2);
681 A |= (A >> 4);
682 A |= (A >> 8);
683 A |= (A >> 16);
684 A |= (A >> 32);
685 return A + 1;
686}
687
688/// Returns the power of two which is less than or equal to the given value.
689/// Essentially, it is a floor operation across the domain of powers of two.
690inline uint64_t PowerOf2Floor(uint64_t A) {
691 if (!A)
692 return 0;
693 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
694}
695
696/// Returns the power of two which is greater than or equal to the given value.
697/// Essentially, it is a ceil operation across the domain of powers of two.
698inline uint64_t PowerOf2Ceil(uint64_t A) {
699 if (!A)
700 return 0;
701 return NextPowerOf2(A - 1);
702}
703
704/// Returns the next integer (mod 2**64) that is greater than or equal to
705/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
706///
707/// If non-zero \p Skew is specified, the return value will be a minimal
708/// integer that is greater than or equal to \p Value and equal to
709/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
710/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
711///
712/// Examples:
713/// \code
714/// alignTo(5, 8) = 8
715/// alignTo(17, 8) = 24
716/// alignTo(~0LL, 8) = 0
717/// alignTo(321, 255) = 510
718///
719/// alignTo(5, 8, 7) = 7
720/// alignTo(17, 8, 1) = 17
721/// alignTo(~0LL, 8, 3) = 3
722/// alignTo(321, 255, 42) = 552
723/// \endcode
724inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
725 assert(Align != 0u && "Align can't be 0.");
726 Skew %= Align;
727 return (Value + Align - 1 - Skew) / Align * Align + Skew;
728}
729
730/// Returns the next integer (mod 2**64) that is greater than or equal to
731/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
732template <uint64_t Align>
733constexpr inline uint64_t alignTo(uint64_t Value) {
734 static_assert(Align != 0u, "Align must be non-zero");
735 return (Value + Align - 1) / Align * Align;
736}
737
738/// Returns the integer ceil(Numerator / Denominator).
739inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
740 return alignTo(Numerator, Denominator) / Denominator;
741}
742
743/// \c alignTo for contexts where a constant expression is required.
744/// \sa alignTo
745///
746/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
747template <uint64_t Align>
748struct AlignTo {
749 static_assert(Align != 0u, "Align must be non-zero");
750 template <uint64_t Value>
751 struct from_value {
752 static const uint64_t value = (Value + Align - 1) / Align * Align;
753 };
754};
755
756/// Returns the largest uint64_t less than or equal to \p Value and is
757/// \p Skew mod \p Align. \p Align must be non-zero
758inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
759 assert(Align != 0u && "Align can't be 0.");
760 Skew %= Align;
761 return (Value - Skew) / Align * Align + Skew;
762}
763
764/// Returns the offset to the next integer (mod 2**64) that is greater than
765/// or equal to \p Value and is a multiple of \p Align. \p Align must be
766/// non-zero.
767inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
768 return alignTo(Value, Align) - Value;
769}
770
771/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
772/// Requires 0 < B <= 32.
773template <unsigned B>
774constexpr inline int32_t SignExtend32(uint32_t X) {
775 static_assert(B > 0, "Bit width can't be 0.");
776 static_assert(B <= 32, "Bit width out of range.");
777 return int32_t(X << (32 - B)) >> (32 - B);
778}
779
780/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
781/// Requires 0 < B < 32.
782inline int32_t SignExtend32(uint32_t X, unsigned B) {
783 assert(B > 0 && "Bit width can't be 0.");
784 assert(B <= 32 && "Bit width out of range.");
785 return int32_t(X << (32 - B)) >> (32 - B);
786}
787
788/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
789/// Requires 0 < B < 64.
790template <unsigned B>
791constexpr inline int64_t SignExtend64(uint64_t x) {
792 static_assert(B > 0, "Bit width can't be 0.");
793 static_assert(B <= 64, "Bit width out of range.");
794 return int64_t(x << (64 - B)) >> (64 - B);
795}
796
797/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
798/// Requires 0 < B < 64.
799inline int64_t SignExtend64(uint64_t X, unsigned B) {
800 assert(B > 0 && "Bit width can't be 0.");
801 assert(B <= 64 && "Bit width out of range.");
802 return int64_t(X << (64 - B)) >> (64 - B);
803}
804
805/// Subtract two unsigned integers, X and Y, of type T and return the absolute
806/// value of the result.
807template <typename T>
808typename std::enable_if<std::is_unsigned<T>::value, T>::type AbsoluteDifference(
809 T X,
810 T Y) {
811 return std::max(X, Y) - std::min(X, Y);
812}
813
814/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
815/// maximum representable value of T on overflow. ResultOverflowed indicates if
816/// the result is larger than the maximum representable value of type T.
817template <typename T>
818typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingAdd(
819 T X,
820 T Y,
821 bool* ResultOverflowed = nullptr) {
822 bool Dummy;
823 bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
824 // Hacker's Delight, p. 29
825 T Z = X + Y;
826 Overflowed = (Z < X || Z < Y);
827 if (Overflowed)
828 return std::numeric_limits<T>::max();
829 else
830 return Z;
831}
832
833/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
834/// maximum representable value of T on overflow. ResultOverflowed indicates if
835/// the result is larger than the maximum representable value of type T.
836template <typename T>
837typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingMultiply(
838 T X,
839 T Y,
840 bool* ResultOverflowed = nullptr) {
841 bool Dummy;
842 bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
843
844 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
845 // because it fails for uint16_t (where multiplication can have undefined
846 // behavior due to promotion to int), and requires a division in addition
847 // to the multiplication.
848
849 Overflowed = false;
850
851 // Log2(Z) would be either Log2Z or Log2Z + 1.
852 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
853 // will necessarily be less than Log2Max as desired.
854 int Log2Z = Log2_64(X) + Log2_64(Y);
855 const T Max = std::numeric_limits<T>::max();
856 int Log2Max = Log2_64(Max);
857 if (Log2Z < Log2Max) {
858 return X * Y;
859 }
860 if (Log2Z > Log2Max) {
861 Overflowed = true;
862 return Max;
863 }
864
865 // We're going to use the top bit, and maybe overflow one
866 // bit past it. Multiply all but the bottom bit then add
867 // that on at the end.
868 T Z = (X >> 1) * Y;
869 if (Z & ~(Max >> 1)) {
870 Overflowed = true;
871 return Max;
872 }
873 Z <<= 1;
874 if (X & 1)
875 return SaturatingAdd(Z, Y, ResultOverflowed);
876
877 return Z;
878}
879
880/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
881/// the product. Clamp the result to the maximum representable value of T on
882/// overflow. ResultOverflowed indicates if the result is larger than the
883/// maximum representable value of type T.
884template <typename T>
885typename std::enable_if<std::is_unsigned<T>::value, T>::type
886SaturatingMultiplyAdd(T X, T Y, T A, bool* ResultOverflowed = nullptr) {
887 bool Dummy;
888 bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
889
890 T Product = SaturatingMultiply(X, Y, &Overflowed);
891 if (Overflowed)
892 return Product;
893
894 return SaturatingAdd(A, Product, &Overflowed);
895}
896
897/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
898extern const float huge_valf;
899} // namespace llvm
900} // namespace c10
901