1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some functions that are useful for math stuff. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #pragma once |
14 | |
15 | #include <algorithm> |
16 | #include <cassert> |
17 | #include <climits> |
18 | #include <cmath> |
19 | #include <cstdint> |
20 | #include <cstring> |
21 | #include <limits> |
22 | #include <type_traits> |
23 | |
24 | #ifdef __ANDROID_NDK__ |
25 | #include <android/api-level.h> |
26 | #endif |
27 | |
28 | #ifndef __has_builtin |
29 | #define __has_builtin(x) 0 |
30 | #endif |
31 | |
32 | #ifndef LLVM_GNUC_PREREQ |
33 | #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) |
34 | #define LLVM_GNUC_PREREQ(maj, min, patch) \ |
35 | ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \ |
36 | ((maj) << 20) + ((min) << 10) + (patch)) |
37 | #elif defined(__GNUC__) && defined(__GNUC_MINOR__) |
38 | #define LLVM_GNUC_PREREQ(maj, min, patch) \ |
39 | ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10)) |
40 | #else |
41 | #define LLVM_GNUC_PREREQ(maj, min, patch) 0 |
42 | #endif |
43 | #endif |
44 | |
45 | #ifdef _MSC_VER |
46 | // Declare these intrinsics manually rather including intrin.h. It's very |
47 | // expensive, and MathExtras.h is popular. |
48 | // #include <intrin.h> |
49 | extern "C" { |
50 | unsigned char _BitScanForward(unsigned long* _Index, unsigned long _Mask); |
51 | unsigned char _BitScanForward64(unsigned long* _Index, unsigned __int64 _Mask); |
52 | unsigned char _BitScanReverse(unsigned long* _Index, unsigned long _Mask); |
53 | unsigned char _BitScanReverse64(unsigned long* _Index, unsigned __int64 _Mask); |
54 | } |
55 | #endif |
56 | |
57 | namespace c10 { |
58 | namespace llvm { |
/// The behavior an operation has on an input of 0.
/// Passed to the bit-counting helpers below; callers pick the result they
/// want for the degenerate all-zero (or, for the *Ones variants, all-one)
/// input instead of hitting the undefined behavior of the hardware
/// intrinsics.
enum ZeroBehavior {
  /// The returned value is undefined.
  ZB_Undefined,
  /// The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// The returned value is numeric_limits<T>::digits
  ZB_Width
};
68 | |
69 | namespace detail { |
70 | template <typename T, std::size_t SizeOfT> |
71 | struct TrailingZerosCounter { |
72 | static std::size_t count(T Val, ZeroBehavior) { |
73 | if (!Val) |
74 | return std::numeric_limits<T>::digits; |
75 | if (Val & 0x1) |
76 | return 0; |
77 | |
78 | // Bisection method. |
79 | std::size_t ZeroBits = 0; |
80 | T Shift = std::numeric_limits<T>::digits >> 1; |
81 | T Mask = std::numeric_limits<T>::max() >> Shift; |
82 | while (Shift) { |
83 | if ((Val & Mask) == 0) { |
84 | Val >>= Shift; |
85 | ZeroBits |= Shift; |
86 | } |
87 | Shift >>= 1; |
88 | Mask >>= Shift; |
89 | } |
90 | return ZeroBits; |
91 | } |
92 | }; |
93 | |
#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
// 32-bit specialization: use a single count-trailing-zeros instruction where
// the compiler provides one.
template <typename T>
struct TrailingZerosCounter<T, 4> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    // The intrinsics below are undefined for an input of 0, so any mode
    // other than ZB_Undefined must handle 0 explicitly first.
    if (ZB != ZB_Undefined && Val == 0)
      return 32;

#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_ctz(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanForward(&Index, Val);
    return Index;
#endif
  }
};

// _BitScanForward64 only exists on 64-bit MSVC targets; 32-bit MSVC falls
// back to the generic bisection/shift implementation for 8-byte types.
#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T>
struct TrailingZerosCounter<T, 8> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 64;

#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_ctzll(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanForward64(&Index, Val);
    return Index;
#endif
  }
};
#endif
#endif
129 | } // namespace detail |
130 | |
131 | /// Count number of 0's from the least significant bit to the most |
132 | /// stopping at the first 1. |
133 | /// |
134 | /// Only unsigned integral types are allowed. |
135 | /// |
136 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
137 | /// valid arguments. |
138 | template <typename T> |
139 | std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
140 | static_assert( |
141 | std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed, |
142 | "Only unsigned integral types are allowed." ); |
143 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
144 | } |
145 | |
146 | namespace detail { |
147 | template <typename T, std::size_t SizeOfT> |
148 | struct LeadingZerosCounter { |
149 | static std::size_t count(T Val, ZeroBehavior) { |
150 | if (!Val) |
151 | return std::numeric_limits<T>::digits; |
152 | |
153 | // Bisection method. |
154 | std::size_t ZeroBits = 0; |
155 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { |
156 | T Tmp = Val >> Shift; |
157 | if (Tmp) |
158 | Val = Tmp; |
159 | else |
160 | ZeroBits |= Shift; |
161 | } |
162 | return ZeroBits; |
163 | } |
164 | }; |
165 | |
#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
// 32-bit specialization: use a single count-leading-zeros instruction where
// the compiler provides one.
template <typename T>
struct LeadingZerosCounter<T, 4> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    // The intrinsics below are undefined for an input of 0, so any mode
    // other than ZB_Undefined must handle 0 explicitly first.
    if (ZB != ZB_Undefined && Val == 0)
      return 32;

#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_clz(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanReverse(&Index, Val);
    // _BitScanReverse yields the index of the highest set bit; XOR with 31
    // converts that into the number of leading zeros.
    return Index ^ 31;
#endif
  }
};

// _BitScanReverse64 only exists on 64-bit MSVC targets; 32-bit MSVC falls
// back to the generic implementation for 8-byte types.
#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T>
struct LeadingZerosCounter<T, 8> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 64;

#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_clzll(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanReverse64(&Index, Val);
    return Index ^ 63;
#endif
  }
};
#endif
#endif
201 | } // namespace detail |
202 | |
203 | /// Count number of 0's from the most significant bit to the least |
204 | /// stopping at the first 1. |
205 | /// |
206 | /// Only unsigned integral types are allowed. |
207 | /// |
208 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
209 | /// valid arguments. |
210 | template <typename T> |
211 | std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
212 | static_assert( |
213 | std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed, |
214 | "Only unsigned integral types are allowed." ); |
215 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
216 | } |
217 | |
218 | /// Get the index of the first set bit starting from the least |
219 | /// significant bit. |
220 | /// |
221 | /// Only unsigned integral types are allowed. |
222 | /// |
223 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
224 | /// valid arguments. |
225 | template <typename T> |
226 | T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { |
227 | if (ZB == ZB_Max && Val == 0) |
228 | return std::numeric_limits<T>::max(); |
229 | |
230 | return countTrailingZeros(Val, ZB_Undefined); |
231 | } |
232 | |
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T>
T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  constexpr unsigned TotalBits = CHAR_BIT * sizeof(T);
  assert(N <= TotalBits && "Invalid bit index");
  // N == 0 must be handled separately: shifting by the full width below
  // would be undefined behavior.
  if (N == 0)
    return T(0);
  return T(-1) >> (TotalBits - N);
}
242 | |
243 | /// Create a bitmask with the N left-most bits set to 1, and all other |
244 | /// bits set to 0. Only unsigned types are allowed. |
245 | template <typename T> |
246 | T maskLeadingOnes(unsigned N) { |
247 | return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
248 | } |
249 | |
250 | /// Create a bitmask with the N right-most bits set to 0, and all other |
251 | /// bits set to 1. Only unsigned types are allowed. |
252 | template <typename T> |
253 | T maskTrailingZeros(unsigned N) { |
254 | return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); |
255 | } |
256 | |
257 | /// Create a bitmask with the N left-most bits set to 0, and all other |
258 | /// bits set to 1. Only unsigned types are allowed. |
259 | template <typename T> |
260 | T maskLeadingZeros(unsigned N) { |
261 | return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
262 | } |
263 | |
264 | /// Get the index of the last set bit starting from the least |
265 | /// significant bit. |
266 | /// |
267 | /// Only unsigned integral types are allowed. |
268 | /// |
269 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
270 | /// valid arguments. |
271 | template <typename T> |
272 | T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { |
273 | if (ZB == ZB_Max && Val == 0) |
274 | return std::numeric_limits<T>::max(); |
275 | |
276 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ |
277 | // in the __builtin_clz intrinsic on x86. |
278 | return countLeadingZeros(Val, ZB_Undefined) ^ |
279 | (std::numeric_limits<T>::digits - 1); |
280 | } |
281 | |
/// Macro compressed bit reversal table for 256 bits.
///
/// Entry i holds byte i with its bits reversed; the nested R2/R4/R6 macros
/// expand to all 256 entries, interleaved so that each expansion level adds
/// the next pair of reversed bits.
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
    R6(0),
    R6(2),
    R6(1),
    R6(3)
#undef R2
#undef R4
#undef R6
};
297 | |
298 | /// Reverse the bits in \p Val. |
299 | template <typename T> |
300 | T reverseBits(T Val) { |
301 | unsigned char in[sizeof(Val)]; |
302 | unsigned char out[sizeof(Val)]; |
303 | std::memcpy(in, &Val, sizeof(Val)); |
304 | for (unsigned i = 0; i < sizeof(Val); ++i) |
305 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; |
306 | std::memcpy(&Val, out, sizeof(Val)); |
307 | return Val; |
308 | } |
309 | |
310 | // NOTE: The following support functions use the _32/_64 extensions instead of |
311 | // type overloading so that signed and unsigned integers can be used without |
312 | // ambiguity. |
313 | |
/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>((Value >> 32) & UINT64_C(0xFFFFFFFF));
}
318 | |
/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  return static_cast<uint32_t>(Value & UINT64_C(0xFFFFFFFF));
}
323 | |
/// Make a 64-bit integer from a high / low pair of 32-bit integers.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  // Use static_cast instead of the previous C-style casts: same semantics,
  // but greppable and immune to accidentally becoming a reinterpret_cast.
  // Widening High before the shift keeps the shift in 64-bit arithmetic.
  return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
328 | |
/// Checks if an integer fits into the given bit width.
template <unsigned N>
constexpr inline bool isInt(int64_t x) {
  // Widths of 64 or more always fit; otherwise x must lie within the
  // two's-complement range [-2^(N-1), 2^(N-1) - 1].
  return N >= 64 ||
      (x >= -(INT64_C(1) << (N - 1)) && x <= (INT64_C(1) << (N - 1)) - 1);
}
// Template specializations to get better code for common cases.
template <>
constexpr inline bool isInt<8>(int64_t x) {
  return x == static_cast<int8_t>(x);
}
template <>
constexpr inline bool isInt<16>(int64_t x) {
  return x == static_cast<int16_t>(x);
}
template <>
constexpr inline bool isInt<32>(int64_t x) {
  return x == static_cast<int32_t>(x);
}
348 | |
349 | /// Checks if a signed integer is an N bit number shifted left by S. |
350 | template <unsigned N, unsigned S> |
351 | constexpr inline bool isShiftedInt(int64_t x) { |
352 | static_assert( |
353 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number." ); |
354 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide." ); |
355 | return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
356 | } |
357 | |
/// Checks if an unsigned integer fits into the given bit width.
///
/// This is written as two functions rather than as simply
///
///   return N >= 64 || X < (UINT64_C(1) << N);
///
/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
/// left too many places.
template <unsigned N>
constexpr inline typename std::enable_if<(N < 64), bool>::type isUInt(
    uint64_t X) {
  static_assert(N > 0, "isUInt<0> doesn't make sense");
  return X < (UINT64_C(1) << (N));
}
template <unsigned N>
constexpr inline typename std::enable_if<N >= 64, bool>::type isUInt(
    uint64_t /*X*/) {
  // Every uint64_t fits in 64 or more bits.
  return true;
}

// Template specializations to get better code for common cases.
template <>
constexpr inline bool isUInt<8>(uint64_t x) {
  return x == static_cast<uint8_t>(x);
}
template <>
constexpr inline bool isUInt<16>(uint64_t x) {
  return x == static_cast<uint16_t>(x);
}
template <>
constexpr inline bool isUInt<32>(uint64_t x) {
  return x == static_cast<uint32_t>(x);
}
391 | |
392 | /// Checks if a unsigned integer is an N bit number shifted left by S. |
393 | template <unsigned N, unsigned S> |
394 | constexpr inline bool isShiftedUInt(uint64_t x) { |
395 | static_assert( |
396 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)" ); |
397 | static_assert( |
398 | N + S <= 64, "isShiftedUInt<N, S> with N + S > 64 is too wide." ); |
399 | // Per the two static_asserts above, S must be strictly less than 64. So |
400 | // 1 << S is not undefined behavior. |
401 | return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
402 | } |
403 | |
/// Gets the maximum value for a N-bit unsigned integer.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // Shifting UINT64_MAX right, instead of computing (1 << N) - 1, stays
  // well-defined for N == 64 (where 1 << 64 would be UB) and needs no
  // branch.
  return UINT64_MAX >> (64 - N);
}
414 | |
// Ignore the false warning "Arithmetic overflow" for MSVC
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4146)
#endif

/// Gets the minimum value for a N-bit signed integer.
/// E.g. minIntN(8) == -128. Requires 0 < N <= 64.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // The shift is performed on an unsigned value so it is defined even for
  // N == 64; negating the unsigned result (the intentional operation MSVC's
  // C4146 warns about, suppressed above) and converting to int64_t yields
  // -2^(N-1) on the two's-complement targets this code supports.
  return -(UINT64_C(1) << (N - 1));
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif
431 | |
/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // Compute entirely in uint64_t so that N == 64 wraps around instead of
  // overflowing; the result 2^(N-1) - 1 always fits in int64_t, so the
  // conversion on return is exact.
  const uint64_t Result = (UINT64_C(1) << (N - 1)) - 1;
  return Result;
}
440 | |
441 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. |
442 | inline bool isUIntN(unsigned N, uint64_t x) { |
443 | return N >= 64 || x <= maxUIntN(N); |
444 | } |
445 | |
446 | /// Checks if an signed integer fits into the given (dynamic) bit width. |
447 | inline bool isIntN(unsigned N, int64_t x) { |
448 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); |
449 | } |
450 | |
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // A mask plus one is a power of two (or zero), so it shares no bits with
  // the mask itself.
  return Value != 0 && (Value & (Value + 1)) == 0;
}
457 | |
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  // A mask plus one is a power of two (or zero), so it shares no bits with
  // the mask itself.
  return Value != 0 && (Value & (Value + 1)) == 0;
}
463 | |
464 | /// Return true if the argument contains a non-empty sequence of ones with the |
465 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
466 | constexpr inline bool isShiftedMask_32(uint32_t Value) { |
467 | return Value && isMask_32((Value - 1) | Value); |
468 | } |
469 | |
470 | /// Return true if the argument contains a non-empty sequence of ones with the |
471 | /// remainder zero (64 bit version.) |
472 | constexpr inline bool isShiftedMask_64(uint64_t Value) { |
473 | return Value && isMask_64((Value - 1) | Value); |
474 | } |
475 | |
/// Return true if the argument is a power of two > 0.
/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // A power of two has exactly one set bit, so clearing its lowest set bit
  // leaves zero.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
481 | |
/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  // A power of two has exactly one set bit, so clearing its lowest set bit
  // leaves zero.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
486 | |
487 | /// Count the number of ones from the most significant bit to the first |
488 | /// zero bit. |
489 | /// |
490 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
491 | /// Only unsigned integral types are allowed. |
492 | /// |
493 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
494 | /// ZB_Undefined are valid arguments. |
495 | template <typename T> |
496 | std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
497 | static_assert( |
498 | std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed, |
499 | "Only unsigned integral types are allowed." ); |
500 | return countLeadingZeros<T>(~Value, ZB); |
501 | } |
502 | |
503 | /// Count the number of ones from the least significant bit to the first |
504 | /// zero bit. |
505 | /// |
506 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
507 | /// Only unsigned integral types are allowed. |
508 | /// |
509 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
510 | /// ZB_Undefined are valid arguments. |
511 | template <typename T> |
512 | std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
513 | static_assert( |
514 | std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed, |
515 | "Only unsigned integral types are allowed." ); |
516 | return countTrailingZeros<T>(~Value, ZB); |
517 | } |
518 | |
519 | namespace detail { |
// Count set bits in values of at most 32 bits (larger types use the 8-byte
// specialization below).
template <typename T, std::size_t SizeOfT>
struct PopulationCounter {
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__) && __GNUC__ >= 4
    return __builtin_popcount(Value);
#else
    // Parallel bit count ("Bit Twiddling Hacks"): sum bits in 2-bit, then
    // 4-bit groups; the final multiply accumulates all byte sums into the
    // top byte, which the shift extracts.
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
#endif
  }
};
535 | |
// 64-bit specialization of the population counter.
template <typename T>
struct PopulationCounter<T, 8> {
  static unsigned count(T Value) {
#if defined(__GNUC__) && __GNUC__ >= 4
    return __builtin_popcountll(Value);
#else
    // Same parallel bit count as the 32-bit fallback, widened to 64 bits.
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
550 | } // namespace detail |
551 | |
552 | /// Count the number of set bits in a value. |
553 | /// Ex. countPopulation(0xF000F000) = 8 |
554 | /// Returns 0 if the word is zero. |
555 | template <typename T> |
556 | inline unsigned countPopulation(T Value) { |
557 | static_assert( |
558 | std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed, |
559 | "Only unsigned integral types are allowed." ); |
560 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); |
561 | } |
562 | |
/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
  // Android API levels below 18 are special-cased here; fall back to the
  // identity log2(x) == log(x) / log(2) using compiler builtins.
  return __builtin_log(Value) / __builtin_log(2.0);
#else
  return log2(Value);
#endif
}
571 | |
572 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
573 | /// (32 bit edition.) |
574 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |
575 | inline unsigned Log2_32(uint32_t Value) { |
576 | return static_cast<unsigned>(31 - countLeadingZeros(Value)); |
577 | } |
578 | |
579 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
580 | /// (64 bit edition.) |
581 | inline unsigned Log2_64(uint64_t Value) { |
582 | return static_cast<unsigned>(63 - countLeadingZeros(Value)); |
583 | } |
584 | |
585 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |
586 | /// (32 bit edition). |
587 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |
588 | inline unsigned Log2_32_Ceil(uint32_t Value) { |
589 | return static_cast<unsigned>(32 - countLeadingZeros(Value - 1)); |
590 | } |
591 | |
592 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |
593 | /// (64 bit edition.) |
594 | inline unsigned Log2_64_Ceil(uint64_t Value) { |
595 | return static_cast<unsigned>(64 - countLeadingZeros(Value - 1)); |
596 | } |
597 | |
/// Return the greatest common divisor of the values using Euclid's algorithm.
inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
  while (B != 0) {
    const uint64_t Remainder = A % B;
    A = B;
    B = Remainder;
  }
  return A;
}
607 | |
/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // memcpy is the well-defined way to type-pun; compilers optimize it away.
  double Result;
  std::memcpy(&Result, &Bits, sizeof(Bits));
  return Result;
}
615 | |
/// This function takes a 32-bit integer and returns the bit equivalent float.
inline float BitsToFloat(uint32_t Bits) {
  // TODO: Use bit_cast once C++20 becomes available.
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // memcpy is the well-defined way to type-pun; compilers optimize it away.
  float Result;
  std::memcpy(&Result, &Bits, sizeof(Bits));
  return Result;
}
624 | |
/// This function takes a double and returns the bit equivalent 64-bit integer.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // memcpy is the well-defined way to type-pun; compilers optimize it away.
  uint64_t Result;
  std::memcpy(&Result, &Double, sizeof(Double));
  return Result;
}
634 | |
/// This function takes a float and returns the bit equivalent 32-bit integer.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // memcpy is the well-defined way to type-pun; compilers optimize it away.
  uint32_t Result;
  std::memcpy(&Result, &Float, sizeof(Float));
  return Result;
}
644 | |
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // The answer is the largest power of 2 dividing both A and B, i.e. the
  // lowest set bit of (A | B). Two's-complement negation is spelled
  // (~x + 1) rather than -x to sidestep MSVC warning C4146 about negating
  // an unsigned value.
  return (A | B) & (~(A | B) + 1);
}
655 | |
656 | /// Aligns \c Addr to \c Alignment bytes, rounding up. |
657 | /// |
658 | /// Alignment should be a power of two. This method rounds up, so |
659 | /// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8. |
660 | inline uintptr_t alignAddr(const void* Addr, size_t Alignment) { |
661 | assert( |
662 | Alignment && isPowerOf2_64((uint64_t)Alignment) && |
663 | "Alignment is not a power of two!" ); |
664 | |
665 | assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr); |
666 | |
667 | return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1)); |
668 | } |
669 | |
670 | /// Returns the necessary adjustment for aligning \c Ptr to \c Alignment |
671 | /// bytes, rounding up. |
672 | inline size_t alignmentAdjustment(const void* Ptr, size_t Alignment) { |
673 | return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; |
674 | } |
675 | |
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts of
  // 1, 2, 4, 8, 16, 32 cover all 64 bits), then add one.
  for (unsigned Shift = 1; Shift < 64; Shift <<= 1)
    A |= (A >> Shift);
  return A + 1;
}
687 | |
688 | /// Returns the power of two which is less than or equal to the given value. |
689 | /// Essentially, it is a floor operation across the domain of powers of two. |
690 | inline uint64_t PowerOf2Floor(uint64_t A) { |
691 | if (!A) |
692 | return 0; |
693 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); |
694 | } |
695 | |
696 | /// Returns the power of two which is greater than or equal to the given value. |
697 | /// Essentially, it is a ceil operation across the domain of powers of two. |
698 | inline uint64_t PowerOf2Ceil(uint64_t A) { |
699 | if (!A) |
700 | return 0; |
701 | return NextPowerOf2(A - 1); |
702 | } |
703 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
/// If non-zero \p Skew is specified, the return value will be a minimal
/// integer that is greater than or equal to \p Value and equal to
/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
///
/// Examples:
/// \code
///   alignTo(5, 8) = 8
///   alignTo(17, 8) = 24
///   alignTo(~0LL, 8) = 0
///   alignTo(321, 255) = 510
///
///   alignTo(5, 8, 7) = 7
///   alignTo(17, 8, 1) = 17
///   alignTo(~0LL, 8, 3) = 3
///   alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Bias by (Align - 1 - Skew) so truncating to a multiple of Align (via
  // x - x % Align) rounds Value up; then restore the skew.
  const uint64_t Biased = Value + Align - 1 - Skew;
  return Biased - Biased % Align + Skew;
}
729 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align>
constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Round up by biasing before the truncating division.
  return ((Value + Align - 1) / Align) * Align;
}
737 | |
738 | /// Returns the integer ceil(Numerator / Denominator). |
739 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |
740 | return alignTo(Numerator, Denominator) / Denominator; |
741 | } |
742 | |
/// \c alignTo for contexts where a constant expression is required.
/// \sa alignTo
///
/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
template <uint64_t Align>
struct AlignTo {
  static_assert(Align != 0u, "Align must be non-zero");
  // Usage: AlignTo<A>::from_value<V>::value is V rounded up to the nearest
  // multiple of A, evaluated entirely at compile time.
  template <uint64_t Value>
  struct from_value {
    static const uint64_t value = (Value + Align - 1) / Align * Align;
  };
};
755 | |
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Drop the excess of (Value - Skew) over the previous multiple of Align.
  return Value - (Value - Skew) % Align;
}
763 | |
764 | /// Returns the offset to the next integer (mod 2**64) that is greater than |
765 | /// or equal to \p Value and is a multiple of \p Align. \p Align must be |
766 | /// non-zero. |
767 | inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) { |
768 | return alignTo(Value, Align) - Value; |
769 | } |
770 | |
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B>
constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Shift the B-bit field up to the top, then arithmetic-shift it back down
  // so bit B-1 fills the upper bits.
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}
779 | |
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Shift the B-bit field up to the top, then arithmetic-shift it back down
  // so bit B-1 fills the upper bits.
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}
787 | |
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
template <unsigned B>
constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Shift the B-bit field up to the top, then arithmetic-shift it back down
  // so bit B-1 fills the upper bits.
  return static_cast<int64_t>(x << (64 - B)) >> (64 - B);
}
796 | |
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  // Shift the B-bit field up to the top, then arithmetic-shift it back down
  // so bit B-1 fills the upper bits.
  return static_cast<int64_t>(X << (64 - B)) >> (64 - B);
}
804 | |
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type AbsoluteDifference(
    T X,
    T Y) {
  // Subtract the smaller from the larger so the unsigned result never wraps.
  return X > Y ? static_cast<T>(X - Y) : static_cast<T>(Y - X);
}
813 | |
/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingAdd(
    T X,
    T Y,
    bool* ResultOverflowed = nullptr) {
  // When the caller does not care about the flag, route it to a local.
  bool Scratch;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;
  // Hacker's Delight, p. 29: an unsigned sum wrapped iff it is smaller than
  // one of its operands.
  const T Sum = static_cast<T>(X + Y);
  Overflowed = Sum < X || Sum < Y;
  return Overflowed ? std::numeric_limits<T>::max() : Sum;
}
832 | |
/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingMultiply(
    T X,
    T Y,
    bool* ResultOverflowed = nullptr) {
  // When the caller does not care about the flag, route it to a local so the
  // code below can assign it unconditionally.
  bool Dummy;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max) {
    // The product has strictly fewer significant bits than Max: no overflow.
    return X * Y;
  }
  if (Log2Z > Log2Max) {
    // The product needs more bits than Max provides: guaranteed overflow.
    Overflowed = true;
    return Max;
  }

  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    // Doubling Z below would shift a set bit out of range: overflow.
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  if (X & 1)
    // Restore the dropped low bit of X; SaturatingAdd reports the overflow
    // flag if this final addition wraps.
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}
879 | |
/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type
SaturatingMultiplyAdd(T X, T Y, T A, bool* ResultOverflowed = nullptr) {
  // When the caller does not care about the flag, route it to a local.
  bool Scratch;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;

  const T Product = SaturatingMultiply(X, Y, &Overflowed);
  // If the multiply already saturated, adding A cannot improve the result.
  if (Overflowed)
    return Product;
  return SaturatingAdd(A, Product, &Overflowed);
}
896 | |
897 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |
898 | extern const float huge_valf; |
899 | } // namespace llvm |
900 | } // namespace c10 |
901 | |