builtin_fp16.h source code [tvm/3rdparty/compiler-rt/builtin_fp16.h]

1	/*
2	* Copyright (c) 2009-2015 by llvm/compiler-rt contributors
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a copy
5	* of this software and associated documentation files (the "Software"), to deal
6	* in the Software without restriction, including without limitation the rights
7	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8	* copies of the Software, and to permit persons to whom the Software is
9	* furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20	* THE SOFTWARE.
21	* \file builtin_fp16.h
22	* \brief Functions for conversion between fp32 and fp16, adopted from compiler-rt.
23	*/
24	#ifndef COMPILER_RT_BUILTIN_FP16_H_
25	#define COMPILER_RT_BUILTIN_FP16_H_
26
27	#ifdef _MSC_VER
28	#pragma warning(disable : 4305 4805)
29	#endif
30
31	#include <cstdint>
32
/*!
 * \brief Count the leading zero bits of a 32-bit value.
 * \param x The value to inspect.
 * \return The number of zero bits above the highest set bit; 32 when x is 0.
 */
static inline uint32_t __clz(uint32_t x) {
  int zeros = 32;

  // Binary search for the highest set bit: whenever the upper part of the
  // remaining window is non-empty, discard the lower part and keep looking
  // in the upper one.
  for (int width = 16; width >= 2; width >>= 1) {
    const uint32_t upper = x >> width;
    if (upper != 0) {
      zeros -= width;
      x = upper;
    }
  }

  // Only the two lowest bits can still be set at this point (x is 0..3).
  if ((x >> 1) != 0) {
    return zeros - 2;
  }
  return zeros - x;
}
62
/*!
 * \brief Convert a floating-point bit pattern to a narrower binary
 *        floating-point format, rounding to nearest with ties to even.
 *
 * The conversion is done purely on the integer representation: values whose
 * exponent fits the destination's normal range are shifted and re-biased,
 * NaNs become quiet NaNs carrying the truncated payload, values that are too
 * large become infinity, and values that are too small become a denormal or
 * zero. The sign bit is carried over unchanged.
 *
 * \tparam SRC_T        Source value type.
 * \tparam SRC_REP_T    Integer type used to hold the source bit pattern.
 *                      Expected to be unsigned and the same size as SRC_T
 *                      (the range check relies on wrap-around).
 * \tparam SRC_SIG_BITS Number of stored significand bits in the source format.
 * \tparam DST_T        Destination value type.
 * \tparam DST_REP_T    Integer type used to hold the destination bit pattern.
 * \tparam DST_SIG_BITS Number of stored significand bits in the destination
 *                      format.
 * \param a The value to convert.
 * \return The converted value, as DST_T.
 */
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
          int DST_SIG_BITS>
static inline DST_T __truncXfYf2__(SRC_T a) {
  // Various constants whose values follow from the type parameters.
  // Any reasonable optimizer will fold and propagate all of these.
  const int srcBits = sizeof(SRC_T) * 8;
  const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
  const int srcInfExp = (1 << srcExpBits) - 1;
  const int srcExpBias = srcInfExp >> 1;

  const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
  const SRC_REP_T srcSignificandMask = srcMinNormal - 1;
  const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
  const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
  const SRC_REP_T srcAbsMask = srcSignMask - 1;
  const SRC_REP_T roundMask = (SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS)) - 1;
  const SRC_REP_T halfway = SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS - 1);
  const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
  const SRC_REP_T srcNaNCode = srcQNaN - 1;

  const int dstBits = sizeof(DST_T) * 8;
  const int dstExpBits = dstBits - DST_SIG_BITS - 1;
  const int dstInfExp = (1 << dstExpBits) - 1;
  const int dstExpBias = dstInfExp >> 1;

  // Smallest / one-past-largest source exponents that map onto a normal
  // number in the destination format.
  const int underflowExponent = srcExpBias + 1 - dstExpBias;
  const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
  const SRC_REP_T underflow = (SRC_REP_T)underflowExponent << SRC_SIG_BITS;
  const SRC_REP_T overflow = (SRC_REP_T)overflowExponent << SRC_SIG_BITS;

  const DST_REP_T dstQNaN = DST_REP_T(1) << (DST_SIG_BITS - 1);
  const DST_REP_T dstNaNCode = dstQNaN - 1;

  // Break a into a sign and representation of the absolute value
  union SrcExchangeType {
    SRC_T f;
    SRC_REP_T i;
  };
  SrcExchangeType src_rep;
  src_rep.f = a;
  const SRC_REP_T aRep = src_rep.i;
  const SRC_REP_T aAbs = aRep & srcAbsMask;
  const SRC_REP_T sign = aRep & srcSignMask;
  DST_REP_T absResult;

  // underflow <= aAbs < overflow, expressed as a single wrap-around compare.
  // If sizeof(SRC_REP_T) < sizeof(int), the subtraction results are promoted
  // to (signed) int and the wrap-around is lost. To avoid that, explicitly
  // cast back to SRC_REP_T.
  if ((SRC_REP_T)(aAbs - underflow) < (SRC_REP_T)(aAbs - overflow)) {
    // The exponent of a is within the range of normal numbers in the
    // destination format. We can convert by simply right-shifting with
    // rounding and adjusting the exponent.
    absResult = (DST_REP_T)(aAbs >> (SRC_SIG_BITS - DST_SIG_BITS));
    absResult -= (DST_REP_T)(srcExpBias - dstExpBias) << DST_SIG_BITS;

    const SRC_REP_T roundBits = aAbs & roundMask;
    if (roundBits > halfway) {
      // Round to nearest
      absResult++;
    } else if (roundBits == halfway) {
      // Ties to even
      absResult += absResult & 1;
    }
  } else if (aAbs > srcInfinity) {
    // a is NaN.
    // Conjure the result by beginning with infinity, setting the qNaN
    // bit and inserting the (truncated) trailing NaN field.
    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
    absResult |= dstQNaN;
    absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
  } else if (aAbs >= overflow) {
    // a overflows to infinity.
    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
  } else {
    // a underflows on conversion to the destination type or is an exact
    // zero. The result may be a denormal or zero. Extract the exponent
    // to get the shift amount for the denormalization.
    const int aExp = (int)(aAbs >> SRC_SIG_BITS);
    const int shift = srcExpBias - dstExpBias - aExp + 1;

    // The implicit leading bit is OR-ed in unconditionally, i.e. a is
    // treated as a normal number here.
    // NOTE(review): a source denormal/zero (aExp == 0) also takes this path;
    // that is harmless as long as its shift exceeds SRC_SIG_BITS (true for
    // float -> half). Confirm before using formats with closer exponent
    // ranges.
    const SRC_REP_T significand = (aRep & srcSignificandMask) | srcMinNormal;

    // Right shift by the denormalization amount with sticky.
    if (shift > SRC_SIG_BITS) {
      absResult = 0;
    } else {
      // Truncate the left shift back to SRC_REP_T so that only the bits that
      // are about to be shifted out decide the sticky flag, even when
      // SRC_REP_T is narrower than int.
      const bool sticky = (SRC_REP_T)(significand << (srcBits - shift)) != 0;
      const SRC_REP_T denormalizedSignificand =
          (SRC_REP_T)((significand >> shift) | (SRC_REP_T)sticky);
      absResult = (DST_REP_T)(denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS));
      const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
      if (roundBits > halfway) {
        // Round to nearest
        absResult++;
      } else if (roundBits == halfway) {
        // Ties to even
        absResult += absResult & 1;
      }
    }
  }

  // Apply the signbit to (DST_T)abs(a).
  const DST_REP_T result = (DST_REP_T)(absResult | (sign >> (srcBits - dstBits)));
  union DstExchangeType {
    DST_T f;
    DST_REP_T i;
  };
  DstExchangeType dst_rep;
  dst_rep.i = result;
  return dst_rep.f;
}
166
/*!
 * \brief Convert a floating-point bit pattern to a wider binary
 *        floating-point format.
 *
 * Normal numbers are shifted into place and re-biased, infinities and NaNs
 * keep their quiet bit and payload (moved to the top of the wider
 * significand), source denormals are renormalized, and zero stays zero.
 * The sign bit is carried over unchanged.
 *
 * \tparam SRC_T        Source value type.
 * \tparam SRC_REP_T    Integer type used to hold the source bit pattern.
 * \tparam SRC_SIG_BITS Number of stored significand bits in the source format.
 * \tparam DST_T        Destination value type.
 * \tparam DST_REP_T    Integer type used to hold the destination bit pattern.
 * \tparam DST_SIG_BITS Number of stored significand bits in the destination
 *                      format.
 * \param a The value to convert.
 * \return The converted value, as DST_T.
 */
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
          int DST_SIG_BITS>
static inline DST_T __extendXfYf2__(SRC_T a) {
  // Layout of the source format, derived from the template parameters.
  const int srcWidth = sizeof(SRC_T) * 8;
  const int srcExpWidth = srcWidth - SRC_SIG_BITS - 1;
  const int srcExpMax = (1 << srcExpWidth) - 1;
  const int srcBias = srcExpMax >> 1;

  const SRC_REP_T srcFirstNormal = SRC_REP_T(1) << SRC_SIG_BITS;
  const SRC_REP_T srcInf = (SRC_REP_T)srcExpMax << SRC_SIG_BITS;
  const SRC_REP_T srcSignBit = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpWidth);
  const SRC_REP_T srcMagnitudeMask = srcSignBit - 1;
  const SRC_REP_T srcQuietBit = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
  const SRC_REP_T srcPayloadMask = srcQuietBit - 1;
  // Quiet bit plus payload, i.e. everything a NaN/infinity carries below the
  // exponent field.
  const SRC_REP_T srcNaNFieldMask = srcQuietBit | srcPayloadMask;

  // Layout of the destination format.
  const int dstWidth = sizeof(DST_T) * 8;
  const int dstExpWidth = dstWidth - DST_SIG_BITS - 1;
  const int dstExpMax = (1 << dstExpWidth) - 1;
  const int dstBias = dstExpMax >> 1;

  const DST_REP_T dstFirstNormal = DST_REP_T(1) << DST_SIG_BITS;

  // How far the significand moves left, and how much the biased exponent grows.
  const int sigShift = DST_SIG_BITS - SRC_SIG_BITS;
  const int biasDelta = dstBias - srcBias;

  // Reinterpret the input as its bit pattern and split off the sign.
  union SrcExchangeType {
    SRC_T f;
    SRC_REP_T i;
  };
  SrcExchangeType input;
  input.f = a;
  const SRC_REP_T bits = input.i;
  const SRC_REP_T magnitude = bits & srcMagnitudeMask;
  const SRC_REP_T signBit = bits & srcSignBit;

  // Stays 0 when the input is a (signed) zero.
  DST_REP_T widened = 0;

  // The difference is cast back to SRC_REP_T because a SRC_REP_T narrower
  // than int would otherwise be promoted to (signed) int.
  if ((SRC_REP_T)(magnitude - srcFirstNormal) < srcInf - srcFirstNormal) {
    // Normal number: move significand and exponent into position, then
    // re-bias the exponent.
    widened = (DST_REP_T)magnitude << sigShift;
    widened += (DST_REP_T)biasDelta << DST_SIG_BITS;
  } else if (magnitude >= srcInf) {
    // NaN or infinity: start from the destination infinity and copy over
    // the quiet bit and payload (both zero for an infinity).
    widened = (DST_REP_T)dstExpMax << DST_SIG_BITS;
    widened |= (DST_REP_T)(magnitude & srcNaNFieldMask) << sigShift;
  } else if (magnitude) {
    // Denormal: shift the leading set bit up to the implicit-bit position,
    // clear it, and insert the correspondingly reduced exponent.
    const int scale = __clz(magnitude) - __clz(srcFirstNormal);
    widened = (DST_REP_T)magnitude << (sigShift + scale);
    widened ^= dstFirstNormal;
    const int biasedExponent = biasDelta - scale + 1;
    widened |= (DST_REP_T)biasedExponent << DST_SIG_BITS;
  }

  // Put the sign bit back at the top of the destination pattern.
  const DST_REP_T outBits = widened | (DST_REP_T)signBit << (dstWidth - srcWidth);
  union DstExchangeType {
    DST_T f;
    DST_REP_T i;
  };
  DstExchangeType output;
  output.i = outBits;
  return output.f;
}
245
246	#endif // COMPILER_RT_BUILTIN_FP16_H_
247

Browse the source code of tvm/3rdparty/compiler-rt/builtin_fp16.h