1/*
2 * Copyright (c) 2009-2015 by llvm/compiler-rt contributors
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 * \file builtin_fp16.h
22 * \brief Functions for conversion between fp32 and fp16, adopted from compiler-rt.
23 */
24#ifndef COMPILER_RT_BUILTIN_FP16_H_
25#define COMPILER_RT_BUILTIN_FP16_H_
26
27#ifdef _MSC_VER
28#pragma warning(disable : 4305 4805)
29#endif
30
31#include <cstdint>
32
/*!
 * \brief Count the leading zero bits of a 32-bit value.
 * \param x The value to inspect.
 * \return Number of zero bits above the most significant set bit; 32 when x is 0.
 */
static inline uint32_t __clz(uint32_t x) {
  uint32_t zeros = 32;

  // Binary search for the top set bit: whenever the upper part of the
  // remaining window is non-empty, keep only that part and discount the
  // bits that were just skipped.
  for (uint32_t width = 16; width > 1; width >>= 1) {
    const uint32_t upper = x >> width;
    if (upper != 0) {
      zeros -= width;
      x = upper;
    }
  }

  // Only the two lowest bits can still be set at this point (x is 0..3).
  return (x >> 1) != 0 ? zeros - 2 : zeros - x;
}
62
/*!
 * \brief Convert a floating-point value to a narrower floating-point format,
 *        rounding to nearest with ties to even.
 *
 * The value is processed through its raw bit pattern. Inputs whose exponent is
 * in the destination's normal range are shifted and re-biased; NaNs become
 * quiet NaNs carrying the truncated payload; values too large become
 * infinity; values too small become a destination subnormal or zero. The sign
 * bit is carried over unchanged.
 *
 * Assumes the destination format has no more exponent bits and fewer
 * significand bits than the source format (that is what "truncation" means
 * here); other combinations are not handled.
 *
 * \tparam SRC_T        Type carrying the source value (reinterpreted as SRC_REP_T).
 * \tparam SRC_REP_T    Integer type, same size as SRC_T, used for the source bit
 *                      pattern; the arithmetic relies on unsigned wrap-around.
 * \tparam SRC_SIG_BITS Number of stored significand bits of the source format.
 * \tparam DST_T        Type carrying the result (reinterpreted from DST_REP_T).
 * \tparam DST_REP_T    Integer type, same size as DST_T, used for the result bit pattern.
 * \tparam DST_SIG_BITS Number of stored significand bits of the destination format.
 * \param a The value to convert.
 * \return The converted value.
 */
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
          int DST_SIG_BITS>
static inline DST_T __truncXfYf2__(SRC_T a) {
  // Various constants whose values follow from the type parameters.
  // Any reasonable optimizer will fold and propagate all of these.
  const int srcBits = sizeof(SRC_T) * 8;
  const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
  const int srcInfExp = (1 << srcExpBits) - 1;
  const int srcExpBias = srcInfExp >> 1;

  const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
  const SRC_REP_T srcSignificandMask = srcMinNormal - 1;
  const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
  const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
  const SRC_REP_T srcAbsMask = srcSignMask - 1;
  const SRC_REP_T roundMask = (SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS)) - 1;
  const SRC_REP_T halfway = SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS - 1);
  const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
  const SRC_REP_T srcNaNCode = srcQNaN - 1;

  const int dstBits = sizeof(DST_T) * 8;
  const int dstExpBits = dstBits - DST_SIG_BITS - 1;
  const int dstInfExp = (1 << dstExpBits) - 1;
  const int dstExpBias = dstInfExp >> 1;

  const int underflowExponent = srcExpBias + 1 - dstExpBias;
  const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
  const SRC_REP_T underflow = (SRC_REP_T)underflowExponent << SRC_SIG_BITS;
  const SRC_REP_T overflow = (SRC_REP_T)overflowExponent << SRC_SIG_BITS;

  const DST_REP_T dstQNaN = DST_REP_T(1) << (DST_SIG_BITS - 1);
  const DST_REP_T dstNaNCode = dstQNaN - 1;

  // Break a into a sign and representation of the absolute value
  union SrcExchangeType {
    SRC_T f;
    SRC_REP_T i;
  };
  SrcExchangeType src_rep;
  src_rep.f = a;
  const SRC_REP_T aRep = src_rep.i;
  const SRC_REP_T aAbs = aRep & srcAbsMask;
  const SRC_REP_T sign = aRep & srcSignMask;
  DST_REP_T absResult;

  // If sizeof(SRC_REP_T) < sizeof(int), the subtraction results are promoted
  // to (signed) int and the wrap-around range test breaks. Cast back to
  // SRC_REP_T so the comparison is done modulo 2^srcBits.
  if ((SRC_REP_T)(aAbs - underflow) < (SRC_REP_T)(aAbs - overflow)) {
    // The exponent of a is within the range of normal numbers in the
    // destination format. We can convert by simply right-shifting with
    // rounding and adjusting the exponent.
    absResult = aAbs >> (SRC_SIG_BITS - DST_SIG_BITS);
    absResult -= (DST_REP_T)(srcExpBias - dstExpBias) << DST_SIG_BITS;

    const SRC_REP_T roundBits = aAbs & roundMask;
    if (roundBits > halfway) {
      // Round to nearest
      absResult++;
    } else if (roundBits == halfway) {
      // Ties to even
      absResult += absResult & 1;
    }
  } else if (aAbs > srcInfinity) {
    // a is NaN.
    // Conjure the result by beginning with infinity, setting the qNaN
    // bit and inserting the (truncated) trailing NaN field.
    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
    absResult |= dstQNaN;
    absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
  } else if (aAbs >= overflow) {
    // a overflows to infinity.
    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
  } else {
    // a underflows on conversion to the destination type or is an exact
    // zero. The result may be a denormal or zero. Extract the exponent
    // to get the shift amount for the denormalization.
    const int aExp = aAbs >> SRC_SIG_BITS;

    // An exponent field of 0 marks a source zero/subnormal: it has no
    // implicit leading one and is scaled like the smallest normal exponent
    // (1). Treating it as a normal number is only harmless when the shift
    // below flushes it to zero anyway (e.g. fp32 -> fp16); for formats that
    // share an exponent range (e.g. fp32 -> bf16) it would corrupt zero.
    const int effectiveExp = aExp != 0 ? aExp : 1;
    const int shift = srcExpBias - dstExpBias - effectiveExp + 1;

    SRC_REP_T significand = aRep & srcSignificandMask;
    if (aExp != 0) significand |= srcMinNormal;

    // Right shift by the denormalization amount with sticky.
    if (shift > SRC_SIG_BITS) {
      absResult = 0;
    } else {
      // shift == 0 only occurs for a source subnormal when both formats use
      // the same exponent bias; nothing is shifted out then, and shifting by
      // the full width would be undefined. The cast drops bits that integer
      // promotion would otherwise keep for narrow SRC_REP_T.
      const bool sticky = shift > 0 && (SRC_REP_T)(significand << (srcBits - shift)) != 0;
      const SRC_REP_T denormalizedSignificand = (significand >> shift) | sticky;
      absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS);
      const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
      if (roundBits > halfway) {
        // Round to nearest
        absResult++;
      } else if (roundBits == halfway) {
        // Ties to even
        absResult += absResult & 1;
      }
    }
  }

  // Apply the signbit to (DST_T)abs(a).
  const DST_REP_T result = absResult | sign >> (srcBits - dstBits);
  union DstExchangeType {
    DST_T f;
    DST_REP_T i;
  };
  DstExchangeType dst_rep;
  dst_rep.i = result;
  return dst_rep.f;
}
166
/*!
 * \brief Convert a floating-point value to a wider floating-point format.
 *
 * The value is processed through its raw bit pattern. Normal numbers are
 * shifted into place and re-biased; infinities and NaNs keep their quiet bit
 * and payload (left-aligned in the wider significand); source subnormals are
 * renormalized when the destination can represent them as normal numbers and
 * otherwise stay subnormal; zero stays zero. The sign bit is carried over
 * unchanged.
 *
 * Assumes the destination format has at least as many exponent bits and
 * significand bits as the source format.
 *
 * \tparam SRC_T        Type carrying the source value (reinterpreted as SRC_REP_T).
 * \tparam SRC_REP_T    Integer type, same size as SRC_T, used for the source bit
 *                      pattern; the arithmetic relies on unsigned wrap-around.
 * \tparam SRC_SIG_BITS Number of stored significand bits of the source format.
 * \tparam DST_T        Type carrying the result (reinterpreted from DST_REP_T).
 * \tparam DST_REP_T    Integer type, same size as DST_T, used for the result bit pattern.
 * \tparam DST_SIG_BITS Number of stored significand bits of the destination format.
 * \param a The value to convert.
 * \return The converted value.
 */
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
          int DST_SIG_BITS>
static inline DST_T __extendXfYf2__(SRC_T a) {
  // Various constants whose values follow from the type parameters.
  // Any reasonable optimizer will fold and propagate all of these.
  const int srcBits = sizeof(SRC_T) * 8;
  const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
  const int srcInfExp = (1 << srcExpBits) - 1;
  const int srcExpBias = srcInfExp >> 1;

  const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
  const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
  const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
  const SRC_REP_T srcAbsMask = srcSignMask - 1;
  const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
  const SRC_REP_T srcNaNCode = srcQNaN - 1;

  const int dstBits = sizeof(DST_T) * 8;
  const int dstExpBits = dstBits - DST_SIG_BITS - 1;
  const int dstInfExp = (1 << dstExpBits) - 1;
  const int dstExpBias = dstInfExp >> 1;

  const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS;

  // Break a into a sign and representation of the absolute value
  union SrcExchangeType {
    SRC_T f;
    SRC_REP_T i;
  };
  SrcExchangeType src_rep;
  src_rep.f = a;
  const SRC_REP_T aRep = src_rep.i;
  const SRC_REP_T aAbs = aRep & srcAbsMask;
  const SRC_REP_T sign = aRep & srcSignMask;
  DST_REP_T absResult;

  // If sizeof(SRC_REP_T) < sizeof(int), the subtraction result is promoted
  // to (signed) int. To avoid that, explicitly cast to SRC_REP_T.
  if ((SRC_REP_T)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
    // a is a normal number.
    // Extend to the destination type by shifting the significand and
    // exponent into the proper position and rebiasing the exponent.
    absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS);
    absResult += (DST_REP_T)(dstExpBias - srcExpBias) << DST_SIG_BITS;
  } else if (aAbs >= srcInfinity) {
    // a is NaN or infinity.
    // Conjure the result by beginning with infinity, then setting the qNaN
    // bit (if needed) and right-aligning the rest of the trailing NaN
    // payload field.
    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
    absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS);
    absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS);
  } else if (aAbs) {
    // a is denormal (0 < aAbs < srcMinNormal).
    // Count how many positions the leading set bit sits below the implicit
    // bit. Done with a shift loop so it works for any SRC_REP_T width; the
    // loop runs at most SRC_SIG_BITS times and cannot overflow because the
    // probe stays below 2 * srcMinNormal.
    int scale = 0;
    for (SRC_REP_T probe = aAbs; probe < srcMinNormal; probe = (SRC_REP_T)(probe << 1)) {
      ++scale;
    }
    const int resultExponent = dstExpBias - srcExpBias - scale + 1;

    if (resultExponent > 0) {
      // The value is a normal number in the destination: renormalize the
      // significand and clear the leading bit, then insert the correct
      // adjusted exponent in the destination type.
      absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS + scale);
      absResult ^= dstMinNormal;
      absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS;
    } else {
      // The destination's exponent range is not wide enough to normalize
      // this value (e.g. bf16 -> fp32), so it stays denormal: only realign
      // the significand to the destination's denormal scale.
      absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS + dstExpBias - srcExpBias);
    }
  } else {
    // a is zero.
    absResult = 0;
  }

  // Apply the signbit to (DST_T)abs(a).
  const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits);
  union DstExchangeType {
    DST_T f;
    DST_REP_T i;
  };
  DstExchangeType dst_rep;
  dst_rep.i = result;
  return dst_rep.f;
}
245
246#endif // COMPILER_RT_BUILTIN_FP16_H_
247