1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_LLVMIRCODEGEN_LIBJIT_LIBJIT_DEFS_H |
17 | #define GLOW_LLVMIRCODEGEN_LIBJIT_LIBJIT_DEFS_H |
18 | |
19 | #include <assert.h> |
20 | #include <cmath> |
21 | #include <cstdlib> |
22 | #include <math.h> |
23 | #include <stdint.h> |
24 | #include <string.h> |
25 | |
26 | #include "libjit_dim_t.h" |
27 | |
/// Marks libjit helpers as internal-linkage and forces inlining so they fold
/// directly into the generated kernels with no call overhead.
#define LIBJIT_ALWAYS_INLINE static inline __attribute__((always_inline))

/// MSVC has no ssize_t; alias it to the equivalent signed size type.
#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif

/// SIMD vector types holding 4 and 8 packed floats (16/32 bytes). Clang and
/// GCC spell the vector extension differently but yield equivalent types.
#if defined(__clang__)
using float4 = float __attribute__((ext_vector_type(4)));
using float8 = float __attribute__((ext_vector_type(8)));
#elif defined(__GNUC__) || defined(__GNUG__)
using float4 = float __attribute__((vector_size(16)));
using float8 = float __attribute__((vector_size(32)));
#endif

/// Loads a simd float8 value from \p ptr.
/// NOTE: this dereferences \p ptr as a float8 *, i.e. a vector-aligned
/// access; use LoaduFloat8 (below) when the pointer may be unaligned.
#define LoadFloat8(PTR) *((const float8 *)(PTR))

/// Stores the simd float8 value to \p ptr (vector-aligned access; see
/// StoreuFloat8 below for the unaligned variant).
#define StoreFloat8(PTR, VAL) *((float8 *)(PTR)) = (VAL);

/// Accumulate (+=) the simd float8 value to \p ptr (vector-aligned access;
/// see AdduFloat8 below for the unaligned variant).
#define AddFloat8(PTR, VAL) *((float8 *)(PTR)) += (VAL);

/// Broadcast the input value to a float8 (same scalar in all eight lanes).
#if defined(__clang__)
#define BroadcastFloat8(VAL) ((float8)(VAL))
#elif defined(__GNUC__) || defined(__GNUG__)
/// GCC has no scalar-to-vector cast; a scalar combined with a zero vector
/// via a binary operator is broadcast across the lanes instead.
#define BroadcastFloat8(VAL) ((VAL) - (float8){0})
#endif

/// NOTE: both macros evaluate their arguments more than once; do not pass
/// expressions with side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/// Indexes \p tensor at the position selected by \p indices (length
/// \p numIndices) within the shape \p dims (length \p numDims).
/// Relies on get_element_ptr, which is declared elsewhere in libjit.
#define AT(tensor, dims, numDims, indices, numIndices)                         \
  tensor[get_element_ptr(tensor, dims, numDims, indices, numIndices)]
63 | |
64 | /// Perform an unaligned load of a float8 from a float pointer. |
65 | inline float8 LoaduFloat8(const float *p) { |
66 | float8 res; |
67 | memcpy(&res, p, sizeof(float8)); |
68 | return res; |
69 | } |
70 | |
71 | /// Perform an unaligned store to a float pointer. |
72 | inline void StoreuFloat8(float *p, float8 v) { memcpy(p, &v, sizeof(float8)); } |
73 | |
74 | /// Perform an unaligned addition to a float pointer. |
75 | inline void AdduFloat8(float *p, float8 v) { |
76 | StoreuFloat8(p, LoaduFloat8(p) + v); |
77 | } |
78 | |
79 | /// \returns the index of the element at x,y,z,w,q,r. |
80 | inline dim_t libjit_getXYZWQR(const dim_t *dims, dim_t x, dim_t y, dim_t z, |
81 | dim_t w, dim_t q, dim_t r) { |
82 | return (x * dims[1] * dims[2] * dims[3] * dims[4] * dims[5]) + |
83 | (y * dims[2] * dims[3] * dims[4] * dims[5]) + |
84 | (z * dims[3] * dims[4] * dims[5]) + (w * dims[4] * dims[5]) + |
85 | (q * dims[5]) + r; |
86 | } |
87 | |
88 | /// \returns the index of the element at x,y,z,w,q. |
89 | inline dim_t libjit_getXYZWQ(const dim_t *dims, dim_t x, dim_t y, dim_t z, |
90 | dim_t w, dim_t q) { |
91 | return (x * dims[1] * dims[2] * dims[3] * dims[4]) + |
92 | (y * dims[2] * dims[3] * dims[4]) + (z * dims[3] * dims[4]) + |
93 | (w * dims[4]) + q; |
94 | } |
95 | |
96 | /// \returns the index of the element at x,y,z,w. |
97 | inline dim_t libjit_getXYZW(const dim_t *dims, dim_t x, dim_t y, dim_t z, |
98 | dim_t w) { |
99 | return (x * dims[1] * dims[2] * dims[3]) + (y * dims[2] * dims[3]) + |
100 | (z * dims[3]) + w; |
101 | } |
102 | |
103 | /// \returns the index of the element at x,y,z. |
104 | inline dim_t libjit_getXYZ(const dim_t *dims, dim_t x, dim_t y, dim_t z) { |
105 | return (x * dims[1] * dims[2]) + (y * dims[2]) + z; |
106 | } |
107 | |
108 | /// \returns the index of the element at x,y. |
109 | inline dim_t libjit_getXY(const dim_t *dims, dim_t x, dim_t y) { |
110 | return (x * dims[1]) + y; |
111 | } |
112 | |
113 | /// Computes the function Sigmoid(x) for float \p input. |
114 | /// When the LIBJIT compile option "-ffast-math" is enabled the intermediate |
115 | /// computation expf(x) for Sigmoid operator is not handled properly for very |
116 | /// large positive values which results in NaN values for the Sigmoid output. |
117 | /// Therefore when the "-ffast-math" is enabled we compute the Sigmoid such that |
118 | /// we avoid computing large values for the "expf" function. |
119 | LIBJIT_ALWAYS_INLINE |
float libjit_sigmoid_f(float input) {
#ifdef FFAST_MATH
  // Evaluate expf only on the non-positive argument -|input| so a fast-math
  // expf never sees huge positive values. The positive-half sigmoid is then
  // reflected for negative inputs: signbit contributes the leading 1 and
  // copysign flips the sign of the term, yielding 1 - sigmoid(|x|).
  float posHalf = 1 / (1 + expf(-std::abs(input)));
  return (float)(std::signbit(input)) + std::copysignf(posHalf, input);
#else
  // sigmoid(x) = 1 / (1 + e^(-x)).
  float denom = expf(-input) + 1;
  return 1 / denom;
#endif // FFAST_MATH
}
129 | |
130 | /// Computes the function Tanh(x) for float \p input. |
131 | /// When the LIBJIT compile option "-ffast-math" is enabled the intermediate |
132 | /// computation expf(x) for Tanh operator is not handled properly for very |
133 | /// large positive values which results in NaN values for the Tanh output. |
134 | /// Therefore when the "-ffast-math" is enabled we compute the Tanh such that |
135 | /// we avoid computing large values for the "expf" function. |
136 | LIBJIT_ALWAYS_INLINE |
float libjit_tanh_f(float input) {
#ifdef FFAST_MATH
  // Feed expf the non-positive argument -2*|input| so a fast-math expf
  // cannot blow up; tanh is odd, so the sign is restored with copysign.
  float posHalf = -1 + 2 / (expf(-2 * std::abs(input)) + 1);
  return std::copysignf(posHalf, input);
#else
  // tanh(x) = 1 - 2 / (e^(2x) + 1).
  float denom = expf(input * 2) + 1;
  return 1 - 2 / denom;
#endif // FFAST_MATH
}
145 | |
146 | /// \returns the clipped value of the input to INT8 range [-128, 127]. |
147 | LIBJIT_ALWAYS_INLINE |
int8_t libjit_clip_i8(int32_t val) {
  // Clamp to the INT8 range [-128, 127] before narrowing.
  const int32_t lowBounded = (val > -128) ? val : -128;
  return (int8_t)((lowBounded < 127) ? lowBounded : 127);
}
149 | |
150 | /// \returns the clipped value of the input to INT16 range [-32768, 32767]. |
151 | LIBJIT_ALWAYS_INLINE |
int16_t libjit_clip_i16(int32_t val) {
  // Clamp to the INT16 range [-32768, 32767] before narrowing.
  const int32_t lowBounded = (val > -32768) ? val : -32768;
  return (int16_t)((lowBounded < 32767) ? lowBounded : 32767);
}
155 | |
156 | /// Scales a 32-bit or 64-bit integer to a 32-bit integer using the integer |
157 | /// shift-mult-shift method. |
158 | template <typename SrcTy = int32_t, typename DestTy = int32_t> |
159 | LIBJIT_ALWAYS_INLINE DestTy libjit_scale(SrcTy input, int32_t pre, int32_t post, |
160 | int32_t scale, int32_t offset) { |
161 | // The operation x >> post is rounded down to negative infinity. To get to |
162 | // round-nearest we add (1 << (post - 1)) to the value prior to shifting. |
163 | // Rounding is performed only when shifting right (pos > 0). |
164 | SrcTy rtn = (post > 0) ? (1 << (post - 1)) : 0; |
165 | |
166 | // NOTICE: If your tests are failing because of signed integer overflow then |
167 | // this is a bug in the test and not in the program. You should make sure that |
168 | // the inputs to the operations do not overflow. The semantics of the |
169 | // quantization process is such that the result for values that fall out of |
170 | // range is undefined. The conversion procedure will only tolerate a few bits |
171 | // of overflow and the result will be clipped. |
172 | return ((((input >> pre) * scale) + rtn) >> post) + offset; |
173 | } |
174 | |
175 | /// Applies an activation function to a FLOAT input value \p input based on |
176 | /// the activation type \p actType and the activation arguments \p actArgs. |
177 | /// NOTE: The type of the activation must be in sync with the FusedActivation |
178 | /// enumeration in glow\include\glow\Graph\Nodes.h. |
179 | LIBJIT_ALWAYS_INLINE |
180 | float libjit_activation_f(float input, int32_t actType, const float *actArgs) { |
181 | if (actType == 0) { |
182 | // No activation. |
183 | return input; |
184 | } else if (actType == 1) { |
185 | // Relu. |
186 | return MAX(input, 0); |
187 | } else if (actType == 2) { |
188 | // Clip. |
189 | return MIN(MAX(input, actArgs[0]), actArgs[1]); |
190 | } else if (actType == 3) { |
191 | // Tanh. |
192 | return libjit_tanh_f(input); |
193 | } else if (actType == 4) { |
194 | // Sigmoid. |
195 | return libjit_sigmoid_f(input); |
196 | } else { |
197 | // LeakyRelu. |
198 | return (input >= 0) ? input : actArgs[0] * input; |
199 | } |
200 | } |
201 | |
202 | /// Applies an activation function to a QUANTIZED input value \p input based on |
203 | /// the activation type \p actType and the activation arguments \p actArgs. |
204 | /// NOTE: The type of the activation must be in sync with the FusedActivation |
205 | /// enumeration in glow\include\glow\Graph\Nodes.h. |
206 | LIBJIT_ALWAYS_INLINE |
207 | int32_t libjit_activation_i32(int32_t input, int32_t offset, int32_t actType, |
208 | const int32_t *actArgs) { |
209 | if (actType == 0) { |
210 | // No activation. |
211 | return input; |
212 | } else if (actType == 1) { |
213 | // Relu. |
214 | return MAX(input, offset); |
215 | } else if (actType == 2) { |
216 | // Clip. |
217 | return MIN(MAX(input, actArgs[0]), actArgs[1]); |
218 | } else if (actType == 3) { |
219 | // Tanh. |
220 | assert(false && "Fused Tanh for quantized type not supported!" ); |
221 | return input; |
222 | } else if (actType == 4) { |
223 | // Sigmoid. |
224 | assert(false && "Fused Sigmoid for quantized type not supported!" ); |
225 | return input; |
226 | } else { |
227 | // LeakyRelu. |
228 | return (input >= offset) |
229 | ? input |
230 | : libjit_scale<int32_t>(input - offset, actArgs[0], actArgs[1], |
231 | actArgs[2], offset); |
232 | } |
233 | } |
234 | |
235 | /// Divides the 32-bit integer \p input with \p divider. The division is done |
236 | /// with rounding for better precision. Input can be both positive or negative. |
237 | /// Divider is assumed strictly positive. |
238 | LIBJIT_ALWAYS_INLINE |
int32_t libjit_div_round_i32(int32_t input, int32_t divider) {
  // Bias the numerator by half of the (strictly positive) divider, toward
  // the sign of the input, so truncating division rounds to nearest.
  const int32_t half = divider / 2;
  if (input > 0) {
    return (input + half) / divider;
  }
  return (input - half) / divider;
}
245 | |
/// Portable aligned allocation with the posix_memalign calling convention:
/// libjit_aligned_malloc(p, a, s) stores an s-byte, a-aligned allocation into
/// *p and evaluates to 0 on success or an error code on failure.
#ifdef _WIN32
/// On Windows, emulate posix_memalign on top of _aligned_malloc: assign the
/// result into *p, then yield 0 on success or errno on failure.
/// NOTE(review): presumably _aligned_malloc sets errno on failure — confirm
/// against the MSVC CRT documentation.
#define libjit_aligned_malloc(p, a, s)                                         \
  (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
/// Memory from _aligned_malloc must be released with _aligned_free.
#define libjit_aligned_free(p) _aligned_free(p)
#else
#define libjit_aligned_malloc(p, a, s) posix_memalign(p, a, s)
#define libjit_aligned_free(p) free(p)
#endif
254 | |
255 | /// This function computes the minimum filter index based on the the minimum |
256 | /// input index \p inp_min. |
257 | LIBJIT_ALWAYS_INLINE ssize_t libjit_conv_flt_min(ssize_t inp_min) { |
258 | return MAX(0, -inp_min); |
259 | } |
260 | |
261 | /// This function computes the maximum filter index based on the the input size |
262 | /// \p inp_size, the filter size \p flt_size and the minimum input index |
263 | /// \p inp_min. |
264 | LIBJIT_ALWAYS_INLINE ssize_t libjit_conv_flt_max(ssize_t inp_size, |
265 | ssize_t flt_size, |
266 | ssize_t inp_min) { |
267 | return MIN(flt_size, inp_size - inp_min); |
268 | } |
269 | |
270 | /// This function computes the effective filter length given the minimum filter |
271 | /// index \p flt_min and the maximum filter index \p flt_max. |
272 | LIBJIT_ALWAYS_INLINE ssize_t libjit_conv_flt_len(ssize_t flt_min, |
273 | ssize_t flt_max) { |
274 | return MAX(0, flt_max - flt_min); |
275 | } |
276 | |
277 | #endif // GLOW_LLVMIRCODEGEN_LIBJIT_LIBJIT_DEFS_H |
278 | |