1// Copyright 2021 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <stdint.h>
7#include <stddef.h>
8#include <assert.h>
9#include <math.h>
10
11#include <fp16.h>
12
13#include <xnnpack/math.h>
14#include <xnnpack/microparams-init.h>
15#include <xnnpack/unaligned.h>
16
17
18size_t xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params(
19 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
20 int8_t output_zero_point,
21 int8_t output_min,
22 int8_t output_max)
23{
24 params->fp32_scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
25 params->fp32_scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
26 params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
27 params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
28 return sizeof(params->fp32_scalar_fmagic);
29}
30
31size_t xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params(
32 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
33 int8_t output_zero_point,
34 int8_t output_min,
35 int8_t output_max)
36{
37 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
38 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
39 params->fp32_scalar_imagic.magic_bias = 12582912.0f;
40 params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
41 params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
42 params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
43 return sizeof(params->fp32_scalar_imagic);
44}
45
46size_t xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params(
47 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
48 int8_t output_zero_point,
49 int8_t output_min,
50 int8_t output_max)
51{
52 params->fp32_scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
53 params->fp32_scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
54 params->fp32_scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
55 return sizeof(params->fp32_scalar_lrintf);
56}
57
58#if XNN_ARCH_X86 || XNN_ARCH_X86_64
59size_t xnn_init_qc8_conv_minmax_fp32_sse2_params(
60 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
61 int8_t output_zero_point,
62 int8_t output_min,
63 int8_t output_max)
64{
65 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
66 for (uint32_t i = 0; i < 4; i++) {
67 params->fp32_sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
68 }
69 for (uint32_t i = 0; i < 8; i++) {
70 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
71 params->fp32_sse2.output_min[i] = (int16_t) output_min;
72 }
73 return sizeof(params->fp32_sse2);
74}
75
76size_t xnn_init_qc8_conv_minmax_fp32_sse4_params(
77 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
78 int8_t output_zero_point,
79 int8_t output_min,
80 int8_t output_max)
81{
82 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
83 for (uint32_t i = 0; i < 4; i++) {
84 params->fp32_sse4.output_max_less_zero_point[i] = output_max_less_zero_point;
85 }
86 for (uint32_t i = 0; i < 8; i++) {
87 params->fp32_sse4.output_zero_point[i] = (int16_t) output_zero_point;
88 }
89 for (uint32_t i = 0; i < 16; i++) {
90 params->fp32_sse4.output_min[i] = output_min;
91 }
92 return sizeof(params->fp32_sse4);
93}
94
95size_t xnn_init_qc8_conv_minmax_fp32_avx2_params(
96 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
97 int8_t output_zero_point,
98 int8_t output_min,
99 int8_t output_max)
100{
101 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
102 for (uint32_t i = 0; i < 8; i++) {
103 params->fp32_avx2.output_max_less_zero_point[i] = output_max_less_zero_point;
104 }
105 for (uint32_t i = 0; i < 16; i++) {
106 params->fp32_avx2.output_zero_point[i] = (int16_t) output_zero_point;
107 }
108 for (uint32_t i = 0; i < 32; i++) {
109 params->fp32_avx2.output_min[i] = output_min;
110 }
111 return sizeof(params->fp32_avx2);
112}
113
114size_t xnn_init_qc8_conv_minmax_fp32_avx512_params(
115 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
116 int8_t output_zero_point,
117 int8_t output_min,
118 int8_t output_max)
119{
120 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
121 for (uint32_t i = 0; i < 16; i++) {
122 params->fp32_avx512.output_max_less_zero_point[i] = output_max_less_zero_point;
123 }
124 for (uint32_t i = 0; i < 32; i++) {
125 params->fp32_avx512.output_zero_point[i] = (int16_t) output_zero_point;
126 }
127 for (uint32_t i = 0; i < 64; i++) {
128 params->fp32_avx512.output_min[i] = output_min;
129 }
130 return sizeof(params->fp32_avx512);
131}
132#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
133
134#if XNN_ARCH_ARM
135size_t xnn_init_qc8_conv_minmax_fp32_armsimd32_params(
136 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
137 int8_t output_zero_point,
138 int8_t output_min,
139 int8_t output_max)
140{
141 params->fp32_armsimd32.magic_bias = 12582912.0f;
142 params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
143 params->fp32_armsimd32.output_min = (uint32_t) (uint8_t) output_min * UINT32_C(0x01010101);
144 params->fp32_armsimd32.output_max = (uint32_t) (uint8_t) output_max * UINT32_C(0x01010101);
145 return sizeof(params->fp32_armsimd32);
146}
147#endif // XNN_ARCH_ARM
148
149#if XNN_ARCH_ARM || XNN_ARCH_ARM64
150size_t xnn_init_qc8_conv_minmax_fp32_neon_params(
151 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
152 int8_t output_zero_point,
153 int8_t output_min,
154 int8_t output_max)
155{
156 params->fp32_neon.magic_bias = 12582912.0f;
157 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
158 params->fp32_neon.output_min = output_min;
159 params->fp32_neon.output_max = output_max;
160 return sizeof(params->fp32_neon);
161}
162
163size_t xnn_init_qc8_conv_minmax_fp32_neonv8_params(
164 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
165 int8_t output_zero_point,
166 int8_t output_min,
167 int8_t output_max)
168{
169 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
170 params->fp32_neonv8.output_min = output_min;
171 params->fp32_neonv8.output_max = output_max;
172 return sizeof(params->fp32_neonv8);
173}
174#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
175
176#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
177size_t xnn_init_qc8_conv_minmax_fp32_wasmsimd_params(
178 union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
179 int8_t output_zero_point,
180 int8_t output_min,
181 int8_t output_max)
182{
183 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
184 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
185 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
186 for (uint32_t i = 0; i < 2; i++) {
187 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
188 params->fp32_wasmsimd.magic_min[i] = magic_min;
189 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_zero_point;
190 }
191 for (uint32_t i = 0; i < 8; i++) {
192 params->fp32_wasmsimd.output_max[i] = output_max;
193 }
194 return sizeof(params->fp32_wasmsimd);
195}
196#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
197
198size_t xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params(
199 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
200 float scale,
201 int8_t output_zero_point,
202 int8_t output_min,
203 int8_t output_max)
204{
205 assert(scale >= 0x1.0p-32f);
206 assert(scale < 256.0f);
207
208 params->fp32_scalar_fmagic.scale = scale;
209 params->fp32_scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
210 params->fp32_scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
211 params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
212 params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
213 return sizeof(params->fp32_scalar_fmagic);
214}
215
216size_t xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params(
217 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
218 float scale,
219 int8_t output_zero_point,
220 int8_t output_min,
221 int8_t output_max)
222{
223 assert(scale >= 0x1.0p-32f);
224 assert(scale < 256.0f);
225
226 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
227 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
228 params->fp32_scalar_imagic.scale = scale;
229 params->fp32_scalar_imagic.magic_bias = 12582912.0f;
230 params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
231 params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
232 params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
233 return sizeof(params->fp32_scalar_imagic);
234}
235
236size_t xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params(
237 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
238 float scale,
239 int8_t output_zero_point,
240 int8_t output_min,
241 int8_t output_max)
242{
243 assert(scale >= 0x1.0p-32f);
244 assert(scale < 256.0f);
245
246 params->fp32_scalar_lrintf.scale = scale;
247 params->fp32_scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
248 params->fp32_scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
249 params->fp32_scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
250 return sizeof(params->fp32_scalar_lrintf);
251}
252
253size_t xnn_init_qs8_conv_minmax_rndnu_scalar_params(
254 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
255 float scale,
256 int8_t output_zero_point,
257 int8_t output_min,
258 int8_t output_max)
259{
260 assert(scale >= 0x1.0p-32f);
261 assert(scale < 256.0f);
262
263 // Compute requantization parameters.
264 const uint32_t scale_bits = float_as_uint32(scale);
265
266 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
267 assert(multiplier >= INT32_C(0x00800000));
268 assert(multiplier <= INT32_C(0x00FFFFFF));
269
270 // Shift is in [16, 55] range.
271 const uint32_t shift = 127 + 23 - (scale_bits >> 23);
272 assert(shift >= 16);
273 assert(shift < 56);
274
275 const int64_t rounding = INT64_C(1) << (shift - 1);
276 const int32_t output_min_less_zero_point = (int32_t) output_min - (int32_t) output_zero_point;
277 const int32_t output_max_less_zero_point = (int32_t) output_max - (int32_t) output_zero_point;
278
279 params->rndnu_scalar.multiplier = multiplier;
280 params->rndnu_scalar.shift = shift;
281 params->rndnu_scalar.rounding = rounding;
282 params->rndnu_scalar.output_min_less_zero_point = output_min_less_zero_point;
283 params->rndnu_scalar.output_max_less_zero_point = output_max_less_zero_point;
284 params->rndnu_scalar.output_zero_point = (int32_t) output_zero_point;
285 return sizeof(params->rndnu_scalar);
286}
287
288#if XNN_ARCH_X86 || XNN_ARCH_X86_64
289size_t xnn_init_qs8_conv_minmax_fp32_sse2_params(
290 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
291 float scale,
292 int8_t output_zero_point,
293 int8_t output_min,
294 int8_t output_max)
295{
296 assert(scale >= 0x1.0p-32f);
297 assert(scale < 256.0f);
298
299 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
300 for (uint32_t i = 0; i < 4; i++) {
301 params->fp32_sse2.scale[i] = scale;
302 params->fp32_sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
303 }
304 for (uint32_t i = 0; i < 8; i++) {
305 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
306 params->fp32_sse2.output_min[i] = (int16_t) output_min;
307 }
308 return sizeof(params->fp32_sse2);
309}
310
311size_t xnn_init_qs8_conv_minmax_fp32_sse4_params(
312 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
313 float scale,
314 int8_t output_zero_point,
315 int8_t output_min,
316 int8_t output_max)
317{
318 assert(scale >= 0x1.0p-32f);
319 assert(scale < 256.0f);
320
321 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
322 for (uint32_t i = 0; i < 4; i++) {
323 params->fp32_sse4.scale[i] = scale;
324 params->fp32_sse4.output_max_less_zero_point[i] = output_max_less_zero_point;
325 }
326 for (uint32_t i = 0; i < 8; i++) {
327 params->fp32_sse4.output_zero_point[i] = (int16_t) output_zero_point;
328 }
329 for (uint32_t i = 0; i < 16; i++) {
330 params->fp32_sse4.output_min[i] = output_min;
331 }
332 return sizeof(params->fp32_sse4);
333}
334
335size_t xnn_init_qs8_conv_minmax_fp32_avx2_params(
336 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
337 float scale,
338 int8_t output_zero_point,
339 int8_t output_min,
340 int8_t output_max)
341{
342 assert(scale >= 0x1.0p-32f);
343 assert(scale < 256.0f);
344
345 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
346 for (uint32_t i = 0; i < 8; i++) {
347 params->fp32_avx2.scale[i] = scale;
348 params->fp32_avx2.output_max_less_zero_point[i] = output_max_less_zero_point;
349 }
350 for (uint32_t i = 0; i < 16; i++) {
351 params->fp32_avx2.output_zero_point[i] = (int16_t) output_zero_point;
352 }
353 for (uint32_t i = 0; i < 32; i++) {
354 params->fp32_avx2.output_min[i] = output_min;
355 }
356 return sizeof(params->fp32_avx2);
357}
358
359size_t xnn_init_qs8_conv_minmax_fp32_avx512_params(
360 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
361 float scale,
362 int8_t output_zero_point,
363 int8_t output_min,
364 int8_t output_max)
365{
366 assert(scale >= 0x1.0p-32f);
367 assert(scale < 256.0f);
368
369 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
370 for (uint32_t i = 0; i < 16; i++) {
371 params->fp32_avx512.scale[i] = scale;
372 params->fp32_avx512.output_max_less_zero_point[i] = output_max_less_zero_point;
373 }
374 for (uint32_t i = 0; i < 32; i++) {
375 params->fp32_avx512.output_zero_point[i] = (int16_t) output_zero_point;
376 }
377 for (uint32_t i = 0; i < 64; i++) {
378 params->fp32_avx512.output_min[i] = output_min;
379 }
380 return sizeof(params->fp32_avx512);
381}
382#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
383
384#if XNN_ARCH_ARM
385size_t xnn_init_qs8_conv_minmax_fp32_armsimd32_params(
386 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
387 float scale,
388 int8_t output_zero_point,
389 int8_t output_min,
390 int8_t output_max)
391{
392 assert(scale >= 0x1.0p-32f);
393 assert(scale < 256.0f);
394
395 params->fp32_armsimd32.scale = scale;
396 params->fp32_armsimd32.magic_bias = 12582912.0f;
397 params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
398 params->fp32_armsimd32.output_min = (uint32_t) (uint8_t) output_min * UINT32_C(0x01010101);
399 params->fp32_armsimd32.output_max = (uint32_t) (uint8_t) output_max * UINT32_C(0x01010101);
400 return sizeof(params->fp32_armsimd32);
401}
402#endif // XNN_ARCH_ARM
403
404#if XNN_ARCH_ARM || XNN_ARCH_ARM64
405size_t xnn_init_qs8_conv_minmax_fp32_neon_params(
406 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
407 float scale,
408 int8_t output_zero_point,
409 int8_t output_min,
410 int8_t output_max)
411{
412 assert(scale >= 0x1.0p-32f);
413 assert(scale < 256.0f);
414
415 params->fp32_neon.scale = scale;
416 params->fp32_neon.magic_bias = 12582912.0f;
417 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
418 params->fp32_neon.output_min = output_min;
419 params->fp32_neon.output_max = output_max;
420 return sizeof(params->fp32_neon);
421}
422
423size_t xnn_init_qs8_conv_minmax_fp32_neonv8_params(
424 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
425 float scale,
426 int8_t output_zero_point,
427 int8_t output_min,
428 int8_t output_max)
429{
430 assert(scale >= 0x1.0p-32f);
431 assert(scale < 256.0f);
432
433 params->fp32_neonv8.scale = scale;
434 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
435 params->fp32_neonv8.output_min = output_min;
436 params->fp32_neonv8.output_max = output_max;
437 return sizeof(params->fp32_neonv8);
438}
439
440size_t xnn_init_qs8_conv_minmax_rndnu_neon_params(
441 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
442 float scale,
443 int8_t output_zero_point,
444 int8_t output_min,
445 int8_t output_max)
446{
447 assert(scale >= 0x1.0p-32f);
448 assert(scale < 256.0f);
449
450 // Compute requantization parameters.
451 const uint32_t scale_bits = float_as_uint32(scale);
452
453 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
454 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
455 assert(multiplier >= INT32_C(0x40000000));
456 assert(multiplier <= INT32_C(0x7FFFFF80));
457
458 // Shift is in [-8, 31] range.
459 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
460 assert(shift >= -8);
461 assert(shift < 32);
462
463 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
464 const int32_t post_shift = math_max_s32(shift, 1);
465 const int32_t pre_shift = shift - post_shift;
466
467 params->rndnu_neon.right_pre_shift = -pre_shift;
468 params->rndnu_neon.multiplier = multiplier;
469 params->rndnu_neon.right_post_shift = -post_shift;
470 params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
471 params->rndnu_neon.output_min = output_min;
472 params->rndnu_neon.output_max = output_max;
473 return sizeof(params->rndnu_neon);
474}
475#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
476
477#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
478size_t xnn_init_qs8_conv_minmax_fp32_wasmsimd_params(
479 union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
480 float scale,
481 int8_t output_zero_point,
482 int8_t output_min,
483 int8_t output_max)
484{
485 assert(scale >= 0x1.0p-32f);
486 assert(scale < 256.0f);
487
488 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
489 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
490 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
491 for (uint32_t i = 0; i < 2; i++) {
492 params->fp32_wasmsimd.scale[i] = scale;
493 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
494 params->fp32_wasmsimd.magic_min[i] = magic_min;
495 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_zero_point;
496 }
497 for (uint32_t i = 0; i < 8; i++) {
498 params->fp32_wasmsimd.output_max[i] = output_max;
499 }
500 return sizeof(params->fp32_wasmsimd);
501}
502#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
503
504size_t xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params(
505 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
506 uint8_t kernel_zero_point,
507 float scale,
508 uint8_t output_zero_point,
509 uint8_t output_min,
510 uint8_t output_max)
511{
512 assert(scale >= 0x1.0p-32f);
513 assert(scale < 256.0f);
514
515 params->fp32_scalar_fmagic.kernel_zero_point = (int32_t) kernel_zero_point;
516 params->fp32_scalar_fmagic.scale = scale;
517 params->fp32_scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
518 params->fp32_scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
519 params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
520 params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
521 return sizeof(params->fp32_scalar_fmagic);
522}
523
524size_t xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params(
525 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
526 uint8_t kernel_zero_point,
527 float scale,
528 uint8_t output_zero_point,
529 uint8_t output_min,
530 uint8_t output_max)
531{
532 assert(scale >= 0x1.0p-32f);
533 assert(scale < 256.0f);
534
535 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
536 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
537 params->fp32_scalar_imagic.kernel_zero_point = (int32_t) kernel_zero_point;
538 params->fp32_scalar_imagic.scale = scale;
539 params->fp32_scalar_imagic.magic_bias = 12582912.0f;
540 params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
541 params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
542 params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
543 return sizeof(params->fp32_scalar_imagic);
544}
545
546size_t xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params(
547 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
548 uint8_t kernel_zero_point,
549 float scale,
550 uint8_t output_zero_point,
551 uint8_t output_min,
552 uint8_t output_max)
553{
554 assert(scale >= 0x1.0p-32f);
555 assert(scale < 256.0f);
556
557 params->fp32_scalar_lrintf.kernel_zero_point = (int32_t) kernel_zero_point;
558 params->fp32_scalar_lrintf.scale = scale;
559 params->fp32_scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
560 params->fp32_scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
561 params->fp32_scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
562 return sizeof(params->fp32_scalar_lrintf);
563}
564
565size_t xnn_init_qu8_conv_minmax_rndnu_scalar_params(
566 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
567 uint8_t kernel_zero_point,
568 float scale,
569 uint8_t output_zero_point,
570 uint8_t output_min,
571 uint8_t output_max)
572{
573 assert(scale >= 0x1.0p-32f);
574 assert(scale < 256.0f);
575
576 // Compute requantization parameters.
577 const uint32_t scale_bits = float_as_uint32(scale);
578
579 // Multiplier is in [0x00800000, 0x007FFFFF] range.
580 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
581 assert(multiplier >= INT32_C(0x00800000));
582 assert(multiplier <= INT32_C(0x00FFFFFF));
583
584 // Shift is in [16, 55] range.
585 const uint32_t shift = 127 + 23 - (scale_bits >> 23);
586 assert(shift >= 16);
587 assert(shift < 56);
588
589 const int64_t rounding = INT64_C(1) << (shift - 1);
590 const int32_t output_min_less_zero_point = (int32_t) output_min - (int32_t) output_zero_point;
591 const int32_t output_max_less_zero_point = (int32_t) output_max - (int32_t) output_zero_point;
592
593 params->rndnu_scalar.kernel_zero_point = (int32_t) kernel_zero_point;
594 params->rndnu_scalar.multiplier = multiplier;
595 params->rndnu_scalar.rounding = rounding;
596 params->rndnu_scalar.shift = shift;
597 params->rndnu_scalar.output_min_less_zero_point = output_min_less_zero_point;
598 params->rndnu_scalar.output_max_less_zero_point = output_max_less_zero_point;
599 params->rndnu_scalar.output_zero_point = (int32_t) output_zero_point;
600 return sizeof(params->rndnu_scalar);
601}
602
603#if XNN_ARCH_X86 || XNN_ARCH_X86_64
604size_t xnn_init_qu8_conv_minmax_fp32_sse2_params(
605 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
606 uint8_t kernel_zero_point,
607 float scale,
608 uint8_t output_zero_point,
609 uint8_t output_min,
610 uint8_t output_max)
611{
612 assert(scale >= 0x1.0p-32f);
613 assert(scale < 256.0f);
614
615 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
616 for (uint32_t i = 0; i < 4; i++) {
617 params->fp32_sse2.scale[i] = scale;
618 params->fp32_sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
619 }
620 for (uint32_t i = 0; i < 8; i++) {
621 params->fp32_sse2.kernel_zero_point[i] = (int16_t) kernel_zero_point;
622 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
623 }
624 for (uint32_t i = 0; i < 16; i++) {
625 params->fp32_sse2.output_min[i] = output_min;
626 }
627 return sizeof(params->fp32_sse2);
628}
629
630size_t xnn_init_qu8_conv_minmax_fp32_avx2_params(
631 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
632 uint8_t kernel_zero_point,
633 float scale,
634 uint8_t output_zero_point,
635 uint8_t output_min,
636 uint8_t output_max)
637{
638 assert(scale >= 0x1.0p-32f);
639 assert(scale < 256.0f);
640
641 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
642 for (uint32_t i = 0; i < 8; i++) {
643 params->fp32_avx2.scale[i] = scale;
644 params->fp32_avx2.output_max_less_zero_point[i] = output_max_less_zero_point;
645 }
646 for (uint32_t i = 0; i < 16; i++) {
647 params->fp32_avx2.kernel_zero_point[i] = (int16_t) kernel_zero_point;
648 params->fp32_avx2.output_zero_point[i] = (int16_t) output_zero_point;
649 }
650 for (uint32_t i = 0; i < 32; i++) {
651 params->fp32_avx2.output_min[i] = output_min;
652 }
653 return sizeof(params->fp32_avx2);
654}
655
656size_t xnn_init_qu8_conv_minmax_fp32_avx512_params(
657 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
658 uint8_t kernel_zero_point,
659 float scale,
660 uint8_t output_zero_point,
661 uint8_t output_min,
662 uint8_t output_max)
663{
664 assert(scale >= 0x1.0p-32f);
665 assert(scale < 256.0f);
666
667 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
668 for (uint32_t i = 0; i < 16; i++) {
669 params->fp32_avx512.scale[i] = scale;
670 params->fp32_avx512.output_max_less_zero_point[i] = output_max_less_zero_point;
671 }
672 for (uint32_t i = 0; i < 32; i++) {
673 params->fp32_avx512.kernel_zero_point[i] = (int16_t) (uint16_t) kernel_zero_point;
674 params->fp32_avx512.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
675 }
676 for (uint32_t i = 0; i < 64; i++) {
677 params->fp32_avx512.output_min[i] = output_min;
678 }
679 return sizeof(params->fp32_avx512);
680}
681#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
682
683#if XNN_ARCH_ARM
684size_t xnn_init_qu8_conv_minmax_fp32_armsimd32_params(
685 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
686 uint8_t kernel_zero_point,
687 float scale,
688 uint8_t output_zero_point,
689 uint8_t output_min,
690 uint8_t output_max)
691{
692 assert(scale >= 0x1.0p-32f);
693 assert(scale < 256.0f);
694
695 const int32_t minus_kernel_zero_point = -(int32_t) kernel_zero_point;
696 params->fp32_armsimd32.scale = scale;
697 params->fp32_armsimd32.magic_bias = 12582912.0f;
698 params->fp32_armsimd32.minus_kernel_zero_point = (uint32_t) (uint16_t) minus_kernel_zero_point * UINT32_C(0x00010001);
699 params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
700 params->fp32_armsimd32.output_min = (uint32_t) output_min * UINT32_C(0x01010101);
701 params->fp32_armsimd32.output_max = (uint32_t) output_max * UINT32_C(0x01010101);
702 return sizeof(params->fp32_armsimd32);
703}
704#endif // XNN_ARCH_ARM
705
706#if XNN_ARCH_ARM || XNN_ARCH_ARM64
707size_t xnn_init_qu8_conv_minmax_fp32_neon_params(
708 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
709 uint8_t kernel_zero_point,
710 float scale,
711 uint8_t output_zero_point,
712 uint8_t output_min,
713 uint8_t output_max)
714{
715 assert(scale >= 0x1.0p-32f);
716 assert(scale < 256.0f);
717
718 params->fp32_neon.kernel_zero_point[0] = kernel_zero_point;
719 params->fp32_neon.kernel_zero_point[1] = kernel_zero_point;
720 params->fp32_neon.kernel_zero_point[2] = kernel_zero_point;
721 params->fp32_neon.kernel_zero_point[3] = kernel_zero_point;
722 params->fp32_neon.scale = scale;
723 params->fp32_neon.magic_bias = 12582912.0f;
724 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
725 params->fp32_neon.output_min = output_min;
726 params->fp32_neon.output_max = output_max;
727 return sizeof(params->fp32_neon);
728}
729
730size_t xnn_init_qu8_conv_minmax_fp32_neonv8_params(
731 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
732 uint8_t kernel_zero_point,
733 float scale,
734 uint8_t output_zero_point,
735 uint8_t output_min,
736 uint8_t output_max)
737{
738 assert(scale >= 0x1.0p-32f);
739 assert(scale < 256.0f);
740
741 params->fp32_neonv8.kernel_zero_point[0] = kernel_zero_point;
742 params->fp32_neonv8.kernel_zero_point[1] = kernel_zero_point;
743 params->fp32_neonv8.kernel_zero_point[2] = kernel_zero_point;
744 params->fp32_neonv8.kernel_zero_point[3] = kernel_zero_point;
745 params->fp32_neonv8.scale = scale;
746 params->fp32_neonv8.output_zero_point = (int16_t) (uint16_t) output_zero_point;
747 params->fp32_neonv8.output_min = output_min;
748 params->fp32_neonv8.output_max = output_max;
749 return sizeof(params->fp32_neonv8);
750}
751
752size_t xnn_init_qu8_conv_minmax_rndnu_neon_params(
753 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
754 uint8_t kernel_zero_point,
755 float scale,
756 uint8_t output_zero_point,
757 uint8_t output_min,
758 uint8_t output_max)
759{
760 assert(scale >= 0x1.0p-32f);
761 assert(scale < 256.0f);
762
763 // Compute requantization parameters.
764 const uint32_t scale_bits = float_as_uint32(scale);
765
766 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
767 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
768 assert(multiplier >= INT32_C(0x40000000));
769 assert(multiplier <= INT32_C(0x7FFFFF80));
770
771 // Shift is in [-8, 31] range.
772 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
773 assert(shift >= -8);
774 assert(shift < 32);
775
776 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
777 const int32_t post_shift = math_max_s32(shift, 1);
778 const int32_t pre_shift = shift - post_shift;
779
780 params->rndnu_neon.kernel_zero_point[0] = kernel_zero_point;
781 params->rndnu_neon.kernel_zero_point[1] = kernel_zero_point;
782 params->rndnu_neon.kernel_zero_point[2] = kernel_zero_point;
783 params->rndnu_neon.kernel_zero_point[3] = kernel_zero_point;
784 params->rndnu_neon.right_pre_shift = -pre_shift;
785 params->rndnu_neon.multiplier = multiplier;
786 params->rndnu_neon.right_post_shift = -post_shift;
787 params->rndnu_neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
788 params->rndnu_neon.output_min = output_min;
789 params->rndnu_neon.output_max = output_max;
790 return sizeof(params->rndnu_neon);
791}
792#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
793
794#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
795size_t xnn_init_qu8_conv_minmax_fp32_wasmsimd_params(
796 union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
797 uint8_t kernel_zero_point,
798 float scale,
799 uint8_t output_zero_point,
800 uint8_t output_min,
801 uint8_t output_max)
802{
803 assert(scale >= 0x1.0p-32f);
804 assert(scale < 256.0f);
805
806 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
807 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
808 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
809 for (uint32_t i = 0; i < 4; i++) {
810 params->fp32_wasmsimd.kernel_zero_point[i] = (int16_t) (uint16_t) kernel_zero_point;
811 }
812 for (uint32_t i = 0; i < 2; i++) {
813 params->fp32_wasmsimd.scale[i] = scale;
814 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
815 params->fp32_wasmsimd.magic_min[i] = magic_min;
816 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_zero_point;
817 }
818 for (uint32_t i = 0; i < 8; i++) {
819 params->fp32_wasmsimd.output_max[i] = output_max;
820 }
821 return sizeof(params->fp32_wasmsimd);
822}
823#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
824
// Writes per-channel scales into packed_w in tiles of at most channels_tile
// values. Within a tile the values are stored at consecutive float indices
// (unaligned stores); after each tile packed_w advances by stride bytes.
void xnn_init_qc8_scale_fp32_params(
  size_t channels,
  size_t channels_tile,
  size_t stride,
  const float scale[XNN_MIN_ELEMENTS(1)],
  void* packed_w)
{
  size_t channel_base = 0;
  while (channel_base < channels) {
    // The last tile may be partial.
    const size_t remaining = channels - channel_base;
    const size_t count = min(remaining, channels_tile);
    const float* tile_scale = &scale[channel_base];
    for (size_t k = 0; k != count; k++) {
      unaligned_indexed_store_f32(packed_w, k, tile_scale[k]);
    }
    packed_w = (void*) ((uintptr_t) packed_w + stride);
    channel_base += channels_tile;
  }
}
840
841size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_fmagic_params(
842 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
843 int32_t init_bias,
844 float scale,
845 int8_t output_zero_point,
846 int8_t output_min,
847 int8_t output_max)
848{
849 assert(scale >= 0x1.0p-32f);
850 assert(scale < 256.0f);
851
852 params->fp32_scalar_fmagic.init_bias = init_bias;
853 params->fp32_scalar_fmagic.scale = scale;
854 params->fp32_scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
855 params->fp32_scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
856 params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
857 params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
858 return sizeof(params->fp32_scalar_fmagic);
859}
860
861void xnn_update_qs8_avgpool_minmax_fp32_scalar_fmagic_params(
862 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
863 int32_t init_bias,
864 float scale)
865{
866 assert(scale >= 0x1.0p-32f);
867 assert(scale < 256.0f);
868
869 params->fp32_scalar_fmagic.init_bias = init_bias;
870 params->fp32_scalar_fmagic.scale = scale;
871}
872
873size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_imagic_params(
874 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
875 int32_t init_bias,
876 float scale,
877 int8_t output_zero_point,
878 int8_t output_min,
879 int8_t output_max)
880{
881 assert(scale >= 0x1.0p-32f);
882 assert(scale < 256.0f);
883
884 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
885 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
886 params->fp32_scalar_imagic.init_bias = init_bias;
887 params->fp32_scalar_imagic.scale = scale;
888 params->fp32_scalar_imagic.magic_bias = 12582912.0f;
889 params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
890 params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
891 params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
892 return sizeof(params->fp32_scalar_imagic);
893}
894
895void xnn_update_qs8_avgpool_minmax_fp32_scalar_imagic_params(
896 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
897 int32_t init_bias,
898 float scale)
899{
900 assert(scale >= 0x1.0p-32f);
901 assert(scale < 256.0f);
902
903 params->fp32_scalar_imagic.init_bias = init_bias;
904 params->fp32_scalar_imagic.scale = scale;
905}
906
907size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_lrintf_params(
908 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
909 int32_t init_bias,
910 float scale,
911 int8_t output_zero_point,
912 int8_t output_min,
913 int8_t output_max)
914{
915 assert(scale >= 0x1.0p-32f);
916 assert(scale < 256.0f);
917
918 params->fp32_scalar_lrintf.init_bias = init_bias;
919 params->fp32_scalar_lrintf.scale = scale;
920 params->fp32_scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
921 params->fp32_scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
922 params->fp32_scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
923 return sizeof(params->fp32_scalar_lrintf);
924}
925
926void xnn_update_qs8_avgpool_minmax_fp32_scalar_lrintf_params(
927 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
928 int32_t init_bias,
929 float scale)
930{
931 assert(scale >= 0x1.0p-32f);
932 assert(scale < 256.0f);
933
934 params->fp32_scalar_lrintf.init_bias = init_bias;
935 params->fp32_scalar_lrintf.scale = scale;
936}
937
938#if XNN_ARCH_X86 || XNN_ARCH_X86_64
939size_t xnn_init_qs8_avgpool_minmax_fp32_sse2_params(
940 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
941 int32_t init_bias,
942 float scale,
943 int8_t output_zero_point,
944 int8_t output_min,
945 int8_t output_max)
946{
947 assert(scale >= 0x1.0p-32f);
948 assert(scale < 256.0f);
949
950 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
951 for (uint32_t i = 0; i < 4; i++) {
952 params->fp32_sse2.init_bias[i] = init_bias;
953 params->fp32_sse2.scale[i] = scale;
954 params->fp32_sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
955 }
956 for (uint32_t i = 0; i < 8; i++) {
957 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
958 params->fp32_sse2.output_min[i] = (int16_t) output_min;
959 }
960 return sizeof(params->fp32_sse2);
961}
962
963void xnn_update_qs8_avgpool_minmax_fp32_sse2_params(
964 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
965 int32_t init_bias,
966 float scale)
967{
968 assert(scale >= 0x1.0p-32f);
969 assert(scale < 256.0f);
970
971 for (uint32_t i = 0; i < 4; i++) {
972 params->fp32_sse2.init_bias[i] = init_bias;
973 params->fp32_sse2.scale[i] = scale;
974 }
975}
976
977size_t xnn_init_qs8_avgpool_minmax_fp32_sse4_params(
978 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
979 int32_t init_bias,
980 float scale,
981 int8_t output_zero_point,
982 int8_t output_min,
983 int8_t output_max)
984{
985 assert(scale >= 0x1.0p-32f);
986 assert(scale < 256.0f);
987
988 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
989 for (uint32_t i = 0; i < 4; i++) {
990 params->fp32_sse4.init_bias[i] = init_bias;
991 params->fp32_sse4.scale[i] = scale;
992 params->fp32_sse4.output_max_less_zero_point[i] = output_max_less_zero_point;
993 }
994 for (uint32_t i = 0; i < 8; i++) {
995 params->fp32_sse4.output_zero_point[i] = (int16_t) output_zero_point;
996 }
997 for (uint32_t i = 0; i < 16; i++) {
998 params->fp32_sse4.output_min[i] = output_min;
999 }
1000 return sizeof(params->fp32_sse4);
1001}
1002
1003void xnn_update_qs8_avgpool_minmax_fp32_sse4_params(
1004 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1005 int32_t init_bias,
1006 float scale)
1007{
1008 assert(scale >= 0x1.0p-32f);
1009 assert(scale < 256.0f);
1010
1011 for (uint32_t i = 0; i < 4; i++) {
1012 params->fp32_sse4.init_bias[i] = init_bias;
1013 params->fp32_sse4.scale[i] = scale;
1014 }
1015}
1016#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1017
1018#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1019size_t xnn_init_qs8_avgpool_minmax_fp32_neon_params(
1020 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1021 int32_t init_bias,
1022 float scale,
1023 int8_t output_zero_point,
1024 int8_t output_min,
1025 int8_t output_max)
1026{
1027 assert(scale >= 0x1.0p-32f);
1028 assert(scale < 256.0f);
1029
1030 params->fp32_neon.init_bias = init_bias;
1031 params->fp32_neon.scale = scale;
1032 params->fp32_neon.magic_bias = 12582912.0f;
1033 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1034 params->fp32_neon.output_min = output_min;
1035 params->fp32_neon.output_max = output_max;
1036 return sizeof(params->fp32_neon);
1037}
1038
1039void xnn_update_qs8_avgpool_minmax_fp32_neon_params(
1040 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1041 int32_t init_bias,
1042 float scale)
1043{
1044 assert(scale >= 0x1.0p-32f);
1045 assert(scale < 256.0f);
1046
1047 params->fp32_neon.init_bias = init_bias;
1048 params->fp32_neon.scale = scale;
1049}
1050
1051size_t xnn_init_qs8_avgpool_minmax_fp32_neonv8_params(
1052 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1053 int32_t init_bias,
1054 float scale,
1055 int8_t output_zero_point,
1056 int8_t output_min,
1057 int8_t output_max)
1058{
1059 assert(scale >= 0x1.0p-32f);
1060 assert(scale < 256.0f);
1061
1062 params->fp32_neonv8.init_bias = init_bias;
1063 params->fp32_neonv8.scale = scale;
1064 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
1065 params->fp32_neonv8.output_min = output_min;
1066 params->fp32_neonv8.output_max = output_max;
1067 return sizeof(params->fp32_neonv8);
1068}
1069
1070void xnn_update_qs8_avgpool_minmax_fp32_neonv8_params(
1071 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1072 int32_t init_bias,
1073 float scale)
1074{
1075 assert(scale >= 0x1.0p-32f);
1076 assert(scale < 256.0f);
1077
1078 params->fp32_neonv8.init_bias = init_bias;
1079 params->fp32_neonv8.scale = scale;
1080}
1081
1082size_t xnn_init_qs8_avgpool_minmax_rndnu_neon_params(
1083 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1084 int32_t init_bias,
1085 float scale,
1086 int8_t output_zero_point,
1087 int8_t output_min,
1088 int8_t output_max)
1089{
1090 assert(scale >= 0x1.0p-32f);
1091 assert(scale < 256.0f);
1092
1093 // Compute requantization parameters.
1094 const uint32_t scale_bits = float_as_uint32(scale);
1095
1096 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
1097 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
1098 assert(multiplier >= INT32_C(0x40000000));
1099 assert(multiplier <= INT32_C(0x7FFFFF80));
1100
1101 // Shift is in [-8, 31] range.
1102 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
1103 assert(shift >= -8);
1104 assert(shift < 32);
1105
1106 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
1107 const int32_t post_shift = math_max_s32(shift, 1);
1108 const int32_t pre_shift = shift - post_shift;
1109
1110 params->rndnu_neon.init_bias = init_bias;
1111 params->rndnu_neon.left_pre_shift = -pre_shift;
1112 params->rndnu_neon.multiplier = multiplier;
1113 params->rndnu_neon.left_post_shift = -post_shift;
1114 params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
1115 params->rndnu_neon.output_min = output_min;
1116 params->rndnu_neon.output_max = output_max;
1117 return sizeof(params->rndnu_neon);
1118}
1119
1120void xnn_update_qs8_avgpool_minmax_rndnu_neon_params(
1121 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1122 int32_t init_bias,
1123 float scale)
1124{
1125 assert(scale >= 0x1.0p-32f);
1126 assert(scale < 256.0f);
1127
1128 // Compute requantization parameters.
1129 const uint32_t scale_bits = float_as_uint32(scale);
1130
1131 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
1132 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
1133 assert(multiplier >= INT32_C(0x40000000));
1134 assert(multiplier <= INT32_C(0x7FFFFF80));
1135
1136 // Shift is in [-8, 31] range.
1137 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
1138 assert(shift >= -8);
1139 assert(shift < 32);
1140
1141 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
1142 const int32_t post_shift = math_max_s32(shift, 1);
1143 const int32_t pre_shift = shift - post_shift;
1144
1145 params->rndnu_neon.init_bias = init_bias;
1146 params->rndnu_neon.left_pre_shift = -pre_shift;
1147 params->rndnu_neon.multiplier = multiplier;
1148 params->rndnu_neon.left_post_shift = -post_shift;
1149}
1150#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1151
1152#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1153size_t xnn_init_qs8_avgpool_minmax_fp32_wasmsimd_params(
1154 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1155 int32_t init_bias,
1156 float scale,
1157 int8_t output_zero_point,
1158 int8_t output_min,
1159 int8_t output_max)
1160{
1161 assert(scale >= 0x1.0p-32f);
1162 assert(scale < 256.0f);
1163
1164 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
1165 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
1166 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1167 for (uint32_t i = 0; i < 2; i++) {
1168 params->fp32_wasmsimd.init_bias[i] = init_bias;
1169 params->fp32_wasmsimd.scale[i] = scale;
1170 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
1171 params->fp32_wasmsimd.magic_min[i] = magic_min;
1172 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_zero_point;
1173 }
1174 for (uint32_t i = 0; i < 8; i++) {
1175 params->fp32_wasmsimd.output_max[i] = output_max;
1176 }
1177 return sizeof(params->fp32_wasmsimd);
1178}
1179
1180void xnn_update_qs8_avgpool_minmax_fp32_wasmsimd_params(
1181 union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1182 int32_t init_bias,
1183 float scale)
1184{
1185 assert(scale >= 0x1.0p-32f);
1186 assert(scale < 256.0f);
1187
1188 for (uint32_t i = 0; i < 2; i++) {
1189 params->fp32_wasmsimd.init_bias[i] = init_bias;
1190 params->fp32_wasmsimd.scale[i] = scale;
1191 }
1192}
1193#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1194
1195size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_fmagic_params(
1196 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1197 int32_t init_bias,
1198 float scale,
1199 uint8_t output_zero_point,
1200 uint8_t output_min,
1201 uint8_t output_max)
1202{
1203 assert(scale >= 0x1.0p-32f);
1204 assert(scale < 256.0f);
1205
1206 params->fp32_scalar_fmagic.init_bias = init_bias;
1207 params->fp32_scalar_fmagic.scale = scale;
1208 params->fp32_scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
1209 params->fp32_scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
1210 params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
1211 params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1212 return sizeof(params->fp32_scalar_fmagic);
1213}
1214
1215void xnn_update_qu8_avgpool_minmax_fp32_scalar_fmagic_params(
1216 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1217 int32_t init_bias,
1218 float scale)
1219{
1220 assert(scale >= 0x1.0p-32f);
1221 assert(scale < 256.0f);
1222
1223 params->fp32_scalar_fmagic.init_bias = init_bias;
1224 params->fp32_scalar_fmagic.scale = scale;
1225}
1226
1227size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_imagic_params(
1228 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1229 int32_t init_bias,
1230 float scale,
1231 uint8_t output_zero_point,
1232 uint8_t output_min,
1233 uint8_t output_max)
1234{
1235 assert(scale >= 0x1.0p-32f);
1236 assert(scale < 256.0f);
1237
1238 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
1239 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
1240 params->fp32_scalar_imagic.init_bias = init_bias;
1241 params->fp32_scalar_imagic.scale = scale;
1242 params->fp32_scalar_imagic.magic_bias = 12582912.0f;
1243 params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
1244 params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
1245 params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1246 return sizeof(params->fp32_scalar_imagic);
1247}
1248
1249void xnn_update_qu8_avgpool_minmax_fp32_scalar_imagic_params(
1250 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1251 int32_t init_bias,
1252 float scale)
1253{
1254 assert(scale >= 0x1.0p-32f);
1255 assert(scale < 256.0f);
1256
1257 params->fp32_scalar_imagic.init_bias = init_bias;
1258 params->fp32_scalar_imagic.scale = scale;
1259}
1260
1261size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_lrintf_params(
1262 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1263 int32_t init_bias,
1264 float scale,
1265 uint8_t output_zero_point,
1266 uint8_t output_min,
1267 uint8_t output_max)
1268{
1269 assert(scale >= 0x1.0p-32f);
1270 assert(scale < 256.0f);
1271
1272 params->fp32_scalar_lrintf.init_bias = init_bias;
1273 params->fp32_scalar_lrintf.scale = scale;
1274 params->fp32_scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
1275 params->fp32_scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
1276 params->fp32_scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
1277 return sizeof(params->fp32_scalar_lrintf);
1278}
1279
1280void xnn_update_qu8_avgpool_minmax_fp32_scalar_lrintf_params(
1281 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1282 int32_t init_bias,
1283 float scale)
1284{
1285 assert(scale >= 0x1.0p-32f);
1286 assert(scale < 256.0f);
1287
1288 params->fp32_scalar_lrintf.init_bias = init_bias;
1289 params->fp32_scalar_lrintf.scale = scale;
1290}
1291
1292#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1293size_t xnn_init_qu8_avgpool_minmax_fp32_sse2_params(
1294 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1295 int32_t init_bias,
1296 float scale,
1297 uint8_t output_zero_point,
1298 uint8_t output_min,
1299 uint8_t output_max)
1300{
1301 assert(scale >= 0x1.0p-32f);
1302 assert(scale < 256.0f);
1303
1304 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
1305 for (uint32_t i = 0; i < 4; i++) {
1306 params->fp32_sse2.init_bias[i] = init_bias;
1307 params->fp32_sse2.scale[i] = scale;
1308 params->fp32_sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
1309 }
1310 for (uint32_t i = 0; i < 8; i++) {
1311 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
1312 }
1313 for (uint32_t i = 0; i < 16; i++) {
1314 params->fp32_sse2.output_min[i] = output_min;
1315 }
1316 return sizeof(params->fp32_sse2);
1317}
1318
1319void xnn_update_qu8_avgpool_minmax_fp32_sse2_params(
1320 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1321 int32_t init_bias,
1322 float scale)
1323{
1324 assert(scale >= 0x1.0p-32f);
1325 assert(scale < 256.0f);
1326
1327 for (uint32_t i = 0; i < 4; i++) {
1328 params->fp32_sse2.init_bias[i] = init_bias;
1329 params->fp32_sse2.scale[i] = scale;
1330 }
1331}
1332
1333size_t xnn_init_qu8_avgpool_minmax_fp32_sse4_params(
1334 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1335 int32_t init_bias,
1336 float scale,
1337 uint8_t output_zero_point,
1338 uint8_t output_min,
1339 uint8_t output_max)
1340{
1341 assert(scale >= 0x1.0p-32f);
1342 assert(scale < 256.0f);
1343
1344 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
1345 for (uint32_t i = 0; i < 4; i++) {
1346 params->fp32_sse4.init_bias[i] = init_bias;
1347 params->fp32_sse4.scale[i] = scale;
1348 params->fp32_sse4.output_max_less_zero_point[i] = output_max_less_zero_point;
1349 }
1350 for (uint32_t i = 0; i < 8; i++) {
1351 params->fp32_sse4.output_zero_point[i] = (int16_t) output_zero_point;
1352 }
1353 for (uint32_t i = 0; i < 16; i++) {
1354 params->fp32_sse4.output_min[i] = output_min;
1355 }
1356 return sizeof(params->fp32_sse4);
1357}
1358
1359void xnn_update_qu8_avgpool_minmax_fp32_sse4_params(
1360 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1361 int32_t init_bias,
1362 float scale)
1363{
1364 assert(scale >= 0x1.0p-32f);
1365 assert(scale < 256.0f);
1366
1367 for (uint32_t i = 0; i < 4; i++) {
1368 params->fp32_sse4.init_bias[i] = init_bias;
1369 params->fp32_sse4.scale[i] = scale;
1370 }
1371}
1372#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1373
1374#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1375size_t xnn_init_qu8_avgpool_minmax_fp32_neon_params(
1376 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1377 int32_t init_bias,
1378 float scale,
1379 uint8_t output_zero_point,
1380 uint8_t output_min,
1381 uint8_t output_max)
1382{
1383 assert(scale >= 0x1.0p-32f);
1384 assert(scale < 256.0f);
1385
1386 params->fp32_neon.init_bias = init_bias;
1387 params->fp32_neon.scale = scale;
1388 params->fp32_neon.magic_bias = 12582912.0f;
1389 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1390 params->fp32_neon.output_min = output_min;
1391 params->fp32_neon.output_max = output_max;
1392 return sizeof(params->fp32_neon);
1393}
1394
1395void xnn_update_qu8_avgpool_minmax_fp32_neon_params(
1396 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1397 int32_t init_bias,
1398 float scale)
1399{
1400 assert(scale >= 0x1.0p-32f);
1401 assert(scale < 256.0f);
1402
1403 params->fp32_neon.init_bias = init_bias;
1404 params->fp32_neon.scale = scale;
1405}
1406
1407size_t xnn_init_qu8_avgpool_minmax_fp32_neonv8_params(
1408 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1409 int32_t init_bias,
1410 float scale,
1411 uint8_t output_zero_point,
1412 uint8_t output_min,
1413 uint8_t output_max)
1414{
1415 assert(scale >= 0x1.0p-32f);
1416 assert(scale < 256.0f);
1417
1418 params->fp32_neonv8.init_bias = init_bias;
1419 params->fp32_neonv8.scale = scale;
1420 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
1421 params->fp32_neonv8.output_min = output_min;
1422 params->fp32_neonv8.output_max = output_max;
1423 return sizeof(params->fp32_neonv8);
1424}
1425
1426void xnn_update_qu8_avgpool_minmax_fp32_neonv8_params(
1427 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1428 int32_t init_bias,
1429 float scale)
1430{
1431 assert(scale >= 0x1.0p-32f);
1432 assert(scale < 256.0f);
1433
1434 params->fp32_neonv8.init_bias = init_bias;
1435 params->fp32_neonv8.scale = scale;
1436}
1437
1438size_t xnn_init_qu8_avgpool_minmax_rndnu_neon_params(
1439 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1440 int32_t init_bias,
1441 float scale,
1442 uint8_t output_zero_point,
1443 uint8_t output_min,
1444 uint8_t output_max)
1445{
1446 assert(scale >= 0x1.0p-32f);
1447 assert(scale < 256.0f);
1448
1449 // Compute requantization parameters.
1450 const uint32_t scale_bits = float_as_uint32(scale);
1451
1452 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
1453 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
1454 assert(multiplier >= INT32_C(0x40000000));
1455 assert(multiplier <= INT32_C(0x7FFFFF80));
1456
1457 // Shift is in [-8, 31] range.
1458 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
1459 assert(shift >= -8);
1460 assert(shift < 32);
1461
1462 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
1463 const int32_t post_shift = math_max_s32(shift, 1);
1464 const int32_t pre_shift = shift - post_shift;
1465
1466 params->rndnu_neon.init_bias = init_bias;
1467 params->rndnu_neon.left_pre_shift = -pre_shift;
1468 params->rndnu_neon.multiplier = multiplier;
1469 params->rndnu_neon.left_post_shift = -post_shift;
1470 params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
1471 params->rndnu_neon.output_min = output_min;
1472 params->rndnu_neon.output_max = output_max;
1473 return sizeof(params->rndnu_neon);
1474}
1475
1476void xnn_update_qu8_avgpool_minmax_rndnu_neon_params(
1477 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1478 int32_t init_bias,
1479 float scale)
1480{
1481 assert(scale >= 0x1.0p-32f);
1482 assert(scale < 256.0f);
1483
1484 // Compute requantization parameters.
1485 const uint32_t scale_bits = float_as_uint32(scale);
1486
1487 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
1488 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
1489 assert(multiplier >= INT32_C(0x40000000));
1490 assert(multiplier <= INT32_C(0x7FFFFF80));
1491
1492 // Shift is in [-8, 31] range.
1493 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
1494 assert(shift >= -8);
1495 assert(shift < 32);
1496
1497 // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
1498 const int32_t post_shift = math_max_s32(shift, 1);
1499 const int32_t pre_shift = shift - post_shift;
1500
1501 params->rndnu_neon.init_bias = init_bias;
1502 params->rndnu_neon.left_pre_shift = -pre_shift;
1503 params->rndnu_neon.multiplier = multiplier;
1504 params->rndnu_neon.left_post_shift = -post_shift;
1505}
1506#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1507
1508#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1509size_t xnn_init_qu8_avgpool_minmax_fp32_wasmsimd_params(
1510 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1511 int32_t init_bias,
1512 float scale,
1513 uint8_t output_zero_point,
1514 uint8_t output_min,
1515 uint8_t output_max)
1516{
1517 assert(scale >= 0x1.0p-32f);
1518 assert(scale < 256.0f);
1519
1520 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
1521 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
1522 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
1523 for (uint32_t i = 0; i < 2; i++) {
1524 params->fp32_wasmsimd.init_bias[i] = init_bias;
1525 params->fp32_wasmsimd.scale[i] = scale;
1526 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
1527 params->fp32_wasmsimd.magic_min[i] = magic_min;
1528 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_zero_point;
1529 }
1530 for (uint32_t i = 0; i < 8; i++) {
1531 params->fp32_wasmsimd.output_max[i] = output_max;
1532 }
1533 return sizeof(params->fp32_wasmsimd);
1534}
1535
1536void xnn_update_qu8_avgpool_minmax_fp32_wasmsimd_params(
1537 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1538 int32_t init_bias,
1539 float scale)
1540{
1541 assert(scale >= 0x1.0p-32f);
1542 assert(scale < 256.0f);
1543
1544 for (uint32_t i = 0; i < 2; i++) {
1545 params->fp32_wasmsimd.init_bias[i] = init_bias;
1546 params->fp32_wasmsimd.scale[i] = scale;
1547 }
1548}
1549#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1550
1551size_t xnn_init_qu8_avgpool_minmax_scalar_params(
1552 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1553 int32_t bias,
1554 float scale,
1555 uint8_t output_zero_point,
1556 uint8_t output_min,
1557 uint8_t output_max)
1558{
1559 // Compute requantization parameters.
1560 assert(scale >= 0x1.0p-32f);
1561 assert(scale < 256.0f);
1562 const uint32_t scale_bits = float_as_uint32(scale);
1563
1564 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1565 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1566 assert(multiplier >= INT32_C(0x00800000));
1567 assert(multiplier <= INT32_C(0x00FFFFFF));
1568
1569 // Shift is in [16, 55] range.
1570 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1571 assert(shift >= 16);
1572 assert(shift < 64);
1573
1574 const uint32_t right_shift = (uint32_t) shift;
1575 const int64_t rounding = INT64_C(1) << (right_shift - 1);
1576 params->scalar.bias = bias;
1577 params->scalar.rounding = rounding;
1578 params->scalar.multiplier = multiplier;
1579 params->scalar.right_shift = right_shift;
1580 params->scalar.output_min_less_zero_point =
1581 (int32_t) (uint32_t) output_min - (int32_t) (uint32_t) output_zero_point;
1582 params->scalar.output_max_less_zero_point =
1583 (int32_t) (uint32_t) output_max - (int32_t) (uint32_t) output_zero_point;
1584 params->scalar.output_zero_point = (int32_t) (uint32_t) output_zero_point;
1585 return sizeof(params->scalar);
1586}
1587
1588#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1589size_t xnn_init_qu8_avgpool_minmax_neon_params(
1590 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1591 int32_t bias,
1592 float scale,
1593 uint8_t output_zero_point,
1594 uint8_t output_min,
1595 uint8_t output_max)
1596{
1597 // Compute requantization parameters.
1598 assert(scale >= 0x1.0p-32f);
1599 assert(scale < 256.0f);
1600 const uint32_t scale_bits = float_as_uint32(scale);
1601
1602 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1603 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1604 assert(multiplier >= INT32_C(0x00800000));
1605 assert(multiplier <= INT32_C(0x00FFFFFF));
1606
1607 // Shift is in [16, 55] range.
1608 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1609 assert(shift >= 16);
1610 assert(shift < 64);
1611
1612 params->neon.bias = bias;
1613 params->neon.multiplier = multiplier;
1614 params->neon.left_shift = (int64_t) -shift;
1615 params->neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
1616 params->neon.output_min = output_min;
1617 params->neon.output_max = output_max;
1618 return sizeof(params->neon);
1619}
1620#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1621
1622#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1623size_t xnn_init_qu8_avgpool_minmax_sse2_params(
1624 union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
1625 int32_t bias,
1626 float scale,
1627 uint8_t output_zero_point,
1628 uint8_t output_min,
1629 uint8_t output_max)
1630{
1631 // Compute requantization parameters.
1632 assert(scale >= 0x1.0p-32f);
1633 assert(scale < 256.0f);
1634 const uint32_t scale_bits = float_as_uint32(scale);
1635
1636 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1637 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1638 assert(multiplier >= INT32_C(0x00800000));
1639 assert(multiplier <= INT32_C(0x00FFFFFF));
1640
1641 // Shift is in [16, 55] range.
1642 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1643 assert(shift >= 16);
1644 assert(shift < 64);
1645
1646 const uint32_t right_shift = (uint32_t) shift;
1647 const uint64_t rounding = UINT64_C(1) << (right_shift - 1);
1648 params->sse2.bias[0] = bias;
1649 params->sse2.bias[1] = bias;
1650 params->sse2.bias[2] = bias;
1651 params->sse2.bias[3] = bias;
1652 params->sse2.multiplier[0] = (uint32_t) multiplier;
1653 params->sse2.multiplier[1] = (uint32_t) multiplier;
1654 params->sse2.multiplier[2] = (uint32_t) multiplier;
1655 params->sse2.multiplier[3] = (uint32_t) multiplier;
1656 params->sse2.rounding[0] = rounding;
1657 params->sse2.rounding[1] = rounding;
1658 params->sse2.right_shift[0] = (uint64_t) right_shift;
1659 params->sse2.right_shift[1] = (uint64_t) right_shift;
1660 for (uint32_t i = 0; i < 8; i++) {
1661 params->sse2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
1662 }
1663 for (uint32_t i = 0; i < 16; i++) {
1664 params->sse2.output_min[i] = output_min;
1665 params->sse2.output_max[i] = output_max;
1666 }
1667 return sizeof(params->sse2);
1668}
1669#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1670
1671void xnn_update_qu8_avgpool_minmax_scalar_params(
1672 union xnn_qu8_avgpool_minmax_params* params,
1673 int32_t bias,
1674 float scale)
1675{
1676 // Compute requantization parameters.
1677 assert(scale >= 0x1.0p-32f);
1678 assert(scale < 256.0f);
1679 const uint32_t scale_bits = float_as_uint32(scale);
1680
1681 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1682 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1683 assert(multiplier >= INT32_C(0x00800000));
1684 assert(multiplier <= INT32_C(0x00FFFFFF));
1685
1686 // Shift is in [16, 55] range.
1687 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1688 assert(shift >= 16);
1689 assert(shift < 64);
1690
1691 const int64_t rounding = INT64_C(1) << ((uint32_t) shift - 1);
1692 params->scalar.bias = bias;
1693 params->scalar.multiplier = multiplier;
1694 params->scalar.rounding = rounding;
1695 params->scalar.right_shift = (uint32_t) shift;
1696}
1697
1698#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1699void xnn_update_qu8_avgpool_minmax_neon_params(
1700 union xnn_qu8_avgpool_minmax_params* params,
1701 int32_t bias,
1702 float scale)
1703{
1704 // Compute requantization parameters.
1705 assert(scale >= 0x1.0p-32f);
1706 assert(scale < 256.0f);
1707 const uint32_t scale_bits = float_as_uint32(scale);
1708
1709 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1710 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1711 assert(multiplier >= INT32_C(0x00800000));
1712 assert(multiplier <= INT32_C(0x00FFFFFF));
1713
1714 // Shift is in [16, 55] range.
1715 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1716 assert(shift >= 16);
1717 assert(shift < 64);
1718
1719 params->neon.bias = bias;
1720 params->neon.multiplier = multiplier;
1721 params->neon.left_shift = (int64_t) -shift;
1722}
1723#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1724
1725#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1726void xnn_update_qu8_avgpool_minmax_sse2_params(
1727 union xnn_qu8_avgpool_minmax_params* params,
1728 int32_t bias,
1729 float scale)
1730{
1731 // Compute requantization parameters.
1732 assert(scale >= 0x1.0p-32f);
1733 assert(scale < 256.0f);
1734 const uint32_t scale_bits = float_as_uint32(scale);
1735
1736 // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1737 const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1738 assert(multiplier >= INT32_C(0x00800000));
1739 assert(multiplier <= INT32_C(0x00FFFFFF));
1740
1741 // Shift is in [16, 55] range.
1742 const int32_t shift = 127 + 23 - (scale_bits >> 23);
1743 assert(shift >= 16);
1744 assert(shift < 64);
1745
1746 const uint64_t rounding = UINT64_C(1) << ((uint32_t) shift - 1);
1747 params->sse2.bias[0] = bias;
1748 params->sse2.bias[1] = bias;
1749 params->sse2.bias[2] = bias;
1750 params->sse2.bias[3] = bias;
1751 params->sse2.multiplier[0] = (uint32_t) multiplier;
1752 params->sse2.multiplier[1] = (uint32_t) multiplier;
1753 params->sse2.multiplier[2] = (uint32_t) multiplier;
1754 params->sse2.multiplier[3] = (uint32_t) multiplier;
1755 params->sse2.rounding[0] = rounding;
1756 params->sse2.rounding[1] = rounding;
1757 params->sse2.right_shift[0] = (uint64_t) (uint32_t) shift;
1758 params->sse2.right_shift[1] = (uint64_t) (uint32_t) shift;
1759}
1760#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1761
1762void xnn_update_f32_scaleminmax_scalar_params(
1763 union xnn_f32_scaleminmax_params* params,
1764 float scale)
1765{
1766 params->scalar.scale = scale;
1767}
1768
1769#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1770void xnn_update_f32_scaleminmax_sse_params(
1771 union xnn_f32_scaleminmax_params* params,
1772 float scale)
1773{
1774 for (uint32_t i = 0; i < 4; i++) {
1775 params->sse.scale[i] = scale;
1776 }
1777}
1778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1781size_t xnn_init_f16_scaleminmax_fp16arith_params(
1782 union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1783 uint16_t scale,
1784 uint16_t min,
1785 uint16_t max)
1786{
1787 params->fp16arith.scale = scale;
1788 params->fp16arith.min = min;
1789 params->fp16arith.max = max;
1790 return sizeof(params->fp16arith);
1791}
1792#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1793
1794#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1795size_t xnn_init_f16_scaleminmax_avx_params(
1796 union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1797 uint16_t scale,
1798 uint16_t min,
1799 uint16_t max)
1800{
1801 const float scale_f32 = fp16_ieee_to_fp32_value(scale);
1802 const float min_f32 = fp16_ieee_to_fp32_value(min);
1803 const float max_f32 = fp16_ieee_to_fp32_value(max);
1804 for (uint32_t i = 0; i < 8; i++) {
1805 params->avx.scale[i] = scale_f32;
1806 params->avx.min[i] = min_f32;
1807 params->avx.max[i] = max_f32;
1808 }
1809 return sizeof(params->avx);
1810}
1811#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1812
1813#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1814void xnn_update_f16_scaleminmax_fp16arith_params(
1815 union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1816 uint16_t scale)
1817{
1818 params->fp16arith.scale = scale;
1819}
1820#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1821
1822#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1823void xnn_update_f16_scaleminmax_avx_params(
1824 union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1825 uint16_t scale)
1826{
1827 const float scale_f32 = fp16_ieee_to_fp32_value(scale);
1828 for (uint32_t i = 0; i < 8; i++) {
1829 params->avx.scale[i] = scale_f32;
1830 }
1831}
1832#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1833
1834size_t xnn_init_f32_scaleminmax_scalar_params(
1835 union xnn_f32_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1836 float scale,
1837 float min,
1838 float max)
1839{
1840 params->scalar.scale = scale;
1841 params->scalar.min = min;
1842 params->scalar.max = max;
1843 return sizeof(params->scalar);
1844}
1845
1846#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1847size_t xnn_init_f32_scaleminmax_sse_params(
1848 union xnn_f32_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
1849 float scale,
1850 float min,
1851 float max)
1852{
1853 for (uint32_t i = 0; i < 4; i++) {
1854 params->sse.scale[i] = scale;
1855 params->sse.min[i] = min;
1856 params->sse.max[i] = max;
1857 }
1858 return sizeof(params->sse);
1859}
1860#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1861
1862size_t xnn_init_f32_gavgpool_params(
1863 union xnn_f32_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
1864 float multiplier,
1865 float output_min,
1866 float output_max,
1867 uint32_t width)
1868{
1869 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
1870 for (uint32_t i = 0; i < 4; i++) {
1871 params->sse.multiplier[i] = multiplier;
1872 params->sse.output_min[i] = output_min;
1873 params->sse.output_max[i] = output_max;
1874 }
1875
1876 const uint32_t w = (width - 1) & 3;
1877 params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
1878 params->sse.mask[1] = -(uint32_t) (w >= 1);
1879 params->sse.mask[2] = -(uint32_t) (w >= 2);
1880 params->sse.mask[3] = -(uint32_t) (w >= 3);
1881 return sizeof(params->sse);
1882 #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
1883 params->neon.multiplier = multiplier;
1884 params->neon.output_min = output_min;
1885 params->neon.output_max = output_max;
1886
1887 const uint32_t w = (width - 1) & 3;
1888 params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
1889 params->neon.mask[1] = -(uint32_t) (w >= 1);
1890 params->neon.mask[2] = -(uint32_t) (w >= 2);
1891 params->neon.mask[3] = -(uint32_t) (w >= 3);
1892 return sizeof(params->neon);
1893 #else
1894 params->scalar.multiplier = multiplier;
1895 params->scalar.output_min = output_min;
1896 params->scalar.output_max = output_max;
1897
1898 const uint32_t w = (width - 1) & 3;
1899 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
1900 params->scalar.mask[1] = -(int32_t) (w >= 1);
1901 params->scalar.mask[2] = -(int32_t) (w >= 2);
1902 params->scalar.mask[3] = -(int32_t) (w >= 3);
1903 return sizeof(params->scalar);
1904 #endif
1905}
1906
1907size_t xnn_init_f16_gavgpool_neonfp16arith_x4_params(
1908 union xnn_f16_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
1909 uint16_t multiplier,
1910 uint16_t output_min,
1911 uint16_t output_max,
1912 uint32_t width)
1913{
1914 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
1915 params->neonfp16arith.multiplier = multiplier;
1916 params->neonfp16arith.output_min = output_min;
1917 params->neonfp16arith.output_max = output_max;
1918
1919 const uint32_t w = (width - 1) & 3;
1920 params->neonfp16arith.mask[0] = UINT16_C(0xFFFF);
1921 params->neonfp16arith.mask[1] = -(uint16_t) (w >= 1);
1922 params->neonfp16arith.mask[2] = -(uint16_t) (w >= 2);
1923 params->neonfp16arith.mask[3] = -(uint16_t) (w >= 3);
1924 return sizeof(params->neonfp16arith);
1925 #else
1926 return 0;
1927 #endif
1928}
1929
1930size_t xnn_init_f16_gavgpool_neonfp16arith_x8_params(
1931 union xnn_f16_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
1932 uint16_t multiplier,
1933 uint16_t output_min,
1934 uint16_t output_max,
1935 uint32_t width)
1936{
1937 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
1938 params->neonfp16arith.multiplier = multiplier;
1939 params->neonfp16arith.output_min = output_min;
1940 params->neonfp16arith.output_max = output_max;
1941
1942 const uint32_t w = (width - 1) & 7;
1943 params->neonfp16arith.mask[0] = UINT16_C(0xFFFF);
1944 params->neonfp16arith.mask[1] = -(uint16_t) (w >= 1);
1945 params->neonfp16arith.mask[2] = -(uint16_t) (w >= 2);
1946 params->neonfp16arith.mask[3] = -(uint16_t) (w >= 3);
1947 params->neonfp16arith.mask[4] = -(uint16_t) (w >= 4);
1948 params->neonfp16arith.mask[5] = -(uint16_t) (w >= 5);
1949 params->neonfp16arith.mask[6] = -(uint16_t) (w >= 6);
1950 params->neonfp16arith.mask[7] = -(uint16_t) (w >= 7);
1951 return sizeof(params->neonfp16arith);
1952 #else
1953 return 0;
1954 #endif
1955}
1956
1957void xnn_update_f32_gavgpool_params(
1958 union xnn_f32_gavgpool_params* params,
1959 float multiplier,
1960 uint32_t width)
1961{
1962 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
1963 for (uint32_t i = 0; i < 4; i++) {
1964 params->sse.multiplier[i] = multiplier;
1965 }
1966
1967 const uint32_t w = (width - 1) & 3;
1968 params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
1969 params->sse.mask[1] = -(uint32_t) (w >= 1);
1970 params->sse.mask[2] = -(uint32_t) (w >= 2);
1971 params->sse.mask[3] = -(uint32_t) (w >= 3);
1972 #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
1973 params->neon.multiplier = multiplier;
1974
1975 const uint32_t w = (width - 1) & 3;
1976 params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
1977 params->neon.mask[1] = -(uint32_t) (w >= 1);
1978 params->neon.mask[2] = -(uint32_t) (w >= 2);
1979 params->neon.mask[3] = -(uint32_t) (w >= 3);
1980 #else
1981 params->scalar.multiplier = multiplier;
1982
1983 const uint32_t w = (width - 1) & 3;
1984 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
1985 params->scalar.mask[1] = -(int32_t) (w >= 1);
1986 params->scalar.mask[2] = -(int32_t) (w >= 2);
1987 params->scalar.mask[3] = -(int32_t) (w >= 3);
1988 #endif
1989}
1990
1991size_t xnn_init_scalar_f32_gavgpool_params(
1992 union xnn_f32_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
1993 float multiplier,
1994 float output_min,
1995 float output_max,
1996 uint32_t width)
1997{
1998 params->scalar.multiplier = multiplier;
1999 params->scalar.output_min = output_min;
2000 params->scalar.output_max = output_max;
2001
2002 const uint32_t w = (width - 1) & 3;
2003 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
2004 params->scalar.mask[1] = -(int32_t) (w >= 1);
2005 params->scalar.mask[2] = -(int32_t) (w >= 2);
2006 params->scalar.mask[3] = -(int32_t) (w >= 3);
2007 return sizeof(params->scalar);
2008}
2009
2010size_t xnn_init_bf16_minmax_scalar_params(
2011 union xnn_bf16_minmax_params params[XNN_MIN_ELEMENTS(1)],
2012 uint16_t output_min,
2013 uint16_t output_max)
2014{
2015 params->scalar.min = uint32_as_float((uint32_t) output_min << 16);
2016 params->scalar.max = uint32_as_float((uint32_t) output_max << 16);
2017 return sizeof(params->scalar);
2018}
2019
2020#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2021size_t xnn_init_f16_minmax_fp16arith_params(
2022 union xnn_f16_minmax_params params[XNN_MIN_ELEMENTS(1)],
2023 uint16_t min,
2024 uint16_t max)
2025{
2026 params->fp16arith.min = min;
2027 params->fp16arith.max = max;
2028 return sizeof(params->fp16arith);
2029}
2030#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2031
2032#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2033size_t xnn_init_f16_minmax_avx_params(
2034 union xnn_f16_minmax_params params[XNN_MIN_ELEMENTS(1)],
2035 uint16_t min,
2036 uint16_t max)
2037{
2038 const float min_f32 = fp16_ieee_to_fp32_value(min);
2039 const float max_f32 = fp16_ieee_to_fp32_value(max);
2040 for (uint32_t i = 0; i < 8; i++) {
2041 params->avx.min[i] = min_f32;
2042 params->avx.max[i] = max_f32;
2043 }
2044 return sizeof(params->avx);
2045}
2046#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2047
2048#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2049size_t xnn_init_f32_default_avx_params(
2050 union xnn_f32_default_params params[XNN_MIN_ELEMENTS(1)])
2051{
2052 for (uint32_t i = 0; i < 7; i++) {
2053 params->avx.mask_table[i] = -1;
2054 }
2055 for (uint32_t i = 7; i < 14; i++) {
2056 params->avx.mask_table[i] = 0;
2057 }
2058 return sizeof(params->avx);
2059}
2060#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2061
2062size_t xnn_init_f32_minmax_params(
2063 union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
2064 float output_min,
2065 float output_max)
2066{
2067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
2068 for (uint32_t i = 0; i < 4; i++) {
2069 params->sse.min[i] = output_min;
2070 params->sse.max[i] = output_max;
2071 }
2072 return sizeof(params->sse);
2073 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2074 params->wasmsimd.min[0] = output_min;
2075 params->wasmsimd.min[1] = output_min;
2076 params->wasmsimd.max[0] = output_max;
2077 params->wasmsimd.max[1] = output_max;
2078 return sizeof(params->wasmsimd);
2079 #else
2080 params->scalar.min = output_min;
2081 params->scalar.max = output_max;
2082 return sizeof(params->scalar);
2083 #endif
2084}
2085
2086#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2087size_t xnn_init_f32_minmax_sse_params(
2088 union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
2089 float output_min,
2090 float output_max)
2091{
2092 for (uint32_t i = 0; i < 4; i++) {
2093 params->sse.min[i] = output_min;
2094 params->sse.max[i] = output_max;
2095 }
2096 return sizeof(params->sse);
2097}
2098
2099size_t xnn_init_f32_minmax_avx_params(
2100 union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
2101 float output_min,
2102 float output_max)
2103{
2104 for (uint32_t i = 0; i < 8; i++) {
2105 params->avx.min[i] = output_min;
2106 params->avx.max[i] = output_max;
2107 }
2108 for (uint32_t i = 0; i < 7; i++) {
2109 params->avx.mask_table[i] = -1;
2110 }
2111 for (uint32_t i = 7; i < 14; i++) {
2112 params->avx.mask_table[i] = 0;
2113 }
2114 return sizeof(params->avx);
2115}
2116#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2117
2118#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2119size_t xnn_init_f32_minmax_wasmsimd_params(
2120 union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
2121 float output_min,
2122 float output_max)
2123{
2124 params->wasmsimd.min[0] = output_min;
2125 params->wasmsimd.min[1] = output_min;
2126 params->wasmsimd.max[0] = output_max;
2127 params->wasmsimd.max[1] = output_max;
2128 return sizeof(params->wasmsimd);
2129}
2130#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2131
2132size_t xnn_init_f32_minmax_scalar_params(
2133 union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
2134 float output_min,
2135 float output_max)
2136{
2137 params->scalar.min = output_min;
2138 params->scalar.max = output_max;
2139 return sizeof(params->scalar);
2140}
2141
2142#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2143size_t xnn_init_f16_hswish_fp16arith_params(
2144 union xnn_f16_hswish_params params[XNN_MIN_ELEMENTS(1)])
2145{
2146 params->fp16arith.sixth = UINT16_C(0x3155);
2147 params->fp16arith.three = UINT16_C(0x4200);
2148 params->fp16arith.six = UINT16_C(0x4600);
2149 return sizeof(params->fp16arith);
2150}
2151#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2152
2153#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2154size_t xnn_init_f16_hswish_avx_params(
2155 union xnn_f16_hswish_params params[XNN_MIN_ELEMENTS(1)])
2156{
2157 for (uint32_t i = 0; i < 8; i++) {
2158 params->avx.sixth[i] = 0x1.554000p-3f;
2159 params->avx.three[i] = 3.0f;
2160 params->avx.six[i] = UINT16_C(0x4600);
2161 }
2162 return sizeof(params->avx);
2163}
2164#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2165
2166size_t xnn_init_f32_hswish_scalar_params(
2167 union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
2168{
2169 params->scalar.sixth = 0x1.555556p-3f;
2170 params->scalar.three = 3.0f;
2171 params->scalar.six = 6.0f;
2172 return sizeof(params->scalar);
2173}
2174
2175#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2176size_t xnn_init_f32_hswish_sse_params(
2177 union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
2178{
2179 for (uint32_t i = 0; i < 4; i++) {
2180 params->sse.sixth[i] = 0x1.555556p-3f;
2181 params->sse.half[i] = 0.5f;
2182 params->sse.one[i] = 1.0f;
2183 }
2184 return sizeof(params->sse);
2185}
2186
2187size_t xnn_init_f32_hswish_avx_params(
2188 union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
2189{
2190 for (uint32_t i = 0; i < 8; i++) {
2191 params->avx.sixth[i] = 0x1.555556p-3f;
2192 params->avx.half[i] = 0.5f;
2193 params->avx.one[i] = 1.0f;
2194 }
2195 for (uint32_t i = 0; i < 7; i++) {
2196 params->avx.mask_table[i] = -1;
2197 }
2198 for (uint32_t i = 7; i < 14; i++) {
2199 params->avx.mask_table[i] = 0;
2200 }
2201 return sizeof(params->avx);
2202}
2203
2204size_t xnn_init_f32_hswish_avx512_params(
2205 union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
2206{
2207 params->avx512.sixth = 0x1.555556p-3f;
2208 params->avx512.half = 0.5f;
2209 params->avx512.one = 1.0f;
2210 return sizeof(params->avx512);
2211}
2212#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2213
2214#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2215size_t xnn_init_f32_hswish_wasmsimd_params(
2216 union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
2217{
2218 for (uint32_t i = 0; i < 2; i++) {
2219 params->wasmsimd.sixth[i] = 0x1.555556p-3f;
2220 params->wasmsimd.three[i] = 3.0f;
2221 params->wasmsimd.six[i] = 6.0f;
2222 }
2223 return sizeof(params->wasmsimd);
2224}
2225#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2226
2227#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2228size_t xnn_init_f16_sigmoid_fp16arith_rr2_p2_params(
2229 union xnn_f16_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2230{
2231 params->fp16arith_rr2_p2.magic_bias = UINT16_C(0x660F); // 0x1.83Cp+10h
2232 params->fp16arith_rr2_p2.minus_log2e = UINT16_C(0xBDC5); // -0x1.714p+0h
2233 params->fp16arith_rr2_p2.ln2_hi = UINT16_C(0x398C); // 0x1.630p-1h
2234 params->fp16arith_rr2_p2.ln2_lo = UINT16_C(0x8AF4); // -0x1.BD0p-13h
2235 params->fp16arith_rr2_p2.c2 = UINT16_C(0x37F9); // 0x1.FE4p-2h
2236 params->fp16arith_rr2_p2.c1 = UINT16_C(0xBC0E); // -0x1.038p+0h
2237 params->fp16arith_rr2_p2.denorm_cutoff = UINT16_C(0xC8DA); // -0x1.368p+3h
2238 return sizeof(params->fp16arith_rr2_p2);
2239}
2240#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2241
2242#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2243size_t xnn_init_f16_sigmoid_avx2_rr1_p2_params(
2244 union xnn_f16_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2245{
2246 for (uint32_t i = 0; i < 8; i++) {
2247 params->avx2_rr1_p2.sign_mask[i] = -0.0f;
2248 params->avx2_rr1_p2.magic_bias[i] = 0x1.8000FEp23f;
2249 params->avx2_rr1_p2.log2e[i] = 0x1.715476p0f;
2250 params->avx2_rr1_p2.minus_ln2[i] = -0x1.62E43p-1f;
2251 params->avx2_rr1_p2.c2[i] = 0x1.FF3A32p-2f;
2252 params->avx2_rr1_p2.c1[i] = 0x1.039E10p+0f;
2253 params->avx2_rr1_p2.one[i] = 1.0f;
2254 params->avx2_rr1_p2.denorm_cutoff[i] = -0x1.368000p+3f;
2255 }
2256 return sizeof(params->avx2_rr1_p2);
2257}
2258#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260size_t xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params(
2261 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2262{
2263 params->scalar_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
2264 params->scalar_rr2_lut64_p2.minus_log2e = -0x1.715476p0f;
2265 params->scalar_rr2_lut64_p2.ln2_hi = 0x1.630000p-1f;
2266 params->scalar_rr2_lut64_p2.ln2_lo = -0x1.BD0106p-13f;
2267 params->scalar_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
2268 params->scalar_rr2_lut64_p2.one = 1.0f;
2269 params->scalar_rr2_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;
2270 return sizeof(params->scalar_rr2_lut64_p2);
2271}
2272
2273size_t xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params(
2274 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2275{
2276 params->scalar_rr2_lut2048_p1.magic_bias = 0x1.800000p12f;
2277 params->scalar_rr2_lut2048_p1.minus_log2e = -0x1.715476p0f;
2278 params->scalar_rr2_lut2048_p1.ln2_hi = 0x1.600000p-1f;
2279 params->scalar_rr2_lut2048_p1.ln2_lo = 0x1.7217F8p-8f;
2280 params->scalar_rr2_lut2048_p1.c1 = -0x1.FFFFFEp-1f;
2281 params->scalar_rr2_lut2048_p1.one = 1.0f;
2282 params->scalar_rr2_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;
2283 return sizeof(params->scalar_rr2_lut2048_p1);
2284}
2285
2286size_t xnn_init_f32_sigmoid_scalar_rr2_p5_params(
2287 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2288{
2289 params->scalar_rr2_p5.magic_bias = 0x1.8000FEp23f;
2290 params->scalar_rr2_p5.minus_log2e = -0x1.715476p0f;
2291 params->scalar_rr2_p5.ln2_hi = 0x1.62E400p-1f;
2292 params->scalar_rr2_p5.ln2_lo = 0x1.7F7D1Cp-20f;
2293 params->scalar_rr2_p5.c5 = -0x1.0F9F9Cp-7f;
2294 params->scalar_rr2_p5.c4 = 0x1.573A1Ap-5f;
2295 params->scalar_rr2_p5.c3 = -0x1.555A80p-3f;
2296 params->scalar_rr2_p5.c2 = 0x1.FFFDC6p-2f;
2297 params->scalar_rr2_p5.c1 = -0x1.FFFFF6p-1f;
2298 params->scalar_rr2_p5.one = 1.0f;
2299 params->scalar_rr2_p5.denorm_cutoff = 0x1.5D589Ep+6f;
2300 return sizeof(params->scalar_rr2_p5);
2301}
2302
2303#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2304size_t xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params(
2305 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2306{
2307 params->neon_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
2308 params->neon_rr2_lut64_p2.minus_log2e = -0x1.715476p0f;
2309 params->neon_rr2_lut64_p2.ln2_hi = 0x1.630000p-1f;
2310 params->neon_rr2_lut64_p2.ln2_lo = -0x1.BD0106p-13f;
2311 params->neon_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
2312 params->neon_rr2_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;
2313 return sizeof(params->neon_rr2_lut64_p2);
2314}
2315
2316size_t xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params(
2317 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2318{
2319 params->neon_rr2_lut2048_p1.magic_bias = 0x1.800000p12f;
2320 params->neon_rr2_lut2048_p1.minus_log2e = -0x1.715476p0f;
2321 params->neon_rr2_lut2048_p1.ln2_hi = 0x1.600000p-1f;
2322 params->neon_rr2_lut2048_p1.ln2_lo = 0x1.7217F8p-8f;
2323 params->neon_rr2_lut2048_p1.c1 = -0x1.FFFFFEp-1f;
2324 params->neon_rr2_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;
2325 return sizeof(params->neon_rr2_lut2048_p1);
2326}
2327
2328size_t xnn_init_f32_sigmoid_neon_rr2_p5_params(
2329 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2330{
2331 params->neon_rr2_p5.magic_bias = 0x1.8000FEp23f;
2332 params->neon_rr2_p5.minus_log2e = -0x1.715476p0f;
2333 params->neon_rr2_p5.ln2_hi = 0x1.62E400p-1f;
2334 params->neon_rr2_p5.ln2_lo = 0x1.7F7D1Cp-20f;
2335 params->neon_rr2_p5.c5 = -0x1.0F9F9Cp-7f;
2336 params->neon_rr2_p5.c4 = 0x1.573A1Ap-5f;
2337 params->neon_rr2_p5.c3 = -0x1.555A80p-3f;
2338 params->neon_rr2_p5.c2 = 0x1.FFFDC6p-2f;
2339 params->neon_rr2_p5.c1 = -0x1.FFFFF6p-1f;
2340 params->neon_rr2_p5.denorm_cutoff = 0x1.5D589Ep+6f;
2341 return sizeof(params->neon_rr2_p5);
2342}
2343
2344size_t xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params(
2345 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2346{
2347 params->neonfma_rr1_lut2048_p1.magic_bias = 0x1.800000p12f;
2348 params->neonfma_rr1_lut2048_p1.minus_log2e = -0x1.715476p0f;
2349 params->neonfma_rr1_lut2048_p1.ln2 = 0x1.62E430p-1f;
2350 params->neonfma_rr1_lut2048_p1.c1 = -0x1.FFFFFEp-1f;
2351 params->neonfma_rr1_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;
2352 return sizeof(params->neonfma_rr1_lut2048_p1);
2353}
2354
2355size_t xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params(
2356 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2357{
2358 params->neonfma_rr1_lut64_p2.magic_bias = 0x1.800000p17f;
2359 params->neonfma_rr1_lut64_p2.minus_log2e = -0x1.715476p0f;
2360 params->neonfma_rr1_lut64_p2.ln2 = 0x1.62E430p-1f;
2361 params->neonfma_rr1_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
2362 params->neonfma_rr1_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;
2363 return sizeof(params->neonfma_rr1_lut64_p2);
2364}
2365
2366size_t xnn_init_f32_sigmoid_neonfma_rr1_p5_params(
2367 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2368{
2369 params->neonfma_rr1_p5.magic_bias = 0x1.8000FEp23f;
2370 params->neonfma_rr1_p5.minus_log2e = -0x1.715476p0f;
2371 params->neonfma_rr1_p5.ln2 = 0x1.62E430p-1f;
2372 params->neonfma_rr1_p5.c5 = -0x1.0F9F9Cp-7f;
2373 params->neonfma_rr1_p5.c4 = 0x1.573A1Ap-5f;
2374 params->neonfma_rr1_p5.c3 = -0x1.555A80p-3f;
2375 params->neonfma_rr1_p5.c2 = 0x1.FFFDC6p-2f;
2376 params->neonfma_rr1_p5.c1 = -0x1.FFFFF6p-1f;
2377 params->neonfma_rr1_p5.denorm_cutoff = 0x1.5D589Ep+6f;
2378 return sizeof(params->neonfma_rr1_p5);
2379}
2380#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2381
2382#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2383size_t xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params(
2384 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2385{
2386 for (uint32_t i = 0; i < 4; i++) {
2387 params->sse2_rr2_lut64_p2.sign_mask[i] = -0.0f;
2388 params->sse2_rr2_lut64_p2.magic_bias[i] = 0x1.800000p17f;
2389 params->sse2_rr2_lut64_p2.log2e[i] = 0x1.715476p0f;
2390 params->sse2_rr2_lut64_p2.index_mask[i] = UINT32_C(0x3F);
2391 params->sse2_rr2_lut64_p2.minus_ln2_hi[i] = -0x1.630000p-1f;
2392 params->sse2_rr2_lut64_p2.minus_ln2_lo[i] = 0x1.BD0106p-13f;
2393 params->sse2_rr2_lut64_p2.c2[i] = 0x1.FFFF0Ap-2f;
2394 params->sse2_rr2_lut64_p2.one[i] = 1.0f;
2395 params->sse2_rr2_lut64_p2.denorm_cutoff[i] = -0x1.5D589Ep+6f;
2396 }
2397 return sizeof(params->sse2_rr2_lut64_p2);
2398}
2399
2400size_t xnn_init_f32_sigmoid_sse2_rr2_p5_params(
2401 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2402{
2403 for (uint32_t i = 0; i < 4; i++) {
2404 params->sse2_rr2_p5.sign_mask[i] = -0.0f;
2405 params->sse2_rr2_p5.magic_bias[i] = 0x1.8000FEp23f;
2406 params->sse2_rr2_p5.log2e[i] = 0x1.715476p0f;
2407 params->sse2_rr2_p5.minus_ln2_hi[i] = -0x1.62E400p-1f;
2408 params->sse2_rr2_p5.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
2409 params->sse2_rr2_p5.c5[i] = 0x1.0F9F9Cp-7f;
2410 params->sse2_rr2_p5.c4[i] = 0x1.573A1Ap-5f;
2411 params->sse2_rr2_p5.c3[i] = 0x1.555A80p-3f;
2412 params->sse2_rr2_p5.c2[i] = 0x1.FFFDC6p-2f;
2413 params->sse2_rr2_p5.c1[i] = 0x1.FFFFF6p-1f;
2414 params->sse2_rr2_p5.one[i] = 1.0f;
2415 params->sse2_rr2_p5.denorm_cutoff[i] = -0x1.5D589Ep+6f;
2416 }
2417 return sizeof(params->sse2_rr2_p5);
2418}
2419
2420size_t xnn_init_f32_sigmoid_avx_rr2_p5_params(
2421 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2422{
2423 for (uint32_t i = 0; i < 8; i++) {
2424 params->avx_rr2_p5.sign_mask[i] = -0.0f;
2425 params->avx_rr2_p5.magic_bias[i] = 0x1.8000FEp23f;
2426 params->avx_rr2_p5.log2e[i] = 0x1.715476p0f;
2427 params->avx_rr2_p5.minus_ln2_hi[i] = -0x1.62E400p-1f;
2428 params->avx_rr2_p5.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
2429 params->avx_rr2_p5.c5[i] = 0x1.0F9F9Cp-7f;
2430 params->avx_rr2_p5.c4[i] = 0x1.573A1Ap-5f;
2431 params->avx_rr2_p5.c3[i] = 0x1.555A80p-3f;
2432 params->avx_rr2_p5.c2[i] = 0x1.FFFDC6p-2f;
2433 params->avx_rr2_p5.c1[i] = 0x1.FFFFF6p-1f;
2434 params->avx_rr2_p5.one[i] = 1.0f;
2435 params->avx_rr2_p5.two[i] = 2.0f;
2436 params->avx_rr2_p5.denorm_cutoff[i] = -0x1.5D589Ep+6f;
2437 }
2438 for (uint32_t i = 0; i < 7; i++) {
2439 params->avx_rr2_p5.mask_table[i] = -1;
2440 }
2441 for (uint32_t i = 7; i < 14; i++) {
2442 params->avx_rr2_p5.mask_table[i] = 0;
2443 }
2444 return sizeof(params->avx_rr2_p5);
2445}
2446
2447size_t xnn_init_f32_sigmoid_avx2_rr1_p5_params(
2448 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2449{
2450 for (uint32_t i = 0; i < 8; i++) {
2451 params->avx2_rr1_p5.sign_mask[i] = -0.0f;
2452 params->avx2_rr1_p5.magic_bias[i] = 0x1.8000FEp23f;
2453 params->avx2_rr1_p5.log2e[i] = 0x1.715476p0f;
2454 params->avx2_rr1_p5.minus_ln2[i] = -0x1.62E430p-1f;
2455 params->avx2_rr1_p5.c5[i] = 0x1.0F9F9Cp-7f;
2456 params->avx2_rr1_p5.c4[i] = 0x1.573A1Ap-5f;
2457 params->avx2_rr1_p5.c3[i] = 0x1.555A80p-3f;
2458 params->avx2_rr1_p5.c2[i] = 0x1.FFFDC6p-2f;
2459 params->avx2_rr1_p5.c1[i] = 0x1.FFFFF6p-1f;
2460 params->avx2_rr1_p5.one[i] = 1.0f;
2461 params->avx2_rr1_p5.denorm_cutoff[i] = -0x1.5D589Ep+6f;
2462 }
2463 for (uint32_t i = 0; i < 7; i++) {
2464 params->avx2_rr1_p5.mask_table[i] = -1;
2465 }
2466 for (uint32_t i = 7; i < 14; i++) {
2467 params->avx2_rr1_p5.mask_table[i] = 0;
2468 }
2469 return sizeof(params->avx2_rr1_p5);
2470}
2471
2472size_t xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params(
2473 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2474{
2475 params->avx512_rr1_lut16_p3.sign_mask = UINT32_C(0x80000000);
2476 params->avx512_rr1_lut16_p3.magic_bias = 0x1.800000p19f;
2477 params->avx512_rr1_lut16_p3.log2e = 0x1.715476p0f;
2478 params->avx512_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;
2479 params->avx512_rr1_lut16_p3.c3 = 0x1.55559Ap-3f;
2480 params->avx512_rr1_lut16_p3.c2 = 0x1.00021Ep-1f;
2481 params->avx512_rr1_lut16_p3.one = 1.0f;
2482 params->avx512_rr1_lut16_p3.table[ 0] = 0x1.000000p+0f;
2483 params->avx512_rr1_lut16_p3.table[ 1] = 0x1.0B5586p+0f;
2484 params->avx512_rr1_lut16_p3.table[ 2] = 0x1.172B84p+0f;
2485 params->avx512_rr1_lut16_p3.table[ 3] = 0x1.2387A6p+0f;
2486 params->avx512_rr1_lut16_p3.table[ 4] = 0x1.306FE0p+0f;
2487 params->avx512_rr1_lut16_p3.table[ 5] = 0x1.3DEA64p+0f;
2488 params->avx512_rr1_lut16_p3.table[ 6] = 0x1.4BFDAEp+0f;
2489 params->avx512_rr1_lut16_p3.table[ 7] = 0x1.5AB07Ep+0f;
2490 params->avx512_rr1_lut16_p3.table[ 8] = 0x1.6A09E6p+0f;
2491 params->avx512_rr1_lut16_p3.table[ 9] = 0x1.7A1148p+0f;
2492 params->avx512_rr1_lut16_p3.table[10] = 0x1.8ACE54p+0f;
2493 params->avx512_rr1_lut16_p3.table[11] = 0x1.9C4918p+0f;
2494 params->avx512_rr1_lut16_p3.table[12] = 0x1.AE89FAp+0f;
2495 params->avx512_rr1_lut16_p3.table[13] = 0x1.C199BEp+0f;
2496 params->avx512_rr1_lut16_p3.table[14] = 0x1.D5818Ep+0f;
2497 params->avx512_rr1_lut16_p3.table[15] = 0x1.EA4AFAp+0f;
2498 return sizeof(params->avx512_rr1_lut16_p3);
2499}
2500
2501size_t xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params(
2502 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2503{
2504 params->avx512_rr2_lut32_p2.sign_mask = UINT32_C(0x80000000);
2505 params->avx512_rr2_lut32_p2.magic_bias = 0x1.800000p18f;
2506 params->avx512_rr2_lut32_p2.log2e = 0x1.715476p0f;
2507 params->avx512_rr2_lut32_p2.minus_ln2_hi = -0x1.62E430p-1f;
2508 params->avx512_rr2_lut32_p2.minus_ln2_lo = 0x1.05C61p-29f;
2509 params->avx512_rr2_lut32_p2.c2 = 0x1.000000p-1f;
2510 params->avx512_rr2_lut32_p2.c1 = 0x1.0000F6p-0f;
2511 params->avx512_rr2_lut32_p2.one = 1.0f;
2512
2513 params->avx512_rr2_lut32_p2.table_lo[ 0] = 0x1.000000p+0f;
2514 params->avx512_rr2_lut32_p2.table_lo[ 1] = 0x1.059B0Ep+0f;
2515 params->avx512_rr2_lut32_p2.table_lo[ 2] = 0x1.0B5586p+0f;
2516 params->avx512_rr2_lut32_p2.table_lo[ 3] = 0x1.11301Ep+0f;
2517 params->avx512_rr2_lut32_p2.table_lo[ 4] = 0x1.172B84p+0f;
2518 params->avx512_rr2_lut32_p2.table_lo[ 5] = 0x1.1D4874p+0f;
2519 params->avx512_rr2_lut32_p2.table_lo[ 6] = 0x1.2387A6p+0f;
2520 params->avx512_rr2_lut32_p2.table_lo[ 7] = 0x1.29E9E0p+0f;
2521 params->avx512_rr2_lut32_p2.table_lo[ 8] = 0x1.306FE0p+0f;
2522 params->avx512_rr2_lut32_p2.table_lo[ 9] = 0x1.371A74p+0f;
2523 params->avx512_rr2_lut32_p2.table_lo[10] = 0x1.3DEA64p+0f;
2524 params->avx512_rr2_lut32_p2.table_lo[11] = 0x1.44E086p+0f;
2525 params->avx512_rr2_lut32_p2.table_lo[12] = 0x1.4BFDAEp+0f;
2526 params->avx512_rr2_lut32_p2.table_lo[13] = 0x1.5342B6p+0f;
2527 params->avx512_rr2_lut32_p2.table_lo[14] = 0x1.5AB07Ep+0f;
2528 params->avx512_rr2_lut32_p2.table_lo[15] = 0x1.6247ECp+0f;
2529
2530 params->avx512_rr2_lut32_p2.table_hi[ 0] = 0x1.6A09E6p+0f;
2531 params->avx512_rr2_lut32_p2.table_hi[ 1] = 0x1.71F75Ep+0f;
2532 params->avx512_rr2_lut32_p2.table_hi[ 2] = 0x1.7A1148p+0f;
2533 params->avx512_rr2_lut32_p2.table_hi[ 3] = 0x1.82589Ap+0f;
2534 params->avx512_rr2_lut32_p2.table_hi[ 4] = 0x1.8ACE54p+0f;
2535 params->avx512_rr2_lut32_p2.table_hi[ 5] = 0x1.93737Cp+0f;
2536 params->avx512_rr2_lut32_p2.table_hi[ 6] = 0x1.9C4918p+0f;
2537 params->avx512_rr2_lut32_p2.table_hi[ 7] = 0x1.A5503Cp+0f;
2538 params->avx512_rr2_lut32_p2.table_hi[ 8] = 0x1.AE89FAp+0f;
2539 params->avx512_rr2_lut32_p2.table_hi[ 9] = 0x1.B7F770p+0f;
2540 params->avx512_rr2_lut32_p2.table_hi[10] = 0x1.C199BEp+0f;
2541 params->avx512_rr2_lut32_p2.table_hi[11] = 0x1.CB720Ep+0f;
2542 params->avx512_rr2_lut32_p2.table_hi[12] = 0x1.D5818Ep+0f;
2543 params->avx512_rr2_lut32_p2.table_hi[13] = 0x1.DFC974p+0f;
2544 params->avx512_rr2_lut32_p2.table_hi[14] = 0x1.EA4AFAp+0f;
2545 params->avx512_rr2_lut32_p2.table_hi[15] = 0x1.F50766p+0f;
2546 return sizeof(params->avx512_rr2_lut32_p2);
2547}
2548
2549size_t xnn_init_f32_sigmoid_avx512_rr1_p5_params(
2550 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2551{
2552 params->avx512_rr1_p5.sign_mask = UINT32_C(0x80000000);
2553 params->avx512_rr1_p5.log2e = 0x1.715476p0f;
2554 params->avx512_rr1_p5.minus_ln2 = -0x1.62E430p-1f;
2555 params->avx512_rr1_p5.c5 = 0x1.0F9F9Cp-7f;
2556 params->avx512_rr1_p5.c4 = 0x1.573A1Ap-5f;
2557 params->avx512_rr1_p5.c3 = 0x1.555A80p-3f;
2558 params->avx512_rr1_p5.c2 = 0x1.FFFDC6p-2f;
2559 params->avx512_rr1_p5.c1 = 0x1.FFFFF6p-1f;
2560 params->avx512_rr1_p5.one = 1.0f;
2561 return sizeof(params->avx512_rr1_p5);
2562}
2563#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2564
2565#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2566size_t xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params(
2567 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2568{
2569 for (uint32_t i = 0; i < 2; i++) {
2570 params->wasmsimd_rr2_lut64_p2.magic_bias[i] = 0x1.800000p17f;
2571 params->wasmsimd_rr2_lut64_p2.minus_log2e[i] = -0x1.715476p0f;
2572 params->wasmsimd_rr2_lut64_p2.index_mask[i] = UINT32_C(0x3F);
2573 params->wasmsimd_rr2_lut64_p2.ln2_hi[i] = 0x1.630000p-1f;
2574 params->wasmsimd_rr2_lut64_p2.ln2_lo[i] = -0x1.BD0106p-13f;
2575 params->wasmsimd_rr2_lut64_p2.c2[i] = 0x1.FFFF0Ap-2f;
2576 params->wasmsimd_rr2_lut64_p2.one[i] = 1.0f;
2577 params->wasmsimd_rr2_lut64_p2.denorm_cutoff[i] = 0x1.5D589Ep+6f;
2578 }
2579 return sizeof(params->wasmsimd_rr2_lut64_p2);
2580}
2581
2582size_t xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params(
2583 union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
2584{
2585 for (uint32_t i = 0; i < 2; i++) {
2586 params->wasmsimd_rr2_p5.magic_bias[i] = 0x1.8000FEp23f;
2587 params->wasmsimd_rr2_p5.minus_log2e[i] = -0x1.715476p+0f;
2588 params->wasmsimd_rr2_p5.ln2_hi[i] = 0x1.62E400p-1f;
2589 params->wasmsimd_rr2_p5.ln2_lo[i] = 0x1.7F7D1Cp-20f;
2590 params->wasmsimd_rr2_p5.c5[i] = -0x1.0F9F9Cp-7f;
2591 params->wasmsimd_rr2_p5.c4[i] = 0x1.573A1Ap-5f;
2592 params->wasmsimd_rr2_p5.c3[i] = -0x1.555A80p-3f;
2593 params->wasmsimd_rr2_p5.c2[i] = 0x1.FFFDC6p-2f;
2594 params->wasmsimd_rr2_p5.c1[i] = -0x1.FFFFF6p-1f;
2595 params->wasmsimd_rr2_p5.one[i] = 1.0f;
2596 params->wasmsimd_rr2_p5.denorm_cutoff[i] = 0x1.5D589Ep+6f;
2597 }
2598 return sizeof(params->wasmsimd_rr2_p5);
2599}
2600#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2601
2602#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2603size_t xnn_init_f16_abs_sse_params(
2604 union xnn_f16_abs_params params[XNN_MIN_ELEMENTS(1)])
2605{
2606 for (uint32_t i = 0; i < 8; i++) {
2607 params->sse.nonsign_mask[i] = UINT16_C(0x7FFF);
2608 }
2609 return sizeof(params->sse);
2610}
2611#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2612
2613#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2614size_t xnn_init_f32_abs_sse_params(
2615 union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
2616{
2617 for (uint32_t i = 0; i < 4; i++) {
2618 params->sse.nonsign_mask[i] = math_nonsign_mask_f32();
2619 }
2620 return sizeof(params->sse);
2621}
2622
2623size_t xnn_init_f32_abs_avx_params(
2624 union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
2625{
2626 for (uint32_t i = 0; i < 8; i++) {
2627 params->avx.nonsign_mask[i] = math_nonsign_mask_f32();
2628 }
2629 for (uint32_t i = 0; i < 7; i++) {
2630 params->avx.mask_table[i] = -1;
2631 }
2632 for (uint32_t i = 7; i < 14; i++) {
2633 params->avx.mask_table[i] = 0;
2634 }
2635 return sizeof(params->avx);
2636}
2637
2638size_t xnn_init_f32_abs_avx512_params(
2639 union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
2640{
2641 params->avx512.nonsign_mask = UINT32_C(0x7FFFFFFF);
2642 return sizeof(params->avx512);
2643}
2644#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2645
2646#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2647size_t xnn_init_f32_abs_wasmsimd_params(
2648 union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
2649{
2650 params->wasmsimd.nonsign_mask[0] = math_nonsign_mask_f32();
2651 params->wasmsimd.nonsign_mask[1] = math_nonsign_mask_f32();
2652 return sizeof(params->wasmsimd);
2653}
2654#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2655
2656#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2657size_t xnn_init_f16_neg_sse_params(
2658 union xnn_f16_neg_params params[XNN_MIN_ELEMENTS(1)])
2659{
2660 for (uint32_t i = 0; i < 8; i++) {
2661 params->sse.sign_mask[i] = UINT16_C(0x8000);
2662 }
2663 return sizeof(params->sse);
2664}
2665#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2666
2667#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2668size_t xnn_init_f32_neg_sse_params(
2669 union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
2670{
2671 for (uint32_t i = 0; i < 4; i++) {
2672 params->sse.sign_mask[i] = -0.0f;
2673 }
2674 return sizeof(params->sse);
2675}
2676
2677size_t xnn_init_f32_neg_avx_params(
2678 union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
2679{
2680 for (uint32_t i = 0; i < 8; i++) {
2681 params->avx.sign_mask[i] = -0.0f;
2682 }
2683 for (uint32_t i = 0; i < 7; i++) {
2684 params->avx.mask_table[i] = -1;
2685 }
2686 for (uint32_t i = 7; i < 14; i++) {
2687 params->avx.mask_table[i] = 0;
2688 }
2689 return sizeof(params->avx);
2690}
2691
2692size_t xnn_init_f32_neg_avx512_params(
2693 union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
2694{
2695 params->avx512.sign_mask = UINT32_C(0x80000000);
2696 return sizeof(params->avx512);
2697}
2698#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2699
2700#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2701size_t xnn_init_f32_neg_wasmsimd_params(
2702 union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
2703{
2704 params->wasmsimd.sign_mask[0] = -0.0f;
2705 params->wasmsimd.sign_mask[1] = -0.0f;
2706 return sizeof(params->wasmsimd);
2707}
2708#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2709
2710#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2711size_t xnn_init_f32_rnd_sse2_params(
2712 union xnn_f32_rnd_params params[XNN_MIN_ELEMENTS(1)])
2713{
2714 for (uint32_t i = 0; i < 4; i++) {
2715 params->sse2.sign_mask[i] = -0.0f;
2716 params->sse2.one[i] = 1.0f;
2717 }
2718 return sizeof(params->sse2);
2719}
2720
2721size_t xnn_init_f32_rnd_avx_params(
2722 union xnn_f32_rnd_params params[XNN_MIN_ELEMENTS(1)])
2723{
2724 for (uint32_t i = 0; i < 7; i++) {
2725 params->avx.mask_table[i] = -1;
2726 }
2727 for (uint32_t i = 7; i < 14; i++) {
2728 params->avx.mask_table[i] = 0;
2729 }
2730 return sizeof(params->avx);
2731}
2732#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2733
2734#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2735size_t xnn_init_f16_elu_fp16arith_rr1_p3_params(
2736 union xnn_f16_elu_params params[XNN_MIN_ELEMENTS(1)],
2737 uint16_t prescale,
2738 uint16_t alpha,
2739 uint16_t beta)
2740{
2741 params->fp16arith_rr1_p3.prescale = prescale;
2742 params->fp16arith_rr1_p3.sat_cutoff = UINT16_C(0xC829); // -0x1.0A4p+3h;
2743 params->fp16arith_rr1_p3.magic_bias = UINT16_C(0x660F); // 0x1.83Cp+10h
2744 params->fp16arith_rr1_p3.log2e = UINT16_C(0x3DC5); // 0x1.714p+0h
2745 params->fp16arith_rr1_p3.minus_ln2 = UINT16_C(0xB98C); // -0x1.62E430p-1h
2746 params->fp16arith_rr1_p3.c3 = UINT16_C(0x315B); // 0x1.56Cp-3h
2747 params->fp16arith_rr1_p3.c2 = UINT16_C(0x3808); // 0x1.020p-1h
2748 params->fp16arith_rr1_p3.minus_alpha = alpha ^ UINT16_C(0x8000);
2749 params->fp16arith_rr1_p3.beta = beta;
2750 return sizeof(params->fp16arith_rr1_p3);
2751}
2752#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2753
2754#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2755size_t xnn_init_f16_elu_avx2_rr1_p3_params(
2756 union xnn_f16_elu_params params[XNN_MIN_ELEMENTS(1)],
2757 uint16_t prescale,
2758 uint16_t alpha,
2759 uint16_t beta)
2760{
2761 for (uint32_t i = 0; i < 8; i++) {
2762 params->avx2_rr1_p3.prescale[i] = fp16_ieee_to_fp32_value(prescale);
2763 params->avx2_rr1_p3.sat_cutoff[i] = -0x1.0A4000p+3f;
2764 params->avx2_rr1_p3.magic_bias[i] = 0x1.8000FEp23f;
2765 params->avx2_rr1_p3.log2e[i] = 0x1.715476p+0f;
2766 params->avx2_rr1_p3.minus_ln2[i] = -0x1.62E430p-1f;
2767 params->avx2_rr1_p3.c3[i] = 0x1.5554DCp-3f;
2768 params->avx2_rr1_p3.c2[i] = 0x1.01EBB2p-1f;
2769 params->avx2_rr1_p3.c1[i] = 0x1.0002F2p+0f;
2770 params->avx2_rr1_p3.alpha[i] = fp16_ieee_to_fp32_value(alpha);
2771 params->avx2_rr1_p3.beta[i] = fp16_ieee_to_fp32_value(beta);
2772 }
2773 return sizeof(params->avx2_rr1_p3);
2774}
2775#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2776
2777size_t xnn_init_f32_elu_scalar_rr2_lut16_p3_params(
2778 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2779 float prescale,
2780 float alpha,
2781 float beta)
2782{
2783 params->scalar_rr2_lut16_p3.prescale = prescale;
2784 params->scalar_rr2_lut16_p3.alpha = alpha;
2785 params->scalar_rr2_lut16_p3.beta = beta;
2786 params->scalar_rr2_lut16_p3.sat_cutoff = -0x1.154246p+4f;
2787 params->scalar_rr2_lut16_p3.magic_bias = 0x1.800000p19f;
2788 params->scalar_rr2_lut16_p3.log2e = 0x1.715476p+0f;
2789 params->scalar_rr2_lut16_p3.minus_ln2_hi = -0x1.62E400p-1f;
2790 params->scalar_rr2_lut16_p3.minus_ln2_lo = -0x1.7F7D1Cp-20f;
2791 params->scalar_rr2_lut16_p3.c3 = 0x1.55561Cp-3f;
2792 params->scalar_rr2_lut16_p3.c2 = 0x1.0001ECp-1f;
2793 params->scalar_rr2_lut16_p3.one = 1.0f;
2794 return sizeof(params->scalar_rr2_lut16_p3);
2795}
2796
2797size_t xnn_init_f32_elu_scalar_rr2_p6_params(
2798 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2799 float prescale,
2800 float alpha,
2801 float beta)
2802{
2803 params->scalar_rr2_p6.prescale = prescale;
2804 params->scalar_rr2_p6.alpha = alpha;
2805 params->scalar_rr2_p6.beta = beta;
2806 params->scalar_rr2_p6.sat_cutoff = -0x1.154246p+4f;
2807 params->scalar_rr2_p6.magic_bias = 0x1.8000FEp23f;
2808 params->scalar_rr2_p6.log2e = 0x1.715476p+0f;
2809 params->scalar_rr2_p6.minus_ln2_hi = -0x1.62E440p-1f;
2810 params->scalar_rr2_p6.minus_ln2_lo = 0x1.0105C6p-21f;
2811 params->scalar_rr2_p6.c6 = 0x1.6b7338p-10f;
2812 params->scalar_rr2_p6.c5 = 0x1.12278Ep-7f;
2813 params->scalar_rr2_p6.c4 = 0x1.555716p-5f;
2814 params->scalar_rr2_p6.c3 = 0x1.5554B0p-3f;
2815 params->scalar_rr2_p6.c2 = 0x1.FFFFFEp-2f;
2816 params->scalar_rr2_p6.one = 1.0f;
2817 return sizeof(params->scalar_rr2_p6);
2818}
2819
2820#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2821size_t xnn_init_f32_elu_neon_rr2_lut16_p3_params(
2822 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2823 float prescale,
2824 float alpha,
2825 float beta)
2826{
2827 params->neon_rr2_lut16_p3.prescale = prescale;
2828 params->neon_rr2_lut16_p3.alpha = alpha;
2829 params->neon_rr2_lut16_p3.beta = beta;
2830 params->neon_rr2_lut16_p3.sat_cutoff = -0x1.154246p+4f;
2831 params->neon_rr2_lut16_p3.magic_bias = 0x1.800000p19f;
2832 params->neon_rr2_lut16_p3.log2e = 0x1.715476p+0f;
2833 params->neon_rr2_lut16_p3.minus_ln2_hi = -0x1.62E400p-1f;
2834 params->neon_rr2_lut16_p3.minus_ln2_lo = -0x1.7F7D1Cp-20f;
2835 params->neon_rr2_lut16_p3.c3 = 0x1.55561Cp-3f;
2836 params->neon_rr2_lut16_p3.c2 = 0x1.0001ECp-1f;
2837 return sizeof(params->neon_rr2_lut16_p3);
2838}
2839
2840size_t xnn_init_f32_elu_neon_rr2_p6_params(
2841 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2842 float prescale,
2843 float alpha,
2844 float beta)
2845{
2846 params->neon_rr2_p6.prescale = prescale;
2847 params->neon_rr2_p6.alpha = alpha;
2848 params->neon_rr2_p6.beta = beta;
2849 params->neon_rr2_p6.sat_cutoff = -0x1.154246p+4f;
2850 params->neon_rr2_p6.magic_bias = 0x1.8000FEp23f;
2851 params->neon_rr2_p6.log2e = 0x1.715476p+0f;
2852 params->neon_rr2_p6.minus_ln2_hi = -0x1.62E440p-1f;
2853 params->neon_rr2_p6.minus_ln2_lo = 0x1.0105C6p-21f;
2854 params->neon_rr2_p6.c6 = 0x1.6b7338p-10f;
2855 params->neon_rr2_p6.c5 = 0x1.12278Ep-7f;
2856 params->neon_rr2_p6.c4 = 0x1.555716p-5f;
2857 params->neon_rr2_p6.c3 = 0x1.5554B0p-3f;
2858 params->neon_rr2_p6.c2 = 0x1.FFFFFEp-2f;
2859 return sizeof(params->neon_rr2_p6);
2860}
2861
2862size_t xnn_init_f32_elu_neonfma_rr1_lut16_p3_params(
2863 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2864 float prescale,
2865 float alpha,
2866 float beta)
2867{
2868 params->neonfma_rr1_lut16_p3.prescale = prescale;
2869 params->neonfma_rr1_lut16_p3.alpha = alpha;
2870 params->neonfma_rr1_lut16_p3.beta = beta;
2871 params->neonfma_rr1_lut16_p3.sat_cutoff = -0x1.154246p+4f;
2872 params->neonfma_rr1_lut16_p3.magic_bias = 0x1.800000p19f;
2873 params->neonfma_rr1_lut16_p3.log2e = 0x1.715476p+0f;
2874 params->neonfma_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;
2875 params->neonfma_rr1_lut16_p3.c3 = 0x1.55561Cp-3f;
2876 params->neonfma_rr1_lut16_p3.c2 = 0x1.0001ECp-1f;
2877 return sizeof(params->neonfma_rr1_lut16_p3);
2878}
2879
2880size_t xnn_init_f32_elu_neonfma_rr1_p6_params(
2881 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2882 float prescale,
2883 float alpha,
2884 float beta)
2885{
2886 params->neonfma_rr1_p6.prescale = prescale;
2887 params->neonfma_rr1_p6.alpha = alpha;
2888 params->neonfma_rr1_p6.beta = beta;
2889 params->neonfma_rr1_p6.sat_cutoff = -0x1.154246p+4f;
2890 params->neonfma_rr1_p6.magic_bias = 0x1.8000FEp23f;
2891 params->neonfma_rr1_p6.log2e = 0x1.715476p+0f;
2892 params->neonfma_rr1_p6.minus_ln2 = -0x1.62E430p-1f;
2893 params->neonfma_rr1_p6.c6 = 0x1.6b7338p-10f;
2894 params->neonfma_rr1_p6.c5 = 0x1.12278Ep-7f;
2895 params->neonfma_rr1_p6.c4 = 0x1.555716p-5f;
2896 params->neonfma_rr1_p6.c3 = 0x1.5554B0p-3f;
2897 params->neonfma_rr1_p6.c2 = 0x1.FFFFFEp-2f;
2898 return sizeof(params->neonfma_rr1_p6);
2899}
2900#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2901
2902#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2903size_t xnn_init_f32_elu_sse2_rr2_lut16_p3_params(
2904 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2905 float prescale,
2906 float alpha,
2907 float beta)
2908{
2909 for (uint32_t i = 0; i < 4; i++) {
2910 params->sse2_rr2_lut16_p3.prescale[i] = prescale;
2911 params->sse2_rr2_lut16_p3.alpha[i] = alpha;
2912 params->sse2_rr2_lut16_p3.beta[i] = beta;
2913 params->sse2_rr2_lut16_p3.sat_cutoff[i] = -0x1.154246p+4f;
2914 params->sse2_rr2_lut16_p3.magic_bias[i] = 0x1.800000p19f;
2915 params->sse2_rr2_lut16_p3.log2e[i] = 0x1.715476p+0f;
2916 params->sse2_rr2_lut16_p3.index_mask[i] = UINT32_C(0xF);
2917 params->sse2_rr2_lut16_p3.minus_ln2_hi[i] = -0x1.62E400p-1f;
2918 params->sse2_rr2_lut16_p3.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
2919 params->sse2_rr2_lut16_p3.c3[i] = 0x1.55561Cp-3f;
2920 params->sse2_rr2_lut16_p3.c2[i] = 0x1.0001ECp-1f;
2921 params->sse2_rr2_lut16_p3.one[i] = 1.0f;
2922 }
2923 return sizeof(params->sse2_rr2_lut16_p3);
2924}
2925
2926size_t xnn_init_f32_elu_sse2_rr2_p6_params(
2927 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2928 float prescale,
2929 float alpha,
2930 float beta)
2931{
2932 for (uint32_t i = 0; i < 4; i++) {
2933 params->sse2_rr2_p6.prescale[i] = prescale;
2934 params->sse2_rr2_p6.alpha[i] = alpha;
2935 params->sse2_rr2_p6.beta[i] = beta;
2936 params->sse2_rr2_p6.sat_cutoff[i] = -0x1.154246p+4f;
2937 params->sse2_rr2_p6.magic_bias[i] = 0x1.8000FEp23f;
2938 params->sse2_rr2_p6.log2e[i] = 0x1.715476p+0f;
2939 params->sse2_rr2_p6.minus_ln2_hi[i] = -0x1.62E440p-1f;
2940 params->sse2_rr2_p6.minus_ln2_lo[i] = 0x1.0105C6p-21f;
2941 params->sse2_rr2_p6.c6[i] = 0x1.6b7338p-10f;
2942 params->sse2_rr2_p6.c5[i] = 0x1.12278Ep-7f;
2943 params->sse2_rr2_p6.c4[i] = 0x1.555716p-5f;
2944 params->sse2_rr2_p6.c3[i] = 0x1.5554B0p-3f;
2945 params->sse2_rr2_p6.c2[i] = 0x1.FFFFFEp-2f;
2946 params->sse2_rr2_p6.one[i] = 1.0f;
2947 }
2948 return sizeof(params->sse2_rr2_p6);
2949}
2950
2951size_t xnn_init_f32_elu_avx_rr2_lut16_p3_params(
2952 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2953 float prescale,
2954 float alpha,
2955 float beta)
2956{
2957 for (uint32_t i = 0; i < 8; i++) {
2958 params->avx_rr2_lut16_p3.prescale[i] = prescale;
2959 params->avx_rr2_lut16_p3.alpha[i] = alpha;
2960 params->avx_rr2_lut16_p3.beta[i] = beta;
2961 params->avx_rr2_lut16_p3.sat_cutoff[i] = -0x1.154246p+4f;
2962 params->avx_rr2_lut16_p3.magic_bias[i] = 0x1.800000p19f;
2963 params->avx_rr2_lut16_p3.log2e[i] = 0x1.715476p+0f;
2964 params->avx_rr2_lut16_p3.index_mask[i] = UINT32_C(0xF);
2965 params->avx_rr2_lut16_p3.minus_ln2_hi[i] = -0x1.62E400p-1f;
2966 params->avx_rr2_lut16_p3.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
2967 params->avx_rr2_lut16_p3.c3[i] = 0x1.55561Cp-3f;
2968 params->avx_rr2_lut16_p3.c2[i] = 0x1.0001ECp-1f;
2969 params->avx_rr2_lut16_p3.one[i] = 1.0f;
2970 }
2971 for (uint32_t i = 0; i < 7; i++) {
2972 params->avx_rr2_lut16_p3.mask_table[i] = -1;
2973 }
2974 for (uint32_t i = 7; i < 14; i++) {
2975 params->avx_rr2_lut16_p3.mask_table[i] = 0;
2976 }
2977 return sizeof(params->avx_rr2_lut16_p3);
2978}
2979
2980size_t xnn_init_f32_elu_avx_rr2_lut4_p4_params(
2981 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
2982 float prescale,
2983 float alpha,
2984 float beta)
2985{
2986 for (uint32_t i = 0; i < 8; i++) {
2987 params->avx_rr2_lut4_p4.prescale[i] = prescale;
2988 params->avx_rr2_lut4_p4.alpha[i] = alpha;
2989 params->avx_rr2_lut4_p4.beta[i] = beta;
2990 params->avx_rr2_lut4_p4.sat_cutoff[i] = -0x1.154246p+4f;
2991 params->avx_rr2_lut4_p4.magic_bias[i] = 0x1.8003F8p21f;
2992 params->avx_rr2_lut4_p4.log2e[i] = 0x1.715476p+0f;
2993 params->avx_rr2_lut4_p4.index_mask[i] = UINT32_C(0x3);
2994 }
2995 params->avx_rr2_lut4_p4.table[0] = 0x1.000000p+0f;
2996 params->avx_rr2_lut4_p4.table[1] = 0x1.306FE0p+0f;
2997 params->avx_rr2_lut4_p4.table[2] = 0x1.6A09E6p+0f;
2998 params->avx_rr2_lut4_p4.table[3] = 0x1.AE89FAp+0f;
2999 params->avx_rr2_lut4_p4.table[4] = 0x1.000000p+0f;
3000 params->avx_rr2_lut4_p4.table[5] = 0x1.306FE0p+0f;
3001 params->avx_rr2_lut4_p4.table[6] = 0x1.6A09E6p+0f;
3002 params->avx_rr2_lut4_p4.table[7] = 0x1.AE89FAp+0f;
3003 for (uint32_t i = 0; i < 8; i++) {
3004 params->avx_rr2_lut4_p4.minus_ln2_hi[i] = -0x1.62E400p-1f;
3005 params->avx_rr2_lut4_p4.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
3006 params->avx_rr2_lut4_p4.c4[i] = 0x1.554F9Ap-5f;
3007 params->avx_rr2_lut4_p4.c3[i] = 0x1.557082p-3f;
3008 params->avx_rr2_lut4_p4.c2[i] = 0x1.000002p-1f;
3009 params->avx_rr2_lut4_p4.one[i] = 1.0f;
3010 }
3011 for (uint32_t i = 0; i < 7; i++) {
3012 params->avx_rr2_lut4_p4.mask_table[i] = -1;
3013 }
3014 for (uint32_t i = 7; i < 14; i++) {
3015 params->avx_rr2_lut4_p4.mask_table[i] = 0;
3016 }
3017 return sizeof(params->avx_rr2_lut4_p4);
3018}
3019
3020size_t xnn_init_f32_elu_avx_rr2_p6_params(
3021 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3022 float prescale,
3023 float alpha,
3024 float beta)
3025{
3026 for (uint32_t i = 0; i < 8; i++) {
3027 params->avx_rr2_p6.prescale[i] = prescale;
3028 params->avx_rr2_p6.alpha[i] = alpha;
3029 params->avx_rr2_p6.beta[i] = beta;
3030 params->avx_rr2_p6.sat_cutoff[i] = -0x1.154246p+4f;
3031 params->avx_rr2_p6.magic_bias[i] = 0x1.8000FEp23f;
3032 params->avx_rr2_p6.log2e[i] = 0x1.715476p+0f;
3033 params->avx_rr2_p6.minus_ln2_hi[i] = -0x1.62E440p-1f;
3034 params->avx_rr2_p6.minus_ln2_lo[i] = 0x1.0105C6p-21f;
3035 params->avx_rr2_p6.c6[i] = 0x1.6b7338p-10f;
3036 params->avx_rr2_p6.c5[i] = 0x1.12278Ep-7f;
3037 params->avx_rr2_p6.c4[i] = 0x1.555716p-5f;
3038 params->avx_rr2_p6.c3[i] = 0x1.5554B0p-3f;
3039 params->avx_rr2_p6.c2[i] = 0x1.FFFFFEp-2f;
3040 params->avx_rr2_p6.one[i] = 1.0f;
3041 }
3042 for (uint32_t i = 0; i < 7; i++) {
3043 params->avx_rr2_p6.mask_table[i] = -1;
3044 }
3045 for (uint32_t i = 7; i < 14; i++) {
3046 params->avx_rr2_p6.mask_table[i] = 0;
3047 }
3048 return sizeof(params->avx_rr2_p6);
3049}
3050
3051size_t xnn_init_f32_elu_avx2_rr1_lut16_p3_params(
3052 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3053 float prescale,
3054 float alpha,
3055 float beta)
3056{
3057 for (uint32_t i = 0; i < 8; i++) {
3058 params->avx2_rr1_lut16_p3.prescale[i] = prescale;
3059 params->avx2_rr1_lut16_p3.alpha[i] = alpha;
3060 params->avx2_rr1_lut16_p3.beta[i] = beta;
3061 params->avx2_rr1_lut16_p3.sat_cutoff[i] = -0x1.154246p+4f;
3062 params->avx2_rr1_lut16_p3.magic_bias[i] = 0x1.800000p19f;
3063 params->avx2_rr1_lut16_p3.log2e[i] = 0x1.715476p+0f;
3064 params->avx2_rr1_lut16_p3.index_mask[i] = UINT32_C(0xF);
3065 params->avx2_rr1_lut16_p3.minus_ln2[i] = -0x1.62E430p-1f;
3066 params->avx2_rr1_lut16_p3.c3[i] = 0x1.55561Cp-3f;
3067 params->avx2_rr1_lut16_p3.c2[i] = 0x1.0001ECp-1f;
3068 }
3069 for (uint32_t i = 0; i < 7; i++) {
3070 params->avx2_rr1_lut16_p3.mask_table[i] = -1;
3071 }
3072 for (uint32_t i = 7; i < 14; i++) {
3073 params->avx2_rr1_lut16_p3.mask_table[i] = 0;
3074 }
3075 return sizeof(params->avx2_rr1_lut16_p3);
3076}
3077
3078size_t xnn_init_f32_elu_avx2_rr1_lut8_p4_params(
3079 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3080 float prescale,
3081 float alpha,
3082 float beta)
3083{
3084 for (uint32_t i = 0; i < 8; i++) {
3085 params->avx2_rr1_lut8_p4.prescale[i] = prescale;
3086 params->avx2_rr1_lut8_p4.alpha[i] = alpha;
3087 params->avx2_rr1_lut8_p4.beta[i] = beta;
3088 params->avx2_rr1_lut8_p4.sat_cutoff[i] = -0x1.154246p+4f;
3089 params->avx2_rr1_lut8_p4.magic_bias[i] = 0x1.800000p20f;
3090 params->avx2_rr1_lut8_p4.log2e[i] = 0x1.715476p+0f;
3091 }
3092 params->avx2_rr1_lut8_p4.table[0] = UINT32_C(0x3F800000);
3093 params->avx2_rr1_lut8_p4.table[1] = UINT32_C(0x3F7B95C2);
3094 params->avx2_rr1_lut8_p4.table[2] = UINT32_C(0x3F7837F0);
3095 params->avx2_rr1_lut8_p4.table[3] = UINT32_C(0x3F75FED7);
3096 params->avx2_rr1_lut8_p4.table[4] = UINT32_C(0x3F7504F3);
3097 params->avx2_rr1_lut8_p4.table[5] = UINT32_C(0x3F75672A);
3098 params->avx2_rr1_lut8_p4.table[6] = UINT32_C(0x3F7744FD);
3099 params->avx2_rr1_lut8_p4.table[7] = UINT32_C(0x3F7AC0C7);
3100 for (uint32_t i = 0; i < 8; i++) {
3101 params->avx2_rr1_lut8_p4.minus_ln2[i] = -0x1.62E430p-1f;
3102 params->avx2_rr1_lut8_p4.c4[i] = 0x1.5558ECp-5f;
3103 params->avx2_rr1_lut8_p4.c3[i] = 0x1.555C20p-3f;
3104 params->avx2_rr1_lut8_p4.c2[i] = 0x1.000000p-1f;
3105 }
3106 for (uint32_t i = 0; i < 7; i++) {
3107 params->avx2_rr1_lut8_p4.mask_table[i] = -1;
3108 }
3109 for (uint32_t i = 7; i < 14; i++) {
3110 params->avx2_rr1_lut8_p4.mask_table[i] = 0;
3111 }
3112 return sizeof(params->avx2_rr1_lut8_p4);
3113}
3114
3115size_t xnn_init_f32_elu_avx2_rr1_lut4_p4_params(
3116 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3117 float prescale,
3118 float alpha,
3119 float beta)
3120{
3121 for (uint32_t i = 0; i < 8; i++) {
3122 params->avx2_rr1_lut4_p4.prescale[i] = prescale;
3123 params->avx2_rr1_lut4_p4.alpha[i] = alpha;
3124 params->avx2_rr1_lut4_p4.beta[i] = beta;
3125 params->avx2_rr1_lut4_p4.sat_cutoff[i] = -0x1.154246p+4f;
3126 params->avx2_rr1_lut4_p4.magic_bias[i] = 0x1.800000p21f;
3127 params->avx2_rr1_lut4_p4.log2e[i] = 0x1.715476p+0f;
3128 }
3129 params->avx2_rr1_lut4_p4.table[0] = 0x1.000000p+0f;
3130 params->avx2_rr1_lut4_p4.table[1] = 0x1.F06FE0p-1f;
3131 params->avx2_rr1_lut4_p4.table[2] = 0x1.EA09E6p-1f;
3132 params->avx2_rr1_lut4_p4.table[3] = 0x1.EE89FAp-1f;
3133 params->avx2_rr1_lut4_p4.table[4] = 0x1.000000p+0f;
3134 params->avx2_rr1_lut4_p4.table[5] = 0x1.F06FE0p-1f;
3135 params->avx2_rr1_lut4_p4.table[6] = 0x1.EA09E6p-1f;
3136 params->avx2_rr1_lut4_p4.table[7] = 0x1.EE89FAp-1f;
3137 for (uint32_t i = 0; i < 8; i++) {
3138 params->avx2_rr1_lut4_p4.minus_ln2[i] = -0x1.62E430p-1f;
3139 params->avx2_rr1_lut4_p4.c4[i] = 0x1.554F9Ap-5f;
3140 params->avx2_rr1_lut4_p4.c3[i] = 0x1.557082p-3f;
3141 params->avx2_rr1_lut4_p4.c2[i] = 0x1.000002p-1f;
3142 }
3143 for (uint32_t i = 0; i < 7; i++) {
3144 params->avx2_rr1_lut4_p4.mask_table[i] = -1;
3145 }
3146 for (uint32_t i = 7; i < 14; i++) {
3147 params->avx2_rr1_lut4_p4.mask_table[i] = 0;
3148 }
3149 return sizeof(params->avx2_rr1_lut4_p4);
3150}
3151
3152size_t xnn_init_f32_elu_avx2_rr1_p6_params(
3153 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3154 float prescale,
3155 float alpha,
3156 float beta)
3157{
3158 for (uint32_t i = 0; i < 8; i++) {
3159 params->avx2_rr1_p6.prescale[i] = prescale;
3160 params->avx2_rr1_p6.alpha[i] = alpha;
3161 params->avx2_rr1_p6.beta[i] = beta;
3162 params->avx2_rr1_p6.sat_cutoff[i] = -0x1.154246p+4f;
3163 params->avx2_rr1_p6.magic_bias[i] = 0x1.8000FEp23f;
3164 params->avx2_rr1_p6.log2e[i] = 0x1.715476p+0f;
3165 params->avx2_rr1_p6.minus_ln2[i] = -0x1.62E430p-1f;
3166 params->avx2_rr1_p6.c6[i] = 0x1.6B7338p-10f;
3167 params->avx2_rr1_p6.c5[i] = 0x1.12278Ep-7f;
3168 params->avx2_rr1_p6.c4[i] = 0x1.555716p-5f;
3169 params->avx2_rr1_p6.c3[i] = 0x1.5554B0p-3f;
3170 params->avx2_rr1_p6.c2[i] = 0x1.FFFFFEp-2f;
3171 }
3172 for (uint32_t i = 0; i < 7; i++) {
3173 params->avx2_rr1_p6.mask_table[i] = -1;
3174 }
3175 for (uint32_t i = 7; i < 14; i++) {
3176 params->avx2_rr1_p6.mask_table[i] = 0;
3177 }
3178 return sizeof(params->avx2_rr1_p6);
3179}
3180
3181size_t xnn_init_f32_elu_avx512_rr1_lut16_p3_params(
3182 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3183 float prescale,
3184 float alpha,
3185 float beta)
3186{
3187 params->avx512_rr1_lut16_p3.prescale = prescale;
3188 params->avx512_rr1_lut16_p3.alpha = alpha;
3189 params->avx512_rr1_lut16_p3.beta = beta;
3190 params->avx512_rr1_lut16_p3.sat_cutoff = -0x1.154246p+4f;
3191 params->avx512_rr1_lut16_p3.magic_bias = 0x1.800000p19f;
3192 params->avx512_rr1_lut16_p3.log2e = 0x1.715476p+0f;
3193 params->avx512_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;
3194 params->avx512_rr1_lut16_p3.c3 = 0x1.55561Cp-3f;
3195 params->avx512_rr1_lut16_p3.c2 = 0x1.0001ECp-1f;
3196 params->avx512_rr1_lut16_p3.table[ 0] = UINT32_C(0x3F800000);
3197 params->avx512_rr1_lut16_p3.table[ 1] = UINT32_C(0x3F7DAAC3);
3198 params->avx512_rr1_lut16_p3.table[ 2] = UINT32_C(0x3F7B95C2);
3199 params->avx512_rr1_lut16_p3.table[ 3] = UINT32_C(0x3F79C3D3);
3200 params->avx512_rr1_lut16_p3.table[ 4] = UINT32_C(0x3F7837F0);
3201 params->avx512_rr1_lut16_p3.table[ 5] = UINT32_C(0x3F76F532);
3202 params->avx512_rr1_lut16_p3.table[ 6] = UINT32_C(0x3F75FED7);
3203 params->avx512_rr1_lut16_p3.table[ 7] = UINT32_C(0x3F75583F);
3204 params->avx512_rr1_lut16_p3.table[ 8] = UINT32_C(0x3F7504F3);
3205 params->avx512_rr1_lut16_p3.table[ 9] = UINT32_C(0x3F7508A4);
3206 params->avx512_rr1_lut16_p3.table[10] = UINT32_C(0x3F75672A);
3207 params->avx512_rr1_lut16_p3.table[11] = UINT32_C(0x3F76248C);
3208 params->avx512_rr1_lut16_p3.table[12] = UINT32_C(0x3F7744FD);
3209 params->avx512_rr1_lut16_p3.table[13] = UINT32_C(0x3F78CCDF);
3210 params->avx512_rr1_lut16_p3.table[14] = UINT32_C(0x3F7AC0C7);
3211 params->avx512_rr1_lut16_p3.table[15] = UINT32_C(0x3F7D257D);
3212 return sizeof(params->avx512_rr1_lut16_p3);
3213}
3214
3215size_t xnn_init_f32_elu_avx512_rr1_p6_params(
3216 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3217 float prescale,
3218 float alpha,
3219 float beta)
3220{
3221 params->avx512_rr1_p6.prescale = prescale;
3222 params->avx512_rr1_p6.alpha = alpha;
3223 params->avx512_rr1_p6.beta = beta;
3224 params->avx512_rr1_p6.sat_cutoff = -0x1.154246p+4f;
3225 params->avx512_rr1_p6.magic_bias = 0x1.8000FEp23f;
3226 params->avx512_rr1_p6.log2e = 0x1.715476p+0f;
3227 params->avx512_rr1_p6.minus_ln2 = -0x1.62E430p-1f;
3228 params->avx512_rr1_p6.c6 = 0x1.6B7338p-10f;
3229 params->avx512_rr1_p6.c5 = 0x1.12278Ep-7f;
3230 params->avx512_rr1_p6.c4 = 0x1.555716p-5f;
3231 params->avx512_rr1_p6.c3 = 0x1.5554B0p-3f;
3232 params->avx512_rr1_p6.c2 = 0x1.FFFFFEp-2f;
3233 return sizeof(params->avx512_rr1_p6);
3234}
3235#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3236
3237#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3238size_t xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params(
3239 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3240 float prescale,
3241 float alpha,
3242 float beta)
3243{
3244 for (uint32_t i = 0; i < 2; i++) {
3245 params->wasmsimd_rr2_lut16_p3.prescale[i] = prescale;
3246 params->wasmsimd_rr2_lut16_p3.alpha[i] = alpha;
3247 params->wasmsimd_rr2_lut16_p3.beta[i] = beta;
3248 params->wasmsimd_rr2_lut16_p3.sat_cutoff[i] = -0x1.154246p+4f;
3249 params->wasmsimd_rr2_lut16_p3.magic_bias[i] = 0x1.800000p19f;
3250 params->wasmsimd_rr2_lut16_p3.log2e[i] = 0x1.715476p+0f;
3251 params->wasmsimd_rr2_lut16_p3.index_mask[i] = UINT32_C(0xF);
3252 params->wasmsimd_rr2_lut16_p3.minus_ln2_hi[i] = -0x1.62E400p-1f;
3253 params->wasmsimd_rr2_lut16_p3.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
3254 params->wasmsimd_rr2_lut16_p3.c3[i] = 0x1.55561Cp-3f;
3255 params->wasmsimd_rr2_lut16_p3.c2[i] = 0x1.0001ECp-1f;
3256 params->wasmsimd_rr2_lut16_p3.one[i] = 1.0f;
3257 }
3258 return sizeof(params->wasmsimd_rr2_lut16_p3);
3259}
3260
3261size_t xnn_init_f32_elu_wasmsimd_rr2_p6_params(
3262 union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
3263 float prescale,
3264 float alpha,
3265 float beta)
3266{
3267 for (uint32_t i = 0; i < 2; i++) {
3268 params->wasmsimd_rr2_p6.prescale[i] = prescale;
3269 params->wasmsimd_rr2_p6.alpha[i] = alpha;
3270 params->wasmsimd_rr2_p6.beta[i] = beta;
3271 params->wasmsimd_rr2_p6.sat_cutoff[i] = -0x1.154246p+4f;
3272 params->wasmsimd_rr2_p6.magic_bias[i] = 0x1.8000FEp23f;
3273 params->wasmsimd_rr2_p6.log2e[i] = 0x1.715476p+0f;
3274 params->wasmsimd_rr2_p6.minus_ln2_hi[i] = -0x1.62E440p-1f;
3275 params->wasmsimd_rr2_p6.minus_ln2_lo[i] = 0x1.0105C6p-21f;
3276 params->wasmsimd_rr2_p6.c6[i] = 0x1.6b7338p-10f;
3277 params->wasmsimd_rr2_p6.c5[i] = 0x1.12278Ep-7f;
3278 params->wasmsimd_rr2_p6.c4[i] = 0x1.555716p-5f;
3279 params->wasmsimd_rr2_p6.c3[i] = 0x1.5554B0p-3f;
3280 params->wasmsimd_rr2_p6.c2[i] = 0x1.FFFFFEp-2f;
3281 params->wasmsimd_rr2_p6.one[i] = 1.0f;
3282 }
3283 return sizeof(params->wasmsimd_rr2_p6);
3284}
3285#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3286
3287#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3288size_t xnn_init_f16_expminus_fp16arith_rr2_p2_params(
3289 union xnn_f16_expminus_params params[XNN_MIN_ELEMENTS(1)])
3290{
3291 params->fp16arith_rr2_p2.magic_bias = UINT16_C(0x660F); // 0x1.83Cp+10h
3292 params->fp16arith_rr2_p2.log2e = UINT16_C(0x3DC5); // 0x1.714p+0h
3293 params->fp16arith_rr2_p2.minus_ln2_hi = UINT16_C(0xB98C); // -0x1.630p-1h
3294 params->fp16arith_rr2_p2.minus_ln2_lo = UINT16_C(0x0AF4); // 0x1.BD0p-13h
3295 params->fp16arith_rr2_p2.c2 = UINT16_C(0x37F9); // 0x1.FE4p-2h
3296 params->fp16arith_rr2_p2.c1 = UINT16_C(0x3C0E); // 0x1.038p+0h
3297 params->fp16arith_rr2_p2.denorm_cutoff = UINT16_C(0xC8DA); // -0x1.368p+3h
3298 return sizeof(params->fp16arith_rr2_p2);
3299}
3300#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3301
3302#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3303size_t xnn_init_f16_expminus_avx2_rr1_p2_params(
3304 union xnn_f16_expminus_params params[XNN_MIN_ELEMENTS(1)])
3305{
3306 for (uint32_t i = 0; i < 8; i++) {
3307 params->avx2_rr1_p2.magic_bias[i] = 0x1.8000FEp23f;
3308 params->avx2_rr1_p2.log2e[i] = 0x1.715476p0f;
3309 params->avx2_rr1_p2.minus_ln2[i] = -0x1.62E43p-1f;
3310 params->avx2_rr1_p2.c2[i] = 0x1.FF3A32p-2f;
3311 params->avx2_rr1_p2.c1[i] = 0x1.039E10p+0f;
3312 params->avx2_rr1_p2.denorm_cutoff[i] = -0x1.368000p+3f;
3313 }
3314 return sizeof(params->avx2_rr1_p2);
3315}
3316#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3317
3318size_t xnn_init_f32_expminus_scalar_rr2_p5_params(
3319 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3320{
3321 params->scalar_rr2_p5.log2e = 0x1.715476p+0f;
3322 params->scalar_rr2_p5.magic_bias = 0x1.8000FEp23f;
3323 params->scalar_rr2_p5.minus_ln2_hi = -0x1.62E400p-1f;
3324 params->scalar_rr2_p5.minus_ln2_lo = -0x1.7F7D1Cp-20f;
3325 params->scalar_rr2_p5.c5 = 0x1.0F9F9Cp-7f;
3326 params->scalar_rr2_p5.c4 = 0x1.573A1Ap-5f;
3327 params->scalar_rr2_p5.c3 = 0x1.555A80p-3f;
3328 params->scalar_rr2_p5.c2 = 0x1.FFFDC6p-2f;
3329 params->scalar_rr2_p5.c1 = 0x1.FFFFF6p-1f;
3330 params->scalar_rr2_p5.denorm_cutoff = -0x1.5D589Ep6f;
3331 return sizeof(params->scalar_rr2_p5);
3332}
3333
3334size_t xnn_init_f32_expminus_scalar_rr2_lut64_p2_params(
3335 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3336{
3337 params->scalar_rr2_lut64_p2.log2e = 0x1.715476p0f;
3338 params->scalar_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
3339 params->scalar_rr2_lut64_p2.minus_ln2_hi = -0x1.630000p-1f;
3340 params->scalar_rr2_lut64_p2.minus_ln2_lo = 0x1.BD0106p-13f;
3341 params->scalar_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
3342 params->scalar_rr2_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;
3343 return sizeof(params->scalar_rr2_lut64_p2);
3344}
3345
3346#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3347size_t xnn_init_f32_expminus_neon_rr2_p5_params(
3348 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3349{
3350 params->neon_rr2_p5.log2e = 0x1.715476p+0f;
3351 params->neon_rr2_p5.magic_bias = 0x1.8000FEp23f;
3352 params->neon_rr2_p5.minus_ln2_hi = -0x1.62E400p-1f;
3353 params->neon_rr2_p5.minus_ln2_lo = -0x1.7F7D1Cp-20f;
3354 params->neon_rr2_p5.c5 = 0x1.0F9F9Cp-7f;
3355 params->neon_rr2_p5.c4 = 0x1.573A1Ap-5f;
3356 params->neon_rr2_p5.c3 = 0x1.555A80p-3f;
3357 params->neon_rr2_p5.c2 = 0x1.FFFDC6p-2f;
3358 params->neon_rr2_p5.c1 = 0x1.FFFFF6p-1f;
3359 params->neon_rr2_p5.denorm_cutoff = -0x1.5D589Ep6f;
3360 return sizeof(params->neon_rr2_p5);
3361}
3362
3363size_t xnn_init_f32_expminus_neon_rr2_lut64_p2_params(
3364 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3365{
3366 params->neon_rr2_lut64_p2.log2e = 0x1.715476p+0f;
3367 params->neon_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
3368 params->neon_rr2_lut64_p2.minus_ln2_hi = -0x1.62E400p-1f;
3369 params->neon_rr2_lut64_p2.minus_ln2_lo = -0x1.7F7D1Cp-20f;
3370 params->neon_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
3371 params->neon_rr2_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;
3372 return sizeof(params->neon_rr2_lut64_p2);
3373}
3374
3375size_t xnn_init_f32_expminus_neonfma_rr1_p5_params(
3376 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3377{
3378 params->neonfma_rr1_p5.log2e = 0x1.715476p+0f;
3379 params->neonfma_rr1_p5.magic_bias = 0x1.8000FEp23f;
3380 params->neonfma_rr1_p5.minus_ln2 = -0x1.62E430p-1f;
3381 params->neonfma_rr1_p5.c5 = 0x1.0F9F9Cp-7f;
3382 params->neonfma_rr1_p5.c4 = 0x1.573A1Ap-5f;
3383 params->neonfma_rr1_p5.c3 = 0x1.555A80p-3f;
3384 params->neonfma_rr1_p5.c2 = 0x1.FFFDC6p-2f;
3385 params->neonfma_rr1_p5.c1 = 0x1.FFFFF6p-1f;
3386 params->neonfma_rr1_p5.denorm_cutoff = -0x1.5D589Ep6f;
3387 return sizeof(params->neonfma_rr1_p5);
3388}
3389
3390size_t xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params(
3391 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3392{
3393 params->neonfma_rr1_lut64_p2.log2e = 0x1.715476p+0f;
3394 params->neonfma_rr1_lut64_p2.magic_bias = 0x1.800000p17f;
3395 params->neonfma_rr1_lut64_p2.minus_ln2 = -0x1.62E430p-1f;
3396 params->neonfma_rr1_lut64_p2.c2 = 0x1.FFFF0Ap-2f;
3397 params->neonfma_rr1_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;
3398 return sizeof(params->neonfma_rr1_lut64_p2);
3399}
3400#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3401
3402#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3403size_t xnn_init_f32_expminus_sse2_rr2_p5_params(
3404 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3405{
3406 for (uint32_t i = 0; i < 4; i++) {
3407 params->sse2_rr2_p5.log2e[i] = 0x1.715476p+0f;
3408 params->sse2_rr2_p5.magic_bias[i] = 0x1.8000FEp23f;
3409 params->sse2_rr2_p5.minus_ln2_hi[i] = -0x1.62E400p-1f;
3410 params->sse2_rr2_p5.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
3411 params->sse2_rr2_p5.c5[i] = 0x1.0F9F9Cp-7f;
3412 params->sse2_rr2_p5.c4[i] = 0x1.573A1Ap-5f;
3413 params->sse2_rr2_p5.c3[i] = 0x1.555A80p-3f;
3414 params->sse2_rr2_p5.c2[i] = 0x1.FFFDC6p-2f;
3415 params->sse2_rr2_p5.c1[i] = 0x1.FFFFF6p-1f;
3416 params->sse2_rr2_p5.denorm_cutoff[i] = -0x1.5D589Ep6f;
3417 }
3418 return sizeof(params->sse2_rr2_p5);
3419}
3420
3421size_t xnn_init_f32_expminus_avx2_rr1_p5_params(
3422 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3423{
3424 for (uint32_t i = 0; i < 8; i++) {
3425 params->avx2_rr1_p5.log2e[i] = 0x1.715476p+0f;
3426 params->avx2_rr1_p5.magic_bias[i] = 0x1.8000FEp23f;
3427 params->avx2_rr1_p5.minus_ln2[i] = -0x1.62E430p-1f;
3428 params->avx2_rr1_p5.c5[i] = 0x1.0F9F9Cp-7f;
3429 params->avx2_rr1_p5.c4[i] = 0x1.573A1Ap-5f;
3430 params->avx2_rr1_p5.c3[i] = 0x1.555A80p-3f;
3431 params->avx2_rr1_p5.c2[i] = 0x1.FFFDC6p-2f;
3432 params->avx2_rr1_p5.c1[i] = 0x1.FFFFF6p-1f;
3433 params->avx2_rr1_p5.denorm_cutoff[i] = -0x1.5D589Ep6f;
3434 }
3435 for (uint32_t i = 0; i < 7; i++) {
3436 params->avx2_rr1_p5.mask_table[i] = -1;
3437 }
3438 for (uint32_t i = 7; i < 14; i++) {
3439 params->avx2_rr1_p5.mask_table[i] = 0;
3440 }
3441 return sizeof(params->avx2_rr1_p5);
3442}
3443
3444size_t xnn_init_f32_expminus_avx512_rr1_p5_params(
3445 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3446{
3447 params->avx512_rr1_p5.log2e = 0x1.715476p+0f;
3448 params->avx512_rr1_p5.minus_ln2 = -0x1.62E430p-1f;
3449 params->avx512_rr1_p5.c5 = 0x1.0F9F9Cp-7f;
3450 params->avx512_rr1_p5.c4 = 0x1.573A1Ap-5f;
3451 params->avx512_rr1_p5.c3 = 0x1.555A80p-3f;
3452 params->avx512_rr1_p5.c2 = 0x1.FFFDC6p-2f;
3453 params->avx512_rr1_p5.c1 = 0x1.FFFFF6p-1f;
3454 params->avx512_rr1_p5.c0 = 1.0f;
3455 return sizeof(params->avx512_rr1_p5);
3456}
3457#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3458
3459#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3460size_t xnn_init_f32_expminus_wasmsimd_rr2_p5_params(
3461 union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
3462{
3463 for (uint32_t i = 0; i < 2; i++) {
3464 params->wasmsimd_rr2_p5.log2e[i] = 0x1.715476p+0f;
3465 params->wasmsimd_rr2_p5.magic_bias[i] = 0x1.8000FEp23f;
3466 params->wasmsimd_rr2_p5.minus_ln2_hi[i] = -0x1.62E400p-1f;
3467 params->wasmsimd_rr2_p5.minus_ln2_lo[i] = -0x1.7F7D1Cp-20f;
3468 params->wasmsimd_rr2_p5.c5[i] = 0x1.0F9F9Cp-7f;
3469 params->wasmsimd_rr2_p5.c4[i] = 0x1.573A1Ap-5f;
3470 params->wasmsimd_rr2_p5.c3[i] = 0x1.555A80p-3f;
3471 params->wasmsimd_rr2_p5.c2[i] = 0x1.FFFDC6p-2f;
3472 params->wasmsimd_rr2_p5.c1[i] = 0x1.FFFFF6p-1f;
3473 params->wasmsimd_rr2_p5.denorm_cutoff[i] = -0x1.5D589Ep6f;
3474 }
3475 return sizeof(params->wasmsimd_rr2_p5);
3476}
3477#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3478
3479#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3480size_t xnn_init_f16_lrelu_fp16arith_params(
3481 union xnn_f16_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3482 uint16_t slope)
3483{
3484 params->fp16arith.slope = slope;
3485 return sizeof(params->fp16arith);
3486}
3487#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3488
3489#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3490size_t xnn_init_f16_lrelu_avx_params(
3491 union xnn_f16_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3492 uint16_t slope)
3493{
3494 for (uint32_t i = 0; i < 8; i++) {
3495 params->avx.slope[i] = fp16_ieee_to_fp32_value(slope);
3496 }
3497 return sizeof(params->avx);
3498}
3499#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3500
3501size_t xnn_init_f32_lrelu_scalar_params(
3502 union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3503 float slope)
3504{
3505 params->scalar.slope = slope;
3506 return sizeof(params->scalar);
3507}
3508
3509#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3510size_t xnn_init_f32_lrelu_sse_params(
3511 union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3512 float slope)
3513{
3514 for (uint32_t i = 0; i < 4; i++) {
3515 params->sse.slope[i] = slope;
3516 }
3517 return sizeof(params->sse);
3518}
3519
3520size_t xnn_init_f32_lrelu_avx_params(
3521 union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3522 float slope)
3523{
3524 for (uint32_t i = 0; i < 8; i++) {
3525 params->avx.slope[i] = slope;
3526 }
3527 for (uint32_t i = 0; i < 7; i++) {
3528 params->avx.mask_table[i] = -1;
3529 }
3530 for (uint32_t i = 7; i < 14; i++) {
3531 params->avx.mask_table[i] = 0;
3532 }
3533 return sizeof(params->avx);
3534}
3535#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3536
3537#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3538size_t xnn_init_f32_lrelu_wasmsimd_params(
3539 union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3540 float slope)
3541{
3542 params->wasmsimd.slope[0] = slope;
3543 params->wasmsimd.slope[1] = slope;
3544 return sizeof(params->wasmsimd);
3545}
3546#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3547
3548size_t xnn_init_qs8_lrelu_scalar_select_params(
3549 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3550 float positive_scale,
3551 float negative_scale,
3552 int8_t input_zero_point,
3553 int8_t output_zero_point)
3554{
3555 assert(positive_scale >= 0x1.0p-8f);
3556 assert(positive_scale <= 0x1.0p+7f);
3557 assert(negative_scale <= 0x1.0p+7f);
3558 assert(negative_scale >= -0x1.FFFC00p+6f);
3559 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3560
3561 const long positive_multiplier = lrintf(256.0f * positive_scale);
3562 assert(positive_multiplier >= 1L);
3563 assert(positive_multiplier <= 32768L);
3564 const long negative_multiplier = lrintf(256.0f * negative_scale);
3565 assert(negative_multiplier <= 32768L);
3566 assert(negative_multiplier >= -32767L);
3567 assert(negative_multiplier != 0L);
3568 params->scalar_select.input_zero_point = (int32_t) input_zero_point;
3569 params->scalar_select.positive_multiplier = (int32_t) positive_multiplier;
3570 params->scalar_select.negative_multiplier = (int32_t) negative_multiplier;
3571 params->scalar_select.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3572 return sizeof(params->scalar_select);
3573}
3574
3575size_t xnn_init_qs8_lrelu_scalar_andxor_params(
3576 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3577 float positive_scale,
3578 float negative_scale,
3579 int8_t input_zero_point,
3580 int8_t output_zero_point)
3581{
3582 assert(positive_scale >= 0x1.0p-8f);
3583 assert(positive_scale <= 0x1.0p+7f);
3584 assert(negative_scale <= 0x1.0p+7f);
3585 assert(negative_scale >= -0x1.FFFC00p+6f);
3586 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3587
3588 const long positive_multiplier = lrintf(256.0f * positive_scale);
3589 assert(positive_multiplier >= 1L);
3590 assert(positive_multiplier <= 32768L);
3591 const long negative_multiplier = lrintf(256.0f * negative_scale);
3592 assert(negative_multiplier <= 32768L);
3593 assert(negative_multiplier >= -32767L);
3594 assert(negative_multiplier != 0L);
3595 params->scalar_andxor.input_zero_point = (int32_t) input_zero_point;
3596 params->scalar_andxor.multiplier_base = (int32_t) positive_multiplier;
3597 params->scalar_andxor.multiplier_diff = (int32_t) negative_multiplier ^ (int32_t) positive_multiplier;
3598 params->scalar_andxor.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3599 return sizeof(params->scalar_andxor);
3600}
3601
3602#if XNN_ARCH_ARM
3603size_t xnn_init_qs8_lrelu_armsimd32_params(
3604 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3605 float positive_scale,
3606 float negative_scale,
3607 int8_t input_zero_point,
3608 int8_t output_zero_point)
3609{
3610 assert(positive_scale >= 0x1.0p-8f);
3611 assert(positive_scale <= 0x1.0p+7f);
3612 assert(negative_scale <= 0x1.0p+7f);
3613 assert(negative_scale >= -0x1.FFFC00p+6f);
3614 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3615
3616 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3617 assert(positive_multiplier <= -1L);
3618 assert(positive_multiplier >= -32768L);
3619 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3620 assert(negative_multiplier >= -32768L);
3621 assert(negative_multiplier <= 32767L);
3622 assert(negative_multiplier != 0L);
3623 params->armsimd32.input_zero_point = (uint32_t) (uint16_t) (int16_t) input_zero_point * UINT32_C(0x00010001);
3624 params->armsimd32.positive_multiplier = (uint32_t) (uint16_t) (int16_t) positive_multiplier * UINT32_C(0x00010001);
3625 params->armsimd32.negative_multiplier = (uint32_t) (uint16_t) (int16_t) negative_multiplier * UINT32_C(0x00010001);
3626 params->armsimd32.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3627 return sizeof(params->armsimd32);
3628}
3629#endif // XNN_ARCH_ARM
3630
3631#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3632size_t xnn_init_qs8_lrelu_neon_params(
3633 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3634 float positive_scale,
3635 float negative_scale,
3636 int8_t input_zero_point,
3637 int8_t output_zero_point)
3638{
3639 assert(positive_scale >= 0x1.0p-8f);
3640 assert(positive_scale <= 0x1.0p+7f);
3641 assert(negative_scale <= 0x1.0p+7f);
3642 assert(negative_scale >= -0x1.FFFC00p+6f);
3643 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3644
3645 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3646 assert(positive_multiplier <= -1L);
3647 assert(positive_multiplier >= -32768L);
3648 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3649 assert(negative_multiplier >= -32768L);
3650 assert(negative_multiplier <= 32767L);
3651 assert(negative_multiplier != 0L);
3652 params->neon.input_zero_point = (int16_t) input_zero_point;
3653 params->neon.positive_multiplier = (int16_t) positive_multiplier;
3654 params->neon.negative_multiplier = (int16_t) negative_multiplier;
3655 params->neon.output_zero_point = (int16_t) output_zero_point;
3656 return sizeof(params->neon);
3657}
3658#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3659
3660#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3661size_t xnn_init_qs8_lrelu_sse2_params(
3662 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3663 float positive_scale,
3664 float negative_scale,
3665 int8_t input_zero_point,
3666 int8_t output_zero_point)
3667{
3668 assert(positive_scale >= 0x1.0p-8f);
3669 assert(positive_scale <= 0x1.0p+7f);
3670 assert(negative_scale <= 0x1.0p+7f);
3671 assert(negative_scale >= -0x1.FFFC00p+6f);
3672 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3673
3674 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3675 assert(positive_multiplier <= -1L);
3676 assert(positive_multiplier >= -32768L);
3677 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3678 assert(negative_multiplier >= -32768L);
3679 assert(negative_multiplier <= 32767L);
3680 assert(negative_multiplier != 0L);
3681 const int16_t multiplier_base = (int16_t) negative_multiplier;
3682 const int16_t multiplier_diff = (int16_t) positive_multiplier ^ (int16_t) negative_multiplier;
3683 for (uint32_t i = 0; i < 8; i++) {
3684 params->sse2.input_zero_point[i] = (int16_t) input_zero_point;
3685 params->sse2.multiplier_diff[i] = multiplier_diff;
3686 params->sse2.multiplier_base[i] = multiplier_base;
3687 params->sse2.output_zero_point[i] = (int16_t) output_zero_point;
3688 }
3689 return sizeof(params->sse2);
3690}
3691
3692size_t xnn_init_qs8_lrelu_avx_params(
3693 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3694 float positive_scale,
3695 float negative_scale,
3696 int8_t input_zero_point,
3697 int8_t output_zero_point)
3698{
3699 assert(positive_scale >= 0x1.0p-8f);
3700 assert(positive_scale <= 0x1.0p+7f);
3701 assert(negative_scale <= 0x1.0p+7f);
3702 assert(negative_scale >= -0x1.FFFC00p+6f);
3703 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3704
3705 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3706 assert(positive_multiplier <= -1L);
3707 assert(positive_multiplier >= -32768L);
3708 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3709 assert(negative_multiplier >= -32768L);
3710 assert(negative_multiplier <= 32767L);
3711 assert(negative_multiplier != 0L);
3712 for (uint32_t i = 0; i < 8; i++) {
3713 params->avx.input_zero_point[i] = (int16_t) input_zero_point;
3714 params->avx.positive_multiplier[i] = (int16_t) positive_multiplier;
3715 params->avx.negative_multiplier[i] = (int16_t) negative_multiplier;
3716 params->avx.output_zero_point[i] = (int16_t) output_zero_point;
3717 }
3718 return sizeof(params->avx);
3719}
3720
3721size_t xnn_init_qs8_lrelu_avx2_params(
3722 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3723 float positive_scale,
3724 float negative_scale,
3725 int8_t input_zero_point,
3726 int8_t output_zero_point)
3727{
3728 assert(positive_scale >= 0x1.0p-8f);
3729 assert(positive_scale <= 0x1.0p+7f);
3730 assert(negative_scale <= 0x1.0p+7f);
3731 assert(negative_scale >= -0x1.FFFC00p+6f);
3732 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3733
3734 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3735 assert(positive_multiplier <= -1L);
3736 assert(positive_multiplier >= -32768L);
3737 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3738 assert(negative_multiplier >= -32768L);
3739 assert(negative_multiplier <= 32767L);
3740 assert(negative_multiplier != 0L);
3741 for (uint32_t i = 0; i < 16; i++) {
3742 params->avx2.input_zero_point[i] = (int16_t) input_zero_point;
3743 params->avx2.positive_multiplier[i] = (int16_t) positive_multiplier;
3744 params->avx2.negative_multiplier[i] = (int16_t) negative_multiplier;
3745 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
3746 }
3747 return sizeof(params->avx2);
3748}
3749#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3750
3751#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3752size_t xnn_init_qs8_lrelu_wasmsimd_arm_params(
3753 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3754 float positive_scale,
3755 float negative_scale,
3756 int8_t input_zero_point,
3757 int8_t output_zero_point)
3758{
3759 assert(positive_scale >= 0x1.0p-8f);
3760 assert(positive_scale <= 0x1.0p+7f);
3761 assert(negative_scale <= 0x1.0p+7f);
3762 assert(negative_scale >= -0x1.FFFC00p+6f);
3763 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3764
3765 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3766 assert(positive_multiplier <= -1L);
3767 assert(positive_multiplier >= -32768L);
3768 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3769 assert(negative_multiplier >= -32768L);
3770 assert(negative_multiplier <= 32767L);
3771 assert(negative_multiplier != 0L);
3772 for (uint32_t i = 0; i < 4; i++) {
3773 params->wasmsimd_arm.input_zero_point[i] = (int16_t) input_zero_point;
3774 params->wasmsimd_arm.positive_multiplier[i] = (int16_t) positive_multiplier;
3775 params->wasmsimd_arm.negative_multiplier[i] = (int16_t) negative_multiplier;
3776 params->wasmsimd_arm.output_zero_point[i] = (int16_t) output_zero_point;
3777 }
3778 return sizeof(params->wasmsimd_arm);
3779}
3780
3781size_t xnn_init_qs8_lrelu_wasmsimd_x86_params(
3782 union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3783 float positive_scale,
3784 float negative_scale,
3785 int8_t input_zero_point,
3786 int8_t output_zero_point)
3787{
3788 assert(positive_scale >= 0x1.0p-8f);
3789 assert(positive_scale <= 0x1.0p+7f);
3790 assert(negative_scale <= 0x1.0p+7f);
3791 assert(negative_scale >= -0x1.FFFC00p+6f);
3792 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3793
3794 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3795 assert(positive_multiplier <= -1L);
3796 assert(positive_multiplier >= -32768L);
3797 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3798 assert(negative_multiplier >= -32768L);
3799 assert(negative_multiplier <= 32767L);
3800 assert(negative_multiplier != 0L);
3801 const int16_t multiplier_base = (int16_t) negative_multiplier;
3802 const int16_t multiplier_diff = (int16_t) positive_multiplier ^ (int16_t) negative_multiplier;
3803 for (uint32_t i = 0; i < 4; i++) {
3804 params->wasmsimd_x86.input_zero_point[i] = (int16_t) input_zero_point;
3805 params->wasmsimd_x86.multiplier_diff[i] = multiplier_diff;
3806 params->wasmsimd_x86.multiplier_base[i] = multiplier_base;
3807 params->wasmsimd_x86.output_zero_point[i] = (int16_t) output_zero_point;
3808 }
3809 return sizeof(params->wasmsimd_x86);
3810}
3811#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3812
3813size_t xnn_init_qu8_lrelu_scalar_select_params(
3814 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3815 float positive_scale,
3816 float negative_scale,
3817 uint8_t input_zero_point,
3818 uint8_t output_zero_point)
3819{
3820 assert(positive_scale >= 0x1.0p-8f);
3821 assert(positive_scale <= 0x1.0p+7f);
3822 assert(negative_scale <= 0x1.0p+7f);
3823 assert(negative_scale >= -0x1.FFFC00p+6f);
3824 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3825
3826 const long positive_multiplier = lrintf(256.0f * positive_scale);
3827 assert(positive_multiplier >= 1L);
3828 assert(positive_multiplier <= 32768L);
3829 const long negative_multiplier = lrintf(256.0f * negative_scale);
3830 assert(negative_multiplier <= 32768L);
3831 assert(negative_multiplier >= -32767L);
3832 assert(negative_multiplier != 0L);
3833 params->scalar_select.input_zero_point = (int32_t) input_zero_point;
3834 params->scalar_select.positive_multiplier = (int32_t) positive_multiplier;
3835 params->scalar_select.negative_multiplier = (int32_t) negative_multiplier;
3836 params->scalar_select.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3837 return sizeof(params->scalar_select);
3838}
3839
3840size_t xnn_init_qu8_lrelu_scalar_andxor_params(
3841 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3842 float positive_scale,
3843 float negative_scale,
3844 uint8_t input_zero_point,
3845 uint8_t output_zero_point)
3846{
3847 assert(positive_scale >= 0x1.0p-8f);
3848 assert(positive_scale <= 0x1.0p+7f);
3849 assert(negative_scale <= 0x1.0p+7f);
3850 assert(negative_scale >= -0x1.FFFC00p+6f);
3851 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3852
3853 const long positive_multiplier = lrintf(256.0f * positive_scale);
3854 assert(positive_multiplier >= 1L);
3855 assert(positive_multiplier <= 32768L);
3856 const long negative_multiplier = lrintf(256.0f * negative_scale);
3857 assert(negative_multiplier <= 32768L);
3858 assert(negative_multiplier >= -32767L);
3859 assert(negative_multiplier != 0L);
3860 params->scalar_andxor.input_zero_point = (int32_t) input_zero_point;
3861 params->scalar_andxor.multiplier_base = (int32_t) positive_multiplier;
3862 params->scalar_andxor.multiplier_diff = (int32_t) negative_multiplier ^ (int32_t) positive_multiplier;
3863 params->scalar_andxor.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3864 return sizeof(params->scalar_andxor);
3865}
3866
3867#if XNN_ARCH_ARM
3868size_t xnn_init_qu8_lrelu_armsimd32_params(
3869 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3870 float positive_scale,
3871 float negative_scale,
3872 uint8_t input_zero_point,
3873 uint8_t output_zero_point)
3874{
3875 assert(positive_scale >= 0x1.0p-8f);
3876 assert(positive_scale <= 0x1.0p+7f);
3877 assert(negative_scale <= 0x1.0p+7f);
3878 assert(negative_scale >= -0x1.FFFC00p+6f);
3879 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3880
3881 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3882 assert(positive_multiplier <= -1L);
3883 assert(positive_multiplier >= -32768L);
3884 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3885 assert(negative_multiplier >= -32768L);
3886 assert(negative_multiplier <= 32767L);
3887 assert(negative_multiplier != 0L);
3888 params->armsimd32.input_zero_point = (uint32_t) input_zero_point * UINT32_C(0x00010001);
3889 params->armsimd32.positive_multiplier = (uint32_t) (uint16_t) (int16_t) positive_multiplier * UINT32_C(0x00010001);
3890 params->armsimd32.negative_multiplier = (uint32_t) (uint16_t) (int16_t) negative_multiplier * UINT32_C(0x00010001);
3891 params->armsimd32.bias = ((int32_t) output_zero_point << 8) + INT32_C(0x80);
3892 return sizeof(params->armsimd32);
3893}
3894#endif // XNN_ARCH_ARM
3895
3896#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3897size_t xnn_init_qu8_lrelu_neon_params(
3898 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3899 float positive_scale,
3900 float negative_scale,
3901 uint8_t input_zero_point,
3902 uint8_t output_zero_point)
3903{
3904 assert(positive_scale >= 0x1.0p-8f);
3905 assert(positive_scale <= 0x1.0p+7f);
3906 assert(negative_scale <= 0x1.0p+7f);
3907 assert(negative_scale >= -0x1.FFFC00p+6f);
3908 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3909
3910 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3911 assert(positive_multiplier <= -1L);
3912 assert(positive_multiplier >= -32768L);
3913 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3914 assert(negative_multiplier >= -32768L);
3915 assert(negative_multiplier <= 32767L);
3916 assert(negative_multiplier != 0L);
3917 params->neon.input_zero_point = (uint16_t) input_zero_point;
3918 params->neon.positive_multiplier = (int16_t) positive_multiplier;
3919 params->neon.negative_multiplier = (int16_t) negative_multiplier;
3920 params->neon.output_zero_point = (int16_t) output_zero_point;
3921 return sizeof(params->neon);
3922}
3923#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3924
3925#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3926size_t xnn_init_qu8_lrelu_sse2_params(
3927 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3928 float positive_scale,
3929 float negative_scale,
3930 uint8_t input_zero_point,
3931 uint8_t output_zero_point)
3932{
3933 assert(positive_scale >= 0x1.0p-8f);
3934 assert(positive_scale <= 0x1.0p+7f);
3935 assert(negative_scale <= 0x1.0p+7f);
3936 assert(negative_scale >= -0x1.FFFC00p+6f);
3937 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3938
3939 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3940 assert(positive_multiplier <= -1L);
3941 assert(positive_multiplier >= -32768L);
3942 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3943 assert(negative_multiplier >= -32768L);
3944 assert(negative_multiplier <= 32767L);
3945 assert(negative_multiplier != 0L);
3946 const int16_t multiplier_base = (int16_t) negative_multiplier;
3947 const int16_t multiplier_diff = (int16_t) positive_multiplier ^ (int16_t) negative_multiplier;
3948 for (uint32_t i = 0; i < 8; i++) {
3949 params->sse2.input_zero_point[i] = (int16_t) (uint16_t) input_zero_point;
3950 params->sse2.multiplier_diff[i] = multiplier_diff;
3951 params->sse2.multiplier_base[i] = multiplier_base;
3952 params->sse2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
3953 }
3954 return sizeof(params->sse2);
3955}
3956
3957size_t xnn_init_qu8_lrelu_avx_params(
3958 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3959 float positive_scale,
3960 float negative_scale,
3961 uint8_t input_zero_point,
3962 uint8_t output_zero_point)
3963{
3964 assert(positive_scale >= 0x1.0p-8f);
3965 assert(positive_scale <= 0x1.0p+7f);
3966 assert(negative_scale <= 0x1.0p+7f);
3967 assert(negative_scale >= -0x1.FFFC00p+6f);
3968 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3969
3970 const long positive_multiplier = lrintf(-256.0f * positive_scale);
3971 assert(positive_multiplier <= -1L);
3972 assert(positive_multiplier >= -32768L);
3973 const long negative_multiplier = lrintf(-256.0f * negative_scale);
3974 assert(negative_multiplier >= -32768L);
3975 assert(negative_multiplier <= 32767L);
3976 assert(negative_multiplier != 0L);
3977 for (uint32_t i = 0; i < 8; i++) {
3978 params->avx.input_zero_point[i] = (int16_t) (uint16_t) input_zero_point;
3979 params->avx.positive_multiplier[i] = (int16_t) positive_multiplier;
3980 params->avx.negative_multiplier[i] = (int16_t) negative_multiplier;
3981 params->avx.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
3982 }
3983 return sizeof(params->avx);
3984}
3985
3986size_t xnn_init_qu8_lrelu_avx2_params(
3987 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
3988 float positive_scale,
3989 float negative_scale,
3990 uint8_t input_zero_point,
3991 uint8_t output_zero_point)
3992{
3993 assert(positive_scale >= 0x1.0p-8f);
3994 assert(positive_scale <= 0x1.0p+7f);
3995 assert(negative_scale <= 0x1.0p+7f);
3996 assert(negative_scale >= -0x1.FFFC00p+6f);
3997 assert(fabsf(negative_scale) >= 0x1.0p-8f);
3998
3999 const long positive_multiplier = lrintf(-256.0f * positive_scale);
4000 assert(positive_multiplier <= -1L);
4001 assert(positive_multiplier >= -32768L);
4002 const long negative_multiplier = lrintf(-256.0f * negative_scale);
4003 assert(negative_multiplier >= -32768L);
4004 assert(negative_multiplier <= 32767L);
4005 assert(negative_multiplier != 0L);
4006 for (uint32_t i = 0; i < 16; i++) {
4007 params->avx2.input_zero_point[i] = (int16_t) (uint16_t) input_zero_point;
4008 params->avx2.positive_multiplier[i] = (int16_t) positive_multiplier;
4009 params->avx2.negative_multiplier[i] = (int16_t) negative_multiplier;
4010 params->avx2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4011 }
4012 return sizeof(params->avx2);
4013}
4014#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4015
4016#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4017size_t xnn_init_qu8_lrelu_wasmsimd_arm_params(
4018 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
4019 float positive_scale,
4020 float negative_scale,
4021 uint8_t input_zero_point,
4022 uint8_t output_zero_point)
4023{
4024 assert(positive_scale >= 0x1.0p-8f);
4025 assert(positive_scale <= 0x1.0p+7f);
4026 assert(negative_scale <= 0x1.0p+7f);
4027 assert(negative_scale >= -0x1.FFFC00p+6f);
4028 assert(fabsf(negative_scale) >= 0x1.0p-8f);
4029
4030 const long positive_multiplier = lrintf(-256.0f * positive_scale);
4031 assert(positive_multiplier <= -1L);
4032 assert(positive_multiplier >= -32768L);
4033 const long negative_multiplier = lrintf(-256.0f * negative_scale);
4034 assert(negative_multiplier >= -32768L);
4035 assert(negative_multiplier <= 32767L);
4036 assert(negative_multiplier != 0L);
4037 for (uint32_t i = 0; i < 4; i++) {
4038 params->wasmsimd_arm.input_zero_point[i] = (int16_t) (uint16_t) input_zero_point;
4039 params->wasmsimd_arm.positive_multiplier[i] = (int16_t) positive_multiplier;
4040 params->wasmsimd_arm.negative_multiplier[i] = (int16_t) negative_multiplier;
4041 params->wasmsimd_arm.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4042 }
4043 return sizeof(params->wasmsimd_arm);
4044}
4045
4046size_t xnn_init_qu8_lrelu_wasmsimd_x86_params(
4047 union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
4048 float positive_scale,
4049 float negative_scale,
4050 uint8_t input_zero_point,
4051 uint8_t output_zero_point)
4052{
4053 assert(positive_scale >= 0x1.0p-8f);
4054 assert(positive_scale <= 0x1.0p+7f);
4055 assert(negative_scale <= 0x1.0p+7f);
4056 assert(negative_scale >= -0x1.FFFC00p+6f);
4057 assert(fabsf(negative_scale) >= 0x1.0p-8f);
4058
4059 const long positive_multiplier = lrintf(-256.0f * positive_scale);
4060 assert(positive_multiplier <= -1L);
4061 assert(positive_multiplier >= -32768L);
4062 const long negative_multiplier = lrintf(-256.0f * negative_scale);
4063 assert(negative_multiplier >= -32768L);
4064 assert(negative_multiplier <= 32767L);
4065 assert(negative_multiplier != 0L);
4066 const int16_t multiplier_base = (int16_t) negative_multiplier;
4067 const int16_t multiplier_diff = (int16_t) positive_multiplier ^ (int16_t) negative_multiplier;
4068 for (uint32_t i = 0; i < 4; i++) {
4069 params->wasmsimd_x86.input_zero_point[i] = (int16_t) (uint16_t) input_zero_point;
4070 params->wasmsimd_x86.multiplier_diff[i] = multiplier_diff;
4071 params->wasmsimd_x86.multiplier_base[i] = multiplier_base;
4072 params->wasmsimd_x86.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4073 }
4074 return sizeof(params->wasmsimd_x86);
4075}
4076#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4077
4078#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4079size_t xnn_init_f32_sqrt_avx_params(
4080 union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
4081{
4082 for (uint32_t i = 0; i < 7; i++) {
4083 params->avx.mask_table[i] = -1;
4084 }
4085 for (uint32_t i = 7; i < 14; i++) {
4086 params->avx.mask_table[i] = 0;
4087 }
4088 return sizeof(params->avx);
4089}
4090
4091size_t xnn_init_f32_sqrt_fma_params(
4092 union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
4093{
4094 for (uint32_t i = 0; i < 8; i++) {
4095 params->fma.half[i] = 0.5f;
4096 }
4097 for (uint32_t i = 0; i < 7; i++) {
4098 params->fma.mask_table[i] = -1;
4099 }
4100 for (uint32_t i = 7; i < 14; i++) {
4101 params->fma.mask_table[i] = 0;
4102 }
4103 return sizeof(params->fma);
4104}
4105
4106size_t xnn_init_f32_sqrt_avx512_params(
4107 union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
4108{
4109 params->avx512.half = 0.5f;
4110 return sizeof(params->avx512);
4111}
4112#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4113
4114size_t xnn_init_f32_chw_params(
4115 union xnn_f32_chw_params params[XNN_MIN_ELEMENTS(1)],
4116 uint32_t width,
4117 float output_min,
4118 float output_max)
4119{
4120 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
4121 for (uint32_t i = 0; i < 4; i++) {
4122 params->sse.min[i] = output_min;
4123 params->sse.max[i] = output_max;
4124 }
4125
4126 const uint32_t w4 = (width - 1) & 3;
4127 params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
4128 params->sse.mask[1] = -(uint32_t) (w4 >= 1);
4129 params->sse.mask[2] = -(uint32_t) (w4 >= 2);
4130 params->sse.mask[3] = -(uint32_t) (w4 >= 3);
4131
4132 const uint32_t w8 = (width - 1) & 7;
4133 params->sse.mask_even[0] = UINT32_C(0xFFFFFFFF);
4134 params->sse.mask_even[1] = -(uint32_t) (w8 >= 2);
4135 params->sse.mask_even[2] = -(uint32_t) (w8 >= 4);
4136 params->sse.mask_even[3] = -(uint32_t) (w8 >= 6);
4137 params->sse.mask_odd[0] = -(uint32_t) (w8 >= 1);
4138 params->sse.mask_odd[1] = -(uint32_t) (w8 >= 3);
4139 params->sse.mask_odd[2] = -(uint32_t) (w8 >= 5);
4140 params->sse.mask_odd[3] = -(uint32_t) (w8 >= 7);
4141 return sizeof(params->sse);
4142 #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
4143 params->neon.min = output_min;
4144 params->neon.max = output_max;
4145
4146 const uint32_t w4 = (width - 1) & 3;
4147 params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
4148 params->neon.mask[1] = -(uint32_t) (w4 >= 1);
4149 params->neon.mask[2] = -(uint32_t) (w4 >= 2);
4150 params->neon.mask[3] = -(uint32_t) (w4 >= 3);
4151
4152 const uint32_t w8 = (width - 1) & 7;
4153 params->neon.mask_even[0] = UINT32_C(0xFFFFFFFF);
4154 params->neon.mask_even[1] = -(uint32_t) (w8 >= 2);
4155 params->neon.mask_even[2] = -(uint32_t) (w8 >= 4);
4156 params->neon.mask_even[3] = -(uint32_t) (w8 >= 6);
4157 params->neon.mask_odd[0] = -(uint32_t) (w8 >= 1);
4158 params->neon.mask_odd[1] = -(uint32_t) (w8 >= 3);
4159 params->neon.mask_odd[2] = -(uint32_t) (w8 >= 5);
4160 params->neon.mask_odd[3] = -(uint32_t) (w8 >= 7);
4161 return sizeof(params->neon);
4162 #else
4163 params->scalar.min = output_min;
4164 params->scalar.max = output_max;
4165
4166 const uint32_t w4 = (width - 1) & 3;
4167 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
4168 params->scalar.mask[1] = -(uint32_t) (w4 >= 1);
4169 params->scalar.mask[2] = -(uint32_t) (w4 >= 2);
4170 params->scalar.mask[3] = -(uint32_t) (w4 >= 3);
4171
4172 const uint32_t w8 = (width - 1) & 7;
4173 params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
4174 params->scalar.mask_even[1] = -(uint32_t) (w8 >= 2);
4175 params->scalar.mask_even[2] = -(uint32_t) (w8 >= 4);
4176 params->scalar.mask_even[3] = -(uint32_t) (w8 >= 6);
4177 params->scalar.mask_odd[0] = -(uint32_t) (w8 >= 1);
4178 params->scalar.mask_odd[1] = -(uint32_t) (w8 >= 3);
4179 params->scalar.mask_odd[2] = -(uint32_t) (w8 >= 5);
4180 params->scalar.mask_odd[3] = -(uint32_t) (w8 >= 7);
4181 return sizeof(params->scalar);
4182 #endif
4183}
4184
4185size_t xnn_init_f16_chw_params(
4186 union xnn_f16_chw_params params[XNN_MIN_ELEMENTS(1)],
4187 uint32_t width,
4188 uint16_t output_min,
4189 uint16_t output_max)
4190{
4191 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
4192 params->neonfp16arith.min = output_min;
4193 params->neonfp16arith.max = output_max;
4194
4195 const uint32_t w4 = (width - 1) & 3;
4196 params->neonfp16arith.maskx4[0] = UINT16_C(0xFFFF);
4197 params->neonfp16arith.maskx4[1] = -(uint16_t) (w4 >= 1);
4198 params->neonfp16arith.maskx4[2] = -(uint16_t) (w4 >= 2);
4199 params->neonfp16arith.maskx4[3] = -(uint16_t) (w4 >= 3);
4200
4201 const uint32_t w8 = (width - 1) & 7;
4202 params->neonfp16arith.maskx8[0] = UINT16_C(0xFFFF);
4203 params->neonfp16arith.maskx8[1] = -(uint16_t) (w8 >= 1);
4204 params->neonfp16arith.maskx8[2] = -(uint16_t) (w8 >= 2);
4205 params->neonfp16arith.maskx8[3] = -(uint16_t) (w8 >= 3);
4206 params->neonfp16arith.maskx8[4] = -(uint16_t) (w8 >= 4);
4207 params->neonfp16arith.maskx8[5] = -(uint16_t) (w8 >= 5);
4208 params->neonfp16arith.maskx8[6] = -(uint16_t) (w8 >= 6);
4209 params->neonfp16arith.maskx8[7] = -(uint16_t) (w8 >= 7);
4210
4211 params->neonfp16arith.maskx4_even[0] = UINT16_C(0xFFFF);
4212 params->neonfp16arith.maskx4_even[1] = -(uint16_t) (w8 >= 2);
4213 params->neonfp16arith.maskx4_even[2] = -(uint16_t) (w8 >= 4);
4214 params->neonfp16arith.maskx4_even[3] = -(uint16_t) (w8 >= 6);
4215 params->neonfp16arith.maskx4_odd[0] = -(uint16_t) (w8 >= 1);
4216 params->neonfp16arith.maskx4_odd[1] = -(uint16_t) (w8 >= 3);
4217 params->neonfp16arith.maskx4_odd[2] = -(uint16_t) (w8 >= 5);
4218 params->neonfp16arith.maskx4_odd[3] = -(uint16_t) (w8 >= 7);
4219 return sizeof(params->neonfp16arith);
4220 #else
4221 return 0;
4222 #endif
4223}
4224
4225void xnn_update_f32_chw_params(
4226 union xnn_f32_chw_params* params,
4227 uint32_t width)
4228{
4229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
4230 const uint32_t w4 = (width - 1) & 3;
4231 params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
4232 params->sse.mask[1] = -(uint32_t) (w4 >= 1);
4233 params->sse.mask[2] = -(uint32_t) (w4 >= 2);
4234 params->sse.mask[3] = -(uint32_t) (w4 >= 3);
4235
4236 const uint32_t w8 = (width - 1) & 7;
4237 params->sse.mask_even[0] = UINT32_C(0xFFFFFFFF);
4238 params->sse.mask_even[1] = -(uint32_t) (w8 >= 2);
4239 params->sse.mask_even[2] = -(uint32_t) (w8 >= 4);
4240 params->sse.mask_even[3] = -(uint32_t) (w8 >= 6);
4241 params->sse.mask_odd[0] = -(uint32_t) (w8 >= 1);
4242 params->sse.mask_odd[1] = -(uint32_t) (w8 >= 3);
4243 params->sse.mask_odd[2] = -(uint32_t) (w8 >= 5);
4244 params->sse.mask_odd[3] = -(uint32_t) (w8 >= 7);
4245 #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
4246 const uint32_t w4 = (width - 1) & 3;
4247 params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
4248 params->neon.mask[1] = -(uint32_t) (w4 >= 1);
4249 params->neon.mask[2] = -(uint32_t) (w4 >= 2);
4250 params->neon.mask[3] = -(uint32_t) (w4 >= 3);
4251
4252 const uint32_t w8 = (width - 1) & 7;
4253 params->neon.mask_even[0] = UINT32_C(0xFFFFFFFF);
4254 params->neon.mask_even[1] = -(uint32_t) (w8 >= 2);
4255 params->neon.mask_even[2] = -(uint32_t) (w8 >= 4);
4256 params->neon.mask_even[3] = -(uint32_t) (w8 >= 6);
4257 params->neon.mask_odd[0] = -(uint32_t) (w8 >= 1);
4258 params->neon.mask_odd[1] = -(uint32_t) (w8 >= 3);
4259 params->neon.mask_odd[2] = -(uint32_t) (w8 >= 5);
4260 params->neon.mask_odd[3] = -(uint32_t) (w8 >= 7);
4261 #else
4262 const uint32_t w4 = (width - 1) & 3;
4263 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
4264 params->scalar.mask[1] = -(uint32_t) (w4 >= 1);
4265 params->scalar.mask[2] = -(uint32_t) (w4 >= 2);
4266 params->scalar.mask[3] = -(uint32_t) (w4 >= 3);
4267
4268 const uint32_t w8 = (width - 1) & 7;
4269 params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
4270 params->scalar.mask_even[1] = -(uint32_t) (w8 >= 2);
4271 params->scalar.mask_even[2] = -(uint32_t) (w8 >= 4);
4272 params->scalar.mask_even[3] = -(uint32_t) (w8 >= 6);
4273 params->scalar.mask_odd[0] = -(uint32_t) (w8 >= 1);
4274 params->scalar.mask_odd[1] = -(uint32_t) (w8 >= 3);
4275 params->scalar.mask_odd[2] = -(uint32_t) (w8 >= 5);
4276 params->scalar.mask_odd[3] = -(uint32_t) (w8 >= 7);
4277 #endif
4278}
4279
4280size_t xnn_init_scalar_f32_chw_params(
4281 union xnn_f32_chw_params params[XNN_MIN_ELEMENTS(1)],
4282 uint32_t width,
4283 float output_min,
4284 float output_max)
4285{
4286 params->scalar.min = output_min;
4287 params->scalar.max = output_max;
4288
4289 const uint32_t w4 = (width - 1) & 3;
4290 params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
4291 params->scalar.mask[1] = -(uint32_t) (w4 >= 1);
4292 params->scalar.mask[2] = -(uint32_t) (w4 >= 2);
4293 params->scalar.mask[3] = -(uint32_t) (w4 >= 3);
4294
4295 const uint32_t w8 = (width - 1) & 7;
4296 params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
4297 params->scalar.mask_even[1] = -(uint32_t) (w8 >= 2);
4298 params->scalar.mask_even[2] = -(uint32_t) (w8 >= 4);
4299 params->scalar.mask_even[3] = -(uint32_t) (w8 >= 6);
4300 params->scalar.mask_odd[0] = -(uint32_t) (w8 >= 1);
4301 params->scalar.mask_odd[1] = -(uint32_t) (w8 >= 3);
4302 params->scalar.mask_odd[2] = -(uint32_t) (w8 >= 5);
4303 params->scalar.mask_odd[3] = -(uint32_t) (w8 >= 7);
4304 return sizeof(params->scalar);
4305}
4306
4307#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4308size_t xnn_init_s8_minmax_sse2_params(
4309 union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4310 int8_t output_min,
4311 int8_t output_max)
4312{
4313 assert(output_min < output_max);
4314
4315 const uint8_t output_min_with_bias = UINT8_C(0x80) ^ (uint8_t) output_min;
4316 const uint8_t output_max_with_bias = UINT8_C(0x80) ^ (uint8_t) output_max;
4317 for (uint32_t i = 0; i < 16; i++) {
4318 params->sse2.bias[i] = UINT8_C(0x80);
4319 params->sse2.min_with_bias[i] = output_min_with_bias;
4320 params->sse2.max_with_bias[i] = output_max_with_bias;
4321 }
4322 return sizeof(params->sse2);
4323}
4324
4325size_t xnn_init_s8_minmax_sse4_params(
4326 union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4327 int8_t output_min,
4328 int8_t output_max)
4329{
4330 assert(output_min < output_max);
4331
4332 for (uint32_t i = 0; i < 16; i++) {
4333 params->sse4.min[i] = output_min;
4334 params->sse4.max[i] = output_max;
4335 }
4336 return sizeof(params->sse4);
4337}
4338#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4339
4340#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4341size_t xnn_init_s8_minmax_neon_params(
4342 union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4343 int8_t output_min,
4344 int8_t output_max)
4345{
4346 assert(output_min < output_max);
4347
4348 params->neon.min = output_min;
4349 params->neon.max = output_max;
4350 return sizeof(params->neon);
4351}
4352#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4353
4354#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4355size_t xnn_init_s8_minmax_wasmsimd_params(
4356 union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4357 int8_t output_min,
4358 int8_t output_max)
4359{
4360 assert(output_min < output_max);
4361
4362 for (uint32_t i = 0; i < 8; i++) {
4363 params->wasmsimd.min[i] = output_min;
4364 params->wasmsimd.max[i] = output_max;
4365 }
4366 return sizeof(params->wasmsimd);
4367}
4368#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4369
4370size_t xnn_init_s8_minmax_scalar_params(
4371 union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4372 int8_t output_min,
4373 int8_t output_max)
4374{
4375 assert(output_min < output_max);
4376
4377 params->scalar.min = (int32_t) output_min;
4378 params->scalar.max = (int32_t) output_max;
4379 return sizeof(params->scalar);
4380}
4381
4382size_t xnn_init_u8_minmax_params(
4383 union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4384 uint8_t output_min,
4385 uint8_t output_max)
4386{
4387 assert(output_min < output_max);
4388
4389 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
4390 for (uint32_t i = 0; i < 16; i++) {
4391 params->sse2.min[i] = output_min;
4392 params->sse2.max[i] = output_max;
4393 }
4394 return sizeof(params->sse2);
4395 #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
4396 params->neon.min = output_min;
4397 params->neon.max = output_max;
4398 return sizeof(params->neon);
4399 #else
4400 params->scalar.min = (uint32_t) output_min;
4401 params->scalar.max = (uint32_t) output_max;
4402 return sizeof(params->scalar);
4403 #endif
4404}
4405
4406#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4407size_t xnn_init_u8_minmax_sse2_params(
4408 union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4409 uint8_t output_min,
4410 uint8_t output_max)
4411{
4412 assert(output_min < output_max);
4413
4414 for (uint32_t i = 0; i < 16; i++) {
4415 params->sse2.min[i] = output_min;
4416 params->sse2.max[i] = output_max;
4417 }
4418 return sizeof(params->sse2);
4419}
4420#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4421
4422#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4423size_t xnn_init_u8_minmax_wasmsimd_params(
4424 union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4425 uint8_t output_min,
4426 uint8_t output_max)
4427{
4428 assert(output_min < output_max);
4429
4430 for (uint32_t i = 0; i < 8; i++) {
4431 params->wasmsimd.min[i] = output_min;
4432 params->wasmsimd.max[i] = output_max;
4433 }
4434 return sizeof(params->wasmsimd);
4435}
4436#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4437
4438#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4439size_t xnn_init_u8_minmax_neon_params(
4440 union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4441 uint8_t output_min,
4442 uint8_t output_max)
4443{
4444 assert(output_min < output_max);
4445
4446 params->neon.min = output_min;
4447 params->neon.max = output_max;
4448 return sizeof(params->neon);
4449}
4450#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4451
4452size_t xnn_init_u8_minmax_scalar_params(
4453 union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
4454 uint8_t output_min,
4455 uint8_t output_max)
4456{
4457 assert(output_min < output_max);
4458
4459 params->scalar.min = (uint32_t) output_min;
4460 params->scalar.max = (uint32_t) output_max;
4461 return sizeof(params->scalar);
4462}
4463
4464#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4465size_t xnn_init_qu8_add_minmax_sse2_params(
4466 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4467 uint8_t a_zero_point,
4468 uint8_t b_zero_point,
4469 uint8_t output_zero_point,
4470 float a_output_scale,
4471 float b_output_scale,
4472 uint8_t output_min,
4473 uint8_t output_max)
4474{
4475 const float abs_a_output_scale = fabsf(a_output_scale);
4476 const float abs_b_output_scale = fabsf(b_output_scale);
4477 assert(abs_a_output_scale >= 0x1.0p-10f);
4478 assert(abs_b_output_scale >= 0x1.0p-10f);
4479 assert(abs_a_output_scale < 0x1.0p+8f);
4480 assert(abs_b_output_scale < 0x1.0p+8f);
4481
4482 // Compute requantization parameters.
4483 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4484 assert(max_abs_output_scale >= 0x1.0p-10f);
4485 assert(max_abs_output_scale < 0x1.0p+8f);
4486 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4487 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4488
4489 // Shift is in [12, 30] range.
4490 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4491 assert(shift <= 30);
4492 assert(shift >= 12);
4493
4494 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4495 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4496 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4497 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4498 assert(abs_a_multiplier <= INT32_C(0x00200000));
4499 assert(abs_b_multiplier <= INT32_C(0x00200000));
4500
4501 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4502 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4503
4504 const int32_t rounding = INT32_C(1) << (shift - 1);
4505 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
4506 for (uint32_t i = 0; i < 4; i++) {
4507 params->sse2.bias[i] = bias;
4508 }
4509 const uint16_t a_multiplier_lo = (uint16_t) a_multiplier;
4510 const uint16_t a_multiplier_hi = (uint16_t) ((uint32_t) a_multiplier >> 16);
4511 const uint16_t b_multiplier_lo = (uint16_t) b_multiplier;
4512 const uint16_t b_multiplier_hi = (uint16_t) ((uint32_t) b_multiplier >> 16);
4513 for (uint32_t i = 0; i < 8; i++) {
4514 params->sse2.a_multiplier_lo[i] = a_multiplier_lo;
4515 params->sse2.a_multiplier_hi[i] = a_multiplier_hi;
4516 params->sse2.b_multiplier_lo[i] = b_multiplier_lo;
4517 params->sse2.b_multiplier_hi[i] = b_multiplier_hi;
4518 }
4519 params->sse2.shift = shift;
4520 params->sse2.b_multiplier = (uint32_t) b_multiplier;
4521 for (uint32_t i = 0; i < 8; i++) {
4522 params->sse2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4523 }
4524 for (uint32_t i = 0; i < 16; i++) {
4525 params->sse2.output_min[i] = output_min;
4526 params->sse2.output_max[i] = output_max;
4527 }
4528 return sizeof(params->sse2);
4529}
4530
4531size_t xnn_init_qu8_add_minmax_sse4_params(
4532 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4533 uint8_t a_zero_point,
4534 uint8_t b_zero_point,
4535 uint8_t output_zero_point,
4536 float a_output_scale,
4537 float b_output_scale,
4538 uint8_t output_min,
4539 uint8_t output_max)
4540{
4541 const float abs_a_output_scale = fabsf(a_output_scale);
4542 const float abs_b_output_scale = fabsf(b_output_scale);
4543 assert(abs_a_output_scale >= 0x1.0p-10f);
4544 assert(abs_b_output_scale >= 0x1.0p-10f);
4545 assert(abs_a_output_scale < 0x1.0p+8f);
4546 assert(abs_b_output_scale < 0x1.0p+8f);
4547
4548 // Compute requantization parameters.
4549 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4550 assert(max_abs_output_scale >= 0x1.0p-10f);
4551 assert(max_abs_output_scale < 0x1.0p+8f);
4552 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4553 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4554
4555 // Shift is in [12, 30] range.
4556 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4557 assert(shift <= 30);
4558 assert(shift >= 12);
4559
4560 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4561 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4562 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4563 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4564 assert(abs_a_multiplier <= INT32_C(0x00200000));
4565 assert(abs_b_multiplier <= INT32_C(0x00200000));
4566
4567 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4568 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4569
4570 const int32_t rounding = INT32_C(1) << (shift - 1);
4571 const int32_t bias = rounding - a_multiplier * (int32_t) (uint32_t) a_zero_point - b_multiplier * (int32_t) (uint32_t) b_zero_point;
4572 for (uint32_t i = 0; i < 4; i++) {
4573 params->sse4.bias[i] = bias;
4574 params->sse4.a_multiplier[i] = a_multiplier;
4575 params->sse4.b_multiplier[i] = b_multiplier;
4576 }
4577 for (uint32_t i = 0; i < 2; i++) {
4578 params->sse4.shift[i] = (uint64_t) shift;
4579 }
4580 for (uint32_t i = 0; i < 8; i++) {
4581 params->sse4.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4582 }
4583 for (uint32_t i = 0; i < 16; i++) {
4584 params->sse4.output_min[i] = output_min;
4585 params->sse4.output_max[i] = output_max;
4586 }
4587 return sizeof(params->sse4);
4588}
4589
4590size_t xnn_init_qu8_add_minmax_avx2_params(
4591 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4592 uint8_t a_zero_point,
4593 uint8_t b_zero_point,
4594 uint8_t output_zero_point,
4595 float a_output_scale,
4596 float b_output_scale,
4597 uint8_t output_min,
4598 uint8_t output_max)
4599{
4600 const float abs_a_output_scale = fabsf(a_output_scale);
4601 const float abs_b_output_scale = fabsf(b_output_scale);
4602 assert(abs_a_output_scale >= 0x1.0p-10f);
4603 assert(abs_b_output_scale >= 0x1.0p-10f);
4604 assert(abs_a_output_scale < 0x1.0p+8f);
4605 assert(abs_b_output_scale < 0x1.0p+8f);
4606
4607 // Compute requantization parameters.
4608 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4609 assert(max_abs_output_scale >= 0x1.0p-10f);
4610 assert(max_abs_output_scale < 0x1.0p+8f);
4611 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4612 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4613
4614 // Shift is in [12, 30] range.
4615 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4616 assert(shift <= 30);
4617 assert(shift >= 12);
4618
4619 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4620 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4621 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4622 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4623 assert(abs_a_multiplier <= INT32_C(0x00200000));
4624 assert(abs_b_multiplier <= INT32_C(0x00200000));
4625
4626 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4627 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4628
4629 const int32_t rounding = INT32_C(1) << (shift - 1);
4630 const int32_t bias = rounding - a_multiplier * (int32_t) (uint32_t) a_zero_point - b_multiplier * (int32_t) (uint32_t) b_zero_point;
4631 for (uint32_t i = 0; i < 8; i++) {
4632 params->avx2.bias[i] = bias;
4633 params->avx2.a_multiplier[i] = a_multiplier;
4634 params->avx2.b_multiplier[i] = b_multiplier;
4635 }
4636 for (uint32_t i = 0; i < 4; i++) {
4637 params->avx2.shift[i] = (uint64_t) shift;
4638 }
4639 for (uint32_t i = 0; i < 16; i++) {
4640 params->avx2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4641 params->avx2.output_min[i] = output_min;
4642 params->avx2.output_max[i] = output_max;
4643 }
4644 return sizeof(params->avx2);
4645}
4646
4647size_t xnn_init_qu8_add_minmax_avx512_params(
4648 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4649 uint8_t a_zero_point,
4650 uint8_t b_zero_point,
4651 uint8_t output_zero_point,
4652 float a_output_scale,
4653 float b_output_scale,
4654 uint8_t output_min,
4655 uint8_t output_max)
4656{
4657 const float abs_a_output_scale = fabsf(a_output_scale);
4658 const float abs_b_output_scale = fabsf(b_output_scale);
4659 assert(abs_a_output_scale >= 0x1.0p-10f);
4660 assert(abs_b_output_scale >= 0x1.0p-10f);
4661 assert(abs_a_output_scale < 0x1.0p+8f);
4662 assert(abs_b_output_scale < 0x1.0p+8f);
4663
4664 // Compute requantization parameters.
4665 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4666 assert(max_abs_output_scale >= 0x1.0p-10f);
4667 assert(max_abs_output_scale < 0x1.0p+8f);
4668 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4669 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4670
4671 // Shift is in [12, 30] range.
4672 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4673 assert(shift <= 30);
4674 assert(shift >= 12);
4675
4676 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4677 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4678 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4679 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4680 assert(abs_a_multiplier <= INT32_C(0x00200000));
4681 assert(abs_b_multiplier <= INT32_C(0x00200000));
4682
4683 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4684 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4685
4686 const int32_t rounding = INT32_C(1) << (shift - 1);
4687 const int32_t bias = rounding - a_multiplier * (int32_t) (uint32_t) a_zero_point - b_multiplier * (int32_t) (uint32_t) b_zero_point;
4688 for (uint32_t i = 0; i < 16; i++) {
4689 params->avx512.bias[i] = bias;
4690 params->avx512.a_multiplier[i] = a_multiplier;
4691 params->avx512.b_multiplier[i] = b_multiplier;
4692 }
4693 for (uint32_t i = 0; i < 8; i++) {
4694 params->avx512.shift[i] = (uint64_t) shift;
4695 }
4696 for (uint32_t i = 0; i < 32; i++) {
4697 params->avx512.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4698 params->avx512.output_min[i] = output_min;
4699 params->avx512.output_max[i] = output_max;
4700 }
4701 return sizeof(params->avx512);
4702}
4703#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4704
4705#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4706size_t xnn_init_qu8_add_minmax_neon_params(
4707 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4708 uint8_t a_zero_point,
4709 uint8_t b_zero_point,
4710 uint8_t output_zero_point,
4711 float a_output_scale,
4712 float b_output_scale,
4713 uint8_t output_min,
4714 uint8_t output_max)
4715{
4716 const float abs_a_output_scale = fabsf(a_output_scale);
4717 const float abs_b_output_scale = fabsf(b_output_scale);
4718 assert(abs_a_output_scale >= 0x1.0p-10f);
4719 assert(abs_b_output_scale >= 0x1.0p-10f);
4720 assert(abs_a_output_scale < 0x1.0p+8f);
4721 assert(abs_b_output_scale < 0x1.0p+8f);
4722
4723 // Compute requantization parameters.
4724 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4725 assert(max_abs_output_scale >= 0x1.0p-10f);
4726 assert(max_abs_output_scale < 0x1.0p+8f);
4727 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4728 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4729
4730 // Shift is in [12, 30] range.
4731 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4732 assert(shift <= 30);
4733 assert(shift >= 12);
4734
4735 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4736 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4737 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4738 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4739 assert(abs_a_multiplier <= INT32_C(0x00200000));
4740 assert(abs_b_multiplier <= INT32_C(0x00200000));
4741
4742 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4743 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4744
4745 params->neon.a_zero_point = a_zero_point;
4746 params->neon.b_zero_point = b_zero_point;
4747 params->neon.a_multiplier = (int32_t) a_multiplier;
4748 params->neon.b_multiplier = (int32_t) b_multiplier;
4749 params->neon.right_shift = (int32_t) -shift;
4750 params->neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
4751 params->neon.output_min = output_min;
4752 params->neon.output_max = output_max;
4753 return sizeof(params->neon);
4754}
4755#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4756
4757#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4758size_t xnn_init_qu8_add_minmax_wasmsimd_params(
4759 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4760 uint8_t a_zero_point,
4761 uint8_t b_zero_point,
4762 uint8_t output_zero_point,
4763 float a_output_scale,
4764 float b_output_scale,
4765 uint8_t output_min,
4766 uint8_t output_max)
4767{
4768 const float abs_a_output_scale = fabsf(a_output_scale);
4769 const float abs_b_output_scale = fabsf(b_output_scale);
4770 assert(abs_a_output_scale >= 0x1.0p-10f);
4771 assert(abs_b_output_scale >= 0x1.0p-10f);
4772 assert(abs_a_output_scale < 0x1.0p+8f);
4773 assert(abs_b_output_scale < 0x1.0p+8f);
4774
4775 // Compute requantization parameters.
4776 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4777 assert(max_abs_output_scale >= 0x1.0p-10f);
4778 assert(max_abs_output_scale < 0x1.0p+8f);
4779 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4780 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4781
4782 // Shift is in [12, 30] range.
4783 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4784 assert(shift <= 30);
4785 assert(shift >= 12);
4786
4787 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4788 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4789 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4790 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4791 assert(abs_a_multiplier <= INT32_C(0x00200000));
4792 assert(abs_b_multiplier <= INT32_C(0x00200000));
4793
4794 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4795 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4796
4797 const int32_t rounding = INT32_C(1) << (shift - 1);
4798 const int32_t bias = rounding - a_multiplier * (int32_t) (uint32_t) a_zero_point - b_multiplier * (int32_t) (uint32_t) b_zero_point;
4799 for (uint32_t i = 0; i < 2; i++) {
4800 params->wasmsimd.bias[i] = bias;
4801 params->wasmsimd.a_multiplier[i] = a_multiplier;
4802 params->wasmsimd.b_multiplier[i] = b_multiplier;
4803 }
4804 params->wasmsimd.shift = shift;
4805 for (uint32_t i = 0; i < 4; i++) {
4806 params->wasmsimd.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4807 }
4808 for (uint32_t i = 0; i < 8; i++) {
4809 params->wasmsimd.output_min[i] = output_min;
4810 params->wasmsimd.output_max[i] = output_max;
4811 }
4812 return sizeof(params->wasmsimd);
4813}
4814#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4815
4816size_t xnn_init_qu8_add_minmax_scalar_params(
4817 union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4818 uint8_t a_zero_point,
4819 uint8_t b_zero_point,
4820 uint8_t output_zero_point,
4821 float a_output_scale,
4822 float b_output_scale,
4823 uint8_t output_min,
4824 uint8_t output_max)
4825{
4826 const float abs_a_output_scale = fabsf(a_output_scale);
4827 const float abs_b_output_scale = fabsf(b_output_scale);
4828 assert(abs_a_output_scale >= 0x1.0p-10f);
4829 assert(abs_b_output_scale >= 0x1.0p-10f);
4830 assert(abs_a_output_scale < 0x1.0p+8f);
4831 assert(abs_b_output_scale < 0x1.0p+8f);
4832
4833 // Compute requantization parameters.
4834 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4835 assert(max_abs_output_scale >= 0x1.0p-10f);
4836 assert(max_abs_output_scale < 0x1.0p+8f);
4837 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4838 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4839
4840 // Shift is in [12, 30] range.
4841 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4842 assert(shift <= 30);
4843 assert(shift >= 12);
4844
4845 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4846 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4847 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4848 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4849 assert(abs_a_multiplier <= INT32_C(0x00200000));
4850 assert(abs_b_multiplier <= INT32_C(0x00200000));
4851
4852 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4853 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4854
4855 const int32_t rounding = INT32_C(1) << (shift - 1);
4856 params->scalar.bias = rounding - a_multiplier * (int32_t) (uint32_t) a_zero_point - b_multiplier * (int32_t) (uint32_t) b_zero_point;
4857 params->scalar.a_multiplier = a_multiplier;
4858 params->scalar.b_multiplier = b_multiplier;
4859 params->scalar.shift = shift;
4860 params->scalar.output_min_less_zero_point = (int32_t) (uint32_t) output_min - (int32_t) (uint32_t) output_zero_point;
4861 params->scalar.output_max_less_zero_point = (int32_t) (uint32_t) output_max - (int32_t) (uint32_t) output_zero_point;
4862 params->scalar.output_zero_point = (int32_t) (uint32_t) output_zero_point;
4863 return sizeof(params->scalar);
4864}
4865
4866#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4867size_t xnn_init_qs8_add_minmax_sse2_params(
4868 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4869 int8_t a_zero_point,
4870 int8_t b_zero_point,
4871 int8_t output_zero_point,
4872 float a_output_scale,
4873 float b_output_scale,
4874 int8_t output_min,
4875 int8_t output_max)
4876{
4877 const float abs_a_output_scale = fabsf(a_output_scale);
4878 const float abs_b_output_scale = fabsf(b_output_scale);
4879 assert(abs_a_output_scale >= 0x1.0p-10f);
4880 assert(abs_b_output_scale >= 0x1.0p-10f);
4881 assert(abs_a_output_scale < 0x1.0p+8f);
4882 assert(abs_b_output_scale < 0x1.0p+8f);
4883
4884 // Compute requantization parameters.
4885 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4886 assert(max_abs_output_scale >= 0x1.0p-10f);
4887 assert(max_abs_output_scale < 0x1.0p+8f);
4888 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4889 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4890
4891 // Shift is in [12, 30] range.
4892 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4893 assert(shift <= 30);
4894 assert(shift >= 12);
4895
4896 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4897 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4898 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4899 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4900 assert(abs_a_multiplier <= INT32_C(0x00200000));
4901 assert(abs_b_multiplier <= INT32_C(0x00200000));
4902
4903 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4904 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4905
4906 const int32_t rounding = INT32_C(1) << (shift - 1);
4907 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
4908 for (uint32_t i = 0; i < 4; i++) {
4909 params->sse2.bias[i] = bias;
4910 }
4911 const uint16_t a_multiplier_lo = (uint16_t) a_multiplier;
4912 const uint16_t a_multiplier_hi = (uint16_t) ((uint32_t) a_multiplier >> 16);
4913 const uint16_t b_multiplier_lo = (uint16_t) b_multiplier;
4914 const uint16_t b_multiplier_hi = (uint16_t) ((uint32_t) b_multiplier >> 16);
4915 for (uint32_t i = 0; i < 8; i++) {
4916 params->sse2.a_multiplier_lo[i] = a_multiplier_lo;
4917 params->sse2.a_multiplier_hi[i] = a_multiplier_hi;
4918 params->sse2.b_multiplier_lo[i] = b_multiplier_lo;
4919 params->sse2.b_multiplier_hi[i] = b_multiplier_hi;
4920 }
4921 params->sse2.shift = shift;
4922 params->sse2.b_multiplier = (uint32_t) b_multiplier;
4923 for (uint32_t i = 0; i < 8; i++) {
4924 params->sse2.output_zero_point[i] = (int16_t) output_zero_point;
4925 params->sse2.output_min[i] = (int16_t) output_min;
4926 params->sse2.output_max[i] = (int16_t) output_max;
4927 }
4928 return sizeof(params->sse2);
4929}
4930
4931size_t xnn_init_qs8_add_minmax_sse4_mul16_params(
4932 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4933 int8_t a_zero_point,
4934 int8_t b_zero_point,
4935 int8_t output_zero_point,
4936 float a_output_scale,
4937 float b_output_scale,
4938 int8_t output_min,
4939 int8_t output_max)
4940{
4941 const float abs_a_output_scale = fabsf(a_output_scale);
4942 const float abs_b_output_scale = fabsf(b_output_scale);
4943 assert(abs_a_output_scale >= 0x1.0p-10f);
4944 assert(abs_b_output_scale >= 0x1.0p-10f);
4945 assert(abs_a_output_scale < 0x1.0p+8f);
4946 assert(abs_b_output_scale < 0x1.0p+8f);
4947
4948 // Compute requantization parameters.
4949 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4950 assert(max_abs_output_scale >= 0x1.0p-10f);
4951 assert(max_abs_output_scale < 0x1.0p+8f);
4952 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4953 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4954
4955 // Shift is in [12, 30] range.
4956 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4957 assert(shift <= 30);
4958 assert(shift >= 12);
4959
4960 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4961 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4962 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4963 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4964 assert(abs_a_multiplier <= INT32_C(0x00200000));
4965 assert(abs_b_multiplier <= INT32_C(0x00200000));
4966
4967 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4968 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4969
4970 const int32_t rounding = INT32_C(1) << (shift - 1);
4971 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
4972 for (uint32_t i = 0; i < 4; i++) {
4973 params->sse4_mul16.bias[i] = bias;
4974 }
4975 const uint16_t a_multiplier_lo = (uint16_t) a_multiplier;
4976 const uint16_t a_multiplier_hi = (uint16_t) ((uint32_t) a_multiplier >> 16);
4977 const uint16_t b_multiplier_lo = (uint16_t) b_multiplier;
4978 const uint16_t b_multiplier_hi = (uint16_t) ((uint32_t) b_multiplier >> 16);
4979 for (uint32_t i = 0; i < 8; i++) {
4980 params->sse4_mul16.a_multiplier_lo[i] = a_multiplier_lo;
4981 params->sse4_mul16.a_multiplier_hi[i] = a_multiplier_hi;
4982 params->sse4_mul16.b_multiplier_lo[i] = b_multiplier_lo;
4983 params->sse4_mul16.b_multiplier_hi[i] = b_multiplier_hi;
4984 }
4985 params->sse4_mul16.shift = shift;
4986 params->sse4_mul16.b_multiplier = (uint32_t) b_multiplier;
4987 for (uint32_t i = 0; i < 8; i++) {
4988 params->sse4_mul16.output_zero_point[i] = (int16_t) output_zero_point;
4989 }
4990 for (uint32_t i = 0; i < 16; i++) {
4991 params->sse4_mul16.output_min[i] = output_min;
4992 params->sse4_mul16.output_max[i] = output_max;
4993 }
4994 return sizeof(params->sse4_mul16);
4995}
4996
4997size_t xnn_init_qs8_add_minmax_sse4_mul32_params(
4998 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4999 int8_t a_zero_point,
5000 int8_t b_zero_point,
5001 int8_t output_zero_point,
5002 float a_output_scale,
5003 float b_output_scale,
5004 int8_t output_min,
5005 int8_t output_max)
5006{
5007 const float abs_a_output_scale = fabsf(a_output_scale);
5008 const float abs_b_output_scale = fabsf(b_output_scale);
5009 assert(abs_a_output_scale >= 0x1.0p-10f);
5010 assert(abs_b_output_scale >= 0x1.0p-10f);
5011 assert(abs_a_output_scale < 0x1.0p+8f);
5012 assert(abs_b_output_scale < 0x1.0p+8f);
5013
5014 // Compute requantization parameters.
5015 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5016 assert(max_abs_output_scale >= 0x1.0p-10f);
5017 assert(max_abs_output_scale < 0x1.0p+8f);
5018 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5019 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5020
5021 // Shift is in [12, 30] range.
5022 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5023 assert(shift <= 30);
5024 assert(shift >= 12);
5025
5026 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5027 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5028 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5029 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5030 assert(abs_a_multiplier <= INT32_C(0x00200000));
5031 assert(abs_b_multiplier <= INT32_C(0x00200000));
5032
5033 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5034 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5035
5036 const int32_t rounding = INT32_C(1) << (shift - 1);
5037 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
5038 for (uint32_t i = 0; i < 4; i++) {
5039 params->sse4_mul32.bias[i] = bias;
5040 params->sse4_mul32.a_multiplier[i] = a_multiplier;
5041 params->sse4_mul32.b_multiplier[i] = b_multiplier;
5042 }
5043 for (uint32_t i = 0; i < 2; i++) {
5044 params->sse4_mul32.shift[i] = (uint64_t) shift;
5045 }
5046 for (uint32_t i = 0; i < 8; i++) {
5047 params->sse4_mul32.output_zero_point[i] = (int16_t) output_zero_point;
5048 }
5049 for (uint32_t i = 0; i < 16; i++) {
5050 params->sse4_mul32.output_min[i] = output_min;
5051 params->sse4_mul32.output_max[i] = output_max;
5052 }
5053 return sizeof(params->sse4_mul32);
5054}
5055
5056size_t xnn_init_qs8_add_minmax_avx2_params(
5057 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
5058 int8_t a_zero_point,
5059 int8_t b_zero_point,
5060 int8_t output_zero_point,
5061 float a_output_scale,
5062 float b_output_scale,
5063 int8_t output_min,
5064 int8_t output_max)
5065{
5066 const float abs_a_output_scale = fabsf(a_output_scale);
5067 const float abs_b_output_scale = fabsf(b_output_scale);
5068 assert(abs_a_output_scale >= 0x1.0p-10f);
5069 assert(abs_b_output_scale >= 0x1.0p-10f);
5070 assert(abs_a_output_scale < 0x1.0p+8f);
5071 assert(abs_b_output_scale < 0x1.0p+8f);
5072
5073 // Compute requantization parameters.
5074 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5075 assert(max_abs_output_scale >= 0x1.0p-10f);
5076 assert(max_abs_output_scale < 0x1.0p+8f);
5077 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5078 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5079
5080 // Shift is in [12, 30] range.
5081 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5082 assert(shift <= 30);
5083 assert(shift >= 12);
5084
5085 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5086 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5087 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5088 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5089 assert(abs_a_multiplier <= INT32_C(0x00200000));
5090 assert(abs_b_multiplier <= INT32_C(0x00200000));
5091
5092 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5093 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5094
5095 const int32_t rounding = INT32_C(1) << (shift - 1);
5096 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
5097 for (uint32_t i = 0; i < 8; i++) {
5098 params->avx2.bias[i] = bias;
5099 params->avx2.a_multiplier[i] = a_multiplier;
5100 params->avx2.b_multiplier[i] = b_multiplier;
5101 }
5102 for (uint32_t i = 0; i < 4; i++) {
5103 params->avx2.shift[i] = (uint64_t) shift;
5104 }
5105 for (uint32_t i = 0; i < 16; i++) {
5106 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
5107 params->avx2.output_min[i] = output_min;
5108 params->avx2.output_max[i] = output_max;
5109 }
5110 return sizeof(params->avx2);
5111}
5112
5113size_t xnn_init_qs8_add_minmax_avx512_params(
5114 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
5115 int8_t a_zero_point,
5116 int8_t b_zero_point,
5117 int8_t output_zero_point,
5118 float a_output_scale,
5119 float b_output_scale,
5120 int8_t output_min,
5121 int8_t output_max)
5122{
5123 const float abs_a_output_scale = fabsf(a_output_scale);
5124 const float abs_b_output_scale = fabsf(b_output_scale);
5125 assert(abs_a_output_scale >= 0x1.0p-10f);
5126 assert(abs_b_output_scale >= 0x1.0p-10f);
5127 assert(abs_a_output_scale < 0x1.0p+8f);
5128 assert(abs_b_output_scale < 0x1.0p+8f);
5129
5130 // Compute requantization parameters.
5131 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5132 assert(max_abs_output_scale >= 0x1.0p-10f);
5133 assert(max_abs_output_scale < 0x1.0p+8f);
5134 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5135 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5136
5137 // Shift is in [12, 30] range.
5138 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5139 assert(shift <= 30);
5140 assert(shift >= 12);
5141
5142 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5143 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5144 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5145 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5146 assert(abs_a_multiplier <= INT32_C(0x00200000));
5147 assert(abs_b_multiplier <= INT32_C(0x00200000));
5148
5149 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5150 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5151
5152 const int32_t rounding = INT32_C(1) << (shift - 1);
5153 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
5154 for (uint32_t i = 0; i < 16; i++) {
5155 params->avx512.bias[i] = bias;
5156 params->avx512.a_multiplier[i] = a_multiplier;
5157 params->avx512.b_multiplier[i] = b_multiplier;
5158 }
5159 for (uint32_t i = 0; i < 8; i++) {
5160 params->avx512.shift[i] = (uint64_t) shift;
5161 }
5162 for (uint32_t i = 0; i < 32; i++) {
5163 params->avx512.output_zero_point[i] = (int16_t) output_zero_point;
5164 params->avx512.output_min[i] = output_min;
5165 params->avx512.output_max[i] = output_max;
5166 }
5167 return sizeof(params->avx512);
5168}
5169#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5170
5171#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5172size_t xnn_init_qs8_add_minmax_neon_params(
5173 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
5174 int8_t a_zero_point,
5175 int8_t b_zero_point,
5176 int8_t output_zero_point,
5177 float a_output_scale,
5178 float b_output_scale,
5179 int8_t output_min,
5180 int8_t output_max)
5181{
5182 const float abs_a_output_scale = fabsf(a_output_scale);
5183 const float abs_b_output_scale = fabsf(b_output_scale);
5184 assert(abs_a_output_scale >= 0x1.0p-10f);
5185 assert(abs_b_output_scale >= 0x1.0p-10f);
5186 assert(abs_a_output_scale < 0x1.0p+8f);
5187 assert(abs_b_output_scale < 0x1.0p+8f);
5188
5189 // Compute requantization parameters.
5190 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5191 assert(max_abs_output_scale >= 0x1.0p-10f);
5192 assert(max_abs_output_scale < 0x1.0p+8f);
5193 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5194 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5195
5196 // Shift is in [12, 30] range.
5197 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5198 assert(shift <= 30);
5199 assert(shift >= 12);
5200
5201 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5202 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5203 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5204 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5205 assert(abs_a_multiplier <= INT32_C(0x00200000));
5206 assert(abs_b_multiplier <= INT32_C(0x00200000));
5207
5208 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5209 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5210
5211 params->neon.a_zero_point = a_zero_point;
5212 params->neon.b_zero_point = b_zero_point;
5213 params->neon.a_multiplier = (int32_t) a_multiplier;
5214 params->neon.b_multiplier = (int32_t) b_multiplier;
5215 params->neon.right_shift = (int32_t) -shift;
5216 params->neon.output_zero_point = (int16_t) output_zero_point;
5217 params->neon.output_min = output_min;
5218 params->neon.output_max = output_max;
5219 return sizeof(params->neon);
5220}
5221#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5222
5223#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5224size_t xnn_init_qs8_add_minmax_wasmsimd_params(
5225 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
5226 int8_t a_zero_point,
5227 int8_t b_zero_point,
5228 int8_t output_zero_point,
5229 float a_output_scale,
5230 float b_output_scale,
5231 int8_t output_min,
5232 int8_t output_max)
5233{
5234 const float abs_a_output_scale = fabsf(a_output_scale);
5235 const float abs_b_output_scale = fabsf(b_output_scale);
5236 assert(abs_a_output_scale >= 0x1.0p-10f);
5237 assert(abs_b_output_scale >= 0x1.0p-10f);
5238 assert(abs_a_output_scale < 0x1.0p+8f);
5239 assert(abs_b_output_scale < 0x1.0p+8f);
5240
5241 // Compute requantization parameters.
5242 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5243 assert(max_abs_output_scale >= 0x1.0p-10f);
5244 assert(max_abs_output_scale < 0x1.0p+8f);
5245 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5246 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5247
5248 // Shift is in [12, 30] range.
5249 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5250 assert(shift <= 30);
5251 assert(shift >= 12);
5252
5253 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5254 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5255 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5256 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5257 assert(abs_a_multiplier <= INT32_C(0x00200000));
5258 assert(abs_b_multiplier <= INT32_C(0x00200000));
5259
5260 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5261 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5262
5263 const int32_t rounding = INT32_C(1) << (shift - 1);
5264 const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
5265 for (uint32_t i = 0; i < 2; i++) {
5266 params->wasmsimd.bias[i] = bias;
5267 params->wasmsimd.a_multiplier[i] = a_multiplier;
5268 params->wasmsimd.b_multiplier[i] = b_multiplier;
5269 }
5270 params->wasmsimd.shift = shift;
5271 for (uint32_t i = 0; i < 4; i++) {
5272 params->wasmsimd.output_zero_point[i] = (int16_t) output_zero_point;
5273 }
5274 for (uint32_t i = 0; i < 8; i++) {
5275 params->wasmsimd.output_min[i] = output_min;
5276 params->wasmsimd.output_max[i] = output_max;
5277 }
5278 return sizeof(params->wasmsimd);
5279}
5280#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5281
5282size_t xnn_init_qs8_add_minmax_scalar_params(
5283 union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
5284 int8_t a_zero_point,
5285 int8_t b_zero_point,
5286 int8_t output_zero_point,
5287 float a_output_scale,
5288 float b_output_scale,
5289 int8_t output_min,
5290 int8_t output_max)
5291{
5292 const float abs_a_output_scale = fabsf(a_output_scale);
5293 const float abs_b_output_scale = fabsf(b_output_scale);
5294 assert(abs_a_output_scale >= 0x1.0p-10f);
5295 assert(abs_b_output_scale >= 0x1.0p-10f);
5296 assert(abs_a_output_scale < 0x1.0p+8f);
5297 assert(abs_b_output_scale < 0x1.0p+8f);
5298
5299 // Compute requantization parameters.
5300 const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
5301 assert(max_abs_output_scale >= 0x1.0p-10f);
5302 assert(max_abs_output_scale < 0x1.0p+8f);
5303 const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
5304 const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
5305
5306 // Shift is in [12, 30] range.
5307 const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
5308 assert(shift <= 30);
5309 assert(shift >= 12);
5310
5311 // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
5312 const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
5313 const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
5314 assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
5315 assert(abs_a_multiplier <= INT32_C(0x00200000));
5316 assert(abs_b_multiplier <= INT32_C(0x00200000));
5317
5318 const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
5319 const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
5320
5321 const int32_t rounding = INT32_C(1) << (shift - 1);
5322 params->scalar.bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
5323 params->scalar.a_multiplier = a_multiplier;
5324 params->scalar.b_multiplier = b_multiplier;
5325 params->scalar.shift = shift;
5326 params->scalar.output_min_less_zero_point = (int32_t) output_min - (int32_t) output_zero_point;
5327 params->scalar.output_max_less_zero_point = (int32_t) output_max - (int32_t) output_zero_point;
5328 params->scalar.output_zero_point = (int32_t) output_zero_point;
5329 return sizeof(params->scalar);
5330}
5331
5332size_t xnn_init_qu8_mul_minmax_fp32_scalar_params(
5333 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5334 uint8_t a_zero_point,
5335 uint8_t b_zero_point,
5336 uint8_t output_zero_point,
5337 float product_output_scale,
5338 uint8_t output_min,
5339 uint8_t output_max)
5340{
5341 assert(product_output_scale >= 0x1.0p-16f);
5342 assert(product_output_scale < 0x1.0p+8f);
5343
5344 params->fp32_scalar.a_zero_point = (int16_t) (uint16_t) a_zero_point;
5345 params->fp32_scalar.b_zero_point = (int16_t) (uint16_t) b_zero_point;
5346 params->fp32_scalar.scale = product_output_scale;
5347 params->fp32_scalar.output_min_less_zero_point = (float) (int32_t) ((uint32_t) output_min - (uint32_t) output_zero_point);
5348 params->fp32_scalar.output_max_less_zero_point = (float) (int32_t) ((uint32_t) output_max - (uint32_t) output_zero_point);
5349 params->fp32_scalar.magic_bias = 12582912.0f;
5350 params->fp32_scalar.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) (uint32_t) output_zero_point;
5351 return sizeof(params->fp32_scalar);
5352}
5353
5354#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5355size_t xnn_init_qu8_mul_minmax_fp32_neon_params(
5356 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5357 uint8_t a_zero_point,
5358 uint8_t b_zero_point,
5359 uint8_t output_zero_point,
5360 float product_output_scale,
5361 uint8_t output_min,
5362 uint8_t output_max)
5363{
5364 assert(product_output_scale >= 0x1.0p-16f);
5365 assert(product_output_scale < 0x1.0p+8f);
5366
5367 params->fp32_neon.a_zero_point[0] = a_zero_point;
5368 params->fp32_neon.a_zero_point[1] = a_zero_point;
5369 params->fp32_neon.b_zero_point[0] = b_zero_point;
5370 params->fp32_neon.b_zero_point[1] = b_zero_point;
5371 params->fp32_neon.scale = product_output_scale;
5372 params->fp32_neon.magic_bias = 12582912.0f;
5373 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5374 params->fp32_neon.output_min = output_min;
5375 params->fp32_neon.output_max = output_max;
5376 return sizeof(params->fp32_neon);
5377}
5378
5379size_t xnn_init_qu8_mul_minmax_fp32_neonv8_params(
5380 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5381 uint8_t a_zero_point,
5382 uint8_t b_zero_point,
5383 uint8_t output_zero_point,
5384 float product_output_scale,
5385 uint8_t output_min,
5386 uint8_t output_max)
5387{
5388 assert(product_output_scale >= 0x1.0p-16f);
5389 assert(product_output_scale < 0x1.0p+8f);
5390
5391 params->fp32_neonv8.a_zero_point[0] = a_zero_point;
5392 params->fp32_neonv8.a_zero_point[1] = a_zero_point;
5393 params->fp32_neonv8.b_zero_point[0] = b_zero_point;
5394 params->fp32_neonv8.b_zero_point[1] = b_zero_point;
5395 params->fp32_neonv8.scale = product_output_scale;
5396 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
5397 params->fp32_neonv8.output_min = output_min;
5398 params->fp32_neonv8.output_max = output_max;
5399 return sizeof(params->fp32_neonv8);
5400}
5401
5402size_t xnn_init_qu8_mul_minmax_rndnu_neon_params(
5403 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5404 uint8_t a_zero_point,
5405 uint8_t b_zero_point,
5406 uint8_t output_zero_point,
5407 float product_output_scale,
5408 uint8_t output_min,
5409 uint8_t output_max)
5410{
5411 assert(product_output_scale >= 0x1.0p-16f);
5412 assert(product_output_scale < 0x1.0p+8f);
5413
5414 // Compute requantization parameters.
5415 const uint32_t scale_bits = float_as_uint32(product_output_scale);
5416
5417 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
5418 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
5419 assert(multiplier >= INT32_C(0x40000000));
5420 assert(multiplier <= INT32_C(0x7FFFFF80));
5421
5422 // Shift is in [-8, 15] range.
5423 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
5424 assert(shift >= -8);
5425 assert(shift < 16);
5426
5427 // Split shift into pre_shift + post_shift, post_shift in [1, 15] range.
5428 const int32_t post_shift = math_max_s32(shift, 1);
5429 const int32_t pre_shift = shift - post_shift;
5430
5431 params->rndnu_neon.a_zero_point[0] = a_zero_point;
5432 params->rndnu_neon.a_zero_point[1] = a_zero_point;
5433 params->rndnu_neon.b_zero_point[0] = b_zero_point;
5434 params->rndnu_neon.b_zero_point[1] = b_zero_point;
5435 params->rndnu_neon.left_pre_shift = -pre_shift;
5436 params->rndnu_neon.multiplier = multiplier;
5437 params->rndnu_neon.left_post_shift = -post_shift;
5438 params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
5439 params->rndnu_neon.output_min = output_min;
5440 params->rndnu_neon.output_max = output_max;
5441 return sizeof(params->rndnu_neon);
5442}
5443#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5444
5445#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5446size_t xnn_init_qu8_mul_minmax_fp32_sse2_params(
5447 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5448 uint8_t a_zero_point,
5449 uint8_t b_zero_point,
5450 uint8_t output_zero_point,
5451 float product_output_scale,
5452 uint8_t output_min,
5453 uint8_t output_max)
5454{
5455 assert(product_output_scale >= 0x1.0p-16f);
5456 assert(product_output_scale < 0x1.0p+8f);
5457
5458 for (uint32_t i = 0; i < 8; i++) {
5459 params->fp32_sse2.a_zero_point[i] = (int16_t) (uint16_t) a_zero_point;
5460 params->fp32_sse2.b_zero_point[i] = (int16_t) (uint16_t) b_zero_point;
5461 }
5462 for (uint32_t i = 0; i < 4; i++) {
5463 params->fp32_sse2.scale[i] = product_output_scale;
5464 }
5465 for (uint32_t i = 0; i < 8; i++) {
5466 params->fp32_sse2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
5467 }
5468 for (uint32_t i = 0; i < 16; i++) {
5469 params->fp32_sse2.output_min[i] = output_min;
5470 params->fp32_sse2.output_max[i] = output_max;
5471 }
5472 return sizeof(params->fp32_sse2);
5473}
5474#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5475
5476#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5477size_t xnn_init_qu8_mul_minmax_fp32_wasmsimd_params(
5478 union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5479 uint8_t a_zero_point,
5480 uint8_t b_zero_point,
5481 uint8_t output_zero_point,
5482 float product_output_scale,
5483 uint8_t output_min,
5484 uint8_t output_max)
5485{
5486 assert(product_output_scale >= 0x1.0p-16f);
5487 assert(product_output_scale < 0x1.0p+8f);
5488
5489 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5490 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
5491 const int32_t magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5492 for (uint32_t i = 0; i < 4; i++) {
5493 params->fp32_wasmsimd.a_zero_point[i] = (int16_t) a_zero_point;
5494 params->fp32_wasmsimd.b_zero_point[i] = (int16_t) b_zero_point;
5495 }
5496 for (uint32_t i = 0; i < 2; i++) {
5497 params->fp32_wasmsimd.scale[i] = product_output_scale;
5498 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
5499 params->fp32_wasmsimd.magic_min[i] = magic_min;
5500 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_output_zero_point;
5501 }
5502 for (uint32_t i = 0; i < 8; i++) {
5503 params->fp32_wasmsimd.output_max[i] = output_max;
5504 }
5505 return sizeof(params->fp32_wasmsimd);
5506}
5507#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5508
5509size_t xnn_init_qs8_mul_minmax_fp32_scalar_params(
5510 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5511 int8_t a_zero_point,
5512 int8_t b_zero_point,
5513 int8_t output_zero_point,
5514 float product_output_scale,
5515 int8_t output_min,
5516 int8_t output_max)
5517{
5518 assert(product_output_scale >= 0x1.0p-16f);
5519 assert(product_output_scale < 0x1.0p+8f);
5520
5521 params->fp32_scalar.a_zero_point = (int16_t) a_zero_point;
5522 params->fp32_scalar.b_zero_point = (int16_t) b_zero_point;
5523 params->fp32_scalar.scale = product_output_scale;
5524 params->fp32_scalar.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5525 params->fp32_scalar.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
5526 params->fp32_scalar.magic_bias = 12582912.0f;
5527 params->fp32_scalar.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5528 return sizeof(params->fp32_scalar);
5529}
5530
5531#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5532size_t xnn_init_qs8_mul_minmax_fp32_neon_params(
5533 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5534 int8_t a_zero_point,
5535 int8_t b_zero_point,
5536 int8_t output_zero_point,
5537 float product_output_scale,
5538 int8_t output_min,
5539 int8_t output_max)
5540{
5541 assert(product_output_scale >= 0x1.0p-16f);
5542 assert(product_output_scale < 0x1.0p+8f);
5543
5544 params->fp32_neon.a_zero_point[0] = a_zero_point;
5545 params->fp32_neon.a_zero_point[1] = a_zero_point;
5546 params->fp32_neon.b_zero_point[0] = b_zero_point;
5547 params->fp32_neon.b_zero_point[1] = b_zero_point;
5548 params->fp32_neon.scale = product_output_scale;
5549 params->fp32_neon.magic_bias = 12582912.0f;
5550 params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5551 params->fp32_neon.output_min = output_min;
5552 params->fp32_neon.output_max = output_max;
5553 return sizeof(params->fp32_neon);
5554}
5555
5556size_t xnn_init_qs8_mul_minmax_fp32_neonv8_params(
5557 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5558 int8_t a_zero_point,
5559 int8_t b_zero_point,
5560 int8_t output_zero_point,
5561 float product_output_scale,
5562 int8_t output_min,
5563 int8_t output_max)
5564{
5565 assert(product_output_scale >= 0x1.0p-16f);
5566 assert(product_output_scale < 0x1.0p+8f);
5567
5568 params->fp32_neonv8.a_zero_point[0] = a_zero_point;
5569 params->fp32_neonv8.a_zero_point[1] = a_zero_point;
5570 params->fp32_neonv8.b_zero_point[0] = b_zero_point;
5571 params->fp32_neonv8.b_zero_point[1] = b_zero_point;
5572 params->fp32_neonv8.scale = product_output_scale;
5573 params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
5574 params->fp32_neonv8.output_min = output_min;
5575 params->fp32_neonv8.output_max = output_max;
5576 return sizeof(params->fp32_neonv8);
5577}
5578
5579size_t xnn_init_qs8_mul_minmax_rndnu_neon_params(
5580 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5581 int8_t a_zero_point,
5582 int8_t b_zero_point,
5583 int8_t output_zero_point,
5584 float product_output_scale,
5585 int8_t output_min,
5586 int8_t output_max)
5587{
5588 assert(product_output_scale >= 0x1.0p-16f);
5589 assert(product_output_scale < 0x1.0p+8f);
5590
5591 // Compute requantization parameters.
5592 const uint32_t scale_bits = float_as_uint32(product_output_scale);
5593
5594 // Multiplier is in [0x40000000, 0x7FFFFF80] range.
5595 const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
5596 assert(multiplier >= INT32_C(0x40000000));
5597 assert(multiplier <= INT32_C(0x7FFFFF80));
5598
5599 // Shift is in [-8, 15] range.
5600 const int32_t shift = 127 + 31 - 32 - (scale_bits >> 23);
5601 assert(shift >= -8);
5602 assert(shift < 16);
5603
5604 // Split shift into pre_shift + post_shift, post_shift in [1, 15] range.
5605 const int32_t post_shift = math_max_s32(shift, 1);
5606 const int32_t pre_shift = shift - post_shift;
5607
5608 params->rndnu_neon.a_zero_point[0] = a_zero_point;
5609 params->rndnu_neon.a_zero_point[1] = a_zero_point;
5610 params->rndnu_neon.b_zero_point[0] = b_zero_point;
5611 params->rndnu_neon.b_zero_point[1] = b_zero_point;
5612 params->rndnu_neon.left_pre_shift = -pre_shift;
5613 params->rndnu_neon.multiplier = multiplier;
5614 params->rndnu_neon.left_post_shift = -post_shift;
5615 params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
5616 params->rndnu_neon.output_min = output_min;
5617 params->rndnu_neon.output_max = output_max;
5618 return sizeof(params->rndnu_neon);
5619}
5620#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5621
5622#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5623size_t xnn_init_qs8_mul_minmax_fp32_sse2_params(
5624 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5625 int8_t a_zero_point,
5626 int8_t b_zero_point,
5627 int8_t output_zero_point,
5628 float product_output_scale,
5629 int8_t output_min,
5630 int8_t output_max)
5631{
5632 assert(product_output_scale >= 0x1.0p-16f);
5633 assert(product_output_scale < 0x1.0p+8f);
5634
5635 for (uint32_t i = 0; i < 8; i++) {
5636 params->fp32_sse2.a_zero_point[i] = (int16_t) a_zero_point;
5637 params->fp32_sse2.b_zero_point[i] = (int16_t) b_zero_point;
5638 }
5639 for (uint32_t i = 0; i < 4; i++) {
5640 params->fp32_sse2.scale[i] = product_output_scale;
5641 }
5642 for (uint32_t i = 0; i < 8; i++) {
5643 params->fp32_sse2.output_zero_point[i] = (int16_t) output_zero_point;
5644 }
5645 for (uint32_t i = 0; i < 8; i++) {
5646 params->fp32_sse2.output_min[i] = (int16_t) output_min;
5647 params->fp32_sse2.output_max[i] = (int16_t) output_max;
5648 }
5649 return sizeof(params->fp32_sse2);
5650}
5651
5652size_t xnn_init_qs8_mul_minmax_fp32_sse4_params(
5653 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5654 int8_t a_zero_point,
5655 int8_t b_zero_point,
5656 int8_t output_zero_point,
5657 float product_output_scale,
5658 int8_t output_min,
5659 int8_t output_max)
5660{
5661 assert(product_output_scale >= 0x1.0p-16f);
5662 assert(product_output_scale < 0x1.0p+8f);
5663
5664 for (uint32_t i = 0; i < 8; i++) {
5665 params->fp32_sse4.a_zero_point[i] = (int16_t) a_zero_point;
5666 params->fp32_sse4.b_zero_point[i] = (int16_t) b_zero_point;
5667 }
5668 for (uint32_t i = 0; i < 4; i++) {
5669 params->fp32_sse4.scale[i] = product_output_scale;
5670 }
5671 for (uint32_t i = 0; i < 8; i++) {
5672 params->fp32_sse4.output_zero_point[i] = (int16_t) output_zero_point;
5673 }
5674 for (uint32_t i = 0; i < 16; i++) {
5675 params->fp32_sse4.output_min[i] = output_min;
5676 params->fp32_sse4.output_max[i] = output_max;
5677 }
5678 return sizeof(params->fp32_sse4);
5679}
5680#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5681
5682#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5683size_t xnn_init_qs8_mul_minmax_fp32_wasmsimd_params(
5684 union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
5685 int8_t a_zero_point,
5686 int8_t b_zero_point,
5687 int8_t output_zero_point,
5688 float product_output_scale,
5689 int8_t output_min,
5690 int8_t output_max)
5691{
5692 assert(product_output_scale >= 0x1.0p-16f);
5693 assert(product_output_scale < 0x1.0p+8f);
5694
5695 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5696 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
5697 const int32_t magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5698 for (uint32_t i = 0; i < 4; i++) {
5699 params->fp32_wasmsimd.a_zero_point[i] = (int16_t) a_zero_point;
5700 params->fp32_wasmsimd.b_zero_point[i] = (int16_t) b_zero_point;
5701 }
5702 for (uint32_t i = 0; i < 2; i++) {
5703 params->fp32_wasmsimd.scale[i] = product_output_scale;
5704 params->fp32_wasmsimd.magic_bias[i] = 12582912.0f;
5705 params->fp32_wasmsimd.magic_min[i] = magic_min;
5706 params->fp32_wasmsimd.magic_bias_less_output_zero_point[i] = magic_bias_less_output_zero_point;
5707 }
5708 for (uint32_t i = 0; i < 8; i++) {
5709 params->fp32_wasmsimd.output_max[i] = output_max;
5710 }
5711 return sizeof(params->fp32_wasmsimd);
5712}
5713#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5714
5715size_t xnn_init_f16_f32_cvt_scalar_params(
5716 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5717{
5718 params->scalar.sign_mask = UINT32_C(0x80000000);
5719 params->scalar.exp_offset = UINT32_C(0x70000000);
5720 params->scalar.exp_scale = 0x1.0p-112f;
5721 params->scalar.magic_mask = UINT32_C(0x3F000000);
5722 params->scalar.magic_bias = 0.5f;
5723 params->scalar.denorm_cutoff = UINT32_C(0x08000000);
5724 return sizeof(params->scalar);
5725}
5726
5727#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5728size_t xnn_init_f16_f32_cvt_neon_params(
5729 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5730{
5731 params->neon.exp_scale = 0x1.0p-112f;
5732 return sizeof(params->neon);
5733}
5734#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5735
5736#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5737size_t xnn_init_f16_f32_cvt_sse_int16_params(
5738 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5739{
5740 for (uint32_t i = 0; i < 8; i++) {
5741 params->sse_int16.sign_mask[i] = UINT16_C(0x8000);
5742 params->sse_int16.exp_offset[i] = UINT16_C(0x7000);
5743 }
5744 for (uint32_t i = 0; i < 4; i++) {
5745 params->sse_int16.exp_scale[i] = 0x1.0p-112f;
5746 }
5747 for (uint32_t i = 0; i < 8; i++) {
5748 params->sse_int16.magic_mask[i] = UINT16_C(0x3F00);
5749 }
5750 for (uint32_t i = 0; i < 4; i++) {
5751 params->sse_int16.magic_bias[i] = 0.5f;
5752 }
5753 for (uint32_t i = 0; i < 8; i++) {
5754 params->sse_int16.denorm_cutoff[i] = INT16_C(0x0400);
5755 }
5756 return sizeof(params->sse_int16);
5757}
5758
5759size_t xnn_init_f16_f32_cvt_sse_int32_params(
5760 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5761{
5762 for (uint32_t i = 0; i < 4; i++) {
5763 params->sse_int32.sign_mask[i] = UINT32_C(0x80000000);
5764 params->sse_int32.exp_offset[i] = UINT32_C(0x70000000);
5765 params->sse_int32.exp_scale[i] = 0x1.0p-112f;
5766 params->sse_int32.magic_bias[i] = UINT32_C(0x3F000000);
5767 params->sse_int32.denorm_cutoff[i] = INT32_C(0x04000000);
5768 }
5769 return sizeof(params->sse_int32);
5770}
5771#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5772
5773#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5774size_t xnn_init_f16_f32_cvt_wasmsimd_int16_params(
5775 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5776{
5777 for (uint32_t i = 0; i < 4; i++) {
5778 params->wasmsimd_int16.sign_mask[i] = UINT16_C(0x8000);
5779 params->wasmsimd_int16.exp_offset[i] = UINT16_C(0x7000);
5780 }
5781 for (uint32_t i = 0; i < 2; i++) {
5782 params->wasmsimd_int16.exp_scale[i] = 0x1.0p-112f;
5783 }
5784 for (uint32_t i = 0; i < 4; i++) {
5785 params->wasmsimd_int16.magic_mask[i] = UINT16_C(0x3F00);
5786 }
5787 for (uint32_t i = 0; i < 2; i++) {
5788 params->wasmsimd_int16.magic_bias[i] = 0.5f;
5789 }
5790 for (uint32_t i = 0; i < 4; i++) {
5791 params->wasmsimd_int16.denorm_cutoff[i] = INT16_C(0x0400);
5792 }
5793 return sizeof(params->wasmsimd_int16);
5794}
5795
5796size_t xnn_init_f16_f32_cvt_wasmsimd_int32_params(
5797 union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
5798{
5799 for (uint32_t i = 0; i < 2; i++) {
5800 params->wasmsimd_int32.sign_mask[i] = UINT32_C(0x80000000);
5801 params->wasmsimd_int32.exp_offset[i] = UINT32_C(0x70000000);
5802 params->wasmsimd_int32.exp_scale[i] = 0x1.0p-112f;
5803 params->wasmsimd_int32.magic_bias[i] = UINT32_C(0x3F000000);
5804 params->wasmsimd_int32.denorm_cutoff[i] = INT32_C(0x04000000);
5805 }
5806 return sizeof(params->wasmsimd_int32);
5807}
5808#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5809
5810size_t xnn_init_f32_f16_cvt_scalar_bitcast_params(
5811 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5812{
5813 params->scalar_bitcast.nonsign_mask = UINT32_C(0x7FFFFFFF);
5814 params->scalar_bitcast.exp_bias = UINT32_C(0x07800000);
5815 params->scalar_bitcast.scale_to_inf = 0x1.0p+112f;
5816 params->scalar_bitcast.expw_max = UINT32_C(0x7F800000);
5817 params->scalar_bitcast.scale_to_zero = 0x1.0p-110f;
5818 params->scalar_bitcast.bias_min = UINT32_C(0x40000000);
5819 params->scalar_bitcast.exph_mask = UINT16_C(0x7C00);
5820 params->scalar_bitcast.manth_mask = UINT16_C(0x0FFF);
5821 params->scalar_bitcast.nanh = UINT16_C(0x7E00);
5822 return sizeof(params->scalar_bitcast);
5823}
5824
5825size_t xnn_init_f32_f16_cvt_scalar_fabsf_params(
5826 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5827{
5828 params->scalar_fabsf.scale_to_inf = 0x1.0p+112f;
5829 params->scalar_fabsf.exp_bias = UINT32_C(0x07800000);
5830 params->scalar_fabsf.scale_to_zero = 0x1.0p-110f;
5831 params->scalar_fabsf.expw_max = UINT32_C(0x7F800000);
5832 params->scalar_fabsf.bias_min = UINT32_C(0x40000000);
5833 params->scalar_fabsf.exph_mask = UINT16_C(0x7C00);
5834 params->scalar_fabsf.manth_mask = UINT16_C(0x0FFF);
5835 params->scalar_fabsf.nanh = UINT16_C(0x7E00);
5836 return sizeof(params->scalar_fabsf);
5837}
5838
5839#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5840size_t xnn_init_f32_f16_cvt_neon_params(
5841 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5842{
5843 params->neon.exp_bias = UINT32_C(0x07800000);
5844 params->neon.scale_to_inf = 0x1.0p+112f;
5845 params->neon.expw_max = UINT32_C(0x7F800000);
5846 params->neon.scale_to_zero = 0x1.0p-110f;
5847 return sizeof(params->neon);
5848}
5849#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5850
5851#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5852size_t xnn_init_f32_f16_cvt_sse2_params(
5853 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5854{
5855 for (uint32_t i = 0; i < 4; i++) {
5856 params->sse2.nonsign_mask[i] = UINT32_C(0x7FFFFFFF);
5857 params->sse2.exp_bias[i] = UINT32_C(0x07800000);
5858 params->sse2.scale_to_inf[i] = 0x1.0p+112f;
5859 params->sse2.expw_max[i] = UINT32_C(0x7F800000);
5860 params->sse2.scale_to_zero[i] = 0x1.0p-110f;
5861 }
5862 params->sse2.bias_min[0] = INT16_C(0x8000);
5863 params->sse2.bias_min[1] = INT16_C(0x4000);
5864 params->sse2.bias_min[2] = INT16_C(0x8000);
5865 params->sse2.bias_min[3] = INT16_C(0x4000);
5866 params->sse2.bias_min[4] = INT16_C(0x8000);
5867 params->sse2.bias_min[5] = INT16_C(0x4000);
5868 params->sse2.bias_min[6] = INT16_C(0x8000);
5869 params->sse2.bias_min[7] = INT16_C(0x4000);
5870 for (uint32_t i = 0; i < 4; i++) {
5871 params->sse2.manth_mask[i] = UINT32_C(0x00000FFF);
5872 params->sse2.exph_mask[i] = UINT32_C(0x00007C00);
5873 }
5874 for (uint32_t i = 0; i < 8; i++) {
5875 params->sse2.nanh[i] = UINT16_C(0x7E00);
5876 }
5877 return sizeof(params->sse2);
5878}
5879
5880size_t xnn_init_f32_f16_cvt_f16c_params(
5881 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5882{
5883 for (uint32_t i = 0; i < 7; i++) {
5884 params->f16c.mask_table[i] = -1;
5885 }
5886 for (uint32_t i = 7; i < 14; i++) {
5887 params->f16c.mask_table[i] = 0;
5888 }
5889 return sizeof(params->f16c);
5890}
5891#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5892
5893#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5894size_t xnn_init_f32_f16_cvt_wasmsimd_params(
5895 union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
5896{
5897 for (uint32_t i = 0; i < 2; i++) {
5898 params->wasmsimd.exp_bias[i] = UINT32_C(0x07800000);
5899 params->wasmsimd.scale_to_inf[i] = 0x1.0p+112f;
5900 params->wasmsimd.expw_max[i] = UINT32_C(0x7F800000);
5901 params->wasmsimd.scale_to_zero[i] = 0x1.0p-110f;
5902 }
5903 params->wasmsimd.bias_min[0] = INT16_C(0x8000);
5904 params->wasmsimd.bias_min[1] = INT16_C(0x4000);
5905 params->wasmsimd.bias_min[2] = INT16_C(0x8000);
5906 params->wasmsimd.bias_min[3] = INT16_C(0x4000);
5907 for (uint32_t i = 0; i < 2; i++) {
5908 params->wasmsimd.manth_mask[i] = UINT32_C(0x00000FFF);
5909 params->wasmsimd.exph_mask[i] = UINT32_C(0x00007C00);
5910 }
5911 for (uint32_t i = 0; i < 4; i++) {
5912 params->wasmsimd.nanh[i] = UINT16_C(0x7E00);
5913 }
5914 return sizeof(params->wasmsimd);
5915}
5916#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5917
5918size_t xnn_init_f32_qs8_cvt_scalar_fmagic_params(
5919 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5920 float scale,
5921 int8_t output_zero_point,
5922 int8_t output_min,
5923 int8_t output_max)
5924{
5925 params->scalar_fmagic.scale = scale;
5926 params->scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5927 params->scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
5928 params->scalar_fmagic.magic_bias = 12582912.0f;
5929 params->scalar_fmagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5930 return sizeof(params->scalar_fmagic);
5931}
5932
5933size_t xnn_init_f32_qs8_cvt_scalar_imagic_params(
5934 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5935 float scale,
5936 int8_t output_zero_point,
5937 int8_t output_min,
5938 int8_t output_max)
5939{
5940 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5941 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
5942 params->scalar_imagic.scale = scale;
5943 params->scalar_imagic.magic_bias = 12582912.0f;
5944 params->scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
5945 params->scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
5946 params->scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5947 return sizeof(params->scalar_imagic);
5948}
5949
5950size_t xnn_init_f32_qs8_cvt_scalar_lrintf_params(
5951 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5952 float scale,
5953 int8_t output_zero_point,
5954 int8_t output_min,
5955 int8_t output_max)
5956{
5957 params->scalar_lrintf.scale = scale;
5958 params->scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
5959 params->scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
5960 params->scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
5961 return sizeof(params->scalar_lrintf);
5962}
5963
5964#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5965size_t xnn_init_f32_qs8_cvt_neon_params(
5966 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5967 float scale,
5968 int8_t output_zero_point,
5969 int8_t output_min,
5970 int8_t output_max)
5971{
5972 params->neon.scale = scale;
5973 params->neon.magic_bias = 12582912.0f;
5974 params->neon.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
5975 params->neon.output_min = output_min;
5976 params->neon.output_max = output_max;
5977 return sizeof(params->neon);
5978}
5979
5980size_t xnn_init_f32_qs8_cvt_neonv8_params(
5981 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5982 float scale,
5983 int8_t output_zero_point,
5984 int8_t output_min,
5985 int8_t output_max)
5986{
5987 params->neonv8.scale = scale;
5988 params->neonv8.output_zero_point = (int16_t) output_zero_point;
5989 params->neonv8.output_min = output_min;
5990 params->neonv8.output_max = output_max;
5991 return sizeof(params->neonv8);
5992}
5993#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5994
5995#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5996size_t xnn_init_f32_qs8_cvt_sse2_params(
5997 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
5998 float scale,
5999 int8_t output_zero_point,
6000 int8_t output_min,
6001 int8_t output_max)
6002{
6003 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6004 for (uint32_t i = 0; i < 4; i++) {
6005 params->sse2.scale[i] = scale;
6006 params->sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
6007 }
6008 for (uint32_t i = 0; i < 8; i++) {
6009 params->sse2.output_zero_point[i] = (int16_t) output_zero_point;
6010 params->sse2.output_min[i] = (int16_t) output_min;
6011 }
6012 return sizeof(params->sse2);
6013}
6014
6015size_t xnn_init_f32_qs8_cvt_sse4_params(
6016 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6017 float scale,
6018 int8_t output_zero_point,
6019 int8_t output_min,
6020 int8_t output_max)
6021{
6022 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6023 for (uint32_t i = 0; i < 4; i++) {
6024 params->sse4.scale[i] = scale;
6025 params->sse4.output_max_less_zero_point[i] = output_max_less_zero_point;
6026 }
6027 for (uint32_t i = 0; i < 8; i++) {
6028 params->sse4.output_zero_point[i] = (int16_t) output_zero_point;
6029 }
6030 for (uint32_t i = 0; i < 16; i++) {
6031 params->sse4.output_min[i] = output_min;
6032 }
6033 return sizeof(params->sse4);
6034}
6035
6036size_t xnn_init_f32_qs8_cvt_avx_params(
6037 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6038 float scale,
6039 int8_t output_zero_point,
6040 int8_t output_min,
6041 int8_t output_max)
6042{
6043 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6044 for (uint32_t i = 0; i < 8; i++) {
6045 params->avx.scale[i] = scale;
6046 params->avx.output_max_less_zero_point[i] = output_max_less_zero_point;
6047 }
6048 for (uint32_t i = 0; i < 8; i++) {
6049 params->avx.output_zero_point[i] = (int16_t) output_zero_point;
6050 }
6051 for (uint32_t i = 0; i < 16; i++) {
6052 params->avx.output_min[i] = output_min;
6053 }
6054 for (uint32_t i = 0; i < 7; i++) {
6055 params->avx.mask_table[i] = -1;
6056 }
6057 for (uint32_t i = 7; i < 14; i++) {
6058 params->avx.mask_table[i] = 0;
6059 }
6060 return sizeof(params->avx);
6061}
6062
6063size_t xnn_init_f32_qs8_cvt_avx2_params(
6064 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6065 float scale,
6066 int8_t output_zero_point,
6067 int8_t output_min,
6068 int8_t output_max)
6069{
6070 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6071 for (uint32_t i = 0; i < 8; i++) {
6072 params->avx2.scale[i] = scale;
6073 params->avx2.output_max_less_zero_point[i] = output_max_less_zero_point;
6074 }
6075 for (uint32_t i = 0; i < 16; i++) {
6076 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
6077 }
6078 params->avx2.shuffle_mask[0] = 0;
6079 params->avx2.shuffle_mask[1] = 4;
6080 params->avx2.shuffle_mask[2] = 1;
6081 params->avx2.shuffle_mask[3] = 5;
6082 params->avx2.shuffle_mask[4] = 2;
6083 params->avx2.shuffle_mask[5] = 6;
6084 params->avx2.shuffle_mask[6] = 3;
6085 params->avx2.shuffle_mask[7] = 7;
6086 for (uint32_t i = 0; i < 32; i++) {
6087 params->avx2.output_min[i] = output_min;
6088 }
6089 for (uint32_t i = 0; i < 7; i++) {
6090 params->avx2.mask_table[i] = -1;
6091 }
6092 for (uint32_t i = 7; i < 14; i++) {
6093 params->avx2.mask_table[i] = 0;
6094 }
6095 return sizeof(params->avx2);
6096}
6097
6098size_t xnn_init_f32_qs8_cvt_avx512_params(
6099 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6100 float scale,
6101 int8_t output_zero_point,
6102 int8_t output_min,
6103 int8_t output_max)
6104{
6105 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6106 for (uint32_t i = 0; i < 16; i++) {
6107 params->avx512.scale[i] = scale;
6108 params->avx512.output_max_less_zero_point[i] = output_max_less_zero_point;
6109 }
6110 for (uint32_t i = 0; i < 32; i++) {
6111 params->avx512.output_zero_point[i] = (int16_t) output_zero_point;
6112 }
6113 for (uint32_t i = 0; i < 64; i++) {
6114 params->avx512.output_min[i] = output_min;
6115 }
6116 params->avx512.shuffle512_mask[0] = 0;
6117 params->avx512.shuffle512_mask[1] = 4;
6118 params->avx512.shuffle512_mask[2] = 8;
6119 params->avx512.shuffle512_mask[3] = 12;
6120 params->avx512.shuffle512_mask[4] = 1;
6121 params->avx512.shuffle512_mask[5] = 5;
6122 params->avx512.shuffle512_mask[6] = 9;
6123 params->avx512.shuffle512_mask[7] = 13;
6124 params->avx512.shuffle512_mask[8] = 2;
6125 params->avx512.shuffle512_mask[9] = 6;
6126 params->avx512.shuffle512_mask[10] = 10;
6127 params->avx512.shuffle512_mask[11] = 14;
6128 params->avx512.shuffle512_mask[12] = 3;
6129 params->avx512.shuffle512_mask[13] = 7;
6130 params->avx512.shuffle512_mask[14] = 11;
6131 params->avx512.shuffle512_mask[15] = 15;
6132 params->avx512.shuffle256_mask[0] = 0;
6133 params->avx512.shuffle256_mask[1] = 4;
6134 params->avx512.shuffle256_mask[2] = 2;
6135 params->avx512.shuffle256_mask[3] = 6;
6136 params->avx512.shuffle256_mask[4] = 1;
6137 params->avx512.shuffle256_mask[5] = 5;
6138 params->avx512.shuffle256_mask[6] = 3;
6139 params->avx512.shuffle256_mask[7] = 7;
6140 return sizeof(params->avx512);
6141}
6142#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6143
6144#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6145size_t xnn_init_f32_qs8_cvt_wasmsimd_cvt_params(
6146 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6147 float scale,
6148 int8_t output_zero_point,
6149 int8_t output_min,
6150 int8_t output_max)
6151{
6152 for (uint32_t i = 0; i < 2; i++) {
6153 params->wasmsimd_cvt.scale[i] = scale;
6154 }
6155 for (uint32_t i = 0; i < 4; i++) {
6156 params->wasmsimd_cvt.output_zero_point[i] = (int16_t) output_zero_point;
6157 }
6158 for (uint32_t i = 0; i < 8; i++) {
6159 params->wasmsimd_cvt.output_min[i] = output_min;
6160 params->wasmsimd_cvt.output_max[i] = output_max;
6161 }
6162 return sizeof(params->wasmsimd_cvt);
6163}
6164
6165size_t xnn_init_f32_qs8_cvt_wasmsimd_magic_params(
6166 union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6167 float scale,
6168 int8_t output_zero_point,
6169 int8_t output_min,
6170 int8_t output_max)
6171{
6172 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
6173 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
6174 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
6175 for (uint32_t i = 0; i < 2; i++) {
6176 params->wasmsimd_magic.scale[i] = scale;
6177 params->wasmsimd_magic.magic_bias[i] = 12582912.0f;
6178 params->wasmsimd_magic.magic_min[i] = magic_min;
6179 params->wasmsimd_magic.magic_bias_less_zero_point[i] = magic_bias_less_zero_point;
6180 }
6181 for (uint32_t i = 0; i < 8; i++) {
6182 params->wasmsimd_magic.output_max[i] = output_max;
6183 }
6184 return sizeof(params->wasmsimd_magic);
6185}
6186#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6187
6188size_t xnn_init_f32_qu8_cvt_scalar_fmagic_params(
6189 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6190 float scale,
6191 uint8_t output_zero_point,
6192 uint8_t output_min,
6193 uint8_t output_max)
6194{
6195 params->scalar_fmagic.scale = scale;
6196 params->scalar_fmagic.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
6197 params->scalar_fmagic.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6198 params->scalar_fmagic.magic_bias = 12582912.0f;
6199 params->scalar_fmagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
6200 return sizeof(params->scalar_fmagic);
6201}
6202
6203size_t xnn_init_f32_qu8_cvt_scalar_imagic_params(
6204 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6205 float scale,
6206 uint8_t output_zero_point,
6207 uint8_t output_min,
6208 uint8_t output_max)
6209{
6210 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
6211 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6212 params->scalar_imagic.scale = scale;
6213 params->scalar_imagic.magic_bias = 12582912.0f;
6214 params->scalar_imagic.magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
6215 params->scalar_imagic.magic_max = (int32_t) float_as_uint32(12582912.0f + output_max_less_zero_point);
6216 params->scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
6217 return sizeof(params->scalar_imagic);
6218}
6219
6220size_t xnn_init_f32_qu8_cvt_scalar_lrintf_params(
6221 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6222 float scale,
6223 uint8_t output_zero_point,
6224 uint8_t output_min,
6225 uint8_t output_max)
6226{
6227 params->scalar_lrintf.scale = scale;
6228 params->scalar_lrintf.output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
6229 params->scalar_lrintf.output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6230 params->scalar_lrintf.output_zero_point = (int32_t) output_zero_point;
6231 return sizeof(params->scalar_lrintf);
6232}
6233
6234#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6235size_t xnn_init_f32_qu8_cvt_neon_params(
6236 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6237 float scale,
6238 uint8_t output_zero_point,
6239 uint8_t output_min,
6240 uint8_t output_max)
6241{
6242 params->neon.scale = scale;
6243 params->neon.magic_bias = 12582912.0f;
6244 params->neon.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
6245 params->neon.output_min = output_min;
6246 params->neon.output_max = output_max;
6247 return sizeof(params->neon);
6248}
6249
6250size_t xnn_init_f32_qu8_cvt_neonv8_params(
6251 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6252 float scale,
6253 uint8_t output_zero_point,
6254 uint8_t output_min,
6255 uint8_t output_max)
6256{
6257 params->neonv8.scale = scale;
6258 params->neonv8.output_zero_point = (int16_t) output_zero_point;
6259 params->neonv8.output_min = output_min;
6260 params->neonv8.output_max = output_max;
6261 return sizeof(params->neonv8);
6262}
6263#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6264
6265#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6266size_t xnn_init_f32_qu8_cvt_sse2_params(
6267 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6268 float scale,
6269 uint8_t output_zero_point,
6270 uint8_t output_min,
6271 uint8_t output_max)
6272{
6273 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6274 for (uint32_t i = 0; i < 4; i++) {
6275 params->sse2.scale[i] = scale;
6276 params->sse2.output_max_less_zero_point[i] = output_max_less_zero_point;
6277 }
6278 for (uint32_t i = 0; i < 8; i++) {
6279 params->sse2.output_zero_point[i] = (int16_t) output_zero_point;
6280 }
6281 for (uint32_t i = 0; i < 16; i++) {
6282 params->sse2.output_min[i] = output_min;
6283 }
6284 return sizeof(params->sse2);
6285}
6286
6287size_t xnn_init_f32_qu8_cvt_avx_params(
6288 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6289 float scale,
6290 uint8_t output_zero_point,
6291 uint8_t output_min,
6292 uint8_t output_max)
6293{
6294 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6295 for (uint32_t i = 0; i < 8; i++) {
6296 params->avx.scale[i] = scale;
6297 params->avx.output_max_less_zero_point[i] = output_max_less_zero_point;
6298 }
6299 for (uint32_t i = 0; i < 8; i++) {
6300 params->avx.output_zero_point[i] = (int16_t) output_zero_point;
6301 }
6302 for (uint32_t i = 0; i < 16; i++) {
6303 params->avx.output_min[i] = output_min;
6304 }
6305 for (uint32_t i = 0; i < 7; i++) {
6306 params->avx.mask_table[i] = -1;
6307 }
6308 for (uint32_t i = 7; i < 14; i++) {
6309 params->avx.mask_table[i] = 0;
6310 }
6311 return sizeof(params->avx);
6312}
6313
6314size_t xnn_init_f32_qu8_cvt_avx2_params(
6315 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6316 float scale,
6317 uint8_t output_zero_point,
6318 uint8_t output_min,
6319 uint8_t output_max)
6320{
6321 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6322 for (uint32_t i = 0; i < 8; i++) {
6323 params->avx2.scale[i] = scale;
6324 params->avx2.output_max_less_zero_point[i] = output_max_less_zero_point;
6325 }
6326 for (uint32_t i = 0; i < 16; i++) {
6327 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
6328 }
6329 params->avx2.shuffle_mask[0] = 0;
6330 params->avx2.shuffle_mask[1] = 4;
6331 params->avx2.shuffle_mask[2] = 1;
6332 params->avx2.shuffle_mask[3] = 5;
6333 params->avx2.shuffle_mask[4] = 2;
6334 params->avx2.shuffle_mask[5] = 6;
6335 params->avx2.shuffle_mask[6] = 3;
6336 params->avx2.shuffle_mask[7] = 7;
6337 for (uint32_t i = 0; i < 32; i++) {
6338 params->avx2.output_min[i] = output_min;
6339 }
6340 for (uint32_t i = 0; i < 7; i++) {
6341 params->avx2.mask_table[i] = -1;
6342 }
6343 for (uint32_t i = 7; i < 14; i++) {
6344 params->avx2.mask_table[i] = 0;
6345 }
6346 return sizeof(params->avx2);
6347}
6348
6349size_t xnn_init_f32_qu8_cvt_avx512_params(
6350 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6351 float scale,
6352 uint8_t output_zero_point,
6353 uint8_t output_min,
6354 uint8_t output_max)
6355{
6356 const float output_max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
6357 for (uint32_t i = 0; i < 16; i++) {
6358 params->avx512.scale[i] = scale;
6359 params->avx512.output_max_less_zero_point[i] = output_max_less_zero_point;
6360 }
6361 for (uint32_t i = 0; i < 32; i++) {
6362 params->avx512.output_zero_point[i] = (int16_t) output_zero_point;
6363 }
6364 for (uint32_t i = 0; i < 64; i++) {
6365 params->avx512.output_min[i] = output_min;
6366 }
6367 params->avx512.shuffle512_mask[0] = 0;
6368 params->avx512.shuffle512_mask[1] = 4;
6369 params->avx512.shuffle512_mask[2] = 8;
6370 params->avx512.shuffle512_mask[3] = 12;
6371 params->avx512.shuffle512_mask[4] = 1;
6372 params->avx512.shuffle512_mask[5] = 5;
6373 params->avx512.shuffle512_mask[6] = 9;
6374 params->avx512.shuffle512_mask[7] = 13;
6375 params->avx512.shuffle512_mask[8] = 2;
6376 params->avx512.shuffle512_mask[9] = 6;
6377 params->avx512.shuffle512_mask[10] = 10;
6378 params->avx512.shuffle512_mask[11] = 14;
6379 params->avx512.shuffle512_mask[12] = 3;
6380 params->avx512.shuffle512_mask[13] = 7;
6381 params->avx512.shuffle512_mask[14] = 11;
6382 params->avx512.shuffle512_mask[15] = 15;
6383 params->avx512.shuffle256_mask[0] = 0;
6384 params->avx512.shuffle256_mask[1] = 4;
6385 params->avx512.shuffle256_mask[2] = 2;
6386 params->avx512.shuffle256_mask[3] = 6;
6387 params->avx512.shuffle256_mask[4] = 1;
6388 params->avx512.shuffle256_mask[5] = 5;
6389 params->avx512.shuffle256_mask[6] = 3;
6390 params->avx512.shuffle256_mask[7] = 7;
6391 return sizeof(params->avx512);
6392}
6393#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6394
6395#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6396size_t xnn_init_f32_qu8_cvt_wasmsimd_cvt_params(
6397 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6398 float scale,
6399 uint8_t output_zero_point,
6400 uint8_t output_min,
6401 uint8_t output_max)
6402{
6403 for (uint32_t i = 0; i < 2; i++) {
6404 params->wasmsimd_cvt.scale[i] = scale;
6405 }
6406 for (uint32_t i = 0; i < 4; i++) {
6407 params->wasmsimd_cvt.output_zero_point[i] = (int16_t) output_zero_point;
6408 }
6409 for (uint32_t i = 0; i < 8; i++) {
6410 params->wasmsimd_cvt.output_min[i] = output_min;
6411 params->wasmsimd_cvt.output_max[i] = output_max;
6412 }
6413 return sizeof(params->wasmsimd_cvt);
6414}
6415
6416size_t xnn_init_f32_qu8_cvt_wasmsimd_magic_params(
6417 union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6418 float scale,
6419 uint8_t output_zero_point,
6420 uint8_t output_min,
6421 uint8_t output_max)
6422{
6423 const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
6424 const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);
6425 const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
6426 for (uint32_t i = 0; i < 2; i++) {
6427 params->wasmsimd_magic.scale[i] = scale;
6428 params->wasmsimd_magic.magic_bias[i] = 12582912.0f;
6429 params->wasmsimd_magic.magic_min[i] = magic_min;
6430 params->wasmsimd_magic.magic_bias_less_zero_point[i] = magic_bias_less_zero_point;
6431 }
6432 for (uint32_t i = 0; i < 8; i++) {
6433 params->wasmsimd_magic.output_max[i] = output_max;
6434 }
6435 return sizeof(params->wasmsimd_magic);
6436}
6437#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6438
6439size_t xnn_init_qs8_cvt_scalar_params(
6440 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6441 float input_output_scale,
6442 int8_t input_zero_point,
6443 int8_t output_zero_point)
6444{
6445 assert(input_output_scale >= 0x1.0p-8);
6446 assert(input_output_scale <= 0x1.0p+7);
6447
6448 const long multiplier = lrintf(256.0f * input_output_scale);
6449 assert(multiplier >= 1L);
6450 assert(multiplier <= 32768L);
6451 params->scalar.bias = ((int32_t) output_zero_point << 8) - (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
6452 params->scalar.multiplier = (int32_t) multiplier;
6453 return sizeof(params->scalar);
6454}
6455
6456#if XNN_ARCH_ARM
6457size_t xnn_init_qs8_cvt_armsimd32_params(
6458 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6459 float input_output_scale,
6460 int8_t input_zero_point,
6461 int8_t output_zero_point)
6462{
6463 assert(input_output_scale >= 0x1.0p-8);
6464 assert(input_output_scale <= 0x1.0p+7);
6465
6466 const long multiplier = lrintf(131072.0f * input_output_scale);
6467 assert(multiplier >= 512L);
6468 assert(multiplier <= 16777216L);
6469 const uint16_t minus_input_zero_point = -(int16_t) input_zero_point;
6470 params->armsimd32.minus_input_zero_point = (uint32_t) minus_input_zero_point * UINT32_C(0x00010001);
6471 params->armsimd32.multiplier = (int32_t) multiplier;
6472 params->armsimd32.bias = ((int32_t) output_zero_point << 1) + INT32_C(1);
6473 return sizeof(params->armsimd32);
6474}
6475#endif // XNN_ARCH_ARM
6476
6477#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6478size_t xnn_init_qs8_cvt_neon_params(
6479 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6480 float input_output_scale,
6481 int8_t input_zero_point,
6482 int8_t output_zero_point)
6483{
6484 assert(input_output_scale >= 0x1.0p-8);
6485 assert(input_output_scale <= 0x1.0p+7);
6486
6487 const long multiplier = lrintf(-256.0f * input_output_scale);
6488 assert(multiplier <= -1L);
6489 assert(multiplier >= -32768L);
6490 params->neon.input_zero_point = (int16_t) input_zero_point;
6491 params->neon.multiplier = (int16_t) multiplier;
6492 params->neon.output_zero_point = (int16_t) output_zero_point;
6493 return sizeof(params->neon);
6494}
6495#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6496
6497#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6498size_t xnn_init_qs8_cvt_sse2_params(
6499 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6500 float input_output_scale,
6501 int8_t input_zero_point,
6502 int8_t output_zero_point)
6503{
6504 assert(input_output_scale >= 0x1.0p-8);
6505 assert(input_output_scale <= 0x1.0p+7);
6506
6507 const long multiplier = lrintf(-256.0f * input_output_scale);
6508 assert(multiplier <= -1L);
6509 assert(multiplier >= -32768L);
6510 const int32_t bias = ((int32_t) output_zero_point << 8) + (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
6511 for (uint32_t i = 0; i < 8; i++) {
6512 params->sse2.multiplier[i] = (int16_t) multiplier;
6513 }
6514 for (uint32_t i = 0; i < 4; i++) {
6515 params->sse2.bias[i] = bias;
6516 }
6517 return sizeof(params->sse2);
6518}
6519
6520size_t xnn_init_qs8_cvt_ssse3_params(
6521 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6522 float input_output_scale,
6523 int8_t input_zero_point,
6524 int8_t output_zero_point)
6525{
6526 assert(input_output_scale >= 0x1.0p-8);
6527 assert(input_output_scale <= 0x1.0p+7);
6528
6529 const long multiplier = lrintf(-256.0f * input_output_scale);
6530 assert(multiplier <= -1L);
6531 assert(multiplier >= -32768L);
6532 for (uint32_t i = 0; i < 8; i++) {
6533 params->ssse3.input_zero_point[i] = (int16_t) input_zero_point;
6534 params->ssse3.multiplier[i] = (int16_t) multiplier;
6535 params->ssse3.output_zero_point[i] = (int16_t) output_zero_point;
6536 }
6537 return sizeof(params->ssse3);
6538}
6539
6540size_t xnn_init_qs8_cvt_avx2_params(
6541 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6542 float input_output_scale,
6543 int8_t input_zero_point,
6544 int8_t output_zero_point)
6545{
6546 assert(input_output_scale >= 0x1.0p-8);
6547 assert(input_output_scale <= 0x1.0p+7);
6548
6549 const long multiplier = lrintf(-256.0f * input_output_scale);
6550 assert(multiplier <= -1L);
6551 assert(multiplier >= -32768L);
6552 for (uint32_t i = 0; i < 16; i++) {
6553 params->avx2.input_zero_point[i] = (int16_t) input_zero_point;
6554 params->avx2.multiplier[i] = (int16_t) multiplier;
6555 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
6556 }
6557 return sizeof(params->avx2);
6558}
6559#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6560
6561#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6562size_t xnn_init_qs8_cvt_wasmsimd_params(
6563 union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6564 float input_output_scale,
6565 int8_t input_zero_point,
6566 int8_t output_zero_point)
6567{
6568 assert(input_output_scale >= 0x1.0p-8);
6569 assert(input_output_scale <= 0x1.0p+7);
6570
6571 const long multiplier = lrintf(-256.0f * input_output_scale);
6572 assert(multiplier <= -1L);
6573 assert(multiplier >= -32768L);
6574 for (uint32_t i = 0; i < 4; i++) {
6575 params->wasmsimd.input_zero_point[i] = (int16_t) input_zero_point;
6576 params->wasmsimd.multiplier[i] = (int16_t) multiplier;
6577 params->wasmsimd.output_zero_point[i] = (int16_t) output_zero_point;
6578 }
6579 return sizeof(params->wasmsimd);
6580}
6581#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6582
6583size_t xnn_init_qs8_f32_cvt_scalar_params(
6584 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6585 float scale,
6586 int8_t zero_point)
6587{
6588 params->scalar.zero_point = (int32_t) zero_point;
6589 params->scalar.scale = scale;
6590 return sizeof(params->scalar);
6591}
6592
6593#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6594size_t xnn_init_qs8_f32_cvt_neon_params(
6595 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6596 float scale,
6597 int8_t zero_point)
6598{
6599 params->neon.minus_zero_point[0] = -(int16_t) zero_point;
6600 params->neon.minus_zero_point[1] = -(int16_t) zero_point;
6601 params->neon.scale = scale;
6602 return sizeof(params->neon);
6603}
6604#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6605
6606#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6607size_t xnn_init_qs8_f32_cvt_sse2_params(
6608 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6609 float scale,
6610 int8_t zero_point)
6611{
6612 for (uint32_t i = 0; i < 16; i++) {
6613 params->sse2.sign_mask[i] = UINT8_C(0x80);
6614 }
6615 for (uint32_t i = 0; i < 8; i++) {
6616 params->sse2.magic_exp[i] = UINT16_C(0x4B00);
6617 }
6618 const float magic_bias = (float) (INT32_C(0x00800080) + (int32_t) zero_point);
6619 for (uint32_t i = 0; i < 4; i++) {
6620 params->sse2.magic_bias[i] = magic_bias;
6621 params->sse2.scale[i] = scale;
6622 }
6623 return sizeof(params->sse2);
6624}
6625
6626size_t xnn_init_qs8_f32_cvt_sse4_params(
6627 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6628 float scale,
6629 int8_t zero_point)
6630{
6631 for (uint32_t i = 0; i < 4; i++) {
6632 params->sse4.minus_zero_point[i] = -(int32_t) zero_point;
6633 params->sse4.scale[i] = scale;
6634 }
6635 return sizeof(params->sse4);
6636}
6637
6638size_t xnn_init_qs8_f32_cvt_avx_params(
6639 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6640 float scale,
6641 int8_t zero_point)
6642{
6643 for (uint32_t i = 0; i < 8; i++) {
6644 params->avx.minus_zero_point[i] = -(int32_t) zero_point;
6645 params->avx.scale[i] = scale;
6646 }
6647 return sizeof(params->avx);
6648}
6649
6650size_t xnn_init_qs8_f32_cvt_avx512_params(
6651 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6652 float scale,
6653 int8_t zero_point)
6654{
6655 for (uint32_t i = 0; i < 16; i++) {
6656 params->avx512.minus_zero_point[i] = -(int32_t) zero_point;
6657 params->avx512.scale[i] = scale;
6658 }
6659 return sizeof(params->avx512);
6660}
6661#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6662
6663#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6664size_t xnn_init_qs8_f32_cvt_wasmsimd_params(
6665 union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6666 float scale,
6667 int8_t zero_point)
6668{
6669 for (uint32_t i = 0; i < 4; i++) {
6670 params->wasmsimd.minus_zero_point[i] = -(int16_t) zero_point;
6671 }
6672 for (uint32_t i = 0; i < 2; i++) {
6673 params->wasmsimd.scale[i] = scale;
6674 }
6675 return sizeof(params->wasmsimd);
6676}
6677#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6678
6679size_t xnn_init_qu8_cvt_scalar_params(
6680 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6681 float input_output_scale,
6682 uint8_t input_zero_point,
6683 uint8_t output_zero_point)
6684{
6685 assert(input_output_scale >= 0x1.0p-8);
6686 assert(input_output_scale <= 0x1.0p+7);
6687
6688 const long multiplier = lrintf(256.0f * input_output_scale);
6689 assert(multiplier >= 1L);
6690 assert(multiplier <= 32768L);
6691 params->scalar.bias = ((int32_t) output_zero_point << 8) - (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
6692 params->scalar.multiplier = (int32_t) multiplier;
6693 return sizeof(params->scalar);
6694}
6695
6696#if XNN_ARCH_ARM
6697size_t xnn_init_qu8_cvt_armsimd32_params(
6698 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6699 float input_output_scale,
6700 uint8_t input_zero_point,
6701 uint8_t output_zero_point)
6702{
6703 assert(input_output_scale >= 0x1.0p-8);
6704 assert(input_output_scale <= 0x1.0p+7);
6705
6706 const long multiplier = lrintf(131072.0f * input_output_scale);
6707 assert(multiplier >= 512L);
6708 assert(multiplier <= 16777216L);
6709 const uint16_t minus_input_zero_point = -(int16_t) input_zero_point;
6710 params->armsimd32.minus_input_zero_point = (uint32_t) minus_input_zero_point * UINT32_C(0x00010001);
6711 params->armsimd32.multiplier = (int32_t) multiplier;
6712 params->armsimd32.bias = ((int32_t) output_zero_point << 1) + INT32_C(1);
6713 return sizeof(params->armsimd32);
6714}
6715#endif // XNN_ARCH_ARM
6716
6717#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6718size_t xnn_init_qu8_cvt_neon_params(
6719 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6720 float input_output_scale,
6721 uint8_t input_zero_point,
6722 uint8_t output_zero_point)
6723{
6724 assert(input_output_scale >= 0x1.0p-8);
6725 assert(input_output_scale <= 0x1.0p+7);
6726
6727 const long multiplier = lrintf(-256.0f * input_output_scale);
6728 assert(multiplier <= -1L);
6729 assert(multiplier >= -32768L);
6730 params->neon.input_zero_point = (uint16_t) input_zero_point;
6731 params->neon.multiplier = (int16_t) multiplier;
6732 params->neon.output_zero_point = (int16_t) output_zero_point;
6733 return sizeof(params->neon);
6734}
6735#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6736
6737#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6738size_t xnn_init_qu8_cvt_sse2_params(
6739 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6740 float input_output_scale,
6741 uint8_t input_zero_point,
6742 uint8_t output_zero_point)
6743{
6744 assert(input_output_scale >= 0x1.0p-8);
6745 assert(input_output_scale <= 0x1.0p+7);
6746
6747 const long multiplier = lrintf(256.0f * input_output_scale);
6748 assert(multiplier >= 1L);
6749 assert(multiplier <= 32768L);
6750 const int32_t bias = ((int32_t) output_zero_point << 8) - (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
6751 for (uint32_t i = 0; i < 8; i++) {
6752 params->sse2.multiplier[i] = (uint16_t) multiplier;
6753 }
6754 for (uint32_t i = 0; i < 4; i++) {
6755 params->sse2.bias[i] = bias;
6756 }
6757 return sizeof(params->sse2);
6758}
6759
6760size_t xnn_init_qu8_cvt_ssse3_params(
6761 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6762 float input_output_scale,
6763 uint8_t input_zero_point,
6764 uint8_t output_zero_point)
6765{
6766 assert(input_output_scale >= 0x1.0p-8);
6767 assert(input_output_scale <= 0x1.0p+7);
6768
6769 const long multiplier = lrintf(-256.0f * input_output_scale);
6770 assert(multiplier <= -1L);
6771 assert(multiplier >= -32768L);
6772 for (uint32_t i = 0; i < 8; i++) {
6773 params->ssse3.input_zero_point[i] = (uint16_t) input_zero_point;
6774 params->ssse3.multiplier[i] = (int16_t) multiplier;
6775 params->ssse3.output_zero_point[i] = (int16_t) output_zero_point;
6776 }
6777 return sizeof(params->ssse3);
6778}
6779
6780size_t xnn_init_qu8_cvt_avx2_params(
6781 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6782 float input_output_scale,
6783 uint8_t input_zero_point,
6784 uint8_t output_zero_point)
6785{
6786 assert(input_output_scale >= 0x1.0p-8);
6787 assert(input_output_scale <= 0x1.0p+7);
6788
6789 const long multiplier = lrintf(-256.0f * input_output_scale);
6790 assert(multiplier <= -1L);
6791 assert(multiplier >= -32768L);
6792 for (uint32_t i = 0; i < 16; i++) {
6793 params->avx2.input_zero_point[i] = (uint16_t) input_zero_point;
6794 params->avx2.multiplier[i] = (int16_t) multiplier;
6795 params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
6796 }
6797 return sizeof(params->avx2);
6798}
6799#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6800
6801#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6802size_t xnn_init_qu8_cvt_wasmsimd_params(
6803 union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
6804 float input_output_scale,
6805 uint8_t input_zero_point,
6806 uint8_t output_zero_point)
6807{
6808 assert(input_output_scale >= 0x1.0p-8);
6809 assert(input_output_scale <= 0x1.0p+7);
6810
6811 const long multiplier = lrintf(-256.0f * input_output_scale);
6812 assert(multiplier <= -1L);
6813 assert(multiplier >= -32768L);
6814 for (uint32_t i = 0; i < 4; i++) {
6815 params->wasmsimd.input_zero_point[i] = (uint16_t) input_zero_point;
6816 params->wasmsimd.multiplier[i] = (int16_t) multiplier;
6817 params->wasmsimd.output_zero_point[i] = (int16_t) output_zero_point;
6818 }
6819 return sizeof(params->wasmsimd);
6820}
6821#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6822
6823size_t xnn_init_qu8_f32_cvt_scalar_params(
6824 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6825 float scale,
6826 uint8_t zero_point)
6827{
6828 params->scalar.zero_point = (int32_t) zero_point;
6829 params->scalar.scale = scale;
6830 return sizeof(params->scalar);
6831}
6832
6833#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6834size_t xnn_init_qu8_f32_cvt_neon_params(
6835 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6836 float scale,
6837 uint8_t zero_point)
6838{
6839 params->neon.minus_zero_point[0] = -(int16_t) zero_point;
6840 params->neon.minus_zero_point[1] = -(int16_t) zero_point;
6841 params->neon.scale = scale;
6842 return sizeof(params->neon);
6843}
6844#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6845
6846#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6847size_t xnn_init_qu8_f32_cvt_sse2_params(
6848 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6849 float scale,
6850 uint8_t zero_point)
6851{
6852 for (uint32_t i = 0; i < 8; i++) {
6853 params->sse2.magic_exp[i] = UINT16_C(0x4B00);
6854 }
6855 const float magic_bias = (float) (INT32_C(0x00800000) + (int32_t) zero_point);
6856 for (uint32_t i = 0; i < 4; i++) {
6857 params->sse2.magic_bias[i] = magic_bias;
6858 params->sse2.scale[i] = scale;
6859 }
6860 return sizeof(params->sse2);
6861}
6862
6863size_t xnn_init_qu8_f32_cvt_sse4_params(
6864 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6865 float scale,
6866 uint8_t zero_point)
6867{
6868 for (uint32_t i = 0; i < 4; i++) {
6869 params->sse4.minus_zero_point[i] = -(int32_t) zero_point;
6870 params->sse4.scale[i] = scale;
6871 }
6872 return sizeof(params->sse4);
6873}
6874
6875size_t xnn_init_qu8_f32_cvt_avx_params(
6876 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6877 float scale,
6878 uint8_t zero_point)
6879{
6880 for (uint32_t i = 0; i < 8; i++) {
6881 params->avx.minus_zero_point[i] = -(int32_t) zero_point;
6882 params->avx.scale[i] = scale;
6883 }
6884 return sizeof(params->avx);
6885}
6886
6887size_t xnn_init_qu8_f32_cvt_avx512_params(
6888 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6889 float scale,
6890 uint8_t zero_point)
6891{
6892 for (uint32_t i = 0; i < 16; i++) {
6893 params->avx512.minus_zero_point[i] = -(int32_t) zero_point;
6894 params->avx512.scale[i] = scale;
6895 }
6896 return sizeof(params->avx512);
6897}
6898#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6899
6900#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6901size_t xnn_init_qu8_f32_cvt_wasmsimd_params(
6902 union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
6903 float scale,
6904 uint8_t zero_point)
6905{
6906 for (uint32_t i = 0; i < 4; i++) {
6907 params->wasmsimd.minus_zero_point[i] = -(int16_t) zero_point;
6908 }
6909 for (uint32_t i = 0; i < 2; i++) {
6910 params->wasmsimd.scale[i] = scale;
6911 }
6912 return sizeof(params->wasmsimd);
6913}
6914#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6915