1// Copyright 2022 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7#include <stddef.h>
8
9#ifdef _WIN32
10 #include <windows.h>
11#else
12 #include <pthread.h>
13#endif
14
15#include <xnnpack/common.h>
16#include <xnnpack/config.h>
17#include <xnnpack/microparams-init.h>
18#include <xnnpack/vbinary.h>
19#include <xnnpack/vadd.h>
20#include <xnnpack/vmul.h>
21
22static struct xnn_binary_elementwise_config f16_vadd_config = {0};
23static struct xnn_binary_elementwise_config f16_vdiv_config = {0};
24static struct xnn_binary_elementwise_config f16_vmax_config = {0};
25static struct xnn_binary_elementwise_config f16_vmin_config = {0};
26static struct xnn_binary_elementwise_config f16_vmul_config = {0};
27static struct xnn_binary_elementwise_config f16_vsub_config = {0};
28static struct xnn_binary_elementwise_config f16_vsqrdiff_config = {0};
29
30static struct xnn_binary_elementwise_config f32_vadd_config = {0};
31static struct xnn_binary_elementwise_config f32_vdiv_config = {0};
32static struct xnn_binary_elementwise_config f32_vmax_config = {0};
33static struct xnn_binary_elementwise_config f32_vmin_config = {0};
34static struct xnn_binary_elementwise_config f32_vmul_config = {0};
35static struct xnn_binary_elementwise_config f32_vsub_config = {0};
36static struct xnn_binary_elementwise_config f32_vsqrdiff_config = {0};
37
38static struct xnn_binary_elementwise_config qs8_vadd_config = {0};
39static struct xnn_binary_elementwise_config qs8_vmul_config = {0};
40
41static struct xnn_binary_elementwise_config qu8_vadd_config = {0};
42static struct xnn_binary_elementwise_config qu8_vmul_config = {0};
43
// One-time-initialization guards, one per configuration table. The guard type
// and its static initializer are the only platform-dependent parts, so they are
// selected once here and every guard is then declared a single time.
#if XNN_PLATFORM_WINDOWS
  typedef INIT_ONCE binary_elementwise_init_guard_t;
  #define BINARY_ELEMENTWISE_INIT_GUARD_INIT INIT_ONCE_STATIC_INIT
#else
  typedef pthread_once_t binary_elementwise_init_guard_t;
  #define BINARY_ELEMENTWISE_INIT_GUARD_INIT PTHREAD_ONCE_INIT
#endif

static binary_elementwise_init_guard_t init_guard_f16_vadd = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vdiv = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vmax = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vmin = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vmul = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vsub = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f16_vsqrdiff = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vadd = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vdiv = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vmax = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vmin = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vmul = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vsub = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_f32_vsqrdiff = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_qs8_vadd = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_qs8_vmul = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_qu8_vadd = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
static binary_elementwise_init_guard_t init_guard_qu8_vmul = BINARY_ELEMENTWISE_INIT_GUARD_INIT;
83
84
// Fills f16_vadd_config with the f16 addition microkernels for the build
// target. Nothing is written when no architecture branch is compiled in, or
// when the runtime hardware check for the selected branch fails.
static void init_f16_vadd_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vadd_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vadd_minmax_ukernel__neonfp16arith_x16;
    // The same vaddc kernel is registered for both the opc and ropc slots.
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vaddc_minmax_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vaddc_minmax_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vadd_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_avx_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vadd_minmax_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vaddc_minmax_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vaddc_minmax_ukernel__f16c_x16;
  #endif
}
118
// Fills f16_vdiv_config with the f16 division microkernels for the build
// target. Nothing is written when no architecture branch is compiled in, or
// when the runtime hardware check for the selected branch fails.
static void init_f16_vdiv_config(void) {
  #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR
    // 32-bit ARM registers the fp16arith x2 kernels.
    struct xnn_binary_elementwise_config* const cfg = &f16_vdiv_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 2;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdiv_minmax_ukernel__fp16arith_x2;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdivc_minmax_ukernel__fp16arith_x2;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vrdivc_minmax_ukernel__fp16arith_x2;
  #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR
    // ARM64 registers the aarch64 neonfp16arith x8 kernels.
    struct xnn_binary_elementwise_config* const cfg = &f16_vdiv_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 8;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdiv_minmax_ukernel__aarch64_neonfp16arith_x8;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdivc_minmax_ukernel__aarch64_neonfp16arith_x8;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vrdivc_minmax_ukernel__aarch64_neonfp16arith_x8;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vdiv_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 8;
    cfg->init.f16_minmax = xnn_init_f16_minmax_avx_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdiv_minmax_ukernel__f16c_x8;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vdivc_minmax_ukernel__f16c_x8;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vrdivc_minmax_ukernel__f16c_x8;
  #endif
}
152
// Fills f16_vmax_config with the f16 elementwise-maximum microkernels for the
// build target. No params initializer is registered by this routine. Nothing
// is written when no architecture branch is compiled in, or when the runtime
// hardware check for the selected branch fails.
static void init_f16_vmax_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vmax_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmax_ukernel__neonfp16arith_x16;
    // The same vmaxc kernel is registered for both the opc and ropc slots.
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmaxc_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmaxc_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vmax_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmax_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmaxc_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmaxc_ukernel__f16c_x16;
  #endif
}
183
// Fills f16_vmin_config with the f16 elementwise-minimum microkernels for the
// build target. No params initializer is registered by this routine. Nothing
// is written when no architecture branch is compiled in, or when the runtime
// hardware check for the selected branch fails.
static void init_f16_vmin_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vmin_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmin_ukernel__neonfp16arith_x16;
    // The same vminc kernel is registered for both the opc and ropc slots.
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vminc_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vminc_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vmin_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmin_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vminc_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vminc_ukernel__f16c_x16;
  #endif
}
214
// Fills f16_vmul_config with the f16 multiplication microkernels for the build
// target. Nothing is written when no architecture branch is compiled in, or
// when the runtime hardware check for the selected branch fails.
static void init_f16_vmul_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vmul_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmul_minmax_ukernel__neonfp16arith_x16;
    // The same vmulc kernel is registered for both the opc and ropc slots.
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmulc_minmax_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmulc_minmax_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vmul_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_avx_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmul_minmax_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmulc_minmax_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vmulc_minmax_ukernel__f16c_x16;
  #endif
}
248
// Fills f16_vsub_config with the f16 subtraction microkernels for the build
// target. Unlike the add/mul tables, the ropc slot gets its own vrsubc kernel.
// Nothing is written when no architecture branch is compiled in, or when the
// runtime hardware check for the selected branch fails.
static void init_f16_vsub_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vsub_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsub_minmax_ukernel__neonfp16arith_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsubc_minmax_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vrsubc_minmax_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vsub_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_avx_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsub_minmax_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsubc_minmax_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vrsubc_minmax_ukernel__f16c_x16;
  #endif
}
282
// Fills f16_vsqrdiff_config with the f16 squared-difference microkernels for
// the build target. Nothing is written when no architecture branch is compiled
// in, or when the runtime hardware check for the selected branch fails.
static void init_f16_vsqrdiff_config(void) {
  // The 32-bit ARM and ARM64 builds register exactly the same kernels.
  #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || \
      (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
    struct xnn_binary_elementwise_config* const cfg = &f16_vsqrdiff_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_arm_neon_fp16_arith) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_fp16arith_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiff_ukernel__neonfp16arith_x16;
    // The same vsqrdiffc kernel is registered for both the opc and ropc slots.
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiffc_ukernel__neonfp16arith_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiffc_ukernel__neonfp16arith_x16;
  #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
    struct xnn_binary_elementwise_config* const cfg = &f16_vsqrdiff_config;
    const struct xnn_hardware_config* hw = xnn_init_hardware_config();
    assert(hw != NULL);
    if (!hw->use_x86_avx2) {
      return;
    }
    cfg->element_tile = 16;
    cfg->init.f16_minmax = xnn_init_f16_minmax_avx_params;
    cfg->minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiff_ukernel__f16c_x16;
    cfg->minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiffc_ukernel__f16c_x16;
    cfg->minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f16_vsqrdiffc_ukernel__f16c_x16;
  #endif
}
316
317static void init_f32_vadd_config(void) {
318 #if XNN_ARCH_ARM
319 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
320 assert(hardware_config != NULL);
321 if (hardware_config->use_arm_neon){
322 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__neon_x8;
323 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__neon_x8;
324 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__neon_x8;
325 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
326 f32_vadd_config.element_tile = 8;
327 } else if (!XNN_PLATFORM_MOBILE) {
328 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__scalar_x8;
329 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__scalar_x8;
330 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__scalar_x8;
331 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
332 f32_vadd_config.element_tile = 8;
333 }
334 #elif XNN_ARCH_ARM64
335 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__neon_x8;
336 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__neon_x8;
337 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__neon_x8;
338 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
339 f32_vadd_config.element_tile = 8;
340 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
341 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
342 assert(hardware_config != NULL);
343 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
344 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__avx512f_x32;
345 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx512f_x32;
346 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx512f_x32;
347 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
348 f32_vadd_config.element_tile = 32;
349 } else if (hardware_config->use_x86_avx) {
350 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__avx_x16;
351 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx_x16;
352 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx_x16;
353 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_avx_params;
354 f32_vadd_config.element_tile = 16;
355 } else {
356 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__sse_x8;
357 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__sse_x8;
358 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__sse_x8;
359 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_sse_params;
360 f32_vadd_config.element_tile = 8;
361 }
362 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
363 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
364 assert(hardware_config != NULL);
365 if (hardware_config->is_x86) {
366 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__wasmsimd_x86_x16;
367 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x16;
368 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x16;
369 f32_vadd_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_ukernel__wasmsimd_x16;
370 f32_vadd_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_ukernel__wasmsimd_x16;
371 f32_vadd_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_ukernel__wasmsimd_x16;
372 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
373 f32_vadd_config.element_tile = 16;
374 } else {
375 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__wasmsimd_arm_x16;
376 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x16;
377 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x16;
378 f32_vadd_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_ukernel__wasmsimd_x16;
379 f32_vadd_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_ukernel__wasmsimd_x16;
380 f32_vadd_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_ukernel__wasmsimd_x16;
381 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
382 f32_vadd_config.element_tile = 16;
383 }
384 #elif XNN_ARCH_WASM
385 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__wasm_x8;
386 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasm_x8;
387 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__wasm_x8;
388 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
389 f32_vadd_config.element_tile = 8;
390 #elif XNN_ARCH_RISCV
391 f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__scalar_x8;
392 f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__scalar_x8;
393 f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__scalar_x8;
394 f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
395 f32_vadd_config.element_tile = 8;
396 #else
397 #error "Unsupported architecture"
398 #endif
399}
400
401static void init_f32_vdiv_config(void) {
402 #if XNN_ARCH_ARM
403 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
404 assert(hardware_config != NULL);
405 if (hardware_config->use_arm_neon){
406 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__scalar_x2;
407 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__scalar_x2;
408 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__scalar_x2;
409 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
410 f32_vdiv_config.element_tile = 2;
411 } else if (!XNN_PLATFORM_MOBILE) {
412 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__scalar_x2;
413 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__scalar_x2;
414 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__scalar_x2;
415 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
416 f32_vdiv_config.element_tile = 2;
417 }
418 #elif XNN_ARCH_ARM64
419 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__aarch64_neon_x8;
420 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__aarch64_neon_x8;
421 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__aarch64_neon_x8;
422 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
423 f32_vdiv_config.element_tile = 8;
424 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
425 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
426 assert(hardware_config != NULL);
427 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
428 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__avx512f_x32;
429 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__avx512f_x32;
430 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__avx512f_x32;
431 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
432 f32_vdiv_config.element_tile = 32;
433 } else if (hardware_config->use_x86_avx) {
434 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__avx_x16;
435 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__avx_x16;
436 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__avx_x16;
437 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_avx_params;
438 f32_vdiv_config.element_tile = 16;
439 } else {
440 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__sse_x8;
441 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__sse_x8;
442 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__sse_x8;
443 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_sse_params;
444 f32_vdiv_config.element_tile = 8;
445 }
446 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
447 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
448 assert(hardware_config != NULL);
449 if (hardware_config->is_x86) {
450 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__wasmsimd_x86_x16;
451 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x16;
452 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x16;
453 f32_vdiv_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_ukernel__wasmsimd_x16;
454 f32_vdiv_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_ukernel__wasmsimd_x16;
455 f32_vdiv_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_ukernel__wasmsimd_x16;
456 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
457 f32_vdiv_config.element_tile = 16;
458 } else {
459 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__wasmsimd_arm_x16;
460 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x16;
461 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x16;
462 f32_vdiv_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_ukernel__wasmsimd_x16;
463 f32_vdiv_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_ukernel__wasmsimd_x16;
464 f32_vdiv_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_ukernel__wasmsimd_x16;
465 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
466 f32_vdiv_config.element_tile = 16;
467 }
468 #elif XNN_ARCH_WASM
469 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__wasm_x8;
470 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__wasm_x8;
471 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__wasm_x8;
472 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
473 f32_vdiv_config.element_tile = 8;
474 #elif XNN_ARCH_RISCV
475 f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__scalar_x2;
476 f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__scalar_x2;
477 f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__scalar_x2;
478 f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
479 f32_vdiv_config.element_tile = 2;
480 #else
481 #error "Unsupported architecture"
482 #endif
483}
484
485static void init_f32_vmax_config(void) {
486 #if XNN_ARCH_ARM
487 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
488 assert(hardware_config != NULL);
489 if (hardware_config->use_arm_neon){
490 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__neon_x8;
491 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__neon_x8;
492 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__neon_x8;
493 f32_vmax_config.element_tile = 8;
494 } else if (!XNN_PLATFORM_MOBILE) {
495 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__scalar_x8;
496 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__scalar_x8;
497 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__scalar_x8;
498 f32_vmax_config.element_tile = 8;
499 }
500 #elif XNN_ARCH_ARM64
501 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__neon_x8;
502 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__neon_x8;
503 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__neon_x8;
504 f32_vmax_config.element_tile = 8;
505 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
506 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
507 assert(hardware_config != NULL);
508 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
509 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__avx512f_x32;
510 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx512f_x32;
511 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx512f_x32;
512 f32_vmax_config.element_tile = 32;
513 } else if (hardware_config->use_x86_avx) {
514 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__avx_x16;
515 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx_x16;
516 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx_x16;
517 f32_vmax_config.init.f32_default = xnn_init_f32_default_avx_params;
518 f32_vmax_config.element_tile = 16;
519 } else {
520 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__sse_x8;
521 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__sse_x8;
522 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__sse_x8;
523 f32_vmax_config.element_tile = 8;
524 }
525 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
526 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
527 assert(hardware_config != NULL);
528 if (hardware_config->is_x86) {
529 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__wasmsimd_x86_x16;
530 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasmsimd_x86_x16;
531 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasmsimd_x86_x16;
532 f32_vmax_config.element_tile = 16;
533 } else {
534 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__wasmsimd_arm_x16;
535 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasmsimd_arm_x16;
536 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasmsimd_arm_x16;
537 f32_vmax_config.element_tile = 16;
538 }
539 #elif XNN_ARCH_WASM
540 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__wasm_x8;
541 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasm_x8;
542 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__wasm_x8;
543 f32_vmax_config.element_tile = 8;
544 #elif XNN_ARCH_RISCV
545 f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__scalar_x8;
546 f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__scalar_x8;
547 f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__scalar_x8;
548 f32_vmax_config.element_tile = 8;
549 #else
550 #error "Unsupported architecture"
551 #endif
552}
553
554static void init_f32_vmin_config(void) {
555 #if XNN_ARCH_ARM
556 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
557 assert(hardware_config != NULL);
558 if (hardware_config->use_arm_neon){
559 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__neon_x8;
560 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__neon_x8;
561 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__neon_x8;
562 f32_vmin_config.element_tile = 8;
563 } else if (!XNN_PLATFORM_MOBILE) {
564 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__scalar_x8;
565 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__scalar_x8;
566 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__scalar_x8;
567 f32_vmin_config.element_tile = 8;
568 }
569 #elif XNN_ARCH_ARM64
570 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__neon_x8;
571 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__neon_x8;
572 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__neon_x8;
573 f32_vmin_config.element_tile = 8;
574 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
575 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
576 assert(hardware_config != NULL);
577 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
578 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__avx512f_x32;
579 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx512f_x32;
580 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx512f_x32;
581 f32_vmin_config.element_tile = 32;
582 } else if (hardware_config->use_x86_avx) {
583 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__avx_x16;
584 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx_x16;
585 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx_x16;
586 f32_vmin_config.init.f32_default = xnn_init_f32_default_avx_params;
587 f32_vmin_config.element_tile = 16;
588 } else {
589 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__sse_x8;
590 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__sse_x8;
591 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__sse_x8;
592 f32_vmin_config.element_tile = 8;
593 }
594 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
595 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
596 assert(hardware_config != NULL);
597 if (hardware_config->is_x86) {
598 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__wasmsimd_x86_x16;
599 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasmsimd_x86_x16;
600 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasmsimd_x86_x16;
601 f32_vmin_config.element_tile = 16;
602 } else {
603 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__wasmsimd_arm_x16;
604 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasmsimd_arm_x16;
605 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasmsimd_arm_x16;
606 f32_vmin_config.element_tile = 16;
607 }
608 #elif XNN_ARCH_WASM
609 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__wasm_x8;
610 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasm_x8;
611 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__wasm_x8;
612 f32_vmin_config.element_tile = 8;
613 #elif XNN_ARCH_RISCV
614 f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__scalar_x8;
615 f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__scalar_x8;
616 f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__scalar_x8;
617 f32_vmin_config.element_tile = 8;
618 #else
619 #error "Unsupported architecture"
620 #endif
621}
622
623static void init_f32_vmul_config(void) {
624 #if XNN_ARCH_ARM
625 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
626 assert(hardware_config != NULL);
627 if (hardware_config->use_arm_neon){
628 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__neon_x8;
629 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__neon_x8;
630 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__neon_x8;
631 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
632 f32_vmul_config.element_tile = 8;
633 } else if (!XNN_PLATFORM_MOBILE) {
634 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__scalar_x8;
635 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__scalar_x8;
636 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__scalar_x8;
637 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
638 f32_vmul_config.element_tile = 8;
639 }
640 #elif XNN_ARCH_ARM64
641 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__neon_x8;
642 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__neon_x8;
643 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__neon_x8;
644 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
645 f32_vmul_config.element_tile = 8;
646 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
647 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
648 assert(hardware_config != NULL);
649 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
650 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__avx512f_x32;
651 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx512f_x32;
652 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx512f_x32;
653 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
654 f32_vmul_config.element_tile = 32;
655 } else if (hardware_config->use_x86_avx) {
656 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__avx_x16;
657 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx_x16;
658 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx_x16;
659 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_avx_params;
660 f32_vmul_config.element_tile = 16;
661 } else {
662 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__sse_x8;
663 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__sse_x8;
664 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__sse_x8;
665 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_sse_params;
666 f32_vmul_config.element_tile = 8;
667 }
668 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
669 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
670 assert(hardware_config != NULL);
671 if (hardware_config->is_x86) {
672 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__wasmsimd_x86_x16;
673 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x16;
674 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x16;
675 f32_vmul_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_ukernel__wasmsimd_x16;
676 f32_vmul_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_ukernel__wasmsimd_x16;
677 f32_vmul_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_ukernel__wasmsimd_x16;
678 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
679 f32_vmul_config.element_tile = 16;
680 } else {
681 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__wasmsimd_arm_x16;
682 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x16;
683 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x16;
684 f32_vmul_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_ukernel__wasmsimd_x16;
685 f32_vmul_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_ukernel__wasmsimd_x16;
686 f32_vmul_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_ukernel__wasmsimd_x16;
687 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
688 f32_vmul_config.element_tile = 16;
689 }
690 #elif XNN_ARCH_WASM
691 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__wasm_x8;
692 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasm_x8;
693 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__wasm_x8;
694 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
695 f32_vmul_config.element_tile = 8;
696 #elif XNN_ARCH_RISCV
697 f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__scalar_x8;
698 f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__scalar_x8;
699 f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__scalar_x8;
700 f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
701 f32_vmul_config.element_tile = 8;
702 #else
703 #error "Unsupported architecture"
704 #endif
705}
706
707static void init_f32_vsub_config(void) {
708 #if XNN_ARCH_ARM
709 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
710 assert(hardware_config != NULL);
711 if (hardware_config->use_arm_neon){
712 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__neon_x8;
713 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__neon_x8;
714 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__neon_x8;
715 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
716 f32_vsub_config.element_tile = 8;
717 } else if (!XNN_PLATFORM_MOBILE) {
718 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__scalar_x8;
719 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__scalar_x8;
720 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__scalar_x8;
721 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
722 f32_vsub_config.element_tile = 8;
723 }
724 #elif XNN_ARCH_ARM64
725 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__neon_x8;
726 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__neon_x8;
727 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__neon_x8;
728 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
729 f32_vsub_config.element_tile = 8;
730 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
731 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
732 assert(hardware_config != NULL);
733 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
734 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__avx512f_x32;
735 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__avx512f_x32;
736 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__avx512f_x32;
737 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
738 f32_vsub_config.element_tile = 32;
739 } else if (hardware_config->use_x86_avx) {
740 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__avx_x16;
741 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__avx_x16;
742 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__avx_x16;
743 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_avx_params;
744 f32_vsub_config.element_tile = 16;
745 } else {
746 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__sse_x8;
747 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__sse_x8;
748 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__sse_x8;
749 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_sse_params;
750 f32_vsub_config.element_tile = 8;
751 }
752 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
753 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
754 assert(hardware_config != NULL);
755 if (hardware_config->is_x86) {
756 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__wasmsimd_x86_x16;
757 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x16;
758 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x16;
759 f32_vsub_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_ukernel__wasmsimd_x16;
760 f32_vsub_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_ukernel__wasmsimd_x16;
761 f32_vsub_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_ukernel__wasmsimd_x16;
762 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
763 f32_vsub_config.element_tile = 16;
764 } else {
765 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__wasmsimd_arm_x16;
766 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x16;
767 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x16;
768 f32_vsub_config.linear.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_ukernel__wasmsimd_x16;
769 f32_vsub_config.linear.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_ukernel__wasmsimd_x16;
770 f32_vsub_config.linear.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_ukernel__wasmsimd_x16;
771 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params;
772 f32_vsub_config.element_tile = 16;
773 }
774 #elif XNN_ARCH_WASM
775 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__wasm_x8;
776 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__wasm_x8;
777 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__wasm_x8;
778 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
779 f32_vsub_config.element_tile = 8;
780 #elif XNN_ARCH_RISCV
781 f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__scalar_x8;
782 f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__scalar_x8;
783 f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__scalar_x8;
784 f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params;
785 f32_vsub_config.element_tile = 8;
786 #else
787 #error "Unsupported architecture"
788 #endif
789}
790
791static void init_f32_vsqrdiff_config(void) {
792 #if XNN_ARCH_ARM
793 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
794 assert(hardware_config != NULL);
795 if (hardware_config->use_arm_neon){
796 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__neon_x8;
797 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__neon_x8;
798 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__neon_x8;
799 f32_vsqrdiff_config.element_tile = 8;
800 } else if (!XNN_PLATFORM_MOBILE) {
801 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__scalar_x8;
802 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
803 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
804 f32_vsqrdiff_config.element_tile = 8;
805 }
806 #elif XNN_ARCH_ARM64
807 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__neon_x8;
808 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__neon_x8;
809 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__neon_x8;
810 f32_vsqrdiff_config.element_tile = 8;
811 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
812 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
813 assert(hardware_config != NULL);
814 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) {
815 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__avx512f_x32;
816 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx512f_x32;
817 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx512f_x32;
818 f32_vsqrdiff_config.element_tile = 32;
819 } else if (hardware_config->use_x86_avx) {
820 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__avx_x16;
821 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx_x16;
822 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx_x16;
823 f32_vsqrdiff_config.init.f32_default = xnn_init_f32_default_avx_params;
824 f32_vsqrdiff_config.element_tile = 16;
825 } else {
826 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__sse_x8;
827 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__sse_x8;
828 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__sse_x8;
829 f32_vsqrdiff_config.element_tile = 8;
830 }
831 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
832 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__wasmsimd_x16;
833 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__wasmsimd_x16;
834 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__wasmsimd_x16;
835 f32_vsqrdiff_config.element_tile = 16;
836 #elif XNN_ARCH_WASM
837 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__scalar_x8;
838 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
839 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
840 f32_vsqrdiff_config.element_tile = 8;
841 #elif XNN_ARCH_RISCV
842 f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__scalar_x8;
843 f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
844 f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__scalar_x8;
845 f32_vsqrdiff_config.element_tile = 8;
846 #else
847 #error "Unsupported architecture"
848 #endif
849}
850
851static void init_qs8_vadd_config(void) {
852 #if XNN_ARCH_ARM
853 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
854 assert(hardware_config != NULL);
855 if (hardware_config->use_arm_neon){
856 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16;
857 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16;
858 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16;
859 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_neon_params;
860 qs8_vadd_config.element_tile = 16;
861 } else if (!XNN_PLATFORM_MOBILE) {
862 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__scalar_x1;
863 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
864 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
865 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_scalar_params;
866 qs8_vadd_config.element_tile = 1;
867 }
868 #elif XNN_ARCH_ARM64
869 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__neon_ld64_x32;
870 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32;
871 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32;
872 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_neon_params;
873 qs8_vadd_config.element_tile = 32;
874 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
875 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
876 assert(hardware_config != NULL);
877 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) {
878 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
879 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16;
880 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16;
881 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_avx512_params;
882 qs8_vadd_config.element_tile = 16;
883 } else if (hardware_config->use_x86_xop) {
884 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8;
885 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
886 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
887 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul32_params;
888 qs8_vadd_config.element_tile = 8;
889 } else if (hardware_config->use_x86_avx2) {
890 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
891 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
892 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
893 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_avx2_params;
894 qs8_vadd_config.element_tile = 16;
895 } else if (hardware_config->use_x86_avx) {
896 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x8;
897 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8;
898 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8;
899 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul32_params;
900 qs8_vadd_config.element_tile = 8;
901 } else if (hardware_config->use_x86_sse4_1) {
902 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8;
903 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
904 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
905 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_sse4_mul16_params;
906 qs8_vadd_config.element_tile = 8;
907 } else {
908 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8;
909 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8;
910 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8;
911 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_sse2_params;
912 qs8_vadd_config.element_tile = 8;
913 }
914 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
915 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32;
916 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
917 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
918 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_wasmsimd_params;
919 qs8_vadd_config.element_tile = 32;
920 #elif XNN_ARCH_WASM
921 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__scalar_x4;
922 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
923 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
924 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_scalar_params;
925 qs8_vadd_config.element_tile = 4;
926 #elif XNN_ARCH_RISCV
927 qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__scalar_x4;
928 qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
929 qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
930 qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_scalar_params;
931 qs8_vadd_config.element_tile = 4;
932 #else
933 #error "Unsupported architecture"
934 #endif
935}
936
937static void init_qs8_vmul_config(void) {
938 #if XNN_ARCH_ARM
939 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
940 assert(hardware_config != NULL);
941 if (hardware_config->use_arm_neon){
942 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x16;
943 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
944 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
945 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_rndnu_neon_params;
946 qs8_vmul_config.element_tile = 16;
947 } else if (!XNN_PLATFORM_MOBILE) {
948 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4;
949 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
950 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
951 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params;
952 qs8_vmul_config.element_tile = 4;
953 }
954 #elif XNN_ARCH_ARM64
955 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x16;
956 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
957 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
958 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_rndnu_neon_params;
959 qs8_vmul_config.element_tile = 16;
960 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
961 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
962 assert(hardware_config != NULL);
963 if (hardware_config->use_x86_avx) {
964 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16;
965 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16;
966 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16;
967 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse4_params;
968 qs8_vmul_config.element_tile = 16;
969 } else if (hardware_config->use_x86_sse4_1) {
970 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
971 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
972 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
973 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse4_params;
974 qs8_vmul_config.element_tile = 16;
975 } else {
976 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
977 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
978 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
979 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_sse2_params;
980 qs8_vmul_config.element_tile = 8;
981 }
982 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
983 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
984 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
985 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
986 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_wasmsimd_params;
987 qs8_vmul_config.element_tile = 8;
988 #elif XNN_ARCH_WASM
989 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4;
990 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
991 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
992 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params;
993 qs8_vmul_config.element_tile = 4;
994 #elif XNN_ARCH_RISCV
995 qs8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4;
996 qs8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
997 qs8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4;
998 qs8_vmul_config.init.qs8_mul = xnn_init_qs8_mul_minmax_fp32_scalar_params;
999 qs8_vmul_config.element_tile = 4;
1000 #else
1001 #error "Unsupported architecture"
1002 #endif
1003}
1004
1005static void init_qu8_vadd_config(void) {
1006 #if XNN_ARCH_ARM
1007 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1008 assert(hardware_config != NULL);
1009 if (hardware_config->use_arm_neon){
1010 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__neon_ld64_x16;
1011 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16;
1012 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16;
1013 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_neon_params;
1014 qu8_vadd_config.element_tile = 8;
1015 } else if (!XNN_PLATFORM_MOBILE) {
1016 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__scalar_x1;
1017 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x1;
1018 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x1;
1019 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_scalar_params;
1020 qu8_vadd_config.element_tile = 1;
1021 }
1022 #elif XNN_ARCH_ARM64
1023 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__neon_ld64_x32;
1024 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32;
1025 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32;
1026 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_neon_params;
1027 qu8_vadd_config.element_tile = 8;
1028 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
1029 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1030 assert(hardware_config != NULL);
1031 if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) {
1032 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16;
1033 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16;
1034 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16;
1035 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_avx512_params;
1036 qu8_vadd_config.element_tile = 16;
1037 } else if (hardware_config->use_x86_xop) {
1038 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__xop_mul32_ld32_x8;
1039 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
1040 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8;
1041 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_sse4_params;
1042 qu8_vadd_config.element_tile = 8;
1043 } else if (hardware_config->use_x86_avx2) {
1044 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_x16;
1045 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
1046 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
1047 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_avx2_params;
1048 qu8_vadd_config.element_tile = 16;
1049 } else if (hardware_config->use_x86_avx) {
1050 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__avx_mul32_ld32_x8;
1051 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8;
1052 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8;
1053 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_sse4_params;
1054 qu8_vadd_config.element_tile = 8;
1055 } else if (hardware_config->use_x86_sse4_1) {
1056 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__sse41_mul16_ld64_x8;
1057 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
1058 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
1059 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_sse2_params;
1060 qu8_vadd_config.element_tile = 8;
1061 } else {
1062 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__sse2_mul16_ld64_x8;
1063 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8;
1064 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8;
1065 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_sse2_params;
1066 qu8_vadd_config.element_tile = 8;
1067 }
1068 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1069 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32;
1070 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32;
1071 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32;
1072 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_wasmsimd_params;
1073 qu8_vadd_config.element_tile = 32;
1074 #elif XNN_ARCH_WASM
1075 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__scalar_x4;
1076 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x4;
1077 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x4;
1078 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_scalar_params;
1079 qu8_vadd_config.element_tile = 4;
1080 #elif XNN_ARCH_RISCV
1081 qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__scalar_x4;
1082 qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x4;
1083 qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__scalar_x4;
1084 qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_scalar_params;
1085 qu8_vadd_config.element_tile = 4;
1086 #else
1087 #error "Unsupported architecture"
1088 #endif
1089}
1090
1091static void init_qu8_vmul_config(void) {
1092 #if XNN_ARCH_ARM
1093 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1094 assert(hardware_config != NULL);
1095 if (hardware_config->use_arm_neon){
1096 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x16;
1097 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
1098 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
1099 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_rndnu_neon_params;
1100 qu8_vmul_config.element_tile = 16;
1101 } else if (!XNN_PLATFORM_MOBILE) {
1102 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4;
1103 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1104 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1105 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params;
1106 qu8_vmul_config.element_tile = 4;
1107 }
1108 #elif XNN_ARCH_ARM64
1109 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x16;
1110 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
1111 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16;
1112 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_rndnu_neon_params;
1113 qu8_vmul_config.element_tile = 16;
1114 #elif XNN_ARCH_X86 || XNN_ARCH_X86_64
1115 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1116 assert(hardware_config != NULL);
1117 if (hardware_config->use_x86_avx) {
1118 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16;
1119 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16;
1120 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16;
1121 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params;
1122 qu8_vmul_config.element_tile = 16;
1123 } else if (hardware_config->use_x86_sse4_1) {
1124 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
1125 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
1126 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16;
1127 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params;
1128 qu8_vmul_config.element_tile = 16;
1129 } else {
1130 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
1131 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
1132 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8;
1133 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_sse2_params;
1134 qu8_vmul_config.element_tile = 8;
1135 }
1136 #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1137 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
1138 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
1139 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8;
1140 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_wasmsimd_params;
1141 qu8_vmul_config.element_tile = 8;
1142 #elif XNN_ARCH_WASM
1143 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4;
1144 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1145 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1146 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params;
1147 qu8_vmul_config.element_tile = 4;
1148 #elif XNN_ARCH_RISCV
1149 qu8_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4;
1150 qu8_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1151 qu8_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4;
1152 qu8_vmul_config.init.qu8_mul = xnn_init_qu8_mul_minmax_fp32_scalar_params;
1153 qu8_vmul_config.element_tile = 4;
1154 #else
1155 #error "Unsupported architecture"
1156 #endif
1157}
1158
#if XNN_PLATFORM_WINDOWS
  // InitOnceExecuteOnce takes a BOOL CALLBACK(PINIT_ONCE, PVOID, PVOID*)
  // function, while the config initializers in this file are void(void).
  // This macro defines the adapter init_<name>_config_windows, which ignores
  // its arguments, calls init_<name>_config() and returns TRUE.
  #define XNN_DEFINE_INIT_ONCE_ADAPTER(name)                       \
    static BOOL CALLBACK init_##name##_config_windows(             \
        PINIT_ONCE init_once, PVOID parameter, PVOID* context) {   \
      init_##name##_config();                                      \
      return TRUE;                                                 \
    }

  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vadd)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vdiv)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vmax)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vmin)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vmul)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vsub)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f16_vsqrdiff)

  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vadd)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vdiv)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vmax)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vmin)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vmul)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vsub)
  XNN_DEFINE_INIT_ONCE_ADAPTER(f32_vsqrdiff)

  XNN_DEFINE_INIT_ONCE_ADAPTER(qs8_vadd)
  XNN_DEFINE_INIT_ONCE_ADAPTER(qs8_vmul)

  XNN_DEFINE_INIT_ONCE_ADAPTER(qu8_vadd)
  XNN_DEFINE_INIT_ONCE_ADAPTER(qu8_vmul)

  #undef XNN_DEFINE_INIT_ONCE_ADAPTER
#endif
1250
1251static bool is_f16_compatible_config(const struct xnn_hardware_config hardware_config[restrict XNN_MIN_ELEMENTS(1)]) {
1252 #if (XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR) || (XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR)
1253 return hardware_config->use_arm_neon_fp16_arith;
1254 #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE
1255 return hardware_config->use_x86_avx2;
1256 #else
1257 return false;
1258 #endif
1259}
1260
1261const struct xnn_binary_elementwise_config* xnn_init_f16_vadd_config() {
1262 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1263 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1264 return NULL;
1265 }
1266 #if XNN_PLATFORM_WINDOWS
1267 InitOnceExecuteOnce(&init_guard_f16_vadd, &init_f16_vadd_config_windows, NULL, NULL);
1268 #else
1269 pthread_once(&init_guard_f16_vadd, &init_f16_vadd_config);
1270 #endif
1271 return &f16_vadd_config;
1272}
1273
1274const struct xnn_binary_elementwise_config* xnn_init_f16_vdiv_config() {
1275 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1276 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1277 return NULL;
1278 }
1279 #if XNN_PLATFORM_WINDOWS
1280 InitOnceExecuteOnce(&init_guard_f16_vdiv, &init_f16_vdiv_config_windows, NULL, NULL);
1281 #else
1282 pthread_once(&init_guard_f16_vdiv, &init_f16_vdiv_config);
1283 #endif
1284 return &f16_vdiv_config;
1285}
1286
1287const struct xnn_binary_elementwise_config* xnn_init_f16_vmax_config() {
1288 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1289 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1290 return NULL;
1291 }
1292 #if XNN_PLATFORM_WINDOWS
1293 InitOnceExecuteOnce(&init_guard_f16_vmax, &init_f16_vmax_config_windows, NULL, NULL);
1294 #else
1295 pthread_once(&init_guard_f16_vmax, &init_f16_vmax_config);
1296 #endif
1297 return &f16_vmax_config;
1298}
1299
1300const struct xnn_binary_elementwise_config* xnn_init_f16_vmin_config() {
1301 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1302 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1303 return NULL;
1304 }
1305 #if XNN_PLATFORM_WINDOWS
1306 InitOnceExecuteOnce(&init_guard_f16_vmin, &init_f16_vmin_config_windows, NULL, NULL);
1307 #else
1308 pthread_once(&init_guard_f16_vmin, &init_f16_vmin_config);
1309 #endif
1310 return &f16_vmin_config;
1311}
1312
1313const struct xnn_binary_elementwise_config* xnn_init_f16_vmul_config() {
1314 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1315 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1316 return NULL;
1317 }
1318 #if XNN_PLATFORM_WINDOWS
1319 InitOnceExecuteOnce(&init_guard_f16_vmul, &init_f16_vmul_config_windows, NULL, NULL);
1320 #else
1321 pthread_once(&init_guard_f16_vmul, &init_f16_vmul_config);
1322 #endif
1323 return &f16_vmul_config;
1324}
1325
1326const struct xnn_binary_elementwise_config* xnn_init_f16_vsub_config() {
1327 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1328 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1329 return NULL;
1330 }
1331 #if XNN_PLATFORM_WINDOWS
1332 InitOnceExecuteOnce(&init_guard_f16_vsub, &init_f16_vsub_config_windows, NULL, NULL);
1333 #else
1334 pthread_once(&init_guard_f16_vsub, &init_f16_vsub_config);
1335 #endif
1336 return &f16_vsub_config;
1337}
1338
1339const struct xnn_binary_elementwise_config* xnn_init_f16_vsqrdiff_config() {
1340 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1341 if (hardware_config == NULL || !is_f16_compatible_config(hardware_config)) {
1342 return NULL;
1343 }
1344 #if XNN_PLATFORM_WINDOWS
1345 InitOnceExecuteOnce(&init_guard_f16_vsqrdiff, &init_f16_vsqrdiff_config_windows, NULL, NULL);
1346 #else
1347 pthread_once(&init_guard_f16_vsqrdiff, &init_f16_vsqrdiff_config);
1348 #endif
1349 return &f16_vsqrdiff_config;
1350}
1351
1352const struct xnn_binary_elementwise_config* xnn_init_f32_vadd_config() {
1353 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1354 if (hardware_config == NULL) {
1355 return NULL;
1356 }
1357 #if XNN_PLATFORM_WINDOWS
1358 InitOnceExecuteOnce(&init_guard_f32_vadd, &init_f32_vadd_config_windows, NULL, NULL);
1359 #else
1360 pthread_once(&init_guard_f32_vadd, &init_f32_vadd_config);
1361 #endif
1362 return &f32_vadd_config;
1363}
1364
1365const struct xnn_binary_elementwise_config* xnn_init_f32_vdiv_config() {
1366 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1367 if (hardware_config == NULL) {
1368 return NULL;
1369 }
1370 #if XNN_PLATFORM_WINDOWS
1371 InitOnceExecuteOnce(&init_guard_f32_vdiv, &init_f32_vdiv_config_windows, NULL, NULL);
1372 #else
1373 pthread_once(&init_guard_f32_vdiv, &init_f32_vdiv_config);
1374 #endif
1375 return &f32_vdiv_config;
1376}
1377
1378const struct xnn_binary_elementwise_config* xnn_init_f32_vmax_config() {
1379 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1380 if (hardware_config == NULL) {
1381 return NULL;
1382 }
1383 #if XNN_PLATFORM_WINDOWS
1384 InitOnceExecuteOnce(&init_guard_f32_vmax, &init_f32_vmax_config_windows, NULL, NULL);
1385 #else
1386 pthread_once(&init_guard_f32_vmax, &init_f32_vmax_config);
1387 #endif
1388 return &f32_vmax_config;
1389}
1390
1391const struct xnn_binary_elementwise_config* xnn_init_f32_vmin_config() {
1392 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1393 if (hardware_config == NULL) {
1394 return NULL;
1395 }
1396 #if XNN_PLATFORM_WINDOWS
1397 InitOnceExecuteOnce(&init_guard_f32_vmin, &init_f32_vmin_config_windows, NULL, NULL);
1398 #else
1399 pthread_once(&init_guard_f32_vmin, &init_f32_vmin_config);
1400 #endif
1401 return &f32_vmin_config;
1402}
1403
1404const struct xnn_binary_elementwise_config* xnn_init_f32_vmul_config() {
1405 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1406 if (hardware_config == NULL) {
1407 return NULL;
1408 }
1409 #if XNN_PLATFORM_WINDOWS
1410 InitOnceExecuteOnce(&init_guard_f32_vmul, &init_f32_vmul_config_windows, NULL, NULL);
1411 #else
1412 pthread_once(&init_guard_f32_vmul, &init_f32_vmul_config);
1413 #endif
1414 return &f32_vmul_config;
1415}
1416
1417const struct xnn_binary_elementwise_config* xnn_init_f32_vsub_config() {
1418 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1419 if (hardware_config == NULL) {
1420 return NULL;
1421 }
1422 #if XNN_PLATFORM_WINDOWS
1423 InitOnceExecuteOnce(&init_guard_f32_vsub, &init_f32_vsub_config_windows, NULL, NULL);
1424 #else
1425 pthread_once(&init_guard_f32_vsub, &init_f32_vsub_config);
1426 #endif
1427 return &f32_vsub_config;
1428}
1429
1430const struct xnn_binary_elementwise_config* xnn_init_f32_vsqrdiff_config() {
1431 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1432 if (hardware_config == NULL) {
1433 return NULL;
1434 }
1435 #if XNN_PLATFORM_WINDOWS
1436 InitOnceExecuteOnce(&init_guard_f32_vsqrdiff, &init_f32_vsqrdiff_config_windows, NULL, NULL);
1437 #else
1438 pthread_once(&init_guard_f32_vsqrdiff, &init_f32_vsqrdiff_config);
1439 #endif
1440 return &f32_vsqrdiff_config;
1441}
1442
1443const struct xnn_binary_elementwise_config* xnn_init_qs8_vadd_config() {
1444 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1445 if (hardware_config == NULL) {
1446 return NULL;
1447 }
1448 #if XNN_PLATFORM_WINDOWS
1449 InitOnceExecuteOnce(&init_guard_qs8_vadd, &init_qs8_vadd_config_windows, NULL, NULL);
1450 #else
1451 pthread_once(&init_guard_qs8_vadd, &init_qs8_vadd_config);
1452 #endif
1453 return &qs8_vadd_config;
1454}
1455
1456const struct xnn_binary_elementwise_config* xnn_init_qs8_vmul_config() {
1457 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1458 if (hardware_config == NULL) {
1459 return NULL;
1460 }
1461 #if XNN_PLATFORM_WINDOWS
1462 InitOnceExecuteOnce(&init_guard_qs8_vmul, &init_qs8_vmul_config_windows, NULL, NULL);
1463 #else
1464 pthread_once(&init_guard_qs8_vmul, &init_qs8_vmul_config);
1465 #endif
1466 return &qs8_vmul_config;
1467}
1468
1469const struct xnn_binary_elementwise_config* xnn_init_qu8_vadd_config() {
1470 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1471 if (hardware_config == NULL) {
1472 return NULL;
1473 }
1474 #if XNN_PLATFORM_WINDOWS
1475 InitOnceExecuteOnce(&init_guard_qu8_vadd, &init_qu8_vadd_config_windows, NULL, NULL);
1476 #else
1477 pthread_once(&init_guard_qu8_vadd, &init_qu8_vadd_config);
1478 #endif
1479 return &qu8_vadd_config;
1480}
1481
1482const struct xnn_binary_elementwise_config* xnn_init_qu8_vmul_config() {
1483 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
1484 if (hardware_config == NULL) {
1485 return NULL;
1486 }
1487 #if XNN_PLATFORM_WINDOWS
1488 InitOnceExecuteOnce(&init_guard_qu8_vmul, &init_qu8_vmul_config_windows, NULL, NULL);
1489 #else
1490 pthread_once(&init_guard_qu8_vmul, &init_qu8_vmul_config);
1491 #endif
1492 return &qu8_vmul_config;
1493}
1494