1// Copyright 2022 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <stdbool.h>
7#include <math.h> // For INFINITY
8
9#include <xnnpack/common.h>
10
11#if XNN_PLATFORM_WINDOWS
12 #include <windows.h>
13
14 #ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
15 #define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
16 #endif
17#else
18 #include <pthread.h>
19#endif
20
21#if !XNN_PLATFORM_WEB
22 #include <cpuinfo.h>
23#endif
24
25#if XNN_ARCH_RISCV
26 #include <sys/auxv.h>
27
28 #define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
29#endif
30
31#if XNN_ARCH_WASMRELAXEDSIMD
32 #include <wasm_simd128.h>
33#endif
34
35#include <xnnpack/config.h>
36#include <xnnpack/log.h>
37
38
// Process-wide hardware configuration. Zero-initialized here, filled in by
// init_hardware_config(), and handed out by xnn_init_hardware_config().
static struct xnn_hardware_config hardware_config = {0};

// One-time initialization guard: passed to InitOnceExecuteOnce on Windows and
// to pthread_once elsewhere, both from xnn_init_hardware_config().
#if XNN_PLATFORM_WINDOWS
 static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
#else
 static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
#endif
46
// Fills the file-scope hardware_config with the instruction-set features
// detected for the architecture this file is compiled for. Only the section
// matching the active XNN_ARCH_* / XNN_PLATFORM_* macros is compiled in.
// Invoked through init_guard from xnn_init_hardware_config().
static void init_hardware_config(void) {
 // 32-bit ARM only: baseline and NEON feature flags, all queried from cpuinfo.
 #if XNN_ARCH_ARM
 hardware_config.use_arm_v6 = cpuinfo_has_arm_v6();
 hardware_config.use_arm_vfpv2 = cpuinfo_has_arm_vfpv2();
 hardware_config.use_arm_vfpv3 = cpuinfo_has_arm_vfpv3();
 hardware_config.use_arm_neon = cpuinfo_has_arm_neon();
 hardware_config.use_arm_neon_fp16 = cpuinfo_has_arm_neon_fp16();
 hardware_config.use_arm_neon_fma = cpuinfo_has_arm_neon_fma();
 hardware_config.use_arm_neon_v8 = cpuinfo_has_arm_neon_v8();
 #endif

 // ARMv8.2-era extensions (FP16 arithmetic, BF16, dot product), shared by
 // 32-bit and 64-bit ARM.
 #if XNN_ARCH_ARM64 || XNN_ARCH_ARM
 #if XNN_PLATFORM_WINDOWS
 // On Windows these flags come from Win32 APIs rather than cpuinfo.
 SYSTEM_INFO system_info;
 GetSystemInfo(&system_info);
 switch (system_info.wProcessorLevel) {
 // This processor level is forced to "FP16 arithmetic available" without
 // consulting IsProcessorFeaturePresent.
 // NOTE(review): presumably the dot-product heuristic below does not hold
 // for this core - confirm.
 case 0x803: // Kryo 385 Silver
 hardware_config.use_arm_neon_fp16_arith = true;
 break;
 default:
 // Assume that Dot Product support implies FP16 support.
 // ARM manuals don't guarantee that, but it holds in practice.
 // "!!" collapses the BOOL result to 0 or 1 before the assignment.
 hardware_config.use_arm_neon_fp16_arith = !!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
 break;
 }
 // Scalar FP16 arithmetic simply mirrors the NEON FP16 arithmetic flag here.
 hardware_config.use_arm_fp16_arith = hardware_config.use_arm_neon_fp16_arith;

 // No BF16 detection is performed on Windows; the flag is always off.
 hardware_config.use_arm_neon_bf16 = false;
 hardware_config.use_arm_neon_dot = !!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
 #else
 hardware_config.use_arm_fp16_arith = cpuinfo_has_arm_fp16_arith();
 hardware_config.use_arm_neon_fp16_arith = cpuinfo_has_arm_neon_fp16_arith();
 hardware_config.use_arm_neon_bf16 = cpuinfo_has_arm_neon_bf16();
 hardware_config.use_arm_neon_dot = cpuinfo_has_arm_neon_dot();
 #endif
 #endif

 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
 hardware_config.use_x86_ssse3 = cpuinfo_has_x86_ssse3();
 hardware_config.use_x86_sse4_1 = cpuinfo_has_x86_sse4_1();
 hardware_config.use_x86_avx = cpuinfo_has_x86_avx();
 hardware_config.use_x86_f16c = cpuinfo_has_x86_f16c();
 hardware_config.use_x86_fma3 = cpuinfo_has_x86_fma3();
 hardware_config.use_x86_xop = cpuinfo_has_x86_xop();
 hardware_config.use_x86_avx2 = cpuinfo_has_x86_avx2();
 hardware_config.use_x86_avx512f = cpuinfo_has_x86_avx512f();
 // "avx512skx" is a composite flag: AVX512F plus the BW, DQ and VL extensions.
 hardware_config.use_x86_avx512skx = hardware_config.use_x86_avx512f &&
 cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl();
 // VBMI is only reported on top of the full avx512skx feature set.
 hardware_config.use_x86_avx512vbmi = hardware_config.use_x86_avx512skx && cpuinfo_has_x86_avx512vbmi();
 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

 #if XNN_ARCH_RISCV
 // The vector extension is detected from the 'V' bit of the AT_HWCAP
 // auxiliary-vector entry rather than through cpuinfo.
 hardware_config.use_rvv = (getauxval(AT_HWCAP) & COMPAT_HWCAP_ISA_V) != 0;
 #endif

 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 // Unlike most other architectures, on x86/x86-64 when floating-point instructions
 // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
 // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
 // of two infinities (must produce NaN per IEEE 754 standard).
 // NOTE(review): "volatile" presumably keeps the subtraction from being
 // folded at compile time, so it runs on the actual host - confirm.
 static const volatile float inf = INFINITY;
 hardware_config.is_x86 = signbit(inf - inf);
 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

 #if XNN_ARCH_WASMRELAXEDSIMD
 // Check if out-of-bounds behavior of Relaxed Swizzle is consistent with PSHUFB.
 // The reference result comes from the regular swizzle applied to indices
 // masked with 0x8F (bit 7 plus the low four bits), which is compared against
 // the relaxed swizzle applied to the raw indices.
 const v128_t table = wasm_i8x16_const(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
 const v128_t index_mask = wasm_i8x16_const_splat(INT8_C(0x8F));
 const volatile v128_t index_increment = wasm_i8x16_const_splat(16); // volatile to confuse Clang which otherwise mis-compiles
 v128_t index = wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 v128_t diff = wasm_i8x16_const_splat(0);
 // 16 iterations, each advancing every lane by 16: the lanes start at 0..15,
 // so together they visit all 256 possible index byte values.
 for (uint32_t i = 16; i != 0; i--) {
 const v128_t pshufb_result = wasm_i8x16_swizzle(table, wasm_v128_and(index, index_mask));
 const v128_t relaxed_result = __builtin_wasm_relaxed_swizzle_i8x16(table, index);
 // Accumulate every bit in which the two results ever disagree.
 diff = wasm_v128_or(diff, wasm_v128_xor(pshufb_result, relaxed_result));
 index = wasm_i8x16_add(index, index_increment);
 }
 // The flag is set only if no index value produced a mismatch.
 hardware_config.use_wasm_pshufb = !wasm_v128_any_true(diff);
 #endif // XNN_ARCH_WASMRELAXEDSIMD
}
127
#if XNN_PLATFORM_WINDOWS
  // Adapter giving init_hardware_config() the callback signature expected by
  // InitOnceExecuteOnce. None of the callback arguments are needed; the
  // function always reports success.
  static BOOL CALLBACK init_hardware_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
    (void) init_once;
    (void) parameter;
    (void) context;

    init_hardware_config();
    return TRUE;
  }
#endif  // XNN_PLATFORM_WINDOWS
134
135const struct xnn_hardware_config* xnn_init_hardware_config() {
136 #if !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV && !(XNN_ARCH_ARM64 && XNN_PLATFORM_WINDOWS)
137 if (!cpuinfo_initialize()) {
138 xnn_log_error("failed to initialize cpuinfo");
139 return NULL;
140 }
141 #endif // !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV && !(XNN_ARCH_ARM64 && XNN_PLATFORM_WINDOWS)
142 #if XNN_ARCH_ARM
143 #if XNN_PLATFORM_MOBILE
144 if (!cpuinfo_has_arm_neon()) {
145 xnn_log_debug("unsupported hardware: ARM NEON not detected");
146 return NULL;
147 }
148 #else
149 if (!cpuinfo_has_arm_v6()) {
150 xnn_log_debug("unsupported hardware: ARMv6 not detected");
151 return NULL;
152 }
153
154 if (!cpuinfo_has_arm_vfpv2() && !cpuinfo_has_arm_vfpv3()) {
155 xnn_log_debug("unsupported hardware: VFP FPU not detected");
156 return NULL;
157 }
158 #endif
159 #endif // XNN_ARCH_ARM
160 #if XNN_ARCH_X86
161 if (!cpuinfo_has_x86_sse2()) {
162 xnn_log_debug("unsupported hardware: SSE2 not detected");
163 return NULL;
164 }
165 #endif // XNN_ARCH_X86
166
167 #if XNN_PLATFORM_WINDOWS
168 InitOnceExecuteOnce(&init_guard, &init_hardware_config_windows, NULL, NULL);
169 #else
170 pthread_once(&init_guard, &init_hardware_config);
171 #endif
172 return &hardware_config;
173}
174