1 | // Copyright 2022 Google LLC |
2 | // |
3 | // This source code is licensed under the BSD-style license found in the |
4 | // LICENSE file in the root directory of this source tree. |
5 | |
6 | #include <stdbool.h> |
7 | #include <math.h> // For INFINITY |
8 | |
9 | #include <xnnpack/common.h> |
10 | |
11 | #if XNN_PLATFORM_WINDOWS |
12 | #include <windows.h> |
13 | |
14 | #ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE |
15 | #define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 |
16 | #endif |
17 | #else |
18 | #include <pthread.h> |
19 | #endif |
20 | |
21 | #if !XNN_PLATFORM_WEB |
22 | #include <cpuinfo.h> |
23 | #endif |
24 | |
25 | #if XNN_ARCH_RISCV |
26 | #include <sys/auxv.h> |
27 | |
28 | #define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A')) |
29 | #endif |
30 | |
31 | #if XNN_ARCH_WASMRELAXEDSIMD |
32 | #include <wasm_simd128.h> |
33 | #endif |
34 | |
35 | #include <xnnpack/config.h> |
36 | #include <xnnpack/log.h> |
37 | |
38 | |
// Process-wide hardware configuration. Zero-initialized, so every feature
// flag reads as "not available" until init_hardware_config() populates it.
static struct xnn_hardware_config hardware_config = {0};

// One-time initialization guard used by xnn_init_hardware_config() to run
// init_hardware_config(): an INIT_ONCE on Windows, a pthread_once_t elsewhere.
#if XNN_PLATFORM_WINDOWS
static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;
#else
static pthread_once_t init_guard = PTHREAD_ONCE_INIT;
#endif
46 | |
// Probes the host CPU and records the detected ISA extensions in the
// file-scope `hardware_config`. Only the sections matching the architecture
// this file is compiled for are built; fields belonging to other
// architectures are left at their zero-initialized values.
static void init_hardware_config(void) {
  #if XNN_ARCH_ARM
    // 32-bit ARM: query cpuinfo for the baseline and NEON feature levels.
    hardware_config.use_arm_v6 = cpuinfo_has_arm_v6();
    hardware_config.use_arm_vfpv2 = cpuinfo_has_arm_vfpv2();
    hardware_config.use_arm_vfpv3 = cpuinfo_has_arm_vfpv3();
    hardware_config.use_arm_neon = cpuinfo_has_arm_neon();
    hardware_config.use_arm_neon_fp16 = cpuinfo_has_arm_neon_fp16();
    hardware_config.use_arm_neon_fma = cpuinfo_has_arm_neon_fma();
    hardware_config.use_arm_neon_v8 = cpuinfo_has_arm_neon_v8();
  #endif  // XNN_ARCH_ARM

  #if XNN_ARCH_ARM || XNN_ARCH_ARM64
    #if XNN_PLATFORM_WINDOWS
      // On Windows these flags are derived from Win32 queries instead of cpuinfo.
      const bool has_dot_product = !!IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);

      SYSTEM_INFO system_info;
      GetSystemInfo(&system_info);

      // Processor level 0x803 (Kryo 385 Silver) is treated as FP16-capable unconditionally.
      // For everything else, assume that Dot Product support implies FP16 support.
      // ARM manuals don't guarantee that, but it holds in practice.
      const bool has_fp16_arith = system_info.wProcessorLevel == 0x803 || has_dot_product;

      hardware_config.use_arm_neon_fp16_arith = has_fp16_arith;
      hardware_config.use_arm_fp16_arith = has_fp16_arith;
      hardware_config.use_arm_neon_bf16 = false;
      hardware_config.use_arm_neon_dot = has_dot_product;
    #else
      hardware_config.use_arm_fp16_arith = cpuinfo_has_arm_fp16_arith();
      hardware_config.use_arm_neon_fp16_arith = cpuinfo_has_arm_neon_fp16_arith();
      hardware_config.use_arm_neon_bf16 = cpuinfo_has_arm_neon_bf16();
      hardware_config.use_arm_neon_dot = cpuinfo_has_arm_neon_dot();
    #endif  // XNN_PLATFORM_WINDOWS
  #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
    hardware_config.use_x86_ssse3 = cpuinfo_has_x86_ssse3();
    hardware_config.use_x86_sse4_1 = cpuinfo_has_x86_sse4_1();
    hardware_config.use_x86_avx = cpuinfo_has_x86_avx();
    hardware_config.use_x86_f16c = cpuinfo_has_x86_f16c();
    hardware_config.use_x86_fma3 = cpuinfo_has_x86_fma3();
    hardware_config.use_x86_xop = cpuinfo_has_x86_xop();
    hardware_config.use_x86_avx2 = cpuinfo_has_x86_avx2();

    // The AVX512 levels are cumulative: each one requires the previous level
    // plus additional extensions.
    const bool has_avx512f = cpuinfo_has_x86_avx512f();
    const bool has_avx512skx =
      has_avx512f && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl();
    const bool has_avx512vbmi = has_avx512skx && cpuinfo_has_x86_avx512vbmi();

    hardware_config.use_x86_avx512f = has_avx512f;
    hardware_config.use_x86_avx512skx = has_avx512skx;
    hardware_config.use_x86_avx512vbmi = has_avx512vbmi;
  #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

  #if XNN_ARCH_RISCV
    // The vector extension is reported as the 'V' bit of the AT_HWCAP auxiliary vector entry.
    hardware_config.use_rvv = (getauxval(AT_HWCAP) & COMPAT_HWCAP_ISA_V) != 0;
  #endif  // XNN_ARCH_RISCV

  #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
    // Unlike most other architectures, on x86/x86-64 when floating-point instructions
    // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
    // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
    // of two infinities (must produce NaN per IEEE 754 standard).
    // The operand is volatile so that the subtraction happens at run time.
    static const volatile float infinity = INFINITY;
    hardware_config.is_x86 = signbit(infinity - infinity);
  #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

  #if XNN_ARCH_WASMRELAXEDSIMD
    // Check if out-of-bounds behavior of Relaxed Swizzle is consistent with PSHUFB.
    // The PSHUFB reference is emulated with the regular swizzle applied to
    // indices masked with 0x8F.
    const v128_t lookup_table = wasm_i8x16_const(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    const v128_t pshufb_index_mask = wasm_i8x16_const_splat(INT8_C(0x8F));
    // volatile to confuse Clang which otherwise mis-compiles
    const volatile v128_t index_step = wasm_i8x16_const_splat(16);
    v128_t indices = wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    v128_t mismatch = wasm_i8x16_const_splat(0);
    // Every round advances each index lane by 16, so over 16 rounds the lanes
    // collectively cover all 256 possible index byte values.
    for (uint32_t round = 0; round < 16; round++) {
      const v128_t expected = wasm_i8x16_swizzle(lookup_table, wasm_v128_and(indices, pshufb_index_mask));
      const v128_t actual = __builtin_wasm_relaxed_swizzle_i8x16(lookup_table, indices);
      // Accumulate any differing bits between the two results.
      mismatch = wasm_v128_or(mismatch, wasm_v128_xor(expected, actual));
      indices = wasm_i8x16_add(indices, index_step);
    }
    hardware_config.use_wasm_pshufb = !wasm_v128_any_true(mismatch);
  #endif  // XNN_ARCH_WASMRELAXEDSIMD
}
127 | |
#if XNN_PLATFORM_WINDOWS
// Callback adapter for InitOnceExecuteOnce(): runs init_hardware_config() and
// always reports success. None of the callback arguments are used.
static BOOL CALLBACK init_hardware_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
  (void) init_once;
  (void) parameter;
  (void) context;

  init_hardware_config();
  return TRUE;
}
#endif  // XNN_PLATFORM_WINDOWS
134 | |
135 | const struct xnn_hardware_config* xnn_init_hardware_config() { |
136 | #if !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV && !(XNN_ARCH_ARM64 && XNN_PLATFORM_WINDOWS) |
137 | if (!cpuinfo_initialize()) { |
138 | xnn_log_error("failed to initialize cpuinfo" ); |
139 | return NULL; |
140 | } |
141 | #endif // !XNN_PLATFORM_WEB && !XNN_ARCH_RISCV && !(XNN_ARCH_ARM64 && XNN_PLATFORM_WINDOWS) |
142 | #if XNN_ARCH_ARM |
143 | #if XNN_PLATFORM_MOBILE |
144 | if (!cpuinfo_has_arm_neon()) { |
145 | xnn_log_debug("unsupported hardware: ARM NEON not detected" ); |
146 | return NULL; |
147 | } |
148 | #else |
149 | if (!cpuinfo_has_arm_v6()) { |
150 | xnn_log_debug("unsupported hardware: ARMv6 not detected" ); |
151 | return NULL; |
152 | } |
153 | |
154 | if (!cpuinfo_has_arm_vfpv2() && !cpuinfo_has_arm_vfpv3()) { |
155 | xnn_log_debug("unsupported hardware: VFP FPU not detected" ); |
156 | return NULL; |
157 | } |
158 | #endif |
159 | #endif // XNN_ARCH_ARM |
160 | #if XNN_ARCH_X86 |
161 | if (!cpuinfo_has_x86_sse2()) { |
162 | xnn_log_debug("unsupported hardware: SSE2 not detected" ); |
163 | return NULL; |
164 | } |
165 | #endif // XNN_ARCH_X86 |
166 | |
167 | #if XNN_PLATFORM_WINDOWS |
168 | InitOnceExecuteOnce(&init_guard, &init_hardware_config_windows, NULL, NULL); |
169 | #else |
170 | pthread_once(&init_guard, &init_hardware_config); |
171 | #endif |
172 | return &hardware_config; |
173 | } |
174 | |