1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
#include <cstring>
#include <mutex>
#include <string>

#ifdef __linux__
#include <sys/syscall.h>
#include <unistd.h>
#endif

#include "common/utils.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
24 | |
25 | namespace dnnl { |
26 | namespace impl { |
27 | namespace cpu { |
28 | namespace x64 { |
29 | |
30 | namespace { |
31 | #ifdef DNNL_ENABLE_MAX_CPU_ISA |
32 | cpu_isa_t init_max_cpu_isa() { |
33 | cpu_isa_t max_cpu_isa_val = isa_all; |
34 | static std::string isa_val = getenv_string_user("MAX_CPU_ISA" ); |
35 | if (!isa_val.empty()) { |
36 | |
37 | #define IF_HANDLE_CASE(cpu_isa) \ |
38 | if (isa_val.compare(cpu_isa_traits<cpu_isa>::user_option_env) == 0) \ |
39 | max_cpu_isa_val = cpu_isa |
40 | #define ELSEIF_HANDLE_CASE(cpu_isa) else IF_HANDLE_CASE(cpu_isa) |
41 | |
42 | IF_HANDLE_CASE(isa_all); |
43 | ELSEIF_HANDLE_CASE(sse41); |
44 | ELSEIF_HANDLE_CASE(avx); |
45 | ELSEIF_HANDLE_CASE(avx2); |
46 | ELSEIF_HANDLE_CASE(avx2_vnni); |
47 | ELSEIF_HANDLE_CASE(avx2_vnni_2); |
48 | ELSEIF_HANDLE_CASE(avx512_core); |
49 | ELSEIF_HANDLE_CASE(avx512_core_vnni); |
50 | ELSEIF_HANDLE_CASE(avx512_core_bf16); |
51 | ELSEIF_HANDLE_CASE(avx512_core_fp16); |
52 | ELSEIF_HANDLE_CASE(avx512_core_amx); |
53 | ELSEIF_HANDLE_CASE(avx512_core_amx_fp16); |
54 | |
55 | #undef IF_HANDLE_CASE |
56 | #undef ELSEIF_HANDLE_CASE |
57 | } |
58 | return max_cpu_isa_val; |
59 | } |
60 | |
61 | set_once_before_first_get_setting_t<cpu_isa_t> &max_cpu_isa() { |
62 | static set_once_before_first_get_setting_t<cpu_isa_t> max_cpu_isa_setting( |
63 | init_max_cpu_isa()); |
64 | return max_cpu_isa_setting; |
65 | } |
66 | #endif |
67 | |
68 | #ifdef DNNL_ENABLE_CPU_ISA_HINTS |
69 | dnnl_cpu_isa_hints_t init_cpu_isa_hints() { |
70 | dnnl_cpu_isa_hints_t cpu_isa_hints_val = dnnl_cpu_isa_no_hints; |
71 | static std::string hints_val = getenv_string_user("CPU_ISA_HINTS" ); |
72 | if (!hints_val.empty()) { |
73 | if (hints_val.compare("prefer_ymm" ) == 0) |
74 | cpu_isa_hints_val = dnnl_cpu_isa_prefer_ymm; |
75 | } |
76 | return cpu_isa_hints_val; |
77 | } |
78 | |
79 | set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t> &cpu_isa_hints() { |
80 | static set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t> |
81 | cpu_isa_hints_setting(init_cpu_isa_hints()); |
82 | return cpu_isa_hints_setting; |
83 | } |
84 | #endif |
85 | } // namespace |
86 | |
87 | struct isa_info_t { |
88 | isa_info_t(cpu_isa_t aisa) : isa(aisa) {}; |
89 | |
90 | // this converter is needed as code base defines certain ISAs |
91 | // that the library does not expose (e.g. avx512_core_bf16_ymm), |
92 | // so the internal and external enum types do not coincide. |
93 | dnnl_cpu_isa_t convert_to_public_enum(void) const { |
94 | switch (isa) { |
95 | case avx512_core_amx_fp16: return dnnl_cpu_isa_avx512_core_amx_fp16; |
96 | case avx512_core_amx: return dnnl_cpu_isa_avx512_core_amx; |
97 | case avx512_core_fp16: return dnnl_cpu_isa_avx512_core_fp16; |
98 | case avx512_core_bf16_ymm: // fallback to avx512_core_bf16 |
99 | case avx512_core_bf16: return dnnl_cpu_isa_avx512_core_bf16; |
100 | case avx512_core_vnni: return dnnl_cpu_isa_avx512_core_vnni; |
101 | case avx512_core: return dnnl_cpu_isa_avx512_core; |
102 | case avx2_vnni_2: return dnnl_cpu_isa_avx2_vnni_2; |
103 | case avx2_vnni: return dnnl_cpu_isa_avx2_vnni; |
104 | case avx2: return dnnl_cpu_isa_avx2; |
105 | case avx: return dnnl_cpu_isa_avx; |
106 | case sse41: return dnnl_cpu_isa_sse41; |
107 | default: return dnnl_cpu_isa_default; |
108 | } |
109 | } |
110 | |
111 | const char *get_name() const { |
112 | switch (isa) { |
113 | case avx512_core_amx_fp16: |
114 | return "Intel AVX-512 with float16, Intel DL Boost and " |
115 | "bfloat16 support and Intel AMX with bfloat16, float16 " |
116 | "and 8-bit integer support " ; |
117 | case avx512_core_amx: |
118 | return "Intel AVX-512 with float16, Intel DL Boost and " |
119 | "bfloat16 support and Intel AMX with bfloat16 and 8-bit " |
120 | "integer support" ; |
121 | case avx512_core_fp16: |
122 | return "Intel AVX-512 with float16, Intel DL Boost and " |
123 | "bfloat16 support " ; |
124 | case avx512_core_bf16_ymm: |
125 | return "Intel AVX-512 with Intel DL Boost and bfloat16 support " |
126 | "on Ymm/Zmm" ; |
127 | case avx512_core_bf16: |
128 | return "Intel AVX-512 with Intel DL Boost and bfloat16 support" ; |
129 | case avx512_core_vnni: return "Intel AVX-512 with Intel DL Boost" ; |
130 | case avx512_core: |
131 | return "Intel AVX-512 with AVX512BW, AVX512VL, and AVX512DQ " |
132 | "extensions" ; |
133 | case avx2_vnni_2: |
134 | return "Intel AVX2 with Intel DL Boost, float16 and bfloat16 " |
135 | "support" ; |
136 | case avx2_vnni: return "Intel AVX2 with Intel DL Boost" ; |
137 | case avx2: return "Intel AVX2" ; |
138 | case avx: return "Intel AVX" ; |
139 | case sse41: return "Intel SSE4.1" ; |
140 | default: return "Intel 64" ; |
141 | } |
142 | } |
143 | |
144 | cpu_isa_t isa; |
145 | }; |
146 | |
147 | static isa_info_t get_isa_info_t(void) { |
148 | #ifdef DNNL_ENABLE_MAX_CPU_ISA |
149 | // descending order due to mayiuse check |
150 | #define HANDLE_CASE(cpu_isa) \ |
151 | if (mayiuse(cpu_isa)) return isa_info_t(cpu_isa); |
152 | HANDLE_CASE(avx512_core_amx_fp16); |
153 | HANDLE_CASE(avx512_core_amx); |
154 | HANDLE_CASE(avx512_core_fp16); |
155 | HANDLE_CASE(avx512_core_bf16_ymm); |
156 | HANDLE_CASE(avx512_core_bf16); |
157 | HANDLE_CASE(avx512_core_vnni); |
158 | HANDLE_CASE(avx512_core); |
159 | HANDLE_CASE(avx2_vnni_2); |
160 | HANDLE_CASE(avx2_vnni); |
161 | HANDLE_CASE(avx2); |
162 | HANDLE_CASE(avx); |
163 | HANDLE_CASE(sse41); |
164 | #undef HANDLE_CASE |
165 | #endif |
166 | return isa_info_t(isa_undef); |
167 | } |
168 | |
169 | const char *get_isa_info() { |
170 | return get_isa_info_t().get_name(); |
171 | } |
172 | |
173 | cpu_isa_t get_max_cpu_isa() { |
174 | return get_isa_info_t().isa; |
175 | } |
176 | |
177 | cpu_isa_t get_max_cpu_isa_mask(bool soft) { |
178 | MAYBE_UNUSED(soft); |
179 | #ifdef DNNL_ENABLE_MAX_CPU_ISA |
180 | return max_cpu_isa().get(soft); |
181 | #else |
182 | return isa_all; |
183 | #endif |
184 | } |
185 | |
186 | dnnl_cpu_isa_hints_t get_cpu_isa_hints(bool soft) { |
187 | MAYBE_UNUSED(soft); |
188 | #ifdef DNNL_ENABLE_CPU_ISA_HINTS |
189 | return cpu_isa_hints().get(soft); |
190 | #else |
191 | return dnnl_cpu_isa_no_hints; |
192 | #endif |
193 | } |
194 | |
195 | status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) { |
196 | using namespace dnnl::impl::status; |
197 | #ifdef DNNL_ENABLE_MAX_CPU_ISA |
198 | using namespace dnnl::impl; |
199 | using namespace dnnl::impl::cpu; |
200 | |
201 | cpu_isa_t isa_to_set = isa_undef; |
202 | #define HANDLE_CASE(cpu_isa) \ |
203 | case cpu_isa_traits<cpu_isa>::user_option_val: isa_to_set = cpu_isa; break; |
204 | switch (isa) { |
205 | HANDLE_CASE(isa_all); |
206 | HANDLE_CASE(sse41); |
207 | HANDLE_CASE(avx); |
208 | HANDLE_CASE(avx2); |
209 | HANDLE_CASE(avx2_vnni); |
210 | HANDLE_CASE(avx2_vnni_2); |
211 | HANDLE_CASE(avx512_core); |
212 | HANDLE_CASE(avx512_core_vnni); |
213 | HANDLE_CASE(avx512_core_bf16); |
214 | HANDLE_CASE(avx512_core_amx); |
215 | HANDLE_CASE(avx512_core_fp16); |
216 | HANDLE_CASE(avx512_core_amx_fp16); |
217 | default: return invalid_arguments; |
218 | } |
219 | assert(isa_to_set != isa_undef); |
220 | #undef HANDLE_CASE |
221 | |
222 | if (max_cpu_isa().set(isa_to_set)) |
223 | return success; |
224 | else |
225 | return invalid_arguments; |
226 | #else |
227 | return unimplemented; |
228 | #endif |
229 | } |
230 | |
231 | dnnl_cpu_isa_t get_effective_cpu_isa() { |
232 | return get_isa_info_t().convert_to_public_enum(); |
233 | } |
234 | |
235 | status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) { |
236 | using namespace dnnl::impl::status; |
237 | #ifdef DNNL_ENABLE_CPU_ISA_HINTS |
238 | using namespace dnnl::impl; |
239 | using namespace dnnl::impl::cpu; |
240 | |
241 | if (cpu_isa_hints().set(isa_hints)) |
242 | return success; |
243 | else |
244 | return runtime_error; |
245 | #else |
246 | return unimplemented; |
247 | #endif |
248 | } |
249 | |
250 | namespace amx { |
251 | |
252 | int get_max_palette() { |
253 | if (mayiuse(amx_tile)) { |
254 | static const unsigned int EAX = []() { |
255 | unsigned int data[4] = {}; |
256 | Xbyak::util::Cpu::getCpuidEx(0x1D, 0, data); |
257 | return data[0]; |
258 | }(); |
259 | return EAX; |
260 | } else { |
261 | return 0; |
262 | } |
263 | } |
264 | int get_target_palette() { |
265 | constexpr int max_supported_palette = 1; |
266 | return nstl::min(max_supported_palette, get_max_palette()); |
267 | } |
268 | |
269 | namespace { |
270 | enum class info_kind_t { max_tiles, max_column_bytes, max_rows }; |
271 | |
272 | std::vector<int> get_palettes_info(info_kind_t info_kind) { |
273 | std::vector<int> palettes_info; |
274 | for (int p = 1; p <= get_max_palette(); p++) { |
275 | unsigned int data[4] = {}; |
276 | const unsigned int &EBX = data[1]; |
277 | const unsigned int &ECX = data[2]; |
278 | Xbyak::util::Cpu::getCpuidEx(0x1D, p, data); |
279 | |
280 | switch (info_kind) { |
281 | case info_kind_t::max_tiles: |
282 | palettes_info.push_back(EBX >> 16); |
283 | break; |
284 | case info_kind_t::max_column_bytes: |
285 | palettes_info.push_back((EBX << 16) >> 16); |
286 | break; |
287 | case info_kind_t::max_rows: |
288 | palettes_info.push_back((ECX << 16) >> 16); |
289 | break; |
290 | default: assert(!"unknown info_kind" ); break; |
291 | } |
292 | } |
293 | assert((int)palettes_info.size() == get_max_palette()); |
294 | return palettes_info; |
295 | } |
296 | |
297 | } // namespace |
298 | |
299 | int get_max_tiles(int palette) { |
300 | if (mayiuse(amx_tile)) { |
301 | if (palette > get_max_palette() || palette <= 0) return -1; |
302 | static const std::vector<int> palettes |
303 | = get_palettes_info(info_kind_t::max_tiles); |
304 | return palettes.at(palette - 1); |
305 | } else { |
306 | return 0; |
307 | } |
308 | } |
309 | |
310 | int get_max_column_bytes(int palette) { |
311 | if (mayiuse(amx_tile)) { |
312 | if (palette > get_max_palette() || palette <= 0) return -1; |
313 | static const std::vector<int> palettes |
314 | = get_palettes_info(info_kind_t::max_column_bytes); |
315 | return palettes.at(palette - 1); |
316 | } else { |
317 | return 0; |
318 | } |
319 | } |
320 | |
321 | int get_max_rows(int palette) { |
322 | if (mayiuse(amx_tile)) { |
323 | if (palette > get_max_palette() || palette <= 0) return -1; |
324 | static const std::vector<int> palettes |
325 | = get_palettes_info(info_kind_t::max_rows); |
326 | return palettes.at(palette - 1); |
327 | } else { |
328 | return 0; |
329 | } |
330 | } |
331 | |
332 | namespace { |
333 | #ifdef __linux__ |
334 | #include <sys/syscall.h> |
335 | |
336 | #define XFEATURE_XTILECFG 17 |
337 | #define XFEATURE_XTILEDATA 18 |
338 | #define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) |
339 | #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) |
340 | #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) |
341 | #define ARCH_GET_XCOMP_PERM 0x1022 |
342 | #define ARCH_REQ_XCOMP_PERM 0x1023 |
343 | |
344 | bool init() { |
345 | unsigned long bitmask = 0; |
346 | long status = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); |
347 | if (0 != status) return false; |
348 | if (bitmask & XFEATURE_MASK_XTILEDATA) return true; |
349 | |
350 | status = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); |
351 | if (0 != status) |
352 | return false; // XFEATURE_XTILEDATA setup is failed, TMUL usage is not allowed |
353 | status = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); |
354 | |
355 | // XFEATURE_XTILEDATA setup is failed, can't use TMUL |
356 | if (0 != status || !(bitmask & XFEATURE_MASK_XTILEDATA)) return false; |
357 | |
358 | // XFEATURE_XTILEDATA set successfully, TMUL usage is allowed |
359 | return true; |
360 | } |
361 | #else |
362 | bool init() { |
363 | return true; |
364 | } |
365 | #endif |
366 | |
367 | set_once_before_first_get_setting_t<bool> &amx_setting() { |
368 | static set_once_before_first_get_setting_t<bool> setting(init()); |
369 | return setting; |
370 | } |
371 | } // namespace |
372 | |
373 | bool is_available() { |
374 | return amx_setting().get(); |
375 | } |
376 | } // namespace amx |
377 | |
378 | } // namespace x64 |
379 | } // namespace cpu |
380 | } // namespace impl |
381 | } // namespace dnnl |
382 | |