1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
#include <cstring>
#include <mutex>
#include <string>

#ifdef __linux__
#include <sys/syscall.h>
#include <unistd.h>
#endif

#include "common/utils.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
24
25namespace dnnl {
26namespace impl {
27namespace cpu {
28namespace x64 {
29
30namespace {
31#ifdef DNNL_ENABLE_MAX_CPU_ISA
32cpu_isa_t init_max_cpu_isa() {
33 cpu_isa_t max_cpu_isa_val = isa_all;
34 static std::string isa_val = getenv_string_user("MAX_CPU_ISA");
35 if (!isa_val.empty()) {
36
37#define IF_HANDLE_CASE(cpu_isa) \
38 if (isa_val.compare(cpu_isa_traits<cpu_isa>::user_option_env) == 0) \
39 max_cpu_isa_val = cpu_isa
40#define ELSEIF_HANDLE_CASE(cpu_isa) else IF_HANDLE_CASE(cpu_isa)
41
42 IF_HANDLE_CASE(isa_all);
43 ELSEIF_HANDLE_CASE(sse41);
44 ELSEIF_HANDLE_CASE(avx);
45 ELSEIF_HANDLE_CASE(avx2);
46 ELSEIF_HANDLE_CASE(avx2_vnni);
47 ELSEIF_HANDLE_CASE(avx2_vnni_2);
48 ELSEIF_HANDLE_CASE(avx512_core);
49 ELSEIF_HANDLE_CASE(avx512_core_vnni);
50 ELSEIF_HANDLE_CASE(avx512_core_bf16);
51 ELSEIF_HANDLE_CASE(avx512_core_fp16);
52 ELSEIF_HANDLE_CASE(avx512_core_amx);
53 ELSEIF_HANDLE_CASE(avx512_core_amx_fp16);
54
55#undef IF_HANDLE_CASE
56#undef ELSEIF_HANDLE_CASE
57 }
58 return max_cpu_isa_val;
59}
60
61set_once_before_first_get_setting_t<cpu_isa_t> &max_cpu_isa() {
62 static set_once_before_first_get_setting_t<cpu_isa_t> max_cpu_isa_setting(
63 init_max_cpu_isa());
64 return max_cpu_isa_setting;
65}
66#endif
67
68#ifdef DNNL_ENABLE_CPU_ISA_HINTS
69dnnl_cpu_isa_hints_t init_cpu_isa_hints() {
70 dnnl_cpu_isa_hints_t cpu_isa_hints_val = dnnl_cpu_isa_no_hints;
71 static std::string hints_val = getenv_string_user("CPU_ISA_HINTS");
72 if (!hints_val.empty()) {
73 if (hints_val.compare("prefer_ymm") == 0)
74 cpu_isa_hints_val = dnnl_cpu_isa_prefer_ymm;
75 }
76 return cpu_isa_hints_val;
77}
78
79set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t> &cpu_isa_hints() {
80 static set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t>
81 cpu_isa_hints_setting(init_cpu_isa_hints());
82 return cpu_isa_hints_setting;
83}
84#endif
85} // namespace
86
87struct isa_info_t {
88 isa_info_t(cpu_isa_t aisa) : isa(aisa) {};
89
90 // this converter is needed as code base defines certain ISAs
91 // that the library does not expose (e.g. avx512_core_bf16_ymm),
92 // so the internal and external enum types do not coincide.
93 dnnl_cpu_isa_t convert_to_public_enum(void) const {
94 switch (isa) {
95 case avx512_core_amx_fp16: return dnnl_cpu_isa_avx512_core_amx_fp16;
96 case avx512_core_amx: return dnnl_cpu_isa_avx512_core_amx;
97 case avx512_core_fp16: return dnnl_cpu_isa_avx512_core_fp16;
98 case avx512_core_bf16_ymm: // fallback to avx512_core_bf16
99 case avx512_core_bf16: return dnnl_cpu_isa_avx512_core_bf16;
100 case avx512_core_vnni: return dnnl_cpu_isa_avx512_core_vnni;
101 case avx512_core: return dnnl_cpu_isa_avx512_core;
102 case avx2_vnni_2: return dnnl_cpu_isa_avx2_vnni_2;
103 case avx2_vnni: return dnnl_cpu_isa_avx2_vnni;
104 case avx2: return dnnl_cpu_isa_avx2;
105 case avx: return dnnl_cpu_isa_avx;
106 case sse41: return dnnl_cpu_isa_sse41;
107 default: return dnnl_cpu_isa_default;
108 }
109 }
110
111 const char *get_name() const {
112 switch (isa) {
113 case avx512_core_amx_fp16:
114 return "Intel AVX-512 with float16, Intel DL Boost and "
115 "bfloat16 support and Intel AMX with bfloat16, float16 "
116 "and 8-bit integer support ";
117 case avx512_core_amx:
118 return "Intel AVX-512 with float16, Intel DL Boost and "
119 "bfloat16 support and Intel AMX with bfloat16 and 8-bit "
120 "integer support";
121 case avx512_core_fp16:
122 return "Intel AVX-512 with float16, Intel DL Boost and "
123 "bfloat16 support ";
124 case avx512_core_bf16_ymm:
125 return "Intel AVX-512 with Intel DL Boost and bfloat16 support "
126 "on Ymm/Zmm";
127 case avx512_core_bf16:
128 return "Intel AVX-512 with Intel DL Boost and bfloat16 support";
129 case avx512_core_vnni: return "Intel AVX-512 with Intel DL Boost";
130 case avx512_core:
131 return "Intel AVX-512 with AVX512BW, AVX512VL, and AVX512DQ "
132 "extensions";
133 case avx2_vnni_2:
134 return "Intel AVX2 with Intel DL Boost, float16 and bfloat16 "
135 "support";
136 case avx2_vnni: return "Intel AVX2 with Intel DL Boost";
137 case avx2: return "Intel AVX2";
138 case avx: return "Intel AVX";
139 case sse41: return "Intel SSE4.1";
140 default: return "Intel 64";
141 }
142 }
143
144 cpu_isa_t isa;
145};
146
147static isa_info_t get_isa_info_t(void) {
148#ifdef DNNL_ENABLE_MAX_CPU_ISA
149 // descending order due to mayiuse check
150#define HANDLE_CASE(cpu_isa) \
151 if (mayiuse(cpu_isa)) return isa_info_t(cpu_isa);
152 HANDLE_CASE(avx512_core_amx_fp16);
153 HANDLE_CASE(avx512_core_amx);
154 HANDLE_CASE(avx512_core_fp16);
155 HANDLE_CASE(avx512_core_bf16_ymm);
156 HANDLE_CASE(avx512_core_bf16);
157 HANDLE_CASE(avx512_core_vnni);
158 HANDLE_CASE(avx512_core);
159 HANDLE_CASE(avx2_vnni_2);
160 HANDLE_CASE(avx2_vnni);
161 HANDLE_CASE(avx2);
162 HANDLE_CASE(avx);
163 HANDLE_CASE(sse41);
164#undef HANDLE_CASE
165#endif
166 return isa_info_t(isa_undef);
167}
168
169const char *get_isa_info() {
170 return get_isa_info_t().get_name();
171}
172
173cpu_isa_t get_max_cpu_isa() {
174 return get_isa_info_t().isa;
175}
176
177cpu_isa_t get_max_cpu_isa_mask(bool soft) {
178 MAYBE_UNUSED(soft);
179#ifdef DNNL_ENABLE_MAX_CPU_ISA
180 return max_cpu_isa().get(soft);
181#else
182 return isa_all;
183#endif
184}
185
186dnnl_cpu_isa_hints_t get_cpu_isa_hints(bool soft) {
187 MAYBE_UNUSED(soft);
188#ifdef DNNL_ENABLE_CPU_ISA_HINTS
189 return cpu_isa_hints().get(soft);
190#else
191 return dnnl_cpu_isa_no_hints;
192#endif
193}
194
195status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) {
196 using namespace dnnl::impl::status;
197#ifdef DNNL_ENABLE_MAX_CPU_ISA
198 using namespace dnnl::impl;
199 using namespace dnnl::impl::cpu;
200
201 cpu_isa_t isa_to_set = isa_undef;
202#define HANDLE_CASE(cpu_isa) \
203 case cpu_isa_traits<cpu_isa>::user_option_val: isa_to_set = cpu_isa; break;
204 switch (isa) {
205 HANDLE_CASE(isa_all);
206 HANDLE_CASE(sse41);
207 HANDLE_CASE(avx);
208 HANDLE_CASE(avx2);
209 HANDLE_CASE(avx2_vnni);
210 HANDLE_CASE(avx2_vnni_2);
211 HANDLE_CASE(avx512_core);
212 HANDLE_CASE(avx512_core_vnni);
213 HANDLE_CASE(avx512_core_bf16);
214 HANDLE_CASE(avx512_core_amx);
215 HANDLE_CASE(avx512_core_fp16);
216 HANDLE_CASE(avx512_core_amx_fp16);
217 default: return invalid_arguments;
218 }
219 assert(isa_to_set != isa_undef);
220#undef HANDLE_CASE
221
222 if (max_cpu_isa().set(isa_to_set))
223 return success;
224 else
225 return invalid_arguments;
226#else
227 return unimplemented;
228#endif
229}
230
231dnnl_cpu_isa_t get_effective_cpu_isa() {
232 return get_isa_info_t().convert_to_public_enum();
233}
234
235status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) {
236 using namespace dnnl::impl::status;
237#ifdef DNNL_ENABLE_CPU_ISA_HINTS
238 using namespace dnnl::impl;
239 using namespace dnnl::impl::cpu;
240
241 if (cpu_isa_hints().set(isa_hints))
242 return success;
243 else
244 return runtime_error;
245#else
246 return unimplemented;
247#endif
248}
249
250namespace amx {
251
252int get_max_palette() {
253 if (mayiuse(amx_tile)) {
254 static const unsigned int EAX = []() {
255 unsigned int data[4] = {};
256 Xbyak::util::Cpu::getCpuidEx(0x1D, 0, data);
257 return data[0];
258 }();
259 return EAX;
260 } else {
261 return 0;
262 }
263}
264int get_target_palette() {
265 constexpr int max_supported_palette = 1;
266 return nstl::min(max_supported_palette, get_max_palette());
267}
268
269namespace {
270enum class info_kind_t { max_tiles, max_column_bytes, max_rows };
271
272std::vector<int> get_palettes_info(info_kind_t info_kind) {
273 std::vector<int> palettes_info;
274 for (int p = 1; p <= get_max_palette(); p++) {
275 unsigned int data[4] = {};
276 const unsigned int &EBX = data[1];
277 const unsigned int &ECX = data[2];
278 Xbyak::util::Cpu::getCpuidEx(0x1D, p, data);
279
280 switch (info_kind) {
281 case info_kind_t::max_tiles:
282 palettes_info.push_back(EBX >> 16);
283 break;
284 case info_kind_t::max_column_bytes:
285 palettes_info.push_back((EBX << 16) >> 16);
286 break;
287 case info_kind_t::max_rows:
288 palettes_info.push_back((ECX << 16) >> 16);
289 break;
290 default: assert(!"unknown info_kind"); break;
291 }
292 }
293 assert((int)palettes_info.size() == get_max_palette());
294 return palettes_info;
295}
296
297} // namespace
298
299int get_max_tiles(int palette) {
300 if (mayiuse(amx_tile)) {
301 if (palette > get_max_palette() || palette <= 0) return -1;
302 static const std::vector<int> palettes
303 = get_palettes_info(info_kind_t::max_tiles);
304 return palettes.at(palette - 1);
305 } else {
306 return 0;
307 }
308}
309
310int get_max_column_bytes(int palette) {
311 if (mayiuse(amx_tile)) {
312 if (palette > get_max_palette() || palette <= 0) return -1;
313 static const std::vector<int> palettes
314 = get_palettes_info(info_kind_t::max_column_bytes);
315 return palettes.at(palette - 1);
316 } else {
317 return 0;
318 }
319}
320
321int get_max_rows(int palette) {
322 if (mayiuse(amx_tile)) {
323 if (palette > get_max_palette() || palette <= 0) return -1;
324 static const std::vector<int> palettes
325 = get_palettes_info(info_kind_t::max_rows);
326 return palettes.at(palette - 1);
327 } else {
328 return 0;
329 }
330}
331
332namespace {
#ifdef __linux__
// arch_prctl() sub-commands for querying / requesting permission to use an
// XSTATE component, and the XSTATE component number of the AMX tile data.
// These are typed constants rather than macros so that they stay private to
// this namespace and cannot clash with definitions from system headers.
constexpr int arch_get_xcomp_perm = 0x1022;
constexpr int arch_req_xcomp_perm = 0x1023;
// Declared as unsigned long because it is passed through the variadic
// syscall() interface, where the kernel reads a full-width argument.
constexpr unsigned long xfeature_xtiledata = 18;
constexpr unsigned long xfeature_mask_xtiledata = 1UL << xfeature_xtiledata;

// Makes sure the process is permitted to use the AMX tile data state.
//
// Returns true when the permission bitmask reported by the kernel already
// contains the tile data bit, or when requesting it succeeds and a follow-up
// query confirms it. Returns false when any of the system calls fails or the
// bit is still missing afterwards, in which case TMUL must not be used.
bool init() {
    unsigned long bitmask = 0;
    long status = syscall(SYS_arch_prctl, arch_get_xcomp_perm, &bitmask);
    if (status != 0) return false;

    // Permission has already been granted; nothing to request.
    if (bitmask & xfeature_mask_xtiledata) return true;

    status = syscall(SYS_arch_prctl, arch_req_xcomp_perm, xfeature_xtiledata);
    if (status != 0) return false; // the request itself was rejected

    // Re-read the bitmask to verify the request actually took effect.
    status = syscall(SYS_arch_prctl, arch_get_xcomp_perm, &bitmask);
    return status == 0 && (bitmask & xfeature_mask_xtiledata) != 0;
}
#else
// No permission request is performed on other platforms.
bool init() {
    return true;
}
#endif
366
367set_once_before_first_get_setting_t<bool> &amx_setting() {
368 static set_once_before_first_get_setting_t<bool> setting(init());
369 return setting;
370}
371} // namespace
372
373bool is_available() {
374 return amx_setting().get();
375}
376} // namespace amx
377
378} // namespace x64
379} // namespace cpu
380} // namespace impl
381} // namespace dnnl
382