1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/tsl/platform/cpu_info.h" |
17 | |
18 | #include "absl/base/call_once.h" |
19 | #include "tensorflow/tsl/platform/logging.h" |
20 | #include "tensorflow/tsl/platform/platform.h" |
21 | #include "tensorflow/tsl/platform/types.h" |
22 | #if defined(PLATFORM_IS_X86) |
23 | #include <mutex> // NOLINT |
24 | #endif |
25 | |
// SIMD extension querying is only available on x86.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes the CPUID instruction with
// EAX = a_inp and ECX = c_inp, then stores the resulting EAX, EBX, ECX and EDX
// register values into a, b, c and d respectively. The arguments a..d must be
// assignable lvalues.
#ifdef PLATFORM_IS_X86
#ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function for CPUID, so use that if possible.
// This variant expands to a brace-enclosed block that declares a temporary
// four-element array for the results.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  {                                        \
    int cpu_info[4] = {-1};                \
    __cpuidex(cpu_info, a_inp, c_inp);     \
    a = cpu_info[0];                       \
    b = cpu_info[1];                       \
    c = cpu_info[2];                       \
    d = cpu_info[3];                       \
  }
#else
// Otherwise use gcc-format assembler to implement the underlying instructions.
// RBX is copied into RDI before CPUID runs and the two are exchanged
// afterwards, so RBX holds its original value again when the asm ends and the
// EBX result is handed back through RDI (the "=D" operand). The "2"
// constraint places c_inp in the same register as output operand 2 (ECX).
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
#endif
#endif
49 | |
50 | namespace tsl { |
51 | namespace port { |
52 | namespace { |
53 | |
54 | #ifdef PLATFORM_IS_X86 |
// Forward declarations: CPUIDInfo::TestFeature() calls InitCPUIDInfo(), which
// is defined after the class body.
class CPUIDInfo;
void InitCPUIDInfo();

// File-local pointer to the CPUID snapshot. It stays null until
// CPUIDInfo::Initialize() allocates the object; nothing in this file deletes
// it afterwards.
CPUIDInfo *cpuid = nullptr;
59 | |
// GetXCR0EAX() reads extended control register 0 (XCR0) with XGETBV and
// returns the part of the result that fits in an int (the EAX half on the
// inline-asm path).
#ifdef PLATFORM_WINDOWS
// Visual Studio provides the _xgetbv intrinsic, so use that if possible.
int GetXCR0EAX() {
  const int xcr0_low = _xgetbv(0);
  return xcr0_low;
}
#else
int GetXCR0EAX() {
  int low_half;
  // XGETBV also writes EDX; it is bound to an output operand so the compiler
  // knows the register is overwritten, but the value itself is not used.
  int high_half;
  asm("XGETBV" : "=a"(low_half), "=d"(high_half) : "c"(0));
  return low_half;
}
#endif
70 | |
71 | // Structure for basic CPUID info |
72 | class CPUIDInfo { |
73 | public: |
74 | CPUIDInfo() |
75 | : have_adx_(0), |
76 | have_aes_(0), |
77 | have_amx_bf16_(0), |
78 | have_amx_int8_(0), |
79 | have_amx_tile_(0), |
80 | have_avx_(0), |
81 | have_avx2_(0), |
82 | have_avx512f_(0), |
83 | have_avx512cd_(0), |
84 | have_avx512er_(0), |
85 | have_avx512pf_(0), |
86 | have_avx512vl_(0), |
87 | have_avx512bw_(0), |
88 | have_avx512dq_(0), |
89 | have_avx512vbmi_(0), |
90 | have_avx512ifma_(0), |
91 | have_avx512_4vnniw_(0), |
92 | have_avx512_4fmaps_(0), |
93 | have_avx512_bf16_(0), |
94 | have_avx512_vnni_(0), |
95 | have_avx_vnni_(0), |
96 | have_bmi1_(0), |
97 | have_bmi2_(0), |
98 | have_cmov_(0), |
99 | have_cmpxchg16b_(0), |
100 | have_cmpxchg8b_(0), |
101 | have_f16c_(0), |
102 | have_fma_(0), |
103 | have_mmx_(0), |
104 | have_pclmulqdq_(0), |
105 | have_popcnt_(0), |
106 | have_prefetchw_(0), |
107 | have_prefetchwt1_(0), |
108 | have_rdrand_(0), |
109 | have_rdseed_(0), |
110 | have_smap_(0), |
111 | have_sse_(0), |
112 | have_sse2_(0), |
113 | have_sse3_(0), |
114 | have_sse4_1_(0), |
115 | have_sse4_2_(0), |
116 | have_ssse3_(0), |
117 | have_hypervisor_(0) {} |
118 | |
119 | static void Initialize() { |
120 | // Initialize cpuid struct |
121 | CHECK(cpuid == nullptr) << __func__ << " ran more than once" ; |
122 | cpuid = new CPUIDInfo; |
123 | |
124 | uint32 eax, ebx, ecx, edx; |
125 | |
126 | // Get vendor string (issue CPUID with eax = 0) |
127 | GETCPUID(eax, ebx, ecx, edx, 0, 0); |
128 | cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4); |
129 | cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4); |
130 | cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4); |
131 | |
132 | // To get general information and extended features we send eax = 1 and |
133 | // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. |
134 | // (See Intel 64 and IA-32 Architectures Software Developer's Manual |
135 | // Volume 2A: Instruction Set Reference, A-M CPUID). |
136 | GETCPUID(eax, ebx, ecx, edx, 1, 0); |
137 | |
138 | cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf); |
139 | cpuid->family_ = static_cast<int>((eax >> 8) & 0xf); |
140 | |
141 | cpuid->have_aes_ = (ecx >> 25) & 0x1; |
142 | cpuid->have_cmov_ = (edx >> 15) & 0x1; |
143 | cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1; |
144 | cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1; |
145 | cpuid->have_mmx_ = (edx >> 23) & 0x1; |
146 | cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1; |
147 | cpuid->have_popcnt_ = (ecx >> 23) & 0x1; |
148 | cpuid->have_rdrand_ = (ecx >> 30) & 0x1; |
149 | cpuid->have_sse2_ = (edx >> 26) & 0x1; |
150 | cpuid->have_sse3_ = ecx & 0x1; |
151 | cpuid->have_sse4_1_ = (ecx >> 19) & 0x1; |
152 | cpuid->have_sse4_2_ = (ecx >> 20) & 0x1; |
153 | cpuid->have_sse_ = (edx >> 25) & 0x1; |
154 | cpuid->have_ssse3_ = (ecx >> 9) & 0x1; |
155 | cpuid->have_hypervisor_ = (ecx >> 31) & 1; |
156 | |
157 | const uint64 xcr0_xmm_mask = 0x2; |
158 | const uint64 xcr0_ymm_mask = 0x4; |
159 | const uint64 xcr0_maskreg_mask = 0x20; |
160 | const uint64 xcr0_zmm0_15_mask = 0x40; |
161 | const uint64 xcr0_zmm16_31_mask = 0x80; |
162 | |
163 | const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask; |
164 | const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask | |
165 | xcr0_zmm0_15_mask | xcr0_zmm16_31_mask; |
166 | |
167 | const bool have_avx = |
168 | // Does the OS support XGETBV instruction use by applications? |
169 | ((ecx >> 27) & 0x1) && |
170 | // Does the OS save/restore XMM and YMM state? |
171 | ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) && |
172 | // Is AVX supported in hardware? |
173 | ((ecx >> 28) & 0x1); |
174 | |
175 | const bool have_avx512 = |
176 | // Does the OS support XGETBV instruction use by applications? |
177 | ((ecx >> 27) & 0x1) && |
178 | // Does the OS save/restore ZMM state? |
179 | ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask); |
180 | |
181 | cpuid->have_avx_ = have_avx; |
182 | cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1); |
183 | cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1); |
184 | |
185 | // Get standard level 7 structured extension features (issue CPUID with |
186 | // eax = 7 and ecx = 0), which is required to check for AVX2 support as |
187 | // well as other Haswell (and beyond) features. (See Intel 64 and IA-32 |
188 | // Architectures Software Developer's Manual Volume 2A: Instruction Set |
189 | // Reference, A-M CPUID). |
190 | GETCPUID(eax, ebx, ecx, edx, 7, 0); |
191 | const uint32 kMaxNumSubLeaves = eax; |
192 | |
193 | cpuid->have_adx_ = (ebx >> 19) & 0x1; |
194 | cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1); |
195 | cpuid->have_bmi1_ = (ebx >> 3) & 0x1; |
196 | cpuid->have_bmi2_ = (ebx >> 8) & 0x1; |
197 | cpuid->have_prefetchwt1_ = ecx & 0x1; |
198 | cpuid->have_rdseed_ = (ebx >> 18) & 0x1; |
199 | cpuid->have_smap_ = (ebx >> 20) & 0x1; |
200 | |
201 | cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1); |
202 | cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1); |
203 | cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1); |
204 | cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1); |
205 | cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1); |
206 | cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1); |
207 | cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1); |
208 | cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1); |
209 | cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1); |
210 | cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1); |
211 | cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1); |
212 | cpuid->have_avx512_vnni_ = have_avx512 && ((ecx >> 11) & 0x1); |
213 | |
214 | // The latest Intel 64 and IA-32 Architectures Software Developer's Manual |
215 | // Volume 2A (December 2021) does not have information on AMX yet. We use |
216 | // the information from Xbyak in oneDNN. |
217 | // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L536-L538 |
218 | cpuid->have_amx_tile_ = (edx >> 24) & 0x1; |
219 | cpuid->have_amx_int8_ = (edx >> 25) & 0x1; |
220 | cpuid->have_amx_bf16_ = (edx >> 22) & 0x1; |
221 | |
222 | // Get more Structured Extended Feature info by issuing CPUID with |
223 | // sub-leaf = 1 (eax = 7, ecx = 1) |
224 | if (kMaxNumSubLeaves >= 1) { |
225 | GETCPUID(eax, ebx, ecx, edx, 7, 1); |
226 | cpuid->have_avx_vnni_ = (eax >> 4) & 0x1; |
227 | cpuid->have_avx512_bf16_ = have_avx512 && ((eax >> 5) & 0x1); |
228 | } |
229 | } |
230 | |
231 | static bool TestFeature(CPUFeature feature) { |
232 | InitCPUIDInfo(); |
233 | // clang-format off |
234 | switch (feature) { |
235 | case ADX: return cpuid->have_adx_; |
236 | case AES: return cpuid->have_aes_; |
237 | case AMX_BF16: return cpuid->have_amx_bf16_; |
238 | case AMX_INT8: return cpuid->have_amx_int8_; |
239 | case AMX_TILE: return cpuid->have_amx_tile_; |
240 | case AVX2: return cpuid->have_avx2_; |
241 | case AVX: return cpuid->have_avx_; |
242 | case AVX512F: return cpuid->have_avx512f_; |
243 | case AVX512CD: return cpuid->have_avx512cd_; |
244 | case AVX512PF: return cpuid->have_avx512pf_; |
245 | case AVX512ER: return cpuid->have_avx512er_; |
246 | case AVX512VL: return cpuid->have_avx512vl_; |
247 | case AVX512BW: return cpuid->have_avx512bw_; |
248 | case AVX512DQ: return cpuid->have_avx512dq_; |
249 | case AVX512VBMI: return cpuid->have_avx512vbmi_; |
250 | case AVX512IFMA: return cpuid->have_avx512ifma_; |
251 | case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_; |
252 | case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_; |
253 | case AVX512_BF16: return cpuid->have_avx512_bf16_; |
254 | case AVX512_VNNI: return cpuid->have_avx512_vnni_; |
255 | case AVX_VNNI: return cpuid->have_avx_vnni_; |
256 | case BMI1: return cpuid->have_bmi1_; |
257 | case BMI2: return cpuid->have_bmi2_; |
258 | case CMOV: return cpuid->have_cmov_; |
259 | case CMPXCHG16B: return cpuid->have_cmpxchg16b_; |
260 | case CMPXCHG8B: return cpuid->have_cmpxchg8b_; |
261 | case F16C: return cpuid->have_f16c_; |
262 | case FMA: return cpuid->have_fma_; |
263 | case MMX: return cpuid->have_mmx_; |
264 | case PCLMULQDQ: return cpuid->have_pclmulqdq_; |
265 | case POPCNT: return cpuid->have_popcnt_; |
266 | case PREFETCHW: return cpuid->have_prefetchw_; |
267 | case PREFETCHWT1: return cpuid->have_prefetchwt1_; |
268 | case RDRAND: return cpuid->have_rdrand_; |
269 | case RDSEED: return cpuid->have_rdseed_; |
270 | case SMAP: return cpuid->have_smap_; |
271 | case SSE2: return cpuid->have_sse2_; |
272 | case SSE3: return cpuid->have_sse3_; |
273 | case SSE4_1: return cpuid->have_sse4_1_; |
274 | case SSE4_2: return cpuid->have_sse4_2_; |
275 | case SSE: return cpuid->have_sse_; |
276 | case SSSE3: return cpuid->have_ssse3_; |
277 | case HYPERVISOR: return cpuid->have_hypervisor_; |
278 | default: |
279 | break; |
280 | } |
281 | // clang-format on |
282 | return false; |
283 | } |
284 | |
285 | string vendor_str() const { return vendor_str_; } |
286 | int family() const { return family_; } |
287 | int model_num() { return model_num_; } |
288 | |
289 | private: |
290 | int have_adx_ : 1; |
291 | int have_aes_ : 1; |
292 | int have_amx_bf16_ : 1; |
293 | int have_amx_int8_ : 1; |
294 | int have_amx_tile_ : 1; |
295 | int have_avx_ : 1; |
296 | int have_avx2_ : 1; |
297 | int have_avx512f_ : 1; |
298 | int have_avx512cd_ : 1; |
299 | int have_avx512er_ : 1; |
300 | int have_avx512pf_ : 1; |
301 | int have_avx512vl_ : 1; |
302 | int have_avx512bw_ : 1; |
303 | int have_avx512dq_ : 1; |
304 | int have_avx512vbmi_ : 1; |
305 | int have_avx512ifma_ : 1; |
306 | int have_avx512_4vnniw_ : 1; |
307 | int have_avx512_4fmaps_ : 1; |
308 | int have_avx512_bf16_ : 1; |
309 | int have_avx512_vnni_ : 1; |
310 | int have_avx_vnni_ : 1; |
311 | int have_bmi1_ : 1; |
312 | int have_bmi2_ : 1; |
313 | int have_cmov_ : 1; |
314 | int have_cmpxchg16b_ : 1; |
315 | int have_cmpxchg8b_ : 1; |
316 | int have_f16c_ : 1; |
317 | int have_fma_ : 1; |
318 | int have_mmx_ : 1; |
319 | int have_pclmulqdq_ : 1; |
320 | int have_popcnt_ : 1; |
321 | int have_prefetchw_ : 1; |
322 | int have_prefetchwt1_ : 1; |
323 | int have_rdrand_ : 1; |
324 | int have_rdseed_ : 1; |
325 | int have_smap_ : 1; |
326 | int have_sse_ : 1; |
327 | int have_sse2_ : 1; |
328 | int have_sse3_ : 1; |
329 | int have_sse4_1_ : 1; |
330 | int have_sse4_2_ : 1; |
331 | int have_ssse3_ : 1; |
332 | int have_hypervisor_ : 1; |
333 | string vendor_str_; |
334 | int family_; |
335 | int model_num_; |
336 | }; |
337 | |
338 | absl::once_flag cpuid_once_flag; |
339 | |
340 | void InitCPUIDInfo() { |
341 | // This ensures that CPUIDInfo::Initialize() is called exactly |
342 | // once regardless of how many threads concurrently call us |
343 | absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize); |
344 | } |
345 | |
346 | #endif // PLATFORM_IS_X86 |
347 | |
348 | } // namespace |
349 | |
350 | bool TestCPUFeature(CPUFeature feature) { |
351 | #ifdef PLATFORM_IS_X86 |
352 | return CPUIDInfo::TestFeature(feature); |
353 | #else |
354 | return false; |
355 | #endif |
356 | } |
357 | |
// Returns the CPU vendor identification string gathered from CPUID, or an
// empty string on builds that do not define PLATFORM_IS_X86.
std::string CPUVendorIDString() {
#ifndef PLATFORM_IS_X86
  return std::string();
#else
  // Make sure the snapshot has been populated before reading from it.
  InitCPUIDInfo();
  std::string vendor = cpuid->vendor_str();
  return vendor;
#endif
}
366 | |
// Returns the CPU family value recorded from CPUID, or 0 on builds that do
// not define PLATFORM_IS_X86.
int CPUFamily() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  // Make sure the snapshot has been populated before reading from it.
  InitCPUIDInfo();
  const int family_id = cpuid->family();
  return family_id;
#endif
}
375 | |
// Returns the CPU model number recorded from CPUID, or 0 on builds that do
// not define PLATFORM_IS_X86.
int CPUModelNum() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  // Make sure the snapshot has been populated before reading from it.
  InitCPUIDInfo();
  const int model_id = cpuid->model_num();
  return model_id;
#endif
}
384 | |
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when the
// value cannot be determined (PLATFORM_IS_X86 not defined, leaf 11 not
// available, or sub-leaf 0 is not reported as the SMT level).
//
// References:
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
// https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
// Section: Detecting Hardware Multi-threads Support and Topology
// Only the leaf 11 enumeration is implemented; other cases are not supported.
int CPUIDNumSMT() {
#ifdef PLATFORM_IS_X86
  uint32 eax, ebx, ecx, edx;

  // Leaf 0 reports the highest basic leaf in EAX; leaf 11 must be within it.
  GETCPUID(eax, ebx, ecx, edx, 0, 0);
  const bool leaf_11_in_range = eax >= 11;
  if (leaf_11_in_range) {
    GETCPUID(eax, ebx, ecx, edx, 11, 0);
    // Leaf 11 is populated only if CPUID.(EAX=11, ECX=0):EBX != 0, and
    // EAX[4:0] is the SMT mask width only if ECX[15:8] (the level type) is 1.
    const uint32 level_type = (ecx >> 8) & 0xff;
    const bool describes_smt_level = (ebx != 0) && (level_type == 1);
    if (describes_smt_level) {
      const uint32 smt_mask_width = eax & 0x1f;
      return 1 << smt_mask_width;  // 2 ^ SMT_Mask_Width
    }
  }
#endif  // PLATFORM_IS_X86
  return 0;
}
407 | |
408 | } // namespace port |
409 | } // namespace tsl |
410 | |