1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/tsl/platform/cpu_info.h"
17
18#include "absl/base/call_once.h"
19#include "tensorflow/tsl/platform/logging.h"
20#include "tensorflow/tsl/platform/platform.h"
21#include "tensorflow/tsl/platform/types.h"
22#if defined(PLATFORM_IS_X86)
23#include <mutex> // NOLINT
24#endif
25
// CPUID-based feature querying is only implemented for x86 builds.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes the CPUID instruction with
// EAX = a_inp and ECX = c_inp and stores the resulting EAX, EBX, ECX and EDX
// values into a, b, c and d respectively.
#ifdef PLATFORM_IS_X86
#ifdef PLATFORM_WINDOWS
// Windows builds go through the __cpuidex compiler intrinsic, which fills a
// four-element int array in EAX, EBX, ECX, EDX order.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  {                                        \
    int cpuid_regs[4] = {-1};              \
    __cpuidex(cpuid_regs, a_inp, c_inp);   \
    a = cpuid_regs[0];                     \
    b = cpuid_regs[1];                     \
    c = cpuid_regs[2];                     \
    d = cpuid_regs[3];                     \
  }
#else
// Everywhere else, use gcc-style extended asm. RBX is copied into RDI before
// CPUID runs and swapped back afterwards, so RBX is left unchanged and the
// EBX result is delivered through RDI (the "=D" output).
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
#endif
#endif
49
50namespace tsl {
51namespace port {
52namespace {
53
54#ifdef PLATFORM_IS_X86
// Both are defined further down in this file; they are declared up front
// because the static members of CPUIDInfo refer to them.
class CPUIDInfo;
void InitCPUIDInfo();

// File-local CPUID snapshot. Stays null until CPUIDInfo::Initialize()
// allocates it.
CPUIDInfo* cpuid = nullptr;
59
// GetXCR0EAX() reads extended control register 0 with XGETBV (ECX = 0) and
// returns the value as an int; the upper half (EDX) is discarded.
#ifdef PLATFORM_WINDOWS
// The compiler provides XGETBV as the _xgetbv intrinsic.
int GetXCR0EAX() { return _xgetbv(0); }
#else
int GetXCR0EAX() {
  int xcr0_lo, xcr0_hi;
  asm("XGETBV" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
  return xcr0_lo;
}
#endif
70
71// Structure for basic CPUID info
72class CPUIDInfo {
73 public:
74 CPUIDInfo()
75 : have_adx_(0),
76 have_aes_(0),
77 have_amx_bf16_(0),
78 have_amx_int8_(0),
79 have_amx_tile_(0),
80 have_avx_(0),
81 have_avx2_(0),
82 have_avx512f_(0),
83 have_avx512cd_(0),
84 have_avx512er_(0),
85 have_avx512pf_(0),
86 have_avx512vl_(0),
87 have_avx512bw_(0),
88 have_avx512dq_(0),
89 have_avx512vbmi_(0),
90 have_avx512ifma_(0),
91 have_avx512_4vnniw_(0),
92 have_avx512_4fmaps_(0),
93 have_avx512_bf16_(0),
94 have_avx512_vnni_(0),
95 have_avx_vnni_(0),
96 have_bmi1_(0),
97 have_bmi2_(0),
98 have_cmov_(0),
99 have_cmpxchg16b_(0),
100 have_cmpxchg8b_(0),
101 have_f16c_(0),
102 have_fma_(0),
103 have_mmx_(0),
104 have_pclmulqdq_(0),
105 have_popcnt_(0),
106 have_prefetchw_(0),
107 have_prefetchwt1_(0),
108 have_rdrand_(0),
109 have_rdseed_(0),
110 have_smap_(0),
111 have_sse_(0),
112 have_sse2_(0),
113 have_sse3_(0),
114 have_sse4_1_(0),
115 have_sse4_2_(0),
116 have_ssse3_(0),
117 have_hypervisor_(0) {}
118
119 static void Initialize() {
120 // Initialize cpuid struct
121 CHECK(cpuid == nullptr) << __func__ << " ran more than once";
122 cpuid = new CPUIDInfo;
123
124 uint32 eax, ebx, ecx, edx;
125
126 // Get vendor string (issue CPUID with eax = 0)
127 GETCPUID(eax, ebx, ecx, edx, 0, 0);
128 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
129 cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
130 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
131
132 // To get general information and extended features we send eax = 1 and
133 // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx.
134 // (See Intel 64 and IA-32 Architectures Software Developer's Manual
135 // Volume 2A: Instruction Set Reference, A-M CPUID).
136 GETCPUID(eax, ebx, ecx, edx, 1, 0);
137
138 cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
139 cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
140
141 cpuid->have_aes_ = (ecx >> 25) & 0x1;
142 cpuid->have_cmov_ = (edx >> 15) & 0x1;
143 cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
144 cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
145 cpuid->have_mmx_ = (edx >> 23) & 0x1;
146 cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
147 cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
148 cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
149 cpuid->have_sse2_ = (edx >> 26) & 0x1;
150 cpuid->have_sse3_ = ecx & 0x1;
151 cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
152 cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
153 cpuid->have_sse_ = (edx >> 25) & 0x1;
154 cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
155 cpuid->have_hypervisor_ = (ecx >> 31) & 1;
156
157 const uint64 xcr0_xmm_mask = 0x2;
158 const uint64 xcr0_ymm_mask = 0x4;
159 const uint64 xcr0_maskreg_mask = 0x20;
160 const uint64 xcr0_zmm0_15_mask = 0x40;
161 const uint64 xcr0_zmm16_31_mask = 0x80;
162
163 const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
164 const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
165 xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
166
167 const bool have_avx =
168 // Does the OS support XGETBV instruction use by applications?
169 ((ecx >> 27) & 0x1) &&
170 // Does the OS save/restore XMM and YMM state?
171 ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
172 // Is AVX supported in hardware?
173 ((ecx >> 28) & 0x1);
174
175 const bool have_avx512 =
176 // Does the OS support XGETBV instruction use by applications?
177 ((ecx >> 27) & 0x1) &&
178 // Does the OS save/restore ZMM state?
179 ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
180
181 cpuid->have_avx_ = have_avx;
182 cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
183 cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
184
185 // Get standard level 7 structured extension features (issue CPUID with
186 // eax = 7 and ecx = 0), which is required to check for AVX2 support as
187 // well as other Haswell (and beyond) features. (See Intel 64 and IA-32
188 // Architectures Software Developer's Manual Volume 2A: Instruction Set
189 // Reference, A-M CPUID).
190 GETCPUID(eax, ebx, ecx, edx, 7, 0);
191 const uint32 kMaxNumSubLeaves = eax;
192
193 cpuid->have_adx_ = (ebx >> 19) & 0x1;
194 cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
195 cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
196 cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
197 cpuid->have_prefetchwt1_ = ecx & 0x1;
198 cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
199 cpuid->have_smap_ = (ebx >> 20) & 0x1;
200
201 cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
202 cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
203 cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
204 cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
205 cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
206 cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
207 cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
208 cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
209 cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
210 cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
211 cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
212 cpuid->have_avx512_vnni_ = have_avx512 && ((ecx >> 11) & 0x1);
213
214 // The latest Intel 64 and IA-32 Architectures Software Developer's Manual
215 // Volume 2A (December 2021) does not have information on AMX yet. We use
216 // the information from Xbyak in oneDNN.
217 // https://github.com/oneapi-src/oneDNN/blob/acf8d214cedfe7e24c9446bacc1f9f648c9273f8/src/cpu/x64/xbyak/xbyak_util.h#L536-L538
218 cpuid->have_amx_tile_ = (edx >> 24) & 0x1;
219 cpuid->have_amx_int8_ = (edx >> 25) & 0x1;
220 cpuid->have_amx_bf16_ = (edx >> 22) & 0x1;
221
222 // Get more Structured Extended Feature info by issuing CPUID with
223 // sub-leaf = 1 (eax = 7, ecx = 1)
224 if (kMaxNumSubLeaves >= 1) {
225 GETCPUID(eax, ebx, ecx, edx, 7, 1);
226 cpuid->have_avx_vnni_ = (eax >> 4) & 0x1;
227 cpuid->have_avx512_bf16_ = have_avx512 && ((eax >> 5) & 0x1);
228 }
229 }
230
231 static bool TestFeature(CPUFeature feature) {
232 InitCPUIDInfo();
233 // clang-format off
234 switch (feature) {
235 case ADX: return cpuid->have_adx_;
236 case AES: return cpuid->have_aes_;
237 case AMX_BF16: return cpuid->have_amx_bf16_;
238 case AMX_INT8: return cpuid->have_amx_int8_;
239 case AMX_TILE: return cpuid->have_amx_tile_;
240 case AVX2: return cpuid->have_avx2_;
241 case AVX: return cpuid->have_avx_;
242 case AVX512F: return cpuid->have_avx512f_;
243 case AVX512CD: return cpuid->have_avx512cd_;
244 case AVX512PF: return cpuid->have_avx512pf_;
245 case AVX512ER: return cpuid->have_avx512er_;
246 case AVX512VL: return cpuid->have_avx512vl_;
247 case AVX512BW: return cpuid->have_avx512bw_;
248 case AVX512DQ: return cpuid->have_avx512dq_;
249 case AVX512VBMI: return cpuid->have_avx512vbmi_;
250 case AVX512IFMA: return cpuid->have_avx512ifma_;
251 case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
252 case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
253 case AVX512_BF16: return cpuid->have_avx512_bf16_;
254 case AVX512_VNNI: return cpuid->have_avx512_vnni_;
255 case AVX_VNNI: return cpuid->have_avx_vnni_;
256 case BMI1: return cpuid->have_bmi1_;
257 case BMI2: return cpuid->have_bmi2_;
258 case CMOV: return cpuid->have_cmov_;
259 case CMPXCHG16B: return cpuid->have_cmpxchg16b_;
260 case CMPXCHG8B: return cpuid->have_cmpxchg8b_;
261 case F16C: return cpuid->have_f16c_;
262 case FMA: return cpuid->have_fma_;
263 case MMX: return cpuid->have_mmx_;
264 case PCLMULQDQ: return cpuid->have_pclmulqdq_;
265 case POPCNT: return cpuid->have_popcnt_;
266 case PREFETCHW: return cpuid->have_prefetchw_;
267 case PREFETCHWT1: return cpuid->have_prefetchwt1_;
268 case RDRAND: return cpuid->have_rdrand_;
269 case RDSEED: return cpuid->have_rdseed_;
270 case SMAP: return cpuid->have_smap_;
271 case SSE2: return cpuid->have_sse2_;
272 case SSE3: return cpuid->have_sse3_;
273 case SSE4_1: return cpuid->have_sse4_1_;
274 case SSE4_2: return cpuid->have_sse4_2_;
275 case SSE: return cpuid->have_sse_;
276 case SSSE3: return cpuid->have_ssse3_;
277 case HYPERVISOR: return cpuid->have_hypervisor_;
278 default:
279 break;
280 }
281 // clang-format on
282 return false;
283 }
284
285 string vendor_str() const { return vendor_str_; }
286 int family() const { return family_; }
287 int model_num() { return model_num_; }
288
289 private:
290 int have_adx_ : 1;
291 int have_aes_ : 1;
292 int have_amx_bf16_ : 1;
293 int have_amx_int8_ : 1;
294 int have_amx_tile_ : 1;
295 int have_avx_ : 1;
296 int have_avx2_ : 1;
297 int have_avx512f_ : 1;
298 int have_avx512cd_ : 1;
299 int have_avx512er_ : 1;
300 int have_avx512pf_ : 1;
301 int have_avx512vl_ : 1;
302 int have_avx512bw_ : 1;
303 int have_avx512dq_ : 1;
304 int have_avx512vbmi_ : 1;
305 int have_avx512ifma_ : 1;
306 int have_avx512_4vnniw_ : 1;
307 int have_avx512_4fmaps_ : 1;
308 int have_avx512_bf16_ : 1;
309 int have_avx512_vnni_ : 1;
310 int have_avx_vnni_ : 1;
311 int have_bmi1_ : 1;
312 int have_bmi2_ : 1;
313 int have_cmov_ : 1;
314 int have_cmpxchg16b_ : 1;
315 int have_cmpxchg8b_ : 1;
316 int have_f16c_ : 1;
317 int have_fma_ : 1;
318 int have_mmx_ : 1;
319 int have_pclmulqdq_ : 1;
320 int have_popcnt_ : 1;
321 int have_prefetchw_ : 1;
322 int have_prefetchwt1_ : 1;
323 int have_rdrand_ : 1;
324 int have_rdseed_ : 1;
325 int have_smap_ : 1;
326 int have_sse_ : 1;
327 int have_sse2_ : 1;
328 int have_sse3_ : 1;
329 int have_sse4_1_ : 1;
330 int have_sse4_2_ : 1;
331 int have_ssse3_ : 1;
332 int have_hypervisor_ : 1;
333 string vendor_str_;
334 int family_;
335 int model_num_;
336};
337
338absl::once_flag cpuid_once_flag;
339
340void InitCPUIDInfo() {
341 // This ensures that CPUIDInfo::Initialize() is called exactly
342 // once regardless of how many threads concurrently call us
343 absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
344}
345
346#endif // PLATFORM_IS_X86
347
348} // namespace
349
350bool TestCPUFeature(CPUFeature feature) {
351#ifdef PLATFORM_IS_X86
352 return CPUIDInfo::TestFeature(feature);
353#else
354 return false;
355#endif
356}
357
// Returns the CPU vendor string gathered from CPUID, or an empty string on
// builds without PLATFORM_IS_X86.
std::string CPUVendorIDString() {
#if !defined(PLATFORM_IS_X86)
  return "";
#else
  InitCPUIDInfo();  // Make sure the snapshot exists before reading it.
  return cpuid->vendor_str();
#endif
}
366
// Returns the CPU family number gathered from CPUID, or 0 on builds without
// PLATFORM_IS_X86.
int CPUFamily() {
#if !defined(PLATFORM_IS_X86)
  return 0;
#else
  InitCPUIDInfo();  // Make sure the snapshot exists before reading it.
  return cpuid->family();
#endif
}
375
// Returns the CPU model number gathered from CPUID, or 0 on builds without
// PLATFORM_IS_X86.
int CPUModelNum() {
#if !defined(PLATFORM_IS_X86)
  return 0;
#else
  InitCPUIDInfo();  // Make sure the snapshot exists before reading it.
  return cpuid->model_num();
#endif
}
384
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when that
// information is not available (leaf 11 unsupported or empty, sub-leaf 0 not
// of level type 1, or a build without PLATFORM_IS_X86).
//
// References:
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
// https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
// Section: Detecting Hardware Multi-threads Support and Topology
// Only the leaf-11 topology enumeration of Intel x86 architectures is
// handled; other cases are not supported.
int CPUIDNumSMT() {
#ifdef PLATFORM_IS_X86
  uint32 eax, ebx, ecx, edx;

  // Leaf 0 reports the highest basic leaf in EAX; leaf 11 must be in range.
  GETCPUID(eax, ebx, ecx, edx, 0, 0);
  const bool leaf11_in_range = eax >= 11;
  if (leaf11_in_range) {
    GETCPUID(eax, ebx, ecx, edx, 11, 0);
    // Leaf 11 is populated when CPUID.(EAX=11, ECX=0):EBX != 0.
    const bool leaf11_populated = ebx != 0;
    // ECX[15:8] is the level type of this sub-leaf; it must be 1 for
    // EAX[4:0] to be the SMT mask width.
    const uint32 level_type = (ecx >> 8) & 0xff;
    const uint32 smt_mask_width = eax & 0x1f;
    if (leaf11_populated && level_type == 1) {
      return 1 << smt_mask_width;  // 2 ^ SMT_Mask_Width
    }
  }
#endif  // PLATFORM_IS_X86
  return 0;
}
407
408} // namespace port
409} // namespace tsl
410