platform.cpp source code [oneDNN/src/cpu/platform.cpp]

1	/*******************************************************************************
2	* Copyright 2020-2022 Intel Corporation
3	* Copyright 2020 FUJITSU LIMITED
4	* Copyright 2022 Arm Ltd. and affiliates
5	*
6	* Licensed under the Apache License, Version 2.0 (the "License");
7	* you may not use this file except in compliance with the License.
8	* You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing, software
13	* distributed under the License is distributed on an "AS IS" BASIS,
14	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15	* See the License for the specific language governing permissions and
16	* limitations under the License.
17	*******************************************************************************/
18
19	#include <thread>
20
21	#include "cpu/platform.hpp"
22
23	#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
24	#include <algorithm>
25
26	#if defined(_WIN32)
27	#include <windows.h>
28	#elif defined(__GLIBC__)
29	#include <sched.h>
30	#endif
31	#endif
32
33	#if DNNL_X64
34	#include "cpu/x64/cpu_isa_traits.hpp"
35	#elif DNNL_AARCH64
36	#include "cpu/aarch64/cpu_isa_traits.hpp"
37	#if DNNL_AARCH64_USE_ACL
38	// For checking if fp16 isa is supported on the platform
39	#include "arm_compute/core/CPP/CPPTypes.h"
40	// For setting the number of threads for ACL
41	#include "src/common/cpuinfo/CpuInfo.h"
42	#endif
43	#endif
44
45	// For DNNL_X64 build we compute the timestamp using rdtsc. Use std::chrono for
46	// other builds.
47	#if !DNNL_X64
48	#include <chrono>
49	#endif
50
51	namespace dnnl {
52	namespace impl {
53	namespace cpu {
54	namespace platform {
55
// Returns the ISA information string supplied by the architecture-specific
// backend compiled into this build, or "Generic" when neither the x64 nor the
// AArch64 backend is enabled.
const char *get_isa_info() {
    const char *isa_description = "Generic";
#if DNNL_X64
    isa_description = x64::get_isa_info();
#elif DNNL_AARCH64
    isa_description = aarch64::get_isa_info();
#endif
    return isa_description;
}
65
66	dnnl_cpu_isa_t get_effective_cpu_isa() {
67	#if DNNL_X64
68	return x64::get_effective_cpu_isa();
69	#elif DNNL_AARCH64
70	return aarch64::get_effective_cpu_isa();
71	#else
72	return dnnl_cpu_isa_default;
73	#endif
74	}
75
76	status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) {
77	#if DNNL_X64
78	return x64::set_max_cpu_isa(isa);
79	#else
80	return status::unimplemented;
81	#endif
82	}
83
84	status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) {
85	#if DNNL_X64
86	return x64::set_cpu_isa_hints(isa_hints);
87	#else
88	return status::unimplemented;
89	#endif
90	}
91
92	dnnl_cpu_isa_hints_t get_cpu_isa_hints() {
93	#if DNNL_X64
94	return x64::get_cpu_isa_hints();
95	#else
96	return dnnl_cpu_isa_no_hints;
97	#endif
98	}
99
// Tells whether the x64 ISA hints are set to dnnl_cpu_isa_prefer_ymm.
// Always false on builds without the x64 backend.
bool prefer_ymm_requested() {
#if DNNL_X64
    return x64::get_cpu_isa_hints() == dnnl_cpu_isa_prefer_ymm;
#else
    return false;
#endif
}
108
109	bool has_data_type_support(data_type_t data_type) {
110	// Notice: see notes in header
111	switch (data_type) {
112	case data_type::bf16:
113	#if DNNL_X64
114	return x64::mayiuse(x64::avx512_core)
115	\|\| x64::mayiuse(x64::avx2_vnni_2);
116	#elif DNNL_PPC64
117	#if defined(USE_CBLAS) && defined(BLAS_HAS_SBGEMM) && defined(__MMA__)
118	return true;
119	#endif
120	#else
121	return false;
122	#endif
123	case data_type::f16:
124	#if DNNL_X64
125	return x64::mayiuse(x64::avx512_core_fp16)
126	\|\| x64::mayiuse(x64::avx2_vnni_2);
127	#elif DNNL_AARCH64_USE_ACL
128	return arm_compute::CPUInfo::get().has_fp16();
129	#else
130	return false;
131	#endif
132	default: return true;
133	}
134	}
135
136	bool has_training_support(data_type_t data_type) {
137	// TODO: maybe return false for int8, but some primitives like prelu
138	// have training support
139	switch (data_type) {
140	case data_type::bf16:
141	#if DNNL_X64
142	return x64::mayiuse(x64::avx512_core);
143	#elif DNNL_PPC64
144	#if defined(USE_CBLAS) && defined(BLAS_HAS_SBGEMM) && defined(__MMA__)
145	return true;
146	#endif
147	#else
148	return false;
149	#endif
150	case data_type::f16:
151	#if DNNL_X64
152	return x64::mayiuse(x64::avx512_core_fp16);
153	#else
154	return false;
155	#endif
156	default: return true;
157	}
158	}
159
// Returns the scale factor for s8s8 weights: 0.5f on x64 builds when
// avx512_core_vnni is not usable, 1.0f in every other case.
float s8s8_weights_scale_factor() {
#if DNNL_X64
    if (!x64::mayiuse(x64::avx512_core_vnni)) return 0.5f;
#endif
    return 1.0f;
}
167
// Returns the size in bytes of the data cache at `level` (1-based) available
// to a single core.
//
// On x64 the size reported for the level is divided by the number of cores
// sharing that cache. When no cache levels are reported, or on any other
// architecture, fixed estimates are used instead. Levels outside the known
// range yield 0.
unsigned get_per_core_cache_size(int level) {
    // Fixed estimates used when the cache topology cannot be queried.
    const auto estimate = [](int cache_level) -> unsigned {
        if (cache_level == 1) return 32U * 1024;
        if (cache_level == 2) return 512U * 1024;
        if (cache_level == 3) return 1024U * 1024;
        return 0U;
    };

#if DNNL_X64
    using namespace x64;
    const auto known_levels = cpu().getDataCacheLevels();
    if (known_levels == 0) return estimate(level);

    const bool level_in_range = level > 0 && (unsigned)level <= known_levels;
    if (!level_in_range) return 0;

    // Cache queries are 0-based while `level` is 1-based.
    const unsigned idx = level - 1;
    return cpu().getDataCacheSize(idx) / cpu().getCoresSharingDataCache(idx);
#else
    return estimate(level);
#endif
}
191
// Returns the core count reported by the x64 backend (Xbyak, core level) or
// by the Arm Compute Library thread hint; builds with neither report 1.
unsigned get_num_cores() {
    unsigned core_count = 1;
#if DNNL_X64
    core_count = x64::cpu().getNumCores(Xbyak::util::CoreLevel);
#elif DNNL_AARCH64_USE_ACL
    core_count = arm_compute::cpuinfo::num_threads_hint();
#endif
    return core_count;
}
201
202	#if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
203	// The purpose of this function is to return the potential maximum number of
204	// threads in user's threadpool. It is assumed that the number of threads in an
205	// actual threadpool will not exceed the number cores in a socket reported by
206	// the OS, which may or may not be equal to the number of total physical cores
207	// in a socket depending on the OS configuration (read -- VM environment). In
208	// order to simulate the number of cores available in such environment, this
209	// function supports process affinity.
210	unsigned get_max_threads_to_use() {
211	// TODO: the logic below should involve number of sockets to provide exact
212	// number of cores on 2+ socket systems.
213	int num_cores_per_socket = (int)dnnl::impl::cpu::platform::get_num_cores();
214	// It may happen that XByak doesn't get num of threads identified, e.g. for
215	// AMD. In order to make threadpool working, we supply an additional
216	// condition to have some reasonable number of threads available at
217	// primitive descriptor creation time.
218	if (num_cores_per_socket == `0`)
219	num_cores_per_socket = std::thread::hardware_concurrency();
220
221	#if defined(_WIN32)
222	DWORD_PTR proc_affinity_mask;
223	DWORD_PTR sys_affinity_mask;
224	if (GetProcessAffinityMask(
225	GetCurrentProcess(), &proc_affinity_mask, &sys_affinity_mask)) {
226	int masked_nthr = `0`;
227	for (int i = `0`; i < CHAR_BIT * sizeof(proc_affinity_mask);
228	i++, proc_affinity_mask >>= `1`)
229	masked_nthr += proc_affinity_mask & `1`;
230	return std::min(masked_nthr, num_cores_per_socket);
231	}
232	#elif defined(__GLIBC__)
233	cpu_set_t cpu_set;
234	// Check if the affinity of the process has been set using, e.g.,
235	// numactl.
236	if (::sched_getaffinity(`0`, sizeof(cpu_set_t), &cpu_set) == `0`)
237	return std::min(CPU_COUNT(&cpu_set), num_cores_per_socket);
238	#endif
239	return num_cores_per_socket;
240	}
241	#endif
242
// Returns the vector length (cpu_isa_traits<...>::vlen) of the widest vector
// ISA usable on the current CPU, or 0 when none is detected or the build has
// no x64/AArch64 backend.
int get_vector_register_size() {
#if DNNL_X64
    using namespace x64;
    if (mayiuse(avx512_core)) return cpu_isa_traits<avx512_core>::vlen;
    if (mayiuse(avx)) return cpu_isa_traits<avx>::vlen;
    if (mayiuse(sse41)) return cpu_isa_traits<sse41>::vlen;
#elif DNNL_AARCH64
    using namespace aarch64;
    // Probe the widest ISA first: checking asimd first would make the
    // sve_512 branch unreachable on any CPU that supports both.
    if (mayiuse(sve_512)) return cpu_isa_traits<sve_512>::vlen;
    if (mayiuse(asimd)) return cpu_isa_traits<asimd>::vlen;
#endif
    return 0;
}
256
/* The purpose of this function is to provide a very efficient timestamp
 * calculation (used primarily for primitive cache). For DNNL_X64, this can be
 * accomplished using rdtsc since it provides a timestamp value that (i) is
 * independent for each core, and (ii) is synchronized across cores in multiple
 * sockets.
 * TODO: For now, use std::chrono::steady_clock for other builds, however
 * another more optimized function may be called here.
 */
size_t get_timestamp() {
#if DNNL_X64
    const auto ticks = Xbyak::util::Clock::getRdtsc();
    return static_cast<size_t>(ticks);
#else
    const auto since_epoch
            = std::chrono::steady_clock::now().time_since_epoch();
    return static_cast<size_t>(since_epoch.count());
#endif
}
273
274	} // namespace platform
275	} // namespace cpu
276	} // namespace impl
277	} // namespace dnnl
278

Browse the source code of oneDNN/src/cpu/platform.cpp