1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_COMPUTE_DEVICE_INFO_HPP |
18 | #define GPU_COMPUTE_DEVICE_INFO_HPP |
19 | |
20 | #include <stdint.h> |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | |
24 | #include "common/c_types_map.hpp" |
25 | #include "common/serialization_stream.hpp" |
26 | #include "common/utils.hpp" |
27 | #include "common/z_magic.hpp" |
28 | |
29 | #include "cpu/platform.hpp" |
30 | #include "oneapi/dnnl/dnnl_config.h" |
31 | |
32 | namespace dnnl { |
33 | namespace impl { |
34 | namespace gpu { |
35 | namespace compute { |
36 | |
// Known Intel GPU architecture generations, ordered from oldest to newest.
enum class gpu_arch_t {
    unknown,
    gen9,
    gen11,
    xe_lp,
    xe_hp,
    xe_hpg,
    xe_hpc,
};

// Parses a textual architecture name into a gpu_arch_t value. The match is
// exact and case-sensitive against the enumerator spelling; any other input
// yields gpu_arch_t::unknown.
static inline gpu_arch_t str2gpu_arch(const char *str) {
    if (!strcmp(str, "gen9")) return gpu_arch_t::gen9;
    if (!strcmp(str, "gen11")) return gpu_arch_t::gen11;
    if (!strcmp(str, "xe_lp")) return gpu_arch_t::xe_lp;
    if (!strcmp(str, "xe_hp")) return gpu_arch_t::xe_hp;
    if (!strcmp(str, "xe_hpg")) return gpu_arch_t::xe_hpg;
    if (!strcmp(str, "xe_hpc")) return gpu_arch_t::xe_hpc;
    return gpu_arch_t::unknown;
}
60 | |
// Bit mask of device capabilities; each enumerator mirrors the name of an
// OpenCL extension (without the "cl_" prefix), so values can be translated
// with ext2cl_str() below.
enum class device_ext_t : uint64_t {
    // clang-format off
    // OpenCL data types
    khr_fp16 = 1ull << 0,
    khr_fp64 = 1ull << 1,
    // OpenCL atomics
    khr_global_int32_base_atomics = 1ull << 2,
    khr_global_int32_extended_atomics = 1ull << 3,
    khr_int64_base_atomics = 1ull << 4,
    khr_int64_extended_atomics = 1ull << 5,
    khr_local_int32_base_atomics = 1ull << 6,
    khr_local_int32_extended_atomics = 1ull << 7,
    // Intel specific Gen9+
    intel_subgroups = 1ull << 16,
    intel_required_subgroup_size = 1ull << 17,
    intel_subgroups_char = 1ull << 18,
    intel_subgroups_short = 1ull << 19,
    intel_subgroups_long = 1ull << 20,
    // Intel specific Xe_LP+
    intel_subgroup_local_block_io = 1ull << 21,
    intel_dot_accumulate = 1ull << 22,
    // Intel specific Xe_HP+
    intel_global_float_atomics = 1ull << 23,
    intel_subgroup_matrix_multiply_accumulate = 1ull << 24,
    intel_subgroup_split_matrix_multiply_accumulate = 1ull << 25,
    intel_variable_eu_thread_count = 1ull << 26,
    // Future extensions
    future_bf16_cvt = 1ull << 31,
    last
    // clang-format on
};

// Returns the OpenCL extension string for a single extension bit
// ("cl_" + enumerator name), or nullptr when the value does not name
// exactly one known extension. The literals below must stay in sync
// with the enumerator spellings above.
static inline const char *ext2cl_str(device_ext_t ext) {
    switch (ext) {
        case device_ext_t::khr_fp16: return "cl_khr_fp16";
        case device_ext_t::khr_fp64: return "cl_khr_fp64";

        case device_ext_t::khr_global_int32_base_atomics:
            return "cl_khr_global_int32_base_atomics";
        case device_ext_t::khr_global_int32_extended_atomics:
            return "cl_khr_global_int32_extended_atomics";
        case device_ext_t::khr_int64_base_atomics:
            return "cl_khr_int64_base_atomics";
        case device_ext_t::khr_int64_extended_atomics:
            return "cl_khr_int64_extended_atomics";
        case device_ext_t::khr_local_int32_base_atomics:
            return "cl_khr_local_int32_base_atomics";
        case device_ext_t::khr_local_int32_extended_atomics:
            return "cl_khr_local_int32_extended_atomics";

        case device_ext_t::intel_subgroups: return "cl_intel_subgroups";
        case device_ext_t::intel_required_subgroup_size:
            return "cl_intel_required_subgroup_size";
        case device_ext_t::intel_subgroups_char:
            return "cl_intel_subgroups_char";
        case device_ext_t::intel_subgroups_short:
            return "cl_intel_subgroups_short";
        case device_ext_t::intel_subgroups_long:
            return "cl_intel_subgroups_long";

        case device_ext_t::intel_subgroup_local_block_io:
            return "cl_intel_subgroup_local_block_io";
        case device_ext_t::intel_dot_accumulate:
            return "cl_intel_dot_accumulate";

        case device_ext_t::intel_global_float_atomics:
            return "cl_intel_global_float_atomics";
        case device_ext_t::intel_subgroup_matrix_multiply_accumulate:
            return "cl_intel_subgroup_matrix_multiply_accumulate";
        case device_ext_t::intel_subgroup_split_matrix_multiply_accumulate:
            return "cl_intel_subgroup_split_matrix_multiply_accumulate";
        case device_ext_t::intel_variable_eu_thread_count:
            return "cl_intel_variable_eu_thread_count";
        case device_ext_t::future_bf16_cvt: return "cl_future_bf16_cvt";
        default: return nullptr;
    }
}
125 | |
126 | struct runtime_version_t { |
127 | int major; |
128 | int minor; |
129 | int build; |
130 | |
131 | runtime_version_t(int major = 0, int minor = 0, int build = 0) |
132 | : major {major}, minor {minor}, build {build} {} |
133 | |
134 | bool operator==(const runtime_version_t &other) const { |
135 | return (major == other.major) && (minor == other.minor) |
136 | && (build == other.build); |
137 | } |
138 | |
139 | bool operator!=(const runtime_version_t &other) const { |
140 | return !(*this == other); |
141 | } |
142 | |
143 | bool operator<(const runtime_version_t &other) const { |
144 | if (major < other.major) return true; |
145 | if (major > other.major) return false; |
146 | if (minor < other.minor) return true; |
147 | if (minor > other.minor) return false; |
148 | return (build < other.build); |
149 | } |
150 | |
151 | bool operator>(const runtime_version_t &other) const { |
152 | return (other < *this); |
153 | } |
154 | |
155 | bool operator<=(const runtime_version_t &other) const { |
156 | return !(*this > other); |
157 | } |
158 | |
159 | bool operator>=(const runtime_version_t &other) const { |
160 | return !(*this < other); |
161 | } |
162 | |
163 | status_t set_from_string(const char *s) { |
164 | int i_major = 0, i = 0; |
165 | |
166 | for (; s[i] != '.'; i++) |
167 | if (!s[i]) return status::invalid_arguments; |
168 | |
169 | auto i_minor = ++i; |
170 | |
171 | for (; s[i] != '.'; i++) |
172 | if (!s[i]) return status::invalid_arguments; |
173 | |
174 | auto i_build = ++i; |
175 | |
176 | major = atoi(&s[i_major]); |
177 | minor = atoi(&s[i_minor]); |
178 | build = atoi(&s[i_build]); |
179 | |
180 | return status::success; |
181 | } |
182 | |
183 | std::string str() const { |
184 | return utils::format("%d.%d.%d" , major, minor, build); |
185 | } |
186 | }; |
187 | |
// Workaround for future HW extensions: reports the extension bits to assume
// for the given GPU architecture (e.g. future_bf16_cvt above).
189 | uint64_t get_future_extensions(compute::gpu_arch_t gpu_arch); |
190 | |
// Abstract description of a GPU device: architecture, extension bits, EU /
// thread counts, work-group limits, and the runtime (driver) version.
// Runtime-specific subclasses implement the init_* virtuals to fill the
// fields; the filled state can be serialized into a cache blob and later
// restored from it.
struct device_info_t {
public:
    virtual ~device_info_t() = default;

    // Initializes the device info. With a non-empty cache_blob the fields are
    // restored via init_from_cache_blob() instead of querying the engine;
    // otherwise each property is queried through the init_* virtuals.
    status_t init(
            engine_t *engine, const std::vector<uint8_t> &cache_blob = {}) {
        if (!cache_blob.empty()) {
            CHECK(init_from_cache_blob(cache_blob));
            // Keep a serialized copy so get_cache_blob() can return it later.
            return init_serialized_device_info(cache_blob);
        }

        CHECK(init_device_name(engine));
        CHECK(init_arch(engine));
        CHECK(init_runtime_version(engine));
        CHECK(init_extensions(engine));
        CHECK(init_attributes(engine));

        CHECK(init_attributes_common(engine));

        // Serialization is only performed for the OpenCL runtime; other
        // runtimes skip it (presumably cache blobs are OCL-only — see the
        // guard below).
        if (dnnl_version()->gpu_runtime == DNNL_RUNTIME_OCL) {
            CHECK(init_serialized_device_info());
        }

        return status::success;
    }

    // True if the given extension bit is set in extensions_.
    bool has(device_ext_t ext) const { return extensions_ & (uint64_t)ext; }
    gpu_arch_t gpu_arch() const { return gpu_arch_; }
    int stepping_id() const { return stepping_id_; }
    int max_eus_per_wg() const { return max_eus_per_wg_; }
    static int max_eus_per_wg(gpu_arch_t gpu_arch);
    int max_subgroup_size() const { return max_subgroup_size_; }
    static int max_subgroup_size(gpu_arch_t gpu_arch);
    size_t max_wg_size() const { return max_wg_size_; }
    int eu_count() const { return eu_count_; }
    // Total hardware thread count in the default GRF mode.
    int hw_threads() const { return hw_threads_[0]; }
    // Total hardware thread count; index [1] holds the large-GRF-mode value.
    int hw_threads(bool large_grf_mode) const {
        return hw_threads_[large_grf_mode ? 1 : 0];
    }
    static int threads_per_eu(gpu_arch_t gpu_arch, bool large_grf_mode = false);
    static int max_slm_size_per_tg(
            gpu_arch_t gpu_arch, bool large_grf_mode = false);
    static int slm_memory_bank_count(gpu_arch_t gpu_arch);
    static int slm_memory_bank_granularity(gpu_arch_t gpu_arch);
    size_t llc_cache_size() const { return llc_cache_size_; }

    const runtime_version_t &runtime_version() const {
        return runtime_version_;
    }
    const std::string &name() const { return name_; }

    bool mayiuse_ngen_kernels() const { return mayiuse_ngen_kernels_; }

    bool mayiuse_non_uniform_work_groups() const {
        return mayiuse_non_uniform_work_groups_;
    }

    bool mayiuse_sub_group(int size) const;

    // Returns the serialized device info built by init().
    const std::vector<uint8_t> &get_cache_blob() const {
        return serialized_device_info_.get_data();
    }

    status_t get_cache_blob_size(size_t *size) const {
        (*size) = serialized_device_info_.get_data().size();
        return status::success;
    }

    // Copies the serialized device info into the caller-provided buffer.
    // `size` must equal the value reported by get_cache_blob_size().
    status_t get_cache_blob(size_t size, uint8_t *cache_blob) const {
        const auto &cb = serialized_device_info_.get_data();
        if (size != cb.size()) return status::invalid_arguments;
        std::memcpy(cache_blob, cb.data(), size);
        return status::success;
    }

protected:
    // Runtime-specific queries; each fills the corresponding field(s) below.
    virtual status_t init_device_name(engine_t *engine) = 0;
    virtual status_t init_arch(engine_t *engine) = 0;
    virtual status_t init_runtime_version(engine_t *engine) = 0;
    virtual status_t init_extensions(engine_t *engine) = 0;
    virtual status_t init_attributes(engine_t *engine) = 0;

    compute::gpu_arch_t gpu_arch_ = compute::gpu_arch_t::unknown;
    int stepping_id_ = 0;
    bool mayiuse_ngen_kernels_ = false;

    std::string name_;
    runtime_version_t runtime_version_;

    // total number of hardware threads:
    // [0] - default mode
    // [1] - large GRF mode
    int32_t hw_threads_[2] = {0, 0};
    int32_t eu_count_ = 0;
    int32_t max_eus_per_wg_ = 0;
    int32_t max_subgroup_size_ = 0;
    size_t max_wg_size_ = 0;
    size_t llc_cache_size_ = 0;

    // extensions_ and gpu_arch_ describe effective extensions and GPU architecture.
    uint64_t extensions_ = 0;

private:
    status_t init_attributes_common(engine_t *engine);
    // Builds (or stores, when a blob is given) the serialized form of this
    // object into serialized_device_info_.
    status_t init_serialized_device_info(
            const std::vector<uint8_t> &cache_blob = {});
    status_t init_from_cache_blob(const std::vector<uint8_t> &cache_blob);

    bool mayiuse_non_uniform_work_groups_ = false;

    serialization_stream_t serialized_device_info_;
};
303 | |
304 | } // namespace compute |
305 | } // namespace gpu |
306 | } // namespace impl |
307 | } // namespace dnnl |
308 | |
309 | #endif |
310 | |