ocl_utils.hpp source code [oneDNN/src/gpu/ocl/ocl_utils.hpp]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_OCL_OCL_UTILS_HPP
18	#define GPU_OCL_OCL_UTILS_HPP
19
20	#include <cinttypes>
21	#include <memory>
22	#include <sstream>
23	#include <string.h>
24	#include <string>
25	#include <utility>
26	#include <vector>
27	#include <CL/cl.h>
28	#include <initializer_list>
29	#include <type_traits>
30	#include <unordered_map>
31	#include <unordered_set>
32
33	#include "common/c_types_map.hpp"
34	#include "common/cpp_compat.hpp"
35	#include "common/internal_defs.hpp"
36	#include "common/utils.hpp"
37	#include "common/verbose.hpp"
38	#include "gpu/compute/kernel_arg_list.hpp"
39	#include "gpu/compute/utils.hpp"
40
41	namespace dnnl {
42	namespace impl {
43	namespace gpu {
44
45	namespace compute {
46	class kernel_t;
47	}
48
49	namespace ocl {
50
51	inline status_t convert_to_dnnl(cl_int cl_status) {
52	switch (cl_status) {
53	case CL_SUCCESS: return status::success;
54	case CL_MEM_OBJECT_ALLOCATION_FAILURE:
55	case CL_OUT_OF_RESOURCES:
56	case CL_OUT_OF_HOST_MEMORY: return status::out_of_memory;
57	case CL_DEVICE_NOT_FOUND:
58	case CL_DEVICE_NOT_AVAILABLE:
59	case CL_COMPILER_NOT_AVAILABLE:
60	case CL_PROFILING_INFO_NOT_AVAILABLE:
61	case CL_MEM_COPY_OVERLAP:
62	case CL_IMAGE_FORMAT_MISMATCH:
63	case CL_IMAGE_FORMAT_NOT_SUPPORTED:
64	case CL_BUILD_PROGRAM_FAILURE:
65	case CL_MAP_FAILURE:
66	case CL_MISALIGNED_SUB_BUFFER_OFFSET:
67	case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
68	case CL_COMPILE_PROGRAM_FAILURE:
69	case CL_LINKER_NOT_AVAILABLE:
70	case CL_LINK_PROGRAM_FAILURE:
71	case CL_DEVICE_PARTITION_FAILED:
72	case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return status::runtime_error;
73	case CL_INVALID_VALUE:
74	case CL_INVALID_DEVICE_TYPE:
75	case CL_INVALID_CONTEXT:
76	case CL_INVALID_QUEUE_PROPERTIES:
77	case CL_INVALID_COMMAND_QUEUE:
78	case CL_INVALID_HOST_PTR:
79	case CL_INVALID_MEM_OBJECT:
80	case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
81	case CL_INVALID_IMAGE_SIZE:
82	case CL_INVALID_SAMPLER:
83	case CL_INVALID_BINARY:
84	case CL_INVALID_BUILD_OPTIONS:
85	case CL_INVALID_PROGRAM:
86	case CL_INVALID_PROGRAM_EXECUTABLE:
87	case CL_INVALID_KERNEL_NAME:
88	case CL_INVALID_KERNEL_DEFINITION: // FI
89	case CL_INVALID_KERNEL:
90	case CL_INVALID_ARG_INDEX:
91	case CL_INVALID_ARG_VALUE: return status::invalid_arguments;
92
93	default: return status::runtime_error;
94	}
95	}
96
// Alignment constant used for OpenCL buffers.
// NOTE(review): presumably expressed in bytes -- confirm against the users of
// this constant.
enum { OCL_BUFFER_ALIGNMENT = 128 };
98
// Prints `msg` (a C string consumed by a "%s" conversion) as an
// "onednn_verbose,gpu,error" line when get_verbose() returns non-zero.
// get_verbose() is fully qualified, as in MAYBE_REPORT_OCL_ERROR, so that the
// macro also expands correctly outside of namespace dnnl::impl.
#define MAYBE_REPORT_ERROR(msg) \
    do { \
        if (dnnl::impl::get_verbose()) \
            printf("onednn_verbose,gpu,error,%s\n", (msg)); \
    } while (0)
103
// Prints the OpenCL status `s`, converted to int, as an
// "onednn_verbose,gpu,ocl_error" line when dnnl::impl::get_verbose() returns
// non-zero.
#define MAYBE_REPORT_OCL_ERROR(s) \
    do { \
        if (dnnl::impl::get_verbose()) { \
            printf("onednn_verbose,gpu,ocl_error,%d\n", \
                    static_cast<int>(s)); \
        } \
    } while (0)
109
// Evaluates the OpenCL call `x` once. If the result is not CL_SUCCESS, the
// status is reported through MAYBE_REPORT_OCL_ERROR and the enclosing function
// returns; the bare `return;` means that function has to return void.
//
// The argument is parenthesized and the local uses a deliberately unusual
// name: with a short name such as `s`, any `s` appearing inside `x` would bind
// to the macro's own (still uninitialized) local instead of the caller's
// variable.
#define OCL_CHECK_V(x) \
    do { \
        cl_int ocl_check_status_ = (x); \
        if (ocl_check_status_ != CL_SUCCESS) { \
            MAYBE_REPORT_OCL_ERROR(ocl_check_status_); \
            return; \
        } \
    } while (0)
118
// Evaluates the OpenCL call `x` once. If the result is not CL_SUCCESS, the
// status is reported through MAYBE_REPORT_OCL_ERROR and the enclosing function
// returns the status translated by dnnl::impl::gpu::ocl::convert_to_dnnl().
//
// The argument is parenthesized and the local uses a deliberately unusual
// name: with a short name such as `s`, any `s` appearing inside `x` would bind
// to the macro's own (still uninitialized) local instead of the caller's
// variable.
#define OCL_CHECK(x) \
    do { \
        cl_int ocl_check_status_ = (x); \
        if (ocl_check_status_ != CL_SUCCESS) { \
            MAYBE_REPORT_OCL_ERROR(ocl_check_status_); \
            return dnnl::impl::gpu::ocl::convert_to_dnnl(ocl_check_status_); \
        } \
    } while (0)
127
128	// Check for three conditions:
129	// 1. Device and context are compatible, i.e. the device belongs to
130	// the context devices.
131	// 2. Device type matches the passed engine kind
132	// 3. Device/context platfrom is an Intel platform
133	status_t check_device(engine_kind_t eng_kind, cl_device_id dev, cl_context ctx);
134
135	status_t get_ocl_devices(
136	std::vector<cl_device_id> *devices, cl_device_type device_type);
137
138	status_t get_ocl_device_index(size_t *index, cl_device_id device);
139
140	cl_platform_id get_ocl_platform(cl_device_id device);
141	cl_platform_id get_ocl_platform(engine_t *engine);
142
143	namespace details {
144
145	// OpenCL objects reference counting traits
146	template <typename T>
147	struct ocl_ref_traits;
148	//{
149	// static void retain(T t) {}
150	// static void release(T t) {}
151	//};
152
153	template <>
154	struct ocl_ref_traits<cl_context> {
155	static void retain(cl_context t) { clRetainContext(t); }
156	static void release(cl_context t) { clReleaseContext(t); }
157	};
158
159	template <>
160	struct ocl_ref_traits<cl_command_queue> {
161	static void retain(cl_command_queue t) { clRetainCommandQueue(t); }
162	static void release(cl_command_queue t) { clReleaseCommandQueue(t); }
163	};
164
165	template <>
166	struct ocl_ref_traits<cl_program> {
167	static void retain(cl_program t) { clRetainProgram(t); }
168	static void release(cl_program t) { clReleaseProgram(t); }
169	};
170
171	template <>
172	struct ocl_ref_traits<cl_kernel> {
173	static void retain(cl_kernel t) { clRetainKernel(t); }
174	static void release(cl_kernel t) { clReleaseKernel(t); }
175	};
176
177	template <>
178	struct ocl_ref_traits<cl_mem> {
179	static void retain(cl_mem t) { clRetainMemObject(t); }
180	static void release(cl_mem t) { clReleaseMemObject(t); }
181	};
182
183	template <>
184	struct ocl_ref_traits<cl_sampler> {
185	static void retain(cl_sampler t) { clRetainSampler(t); }
186	static void release(cl_sampler t) { clReleaseSampler(t); }
187	};
188
189	template <>
190	struct ocl_ref_traits<cl_event> {
191	static void retain(cl_event t) { clRetainEvent(t); }
192	static void release(cl_event t) { clReleaseEvent(t); }
193	};
194
195	template <>
196	struct ocl_ref_traits<cl_device_id> {
197	static void retain(cl_device_id t) { clRetainDevice(t); }
198	static void release(cl_device_id t) { clReleaseDevice(t); }
199	};
200
201	} // namespace details
202
203	// Generic class providing RAII support for OpenCL objects
204	template <typename T>
205	struct ocl_wrapper_t {
206	ocl_wrapper_t(T t = nullptr, bool retain = false) : t_(t) {
207	if (retain) { do_retain(); }
208	}
209
210	ocl_wrapper_t(const ocl_wrapper_t &other) : t_(other.t_) { do_retain(); }
211
212	ocl_wrapper_t(ocl_wrapper_t &&other) noexcept : t_(std::move(other.t_)) {}
213
214	ocl_wrapper_t &operator=(ocl_wrapper_t other) {
215	using std::swap;
216	swap(t_, other.t_);
217	return *this;
218	}
219
220	~ocl_wrapper_t() { do_release(); }
221
222	operator T() const { return t_; }
223	T get() const { return t_; }
224
225	T release() {
226	T t = t_;
227	t_ = nullptr;
228	return t;
229	}
230
231	private:
232	T t_;
233
234	void do_retain() {
235	if (t_) { details::ocl_ref_traits<T>::retain(t_); }
236	}
237
238	void do_release() {
239	if (t_) { details::ocl_ref_traits<T>::release(t_); }
240	}
241	};
242
243	// Constructs an OpenCL wrapper object (providing RAII support)
244	template <typename T>
245	ocl_wrapper_t<T> make_ocl_wrapper(T t) {
246	return ocl_wrapper_t<T>(t);
247	}
248
249	template <typename F>
250	struct ext_func_t {
251	ext_func_t(const char *name) : ext_func_ptrs_(intel_platforms().size()) {
252	for (size_t i = `0`; i < intel_platforms().size(); ++i) {
253	auto p = intel_platforms()[i];
254	auto it = ext_func_ptrs_.insert({p, load_ext_func(p, name)});
255	assert(it.second);
256	MAYBE_UNUSED(it);
257	}
258	}
259
260	template <typename... Args>
261	typename cpp_compat::invoke_result<F, Args...>::type operator()(
262	engine_t engine, Args... args) const* {
263	auto f = get_func(engine);
264	return f(args...);
265	}
266
267	F get_func(engine_t engine) const* {
268	return get_func(get_ocl_platform(engine));
269	}
270
271	F get_func(cl_platform_id platform) const {
272	return ext_func_ptrs_.at(platform);
273	}
274
275	private:
276	std::unordered_map<cl_platform_id, F> ext_func_ptrs_;
277
278	static F load_ext_func(cl_platform_id platform, const char *name) {
279	return reinterpret_cast<F>(
280	clGetExtensionFunctionAddressForPlatform(platform, name));
281	}
282
283	static const std::vector<cl_platform_id> &intel_platforms() {
284	static auto intel_platforms = get_intel_platforms();
285	return intel_platforms;
286	}
287
288	static std::vector<cl_platform_id> get_intel_platforms() {
289	cl_uint num_platforms = `0`;
290	cl_int err = clGetPlatformIDs(`0`, nullptr, &num_platforms);
291	if (err != CL_SUCCESS) return {};
292
293	std::vector<cl_platform_id> platforms(num_platforms);
294	err = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
295	if (err != CL_SUCCESS) return {};
296
297	std::vector<cl_platform_id> intel_platforms;
298	char vendor_name[`128`] = {};
299	for (cl_platform_id p : platforms) {
300	err = clGetPlatformInfo(p, CL_PLATFORM_VENDOR, sizeof(vendor_name),
301	vendor_name, nullptr);
302	if (err != CL_SUCCESS) continue;
303	if (std::string(vendor_name).find("Intel") != std::string::npos)
304	intel_platforms.push_back(p);
305	}
306
307	// OpenCL can return a list of platforms that contains duplicates.
308	std::sort(intel_platforms.begin(), intel_platforms.end());
309	intel_platforms.erase(
310	std::unique(intel_platforms.begin(), intel_platforms.end()),
311	intel_platforms.end());
312	return intel_platforms;
313	}
314	};
315
316	status_t get_ocl_kernel_arg_type(compute::scalar_type_t *type,
317	cl_kernel ocl_kernel, int idx, bool allow_undef = false);
318
319	#ifdef DNNL_ENABLE_MEM_DEBUG
320	cl_mem DNNL_WEAK clCreateBuffer_wrapper(cl_context context, cl_mem_flags flags,
321	size_t size, void host_ptr, cl_int errcode_ret);
322	#else
323	cl_mem clCreateBuffer_wrapper(cl_context context, cl_mem_flags flags,
324	size_t size, void host_ptr, cl_int errcode_ret);
325	#endif
326
327	status_t get_ocl_program_binary(cl_program program, cl_device_id device,
328	std::shared_ptr<compute::binary_t> &binary);
329
330	status_t get_ocl_program_binary(cl_kernel kernel, cl_device_id device,
331	std::shared_ptr<compute::binary_t> &binary);
332
333	void dump_kernel_binary(cl_kernel ocl_kernel);
334	void dump_kernel_binary(
335	const engine_t engine, const* compute::kernel_t &binary_kernel);
336
337	status_t get_kernel_arg_types(cl_kernel ocl_kernel,
338	std::vector<gpu::compute::scalar_type_t> *arg_types);
339
340	status_t get_ocl_device_eu_count(cl_device_id device, int32_t *eu_count);
341
342	status_t clone_kernel(cl_kernel kernel, cl_kernel *cloned_kernel);
343
344	} // namespace ocl
345	} // namespace gpu
346	} // namespace impl
347	} // namespace dnnl
348
349	#endif
350

Browse the source code of oneDNN/src/gpu/ocl/ocl_utils.hpp