1/*******************************************************************************
2* Copyright 2020-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef ONEAPI_DNNL_DNNL_OCL_HPP
18#define ONEAPI_DNNL_DNNL_OCL_HPP
19
20#include "oneapi/dnnl/dnnl.hpp"
21
22/// @cond DO_NOT_DOCUMENT_THIS
23#include <algorithm>
24#include <cstdlib>
25#include <iterator>
26#include <memory>
27#include <string>
28#include <vector>
29#include <unordered_map>
30
31#include "oneapi/dnnl/dnnl_ocl.h"
32
33#include <CL/cl.h>
34/// @endcond
35
36/// @addtogroup dnnl_api
37/// @{
38
39namespace dnnl {
40
41/// @addtogroup dnnl_api_interop Runtime interoperability API
42/// API extensions to interact with the underlying run-time.
43/// @{
44
45/// @addtogroup dnnl_api_ocl_interop OpenCL interoperability API
46/// API extensions to interact with the underlying OpenCL run-time.
47///
48/// @sa @ref dev_guide_opencl_interoperability in developer guide
49/// @{
50
51/// OpenCL interoperability namespace
52namespace ocl_interop {
53
54/// Memory allocation kind.
55enum class memory_kind {
56 /// USM (device, shared, host, or unknown) memory allocation kind.
57 usm = dnnl_ocl_interop_usm,
58 /// Buffer memory allocation kind - default.
59 buffer = dnnl_ocl_interop_buffer,
60};
61
62/// Converts a memory allocation kind enum value from C++ API to C API type.
63///
64/// @param akind C++ API memory allocation kind enum value.
65/// @returns Corresponding C API memory allocation kind enum value.
66inline dnnl_ocl_interop_memory_kind_t convert_to_c(memory_kind akind) {
67 return static_cast<dnnl_ocl_interop_memory_kind_t>(akind);
68}
69
70/// Returns the cache blob ID of the OpenCL device.
71///
72/// @warning
73/// This API is intended to be used with
74/// #dnnl::ocl_interop::get_engine_cache_blob() and
75/// #dnnl::ocl_interop::make_engine(cl_device_id, cl_context, const std::vector<uint8_t> &).
76/// The returned cache blob ID can only be used as an ID of the cache blob
77/// returned by #dnnl::ocl_interop::get_engine_cache_blob().
78///
/// @note The cache blob ID can be empty (the returned vector will have
/// size 0) if oneDNN doesn't have anything to put in the cache blob.
/// (#dnnl_ocl_interop_engine_get_cache_blob will return an empty cache
/// blob in that case).
83///
84/// @param device An OpenCL device.
85/// @returns A vector containing the cache blob ID.
86inline std::vector<uint8_t> get_engine_cache_blob_id(cl_device_id device) {
87 size_t size = 0;
88 error::wrap_c_api(
89 dnnl_ocl_interop_engine_get_cache_blob_id(device, &size, nullptr),
90 "could not get an engine cache blob id size");
91
92 std::vector<uint8_t> cache_blob_id(size);
93 error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob_id(
94 device, &size, cache_blob_id.data()),
95 "could not get an engine cache blob id");
96 return cache_blob_id;
97}
98
99/// Returns a cache blob for the engine.
100///
101/// @note The cache blob vector can be empty if oneDNN doesn't have anything
102/// to put in the cache blob. It's the user's responsibility to check
103/// whether it's empty prior to passing it to
/// #dnnl::ocl_interop::make_engine(cl_device_id, cl_context, const std::vector<uint8_t> &).
105///
106/// @param aengine Engine to query for the cache blob.
107/// @returns Vector containing the cache blob.
108inline std::vector<uint8_t> get_engine_cache_blob(const engine &aengine) {
109 size_t size = 0;
110 error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob(
111 aengine.get(), &size, nullptr),
112 "could not get an engine cache blob size");
113
114 std::vector<uint8_t> cache_blob(size);
115 error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob(
116 aengine.get(), &size, cache_blob.data()),
117 "could not get an engine cache blob");
118 return cache_blob;
119}
120
121/// Constructs an engine from the given cache blob.
122///
123/// @param device The OpenCL device that this engine will encapsulate.
124/// @param context The OpenCL context (containing the device) that this
125/// engine will use for all operations.
126/// @param cache_blob Cache blob.
127/// @returns An engine.
128inline engine make_engine(cl_device_id device, cl_context context,
129 const std::vector<uint8_t> &cache_blob) {
130 dnnl_engine_t c_engine;
131 error::wrap_c_api(
132 dnnl_ocl_interop_engine_create_from_cache_blob(&c_engine, device,
133 context, cache_blob.size(), cache_blob.data()),
134 "could not create an engine from cache blob");
135 return engine(c_engine);
136}
137
138/// Constructs an engine from OpenCL device and context objects.
139///
140/// @param device The OpenCL device that this engine will encapsulate.
141/// @param context The OpenCL context (containing the device) that this
142/// engine will use for all operations.
143/// @returns An engine.
144inline engine make_engine(cl_device_id device, cl_context context) {
145 dnnl_engine_t c_engine;
146 error::wrap_c_api(
147 dnnl_ocl_interop_engine_create(&c_engine, device, context),
148 "could not create an engine");
149 return engine(c_engine);
150}
151
152/// Returns OpenCL context associated with the engine.
153///
154/// @param aengine An engine.
155/// @returns Underlying OpenCL context.
156inline cl_context get_context(const engine &aengine) {
157 cl_context context = nullptr;
158 error::wrap_c_api(
159 dnnl_ocl_interop_engine_get_context(aengine.get(), &context),
160 "could not get an OpenCL context from an engine");
161 return context;
162}
163
164/// Returns OpenCL device associated with the engine.
165///
166/// @param aengine An engine.
167/// @returns Underlying OpenCL device.
168inline cl_device_id get_device(const engine &aengine) {
169 cl_device_id device = nullptr;
170 error::wrap_c_api(dnnl_ocl_interop_get_device(aengine.get(), &device),
171 "could not get an OpenCL device from an engine");
172 return device;
173}
174
175/// Constructs an execution stream for the specified engine and OpenCL queue.
176///
177/// @param aengine Engine to create the stream on.
178/// @param queue OpenCL queue to use for the stream.
179/// @returns An execution stream.
180inline stream make_stream(const engine &aengine, cl_command_queue queue) {
181 dnnl_stream_t c_stream;
182 error::wrap_c_api(
183 dnnl_ocl_interop_stream_create(&c_stream, aengine.get(), queue),
184 "could not create a stream");
185 return stream(c_stream);
186}
187
188/// Returns OpenCL queue object associated with the execution stream.
189///
190/// @param astream An execution stream.
191/// @returns Underlying OpenCL queue.
192inline cl_command_queue get_command_queue(const stream &astream) {
193 cl_command_queue queue = nullptr;
194 error::wrap_c_api(
195 dnnl_ocl_interop_stream_get_command_queue(astream.get(), &queue),
196 "could not get an OpenCL command queue from a stream");
197 return queue;
198}
199
200/// Returns the OpenCL memory object associated with the memory object.
201///
202/// @param amemory A memory object.
203/// @returns Underlying OpenCL memory object.
204inline cl_mem get_mem_object(const memory &amemory) {
205 cl_mem mem_object;
206 error::wrap_c_api(
207 dnnl_ocl_interop_memory_get_mem_object(amemory.get(), &mem_object),
208 "could not get OpenCL buffer object from a memory object");
209 return mem_object;
210}
211
212/// Sets the OpenCL memory object associated with the memory object.
213///
214/// For behavioral details see memory::set_data_handle().
215///
216/// @param amemory A memory object.
217/// @param mem_object OpenCL cl_mem object to use as the underlying
218/// storage. It must have at least get_desc().get_size() bytes
219/// allocated.
220inline void set_mem_object(memory &amemory, cl_mem mem_object) {
221 error::wrap_c_api(
222 dnnl_ocl_interop_memory_set_mem_object(amemory.get(), mem_object),
223 "could not set OpenCL buffer object from a memory object");
224}
225
226/// Returns the memory allocation kind associated with a memory object.
227///
228/// @param amemory A memory object.
229///
230/// @returns The underlying memory allocation kind of the memory object.
231inline memory_kind get_memory_kind(const memory &amemory) {
232 dnnl_ocl_interop_memory_kind_t ckind;
233 error::wrap_c_api(
234 dnnl_ocl_interop_memory_get_memory_kind(amemory.get(), &ckind),
235 "could not get memory kind");
236 return static_cast<memory_kind>(ckind);
237}
238
239/// Creates a memory object.
240///
241/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
242/// constructed memory object will have the underlying buffer set. In this
243/// case, the buffer will be initialized as if:
/// - dnnl::memory::set_data_handle() had been called, if @p kind is
/// equal to dnnl::ocl_interop::memory_kind::usm, or
/// - dnnl::ocl_interop::set_mem_object() had been called, if @p kind is
/// equal to dnnl::ocl_interop::memory_kind::buffer.
248///
249/// @param memory_desc Memory descriptor.
250/// @param aengine Engine to use.
251/// @param kind Memory allocation kind to specify the type of handle.
252/// @param handle Handle of the memory buffer to use as an underlying storage.
/// - A USM pointer to the user-allocated buffer. In this case the library
/// doesn't own the buffer. Requires @p kind to be equal to
/// dnnl::ocl_interop::memory_kind::usm.
/// - An OpenCL buffer. In this case the library doesn't own the buffer.
/// Requires @p kind to be equal to
/// dnnl::ocl_interop::memory_kind::buffer.
/// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
/// allocate the buffer that corresponds to the memory allocation kind
/// @p kind for the memory object. In this case the library
/// owns the buffer.
/// - The DNNL_MEMORY_NONE special value. Instructs the library to
/// create memory object without an underlying buffer.
265///
266/// @returns Created memory object.
267inline memory make_memory(const memory::desc &memory_desc,
268 const engine &aengine, memory_kind kind,
269 void *handle = DNNL_MEMORY_ALLOCATE) {
270 dnnl_memory_t c_memory;
271 error::wrap_c_api(
272 dnnl_ocl_interop_memory_create(&c_memory, memory_desc.get(),
273 aengine.get(), convert_to_c(kind), handle),
274 "could not create a memory");
275 return memory(c_memory);
276}
277
278/// Constructs a memory object from an OpenCL buffer.
279///
280/// @param memory_desc Memory descriptor.
281/// @param aengine Engine to use.
282/// @param mem_object An OpenCL buffer to use.
283///
284/// @returns Created memory object.
285inline memory make_memory(const memory::desc &memory_desc,
286 const engine &aengine, cl_mem mem_object) {
287 memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE);
288 set_mem_object(amemory, mem_object);
289 return amemory;
290}
291
/// Executes computations specified by the primitive in a specified stream and
/// returns an OpenCL event.
294///
295/// Arguments are passed via an arguments map containing
296/// <index, memory object> pairs. The index must be one of the `DNNL_ARG_*`
297/// values such as `DNNL_ARG_SRC`, and the memory must have a memory descriptor
298/// matching the one returned by
299/// #dnnl::primitive_desc::query_md(#query::exec_arg_md, index) unless using
300/// dynamic shapes (see #DNNL_RUNTIME_DIM_VAL).
301///
302/// @param aprimitive Primitive to execute.
303/// @param astream Stream object. The stream must belong to the same engine
304/// as the primitive.
305/// @param args Arguments map.
306/// @param deps Optional vector with `cl_event` dependencies.
307///
308/// @returns Output event. It's the user's responsibility to manage lifetime
309/// of the event.
310inline cl_event execute(const dnnl::primitive &aprimitive,
311 const stream &astream, const std::unordered_map<int, memory> &args,
312 const std::vector<cl_event> &deps = {}) {
313 std::vector<dnnl_exec_arg_t> c_args;
314 c_args.reserve(args.size());
315 for (const auto &a : args)
316 c_args.push_back({a.first, a.second.get()});
317
318 const cl_event *c_deps = deps.empty() ? nullptr : deps.data();
319
320 cl_event return_event;
321 error::wrap_c_api(dnnl_ocl_interop_primitive_execute(aprimitive.get(),
322 astream.get(), (int)c_args.size(), c_args.data(),
323 c_deps, (int)deps.size(), &return_event),
324 "could not execute a primitive");
325 return return_event;
326}
327
328} // namespace ocl_interop
329
330/// @} dnnl_api_ocl_interop
331
332/// @} dnnl_api_interop
333
334} // namespace dnnl
335
336/// @} dnnl_api
337
338#endif
339