1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef ONEAPI_DNNL_DNNL_OCL_HPP |
18 | #define ONEAPI_DNNL_DNNL_OCL_HPP |
19 | |
20 | #include "oneapi/dnnl/dnnl.hpp" |
21 | |
22 | /// @cond DO_NOT_DOCUMENT_THIS |
23 | #include <algorithm> |
24 | #include <cstdlib> |
25 | #include <iterator> |
26 | #include <memory> |
27 | #include <string> |
28 | #include <vector> |
29 | #include <unordered_map> |
30 | |
31 | #include "oneapi/dnnl/dnnl_ocl.h" |
32 | |
33 | #include <CL/cl.h> |
34 | /// @endcond |
35 | |
36 | /// @addtogroup dnnl_api |
37 | /// @{ |
38 | |
39 | namespace dnnl { |
40 | |
41 | /// @addtogroup dnnl_api_interop Runtime interoperability API |
42 | /// API extensions to interact with the underlying run-time. |
43 | /// @{ |
44 | |
45 | /// @addtogroup dnnl_api_ocl_interop OpenCL interoperability API |
46 | /// API extensions to interact with the underlying OpenCL run-time. |
47 | /// |
48 | /// @sa @ref dev_guide_opencl_interoperability in developer guide |
49 | /// @{ |
50 | |
51 | /// OpenCL interoperability namespace |
52 | namespace ocl_interop { |
53 | |
54 | /// Memory allocation kind. |
55 | enum class memory_kind { |
56 | /// USM (device, shared, host, or unknown) memory allocation kind. |
57 | usm = dnnl_ocl_interop_usm, |
58 | /// Buffer memory allocation kind - default. |
59 | buffer = dnnl_ocl_interop_buffer, |
60 | }; |
61 | |
62 | /// Converts a memory allocation kind enum value from C++ API to C API type. |
63 | /// |
64 | /// @param akind C++ API memory allocation kind enum value. |
65 | /// @returns Corresponding C API memory allocation kind enum value. |
66 | inline dnnl_ocl_interop_memory_kind_t convert_to_c(memory_kind akind) { |
67 | return static_cast<dnnl_ocl_interop_memory_kind_t>(akind); |
68 | } |
69 | |
70 | /// Returns the cache blob ID of the OpenCL device. |
71 | /// |
72 | /// @warning |
73 | /// This API is intended to be used with |
74 | /// #dnnl::ocl_interop::get_engine_cache_blob() and |
75 | /// #dnnl::ocl_interop::make_engine(cl_device_id, cl_context, const std::vector<uint8_t> &). |
76 | /// The returned cache blob ID can only be used as an ID of the cache blob |
77 | /// returned by #dnnl::ocl_interop::get_engine_cache_blob(). |
78 | /// |
79 | /// @note The cache blob ID can be empty (@p size will be 0 and |
80 | /// @p cache_blob_id will be nullptr) if oneDNN doesn't have anything to |
81 | /// put in the cache blob. (#dnnl_ocl_interop_engine_get_cache_blob will |
82 | /// return an empty cache blob). |
83 | /// |
84 | /// @param device An OpenCL device. |
85 | /// @returns A vector containing the cache blob ID. |
86 | inline std::vector<uint8_t> get_engine_cache_blob_id(cl_device_id device) { |
87 | size_t size = 0; |
88 | error::wrap_c_api( |
89 | dnnl_ocl_interop_engine_get_cache_blob_id(device, &size, nullptr), |
90 | "could not get an engine cache blob id size" ); |
91 | |
92 | std::vector<uint8_t> cache_blob_id(size); |
93 | error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob_id( |
94 | device, &size, cache_blob_id.data()), |
95 | "could not get an engine cache blob id" ); |
96 | return cache_blob_id; |
97 | } |
98 | |
99 | /// Returns a cache blob for the engine. |
100 | /// |
101 | /// @note The cache blob vector can be empty if oneDNN doesn't have anything |
102 | /// to put in the cache blob. It's the user's responsibility to check |
103 | /// whether it's empty prior to passing it to |
104 | /// #dnnl::ocl_interop::make_engine(cl_device_id, cl_context, const std::vector<uint8_t> &) |
105 | /// |
106 | /// @param aengine Engine to query for the cache blob. |
107 | /// @returns Vector containing the cache blob. |
108 | inline std::vector<uint8_t> get_engine_cache_blob(const engine &aengine) { |
109 | size_t size = 0; |
110 | error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob( |
111 | aengine.get(), &size, nullptr), |
112 | "could not get an engine cache blob size" ); |
113 | |
114 | std::vector<uint8_t> cache_blob(size); |
115 | error::wrap_c_api(dnnl_ocl_interop_engine_get_cache_blob( |
116 | aengine.get(), &size, cache_blob.data()), |
117 | "could not get an engine cache blob" ); |
118 | return cache_blob; |
119 | } |
120 | |
121 | /// Constructs an engine from the given cache blob. |
122 | /// |
123 | /// @param device The OpenCL device that this engine will encapsulate. |
124 | /// @param context The OpenCL context (containing the device) that this |
125 | /// engine will use for all operations. |
126 | /// @param cache_blob Cache blob. |
127 | /// @returns An engine. |
128 | inline engine make_engine(cl_device_id device, cl_context context, |
129 | const std::vector<uint8_t> &cache_blob) { |
130 | dnnl_engine_t c_engine; |
131 | error::wrap_c_api( |
132 | dnnl_ocl_interop_engine_create_from_cache_blob(&c_engine, device, |
133 | context, cache_blob.size(), cache_blob.data()), |
134 | "could not create an engine from cache blob" ); |
135 | return engine(c_engine); |
136 | } |
137 | |
138 | /// Constructs an engine from OpenCL device and context objects. |
139 | /// |
140 | /// @param device The OpenCL device that this engine will encapsulate. |
141 | /// @param context The OpenCL context (containing the device) that this |
142 | /// engine will use for all operations. |
143 | /// @returns An engine. |
144 | inline engine make_engine(cl_device_id device, cl_context context) { |
145 | dnnl_engine_t c_engine; |
146 | error::wrap_c_api( |
147 | dnnl_ocl_interop_engine_create(&c_engine, device, context), |
148 | "could not create an engine" ); |
149 | return engine(c_engine); |
150 | } |
151 | |
152 | /// Returns OpenCL context associated with the engine. |
153 | /// |
154 | /// @param aengine An engine. |
155 | /// @returns Underlying OpenCL context. |
156 | inline cl_context get_context(const engine &aengine) { |
157 | cl_context context = nullptr; |
158 | error::wrap_c_api( |
159 | dnnl_ocl_interop_engine_get_context(aengine.get(), &context), |
160 | "could not get an OpenCL context from an engine" ); |
161 | return context; |
162 | } |
163 | |
164 | /// Returns OpenCL device associated with the engine. |
165 | /// |
166 | /// @param aengine An engine. |
167 | /// @returns Underlying OpenCL device. |
168 | inline cl_device_id get_device(const engine &aengine) { |
169 | cl_device_id device = nullptr; |
170 | error::wrap_c_api(dnnl_ocl_interop_get_device(aengine.get(), &device), |
171 | "could not get an OpenCL device from an engine" ); |
172 | return device; |
173 | } |
174 | |
175 | /// Constructs an execution stream for the specified engine and OpenCL queue. |
176 | /// |
177 | /// @param aengine Engine to create the stream on. |
178 | /// @param queue OpenCL queue to use for the stream. |
179 | /// @returns An execution stream. |
180 | inline stream make_stream(const engine &aengine, cl_command_queue queue) { |
181 | dnnl_stream_t c_stream; |
182 | error::wrap_c_api( |
183 | dnnl_ocl_interop_stream_create(&c_stream, aengine.get(), queue), |
184 | "could not create a stream" ); |
185 | return stream(c_stream); |
186 | } |
187 | |
188 | /// Returns OpenCL queue object associated with the execution stream. |
189 | /// |
190 | /// @param astream An execution stream. |
191 | /// @returns Underlying OpenCL queue. |
192 | inline cl_command_queue get_command_queue(const stream &astream) { |
193 | cl_command_queue queue = nullptr; |
194 | error::wrap_c_api( |
195 | dnnl_ocl_interop_stream_get_command_queue(astream.get(), &queue), |
196 | "could not get an OpenCL command queue from a stream" ); |
197 | return queue; |
198 | } |
199 | |
200 | /// Returns the OpenCL memory object associated with the memory object. |
201 | /// |
202 | /// @param amemory A memory object. |
203 | /// @returns Underlying OpenCL memory object. |
204 | inline cl_mem get_mem_object(const memory &amemory) { |
205 | cl_mem mem_object; |
206 | error::wrap_c_api( |
207 | dnnl_ocl_interop_memory_get_mem_object(amemory.get(), &mem_object), |
208 | "could not get OpenCL buffer object from a memory object" ); |
209 | return mem_object; |
210 | } |
211 | |
212 | /// Sets the OpenCL memory object associated with the memory object. |
213 | /// |
214 | /// For behavioral details see memory::set_data_handle(). |
215 | /// |
216 | /// @param amemory A memory object. |
217 | /// @param mem_object OpenCL cl_mem object to use as the underlying |
218 | /// storage. It must have at least get_desc().get_size() bytes |
219 | /// allocated. |
220 | inline void set_mem_object(memory &amemory, cl_mem mem_object) { |
221 | error::wrap_c_api( |
222 | dnnl_ocl_interop_memory_set_mem_object(amemory.get(), mem_object), |
223 | "could not set OpenCL buffer object from a memory object" ); |
224 | } |
225 | |
226 | /// Returns the memory allocation kind associated with a memory object. |
227 | /// |
228 | /// @param amemory A memory object. |
229 | /// |
230 | /// @returns The underlying memory allocation kind of the memory object. |
231 | inline memory_kind get_memory_kind(const memory &amemory) { |
232 | dnnl_ocl_interop_memory_kind_t ckind; |
233 | error::wrap_c_api( |
234 | dnnl_ocl_interop_memory_get_memory_kind(amemory.get(), &ckind), |
235 | "could not get memory kind" ); |
236 | return static_cast<memory_kind>(ckind); |
237 | } |
238 | |
239 | /// Creates a memory object. |
240 | /// |
241 | /// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the |
242 | /// constructed memory object will have the underlying buffer set. In this |
243 | /// case, the buffer will be initialized as if: |
244 | /// - dnnl::memory::set_data_handle() had been called, if @p memory_kind is |
245 | /// equal to dnnl::ocl_interop::memory_kind::usm, or |
246 | /// - dnnl::ocl_interop::set_mem_object() has been called, if @p memory_kind is |
247 | /// equal to dnnl::ocl_interop::memory_kind::buffer. |
248 | /// |
249 | /// @param memory_desc Memory descriptor. |
250 | /// @param aengine Engine to use. |
251 | /// @param kind Memory allocation kind to specify the type of handle. |
252 | /// @param handle Handle of the memory buffer to use as an underlying storage. |
253 | /// - A USM pointer to the user-allocated buffer. In this case the library |
254 | /// doesn't own the buffer. Requires @p memory_kind to be equal to |
255 | /// dnnl::ocl_interop::memory_kind::usm. |
256 | /// - An OpenCL buffer. In this case the library doesn't own the buffer. |
257 | /// Requires @p memory_kind be equal to be equal to |
258 | /// dnnl::ocl_interop::memory_kind::buffer. |
259 | /// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to |
260 | /// allocate the buffer that corresponds to the memory allocation kind |
261 | /// @p memory_kind for the memory object. In this case the library |
262 | /// owns the buffer. |
263 | /// - The DNNL_MEMORY_NONE specific value. Instructs the library to |
264 | /// create memory object without an underlying buffer. |
265 | /// |
266 | /// @returns Created memory object. |
267 | inline memory make_memory(const memory::desc &memory_desc, |
268 | const engine &aengine, memory_kind kind, |
269 | void *handle = DNNL_MEMORY_ALLOCATE) { |
270 | dnnl_memory_t c_memory; |
271 | error::wrap_c_api( |
272 | dnnl_ocl_interop_memory_create(&c_memory, memory_desc.get(), |
273 | aengine.get(), convert_to_c(kind), handle), |
274 | "could not create a memory" ); |
275 | return memory(c_memory); |
276 | } |
277 | |
278 | /// Constructs a memory object from an OpenCL buffer. |
279 | /// |
280 | /// @param memory_desc Memory descriptor. |
281 | /// @param aengine Engine to use. |
282 | /// @param mem_object An OpenCL buffer to use. |
283 | /// |
284 | /// @returns Created memory object. |
285 | inline memory make_memory(const memory::desc &memory_desc, |
286 | const engine &aengine, cl_mem mem_object) { |
287 | memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE); |
288 | set_mem_object(amemory, mem_object); |
289 | return amemory; |
290 | } |
291 | |
292 | /// Executes computations specified by the primitive in a specified stream and |
293 | /// returns a SYCL event. |
294 | /// |
295 | /// Arguments are passed via an arguments map containing |
296 | /// <index, memory object> pairs. The index must be one of the `DNNL_ARG_*` |
297 | /// values such as `DNNL_ARG_SRC`, and the memory must have a memory descriptor |
298 | /// matching the one returned by |
299 | /// #dnnl::primitive_desc::query_md(#query::exec_arg_md, index) unless using |
300 | /// dynamic shapes (see #DNNL_RUNTIME_DIM_VAL). |
301 | /// |
302 | /// @param aprimitive Primitive to execute. |
303 | /// @param astream Stream object. The stream must belong to the same engine |
304 | /// as the primitive. |
305 | /// @param args Arguments map. |
306 | /// @param deps Optional vector with `cl_event` dependencies. |
307 | /// |
308 | /// @returns Output event. It's the user's responsibility to manage lifetime |
309 | /// of the event. |
310 | inline cl_event execute(const dnnl::primitive &aprimitive, |
311 | const stream &astream, const std::unordered_map<int, memory> &args, |
312 | const std::vector<cl_event> &deps = {}) { |
313 | std::vector<dnnl_exec_arg_t> c_args; |
314 | c_args.reserve(args.size()); |
315 | for (const auto &a : args) |
316 | c_args.push_back({a.first, a.second.get()}); |
317 | |
318 | const cl_event *c_deps = deps.empty() ? nullptr : deps.data(); |
319 | |
320 | cl_event return_event; |
321 | error::wrap_c_api(dnnl_ocl_interop_primitive_execute(aprimitive.get(), |
322 | astream.get(), (int)c_args.size(), c_args.data(), |
323 | c_deps, (int)deps.size(), &return_event), |
324 | "could not execute a primitive" ); |
325 | return return_event; |
326 | } |
327 | |
328 | } // namespace ocl_interop |
329 | |
330 | /// @} dnnl_api_ocl_interop |
331 | |
332 | /// @} dnnl_api_interop |
333 | |
334 | } // namespace dnnl |
335 | |
336 | /// @} dnnl_api |
337 | |
338 | #endif |
339 | |