1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <cstring> |
18 | |
19 | #include <CL/cl.h> |
20 | |
21 | #include "gpu/ocl/ocl_stream.hpp" |
22 | |
23 | #include "common/verbose.hpp" |
24 | #include "gpu/ocl/ocl_memory_storage.hpp" |
25 | #include "gpu/ocl/ocl_utils.hpp" |
26 | #include "gpu/ocl/profile.hpp" |
27 | #include "gpu/profile.hpp" |
28 | |
29 | namespace dnnl { |
30 | namespace impl { |
31 | namespace gpu { |
32 | namespace ocl { |
33 | |
// Initializes the stream: creates or adopts an OpenCL command queue and
// validates it against the engine. Returns status::invalid_arguments when a
// user-supplied queue belongs to a different device/context or when kernel
// profiling is requested on an out-of-order queue.
status_t ocl_stream_t::init() {
    // MDAPI helper must exist before create_queue() is called below, since
    // profiling queues are created through it.
    if (is_profiling_enabled()) {
        mdapi_helper_ = utils::make_unique<mdapi_helper_t>();
    }
    // Restore queue on successful exit, otherwise queue may be released
    // without retain
    cl_command_queue queue = queue_;
    queue_ = nullptr;

    assert(engine()->kind() == engine_kind::gpu);

    ocl_gpu_engine_t *ocl_engine
            = utils::downcast<ocl_gpu_engine_t *>(engine());

    // Create queue if it is not set
    if (!queue) {
        cl_int err;
        queue = create_queue(ocl_engine->context(), ocl_engine->device(), &err);
        OCL_CHECK(err);
    } else {
        // Check that queue is compatible with the engine
        cl_context ocl_ctx;
        OCL_CHECK(clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT,
                sizeof(cl_context), &ocl_ctx, nullptr));

        cl_device_id ocl_dev;
        OCL_CHECK(clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE,
                sizeof(cl_device_id), &ocl_dev, nullptr));

        if (ocl_engine->device() != ocl_dev || ocl_engine->context() != ocl_ctx)
            return status::invalid_arguments;

        // The stream takes its own reference on a user-provided queue; it is
        // released by the stream's destructor (not visible in this file).
        OCL_CHECK(clRetainCommandQueue(queue));
    }
    queue_ = queue;

    if (gpu::is_profiling_enabled()) {
        // Profiling is rejected on out-of-order queues; query the actual
        // queue properties rather than trusting the stream flags, since the
        // queue may have been supplied by the user.
        cl_command_queue_properties props;
        OCL_CHECK(clGetCommandQueueInfo(
                queue_, CL_QUEUE_PROPERTIES, sizeof(props), &props, nullptr));
        bool is_out_of_order
                = (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
        if (is_out_of_order) {
            if (get_verbose()) {
                printf("onednn_verbose,gpu,error,OpenCL kernel profiling is "
                       "not "
                       "supported with out-of-order queues\n" );
                fflush(nullptr);
            }
            return status::invalid_arguments;
        }
    }

    return status::success;
}
89 | |
// Creates a new OpenCL command queue on (ctx, dev) and stores the OpenCL
// status in *err. When profiling is enabled and the MDAPI helper is
// available, the queue is created through the helper; otherwise a plain
// queue is created with profiling or out-of-order properties derived from
// the stream flags. Profiling takes precedence over out-of-order (the two
// are mutually exclusive, see the assert below).
cl_command_queue ocl_stream_t::create_queue(
        cl_context ctx, cl_device_id dev, cl_int *err) const {
    if (is_profiling_enabled() && mdapi_helper_) {
        auto ret = mdapi_helper_->create_queue(ctx, dev, err);
        // Fall through to a regular queue if the helper could not create one.
        if (ret) return ret;
    }

    const bool is_out_of_order = (flags() & stream_flags::out_of_order);
    if (is_out_of_order) assert(!is_profiling_enabled());
#ifdef CL_VERSION_2_0
    // OpenCL 2.0+: properties are passed as a zero-terminated list.
    cl_queue_properties profiling_props[]
            = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    cl_queue_properties out_of_order_props[]
            = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    return clCreateCommandQueueWithProperties(ctx, dev,
            is_profiling_enabled()
                    ? profiling_props
                    : is_out_of_order ? out_of_order_props : nullptr,
            err);
#else
    // Pre-2.0: properties are a plain bitfield (deprecated API).
    return clCreateCommandQueue(ctx, dev,
            is_profiling_enabled()
                    ? CL_QUEUE_PROFILING_ENABLE
                    : is_out_of_order ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
                                      : 0,
            err);
#endif
}
118 | |
119 | void ocl_stream_t::before_exec_hook() { |
120 | if (is_profiling_enabled()) notify_before_exec(); |
121 | } |
122 | |
// Called right after primitive execution; clears the stream's recorded
// event dependencies so they do not leak into the next submission.
void ocl_stream_t::after_exec_hook() {
    set_deps({});
}
126 | |
127 | status_t ocl_stream_t::copy( |
128 | const memory_storage_t &src, const memory_storage_t &dst, size_t size) { |
129 | |
130 | if (size == 0) return status::success; |
131 | |
132 | if (src.engine()->kind() == engine_kind::cpu |
133 | && is_native_runtime(src.engine()->runtime_kind())) { |
134 | assert(dst.engine()->kind() == engine_kind::gpu); |
135 | |
136 | void *src_ptr = nullptr; |
137 | src.get_data_handle(&src_ptr); |
138 | |
139 | const auto *ocl_dst |
140 | = utils::downcast<const ocl_memory_storage_base_t *>(&dst); |
141 | bool usm_dst = ocl_dst->memory_kind() == memory_kind::usm; |
142 | |
143 | if (usm_dst) { |
144 | const auto *ocl_usm_dst |
145 | = utils::downcast<const ocl_usm_memory_storage_t *>( |
146 | ocl_dst); |
147 | CHECK(usm::memcpy(this, ocl_usm_dst->usm_ptr(), src_ptr, size)); |
148 | } else { |
149 | const auto *ocl_buffer_dst |
150 | = utils::downcast<const ocl_buffer_memory_storage_t *>( |
151 | ocl_dst); |
152 | |
153 | cl_mem ocl_mem = ocl_buffer_dst->mem_object(); |
154 | cl_int err = clEnqueueWriteBuffer(queue(), ocl_mem, CL_TRUE, 0, |
155 | size, src_ptr, 0, nullptr, nullptr); |
156 | OCL_CHECK(err); |
157 | } |
158 | } else if (dst.engine()->kind() == engine_kind::cpu |
159 | && is_native_runtime(dst.engine()->runtime_kind())) { |
160 | assert(src.engine()->kind() == engine_kind::gpu); |
161 | |
162 | void *dst_ptr = nullptr; |
163 | dst.get_data_handle(&dst_ptr); |
164 | |
165 | const auto *ocl_src |
166 | = utils::downcast<const ocl_memory_storage_base_t *>(&src); |
167 | bool usm_src = ocl_src->memory_kind() == memory_kind::usm; |
168 | |
169 | if (usm_src) { |
170 | const auto *ocl_usm_src |
171 | = utils::downcast<const ocl_usm_memory_storage_t *>( |
172 | ocl_src); |
173 | CHECK(usm::memcpy(this, dst_ptr, ocl_usm_src->usm_ptr(), size)); |
174 | } else { |
175 | const auto *ocl_buffer_src |
176 | = utils::downcast<const ocl_buffer_memory_storage_t *>( |
177 | ocl_src); |
178 | |
179 | cl_mem ocl_mem = ocl_buffer_src->mem_object(); |
180 | cl_int err = clEnqueueReadBuffer(queue(), ocl_mem, CL_TRUE, 0, size, |
181 | dst_ptr, 0, nullptr, nullptr); |
182 | OCL_CHECK(err); |
183 | } |
184 | } else { |
185 | wait(); |
186 | |
187 | // Use map/unmap |
188 | void *src_mapped_ptr; |
189 | void *dst_mapped_ptr; |
190 | |
191 | CHECK(src.map_data(&src_mapped_ptr, this, size)); |
192 | CHECK(dst.map_data(&dst_mapped_ptr, this, size)); |
193 | |
194 | std::memcpy(static_cast<void *>(dst_mapped_ptr), |
195 | static_cast<const void *>(src_mapped_ptr), size); |
196 | |
197 | CHECK(src.unmap_data(src_mapped_ptr, this)); |
198 | CHECK(dst.unmap_data(dst_mapped_ptr, this)); |
199 | } |
200 | return status::success; |
201 | } |
202 | |
203 | status_t ocl_stream_t::fill( |
204 | const memory_storage_t &dst, uint8_t pattern, size_t size) { |
205 | using namespace dnnl::impl::utils; |
206 | |
207 | const auto *ocl_dst = downcast<const ocl_memory_storage_base_t *>(&dst); |
208 | |
209 | if (ocl_dst->memory_kind() == memory_kind::usm) { |
210 | const auto *ocl_usm_dst |
211 | = downcast<const ocl_usm_memory_storage_t *>(ocl_dst); |
212 | CHECK(usm::fill( |
213 | this, ocl_usm_dst->usm_ptr(), &pattern, sizeof(pattern), size)); |
214 | } else { |
215 | const auto *ocl_buffer_dst |
216 | = downcast<const ocl_buffer_memory_storage_t *>(ocl_dst); |
217 | cl_int err = clEnqueueFillBuffer(queue(), ocl_buffer_dst->mem_object(), |
218 | &pattern, sizeof(uint8_t), dst.offset(), size, 0, nullptr, |
219 | nullptr); |
220 | OCL_CHECK(err); |
221 | } |
222 | return status::success; |
223 | } |
224 | |
225 | } // namespace ocl |
226 | } // namespace gpu |
227 | } // namespace impl |
228 | } // namespace dnnl |
229 | |