1/*******************************************************************************
2* Copyright 2021-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <type_traits>
18
19#include <CL/cl.h>
20
21#include "common/cpp_compat.hpp"
22
23#include "common/utils.hpp"
24#include "common/verbose.hpp"
25#include "gpu/ocl/ocl_gpu_engine.hpp"
26#include "gpu/ocl/ocl_stream.hpp"
27#include "gpu/ocl/ocl_usm_utils.hpp"
28#include "gpu/ocl/ocl_utils.hpp"
29
30namespace dnnl {
31namespace impl {
32namespace gpu {
33namespace ocl {
34namespace usm {
35
36namespace {
37
38cl_device_id get_ocl_device(engine_t *engine) {
39 return utils::downcast<ocl_gpu_engine_t *>(engine)->device();
40}
41
42cl_context get_ocl_context(engine_t *engine) {
43 return utils::downcast<ocl_gpu_engine_t *>(engine)->context();
44}
45
46cl_command_queue get_ocl_queue(stream_t *stream) {
47 return utils::downcast<ocl_stream_t *>(stream)->queue();
48}
49
50} // namespace
51
52bool is_usm_supported(engine_t *engine) {
53 using clSharedMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id,
54 cl_ulong *, size_t, cl_uint, cl_int *);
55 static ext_func_t<clSharedMemAllocINTEL_func_t> ext_func(
56 "clSharedMemAllocINTEL");
57 return (bool)ext_func.get_func(engine);
58}
59
60void *malloc_host(engine_t *engine, size_t size) {
61 using clHostMemAllocINTEL_func_t = void *(*)(cl_context, const cl_ulong *,
62 size_t, cl_uint, cl_int *);
63
64 if (size == 0) return nullptr;
65
66 static ext_func_t<clHostMemAllocINTEL_func_t> ext_func(
67 "clHostMemAllocINTEL");
68 cl_int err;
69 void *p = ext_func(engine, get_ocl_context(engine), nullptr, size, 0, &err);
70 assert(utils::one_of(
71 err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY));
72 return p;
73}
74
75void *malloc_device(engine_t *engine, size_t size) {
76 using clDeviceMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id,
77 cl_ulong *, size_t, cl_uint, cl_int *);
78
79 if (size == 0) return nullptr;
80
81 static ext_func_t<clDeviceMemAllocINTEL_func_t> ext_func(
82 "clDeviceMemAllocINTEL");
83 cl_int err;
84 void *p = ext_func(engine, get_ocl_context(engine), get_ocl_device(engine),
85 nullptr, size, 0, &err);
86 assert(utils::one_of(
87 err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY));
88 return p;
89}
90
91void *malloc_shared(engine_t *engine, size_t size) {
92 using clSharedMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id,
93 cl_ulong *, size_t, cl_uint, cl_int *);
94
95 if (size == 0) return nullptr;
96
97 static ext_func_t<clSharedMemAllocINTEL_func_t> ext_func(
98 "clSharedMemAllocINTEL");
99 cl_int err;
100 void *p = ext_func(engine, get_ocl_context(engine), get_ocl_device(engine),
101 nullptr, size, 0, &err);
102 assert(utils::one_of(
103 err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY));
104 return p;
105}
106
107void free(engine_t *engine, void *ptr) {
108 using clMemFreeINTEL_func_t = cl_int (*)(cl_context, void *);
109
110 if (!ptr) return;
111 static ext_func_t<clMemFreeINTEL_func_t> ext_func("clMemFreeINTEL");
112 cl_int err = ext_func(engine, get_ocl_context(engine), ptr);
113 assert(err == CL_SUCCESS);
114 MAYBE_UNUSED(err);
115}
116
117status_t set_kernel_arg_usm(engine_t *engine, cl_kernel kernel, int arg_index,
118 const void *arg_value) {
119 using clSetKernelArgMemPointerINTEL_func_t
120 = cl_int (*)(cl_kernel, cl_uint, const void *);
121 static ext_func_t<clSetKernelArgMemPointerINTEL_func_t> ext_func(
122 "clSetKernelArgMemPointerINTEL");
123 return convert_to_dnnl(ext_func(engine, kernel, arg_index, arg_value));
124}
125
126status_t memcpy(stream_t *stream, void *dst, const void *src, size_t size) {
127 using clEnqueueMemcpyINTEL_func_t
128 = cl_int (*)(cl_command_queue, cl_bool, void *, const void *,
129 size_t, cl_uint, const cl_event *, cl_event *);
130 static ext_func_t<clEnqueueMemcpyINTEL_func_t> ext_func(
131 "clEnqueueMemcpyINTEL");
132 return convert_to_dnnl(ext_func(stream->engine(), get_ocl_queue(stream),
133 /* blocking */ CL_FALSE, dst, src, size, 0, nullptr, nullptr));
134}
135
136status_t fill(stream_t *stream, void *ptr, const void *pattern,
137 size_t pattern_size, size_t size) {
138 using clEnqueueMemFillINTEL_func_t
139 = cl_int (*)(cl_command_queue, void *, const void *, size_t, size_t,
140 cl_uint, const cl_event *, cl_event *);
141 static ext_func_t<clEnqueueMemFillINTEL_func_t> ext_func(
142 "clEnqueueMemFillINTEL");
143 return convert_to_dnnl(ext_func(stream->engine(), get_ocl_queue(stream),
144 ptr, pattern, pattern_size, size, 0, nullptr, nullptr));
145}
146
147status_t memset(stream_t *stream, void *ptr, int value, size_t size) {
148 uint8_t pattern = (uint8_t)value;
149 return fill(stream, ptr, &pattern, sizeof(uint8_t), size);
150}
151
152ocl_usm_kind_t get_pointer_type(engine_t *engine, const void *ptr) {
153 using clGetMemAllocInfoINTEL_func_t = cl_int (*)(
154 cl_context, const void *, cl_uint, size_t, void *, size_t *);
155
156 // The values are taken from cl_ext.h to avoid dependency on the header.
157 static constexpr cl_uint cl_mem_type_unknown_intel = 0x4196;
158 static constexpr cl_uint cl_mem_type_host_intel = 0x4197;
159 static constexpr cl_uint cl_mem_type_device_intel = 0x4198;
160 static constexpr cl_uint cl_mem_type_shared_intel = 0x4199;
161
162 static constexpr cl_uint cl_mem_alloc_type_intel = 0x419A;
163
164 static ext_func_t<clGetMemAllocInfoINTEL_func_t> ext_func(
165 "clGetMemAllocInfoINTEL");
166
167 if (!ptr) return ocl_usm_kind_t::unknown;
168
169 cl_uint alloc_type;
170 cl_int err = ext_func(engine, get_ocl_context(engine), ptr,
171 cl_mem_alloc_type_intel, sizeof(alloc_type), &alloc_type, nullptr);
172 assert(err == CL_SUCCESS);
173 if (err != CL_SUCCESS) return ocl_usm_kind_t::unknown;
174
175 switch (alloc_type) {
176 case cl_mem_type_unknown_intel: return ocl_usm_kind_t::unknown;
177 case cl_mem_type_host_intel: return ocl_usm_kind_t::host;
178 case cl_mem_type_device_intel: return ocl_usm_kind_t::device;
179 case cl_mem_type_shared_intel: return ocl_usm_kind_t::shared;
180 default: assert(!"unknown alloc type");
181 }
182 return ocl_usm_kind_t::unknown;
183}
184
185} // namespace usm
186} // namespace ocl
187} // namespace gpu
188} // namespace impl
189} // namespace dnnl
190