1 | /******************************************************************************* |
2 | * Copyright 2021-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <type_traits> |
18 | |
19 | #include <CL/cl.h> |
20 | |
21 | #include "common/cpp_compat.hpp" |
22 | |
23 | #include "common/utils.hpp" |
24 | #include "common/verbose.hpp" |
25 | #include "gpu/ocl/ocl_gpu_engine.hpp" |
26 | #include "gpu/ocl/ocl_stream.hpp" |
27 | #include "gpu/ocl/ocl_usm_utils.hpp" |
28 | #include "gpu/ocl/ocl_utils.hpp" |
29 | |
30 | namespace dnnl { |
31 | namespace impl { |
32 | namespace gpu { |
33 | namespace ocl { |
34 | namespace usm { |
35 | |
36 | namespace { |
37 | |
38 | cl_device_id get_ocl_device(engine_t *engine) { |
39 | return utils::downcast<ocl_gpu_engine_t *>(engine)->device(); |
40 | } |
41 | |
42 | cl_context get_ocl_context(engine_t *engine) { |
43 | return utils::downcast<ocl_gpu_engine_t *>(engine)->context(); |
44 | } |
45 | |
46 | cl_command_queue get_ocl_queue(stream_t *stream) { |
47 | return utils::downcast<ocl_stream_t *>(stream)->queue(); |
48 | } |
49 | |
50 | } // namespace |
51 | |
52 | bool is_usm_supported(engine_t *engine) { |
53 | using clSharedMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id, |
54 | cl_ulong *, size_t, cl_uint, cl_int *); |
55 | static ext_func_t<clSharedMemAllocINTEL_func_t> ext_func( |
56 | "clSharedMemAllocINTEL" ); |
57 | return (bool)ext_func.get_func(engine); |
58 | } |
59 | |
60 | void *malloc_host(engine_t *engine, size_t size) { |
61 | using clHostMemAllocINTEL_func_t = void *(*)(cl_context, const cl_ulong *, |
62 | size_t, cl_uint, cl_int *); |
63 | |
64 | if (size == 0) return nullptr; |
65 | |
66 | static ext_func_t<clHostMemAllocINTEL_func_t> ext_func( |
67 | "clHostMemAllocINTEL" ); |
68 | cl_int err; |
69 | void *p = ext_func(engine, get_ocl_context(engine), nullptr, size, 0, &err); |
70 | assert(utils::one_of( |
71 | err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY)); |
72 | return p; |
73 | } |
74 | |
75 | void *malloc_device(engine_t *engine, size_t size) { |
76 | using clDeviceMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id, |
77 | cl_ulong *, size_t, cl_uint, cl_int *); |
78 | |
79 | if (size == 0) return nullptr; |
80 | |
81 | static ext_func_t<clDeviceMemAllocINTEL_func_t> ext_func( |
82 | "clDeviceMemAllocINTEL" ); |
83 | cl_int err; |
84 | void *p = ext_func(engine, get_ocl_context(engine), get_ocl_device(engine), |
85 | nullptr, size, 0, &err); |
86 | assert(utils::one_of( |
87 | err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY)); |
88 | return p; |
89 | } |
90 | |
91 | void *malloc_shared(engine_t *engine, size_t size) { |
92 | using clSharedMemAllocINTEL_func_t = void *(*)(cl_context, cl_device_id, |
93 | cl_ulong *, size_t, cl_uint, cl_int *); |
94 | |
95 | if (size == 0) return nullptr; |
96 | |
97 | static ext_func_t<clSharedMemAllocINTEL_func_t> ext_func( |
98 | "clSharedMemAllocINTEL" ); |
99 | cl_int err; |
100 | void *p = ext_func(engine, get_ocl_context(engine), get_ocl_device(engine), |
101 | nullptr, size, 0, &err); |
102 | assert(utils::one_of( |
103 | err, CL_SUCCESS, CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY)); |
104 | return p; |
105 | } |
106 | |
107 | void free(engine_t *engine, void *ptr) { |
108 | using clMemFreeINTEL_func_t = cl_int (*)(cl_context, void *); |
109 | |
110 | if (!ptr) return; |
111 | static ext_func_t<clMemFreeINTEL_func_t> ext_func("clMemFreeINTEL" ); |
112 | cl_int err = ext_func(engine, get_ocl_context(engine), ptr); |
113 | assert(err == CL_SUCCESS); |
114 | MAYBE_UNUSED(err); |
115 | } |
116 | |
117 | status_t set_kernel_arg_usm(engine_t *engine, cl_kernel kernel, int arg_index, |
118 | const void *arg_value) { |
119 | using clSetKernelArgMemPointerINTEL_func_t |
120 | = cl_int (*)(cl_kernel, cl_uint, const void *); |
121 | static ext_func_t<clSetKernelArgMemPointerINTEL_func_t> ext_func( |
122 | "clSetKernelArgMemPointerINTEL" ); |
123 | return convert_to_dnnl(ext_func(engine, kernel, arg_index, arg_value)); |
124 | } |
125 | |
126 | status_t memcpy(stream_t *stream, void *dst, const void *src, size_t size) { |
127 | using clEnqueueMemcpyINTEL_func_t |
128 | = cl_int (*)(cl_command_queue, cl_bool, void *, const void *, |
129 | size_t, cl_uint, const cl_event *, cl_event *); |
130 | static ext_func_t<clEnqueueMemcpyINTEL_func_t> ext_func( |
131 | "clEnqueueMemcpyINTEL" ); |
132 | return convert_to_dnnl(ext_func(stream->engine(), get_ocl_queue(stream), |
133 | /* blocking */ CL_FALSE, dst, src, size, 0, nullptr, nullptr)); |
134 | } |
135 | |
136 | status_t fill(stream_t *stream, void *ptr, const void *pattern, |
137 | size_t pattern_size, size_t size) { |
138 | using clEnqueueMemFillINTEL_func_t |
139 | = cl_int (*)(cl_command_queue, void *, const void *, size_t, size_t, |
140 | cl_uint, const cl_event *, cl_event *); |
141 | static ext_func_t<clEnqueueMemFillINTEL_func_t> ext_func( |
142 | "clEnqueueMemFillINTEL" ); |
143 | return convert_to_dnnl(ext_func(stream->engine(), get_ocl_queue(stream), |
144 | ptr, pattern, pattern_size, size, 0, nullptr, nullptr)); |
145 | } |
146 | |
147 | status_t memset(stream_t *stream, void *ptr, int value, size_t size) { |
148 | uint8_t pattern = (uint8_t)value; |
149 | return fill(stream, ptr, &pattern, sizeof(uint8_t), size); |
150 | } |
151 | |
152 | ocl_usm_kind_t get_pointer_type(engine_t *engine, const void *ptr) { |
153 | using clGetMemAllocInfoINTEL_func_t = cl_int (*)( |
154 | cl_context, const void *, cl_uint, size_t, void *, size_t *); |
155 | |
156 | // The values are taken from cl_ext.h to avoid dependency on the header. |
157 | static constexpr cl_uint cl_mem_type_unknown_intel = 0x4196; |
158 | static constexpr cl_uint cl_mem_type_host_intel = 0x4197; |
159 | static constexpr cl_uint cl_mem_type_device_intel = 0x4198; |
160 | static constexpr cl_uint cl_mem_type_shared_intel = 0x4199; |
161 | |
162 | static constexpr cl_uint cl_mem_alloc_type_intel = 0x419A; |
163 | |
164 | static ext_func_t<clGetMemAllocInfoINTEL_func_t> ext_func( |
165 | "clGetMemAllocInfoINTEL" ); |
166 | |
167 | if (!ptr) return ocl_usm_kind_t::unknown; |
168 | |
169 | cl_uint alloc_type; |
170 | cl_int err = ext_func(engine, get_ocl_context(engine), ptr, |
171 | cl_mem_alloc_type_intel, sizeof(alloc_type), &alloc_type, nullptr); |
172 | assert(err == CL_SUCCESS); |
173 | if (err != CL_SUCCESS) return ocl_usm_kind_t::unknown; |
174 | |
175 | switch (alloc_type) { |
176 | case cl_mem_type_unknown_intel: return ocl_usm_kind_t::unknown; |
177 | case cl_mem_type_host_intel: return ocl_usm_kind_t::host; |
178 | case cl_mem_type_device_intel: return ocl_usm_kind_t::device; |
179 | case cl_mem_type_shared_intel: return ocl_usm_kind_t::shared; |
180 | default: assert(!"unknown alloc type" ); |
181 | } |
182 | return ocl_usm_kind_t::unknown; |
183 | } |
184 | |
185 | } // namespace usm |
186 | } // namespace ocl |
187 | } // namespace gpu |
188 | } // namespace impl |
189 | } // namespace dnnl |
190 | |