/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <CL/cl.h>

#include "gpu/ocl/ocl_buffer_memory_storage.hpp"
#include "gpu/ocl/ocl_engine.hpp"
#include "gpu/ocl/ocl_stream.hpp"
#include "gpu/ocl/ocl_usm_utils.hpp"

namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {

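// Allocates a read-write OpenCL buffer of `size` bytes in the engine's
// OpenCL context.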
status_t ocl_buffer_memory_storage_t::init_allocate(size_t size) {
    auto *ocl_engine = utils::downcast<ocl_gpu_engine_t *>(engine());
    cl_int err;
    mem_object_ = clCreateBuffer_wrapper(
            ocl_engine->context(), CL_MEM_READ_WRITE, size, nullptr, &err);

    OCL_CHECK(err);
    return status::success;
}

namespace {
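// Resolves the command queue used for map/unmap: takes the queue from the
// given stream, or falls back to the engine's service stream when no stream
// is provided.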
status_t get_map_queue(
        cl_command_queue &queue, engine_t *engine, stream_t *stream) {
    ocl_stream_t *ocl_stream;
    if (stream == nullptr) {
        auto *ocl_engine = utils::downcast<ocl_gpu_engine_t *>(engine);
        status_t status = ocl_engine->get_service_stream(stream);
        if (status != status::success) { return status::runtime_error; }
    }
    ocl_stream = utils::downcast<ocl_stream_t *>(stream);
    queue = ocl_stream->queue();
    return status::success;
}
} // namespace

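// Maps the buffer into host-accessible memory. The map flags are derived
// from the buffer's own access flags, and the mapping is blocking, so the
// returned pointer is valid as soon as the call completes.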
status_t ocl_buffer_memory_storage_t::map_data(
        void **mapped_ptr, stream_t *stream, size_t) const {
    if (!mem_object()) {
        *mapped_ptr = nullptr;
        return status::success;
    }

    cl_mem_flags mem_flags;
    OCL_CHECK(clGetMemObjectInfo(mem_object(), CL_MEM_FLAGS, sizeof(mem_flags),
            &mem_flags, nullptr));

    size_t mem_bytes;
    OCL_CHECK(clGetMemObjectInfo(
            mem_object(), CL_MEM_SIZE, sizeof(mem_bytes), &mem_bytes, nullptr));

    cl_map_flags map_flags = 0;
    if (mem_flags & CL_MEM_READ_WRITE) {
        map_flags |= CL_MAP_READ;
        map_flags |= CL_MAP_WRITE;
    } else if (mem_flags & CL_MEM_READ_ONLY) {
        map_flags |= CL_MAP_READ;
    } else if (mem_flags & CL_MEM_WRITE_ONLY) {
        map_flags |= CL_MAP_WRITE;
    }

    cl_command_queue queue;
    CHECK(get_map_queue(queue, engine(), stream));

    // Use a blocking map operation to simplify the implementation and the API.
    cl_int err;
    *mapped_ptr = clEnqueueMapBuffer(queue, mem_object(), CL_TRUE, map_flags, 0,
            mem_bytes, 0, nullptr, nullptr, &err);
    return convert_to_dnnl(err);
}

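// Unmaps a pointer previously obtained from map_data() and waits for the
// unmap operation to complete.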
status_t ocl_buffer_memory_storage_t::unmap_data(
        void *mapped_ptr, stream_t *stream) const {
    if (!mapped_ptr) return status::success;

    cl_command_queue queue;
    CHECK(get_map_queue(queue, engine(), stream));
    OCL_CHECK(clEnqueueUnmapMemObject(queue, mem_object_,
            const_cast<void *>(mapped_ptr), 0, nullptr, nullptr));
    OCL_CHECK(clFinish(queue));
    return status::success;
}

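// A minimal usage sketch for the map/unmap pair above (hypothetical caller
// code, not part of this implementation; `storage`, `stream`, and `nbytes`
// are assumed to exist in the caller):
//
//     void *host_ptr = nullptr;
//     CHECK(storage.map_data(&host_ptr, stream, nbytes));
//     // ... read or write the buffer contents through host_ptr ...
//     CHECK(storage.unmap_data(host_ptr, stream));

// Returns a storage object that views a sub-region of this buffer through
// clCreateSubBuffer(). Returns nullptr for an empty region or on any
// OpenCL error.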
std::unique_ptr<memory_storage_t> ocl_buffer_memory_storage_t::get_sub_storage(
        size_t offset, size_t size) const {
    // Return early when size = 0; clCreateSubBuffer() also does not seem to
    // handle a zero-sized region properly.
    // Assumption: the returned sub-storage won't be used to extract a cl_mem.
    if (size == 0) return nullptr;

    cl_mem_flags mem_flags;
    cl_int err;
    err = clGetMemObjectInfo(
            mem_object(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, nullptr);
    assert(err == CL_SUCCESS);
    if (err != CL_SUCCESS) return nullptr;

    assert(size != 0);
    cl_buffer_region buffer_region = {base_offset_ + offset, size};
    ocl_wrapper_t<cl_mem> sub_buffer = clCreateSubBuffer(parent_mem_object(),
            mem_flags, CL_BUFFER_CREATE_TYPE_REGION, &buffer_region, &err);
    assert(err == CL_SUCCESS);
    if (err != CL_SUCCESS) return nullptr;

    auto sub_storage
            = new ocl_buffer_memory_storage_t(this->engine(), parent_storage());
    if (sub_storage) {
        sub_storage->init(memory_flags_t::use_runtime_ptr, size, sub_buffer);
        sub_storage->base_offset_ = base_offset_ + offset;
    }
    return std::unique_ptr<memory_storage_t>(sub_storage);
}

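// A minimal usage sketch for get_sub_storage() (hypothetical caller code;
// `storage` and `nbytes` are assumed to exist, and the region origin must
// satisfy the device's sub-buffer alignment for clCreateSubBuffer() to
// succeed):
//
//     // View of the second half of the buffer; it shares the parent cl_mem.
//     auto half = storage.get_sub_storage(nbytes / 2, nbytes / 2);
//     if (!half) { /* empty region or OpenCL error */ }

// Creates a new storage object that shares this storage's cl_mem.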
std::unique_ptr<memory_storage_t> ocl_buffer_memory_storage_t::clone() const {
    auto storage = new ocl_buffer_memory_storage_t(engine());
    if (storage) storage->init(memory_flags_t::use_runtime_ptr, 0, mem_object_);
    return std::unique_ptr<memory_storage_t>(storage);
}

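// Returns the cl_mem of the root (parent) storage that sub-buffers are
// created from.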
cl_mem ocl_buffer_memory_storage_t::parent_mem_object() const {
    return utils::downcast<const ocl_buffer_memory_storage_t *>(
            parent_storage())
            ->mem_object();
}

} // namespace ocl
} // namespace gpu
} // namespace impl
} // namespace dnnl