1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_OCL_OCL_STREAM_HPP
18#define GPU_OCL_OCL_STREAM_HPP
19
20#include <memory>
21
22#include "common/c_types_map.hpp"
23#include "common/thread_local_storage.hpp"
24#include "common/utils.hpp"
25#include "gpu/compute/compute.hpp"
26#include "gpu/ocl/mdapi_utils.hpp"
27#include "gpu/ocl/ocl_engine.hpp"
28#include "gpu/ocl/ocl_utils.hpp"
29
30namespace dnnl {
31namespace impl {
32namespace gpu {
33namespace ocl {
34
35struct ocl_stream_t : public compute::compute_stream_t {
36 static status_t create_stream(
37 stream_t **stream, engine_t *engine, unsigned flags) {
38
39 std::unique_ptr<ocl_stream_t> ocl_stream(
40 new ocl_stream_t(engine, flags));
41 if (!ocl_stream) return status::out_of_memory;
42
43 status_t status = ocl_stream->init();
44 if (status != status::success) return status;
45
46 *stream = ocl_stream.release();
47 return status::success;
48 }
49
50 static status_t create_stream(
51 stream_t **stream, engine_t *engine, cl_command_queue queue) {
52 unsigned flags;
53 status_t status = ocl_stream_t::init_flags(&flags, queue);
54 if (status != status::success) return status;
55
56 std::unique_ptr<ocl_stream_t> ocl_stream(
57 new ocl_stream_t(engine, flags, queue));
58 if (!ocl_stream) return status::out_of_memory;
59
60 status = ocl_stream->init();
61 if (status != status::success) return status;
62
63 *stream = ocl_stream.release();
64 return status::success;
65 }
66
67 status_t wait() override {
68 OCL_CHECK(clFinish(queue_));
69 return status::success;
70 }
71
72 void before_exec_hook() override;
73 void after_exec_hook() override;
74
75 cl_command_queue queue() const { return queue_; }
76
77 const mdapi_helper_t &mdapi_helper() const { return *mdapi_helper_; }
78
79 status_t copy(const memory_storage_t &src, const memory_storage_t &dst,
80 size_t size) override;
81
82 status_t fill(
83 const memory_storage_t &dst, uint8_t pattern, size_t size) override;
84
85 ~ocl_stream_t() override {
86 wait();
87 if (queue_) { clReleaseCommandQueue(queue_); }
88 }
89
90 std::vector<ocl_wrapper_t<cl_event>> &get_deps() {
91 auto &deps = const_cast<const ocl_stream_t *>(this)->get_deps();
92 return const_cast<std::vector<ocl_wrapper_t<cl_event>> &>(deps);
93 }
94 const std::vector<ocl_wrapper_t<cl_event>> &get_deps() const {
95 static std::vector<ocl_wrapper_t<cl_event>> empty_deps;
96 return deps_tls_.get(empty_deps);
97 }
98
99 void set_deps(const std::vector<ocl_wrapper_t<cl_event>> &deps) {
100 get_deps() = deps;
101 }
102
103 const ocl_wrapper_t<cl_event> &get_output_event() const {
104 auto &deps = get_deps();
105 assert(deps.size() == 1);
106 return deps[0];
107 }
108
109private:
110 ocl_stream_t(engine_t *engine, unsigned flags)
111 : compute_stream_t(engine, flags), queue_(nullptr) {}
112 ocl_stream_t(engine_t *engine, unsigned flags, cl_command_queue queue)
113 : compute_stream_t(engine, flags), queue_(queue) {}
114 status_t init();
115
116 static status_t init_flags(unsigned *flags, cl_command_queue queue) {
117 *flags = 0;
118 // Determine if the passed queue is in-order/out-of-order
119 cl_command_queue_properties props;
120 OCL_CHECK(clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
121 sizeof(cl_command_queue_properties), &props, nullptr));
122
123 *flags |= (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
124 ? stream_flags::out_of_order
125 : stream_flags::in_order;
126
127 return status::success;
128 }
129
130 cl_command_queue create_queue(
131 cl_context ctx, cl_device_id dev, cl_int *err) const;
132
133 cl_command_queue queue_;
134 std::unique_ptr<mdapi_helper_t> mdapi_helper_;
135 mutable utils::thread_local_storage_t<std::vector<ocl_wrapper_t<cl_event>>>
136 deps_tls_;
137};
138
139} // namespace ocl
140} // namespace gpu
141} // namespace impl
142} // namespace dnnl
143
144#endif
145