1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_OCL_OCL_STREAM_HPP |
18 | #define GPU_OCL_OCL_STREAM_HPP |
19 | |
20 | #include <memory> |
21 | |
22 | #include "common/c_types_map.hpp" |
23 | #include "common/thread_local_storage.hpp" |
24 | #include "common/utils.hpp" |
25 | #include "gpu/compute/compute.hpp" |
26 | #include "gpu/ocl/mdapi_utils.hpp" |
27 | #include "gpu/ocl/ocl_engine.hpp" |
28 | #include "gpu/ocl/ocl_utils.hpp" |
29 | |
30 | namespace dnnl { |
31 | namespace impl { |
32 | namespace gpu { |
33 | namespace ocl { |
34 | |
35 | struct ocl_stream_t : public compute::compute_stream_t { |
36 | static status_t create_stream( |
37 | stream_t **stream, engine_t *engine, unsigned flags) { |
38 | |
39 | std::unique_ptr<ocl_stream_t> ocl_stream( |
40 | new ocl_stream_t(engine, flags)); |
41 | if (!ocl_stream) return status::out_of_memory; |
42 | |
43 | status_t status = ocl_stream->init(); |
44 | if (status != status::success) return status; |
45 | |
46 | *stream = ocl_stream.release(); |
47 | return status::success; |
48 | } |
49 | |
50 | static status_t create_stream( |
51 | stream_t **stream, engine_t *engine, cl_command_queue queue) { |
52 | unsigned flags; |
53 | status_t status = ocl_stream_t::init_flags(&flags, queue); |
54 | if (status != status::success) return status; |
55 | |
56 | std::unique_ptr<ocl_stream_t> ocl_stream( |
57 | new ocl_stream_t(engine, flags, queue)); |
58 | if (!ocl_stream) return status::out_of_memory; |
59 | |
60 | status = ocl_stream->init(); |
61 | if (status != status::success) return status; |
62 | |
63 | *stream = ocl_stream.release(); |
64 | return status::success; |
65 | } |
66 | |
67 | status_t wait() override { |
68 | OCL_CHECK(clFinish(queue_)); |
69 | return status::success; |
70 | } |
71 | |
72 | void before_exec_hook() override; |
73 | void after_exec_hook() override; |
74 | |
75 | cl_command_queue queue() const { return queue_; } |
76 | |
77 | const mdapi_helper_t &mdapi_helper() const { return *mdapi_helper_; } |
78 | |
79 | status_t copy(const memory_storage_t &src, const memory_storage_t &dst, |
80 | size_t size) override; |
81 | |
82 | status_t fill( |
83 | const memory_storage_t &dst, uint8_t pattern, size_t size) override; |
84 | |
85 | ~ocl_stream_t() override { |
86 | wait(); |
87 | if (queue_) { clReleaseCommandQueue(queue_); } |
88 | } |
89 | |
90 | std::vector<ocl_wrapper_t<cl_event>> &get_deps() { |
91 | auto &deps = const_cast<const ocl_stream_t *>(this)->get_deps(); |
92 | return const_cast<std::vector<ocl_wrapper_t<cl_event>> &>(deps); |
93 | } |
94 | const std::vector<ocl_wrapper_t<cl_event>> &get_deps() const { |
95 | static std::vector<ocl_wrapper_t<cl_event>> empty_deps; |
96 | return deps_tls_.get(empty_deps); |
97 | } |
98 | |
99 | void set_deps(const std::vector<ocl_wrapper_t<cl_event>> &deps) { |
100 | get_deps() = deps; |
101 | } |
102 | |
103 | const ocl_wrapper_t<cl_event> &get_output_event() const { |
104 | auto &deps = get_deps(); |
105 | assert(deps.size() == 1); |
106 | return deps[0]; |
107 | } |
108 | |
109 | private: |
110 | ocl_stream_t(engine_t *engine, unsigned flags) |
111 | : compute_stream_t(engine, flags), queue_(nullptr) {} |
112 | ocl_stream_t(engine_t *engine, unsigned flags, cl_command_queue queue) |
113 | : compute_stream_t(engine, flags), queue_(queue) {} |
114 | status_t init(); |
115 | |
116 | static status_t init_flags(unsigned *flags, cl_command_queue queue) { |
117 | *flags = 0; |
118 | // Determine if the passed queue is in-order/out-of-order |
119 | cl_command_queue_properties props; |
120 | OCL_CHECK(clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, |
121 | sizeof(cl_command_queue_properties), &props, nullptr)); |
122 | |
123 | *flags |= (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) |
124 | ? stream_flags::out_of_order |
125 | : stream_flags::in_order; |
126 | |
127 | return status::success; |
128 | } |
129 | |
130 | cl_command_queue create_queue( |
131 | cl_context ctx, cl_device_id dev, cl_int *err) const; |
132 | |
133 | cl_command_queue queue_; |
134 | std::unique_ptr<mdapi_helper_t> mdapi_helper_; |
135 | mutable utils::thread_local_storage_t<std::vector<ocl_wrapper_t<cl_event>>> |
136 | deps_tls_; |
137 | }; |
138 | |
139 | } // namespace ocl |
140 | } // namespace gpu |
141 | } // namespace impl |
142 | } // namespace dnnl |
143 | |
144 | #endif |
145 | |