1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_OCL_RNN_RNN_REORDERS_HPP |
18 | #define GPU_OCL_RNN_RNN_REORDERS_HPP |
19 | |
20 | #include "common/c_types_map.hpp" |
21 | #include "common/memory.hpp" |
22 | #include "common/primitive.hpp" |
23 | #include "common/utils.hpp" |
24 | #include "gpu/compute/compute.hpp" |
25 | #include "gpu/gpu_primitive.hpp" |
26 | #include "gpu/gpu_reorder_pd.hpp" |
27 | #include "gpu/gpu_resource.hpp" |
28 | #include "gpu/ocl/ocl_utils.hpp" |
29 | #include "gpu/primitive_conf.hpp" |
30 | |
31 | namespace dnnl { |
32 | namespace impl { |
33 | namespace gpu { |
34 | namespace ocl { |
35 | |
36 | struct rnn_weights_reorder_t : public gpu_primitive_t { |
37 | using gpu_primitive_t::gpu_primitive_t; |
38 | struct pd_t : public reorder_pd_t { |
39 | using reorder_pd_t::reorder_pd_t; |
40 | |
41 | DECLARE_COMMON_PD_T("cross_engine::rnn" , rnn_weights_reorder_t); |
42 | |
43 | status_t init( |
44 | engine_t *engine, engine_t *src_engine, engine_t *dst_engine) { |
45 | // Note: currently rnn_u8s8_compensation and rnn_s8s8_compensation |
46 | // have common bit so we have to perform additional checks to |
47 | // separate these two cases |
48 | if (IMPLICATION(dst_md()->extra.flags |
49 | & memory_extra_flags::rnn_u8s8_compensation, |
50 | types::extra_flag_rnn_s8s8_compensation_is_set( |
51 | dst_md()->extra.flags))) |
52 | return status::unimplemented; |
53 | |
54 | bool args_ok = true |
55 | && utils::one_of(src_engine->kind(), engine_kind::gpu, |
56 | engine_kind::cpu) |
57 | && dst_engine->kind() == engine_kind::gpu; |
58 | if (!args_ok) return status::unimplemented; |
59 | |
60 | auto *compute_engine |
61 | = utils::downcast<compute::compute_engine_t *>(dst_engine); |
62 | |
63 | args_ok = args_ok |
64 | && compute_engine->mayiuse( |
65 | compute::device_ext_t::intel_subgroups) |
66 | && IMPLICATION( |
67 | utils::one_of(data_type::f16, src_md()->data_type, |
68 | dst_md()->data_type), |
69 | true |
70 | && compute_engine->mayiuse( |
71 | compute::device_ext_t::khr_fp16) |
72 | && compute_engine->mayiuse( |
73 | compute::device_ext_t:: |
74 | intel_subgroups_short)); |
75 | |
76 | auto status = init_conf(engine); |
77 | if (status != status::success) return status; |
78 | init_scratchpad(); |
79 | return status; |
80 | } |
81 | |
82 | status_t init_conf(engine_t *engine); |
83 | status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const; |
84 | |
85 | rnn_reorder_conf_t conf; |
86 | |
87 | private: |
88 | DECLARE_GPU_REORDER_CREATE(); |
89 | |
90 | void init_scratchpad() { |
91 | auto scratchpad = scratchpad_registry().registrar(); |
92 | |
93 | if (conf.do_reorder) { |
94 | size_t sz = conf.nelems; |
95 | scratchpad.book(memory_tracking::names::key_reorder_rnn_space, |
96 | sz, sizeof(float), OCL_BUFFER_ALIGNMENT); |
97 | } |
98 | } |
99 | }; |
100 | |
101 | status_t init(engine_t *engine) override { |
102 | compute::kernel_ctx_t kernel_ctx; |
103 | |
104 | auto status = pd()->init_kernel_ctx(kernel_ctx); |
105 | if (status != status::success) return status; |
106 | |
107 | create_kernel(engine, &kernel_, "wei_reorder" , kernel_ctx); |
108 | if (!kernel_) return status::runtime_error; |
109 | return status::success; |
110 | } |
111 | |
112 | status_t execute(const exec_ctx_t &ctx) const override; |
113 | |
114 | protected: |
115 | status_t init_res_storage( |
116 | engine_t *engine, gpu_resource_t *r) const override { |
117 | if (!pd()->conf.do_reorder) return status::success; |
118 | memory_storage_t *tmp_mem_storage_ptr = nullptr; |
119 | size_t size = pd()->conf.scales_count * sizeof(float); |
120 | CHECK(engine->create_memory_storage(&tmp_mem_storage_ptr, size)); |
121 | |
122 | void *scales_ptr = nullptr; |
123 | std::unique_ptr<memory_storage_t> tmp_mem_storage(tmp_mem_storage_ptr); |
124 | CHECK(tmp_mem_storage->map_data( |
125 | &scales_ptr, nullptr, sizeof(float) * pd()->conf.scales_count)); |
126 | utils::array_copy((float *)scales_ptr, |
127 | pd()->attr()->rnn_weights_qparams_.scales_, |
128 | pd()->conf.scales_count); |
129 | CHECK(tmp_mem_storage->unmap_data(scales_ptr, nullptr)); |
130 | r->add_memory_storage(SCALES_, std::move(tmp_mem_storage)); |
131 | return status::success; |
132 | } |
133 | |
134 | private: |
135 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } |
136 | compute::kernel_t kernel_; |
137 | enum { SCALES_ = 0 }; |
138 | }; |
139 | |
140 | } // namespace ocl |
141 | } // namespace gpu |
142 | } // namespace impl |
143 | } // namespace dnnl |
144 | |
145 | #endif |
146 | |