1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_OCL_RNN_RNN_REORDERS_HPP
18#define GPU_OCL_RNN_RNN_REORDERS_HPP
19
20#include "common/c_types_map.hpp"
21#include "common/memory.hpp"
22#include "common/primitive.hpp"
23#include "common/utils.hpp"
24#include "gpu/compute/compute.hpp"
25#include "gpu/gpu_primitive.hpp"
26#include "gpu/gpu_reorder_pd.hpp"
27#include "gpu/gpu_resource.hpp"
28#include "gpu/ocl/ocl_utils.hpp"
29#include "gpu/primitive_conf.hpp"
30
31namespace dnnl {
32namespace impl {
33namespace gpu {
34namespace ocl {
35
36struct rnn_weights_reorder_t : public gpu_primitive_t {
37 using gpu_primitive_t::gpu_primitive_t;
38 struct pd_t : public reorder_pd_t {
39 using reorder_pd_t::reorder_pd_t;
40
41 DECLARE_COMMON_PD_T("cross_engine::rnn", rnn_weights_reorder_t);
42
43 status_t init(
44 engine_t *engine, engine_t *src_engine, engine_t *dst_engine) {
45 // Note: currently rnn_u8s8_compensation and rnn_s8s8_compensation
46 // have common bit so we have to perform additional checks to
47 // separate these two cases
48 if (IMPLICATION(dst_md()->extra.flags
49 & memory_extra_flags::rnn_u8s8_compensation,
50 types::extra_flag_rnn_s8s8_compensation_is_set(
51 dst_md()->extra.flags)))
52 return status::unimplemented;
53
54 bool args_ok = true
55 && utils::one_of(src_engine->kind(), engine_kind::gpu,
56 engine_kind::cpu)
57 && dst_engine->kind() == engine_kind::gpu;
58 if (!args_ok) return status::unimplemented;
59
60 auto *compute_engine
61 = utils::downcast<compute::compute_engine_t *>(dst_engine);
62
63 args_ok = args_ok
64 && compute_engine->mayiuse(
65 compute::device_ext_t::intel_subgroups)
66 && IMPLICATION(
67 utils::one_of(data_type::f16, src_md()->data_type,
68 dst_md()->data_type),
69 true
70 && compute_engine->mayiuse(
71 compute::device_ext_t::khr_fp16)
72 && compute_engine->mayiuse(
73 compute::device_ext_t::
74 intel_subgroups_short));
75
76 auto status = init_conf(engine);
77 if (status != status::success) return status;
78 init_scratchpad();
79 return status;
80 }
81
82 status_t init_conf(engine_t *engine);
83 status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
84
85 rnn_reorder_conf_t conf;
86
87 private:
88 DECLARE_GPU_REORDER_CREATE();
89
90 void init_scratchpad() {
91 auto scratchpad = scratchpad_registry().registrar();
92
93 if (conf.do_reorder) {
94 size_t sz = conf.nelems;
95 scratchpad.book(memory_tracking::names::key_reorder_rnn_space,
96 sz, sizeof(float), OCL_BUFFER_ALIGNMENT);
97 }
98 }
99 };
100
101 status_t init(engine_t *engine) override {
102 compute::kernel_ctx_t kernel_ctx;
103
104 auto status = pd()->init_kernel_ctx(kernel_ctx);
105 if (status != status::success) return status;
106
107 create_kernel(engine, &kernel_, "wei_reorder", kernel_ctx);
108 if (!kernel_) return status::runtime_error;
109 return status::success;
110 }
111
112 status_t execute(const exec_ctx_t &ctx) const override;
113
114protected:
115 status_t init_res_storage(
116 engine_t *engine, gpu_resource_t *r) const override {
117 if (!pd()->conf.do_reorder) return status::success;
118 memory_storage_t *tmp_mem_storage_ptr = nullptr;
119 size_t size = pd()->conf.scales_count * sizeof(float);
120 CHECK(engine->create_memory_storage(&tmp_mem_storage_ptr, size));
121
122 void *scales_ptr = nullptr;
123 std::unique_ptr<memory_storage_t> tmp_mem_storage(tmp_mem_storage_ptr);
124 CHECK(tmp_mem_storage->map_data(
125 &scales_ptr, nullptr, sizeof(float) * pd()->conf.scales_count));
126 utils::array_copy((float *)scales_ptr,
127 pd()->attr()->rnn_weights_qparams_.scales_,
128 pd()->conf.scales_count);
129 CHECK(tmp_mem_storage->unmap_data(scales_ptr, nullptr));
130 r->add_memory_storage(SCALES_, std::move(tmp_mem_storage));
131 return status::success;
132 }
133
134private:
135 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
136 compute::kernel_t kernel_;
137 enum { SCALES_ = 0 };
138};
139
140} // namespace ocl
141} // namespace gpu
142} // namespace impl
143} // namespace dnnl
144
145#endif
146