/*******************************************************************************
* Copyright 2019-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef GPU_OCL_XE_LP_X8S8X_CONVOLUTION_HPP
#define GPU_OCL_XE_LP_X8S8X_CONVOLUTION_HPP

#include "common/c_types_map.hpp"
#include "gpu/compute/compute.hpp"
#include "gpu/gpu_convolution_pd.hpp"
#include "gpu/gpu_primitive.hpp"
#include "gpu/gpu_resource.hpp"
#include "gpu/ocl/ocl_stream.hpp"
#include "gpu/ocl/ocl_utils.hpp"
#include "gpu/primitive_conf.hpp"

namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {

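// Forward int8 convolution for Xe-LP GPUs: u8/s8 sources, s8 weights, and an
// s32 accumulator, with u8/s8/s32/f32 destinations.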
struct xe_lp_x8s8x_convolution_fwd_t : public gpu_primitive_t {
    using gpu_primitive_t::gpu_primitive_t;
    struct pd_t : public gpu_convolution_fwd_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : gpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd) {}

        DECLARE_COMMON_PD_T("ocl:xe_lp", xe_lp_x8s8x_convolution_fwd_t);

        status_t init(engine_t *engine) {
            using namespace prop_kind;
            using namespace data_type;
            assert(engine->kind() == engine_kind::gpu);
            auto *compute_engine
                    = utils::downcast<compute::compute_engine_t *>(engine);

            const auto attr_skip_mask
                    = primitive_attr_t::skip_mask_t::oscale_runtime
                    | primitive_attr_t::skip_mask_t::zero_points_runtime
                    | primitive_attr_t::skip_mask_t::post_ops
                    | primitive_attr_t::skip_mask_t::sum_dt;

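            // Restrict to direct convolution on int8 data; attributes are
            // limited to runtime output scales (common or per-oc mask),
            // runtime zero points, and post-ops.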
            bool ok = true
                    && utils::one_of(this->desc()->prop_kind, forward_training,
                            forward_inference)
                    && this->desc()->alg_kind == alg_kind::convolution_direct
                    && utils::one_of(desc()->src_desc.data_type, u8, s8)
                    && utils::one_of(
                            desc()->dst_desc.data_type, u8, s8, s32, f32)
                    && expect_data_types(desc()->src_desc.data_type, s8, f32,
                            desc()->dst_desc.data_type, s32)
                    && compute_engine->mayiuse(
                            compute::device_ext_t::intel_subgroups)
                    && attr()->has_default_values(
                            attr_skip_mask, desc()->dst_desc.data_type)
                    && attr()->post_ops_.check_sum_consistent_dt(
                            dst_md()->data_type, true)
                    && post_ops_with_binary_ok(attr(), dst_md()->data_type)
                    && zero_points_ok(attr())
                    && IMPLICATION(!attr()->output_scales_.has_default_values(),
                            utils::one_of(
                                    attr()->output_scales_.mask_, 0, 1 << 1));
            if (!ok) return status::unimplemented;

            if (dst_md()->offset0 != 0) return status::unimplemented;

            CHECK(init_conf());

            if (!compute_engine->mayiuse_sub_group({8, conf.sub_group_size}))
                return status::unimplemented;

            init_scratchpad();

            ok = set_default_formats_common(
                    conf.src_tag, conf.wei_tag, conf.dst_tag);
            if (!ok) return status::unimplemented;

            CHECK(attr_.set_default_formats(dst_md(0)));

            return status::success;
        }

        status_t init_conf();
        status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
        void init_scratchpad();

        conv_conf_t conf;
    };

    status_t init(engine_t *engine) override {
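        // Select the OpenCL kernel by data layout (NHWC vs. blocked),
        // depthwise vs. regular convolution, and whether this is a "first"
        // convolution with few input channels (ic <= 4).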
        const char *kernel_name = nullptr;
        if (pd()->conf.is_nhwc) {
            if (pd()->conf.is_depthwise) {
                if (pd()->conf.mb_block == 32)
                    kernel_name = "conv_nhwc_fwd_dw_mb_block_x8s8x";
                else
                    kernel_name = "conv_nhwc_fwd_dw_ow_block_x8s8x";
            } else if (pd()->conf.ic <= 4) {
                kernel_name = "conv_nhwc_fwd_first_x8s8x";
            } else {
                kernel_name = "conv_nhwc_fwd_x8s8x";
            }
        } else if (pd()->conf.is_depthwise) {
            if (pd()->conf.mb_block == 32)
                kernel_name = "conv_dw_fwd_mb_block_x8s8x";
            else
                kernel_name = "conv_dw_fwd_ow_block_x8s8x";
        } else {
            if (pd()->conf.ic > 4) {
                if (pd()->conf.mb_block == 32)
                    kernel_name = "conv_fwd_mb_block_x8s8x";
                else
                    kernel_name = "conv_fwd_ow_block_x8s8x";
            } else {
                kernel_name = "conv_fwd_first_x8s8x";
            }
        }

        compute::kernel_ctx_t kernel_ctx;
        auto status = pd()->init_kernel_ctx(kernel_ctx);
        if (status != status::success) return status;

        create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
        if (!kernel_) return status::runtime_error;

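        // With runtime source zero points, an extra kernel computes the
        // zero-point compensation term before the main convolution runs.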
        if (pd()->conf.attr_info.with_src_zpoints
                && (pd()->conf.is_depthwise || pd()->conf.ic > 4)) {
            create_kernel(engine, &src_compensation_kernel_,
                    "xe_lp_x8s8x_compensation", kernel_ctx);
            if (!src_compensation_kernel_) return status::runtime_error;
        }

        return status::success;
    }

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_forward(ctx);
    }

private:
    status_t execute_forward(const exec_ctx_t &ctx) const;
    const pd_t *pd() const { return (const pd_t *)gpu_primitive_t::pd().get(); }
    compute::kernel_t kernel_;
    compute::kernel_t src_compensation_kernel_;
};

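// Backward-data int8 convolution for Xe-LP GPUs: computes a u8/s8 diff_src
// from a u8/s8 diff_dst and s8 weights with an s32 accumulator.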
struct xe_lp_x8s8x_convolution_bwd_data_t : public gpu_primitive_t {
    using gpu_primitive_t::gpu_primitive_t;
    struct pd_t : public gpu_convolution_bwd_data_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : gpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd) {}

        DECLARE_COMMON_PD_T("ocl:xe_lp", xe_lp_x8s8x_convolution_bwd_data_t);

        status_t init(engine_t *engine) {
            using namespace prop_kind;
            using namespace data_type;
            assert(engine->kind() == engine_kind::gpu);
            auto *compute_engine
                    = utils::downcast<compute::compute_engine_t *>(engine);

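            // Backward data supports only direct int8 convolution with
            // default (empty) attributes.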
            bool ok = true
                    && utils::one_of(desc()->diff_src_desc.data_type, s8, u8)
                    && utils::one_of(desc()->diff_dst_desc.data_type, s8, u8)
                    && expect_data_types(desc()->diff_src_desc.data_type, s8,
                            f32, desc()->diff_dst_desc.data_type, s32)
                    && desc()->prop_kind == prop_kind::backward_data
                    && desc()->alg_kind == alg_kind::convolution_direct
                    && compute_engine->mayiuse(
                            compute::device_ext_t::intel_subgroups)
                    && attr()->has_default_values();

            if (!ok) return status::unimplemented;

            CHECK(init_conf());

            if (!compute_engine->mayiuse_sub_group({8, conf.sub_group_size}))
                return status::unimplemented;

            ok = set_default_formats_common(
                    conf.src_tag, conf.wei_tag, conf.dst_tag);
            return ok ? status::success : status::unimplemented;
        }

        status_t init_conf();
        status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;

        conv_conf_t conf;

        bool support_bias() const override { return true; }
    };

    status_t init(engine_t *engine) override {
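        // ver_mb_block selects the minibatch-blocked kernel; otherwise the
        // default variant is used.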
        const char *kernel_name = nullptr;
        if (pd()->conf.ver == ver_mb_block)
            kernel_name = "conv_bwd_data_mb_block_x8s8x8";
        else
            kernel_name = "conv_bwd_data_x8s8x8";
        compute::kernel_ctx_t kernel_ctx;
        auto status = pd()->init_kernel_ctx(kernel_ctx);
        if (status != status::success) return status;

        create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
        if (!kernel_) return status::runtime_error;

        return status::success;
    }

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_backward_data(ctx);
    }

private:
    status_t execute_backward_data(const exec_ctx_t &ctx) const;
    const pd_t *pd() const { return (const pd_t *)gpu_primitive_t::pd().get(); }
    compute::kernel_t kernel_;
};

} // namespace ocl
} // namespace gpu
} // namespace impl
} // namespace dnnl

#endif