1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_XE_LP_X8S8S32X_CONVOLUTION_HPP |
18 | #define GPU_XE_LP_X8S8S32X_CONVOLUTION_HPP |
19 | |
20 | #include "common/c_types_map.hpp" |
21 | #include "gpu/compute/compute.hpp" |
22 | #include "gpu/gpu_convolution_pd.hpp" |
23 | #include "gpu/gpu_primitive.hpp" |
24 | #include "gpu/gpu_resource.hpp" |
25 | #include "gpu/ocl/ocl_stream.hpp" |
26 | #include "gpu/ocl/ocl_utils.hpp" |
27 | #include "gpu/primitive_conf.hpp" |
28 | |
29 | namespace dnnl { |
30 | namespace impl { |
31 | namespace gpu { |
32 | namespace ocl { |
33 | |
34 | struct xe_lp_x8s8x_convolution_fwd_t : public gpu_primitive_t { |
35 | using gpu_primitive_t::gpu_primitive_t; |
36 | struct pd_t : public gpu_convolution_fwd_pd_t { |
37 | pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, |
38 | const convolution_fwd_pd_t *hint_fwd_pd) |
39 | : gpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd) {} |
40 | |
41 | DECLARE_COMMON_PD_T("ocl:xe_lp" , xe_lp_x8s8x_convolution_fwd_t); |
42 | |
43 | status_t init(engine_t *engine) { |
44 | using namespace prop_kind; |
45 | using namespace data_type; |
46 | assert(engine->kind() == engine_kind::gpu); |
47 | auto *compute_engine |
48 | = utils::downcast<compute::compute_engine_t *>(engine); |
49 | |
50 | const auto attr_skip_mask |
51 | = primitive_attr_t::skip_mask_t::oscale_runtime |
52 | | primitive_attr_t::skip_mask_t::zero_points_runtime |
53 | | primitive_attr_t::skip_mask_t::post_ops |
54 | | primitive_attr_t::skip_mask_t::sum_dt; |
55 | |
56 | bool ok = true |
57 | && utils::one_of(this->desc()->prop_kind, forward_training, |
58 | forward_inference) |
59 | && this->desc()->alg_kind == alg_kind::convolution_direct |
60 | && utils::one_of(desc()->src_desc.data_type, u8, s8) |
61 | && utils::one_of( |
62 | desc()->dst_desc.data_type, u8, s8, s32, f32) |
63 | && expect_data_types(desc()->src_desc.data_type, s8, f32, |
64 | desc()->dst_desc.data_type, s32) |
65 | && compute_engine->mayiuse( |
66 | compute::device_ext_t::intel_subgroups) |
67 | && attr()->has_default_values( |
68 | attr_skip_mask, desc()->dst_desc.data_type) |
69 | && attr()->post_ops_.check_sum_consistent_dt( |
70 | dst_md()->data_type, true) |
71 | && post_ops_with_binary_ok(attr(), dst_md()->data_type) |
72 | && zero_points_ok(attr()) |
73 | && IMPLICATION(!attr()->output_scales_.has_default_values(), |
74 | utils::one_of( |
75 | attr()->output_scales_.mask_, 0, 1 << 1)); |
76 | if (!ok) return status::unimplemented; |
77 | |
78 | if (dst_md()->offset0 != 0) return status::unimplemented; |
79 | |
80 | CHECK(init_conf()); |
81 | |
82 | if (!compute_engine->mayiuse_sub_group({8, conf.sub_group_size})) |
83 | return status::unimplemented; |
84 | |
85 | init_scratchpad(); |
86 | |
87 | ok = set_default_formats_common( |
88 | conf.src_tag, conf.wei_tag, conf.dst_tag); |
89 | if (!ok) return status::unimplemented; |
90 | |
91 | CHECK(attr_.set_default_formats(dst_md(0))); |
92 | |
93 | return status::success; |
94 | } |
95 | |
96 | status_t init_conf(); |
97 | status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const; |
98 | void init_scratchpad(); |
99 | |
100 | conv_conf_t conf; |
101 | }; |
102 | |
103 | status_t init(engine_t *engine) override { |
104 | const char *kernel_name = nullptr; |
105 | if (pd()->conf.is_nhwc) { |
106 | if (pd()->conf.is_depthwise) { |
107 | if (pd()->conf.mb_block == 32) |
108 | kernel_name = "conv_nhwc_fwd_dw_mb_block_x8s8x" ; |
109 | else |
110 | kernel_name = "conv_nhwc_fwd_dw_ow_block_x8s8x" ; |
111 | } else if (pd()->conf.ic <= 4) { |
112 | kernel_name = "conv_nhwc_fwd_first_x8s8x" ; |
113 | } else { |
114 | kernel_name = "conv_nhwc_fwd_x8s8x" ; |
115 | } |
116 | } else if (pd()->conf.is_depthwise) { |
117 | if (pd()->conf.mb_block == 32) |
118 | kernel_name = "conv_dw_fwd_mb_block_x8s8x" ; |
119 | else |
120 | kernel_name = "conv_dw_fwd_ow_block_x8s8x" ; |
121 | } else { |
122 | if (pd()->conf.ic > 4) { |
123 | if (pd()->conf.mb_block == 32) |
124 | kernel_name = "conv_fwd_mb_block_x8s8x" ; |
125 | else |
126 | kernel_name = "conv_fwd_ow_block_x8s8x" ; |
127 | } else { |
128 | kernel_name = "conv_fwd_first_x8s8x" ; |
129 | } |
130 | } |
131 | |
132 | compute::kernel_ctx_t kernel_ctx; |
133 | auto status = pd()->init_kernel_ctx(kernel_ctx); |
134 | if (status != status::success) return status; |
135 | |
136 | create_kernel(engine, &kernel_, kernel_name, kernel_ctx); |
137 | if (!kernel_) return status::runtime_error; |
138 | |
139 | if (pd()->conf.attr_info.with_src_zpoints |
140 | && (pd()->conf.is_depthwise || pd()->conf.ic > 4)) { |
141 | create_kernel(engine, &src_compensation_kernel_, |
142 | "xe_lp_x8s8x_compensation" , kernel_ctx); |
143 | if (!src_compensation_kernel_) return status::runtime_error; |
144 | } |
145 | |
146 | return status::success; |
147 | } |
148 | |
149 | status_t execute(const exec_ctx_t &ctx) const override { |
150 | return execute_forward(ctx); |
151 | } |
152 | |
153 | private: |
154 | status_t execute_forward(const exec_ctx_t &ctx) const; |
155 | const pd_t *pd() const { return (const pd_t *)gpu_primitive_t::pd().get(); } |
156 | compute::kernel_t kernel_; |
157 | compute::kernel_t src_compensation_kernel_; |
158 | }; |
159 | |
160 | struct xe_lp_x8s8x_convolution_bwd_data_t : public gpu_primitive_t { |
161 | using gpu_primitive_t::gpu_primitive_t; |
162 | struct pd_t : public gpu_convolution_bwd_data_pd_t { |
163 | pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, |
164 | const convolution_fwd_pd_t *hint_fwd_pd) |
165 | : gpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd) {} |
166 | |
167 | DECLARE_COMMON_PD_T("ocl:xe_lp" , xe_lp_x8s8x_convolution_bwd_data_t); |
168 | |
169 | status_t init(engine_t *engine) { |
170 | using namespace prop_kind; |
171 | using namespace data_type; |
172 | assert(engine->kind() == engine_kind::gpu); |
173 | auto *compute_engine |
174 | = utils::downcast<compute::compute_engine_t *>(engine); |
175 | |
176 | bool ok = true |
177 | && utils::one_of(desc()->diff_src_desc.data_type, s8, u8) |
178 | && utils::one_of(desc()->diff_dst_desc.data_type, s8, u8) |
179 | && expect_data_types(desc()->diff_src_desc.data_type, s8, |
180 | f32, desc()->diff_dst_desc.data_type, s32) |
181 | && desc()->prop_kind == prop_kind::backward_data |
182 | && desc()->alg_kind == alg_kind::convolution_direct |
183 | && compute_engine->mayiuse( |
184 | compute::device_ext_t::intel_subgroups) |
185 | && attr()->has_default_values(); |
186 | |
187 | if (!ok) return status::unimplemented; |
188 | |
189 | CHECK(init_conf()); |
190 | |
191 | if (!compute_engine->mayiuse_sub_group({8, conf.sub_group_size})) |
192 | return status::unimplemented; |
193 | |
194 | ok = set_default_formats_common( |
195 | conf.src_tag, conf.wei_tag, conf.dst_tag); |
196 | return ok ? status::success : status::unimplemented; |
197 | } |
198 | |
199 | status_t init_conf(); |
200 | status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const; |
201 | |
202 | conv_conf_t conf; |
203 | |
204 | bool support_bias() const override { return true; } |
205 | }; |
206 | |
207 | status_t init(engine_t *engine) override { |
208 | const char *kernel_name = nullptr; |
209 | if (pd()->conf.ver == ver_mb_block) |
210 | kernel_name = "conv_bwd_data_mb_block_x8s8x8" ; |
211 | else |
212 | kernel_name = "conv_bwd_data_x8s8x8" ; |
213 | compute::kernel_ctx_t kernel_ctx; |
214 | auto status = pd()->init_kernel_ctx(kernel_ctx); |
215 | if (status != status::success) return status; |
216 | |
217 | create_kernel(engine, &kernel_, kernel_name, kernel_ctx); |
218 | if (!kernel_) return status::runtime_error; |
219 | |
220 | return status::success; |
221 | } |
222 | |
223 | status_t execute(const exec_ctx_t &ctx) const override { |
224 | return execute_backward_data(ctx); |
225 | } |
226 | |
227 | private: |
228 | status_t execute_backward_data(const exec_ctx_t &ctx) const; |
229 | const pd_t *pd() const { return (const pd_t *)gpu_primitive_t::pd().get(); } |
230 | compute::kernel_t kernel_; |
231 | }; |
232 | |
233 | } // namespace ocl |
234 | } // namespace gpu |
235 | } // namespace impl |
236 | } // namespace dnnl |
237 | |
238 | #endif |
239 | |