xe_lp_x8s8x_convolution.hpp source code [oneDNN/src/gpu/ocl/xe_lp_x8s8x_convolution.hpp]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_XE_LP_X8S8S32X_CONVOLUTION_HPP
18	#define GPU_XE_LP_X8S8S32X_CONVOLUTION_HPP
19
20	#include "common/c_types_map.hpp"
21	#include "gpu/compute/compute.hpp"
22	#include "gpu/gpu_convolution_pd.hpp"
23	#include "gpu/gpu_primitive.hpp"
24	#include "gpu/gpu_resource.hpp"
25	#include "gpu/ocl/ocl_stream.hpp"
26	#include "gpu/ocl/ocl_utils.hpp"
27	#include "gpu/primitive_conf.hpp"
28
29	namespace dnnl {
30	namespace impl {
31	namespace gpu {
32	namespace ocl {
33
34	struct xe_lp_x8s8x_convolution_fwd_t : public gpu_primitive_t {
35	using gpu_primitive_t::gpu_primitive_t;
36	struct pd_t : public gpu_convolution_fwd_pd_t {
37	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
38	const convolution_fwd_pd_t *hint_fwd_pd)
39	: gpu_convolution_fwd_pd_t (adesc, attr, hint_fwd_pd) {}
40
41	DECLARE_COMMON_PD_T("ocl:xe_lp", xe_lp_x8s8x_convolution_fwd_t);
42
43	status_t init(engine_t *engine) {
44	using namespace prop_kind;
45	using namespace data_type;
46	assert(engine->kind() == engine_kind::gpu);
47	auto *compute_engine
48	= utils::downcast<compute::compute_engine_t *>(engine);
49
50	const auto attr_skip_mask
51	= primitive_attr_t::skip_mask_t::oscale_runtime
52	\| primitive_attr_t::skip_mask_t::zero_points_runtime
53	\| primitive_attr_t::skip_mask_t::post_ops
54	\| primitive_attr_t::skip_mask_t::sum_dt;
55
56	bool ok = true
57	&& utils::one_of(this->desc()->prop_kind, forward_training,
58	forward_inference)
59	&& this->desc()->alg_kind == alg_kind::convolution_direct
60	&& utils::one_of(desc()->src_desc.data_type, u8, s8)
61	&& utils::one_of(
62	desc()->dst_desc.data_type, u8, s8, s32, f32)
63	&& expect_data_types(desc()->src_desc.data_type, s8, f32,
64	desc()->dst_desc.data_type, s32)
65	&& compute_engine->mayiuse(
66	compute::device_ext_t::intel_subgroups)
67	&& attr()->has_default_values(
68	attr_skip_mask, desc()->dst_desc.data_type)
69	&& attr()->post_ops_.check_sum_consistent_dt(
70	dst_md()->data_type, true)
71	&& post_ops_with_binary_ok(attr(), dst_md()->data_type)
72	&& zero_points_ok(attr())
73	&& IMPLICATION(!attr()->output_scales_.has_default_values(),
74	utils::one_of(
75	attr()->output_scales_.mask_, `0`, `1` << `1`));
76	if (!ok) return status::unimplemented;
77
78	if (dst_md()->offset0 != `0`) return status::unimplemented;
79
80	CHECK(init_conf());
81
82	if (!compute_engine->mayiuse_sub_group({`8`, conf.sub_group_size}))
83	return status::unimplemented;
84
85	init_scratchpad();
86
87	ok = set_default_formats_common(
88	conf.src_tag, conf.wei_tag, conf.dst_tag);
89	if (!ok) return status::unimplemented;
90
91	CHECK(attr_.set_default_formats(dst_md(`0`)));
92
93	return status::success;
94	}
95
96	status_t init_conf();
97	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
98	void init_scratchpad();
99
100	conv_conf_t conf;
101	};
102
103	status_t init(engine_t *engine) override {
104	const char kernel_name = nullptr*;
105	if (pd()->conf.is_nhwc) {
106	if (pd()->conf.is_depthwise) {
107	if (pd()->conf.mb_block == `32`)
108	kernel_name = "conv_nhwc_fwd_dw_mb_block_x8s8x";
109	else
110	kernel_name = "conv_nhwc_fwd_dw_ow_block_x8s8x";
111	} else if (pd()->conf.ic <= `4`) {
112	kernel_name = "conv_nhwc_fwd_first_x8s8x";
113	} else {
114	kernel_name = "conv_nhwc_fwd_x8s8x";
115	}
116	} else if (pd()->conf.is_depthwise) {
117	if (pd()->conf.mb_block == `32`)
118	kernel_name = "conv_dw_fwd_mb_block_x8s8x";
119	else
120	kernel_name = "conv_dw_fwd_ow_block_x8s8x";
121	} else {
122	if (pd()->conf.ic > `4`) {
123	if (pd()->conf.mb_block == `32`)
124	kernel_name = "conv_fwd_mb_block_x8s8x";
125	else
126	kernel_name = "conv_fwd_ow_block_x8s8x";
127	} else {
128	kernel_name = "conv_fwd_first_x8s8x";
129	}
130	}
131
132	compute::kernel_ctx_t kernel_ctx;
133	auto status = pd()->init_kernel_ctx(kernel_ctx);
134	if (status != status::success) return status;
135
136	create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
137	if (!kernel_) return status::runtime_error;
138
139	if (pd()->conf.attr_info.with_src_zpoints
140	&& (pd()->conf.is_depthwise \|\| pd()->conf.ic > `4`)) {
141	create_kernel(engine, &src_compensation_kernel_,
142	"xe_lp_x8s8x_compensation", kernel_ctx);
143	if (!src_compensation_kernel_) return status::runtime_error;
144	}
145
146	return status::success;
147	}
148
149	status_t execute(const exec_ctx_t &ctx) const override {
150	return execute_forward(ctx);
151	}
152
153	private:
154	status_t execute_forward(const exec_ctx_t &ctx) const;
155	const pd_t pd() const* { return (const pd_t *)gpu_primitive_t::pd().get(); }
156	compute::kernel_t kernel_;
157	compute::kernel_t src_compensation_kernel_;
158	};
159
160	struct xe_lp_x8s8x_convolution_bwd_data_t : public gpu_primitive_t {
161	using gpu_primitive_t::gpu_primitive_t;
162	struct pd_t : public gpu_convolution_bwd_data_pd_t {
163	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
164	const convolution_fwd_pd_t *hint_fwd_pd)
165	: gpu_convolution_bwd_data_pd_t (adesc, attr, hint_fwd_pd) {}
166
167	DECLARE_COMMON_PD_T("ocl:xe_lp", xe_lp_x8s8x_convolution_bwd_data_t);
168
169	status_t init(engine_t *engine) {
170	using namespace prop_kind;
171	using namespace data_type;
172	assert(engine->kind() == engine_kind::gpu);
173	auto *compute_engine
174	= utils::downcast<compute::compute_engine_t *>(engine);
175
176	bool ok = true
177	&& utils::one_of(desc()->diff_src_desc.data_type, s8, u8)
178	&& utils::one_of(desc()->diff_dst_desc.data_type, s8, u8)
179	&& expect_data_types(desc()->diff_src_desc.data_type, s8,
180	f32, desc()->diff_dst_desc.data_type, s32)
181	&& desc()->prop_kind == prop_kind::backward_data
182	&& desc()->alg_kind == alg_kind::convolution_direct
183	&& compute_engine->mayiuse(
184	compute::device_ext_t::intel_subgroups)
185	&& attr()->has_default_values();
186
187	if (!ok) return status::unimplemented;
188
189	CHECK(init_conf());
190
191	if (!compute_engine->mayiuse_sub_group({`8`, conf.sub_group_size}))
192	return status::unimplemented;
193
194	ok = set_default_formats_common(
195	conf.src_tag, conf.wei_tag, conf.dst_tag);
196	return ok ? status::success : status::unimplemented;
197	}
198
199	status_t init_conf();
200	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
201
202	conv_conf_t conf;
203
204	bool support_bias() const override { return true; }
205	};
206
207	status_t init(engine_t *engine) override {
208	const char kernel_name = nullptr*;
209	if (pd()->conf.ver == ver_mb_block)
210	kernel_name = "conv_bwd_data_mb_block_x8s8x8";
211	else
212	kernel_name = "conv_bwd_data_x8s8x8";
213	compute::kernel_ctx_t kernel_ctx;
214	auto status = pd()->init_kernel_ctx(kernel_ctx);
215	if (status != status::success) return status;
216
217	create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
218	if (!kernel_) return status::runtime_error;
219
220	return status::success;
221	}
222
223	status_t execute(const exec_ctx_t &ctx) const override {
224	return execute_backward_data(ctx);
225	}
226
227	private:
228	status_t execute_backward_data(const exec_ctx_t &ctx) const;
229	const pd_t pd() const* { return (const pd_t *)gpu_primitive_t::pd().get(); }
230	compute::kernel_t kernel_;
231	};
232
233	} // namespace ocl
234	} // namespace gpu
235	} // namespace impl
236	} // namespace dnnl
237
238	#endif
239

Browse the source code of oneDNN/src/gpu/ocl/xe_lp_x8s8x_convolution.hpp