gen9_convolution.hpp source code [oneDNN/src/gpu/ocl/gen9_convolution.hpp]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_OCL_GEN9_CONVOLUTION_HPP
18	#define GPU_OCL_GEN9_CONVOLUTION_HPP
19
20	#include <assert.h>
21
22	#include "common/c_types_map.hpp"
23	#include "common/primitive.hpp"
24	#include "gpu/compute/compute.hpp"
25	#include "gpu/gpu_convolution_pd.hpp"
26	#include "gpu/gpu_eltwise_pd.hpp"
27	#include "gpu/gpu_primitive.hpp"
28	#include "gpu/gpu_resource.hpp"
29	#include "gpu/ocl/ocl_stream.hpp"
30	#include "gpu/ocl/ocl_utils.hpp"
31	#include "gpu/primitive_conf.hpp"
32
33	namespace dnnl {
34	namespace impl {
35	namespace gpu {
36	namespace ocl {
37
38	struct gen9_convolution_fwd_t : public gpu_primitive_t {
39	using gpu_primitive_t::gpu_primitive_t;
40	struct pd_t : public gpu_convolution_fwd_pd_t {
41	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
42	const convolution_fwd_pd_t *hint_fwd_pd)
43	: gpu_convolution_fwd_pd_t (adesc, attr, hint_fwd_pd) {}
44
45	DECLARE_COMMON_PD_T("ocl:gen9:blocked", gen9_convolution_fwd_t);
46
47	status_t init(engine_t *engine) {
48	using namespace prop_kind;
49	using namespace data_type;
50	assert(engine->kind() == engine_kind::gpu);
51	auto *compute_engine
52	= utils::downcast<compute::compute_engine_t *>(engine);
53
54	auto src_data_t = this->desc()->src_desc.data_type;
55	auto dst_data_t = this->desc()->dst_desc.data_type;
56
57	const auto attr_skip_mask = primitive_attr_t::skip_mask_t::post_ops;
58
59	bool ok = set_default_alg_kind(alg_kind::convolution_direct)
60	&& utils::one_of(this->desc()->prop_kind, forward_training,
61	forward_inference)
62	&& this->desc()->alg_kind == alg_kind::convolution_direct
63	&& utils::one_of(true,
64	expect_data_types(f32, f32, f32, f32, f32),
65	expect_data_types(f32, f32, f32, s8, f32),
66	expect_data_types(f16, f16, f16, s8, f32),
67	expect_data_types(f16, f16, f16, f16, f32))
68	&& compute_engine->mayiuse(
69	compute::device_ext_t::intel_subgroups)
70	&& IMPLICATION(src_data_t == f16,
71	true
72	&& compute_engine->mayiuse(
73	compute::device_ext_t::khr_fp16)
74	&& compute_engine->mayiuse(
75	compute::device_ext_t::
76	intel_subgroups_short))
77	&& !has_zero_dim_memory()
78	&& attr()->has_default_values(attr_skip_mask, dst_data_t)
79	&& post_ops_with_binary_ok(attr(), dst_md()->data_type);
80	if (!ok) return status::unimplemented;
81
82	CHECK(init_conf(engine));
83
84	if (!compute_engine->mayiuse_sub_group(conf.sub_group_size))
85	return status::unimplemented;
86
87	ok = set_default_formats_common(
88	conf.src_tag, conf.wei_tag, conf.dst_tag);
89	if (!ok) return status::unimplemented;
90
91	CHECK(attr_.set_default_formats(dst_md(`0`)));
92
93	return status::success;
94	}
95
96	status_t init_conf(engine_t *engine);
97	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
98
99	conv_conf_t conf;
100	};
101
102	status_t init(engine_t *engine) override {
103	const char kernel_name = nullptr*;
104
105	if (pd()->conf.is_nhwc
106	&& utils::one_of(pd()->conf.src_data_type, data_type::f32,
107	data_type::f16)) {
108	kernel_name = "gen9_conv_nhwc_fwd";
109
110	} else if (pd()->conf.is_depthwise) {
111	kernel_name = "gen9_conv_dw_fwd";
112	} else if (utils::one_of(pd()->desc()->src_desc.data_type,
113	data_type::f16, data_type::f32)) {
114	kernel_name = "gen9_conv_fwd";
115	} else {
116	assert(!"not expected");
117	}
118
119	compute::kernel_ctx_t kernel_ctx;
120	status_t status = pd()->init_kernel_ctx(kernel_ctx);
121	if (status != status::success) return status;
122
123	create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
124	if (!kernel_) return status::runtime_error;
125
126	return status::success;
127	}
128
129	status_t execute(const exec_ctx_t &ctx) const override {
130	return execute_forward(ctx);
131	}
132
133	private:
134	status_t execute_forward(const exec_ctx_t &ctx) const;
135	const pd_t pd() const* { return (const pd_t *)primitive_t::pd().get(); }
136	compute::kernel_t kernel_;
137	};
138
139	struct gen9_convolution_bwd_data_t : public gpu_primitive_t {
140	using gpu_primitive_t::gpu_primitive_t;
141	struct pd_t : public gpu_convolution_bwd_data_pd_t {
142	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
143	const convolution_fwd_pd_t *hint_fwd_pd)
144	: gpu_convolution_bwd_data_pd_t (adesc, attr, hint_fwd_pd) {}
145
146	DECLARE_COMMON_PD_T("ocl:ncsp:any", gen9_convolution_bwd_data_t);
147
148	status_t init(engine_t *engine) {
149	using namespace data_type;
150	using namespace prop_kind;
151	assert(engine->kind() == engine_kind::gpu);
152	auto *compute_engine
153	= utils::downcast<compute::compute_engine_t *>(engine);
154
155	bool ok = set_default_alg_kind(alg_kind::convolution_direct)
156	&& this->desc()->prop_kind == backward_data
157	&& this->desc()->alg_kind == alg_kind::convolution_direct
158	&& utils::one_of(true,
159	expect_data_types(
160	f32, f32, data_type::undef, f32, f32),
161	expect_data_types(f16, f16, data_type::undef, f16,
162	data_type::undef))
163	&& IMPLICATION(this->with_bias()
164	&& this->desc()->diff_dst_desc.data_type
165	!= f16,
166	this->desc()->bias_desc.data_type == f32)
167	&& IMPLICATION(this->with_bias()
168	&& this->desc()->diff_dst_desc.data_type
169	== f16,
170	this->desc()->bias_desc.data_type == f16)
171	&& compute_engine->mayiuse(
172	compute::device_ext_t::intel_subgroups)
173	&& !has_zero_dim_memory() && attr()->has_default_values();
174	if (!ok) return status::unimplemented;
175
176	CHECK(init_conf(engine));
177
178	if (!compute_engine->mayiuse_sub_group(conf.sub_group_size))
179	return status::unimplemented;
180
181	ok = set_default_formats_common(
182	conf.src_tag, conf.wei_tag, conf.dst_tag);
183	return ok ? status::success : status::unimplemented;
184	}
185
186	status_t init_conf(engine_t *engine);
187	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
188
189	conv_conf_t conf;
190	};
191
192	status_t init(engine_t *engine) override {
193	const char kernel_name = nullptr*;
194	if (pd()->conf.is_depthwise) {
195	kernel_name = "gen9_conv_dw_bwd_data";
196	} else {
197	if (pd()->conf.is_nhwc)
198	kernel_name = "gen9_conv_nhwc_bwd_data";
199	else
200	kernel_name = "gen9_conv_bwd_data";
201	}
202
203	compute::kernel_ctx_t kernel_ctx;
204	status_t status = pd()->init_kernel_ctx(kernel_ctx);
205	if (status != status::success) return status;
206
207	create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
208	if (!kernel_) return status::runtime_error;
209
210	return status::success;
211	}
212
213	status_t execute(const exec_ctx_t &ctx) const override {
214	return execute_backward_data(ctx);
215	}
216
217	private:
218	status_t execute_backward_data(const exec_ctx_t &ctx) const;
219	const pd_t pd() const* { return (const pd_t *)primitive_t::pd().get(); }
220	compute::kernel_t kernel_;
221	};
222
223	struct gen9_convolution_bwd_weights_t : public gpu_primitive_t {
224	using gpu_primitive_t::gpu_primitive_t;
225	struct pd_t : public gpu_convolution_bwd_weights_pd_t {
226	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
227	const convolution_fwd_pd_t *hint_fwd_pd)
228	: gpu_convolution_bwd_weights_pd_t (adesc, attr, hint_fwd_pd) {}
229
230	pd_t(const pd_t &rhs) = default;
231
232	DECLARE_COMMON_PD_T("ocl:ncsp:any", gen9_convolution_bwd_weights_t);
233
234	status_t init(engine_t *engine) {
235	using namespace data_type;
236	using namespace prop_kind;
237	assert(engine->kind() == engine_kind::gpu);
238	auto *compute_engine
239	= utils::downcast<compute::compute_engine_t *>(engine);
240
241	bool ok = set_default_alg_kind(alg_kind::convolution_direct)
242	&& this->desc()->prop_kind == backward_weights
243	&& this->desc()->alg_kind == alg_kind::convolution_direct
244	&& utils::one_of(this->desc()->diff_weights_desc.data_type,
245	f32, bf16)
246	&& utils::one_of(
247	this->desc()->src_desc.data_type, f32, bf16)
248	&& utils::one_of(
249	this->desc()->diff_dst_desc.data_type, f32, bf16)
250	&& compute_engine->mayiuse(
251	compute::device_ext_t::intel_subgroups)
252	&& compute_engine->mayiuse(
253	compute::device_ext_t::khr_int64_base_atomics)
254	&& !has_zero_dim_memory() && attr()->has_default_values();
255	if (!ok) return status::unimplemented;
256
257	CHECK(init_conf(engine));
258	if (!compute_engine->mayiuse_sub_group(conf.sub_group_size))
259	return status::unimplemented;
260
261	if (!IMPLICATION(utils::one_of(bf16,
262	this->desc()->diff_weights_desc.data_type,
263	this->desc()->src_desc.data_type,
264	this->desc()->diff_dst_desc.data_type),
265	conf.ver == ver_1stconv))
266	return status::unimplemented;
267
268	init_scratchpad();
269	return status::success;
270	}
271
272	status_t init_conf(engine_t *engine);
273	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
274
275	conv_conf_t conf;
276	std::shared_ptr<primitive_desc_t> rpd_wei_;
277	std::shared_ptr<primitive_desc_t> rpd_bia_;
278
279	private:
280	status_t init_scratchpad();
281	};
282
283	status_t init(engine_t *engine) override {
284	const char *kernel_name;
285	if (pd()->conf.is_nhwc) {
286	kernel_name = "gen9_conv_nhwc_bwd_weights";
287	} else {
288	kernel_name = "gen9_conv_bwd_weights";
289	}
290	if (pd()->conf.reorder_wei) {
291	CHECK(create_nested_primitive(
292	wei_reorder_, pd()->rpd_wei_, engine));
293	}
294	if (pd()->conf.reorder_bias) {
295	CHECK(create_nested_primitive(
296	bia_reorder_, pd()->rpd_bia_, engine));
297	}
298	compute::kernel_ctx_t kernel_ctx;
299	status_t status = pd()->init_kernel_ctx(kernel_ctx);
300	if (status != status::success) return status;
301
302	create_kernel(engine, &kernel_, kernel_name, kernel_ctx);
303	if (!kernel_) return status::runtime_error;
304	return status::success;
305	}
306
307	status_t execute(const exec_ctx_t &ctx) const override {
308	return execute_backward_weights(ctx);
309	}
310
311	private:
312	status_t execute_backward_weights(const exec_ctx_t &ctx) const;
313	const pd_t pd() const* { return (const pd_t *)primitive_t::pd().get(); }
314	compute::kernel_t kernel_;
315	std::shared_ptr<primitive_t> wei_reorder_;
316	std::shared_ptr<primitive_t> bia_reorder_;
317	};
318
319	} // namespace ocl
320	} // namespace gpu
321	} // namespace impl
322	} // namespace dnnl
323
324	#endif
325
326	// vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s
327

Browse the source code of oneDNN/src/gpu/ocl/gen9_convolution.hpp