gen9_wino_convolution.hpp source code [oneDNN/src/gpu/ocl/gen9_wino_convolution.hpp]

1	/*******************************************************************************
2	* Copyright 2020-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_OCL_GEN9_WINO_CONVOLUTION_HPP
18	#define GPU_OCL_GEN9_WINO_CONVOLUTION_HPP
19
20	#include <assert.h>
21
22	#include "common/c_types_map.hpp"
23	#include "common/primitive.hpp"
24	#include "gpu/compute/compute.hpp"
25	#include "gpu/gpu_convolution_pd.hpp"
26	#include "gpu/gpu_eltwise_pd.hpp"
27	#include "gpu/gpu_primitive.hpp"
28	#include "gpu/gpu_resource.hpp"
29	#include "gpu/ocl/ocl_stream.hpp"
30	#include "gpu/ocl/ocl_utils.hpp"
31	#include "gpu/primitive_conf.hpp"
32
33	namespace dnnl {
34	namespace impl {
35	namespace gpu {
36	namespace ocl {
37
38	struct gen9_wino_convolution_fwd_t : public gpu_primitive_t {
39	using gpu_primitive_t::gpu_primitive_t;
40	struct pd_t : public gpu_convolution_fwd_pd_t {
41	pd_t(const convolution_desc_t adesc, const* primitive_attr_t *attr,
42	const convolution_fwd_pd_t *hint_fwd_pd)
43	: gpu_convolution_fwd_pd_t (adesc, attr, hint_fwd_pd) {}
44
45	DECLARE_COMMON_PD_T("ocl:gen9:wino", gen9_wino_convolution_fwd_t);
46
47	status_t init(engine_t *engine) {
48	using namespace prop_kind;
49	using namespace data_type;
50	assert(engine->kind() == engine_kind::gpu);
51	auto *compute_engine
52	= utils::downcast<compute::compute_engine_t *>(engine);
53
54	auto src_data_t = this->desc()->src_desc.data_type;
55	auto dst_data_t = this->desc()->dst_desc.data_type;
56
57	const auto attr_skip_mask = primitive_attr_t::skip_mask_t::post_ops;
58
59	bool ok = utils::one_of(this->desc()->prop_kind, forward_training,
60	forward_inference)
61	&& (this->desc()->alg_kind == alg_kind::convolution_winograd
62	\|\| this->desc()->alg_kind
63	== alg_kind::convolution_auto)
64	&& utils::one_of(true,
65	expect_data_types(f32, f32, f32, f32, f32),
66	expect_data_types(f16, f16, f16, f16, f32))
67	&& compute_engine->mayiuse(
68	compute::device_ext_t::intel_subgroups)
69	&& IMPLICATION(src_data_t == f16,
70	true
71	&& compute_engine->mayiuse(
72	compute::device_ext_t::khr_fp16)
73	&& compute_engine->mayiuse(
74	compute::device_ext_t::
75	intel_subgroups_short))
76	&& !has_zero_dim_memory()
77	&& attr()->has_default_values(attr_skip_mask, dst_data_t)
78	&& post_ops_with_binary_ok(attr(), dst_data_t);
79	if (!ok) return status::unimplemented;
80
81	CHECK(init_conf(compute_engine));
82
83	int sub_group_size = conf.wino_ic_block / `2`; // LWX
84	if (!compute_engine->mayiuse_sub_group(sub_group_size))
85	return status::unimplemented;
86
87	init_scratchpad();
88
89	ok = set_default_formats_common(
90	conf.src_tag, conf.wei_tag, conf.dst_tag);
91	if (!ok) return status::unimplemented;
92
93	CHECK(attr_.set_default_formats(dst_md(`0`)));
94
95	return status::success;
96	}
97
98	status_t init_conf(compute::compute_engine_t *engine);
99	void init_scratchpad();
100	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
101
102	conv_conf_t conf;
103	};
104
105	status_t init(engine_t *engine) override {
106	bool is_fused = pd()->conf.is_fused;
107	bool is_nonfused_2x3 = pd()->conf.wino_m == `2` && !is_fused;
108
109	std::vector<const char *> kernel_names;
110	if (is_fused) {
111	kernel_names.push_back("gen9_wino_conv_fwd");
112	kernel_names.push_back("gen9_wino_wei_transform");
113	} else if (is_nonfused_2x3) {
114	kernel_names.push_back("gen9_wino_conv_fwd_2x3");
115	kernel_names.push_back("gen9_wino_wei_transform_2x3");
116	kernel_names.push_back("gen9_wino_src_transform_2x3");
117	kernel_names.push_back("gen9_wino_dst_transform_2x3");
118	} else {
119	assert(!"Invalid Winograd version chosen by init_conf");
120	return status::unimplemented;
121	}
122
123	compute::kernel_ctx_t kernel_ctx;
124	status_t status = pd()->init_kernel_ctx(kernel_ctx);
125	if (status != status::success) return status;
126
127	std::vector<compute::kernel_t> kernels;
128	CHECK(create_kernels(engine, &kernels, kernel_names, kernel_ctx));
129	kernel_ = kernels[`0`];
130	wei_trans_kernel_ = kernels[`1`];
131	if (!kernel_ \|\| !wei_trans_kernel_) return status::runtime_error;
132	if (!is_fused) {
133	src_trans_kernel_ = kernels[`2`];
134	dst_trans_kernel_ = kernels[`3`];
135	if (!src_trans_kernel_ \|\| !dst_trans_kernel_)
136	return status::runtime_error;
137	}
138
139	return status::success;
140	}
141
142	status_t execute(const exec_ctx_t &ctx) const override {
143	return execute_forward(ctx);
144	}
145
146	private:
147	status_t execute_forward(const exec_ctx_t &ctx) const;
148	const pd_t pd() const* { return (const pd_t *)primitive_t::pd().get(); }
149	compute::kernel_t kernel_;
150	compute::kernel_t wei_trans_kernel_;
151	compute::kernel_t src_trans_kernel_;
152	compute::kernel_t dst_trans_kernel_;
153	};
154
155	} // namespace ocl
156	} // namespace gpu
157	} // namespace impl
158	} // namespace dnnl
159
160	#endif
161
162	// vim: et ts=4 sw=4 cindent cino+=l0,\:4,N-s
163

Browse the source code of oneDNN/src/gpu/ocl/gen9_wino_convolution.hpp