conv_kernel.hpp source code [oneDNN/src/gpu/jit/conv/conv_kernel.hpp]

1	/*******************************************************************************
2	* Copyright 2021-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_JIT_CONV_CONV_KERNEL_HPP
18	#define GPU_JIT_CONV_CONV_KERNEL_HPP
19
20	#include "common/cpp_compat.hpp"
21
22	#include "gpu/jit/codegen/codegen.hpp"
23	#include "gpu/jit/codegen/kernel.hpp"
24	#include "gpu/jit/ir/ir.hpp"
25	#include "gpu/jit/ir/kernel_info.hpp"
26	#include "gpu/jit/ir/message.hpp"
27	#include "gpu/jit/ir/reduce.hpp"
28	#include "gpu/jit/ir/reorder.hpp"
29
30	#include "gpu/jit/conv/config.hpp"
31	#include "gpu/jit/conv/grf_usage.hpp"
32	#include "gpu/jit/conv/ir_builder.hpp"
33
34	namespace dnnl {
35	namespace impl {
36	namespace gpu {
37	namespace jit {
38
39	template <ngen::HW hw>
40	class conv_kernel_t : public ir_kernel_t<hw> {
41	public:
42	IR_KERNEL_FORWARD(hw)
43
44	conv_kernel_t(const conv_config_t &cfg, const kernel_info_t &kernel_info,
45	grf_mode_t grf_mode = grf_mode_t::any);
46
47	private:
48	const conv_problem_t &prb_;
49	const conv_config_t &cfg_;
50	};
51
52	template <ngen::HW hw>
53	conv_kernel_t<hw>::conv_kernel_t(const conv_config_t &cfg,
54	const kernel_info_t &kernel_info, grf_mode_t grf_mode)
55	: ir_kernel_t<hw>("gen_conv", cfg.exec_cfg(), kernel_info,
56	utils::one_of(cfg.fma_kind(), fma_kind_t::dpas, fma_kind_t::dpasw),
57	grf_mode)
58	, prb_(cfg.prb())
59	, cfg_(cfg) {
60
61	// XXX: BWD_W does 32x32 multiplication in the inner loop which may cause
62	// hangs when using with split barrier. Switch to emulation to work around
63	// the issue.
64	if (prb_.is_bwd_w && hw < ngen::HW::XeHPC) emu_strategy.emulate64 = true;
65
66	ir_utils::debug_profiler_t profile("Conv Kernel Construction Profile");
67	// Build IR for the kernel.
68	conv_ir_builder_t builder(cfg, kernel_info);
69	stmt_t body = builder.stmt();
70	profile.stamp("Kernel Builder");
71
72	alloc_manager_t alloc_mgr(body);
73	profile.stamp("Alloc_Mgr Construct");
74
75	setup_interface(body);
76	profile.stamp("Setup Interface");
77
78	this->require_signal_header_ = true;
79	generate_prologue();
80
81	profile.stamp("Prologue");
82
83	// Bind "external" variables.
84	expr_binding_t expr_binding(hw);
85	bind_external_vars(
86	body, cfg_.kernel_grid(), builder.local_id(), expr_binding);
87	profile.stamp("Bind Variables");
88
89	#ifdef GEN_CONV_DEBUG
90	profile.stop();
91	verify_grf_usage(cfg, body, ra_.get_grf_usage());
92	profile.start();
93	#endif
94
95	// Generate assembly from IR.
96	ir_to_ngen_t<hw> visitor(this, expr_binding);
97	visitor.visit(body);
98	profile.stamp("Generate Assembly");
99
100	generate_epilogue();
101	profile.stop("Epilogue");
102
103	#ifdef GEN_CONV_PROFILE
104	ir_perf_no_trace() << profile << "\n";
105	#endif
106	#ifdef GEN_CONV_DEBUG
107	ir_trace() << "Actual register usage: "
108	<< ra_.get_peak_grf_usage() << std::endl;
109	int estimated_peak_grf_usage = estimate_register_count(cfg_);
110	if (ra_.get_peak_grf_usage() > estimated_peak_grf_usage) {
111	ir_warning()
112	<< "conv_kernel_t register usage underestimated: estimate = "
113	<< estimated_peak_grf_usage
114	<< ", actual = " << ra_.get_peak_grf_usage() << "\n";
115	}
116	#endif
117	}
118
119	} // namespace jit
120	} // namespace gpu
121	} // namespace impl
122	} // namespace dnnl
123
124	#endif
125

Browse the source code of oneDNN/src/gpu/jit/conv/conv_kernel.hpp