1/*******************************************************************************
2* Copyright 2021-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_JIT_CONV_CONV_KERNEL_HPP
18#define GPU_JIT_CONV_CONV_KERNEL_HPP
19
20#include "common/cpp_compat.hpp"
21
22#include "gpu/jit/codegen/codegen.hpp"
23#include "gpu/jit/codegen/kernel.hpp"
24#include "gpu/jit/ir/ir.hpp"
25#include "gpu/jit/ir/kernel_info.hpp"
26#include "gpu/jit/ir/message.hpp"
27#include "gpu/jit/ir/reduce.hpp"
28#include "gpu/jit/ir/reorder.hpp"
29
30#include "gpu/jit/conv/config.hpp"
31#include "gpu/jit/conv/grf_usage.hpp"
32#include "gpu/jit/conv/ir_builder.hpp"
33
34namespace dnnl {
35namespace impl {
36namespace gpu {
37namespace jit {
38
39template <ngen::HW hw>
40class conv_kernel_t : public ir_kernel_t<hw> {
41public:
42 IR_KERNEL_FORWARD(hw)
43
44 conv_kernel_t(const conv_config_t &cfg, const kernel_info_t &kernel_info,
45 grf_mode_t grf_mode = grf_mode_t::any);
46
47private:
48 const conv_problem_t &prb_;
49 const conv_config_t &cfg_;
50};
51
52template <ngen::HW hw>
53conv_kernel_t<hw>::conv_kernel_t(const conv_config_t &cfg,
54 const kernel_info_t &kernel_info, grf_mode_t grf_mode)
55 : ir_kernel_t<hw>("gen_conv", cfg.exec_cfg(), kernel_info,
56 utils::one_of(cfg.fma_kind(), fma_kind_t::dpas, fma_kind_t::dpasw),
57 grf_mode)
58 , prb_(cfg.prb())
59 , cfg_(cfg) {
60
61 // XXX: BWD_W does 32x32 multiplication in the inner loop which may cause
62 // hangs when using with split barrier. Switch to emulation to work around
63 // the issue.
64 if (prb_.is_bwd_w && hw < ngen::HW::XeHPC) emu_strategy.emulate64 = true;
65
66 ir_utils::debug_profiler_t profile("Conv Kernel Construction Profile");
67 // Build IR for the kernel.
68 conv_ir_builder_t builder(cfg, kernel_info);
69 stmt_t body = builder.stmt();
70 profile.stamp("Kernel Builder");
71
72 alloc_manager_t alloc_mgr(body);
73 profile.stamp("Alloc_Mgr Construct");
74
75 setup_interface(body);
76 profile.stamp("Setup Interface");
77
78 this->require_signal_header_ = true;
79 generate_prologue();
80
81 profile.stamp("Prologue");
82
83 // Bind "external" variables.
84 expr_binding_t expr_binding(hw);
85 bind_external_vars(
86 body, cfg_.kernel_grid(), builder.local_id(), expr_binding);
87 profile.stamp("Bind Variables");
88
89#ifdef GEN_CONV_DEBUG
90 profile.stop();
91 verify_grf_usage(cfg, body, ra_.get_grf_usage());
92 profile.start();
93#endif
94
95 // Generate assembly from IR.
96 ir_to_ngen_t<hw> visitor(this, expr_binding);
97 visitor.visit(body);
98 profile.stamp("Generate Assembly");
99
100 generate_epilogue();
101 profile.stop("Epilogue");
102
103#ifdef GEN_CONV_PROFILE
104 ir_perf_no_trace() << profile << "\n";
105#endif
106#ifdef GEN_CONV_DEBUG
107 ir_trace() << "Actual register usage: "
108 << ra_.get_peak_grf_usage() << std::endl;
109 int estimated_peak_grf_usage = estimate_register_count(cfg_);
110 if (ra_.get_peak_grf_usage() > estimated_peak_grf_usage) {
111 ir_warning()
112 << "conv_kernel_t register usage underestimated: estimate = "
113 << estimated_peak_grf_usage
114 << ", actual = " << ra_.get_peak_grf_usage() << "\n";
115 }
116#endif
117}
118
119} // namespace jit
120} // namespace gpu
121} // namespace impl
122} // namespace dnnl
123
124#endif
125