1 | /******************************************************************************* |
2 | * Copyright 2021-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_JIT_CONV_CONV_KERNEL_HPP |
18 | #define GPU_JIT_CONV_CONV_KERNEL_HPP |
19 | |
20 | #include "common/cpp_compat.hpp" |
21 | |
22 | #include "gpu/jit/codegen/codegen.hpp" |
23 | #include "gpu/jit/codegen/kernel.hpp" |
24 | #include "gpu/jit/ir/ir.hpp" |
25 | #include "gpu/jit/ir/kernel_info.hpp" |
26 | #include "gpu/jit/ir/message.hpp" |
27 | #include "gpu/jit/ir/reduce.hpp" |
28 | #include "gpu/jit/ir/reorder.hpp" |
29 | |
30 | #include "gpu/jit/conv/config.hpp" |
31 | #include "gpu/jit/conv/grf_usage.hpp" |
32 | #include "gpu/jit/conv/ir_builder.hpp" |
33 | |
34 | namespace dnnl { |
35 | namespace impl { |
36 | namespace gpu { |
37 | namespace jit { |
38 | |
// Convolution kernel generator, parameterized by the target hardware `hw`.
// All generation work happens in the constructor (defined below in this
// header): it builds the kernel IR with conv_ir_builder_t and lowers it with
// ir_to_ngen_t.
template <ngen::HW hw>
class conv_kernel_t : public ir_kernel_t<hw> {
public:
    // NOTE(review): macro defined elsewhere; presumably makes ir_kernel_t<hw>
    // members (e.g. setup_interface, generate_prologue, ra_, emu_strategy)
    // usable unqualified inside this class template - confirm.
    IR_KERNEL_FORWARD(hw)

    // cfg:         convolution configuration; a reference to it is kept in
    //              cfg_, and cfg.prb() is kept in prb_.
    // kernel_info: passed to the ir_kernel_t base and to the IR builder.
    // grf_mode:    forwarded to the ir_kernel_t constructor; defaults to
    //              grf_mode_t::any.
    conv_kernel_t(const conv_config_t &cfg, const kernel_info_t &kernel_info,
            grf_mode_t grf_mode = grf_mode_t::any);

private:
    // Both members are non-owning references bound in the constructor, so the
    // referenced objects must stay alive for as long as they are accessed
    // through this kernel. prb_ is declared first and is therefore
    // initialized before cfg_.
    const conv_problem_t &prb_; // bound to cfg.prb()
    const conv_config_t &cfg_; // bound to the constructor's cfg argument
};
51 | |
52 | template <ngen::HW hw> |
53 | conv_kernel_t<hw>::conv_kernel_t(const conv_config_t &cfg, |
54 | const kernel_info_t &kernel_info, grf_mode_t grf_mode) |
55 | : ir_kernel_t<hw>("gen_conv" , cfg.exec_cfg(), kernel_info, |
56 | utils::one_of(cfg.fma_kind(), fma_kind_t::dpas, fma_kind_t::dpasw), |
57 | grf_mode) |
58 | , prb_(cfg.prb()) |
59 | , cfg_(cfg) { |
60 | |
61 | // XXX: BWD_W does 32x32 multiplication in the inner loop which may cause |
62 | // hangs when using with split barrier. Switch to emulation to work around |
63 | // the issue. |
64 | if (prb_.is_bwd_w && hw < ngen::HW::XeHPC) emu_strategy.emulate64 = true; |
65 | |
66 | ir_utils::debug_profiler_t profile("Conv Kernel Construction Profile" ); |
67 | // Build IR for the kernel. |
68 | conv_ir_builder_t builder(cfg, kernel_info); |
69 | stmt_t body = builder.stmt(); |
70 | profile.stamp("Kernel Builder" ); |
71 | |
72 | alloc_manager_t alloc_mgr(body); |
73 | profile.stamp("Alloc_Mgr Construct" ); |
74 | |
75 | setup_interface(body); |
76 | profile.stamp("Setup Interface" ); |
77 | |
78 | this->require_signal_header_ = true; |
79 | generate_prologue(); |
80 | |
81 | profile.stamp("Prologue" ); |
82 | |
83 | // Bind "external" variables. |
84 | expr_binding_t expr_binding(hw); |
85 | bind_external_vars( |
86 | body, cfg_.kernel_grid(), builder.local_id(), expr_binding); |
87 | profile.stamp("Bind Variables" ); |
88 | |
89 | #ifdef GEN_CONV_DEBUG |
90 | profile.stop(); |
91 | verify_grf_usage(cfg, body, ra_.get_grf_usage()); |
92 | profile.start(); |
93 | #endif |
94 | |
95 | // Generate assembly from IR. |
96 | ir_to_ngen_t<hw> visitor(this, expr_binding); |
97 | visitor.visit(body); |
98 | profile.stamp("Generate Assembly" ); |
99 | |
100 | generate_epilogue(); |
101 | profile.stop("Epilogue" ); |
102 | |
103 | #ifdef GEN_CONV_PROFILE |
104 | ir_perf_no_trace() << profile << "\n" ; |
105 | #endif |
106 | #ifdef GEN_CONV_DEBUG |
107 | ir_trace() << "Actual register usage: " |
108 | << ra_.get_peak_grf_usage() << std::endl; |
109 | int estimated_peak_grf_usage = estimate_register_count(cfg_); |
110 | if (ra_.get_peak_grf_usage() > estimated_peak_grf_usage) { |
111 | ir_warning() |
112 | << "conv_kernel_t register usage underestimated: estimate = " |
113 | << estimated_peak_grf_usage |
114 | << ", actual = " << ra_.get_peak_grf_usage() << "\n" ; |
115 | } |
116 | #endif |
117 | } |
118 | |
119 | } // namespace jit |
120 | } // namespace gpu |
121 | } // namespace impl |
122 | } // namespace dnnl |
123 | |
124 | #endif |
125 | |