1/*******************************************************************************
2* Copyright 2021-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_JIT_REORDER_IR_BUILDER_HPP
18#define GPU_JIT_REORDER_IR_BUILDER_HPP
19
20#include <array>
21
22#include "common/convolution_pd.hpp"
23#include "gpu/jit/ir/gemm_schedule.hpp"
24#include "gpu/jit/ir/ir.hpp"
25#include "gpu/jit/ir/ir_builder.hpp"
26#include "gpu/jit/ir/kernel_info.hpp"
27#include "gpu/jit/ir/tensor.hpp"
28
29namespace dnnl {
30namespace impl {
31namespace gpu {
32namespace jit {
33
34class reorder_ir_builder_t : public ir_builder_t {
35public:
36 reorder_ir_builder_t(const exec_config_t &exec_cfg,
37 const kernel_info_t &kernel_info, const layout_t &src_layout,
38 const layout_t &dst_layout)
39 : ir_builder_t(kernel_info)
40 , exec_cfg_(exec_cfg)
41 , src_layout_(src_layout)
42 , dst_layout_(dst_layout) {
43 build();
44 }
45
46 const grid_info_t &kernel_grid() const { return kernel_grid_; }
47
48 static void compute_blocks(const exec_config_t &exec_cfg,
49 const layout_t &src, const layout_t &dst,
50 std::vector<int> &iter_blocks, std::vector<int> &loop_blocks,
51 std::vector<int> &tg_blocks, int max_iter_tile_bytes = 0,
52 int max_thr_tile_bytes = 0);
53
54 static void compute_blocks(const exec_config_t &exec_cfg,
55 const layout_t &src, const layout_t &dst,
56 std::vector<int> &tile_blocks, std::vector<int> &tg_blocks);
57
58 static void compute_grid(const layout_t &src, const layout_t &dst,
59 const std::vector<int> &iter_blocks,
60 const std::vector<int> &loop_blocks,
61 const std::vector<int> &tg_blocks, grid_info_t &kernel_grid,
62 grid_info_t &tg_grid, std::vector<int> *dim2grid = nullptr);
63
64 static compute::nd_range_t nd_range(const exec_config_t &exec_cfg,
65 const layout_t &src, const layout_t &dst);
66
67private:
68 void build() override;
69 bool try_build(const std::vector<int> &iter_blocks,
70 const std::vector<int> &loop_blocks,
71 const std::vector<int> &tg_blocks);
72
73 static int max_tile_size(const hw_config_t &hw_cfg, const layout_t &dst,
74 const layout_t &src) {
75 // XeHPC is fine with 2048 bytes, XeHPG and below can fit 2048 bytes if
76 // reorder is a simple copy.
77 return (hw_cfg.hw() <= ngen::HW::XeHPG && dst != src) ? 1024 : 2048;
78 }
79
80 exec_config_t exec_cfg_;
81 grid_info_t kernel_grid_;
82 grid_info_t tg_grid_;
83 layout_t src_layout_;
84 layout_t dst_layout_;
85};
86
87} // namespace jit
88} // namespace gpu
89} // namespace impl
90} // namespace dnnl
91
92#endif
93