1 | /******************************************************************************* |
2 | * Copyright 2021-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_JIT_REORDER_IR_BUILDER_HPP |
18 | #define GPU_JIT_REORDER_IR_BUILDER_HPP |
19 | |
20 | #include <array> |
21 | |
22 | #include "common/convolution_pd.hpp" |
23 | #include "gpu/jit/ir/gemm_schedule.hpp" |
24 | #include "gpu/jit/ir/ir.hpp" |
25 | #include "gpu/jit/ir/ir_builder.hpp" |
26 | #include "gpu/jit/ir/kernel_info.hpp" |
27 | #include "gpu/jit/ir/tensor.hpp" |
28 | |
29 | namespace dnnl { |
30 | namespace impl { |
31 | namespace gpu { |
32 | namespace jit { |
33 | |
34 | class reorder_ir_builder_t : public ir_builder_t { |
35 | public: |
36 | reorder_ir_builder_t(const exec_config_t &exec_cfg, |
37 | const kernel_info_t &kernel_info, const layout_t &src_layout, |
38 | const layout_t &dst_layout) |
39 | : ir_builder_t(kernel_info) |
40 | , exec_cfg_(exec_cfg) |
41 | , src_layout_(src_layout) |
42 | , dst_layout_(dst_layout) { |
43 | build(); |
44 | } |
45 | |
46 | const grid_info_t &kernel_grid() const { return kernel_grid_; } |
47 | |
48 | static void compute_blocks(const exec_config_t &exec_cfg, |
49 | const layout_t &src, const layout_t &dst, |
50 | std::vector<int> &iter_blocks, std::vector<int> &loop_blocks, |
51 | std::vector<int> &tg_blocks, int max_iter_tile_bytes = 0, |
52 | int max_thr_tile_bytes = 0); |
53 | |
54 | static void compute_blocks(const exec_config_t &exec_cfg, |
55 | const layout_t &src, const layout_t &dst, |
56 | std::vector<int> &tile_blocks, std::vector<int> &tg_blocks); |
57 | |
58 | static void compute_grid(const layout_t &src, const layout_t &dst, |
59 | const std::vector<int> &iter_blocks, |
60 | const std::vector<int> &loop_blocks, |
61 | const std::vector<int> &tg_blocks, grid_info_t &kernel_grid, |
62 | grid_info_t &tg_grid, std::vector<int> *dim2grid = nullptr); |
63 | |
64 | static compute::nd_range_t nd_range(const exec_config_t &exec_cfg, |
65 | const layout_t &src, const layout_t &dst); |
66 | |
67 | private: |
68 | void build() override; |
69 | bool try_build(const std::vector<int> &iter_blocks, |
70 | const std::vector<int> &loop_blocks, |
71 | const std::vector<int> &tg_blocks); |
72 | |
73 | static int max_tile_size(const hw_config_t &hw_cfg, const layout_t &dst, |
74 | const layout_t &src) { |
75 | // XeHPC is fine with 2048 bytes, XeHPG and below can fit 2048 bytes if |
76 | // reorder is a simple copy. |
77 | return (hw_cfg.hw() <= ngen::HW::XeHPG && dst != src) ? 1024 : 2048; |
78 | } |
79 | |
80 | exec_config_t exec_cfg_; |
81 | grid_info_t kernel_grid_; |
82 | grid_info_t tg_grid_; |
83 | layout_t src_layout_; |
84 | layout_t dst_layout_; |
85 | }; |
86 | |
87 | } // namespace jit |
88 | } // namespace gpu |
89 | } // namespace impl |
90 | } // namespace dnnl |
91 | |
92 | #endif |
93 | |