1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_JIT_CONV_SLM_REDUCE_BUILDER_HPP |
18 | #define GPU_JIT_CONV_SLM_REDUCE_BUILDER_HPP |
19 | |
20 | #include "gpu/jit/ir/hw_config.hpp" |
21 | #include "gpu/jit/ir/ir.hpp" |
22 | #include "gpu/jit/ir/tensor.hpp" |
23 | #include "gpu/jit/ngen/ngen.hpp" |
24 | |
25 | #include <vector> |
26 | |
27 | namespace dnnl { |
28 | namespace impl { |
29 | namespace gpu { |
30 | namespace jit { |
31 | |
32 | class slm_reduce_builder_t { |
33 | public: |
34 | slm_reduce_builder_t() = default; |
35 | |
36 | slm_reduce_builder_t(ir_context_t &ir_ctx, const grid_info_t &tg_grid, |
37 | const expr_t ®_buf, const layout_t ®_layout, |
38 | const tensor_t &thr_tile, int dim = 2); |
39 | |
40 | bool is_empty() const { return reg_buf_.is_empty(); } |
41 | |
42 | const layout_t ®_layout() const { return reg_layout_; } |
43 | |
44 | const tensor_t &thr_tile() const { return thr_tile_; } |
45 | |
46 | const stmt_t &store_stmt() const { return store_stmt_; } |
47 | |
48 | const stmt_t &load_stmt() const { return load_stmt_; } |
49 | |
50 | const std::vector<stmt_t> &allocs() const { return allocs_; } |
51 | |
52 | const expr_t &reduce_cond() const { return reduce_cond_; } |
53 | |
54 | stmt_t stmt() const { |
55 | stmt_t ret; |
56 | ret = ret.append(funcs::barrier()); |
57 | ret = ret.append(store_stmt_); |
58 | ret = ret.append(funcs::barrier()); |
59 | ret = ret.append(load_stmt_); |
60 | ret = inject_alloc_stmts(ret, allocs_); |
61 | return ret; |
62 | } |
63 | |
64 | private: |
65 | void build(); |
66 | |
67 | uint32_t reduction_mask() const { |
68 | uint32_t mask = 0xFFFFFFFF; |
69 | for (int i = 0; i < tg_ndims_; i++) { |
70 | int k_dim_idx = reg_layout_.ndims() + i; |
71 | mask &= ~(1 << k_dim_idx); |
72 | } |
73 | return mask; |
74 | } |
75 | |
76 | ir_context_t *ir_ctx_ = nullptr; |
77 | grid_info_t tg_grid_; |
78 | |
79 | expr_t reg_buf_; |
80 | layout_t reg_layout_; |
81 | tensor_t thr_tile_; |
82 | |
83 | int dim_; |
84 | |
85 | expr_t tmp_reg_buf_; |
86 | int tmp_reg_buf_size_ = 0; |
87 | |
88 | expr_t slm_buf_; |
89 | int slm_buf_size_ = 0; |
90 | |
91 | int tg_ndims_; |
92 | |
93 | stmt_t store_stmt_; |
94 | stmt_t load_stmt_; |
95 | expr_t reduce_cond_; |
96 | |
97 | std::vector<stmt_t> allocs_; |
98 | }; |
99 | |
100 | } // namespace jit |
101 | } // namespace gpu |
102 | } // namespace impl |
103 | } // namespace dnnl |
104 | |
105 | #endif |
106 | |