1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_JIT_CONV_SLM_REDUCE_BUILDER_HPP
18#define GPU_JIT_CONV_SLM_REDUCE_BUILDER_HPP
19
20#include "gpu/jit/ir/hw_config.hpp"
21#include "gpu/jit/ir/ir.hpp"
22#include "gpu/jit/ir/tensor.hpp"
23#include "gpu/jit/ngen/ngen.hpp"
24
25#include <vector>
26
27namespace dnnl {
28namespace impl {
29namespace gpu {
30namespace jit {
31
32class slm_reduce_builder_t {
33public:
34 slm_reduce_builder_t() = default;
35
36 slm_reduce_builder_t(ir_context_t &ir_ctx, const grid_info_t &tg_grid,
37 const expr_t &reg_buf, const layout_t &reg_layout,
38 const tensor_t &thr_tile, int dim = 2);
39
40 bool is_empty() const { return reg_buf_.is_empty(); }
41
42 const layout_t &reg_layout() const { return reg_layout_; }
43
44 const tensor_t &thr_tile() const { return thr_tile_; }
45
46 const stmt_t &store_stmt() const { return store_stmt_; }
47
48 const stmt_t &load_stmt() const { return load_stmt_; }
49
50 const std::vector<stmt_t> &allocs() const { return allocs_; }
51
52 const expr_t &reduce_cond() const { return reduce_cond_; }
53
54 stmt_t stmt() const {
55 stmt_t ret;
56 ret = ret.append(funcs::barrier());
57 ret = ret.append(store_stmt_);
58 ret = ret.append(funcs::barrier());
59 ret = ret.append(load_stmt_);
60 ret = inject_alloc_stmts(ret, allocs_);
61 return ret;
62 }
63
64private:
65 void build();
66
67 uint32_t reduction_mask() const {
68 uint32_t mask = 0xFFFFFFFF;
69 for (int i = 0; i < tg_ndims_; i++) {
70 int k_dim_idx = reg_layout_.ndims() + i;
71 mask &= ~(1 << k_dim_idx);
72 }
73 return mask;
74 }
75
76 ir_context_t *ir_ctx_ = nullptr;
77 grid_info_t tg_grid_;
78
79 expr_t reg_buf_;
80 layout_t reg_layout_;
81 tensor_t thr_tile_;
82
83 int dim_;
84
85 expr_t tmp_reg_buf_;
86 int tmp_reg_buf_size_ = 0;
87
88 expr_t slm_buf_;
89 int slm_buf_size_ = 0;
90
91 int tg_ndims_;
92
93 stmt_t store_stmt_;
94 stmt_t load_stmt_;
95 expr_t reduce_cond_;
96
97 std::vector<stmt_t> allocs_;
98};
99
100} // namespace jit
101} // namespace gpu
102} // namespace impl
103} // namespace dnnl
104
105#endif
106