1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_COMPUTE_DISPATCH_HPP
18#define GPU_COMPUTE_DISPATCH_HPP
19
20#include <cassert>
21#include <string>
22
23#include "common/c_types_map.hpp"
24#include "common/utils.hpp"
25#include "gpu/compute/device_info.hpp"
26#include "gpu/compute/kernel_ctx.hpp"
27#include "gpu/compute/utils.hpp"
28
29namespace dnnl {
30namespace impl {
31namespace gpu {
32namespace compute {
33
34void get_optimal_lws(const size_t *gws, size_t *lws, const size_t n,
35 const int mapped_vec_dim_idx, const gpu_arch_t gpu_arch);
36
37class compute_engine_t;
38
39class dispatch_t {
40public:
41 static const int min_nesting_level = -1;
42
43 // md - memory descriptor hint to extract nesting levels based on the layout.
44 dispatch_t(const compute_engine_t *engine = nullptr,
45 const memory_desc_t *md = nullptr);
46
47 nd_range_t nd_range() const {
48 assert(generate_called && "generate() must be called.");
49 return nd_range_;
50 }
51
52 std::string str() const;
53
54 void define_dim(const std::string &name, int md_hint_idx, dim_t size,
55 dim_t block = 1) {
56 define_dim_with_md_hint(name, md_hint_idx, size, block);
57 }
58
59 void define_dim(const std::string &name, dim_t size) {
60 define_dim_with_nesting_level(name, min_nesting_level, size);
61 }
62
63 void define_dim_with_nesting_level(const std::string &name,
64 int nesting_level, dim_t size, dim_t block = 1);
65 status_t vectorize_dim(const std::string &name, int vector_size);
66
67 void def_kernel_macros(kernel_ctx_t &kernel_ctx) const;
68
69 // Attribute suffix is only required to support multiple kernels within a
70 // single kernel context.
71 void set_kernel_attr_suffix(const std::string &suffix) {
72 attr_suffix_ = suffix;
73 }
74
75 void generate(bool generate_lws = true);
76
77 void generate_override(
78 const size_t *grange, const size_t *lrange = nullptr);
79 void set_lws(const size_t *lrange);
80
81private:
82 // Dimension information necessary for mapping to global work IDs.
83 struct dim_info_t {
84 // Dimension name to access from a kernel as GWS_GET_<name>().
85 std::string name;
86
87 // Size of the dimension.
88 dim_t size;
89
90 // Block size that the kernel uses for the dimension. With blocking,
91 // every kernel instance handles a block of indices. Possible values:
92 // 0: flexible blocking
93 // 1: no blocking
94 // > 1: fixed blocking
95 dim_t block;
96
97 // Outermost dimension has the min value.
98 // Innermost dimension has the max value.
99 int nesting_level;
100
101 // -1: no vectorization; at most one dimension may be vectorized.
102 int vector_size;
103
104 // Either of [0, 1, 2] - the ID that the dimension maps to.
105 int gws_index;
106 };
107
108 void define_dim_with_md_hint(const std::string &name, int md_hint_index,
109 dim_t size, dim_t block = 1);
110
111 int find_vectorized_dim() const {
112 int vec_dim_idx = -1;
113 for (int i = 0; i < ndims_; ++i) {
114 if (dims_[i].vector_size != 1) {
115 assert(vec_dim_idx == -1);
116 assert(dims_[i].block > 0);
117 vec_dim_idx = i;
118 }
119 }
120 return vec_dim_idx;
121 }
122
123 dim_t get_gws_stride(int idx) const {
124 dim_t s = 1;
125 for (int i = 0; i < idx; ++i) {
126 if (dims_[i].gws_index == dims_[idx].gws_index) {
127 s *= utils::div_up(dims_[i].size, dims_[i].block);
128 }
129 }
130 return s;
131 }
132
133 const compute_engine_t *engine_;
134
135 int md_ndims_ = 0;
136 int md_nesting_levels_[DNNL_MAX_NDIMS];
137
138 int ndims_ = 0;
139 dim_info_t dims_[DNNL_MAX_NDIMS];
140
141 std::string attr_suffix_ = "DEFAULT";
142 nd_range_t nd_range_;
143 bool generate_called = false;
144};
145
146} // namespace compute
147} // namespace gpu
148} // namespace impl
149} // namespace dnnl
150
151#endif
152