1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_COMPUTE_DISPATCH_HPP |
18 | #define GPU_COMPUTE_DISPATCH_HPP |
19 | |
20 | #include <cassert> |
21 | #include <string> |
22 | |
23 | #include "common/c_types_map.hpp" |
24 | #include "common/utils.hpp" |
25 | #include "gpu/compute/device_info.hpp" |
26 | #include "gpu/compute/kernel_ctx.hpp" |
27 | #include "gpu/compute/utils.hpp" |
28 | |
29 | namespace dnnl { |
30 | namespace impl { |
31 | namespace gpu { |
32 | namespace compute { |
33 | |
// Computes a local work size (lws) for the given n-dimensional global work
// size (gws), using architecture-specific heuristics selected by gpu_arch.
// mapped_vec_dim_idx is the gws dimension mapped to the vectorized dispatch
// dimension — presumably -1 when no dimension is vectorized; confirm against
// the implementation in dispatch.cpp.
void get_optimal_lws(const size_t *gws, size_t *lws, const size_t n,
        const int mapped_vec_dim_idx, const gpu_arch_t gpu_arch);
36 | |
37 | class compute_engine_t; |
38 | |
39 | class dispatch_t { |
40 | public: |
41 | static const int min_nesting_level = -1; |
42 | |
43 | // md - memory descriptor hint to extract nesting levels based on the layout. |
44 | dispatch_t(const compute_engine_t *engine = nullptr, |
45 | const memory_desc_t *md = nullptr); |
46 | |
47 | nd_range_t nd_range() const { |
48 | assert(generate_called && "generate() must be called." ); |
49 | return nd_range_; |
50 | } |
51 | |
52 | std::string str() const; |
53 | |
54 | void define_dim(const std::string &name, int md_hint_idx, dim_t size, |
55 | dim_t block = 1) { |
56 | define_dim_with_md_hint(name, md_hint_idx, size, block); |
57 | } |
58 | |
59 | void define_dim(const std::string &name, dim_t size) { |
60 | define_dim_with_nesting_level(name, min_nesting_level, size); |
61 | } |
62 | |
63 | void define_dim_with_nesting_level(const std::string &name, |
64 | int nesting_level, dim_t size, dim_t block = 1); |
65 | status_t vectorize_dim(const std::string &name, int vector_size); |
66 | |
67 | void def_kernel_macros(kernel_ctx_t &kernel_ctx) const; |
68 | |
69 | // Attribute suffix is only required to support multiple kernels within a |
70 | // single kernel context. |
71 | void set_kernel_attr_suffix(const std::string &suffix) { |
72 | attr_suffix_ = suffix; |
73 | } |
74 | |
75 | void generate(bool generate_lws = true); |
76 | |
77 | void generate_override( |
78 | const size_t *grange, const size_t *lrange = nullptr); |
79 | void set_lws(const size_t *lrange); |
80 | |
81 | private: |
82 | // Dimension information necessary for mapping to global work IDs. |
83 | struct dim_info_t { |
84 | // Dimension name to access from a kernel as GWS_GET_<name>(). |
85 | std::string name; |
86 | |
87 | // Size of the dimension. |
88 | dim_t size; |
89 | |
90 | // Block size that the kernel uses for the dimension. With blocking, |
91 | // every kernel instance handles a block of indices. Possible values: |
92 | // 0: flexible blocking |
93 | // 1: no blocking |
94 | // > 1: fixed blocking |
95 | dim_t block; |
96 | |
97 | // Outermost dimension has the min value. |
98 | // Innermost dimension has the max value. |
99 | int nesting_level; |
100 | |
101 | // -1: no vectorization; at most one dimension may be vectorized. |
102 | int vector_size; |
103 | |
104 | // Either of [0, 1, 2] - the ID that the dimension maps to. |
105 | int gws_index; |
106 | }; |
107 | |
108 | void define_dim_with_md_hint(const std::string &name, int md_hint_index, |
109 | dim_t size, dim_t block = 1); |
110 | |
111 | int find_vectorized_dim() const { |
112 | int vec_dim_idx = -1; |
113 | for (int i = 0; i < ndims_; ++i) { |
114 | if (dims_[i].vector_size != 1) { |
115 | assert(vec_dim_idx == -1); |
116 | assert(dims_[i].block > 0); |
117 | vec_dim_idx = i; |
118 | } |
119 | } |
120 | return vec_dim_idx; |
121 | } |
122 | |
123 | dim_t get_gws_stride(int idx) const { |
124 | dim_t s = 1; |
125 | for (int i = 0; i < idx; ++i) { |
126 | if (dims_[i].gws_index == dims_[idx].gws_index) { |
127 | s *= utils::div_up(dims_[i].size, dims_[i].block); |
128 | } |
129 | } |
130 | return s; |
131 | } |
132 | |
133 | const compute_engine_t *engine_; |
134 | |
135 | int md_ndims_ = 0; |
136 | int md_nesting_levels_[DNNL_MAX_NDIMS]; |
137 | |
138 | int ndims_ = 0; |
139 | dim_info_t dims_[DNNL_MAX_NDIMS]; |
140 | |
141 | std::string attr_suffix_ = "DEFAULT" ; |
142 | nd_range_t nd_range_; |
143 | bool generate_called = false; |
144 | }; |
145 | |
146 | } // namespace compute |
147 | } // namespace gpu |
148 | } // namespace impl |
149 | } // namespace dnnl |
150 | |
151 | #endif |
152 | |