1 | /******************************************************************************* |
2 | * Copyright 2017-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CONV_HPP |
18 | #define CONV_HPP |
19 | |
20 | #include <assert.h> |
21 | #include <limits.h> |
22 | #include <stdint.h> |
23 | |
24 | #include <iostream> |
25 | |
26 | #include "common.hpp" |
27 | #include "dnn_types.hpp" |
28 | #include "dnnl_common.hpp" |
29 | #include "utils/compare.hpp" |
30 | #include "utils/perf_report.hpp" |
31 | #include "utils/settings.hpp" |
32 | |
33 | namespace conv { |
34 | |
// Convolution algorithm selector used throughout the conv driver.
// The short names carry the numeric values; the `convolution_*` names are
// aliases that compare equal to the corresponding short name.
enum alg_t {
    UNDEF = 0,
    DIRECT = 1,
    WINO = 2,
    AUTO = 3,

    // Long-form aliases.
    convolution_direct = DIRECT,
    convolution_wino = WINO,
    convolution_auto = AUTO,
};
// Conversions between alg_t and its other representations. These are only
// declared here; their definitions are not part of this header.
// NOTE(review): judging by the names, these map string -> alg_t,
// alg_t -> string and dnnl_alg_kind_t -> alg_t. The result for an
// unrecognized input is not visible from here - confirm against the
// definitions.
alg_t str2alg(const char *str);
const char *alg2str(alg_t alg);
alg_t alg_kind2alg(dnnl_alg_kind_t alg);
47 | |
48 | struct desc_t { |
49 | int64_t g, mb; |
50 | int64_t ic, id, ih, iw; |
51 | int64_t oc, od, oh, ow; |
52 | int64_t kd, kh, kw; |
53 | int64_t sd, sh, sw; |
54 | int64_t pd, ph, pw; |
55 | int64_t pd_r, ph_r, pw_r; // End side padding for each dimension |
56 | int64_t dd, dh, dw; |
57 | bool has_groups; |
58 | |
59 | std::string name; |
60 | int ndims; |
61 | |
62 | // Initialize dependent opposite-side paddings values |
63 | // from the shape parameters |
64 | void init_pad_r() { |
65 | pw_r = opp_pad(iw, ow, kw, sw, pw, dw); |
66 | ph_r = opp_pad(ih, oh, kh, sh, ph, dh); |
67 | pd_r = opp_pad(id, od, kd, sd, pd, dd); |
68 | } |
69 | |
70 | int64_t desc_nelems(int arg, int mask) const; |
71 | |
72 | dims_t src_dims() const; |
73 | dims_t wei_dims() const; |
74 | dims_t bia_dims() const; |
75 | dims_t dst_dims() const; |
76 | dims_t strides() const; |
77 | dims_t dilations() const; |
78 | dims_t padding() const; |
79 | dims_t padding_r() const; |
80 | |
81 | private: |
82 | int64_t opp_pad(int64_t i, int64_t o, int64_t k, int64_t s, int64_t p, |
83 | int64_t d) const { |
84 | return (o - 1) * s - i + ((k - 1) * (d + 1) + 1) - p; |
85 | } |
86 | }; |
87 | |
// Declared here, defined elsewhere.
// NOTE(review): presumably parses `str` into `*desc` and returns a status
// code - confirm against the definition.
int str2desc(desc_t *desc, const char *str);
// Stream insertion for a descriptor; perf_report_t::dump_desc() relies on it.
std::ostream &operator<<(std::ostream &s, const desc_t &d);
90 | |
91 | /** configuration structure, that controls initial data filling + error check |
92 | * |
93 | * dt defines convolution precision |
94 | * |
95 | * for each type (SRC, WEI, BIA, and DST) the values are filled as follows: |
96 | * if (rand() > f_sparsity) then: |
97 | * v <-- f_base // it is guaranteed each kernel window |
98 | * // has at least one non-zero element |
99 | * else: |
100 | * v <-- f_min + rand() * f_step % (f_max - f_min) |
101 | * |
102 | * |
103 | * on final check the resulting values should be in [min .. max] range, the |
104 | * relative difference should not exceed eps |
105 | */ |
106 | typedef struct dt_conf_t { |
107 | dnnl_data_type_t dt; |
108 | double min, max; /* representative */ |
109 | int f_min, f_max; /* fill range */ |
110 | int f_base; /* fill base, use 0 */ |
111 | int f_step; /* fill step, use 1 */ |
112 | double f_sparsity; /* amount of non-zeros, default 0.25 */ |
113 | double eps; /* acceptable error */ |
114 | } _dt_conf_t[DAT_TOTAL]; |
115 | |
// Predefined configuration tables; defined outside this header.
// prb_t::get_dt_conf() returns entries of the *_with_bf16_fpmath or
// *_with_tf32_fpmath table in place of conf_f32 when the problem's cfg is
// conf_f32 and its attribute's fpmath mode is bf16 or tf32 respectively.
extern const _dt_conf_t conf_f32;
extern const _dt_conf_t conf_f32_with_bf16_fpmath;
extern const _dt_conf_t conf_f32_with_tf32_fpmath;
119 | |
// Configuration helpers; declared here, defined elsewhere.
// NOTE(review): str2cfg presumably maps a configuration name to its table;
// what it returns for an unknown name is not visible here - confirm.
const dt_conf_t *str2cfg(const char *str);
// Stream insertion for a configuration pointer; perf_report_t::dump_cfg()
// relies on it.
std::ostream &operator<<(std::ostream &s, const dt_conf_t *cfg);
// NOTE(review): presumably returns the configuration to use for `alg` given
// the requested `cfg` (prb_t::cfg is `mutable` "because of AUTO and WINO") -
// confirm against the definition.
const dt_conf_t *auto_cfg(const alg_t alg, const dt_conf_t *cfg);
123 | |
124 | struct settings_t : public base_settings_t { |
125 | settings_t() = default; |
126 | |
127 | // ctor to save certain fields from resetting |
128 | settings_t(const char *perf_template) : settings_t() { |
129 | this->perf_template = perf_template; |
130 | } |
131 | |
132 | desc_t desc {}; |
133 | |
134 | std::vector<dir_t> dir {FWD_B}; |
135 | std::vector<const dt_conf_t *> cfg {conf_f32}; |
136 | std::vector<std::string> stag {tag::any}, wtag {tag::any}, dtag {tag::any}; |
137 | std::vector<alg_t> alg {DIRECT}; |
138 | |
139 | const char *perf_template_csv() const { |
140 | static const std::string args |
141 | = "%dir%,%cfg%,%stag%,%wtag%,%dtag%,%alg%" ; |
142 | return perf_template_csv_base(args); |
143 | } |
144 | |
145 | void reset() { *this = settings_t(perf_template); } |
146 | }; |
147 | |
148 | struct prb_t : public desc_t { |
149 | prb_t(const desc_t &desc, dir_t dir, const dt_conf_t *cfg, |
150 | const std::string &stag, const std::string &wtag, |
151 | const std::string &dtag, alg_t alg, const attr_t &attr, |
152 | const thr_ctx_t &ctx_init, const thr_ctx_t &ctx_exe, int64_t mb = 0) |
153 | : desc_t(desc) |
154 | , dir(dir) |
155 | , cfg(cfg) |
156 | , stag(stag) |
157 | , wtag(wtag) |
158 | , dtag(dtag) |
159 | , alg(alg) |
160 | , attr(attr) |
161 | , user_mb(mb) |
162 | , ops(0) |
163 | , src_scales(NULL) |
164 | , wei_scales(NULL) |
165 | , dst_scales(NULL) |
166 | , src_zp(NULL) |
167 | , dst_zp(NULL) |
168 | , ctx_init(ctx_init) |
169 | , ctx_exe(ctx_exe) { |
170 | if (mb) this->mb = mb; |
171 | count_ops(); |
172 | src_scales = generate_scales(DNNL_ARG_SRC); |
173 | wei_scales = generate_scales(DNNL_ARG_WEIGHTS); |
174 | dst_scales = generate_scales(DNNL_ARG_DST); |
175 | src_zp = generate_zero_points(DNNL_ARG_SRC); |
176 | dst_zp = generate_zero_points(DNNL_ARG_DST); |
177 | } |
178 | ~prb_t() { |
179 | if (src_scales) zfree(src_scales); |
180 | if (wei_scales) zfree(wei_scales); |
181 | if (dst_scales) zfree(dst_scales); |
182 | if (src_zp) zfree(src_zp); |
183 | if (dst_zp) zfree(dst_zp); |
184 | } |
185 | |
186 | dir_t dir; |
187 | mutable const dt_conf_t *cfg; // `mutable` because of `AUTO` and `WINO`. |
188 | std::string stag, wtag, dtag; |
189 | mutable alg_t alg; // `mutable` because of `AUTO`. |
190 | attr_t attr; |
191 | int64_t user_mb; |
192 | |
193 | double ops; |
194 | float *src_scales, *wei_scales, *dst_scales; |
195 | int32_t *src_zp, *dst_zp; |
196 | thr_ctx_t ctx_init, ctx_exe; |
197 | |
198 | void count_ops(); |
199 | |
200 | const dt_conf_t &get_dt_conf(data_kind_t dk) const { |
201 | if (cfg == conf_f32) { |
202 | switch (attr.fpmath_mode) { |
203 | case dnnl_fpmath_mode_bf16: |
204 | return conf_f32_with_bf16_fpmath[dk]; |
205 | case dnnl_fpmath_mode_tf32: |
206 | return conf_f32_with_tf32_fpmath[dk]; |
207 | default: return cfg[dk]; |
208 | } |
209 | } |
210 | return cfg[dk]; |
211 | } |
212 | |
213 | BENCHDNN_DISALLOW_COPY_AND_ASSIGN(prb_t); |
214 | |
215 | private: |
216 | float *generate_scales(int arg) const; |
217 | int32_t *generate_zero_points(int arg) const; |
218 | }; |
// Stream insertion for a whole problem; declared here, defined elsewhere.
std::ostream &operator<<(std::ostream &s, const prb_t &prb);
220 | |
221 | struct perf_report_t : public base_perf_report_t { |
222 | perf_report_t(const prb_t *prb, const char *perf_template) |
223 | : base_perf_report_t(perf_template) |
224 | , p_(prb) |
225 | , stag_({normalize_tag(p_->stag, p_->ndims)}) |
226 | , wtag_(normalize_tag(p_->wtag, p_->ndims)) |
227 | , dtag_(normalize_tag(p_->dtag, p_->ndims)) {} |
228 | |
229 | void dump_alg(std::ostream &s) const override { s << alg2str(p_->alg); } |
230 | |
231 | void dump_cfg(std::ostream &s) const override { s << p_->cfg; } |
232 | |
233 | void dump_desc(std::ostream &s) const override { |
234 | s << static_cast<const desc_t &>(*p_); |
235 | } |
236 | |
237 | void dump_desc_csv(std::ostream &s) const override { |
238 | s << p_->g << ',' << p_->mb << ',' |
239 | |
240 | << p_->ic << ',' << p_->id << ',' << p_->ih << ',' << p_->iw << ',' |
241 | |
242 | << p_->oc << ',' << p_->od << ',' << p_->oh << ',' << p_->ow << ',' |
243 | |
244 | << p_->kd << ',' << p_->kh << ',' << p_->kw << ',' |
245 | |
246 | << p_->sd << ',' << p_->sh << ',' << p_->sw << ',' |
247 | |
248 | << p_->pd << ',' << p_->ph << ',' << p_->pw << ',' |
249 | |
250 | << p_->dd << ',' << p_->dh << ',' << p_->dw; |
251 | } |
252 | |
253 | double ops() const override { return p_->ops; } |
254 | const attr_t *attr() const override { return &p_->attr; } |
255 | const thr_ctx_t *ctx_init() const override { return &p_->ctx_init; } |
256 | const thr_ctx_t *ctx_exe() const override { return &p_->ctx_exe; } |
257 | const int64_t *user_mb() const override { return &p_->user_mb; } |
258 | const std::string *name() const override { return &p_->name; } |
259 | const dir_t *dir() const override { return &p_->dir; } |
260 | const std::vector<std::string> *stag() const override { return &stag_; } |
261 | const std::string *wtag() const override { return &wtag_; } |
262 | const std::string *dtag() const override { return &dtag_; } |
263 | |
264 | private: |
265 | const prb_t *p_; |
266 | std::vector<std::string> stag_; |
267 | std::string wtag_, dtag_; |
268 | }; |
269 | |
270 | inline int64_t src_off_f(const prb_t *prb, int64_t mb, int64_t g, int64_t ic, |
271 | int64_t id, int64_t ih, int64_t iw) { |
272 | return (((mb * prb->ic + g * prb->ic / prb->g + ic) * prb->id + id) |
273 | * prb->ih |
274 | + ih) |
275 | * prb->iw |
276 | + iw; |
277 | } |
278 | |
279 | inline int64_t wei_off_f(const prb_t *prb, int64_t g, int64_t oc, int64_t ic, |
280 | int64_t kd, int64_t kh, int64_t kw) { |
281 | return ((((g * prb->oc / prb->g + oc) * prb->ic / prb->g + ic) * prb->kd |
282 | + kd) * prb->kh |
283 | + kh) |
284 | * prb->kw |
285 | + kw; |
286 | } |
287 | |
288 | inline int64_t bia_off_f(const prb_t *prb, int64_t g, int64_t oc) { |
289 | return g * prb->oc / prb->g + oc; |
290 | } |
291 | |
292 | inline int64_t dst_off_f(const prb_t *prb, int64_t mb, int64_t g, int64_t oc, |
293 | int64_t od, int64_t oh, int64_t ow) { |
294 | return (((mb * prb->oc + g * prb->oc / prb->g + oc) * prb->od + od) |
295 | * prb->oh |
296 | + oh) |
297 | * prb->ow |
298 | + ow; |
299 | } |
300 | |
// Fill routines, one each for src, wei, bia and dst; declared here and
// defined elsewhere. Each takes the problem, two memory objects (`mem_dt`
// and `mem_fp`) and a result object, and returns an int.
// NOTE(review): presumably `mem_dt` is the memory in the tested data type,
// `mem_fp` its reference counterpart, and the return value a status code -
// confirm against the definitions.
int fill_src(
        const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res);
int fill_wei(
        const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res);
int fill_bia(
        const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res);
int fill_dst(
        const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res);
309 | |
// Declared here, defined elsewhere.
// NOTE(review): presumably configures the comparison object `cmp` for data
// kind `kind` of the given problem - confirm against the definition.
void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,
        const args_t &ref_args);

// NOTE(review): judging by the names, the skip_* functions mark a problem in
// `res` as skipped when it is unimplemented or invalid - confirm.
void skip_unimplemented_prb(const prb_t *prb, res_t *res);
void skip_invalid_prb(const prb_t *prb, res_t *res);
// Reference computation entry point; `prim_ref` is optional and defaults to
// nullptr.
void compute_ref(const prb_t *prb, const args_t &args,
        dnnl_primitive_t prim_ref = nullptr);
317 | |
// Driver entry points; declared here, defined elsewhere.
// NOTE(review): presumably doit() runs one problem and stores the outcome in
// `res`, while bench() processes the driver's command-line arguments -
// confirm against the definitions.
int doit(const prb_t *prb, res_t *res);
int bench(int argc, char **argv);
320 | |
321 | } // namespace conv |
322 | |
323 | #endif |
324 | |