1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <stdio.h>
18#include <stdlib.h>
19
20#include <sstream>
21
22#include "dnnl_common.hpp"
23#include "utils/parser.hpp"
24
25#include "brgemm/brgemm.hpp"
26
27namespace brgemm {
28
29#if defined(DNNL_X64) && DNNL_X64 == 1 && DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
30
31void check_correctness(const settings_t &s, const settings_t &def) {
32 for_(const auto &i_dt : s.dt)
33 for_(const auto &i_bia_dt : s.bia_dt)
34 for_(const auto &i_stag : s.stag)
35 for_(const auto &i_wtag : s.wtag)
36 for_(const auto &i_dtag : s.dtag)
37 for_(const auto &i_ld : s.ld)
38 for_(const auto &i_alpha : s.alpha)
39 for_(const auto &i_beta : s.beta)
40 for_(const auto &i_batch_size : s.batch_size)
41 for_(const auto &i_brgemm_attr : s.brgemm_attr)
42 for_(const auto &i_scales : s.scales)
43 for_(const auto &i_zero_points : s.zero_points)
44 for_(const auto &i_post_ops : s.post_ops)
45 for_(const auto &i_scratchpad_mode : s.scratchpad_mode)
46 for_(const auto &i_fpmath_mode : s.fpmath_mode)
47 for_(const auto &i_ctx_init : s.ctx_init)
48 for (const auto &i_ctx_exe : s.ctx_exe) {
49 auto attr = settings_t::get_attr(i_scales, i_zero_points, i_post_ops,
50 i_scratchpad_mode, i_fpmath_mode);
51
52 static constexpr int n_inputs = 3;
53 if (i_dt.size() != 1 && i_dt.size() != n_inputs) {
54 fprintf(stderr,
55 "ERROR: brgemm driver: `dt` option expects either a single "
56 "input or three inputs in SRC, WEI, and DST order. Current "
57 "size is: \"%ld\"\n",
58 (long)i_dt.size()),
59 fflush(stderr);
60 SAFE_V(FAIL);
61 }
62
63 if (s.prb_vdims.ndims > 2) {
64 fprintf(stderr,
65 "ERROR: brgemm driver: problem descriptor supports only "
66 "MxK:KxN notion.\n"),
67 fflush(stderr);
68 SAFE_V(FAIL);
69 }
70
71 const prb_t prb(s.prb_vdims, i_dt, i_stag, i_wtag, i_dtag, i_ld,
72 i_bia_dt, i_alpha, i_beta, i_batch_size, i_brgemm_attr, attr,
73 i_ctx_init, i_ctx_exe);
74 std::stringstream ss;
75 ss << prb;
76 const std::string cpp_pstr = ss.str();
77 const char *pstr = cpp_pstr.c_str();
78 BENCHDNN_PRINT(1, "run: %s\n", pstr);
79
80 res_t res {};
81 doit(&prb, &res);
82
83 parse_result(res, pstr);
84
85 if (is_bench_mode(PERF)) {
86 perf_report_t pr(&prb, s.perf_template);
87 pr.report(&res, pstr);
88 }
89 }
90}
91
// Help messages for the driver-specific command-line options. Each message
// starts with the expected value kind and its default on the first line,
// followed by the description on the next line.

// Help text for the `alpha` option.
static const std::string help_alpha {
        "FLOAT (Default: 1.f)\n"
        " Specifies real value corresponding to scaling of accumulator "
        "result: `C = alpha * A * B`.\n"};

// Help text for the `beta` option.
static const std::string help_beta {
        "FLOAT (Default: 0.f)\n"
        " Specifies real value corresponding to adding a part of "
        "accumulator result: `C = A * B + beta * C`.\n"};

// Help text for the `bs` (batch size) option.
static const std::string help_batch_size {
        "UINT (Default: `1`)\n"
        " Specifies a batch size that indicates how many batches per "
        "kernel call will be used.\n"};

// Help text for the `ld` (leading dimensions) option.
static const std::string help_ld {
        "UINT:UINT:UINT (Default: not specified)\n"
        " Specifies LDA:LDB:LDD values. If some values are skipped, the "
        "default one (K, N, or N) will be used. If there are no post-ops, "
        "LDC will reuse LDD, otherwise expect LDC always dense.\n"};

// Help text for the `brgemm-attr` option.
static const std::string help_brgemm_attr {
        "STRING (Default: empty)\n"
        " Specifies BRGeMM kernel attributes. If some values are skipped, "
        "the default one will be used.\n"};
113
114int bench(int argc, char **argv) {
115 // BRGeMM kernel support is available on x86 Intel CPU only.
116 if (is_gpu()) return OK;
117 driver_name = "brgemm";
118 using namespace parser;
119 static settings_t s;
120 static const settings_t def {};
121 for (; argc > 0; --argc, ++argv) {
122 auto cstr2str = [](const char *str) { return std::string(str); };
123 const bool parsed_options = parse_bench_settings(argv[0])
124 || parse_batch(bench, argv[0])
125 || parse_multi_dt(s.dt, def.dt, argv[0], "dt")
126 || parse_dt(s.bia_dt, def.bia_dt, argv[0], "bia_dt")
127 || parse_multivector_option(
128 s.ld, def.ld, atoi, argv[0], "ld", help_ld)
129 || parse_vector_option(s.batch_size, def.batch_size, atoi,
130 argv[0], "bs", help_batch_size)
131 || parse_vector_option(
132 s.alpha, def.alpha, atof, argv[0], "alpha", help_alpha)
133 || parse_vector_option(
134 s.beta, def.beta, atof, argv[0], "beta", help_beta)
135 || parse_vector_option(s.brgemm_attr, def.brgemm_attr, cstr2str,
136 argv[0], "brgemm-attr", help_brgemm_attr)
137 || parse_attr_scales(s.scales, argv[0])
138 || parse_attr_zero_points(s.zero_points, argv[0])
139 || parse_attr_post_ops(s.post_ops, argv[0])
140 || parse_attr_scratchpad_mode(
141 s.scratchpad_mode, def.scratchpad_mode, argv[0])
142 || parse_attr_fpmath_mode(
143 s.fpmath_mode, def.fpmath_mode, argv[0])
144 || parse_perf_template(s.perf_template, s.perf_template_def,
145 s.perf_template_csv(), argv[0])
146 || parse_reset(s, argv[0]) || parse_help(argv[0]);
147 if (!parsed_options) {
148 catch_unknown_options(argv[0]);
149
150 parse_prb_vdims(s.prb_vdims, argv[0]);
151 check_correctness(s, def);
152 }
153 }
154 return parse_last_argument();
155}
156
157#else
158
159int bench(int argc, char **argv) {
160 BENCHDNN_PRINT(
161 0, "%s\n", "INFO: brgemm driver: only x64 backend is supported.");
162 return OK;
163}
164
165#endif
166
167} // namespace brgemm
168