1/*******************************************************************************
2* Copyright 2019-2021 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#if defined(FP16) || defined(FP32) || defined(F16F16F32) || defined(BF16BF16F32)
/*
 * TestGEMM instantiation for the builds selected by the enclosing #if
 * (FP16, FP32, F16F16F32 or BF16BF16F32).
 *
 * NOTE(review): test_params is defined outside this chunk; the positional
 * fields are presumably {transA, transB, M, N, K, alpha, beta, lda, ldb, ldc,
 * ...} -- confirm against its definition before editing any value.
 *
 * Entries, in order:
 *  - four entries ending in `true, dnnl_invalid_arguments` (presumably
 *    "expected to fail with this status" -- TODO confirm);
 *  - a range of shapes (size values from 1 up to 10000) using upper- and
 *    lower-case 'n'/'t' flags;
 *  - two entries built with make_test_params_with_offset;
 *  - 2000- and 3000-sized cubes for all four 'n'/'t' combinations.
 */
18INST_TEST_CASE(TestGEMM,
19 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
20 dnnl_invalid_arguments},
21 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
22 dnnl_invalid_arguments},
23 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
24 dnnl_invalid_arguments},
25 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
26 dnnl_invalid_arguments},
27
28 test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4},
29 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
30 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
31 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
32 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
33 test_params {'N', 'n', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
34 test_params {'n', 'T', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
35 test_params {'T', 'N', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
36 test_params {'t', 't', 31, 21, 11, 2.0, 1.5, 61, 51, 81},
37 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100},
38 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100},
39 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100},
40 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100},
41 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2},
42 test_params {'t', 't', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2},
43
44 make_test_params_with_offset(
45 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100),
46 make_test_params_with_offset(
47 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
48
49 test_params {'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
50 test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
51 test_params {'t', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
52 test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
53 test_params {'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
54 test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000},
55 test_params {'t', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000},
56 test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000});
57
/*
 * TestGEMV instantiation (CPU_INST_TEST_CASE) for the floating-point builds.
 *
 * In every entry one of the first two size values (third and fourth
 * positions) is 1 -- presumably M or N, giving matrix-vector shaped
 * products; confirm against the test_params definition.
 *
 * Three groups, separated by blank lines:
 *  - scaling values 1.0f / 0.0f;
 *  - scaling values 1.0f / 0.0f with larger trailing values (presumably
 *    padded leading dimensions -- TODO confirm);
 *  - scaling values 1.0f / 1.0f.
 *
 * NOTE(review): the last entry uses 4000 as its third size value where the
 * matching entry of the first group uses 2000 -- confirm this is intended.
 */
58CPU_INST_TEST_CASE(TestGEMV,
59 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1},
60 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000},
61 test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8000, 300, 300},
62 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1},
63 test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 200, 1, 1},
64 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000},
65 test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 1, 300, 300},
66 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1},
67 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000},
68 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1},
69 test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 200, 8000, 1},
70 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000},
71
72 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1, 30},
73 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 1},
74 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 30},
75 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2010, 3010, 3010},
76 test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8010, 310, 310},
77 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2010, 20, 30},
78 test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 210, 20, 30},
79 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 20, 3010, 3010},
80 test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 20, 310, 310},
81 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1010, 20},
82 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2010, 2010, 3010},
83 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2010, 1010, 20},
84 test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 210, 8010, 20},
85 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 20, 2010, 3010},
86
87 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1},
88 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000},
89 test_params {'n', 'n', 1, 300, 8000, 1.0f, 1.0f, 8000, 300, 300},
90 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1},
91 test_params {'t', 'n', 200, 1, 8000, 1.0f, 1.0f, 200, 1, 1},
92 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000},
93 test_params {'t', 'n', 1, 300, 8000, 1.0f, 1.0f, 1, 300, 300},
94 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1},
95 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000},
96 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1},
97 test_params {'t', 't', 200, 1, 8000, 1.0f, 1.0f, 200, 8000, 1},
98 test_params {'t', 't', 1, 3000, 4000, 1.0f, 1.0f, 1, 4000, 3000});
99
100/**
101 * These cases are used to test the small-N avx-512 sgemm TN kernels.
102 * Note: The kernels assume a column major layout while the external
103 * APIs assume row major layout, so the M/N and transA/transB values
104 * are swapped.
105 */
106CPU_INST_TEST_CASE(TestGEMM_smalln,
 /* All entries use 'n', 't'. The first size value cycles through 5, 7, 4
  * and 8, four shapes each. Only the first three entries of the "7" group
  * use scaling values other than 1.0f / 1.0f. */
107 test_params {'n', 't', 5, 512, 512, 1.0f, 1.0f, 512, 512, 512},
108 test_params {'n', 't', 5, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
109 test_params {'n', 't', 5, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
110 test_params {'n', 't', 5, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
111 test_params {'n', 't', 7, 512, 512, 0.0f, 1.0f, 512, 512, 512},
112 test_params {'n', 't', 7, 512, 1536, 1.0f, 0.0f, 1536, 1536, 512},
113 test_params {'n', 't', 7, 512, 2048, 0.5f, 0.5f, 2048, 2048, 512},
114 test_params {'n', 't', 7, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
115 test_params {'n', 't', 4, 512, 512, 1.0f, 1.0f, 512, 512, 512},
116 test_params {'n', 't', 4, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
117 test_params {'n', 't', 4, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
118 test_params {'n', 't', 4, 2048, 512, 1.0f, 1.0f, 512, 512, 2048},
119 test_params {'n', 't', 8, 512, 512, 1.0f, 1.0f, 512, 512, 512},
120 test_params {'n', 't', 8, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512},
121 test_params {'n', 't', 8, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512},
122 test_params {'n', 't', 8, 2048, 512, 1.0f, 1.0f, 512, 512, 2048});
123
/*
 * TestGEMM_stkmem instantiation (CPU_INST_TEST_CASE) for the floating-point
 * builds: four 'n', 'n' entries whose first size value is 2, second is 48 or
 * 16, and third ranges from 83 to 256.
 * NOTE(review): the suite name suggests these shapes target a stack-memory
 * code path -- confirm against the implementation before changing sizes.
 */
124CPU_INST_TEST_CASE(TestGEMM_stkmem,
125 test_params {'n', 'n', 2, 48, 83, 1.0f, 0.0f, 83, 48, 48},
126 test_params {'n', 'n', 2, 48, 200, 1.0f, 0.0f, 200, 48, 48},
127 test_params {'n', 'n', 2, 16, 251, 1.0f, 0.0f, 251, 16, 16},
128 test_params {'n', 'n', 2, 16, 256, 1.0f, 0.0f, 256, 16, 16});
129
130#if defined(FP32) || defined(BF16BF16F32)
/*
 * TestGEMM_packed instantiation, compiled only when FP32 or BF16BF16F32 is
 * defined (see the enclosing #if).
 *
 * Entries, in order:
 *  - four entries ending in `true, dnnl_invalid_arguments`, each with a
 *    two-bool brace list in the preceding slot;
 *  - entries built with make_test_params_pack, whose first argument is a
 *    two-bool brace list (presumably "pack A" / "pack B" switches -- TODO
 *    confirm against the helper's definition), covering small shapes, 3000
 *    cubes, and large rectangular shapes up to 20000 in one size value.
 */
131INST_TEST_CASE(TestGEMM_packed,
132 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {false, true},
133 true, dnnl_invalid_arguments},
134 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {true, false},
135 true, dnnl_invalid_arguments},
136 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, true},
137 true, dnnl_invalid_arguments},
138 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {true, true},
139 true, dnnl_invalid_arguments},
140
141 make_test_params_pack(
142 {true, false}, 'N', 'n', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
143 make_test_params_pack(
144 {false, true}, 'n', 'T', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
145 make_test_params_pack(
146 {true, false}, 'T', 'N', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
147 make_test_params_pack(
148 {true, true}, 't', 't', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81),
149 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
150 100, 100, 100),
151 make_test_params_pack(
152 {true, true}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
153 make_test_params_pack(
154 {true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 10000, 2, 2),
155 make_test_params_pack(
156 {true, true}, 'n', 'n', 100, 1, 100, 1.0f, 2.0f, 100, 100, 100),
157 make_test_params_pack({true, false}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f,
158 100, 100, 100),
159 make_test_params_pack({false, true}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f,
160 100, 100, 100),
161
162 make_test_params_pack({true, false}, 'n', 'n', 3000, 3000, 3000, 1.0f,
163 2.0f, 3000, 3000, 3000),
164 make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f,
165 0.0f, 3000, 3000, 3000),
166 make_test_params_pack({true, false}, 'n', 't', 3000, 3000, 3000, 1.0f,
167 1.0f, 3000, 3000, 3000),
168 make_test_params_pack({true, false}, 't', 't', 3000, 3000, 3000, 1.0f,
169 2.0f, 3000, 3000, 3000),
170
171 make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
172 2.0f, 2000, 20000, 20000),
173 make_test_params_pack({false, true}, 'n', 'n', 2000, 2000, 2000, 1.0f,
174 2.0f, 2000, 2000, 2000),
175 make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f,
176 2.0f, 2000, 5000, 5000),
177 make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f,
178 2.0f, 2000, 100, 100),
179 make_test_params_pack({false, true}, 't', 'n', 2000, 2000, 2000, 1.0f,
180 0.0f, 2000, 2000, 2000),
181 make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f,
182 2.0f, 2000, 5000, 5000),
183 make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f,
184 2.0f, 5000, 100, 100),
185 make_test_params_pack({false, true}, 'n', 't', 2000, 2000, 2000, 1.0f,
186 1.0f, 2000, 2000, 2000),
187 make_test_params_pack({false, true}, 't', 't', 2000, 2000, 2000, 1.0f,
188 2.0f, 2000, 2000, 2000),
189 make_test_params_pack({true, true}, 't', 't', 2000, 5000, 2000, 1.0f,
190 2.0f, 2000, 2000, 5000),
191 make_test_params_pack({true, true}, 't', 't', 5000, 100, 2000, 1.0f,
192 2.0f, 5000, 2000, 100),
193
194 make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f,
195 3.0f, 8000, 150, 150),
196 make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f,
197 3.0f, 8000, 8000, 200),
198 make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f,
199 3.0f, 200, 300, 300));
200#endif
201
202#elif defined(BF16BF16BF16)
203
/*
 * TestGEMM instantiation for the BF16BF16BF16 build (see the enclosing
 * #elif).
 *
 * NOTE(review): test_params is defined outside this chunk; the positional
 * fields are presumably {transA, transB, M, N, K, alpha, beta, lda, ldb, ldc,
 * ...} -- confirm against its definition before editing any value.
 *
 * Entries, in order:
 *  - four entries ending in `true, dnnl_invalid_arguments` (presumably
 *    "expected to fail with this status" -- TODO confirm);
 *  - small and medium shapes, including scaling values 2.5 / 1.5 and a
 *    negative second scaling value (-1.0);
 *  - two entries built with make_test_params_with_offset;
 *  - shapes with 2000 or 3000 as the first two size values and 20 or 30 as
 *    the third, for all four 'n'/'t' combinations.
 */
204INST_TEST_CASE(TestGEMM,
205 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
206 dnnl_invalid_arguments},
207 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
208 dnnl_invalid_arguments},
209 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
210 dnnl_invalid_arguments},
211 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
212 dnnl_invalid_arguments},
213
214 test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4},
215 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
216 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
217 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
218 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80},
219 test_params {'N', 'n', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
220 test_params {'n', 'T', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
221 test_params {'T', 'N', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
222 test_params {'t', 't', 31, 21, 11, 2.5, 1.5, 61, 51, 81},
223 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100},
224 test_params {'n', 't', 100, 2, 58, 1.0, 2.0, 100, 100, 100},
225 test_params {'t', 'n', 2, 100, 61, 1.0, 2.0, 100, 100, 100},
226 test_params {'t', 't', 2, 100, 60, 1.0, 2.0, 100, 100, 100},
227 test_params {'n', 'n', 2, 2, 11, 1.0, -1.0, 20, 2, 2},
228 test_params {'t', 't', 2, 2, 11, 1.0, -1.0, 2, 20, 2},
229
230 make_test_params_with_offset(
231 {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100),
232 make_test_params_with_offset(
233 {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100),
234
235 test_params {'n', 'n', 2000, 2000, 20, 1.0, 0.0, 20, 2000, 2000},
236 test_params {'n', 'n', 3000, 3000, 30, 1.0, 0.0, 30, 3000, 3000},
237 test_params {'t', 'n', 2000, 2000, 20, 1.0, 0.0, 2000, 2000, 2000},
238 test_params {'t', 'n', 3000, 3000, 30, 1.0, 0.0, 3000, 3000, 3000},
239 test_params {'n', 't', 2000, 2000, 20, 1.0, 0.0, 20, 20, 2000},
240 test_params {'n', 't', 3000, 3000, 30, 1.0, 0.0, 30, 30, 3000},
241 test_params {'t', 't', 2000, 2000, 20, 1.0, 0.0, 2000, 20, 2000},
242 test_params {'t', 't', 3000, 3000, 30, 1.0, 0.0, 3000, 30, 3000});
243
244#else
/*
 * Named test_igemm_params presets shared by the test cases in this #else
 * branch. Each preset pairs a character ('F', 'C' or 'R', matching the
 * fix_/col_/row_ name prefix) with three boolean flags.
 * NOTE(review): test_igemm_params is declared elsewhere; presumably the
 * character selects the C-offset kind (fixed / column / row) and the flags
 * enable non-zero A, B and C offsets respectively -- confirm.
 */
/* *_use_oc: only the last flag is set. */
245constexpr test_igemm_params fix_use_oc = {'F', false, false, true};
246constexpr test_igemm_params col_use_oc = {'C', false, false, true};
247constexpr test_igemm_params row_use_oc = {'R', false, false, true};
248
/* *_use_all_offsets: all three flags are set. */
249constexpr test_igemm_params fix_use_all_offsets = {'F', true, true, true};
250constexpr test_igemm_params col_use_all_offsets = {'C', true, true, true};
251constexpr test_igemm_params row_use_all_offsets = {'R', true, true, true};
252
/* *_no_offsets: no flag is set. */
253constexpr test_igemm_params fix_no_offsets = {'F', false, false, false};
254constexpr test_igemm_params col_no_offsets = {'C', false, false, false};
255constexpr test_igemm_params row_no_offsets = {'R', false, false, false};
256
/*
 * TestGEMM_expected_failures instantiation: every entry ends in
 * `true, dnnl_invalid_arguments` (presumably "expected to fail with this
 * status" -- TODO confirm against the test_params definition).
 *
 * The same four shape/flag combinations are repeated in four groups that
 * differ only in the two brace-initialized slots before `true`:
 *  - both slots empty ({}, {});
 *  - fix_use_oc in the first slot;
 *  - fix_use_all_offsets in the first slot;
 *  - a two-bool brace list in the second slot.
 */
257INST_TEST_CASE(TestGEMM_expected_failures,
258 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true,
259 dnnl_invalid_arguments},
260 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true,
261 dnnl_invalid_arguments},
262 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true,
263 dnnl_invalid_arguments},
264 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true,
265 dnnl_invalid_arguments},
266
267 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_oc, {}, true,
268 dnnl_invalid_arguments},
269 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_oc, {}, true,
270 dnnl_invalid_arguments},
271 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_oc, {}, true,
272 dnnl_invalid_arguments},
273 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_oc, {}, true,
274 dnnl_invalid_arguments},
275
276 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_all_offsets,
277 {}, true, dnnl_invalid_arguments},
278 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_all_offsets,
279 {}, true, dnnl_invalid_arguments},
280 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_all_offsets,
281 {}, true, dnnl_invalid_arguments},
282 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_all_offsets,
283 {}, true, dnnl_invalid_arguments},
284
285 test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {true, true},
286 true, dnnl_invalid_arguments},
287 test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {false, true},
288 true, dnnl_invalid_arguments},
289 test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, false},
290 true, dnnl_invalid_arguments},
291 test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {false, true},
292 true, dnnl_invalid_arguments});
293
/*
 * TestGEMM_stkmem instantiation (CPU_INST_TEST_CASE) for the #else branch:
 * two 'n', 'n' entries with size values 10 x 4000 x 2 and 10 x 5000 x 2,
 * both using the fix_use_all_offsets preset.
 * NOTE(review): the suite name suggests these shapes target a stack-memory
 * code path -- confirm against the implementation before changing sizes.
 */
294CPU_INST_TEST_CASE(TestGEMM_stkmem,
295 test_params {'n', 'n', 10, 4000, 2, 1.0, 0.0, 2, 4000, 4000,
296 fix_use_all_offsets},
297 test_params {'n', 'n', 10, 5000, 2, 1.0, 0.0, 2, 5000, 5000,
298 fix_use_all_offsets});
299
/*
 * TestGEMM_general_cases_fix_offset instantiation: general shapes combined
 * with the three 'F' presets, in three groups separated by blank lines:
 *  - fix_use_oc (13 entries; the first four use scaling values 1.0 / 0.0);
 *  - fix_use_all_offsets (9 entries);
 *  - fix_no_offsets (9 entries).
 * All scaling values in this suite are whole numbers.
 */
300INST_TEST_CASE(TestGEMM_general_cases_fix_offset,
301 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
302 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
303 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
304 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc},
305 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
306 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
307 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
308 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc},
309 test_params {
310 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_oc},
311 test_params {
312 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
313 test_params {
314 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
315 test_params {
316 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc},
317 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_use_oc},
318
319 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
320 fix_use_all_offsets},
321 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
322 fix_use_all_offsets},
323 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
324 fix_use_all_offsets},
325 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
326 fix_use_all_offsets},
327 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
328 fix_use_all_offsets},
329 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
330 fix_use_all_offsets},
331 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
332 fix_use_all_offsets},
333 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
334 fix_use_all_offsets},
335 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
336 fix_use_all_offsets},
337
338 test_params {
339 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
340 test_params {
341 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
342 test_params {
343 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
344 test_params {
345 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets},
346 test_params {
347 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
348 test_params {
349 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
350 test_params {
351 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
352 test_params {
353 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets},
354 test_params {
355 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_no_offsets});
356
/*
 * TestGEMM_general_cases_col_offset instantiation: general shapes combined
 * with the three 'C' presets, in three groups separated by blank lines:
 *  - col_use_oc (13 entries; the first four use scaling values 1.0 / 0.0);
 *  - col_use_all_offsets (9 entries);
 *  - col_no_offsets (9 entries).
 * All scaling values in this suite are whole numbers.
 */
357INST_TEST_CASE(TestGEMM_general_cases_col_offset,
358 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
359 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
360 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
361 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc},
362 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
363 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
364 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
365 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc},
366 test_params {
367 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_oc},
368 test_params {
369 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
370 test_params {
371 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
372 test_params {
373 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc},
374 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_use_oc},
375
376 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
377 col_use_all_offsets},
378 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
379 col_use_all_offsets},
380 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
381 col_use_all_offsets},
382 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
383 col_use_all_offsets},
384 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
385 col_use_all_offsets},
386 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
387 col_use_all_offsets},
388 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
389 col_use_all_offsets},
390 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
391 col_use_all_offsets},
392 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
393 col_use_all_offsets},
394
395 test_params {
396 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
397 test_params {
398 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
399 test_params {
400 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
401 test_params {
402 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets},
403 test_params {
404 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_no_offsets},
405 test_params {
406 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
407 test_params {
408 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
409 test_params {
410 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets},
411 test_params {
412 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_no_offsets});
413
/*
 * TestGEMM_general_cases_row_offset instantiation: general shapes combined
 * with the three 'R' presets, in three groups separated by blank lines:
 *  - row_use_oc (13 entries; the first four use scaling values 1.0 / 0.0);
 *  - row_use_all_offsets (9 entries);
 *  - row_no_offsets (9 entries).
 * All scaling values in this suite are whole numbers.
 */
414INST_TEST_CASE(TestGEMM_general_cases_row_offset,
415 test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
416 test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
417 test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
418 test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc},
419 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
420 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
421 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
422 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc},
423 test_params {
424 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_oc},
425 test_params {
426 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
427 test_params {
428 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
429 test_params {
430 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc},
431 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_use_oc},
432
433 test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
434 row_use_all_offsets},
435 test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
436 row_use_all_offsets},
437 test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
438 row_use_all_offsets},
439 test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80,
440 row_use_all_offsets},
441 test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100,
442 row_use_all_offsets},
443 test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100,
444 row_use_all_offsets},
445 test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
446 row_use_all_offsets},
447 test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100,
448 row_use_all_offsets},
449 test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2,
450 row_use_all_offsets},
451
452 test_params {
453 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
454 test_params {
455 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
456 test_params {
457 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
458 test_params {
459 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets},
460 test_params {
461 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_no_offsets},
462 test_params {
463 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
464 test_params {
465 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
466 test_params {
467 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets},
468 test_params {
469 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_no_offsets});
470
/*
 * TestGEMM_fractional_scales_fix_offset instantiation (CPU_INST_TEST_CASE).
 * The same nine shape / alpha / beta combinations are repeated for each of
 * the three 'F' presets, in this order: fix_use_oc, fix_use_all_offsets,
 * fix_no_offsets.
 */
471CPU_INST_TEST_CASE(TestGEMM_fractional_scales_fix_offset,
472 /* alpha and beta have non-zero fractional part */
473 test_params {
474 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_oc},
475 test_params {
476 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_oc},
477 test_params {
478 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_oc},
479 test_params {
480 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_oc},
481 test_params {
482 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_oc},
483 test_params {
484 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_oc},
485 test_params {
486 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_oc},
487 test_params {
488 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_oc},
489 test_params {
490 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, fix_use_oc},
491
492 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
493 fix_use_all_offsets},
494 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
495 fix_use_all_offsets},
496 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
497 fix_use_all_offsets},
498 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
499 fix_use_all_offsets},
500 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
501 fix_use_all_offsets},
502 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
503 fix_use_all_offsets},
504 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
505 fix_use_all_offsets},
506 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
507 fix_use_all_offsets},
508 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
509 fix_use_all_offsets},
510
511 test_params {
512 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_no_offsets},
513 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
514 fix_no_offsets},
515 test_params {
516 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_no_offsets},
517 test_params {
518 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_no_offsets},
519 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
520 fix_no_offsets},
521 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
522 fix_no_offsets},
523 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
524 fix_no_offsets},
525 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
526 fix_no_offsets},
527 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
528 fix_no_offsets});
529
/*
 * TestGEMM_fractional_scales_col_offset instantiation (CPU_INST_TEST_CASE).
 * The same nine shape / alpha / beta combinations are repeated for each of
 * the three 'C' presets, in this order: col_use_oc, col_use_all_offsets,
 * col_no_offsets.
 */
530CPU_INST_TEST_CASE(TestGEMM_fractional_scales_col_offset,
531 /* alpha and beta have non-zero fractional part */
532 test_params {
533 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_oc},
534 test_params {
535 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_oc},
536 test_params {
537 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_oc},
538 test_params {
539 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_oc},
540 test_params {
541 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_oc},
542 test_params {
543 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_oc},
544 test_params {
545 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_oc},
546 test_params {
547 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_oc},
548 test_params {
549 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, col_use_oc},
550
551 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
552 col_use_all_offsets},
553 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
554 col_use_all_offsets},
555 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
556 col_use_all_offsets},
557 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
558 col_use_all_offsets},
559 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
560 col_use_all_offsets},
561 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
562 col_use_all_offsets},
563 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
564 col_use_all_offsets},
565 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
566 col_use_all_offsets},
567 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
568 col_use_all_offsets},
569
570 test_params {
571 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_no_offsets},
572 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
573 col_no_offsets},
574 test_params {
575 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_no_offsets},
576 test_params {
577 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_no_offsets},
578 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
579 col_no_offsets},
580 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
581 col_no_offsets},
582 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
583 col_no_offsets},
584 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
585 col_no_offsets},
586 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
587 col_no_offsets});
588
/*
 * TestGEMM_fractional_scales_row_offset instantiation (CPU_INST_TEST_CASE).
 * The same nine shape / alpha / beta combinations are repeated for each of
 * the three 'R' presets, in this order: row_use_oc, row_use_all_offsets,
 * row_no_offsets.
 */
589CPU_INST_TEST_CASE(TestGEMM_fractional_scales_row_offset,
590 /* alpha and beta have non-zero fractional part */
591 test_params {
592 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_oc},
593 test_params {
594 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_oc},
595 test_params {
596 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_oc},
597 test_params {
598 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_oc},
599 test_params {
600 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_oc},
601 test_params {
602 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_oc},
603 test_params {
604 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_oc},
605 test_params {
606 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_oc},
607 test_params {
608 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, row_use_oc},
609
610 test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80,
611 row_use_all_offsets},
612 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
613 row_use_all_offsets},
614 test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80,
615 row_use_all_offsets},
616 test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80,
617 row_use_all_offsets},
618 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
619 row_use_all_offsets},
620 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
621 row_use_all_offsets},
622 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
623 row_use_all_offsets},
624 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
625 row_use_all_offsets},
626 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
627 row_use_all_offsets},
628
629 test_params {
630 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_no_offsets},
631 test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120,
632 row_no_offsets},
633 test_params {
634 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_no_offsets},
635 test_params {
636 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_no_offsets},
637 test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100,
638 row_no_offsets},
639 test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100,
640 row_no_offsets},
641 test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100,
642 row_no_offsets},
643 test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100,
644 row_no_offsets},
645 test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2,
646 row_no_offsets});
647
/*
 * TestGEMV instantiation (CPU_INST_TEST_CASE) for the #else branch.
 * In every entry one of the first two size values is 1.
 *
 * Three groups of eight entries over the same shapes, separated by blank
 * lines:
 *  - fix_no_offsets with scaling values 1.0f / 0.0f;
 *  - fix_no_offsets with scaling values 1.0f / 1.0f;
 *  - scaling values 1.0f / 1.0f with inline {'F', ...} presets that set
 *    different combinations of the three boolean flags.
 */
648CPU_INST_TEST_CASE(TestGEMV,
649 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1,
650 fix_no_offsets},
651 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000,
652 fix_no_offsets},
653 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1,
654 fix_no_offsets},
655 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000,
656 fix_no_offsets},
657 test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1,
658 fix_no_offsets},
659 test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000,
660 fix_no_offsets},
661 test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1,
662 fix_no_offsets},
663 test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000,
664 fix_no_offsets},
665
666 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1,
667 fix_no_offsets},
668 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000,
669 fix_no_offsets},
670 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1,
671 fix_no_offsets},
672 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000,
673 fix_no_offsets},
674 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1,
675 fix_no_offsets},
676 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000,
677 fix_no_offsets},
678 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1,
679 fix_no_offsets},
680 test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000,
681 fix_no_offsets},
682
683 test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1,
684 {'F', true, false, false}},
685 test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000,
686 {'F', true, true, false}},
687 test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1,
688 {'F', false, true, false}},
689 test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000,
690 {'F', true, false, true}},
691 test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1,
692 {'F', false, true, true}},
693 test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000,
694 {'F', true, true, false}},
695 test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1,
696 {'F', true, false, false}},
697 test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000,
698 {'F', false, true, false}});
699
// TestGEMV_kblocking parameter table: every entry uses 7000 as the third size
// argument (presumably K -- confirm against the test_params definition) and
// passes fix_no_offsets.
// The first eight entries have 1 as the second size argument, the last eight
// have 1 as the first size argument. Within each half, the first four entries
// use 0.0f as the second float argument and the next four use 1.0f.
// NOTE(review): the 0.0f and 1.0f sub-blocks are not exact mirrors of each
// other (400 vs 500 in the first half; 100 vs 550 and a trailing 10 vs 500 in
// the second half) -- presumably deliberate variety; confirm before
// "normalising" them.
700CPU_INST_TEST_CASE(TestGEMV_kblocking,
 // First half: second size argument is 1; first character argument is 't'.
701 test_params {
702 't', 'n', 20, 1, 7000, 1.0f, 0.0f, 20, 1, 500, fix_no_offsets},
703 test_params {'t', 't', 50, 1, 7000, 1.0f, 0.0f, 50, 7000, 500,
704 fix_no_offsets},
705 test_params {'t', 'n', 400, 1, 7000, 1.0f, 0.0f, 400, 1, 500,
706 fix_no_offsets},
707 test_params {'t', 't', 500, 1, 7000, 1.0f, 0.0f, 500, 7000, 500,
708 fix_no_offsets},
709 test_params {
710 't', 'n', 20, 1, 7000, 1.0f, 1.0f, 20, 1, 500, fix_no_offsets},
711 test_params {'t', 't', 50, 1, 7000, 1.0f, 1.0f, 50, 7000, 500,
712 fix_no_offsets},
713 test_params {'t', 'n', 500, 1, 7000, 1.0f, 1.0f, 500, 1, 500,
714 fix_no_offsets},
715 test_params {'t', 't', 500, 1, 7000, 1.0f, 1.0f, 500, 7000, 500,
716 fix_no_offsets},
717
 // Second half: first size argument is 1; second character argument is 'n'.
718 test_params {'n', 'n', 1, 40, 7000, 1.0f, 0.0f, 7000, 40, 500,
719 fix_no_offsets},
720 test_params {'t', 'n', 1, 10, 7000, 1.0f, 0.0f, 7000, 10, 10,
721 fix_no_offsets},
722 test_params {'n', 'n', 1, 400, 7000, 1.0f, 0.0f, 7000, 400, 500,
723 fix_no_offsets},
724 test_params {'t', 'n', 1, 100, 7000, 1.0f, 0.0f, 7000, 100, 500,
725 fix_no_offsets},
726 test_params {'n', 'n', 1, 40, 7000, 1.0f, 1.0f, 7000, 40, 500,
727 fix_no_offsets},
728 test_params {'t', 'n', 1, 10, 7000, 1.0f, 1.0f, 7000, 10, 500,
729 fix_no_offsets},
730 test_params {'n', 'n', 1, 400, 7000, 1.0f, 1.0f, 7000, 400, 500,
731 fix_no_offsets},
732 test_params {'t', 'n', 1, 550, 7000, 1.0f, 1.0f, 7000, 550, 550,
733 fix_no_offsets});
734
// TestGEMM_packed parameter table: every entry is built with
// make_test_params_pack, whose first argument is a {bool, bool} pair followed
// by the same positional list used by the plain test_params entries.
// NOTE(review): the pair presumably selects which operands are packed --
// confirm against make_test_params_pack, which is not visible here.
// Entries are grouped by the trailing offsets identifier and by shape.
735CPU_INST_TEST_CASE(TestGEMM_packed,
 // fix_use_oc, sizes 30/20/10, one entry per 'n'/'t' pairing.
736 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
737 60, 50, 80, fix_use_oc),
738 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
739 60, 50, 80, fix_use_oc),
740 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
741 60, 50, 80, fix_use_oc),
742 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
743 60, 50, 80, fix_use_oc),
744
 // Same four entries as above with fix_no_offsets.
745 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
746 60, 50, 80, fix_no_offsets),
747 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
748 60, 50, 80, fix_no_offsets),
749 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
750 60, 50, 80, fix_no_offsets),
751 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
752 60, 50, 80, fix_no_offsets),
753
 // fix_use_oc with one size argument much smaller than the others
 // (2 against 100, or 2/2 against 10000); second float argument is 2.0f.
754 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
755 100, 100, 100, fix_use_oc),
756 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
757 100, 100, 100, fix_use_oc),
758 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
759 100, 100, 100, fix_use_oc),
760 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
761 100, 100, 100, fix_use_oc),
762 make_test_params_pack({true, false}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
763 10000, 2, 2, fix_use_oc),
764
 // row_use_oc with the 100/2 shapes.
765 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
766 100, 100, 100, row_use_oc),
767 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
768 100, 100, 100, row_use_oc),
769 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
770 100, 100, 100, row_use_oc),
771 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
772 100, 100, 100, row_use_oc),
773
 // row_no_offsets; the last three entries use 1 (not 2) as the small size.
774 make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f,
775 100, 100, 100, row_no_offsets),
776 make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f,
777 100, 100, 100, row_no_offsets),
778 make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f,
779 100, 100, 100, row_no_offsets),
780 make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f,
781 100, 100, 100, row_no_offsets),
782
 // row_use_oc with sizes 30/20/10, plus one 2/2/10000 entry.
783 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
784 60, 50, 80, row_use_oc),
785 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
786 60, 50, 80, row_use_oc),
787 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
788 60, 50, 80, row_use_oc),
789 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
790 60, 50, 80, row_use_oc),
791 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
792 10000, 2, 2, row_use_oc),
793
 // col_use_oc with the 100/2 shapes, plus one 2/2/10000 entry.
794 make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f,
795 100, 100, 100, col_use_oc),
796 make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f,
797 100, 100, 100, col_use_oc),
798 make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f,
799 100, 100, 100, col_use_oc),
800 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
801 10000, 2, 2, col_use_oc),
802
 // col_no_offsets with 1 as the small size, plus one 2/2/10000 entry.
803 make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f,
804 100, 100, 100, col_no_offsets),
805 make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f,
806 100, 100, 100, col_no_offsets),
807 make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f,
808 100, 100, 100, col_no_offsets),
809 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
810 10000, 2, 2, col_no_offsets),
811
 // col_use_oc with sizes 30/20/10, plus one 2/2/10000 entry.
812 make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f,
813 60, 50, 80, col_use_oc),
814 make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f,
815 60, 50, 80, col_use_oc),
816 make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f,
817 60, 50, 80, col_use_oc),
818 make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f,
819 60, 50, 80, col_use_oc),
820 make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f,
821 10000, 2, 2, col_use_oc),
822
 // fix_no_offsets with 1 as the first or second size argument and 200
 // elsewhere; the second float argument alternates 1.0f / 0.0f.
823 make_test_params_pack({false, true}, 'N', 'n', 200, 1, 200, 1.0f, 1.0f,
824 200, 200, 200, fix_no_offsets),
825 make_test_params_pack({true, false}, 't', 'N', 200, 1, 200, 1.0f, 0.0f,
826 200, 200, 200, fix_no_offsets),
827 make_test_params_pack({true, true}, 'T', 'N', 1, 200, 200, 1.0f, 1.0f,
828 1, 200, 200, fix_no_offsets),
829 make_test_params_pack({false, true}, 'n', 'T', 1, 200, 200, 1.0f, 0.0f,
830 200, 200, 200, fix_no_offsets));
831
// TestGEMM_heavy parameter table: four entries, one per 'n'/'t' pairing of the
// two leading character arguments. All six integer arguments are 3000, the
// float arguments are 1.0 and 0.0, and the offsets argument is fix_use_oc.
832CPU_INST_TEST_CASE(TestGEMM_heavy,
833 test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
834 fix_use_oc},
835 test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
836 fix_use_oc},
837 test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
838 fix_use_oc},
839 test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000,
840 fix_use_oc});
841
// TestGEMM_packed_heavy parameter table: large-size entries built with
// make_test_params_pack (leading {bool, bool} pair, then the usual positional
// list). Every entry uses one of fix_use_oc, row_use_oc or col_use_oc.
// NOTE(review): the {bool, bool} pair presumably selects which operands are
// packed -- confirm against make_test_params_pack, which is not visible here.
842CPU_INST_TEST_CASE(TestGEMM_packed_heavy,
 // 3000/3000/3000 for each 'n'/'t' pairing; second float argument is 0.0f.
843 make_test_params_pack({false, true}, 'n', 'n', 3000, 3000, 3000, 1.0f,
844 0.0f, 3000, 3000, 3000, fix_use_oc),
845 make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f,
846 0.0f, 3000, 3000, 3000, fix_use_oc),
847 make_test_params_pack({true, true}, 'n', 't', 3000, 3000, 3000, 1.0f,
848 0.0f, 3000, 3000, 3000, row_use_oc),
849 make_test_params_pack({true, true}, 't', 't', 3000, 3000, 3000, 1.0f,
850 0.0f, 3000, 3000, 3000, row_use_oc),
851
 // 2000/5000/2000 with col_use_oc and non-integral second float argument.
852 make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f,
853 1.35f, 2000, 5000, 5000, col_use_oc),
854 make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f,
855 1.77f, 2000, 5000, 5000, col_use_oc),
856
 // 200/20000/2000, once per offsets identifier (fix, row, col).
857 make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
858 2.0f, 2000, 20000, 20000, fix_use_oc),
859 make_test_params_pack({true, true}, 'n', 'n', 200, 20000, 2000, 1.0f,
860 2.0f, 2000, 20000, 20000, row_use_oc),
861 make_test_params_pack({true, false}, 'n', 'n', 200, 20000, 2000, 1.0f,
862 2.0f, 2000, 20000, 20000, col_use_oc),
863
 // 5000/100/2000 with row_use_oc and col_use_oc.
864 make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f,
865 2.0f, 2000, 100, 100, row_use_oc),
866 make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f,
867 2.0f, 5000, 100, 100, col_use_oc),
868
 // Third size argument 8000 with small first/second sizes, once per
 // offsets identifier (fix, row, col).
869 make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f,
870 1.7f, 8000, 150, 150, fix_use_oc),
871 make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f,
872 3.0f, 8000, 8000, 200, row_use_oc),
873 make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f,
874 0.0f, 200, 300, 300, col_use_oc));
875
876#endif
877