1 | /******************************************************************************* |
2 | * Copyright 2019-2021 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #if defined(FP16) || defined(FP32) || defined(F16F16F32) || defined(BF16BF16F32) |
18 | INST_TEST_CASE(TestGEMM, /* GEMM parameter sets used when FP16, FP32, F16F16F32 or BF16BF16F32 is defined. Presumed test_params field order: transA, transB, M, N, K, alpha, beta, lda, ldb, ldc -- TODO confirm against the test_params definition. */ |
19 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, |
20 | dnnl_invalid_arguments}, |
21 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, |
22 | dnnl_invalid_arguments}, |
23 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, |
24 | dnnl_invalid_arguments}, |
25 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, |
26 | dnnl_invalid_arguments}, |
27 | /* The four entries above end with true and dnnl_invalid_arguments (presumably an expect-failure flag plus the expected status -- TODO confirm); the entries below omit those trailing fields. */ |
28 | test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4}, |
29 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
30 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
31 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
32 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
33 | test_params {'N', 'n', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, |
34 | test_params {'n', 'T', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, |
35 | test_params {'T', 'N', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, |
36 | test_params {'t', 't', 31, 21, 11, 2.0, 1.5, 61, 51, 81}, |
37 | test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100}, |
38 | test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100}, |
39 | test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100}, |
40 | test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100}, |
41 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2}, |
42 | test_params {'t', 't', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2}, |
43 | /* Two cases built by make_test_params_with_offset; the leading brace triple is presumably the per-matrix offsets -- TODO confirm. */ |
44 | make_test_params_with_offset( |
45 | {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100), |
46 | make_test_params_with_offset( |
47 | {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), |
48 | /* Problems with 2000 or 3000 in every size field, one pair per 'n'/'t' combination, with the seventh field (presumably beta) at 0.0. */ |
49 | test_params {'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, |
50 | test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, |
51 | test_params {'t', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, |
52 | test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, |
53 | test_params {'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, |
54 | test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}, |
55 | test_params {'t', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000}, |
56 | test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000}); |
57 | |
58 | CPU_INST_TEST_CASE(TestGEMV, /* Matrix-vector shaped cases: every entry has 1 in its first or second size field. CPU_INST_TEST_CASE is presumably the CPU-only variant of INST_TEST_CASE -- TODO confirm. */ |
59 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1}, |
60 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000}, |
61 | test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8000, 300, 300}, |
62 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1}, |
63 | test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 200, 1, 1}, |
64 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000}, |
65 | test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 1, 300, 300}, |
66 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1}, |
67 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000}, |
68 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1}, |
69 | test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 200, 8000, 1}, |
70 | test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000}, |
71 | /* Same size triples as the first group, with larger values in the last three integer fields (presumably padded leading dimensions -- TODO confirm). */ |
72 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1, 30}, |
73 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 1}, |
74 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1010, 20, 30}, |
75 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2010, 3010, 3010}, |
76 | test_params {'n', 'n', 1, 300, 8000, 1.0f, 0.0f, 8010, 310, 310}, |
77 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2010, 20, 30}, |
78 | test_params {'t', 'n', 200, 1, 8000, 1.0f, 0.0f, 210, 20, 30}, |
79 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 20, 3010, 3010}, |
80 | test_params {'t', 'n', 1, 300, 8000, 1.0f, 0.0f, 20, 310, 310}, |
81 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1010, 1010, 20}, |
82 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2010, 2010, 3010}, |
83 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2010, 1010, 20}, |
84 | test_params {'t', 't', 200, 1, 8000, 1.0f, 0.0f, 210, 8010, 20}, |
85 | test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 20, 2010, 3010}, |
86 | /* First group repeated with the seventh field (presumably beta) set to 1.0f. NOTE(review): the final entry uses 4000 where the first group uses 2000 -- confirm this is intentional. */ |
87 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1}, |
88 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000}, |
89 | test_params {'n', 'n', 1, 300, 8000, 1.0f, 1.0f, 8000, 300, 300}, |
90 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1}, |
91 | test_params {'t', 'n', 200, 1, 8000, 1.0f, 1.0f, 200, 1, 1}, |
92 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000}, |
93 | test_params {'t', 'n', 1, 300, 8000, 1.0f, 1.0f, 1, 300, 300}, |
94 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1}, |
95 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000}, |
96 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1}, |
97 | test_params {'t', 't', 200, 1, 8000, 1.0f, 1.0f, 200, 8000, 1}, |
98 | test_params {'t', 't', 1, 3000, 4000, 1.0f, 1.0f, 1, 4000, 3000}); |
99 | |
100 | /** |
101 | * These cases are used to test the small-N avx-512 sgemm TN kernels. |
102 | * Note: The kernels assume a column major layout while the external |
103 | * APIs assume row major layout, so the M/N and transA/transB values |
104 | * are swapped. |
105 | */ |
106 | CPU_INST_TEST_CASE(TestGEMM_smalln, /* Every entry uses 'n', 't' with a first size field of 5, 7, 4 or 8; only the group with 7 varies the two floating-point scale fields. */ |
107 | test_params {'n', 't', 5, 512, 512, 1.0f, 1.0f, 512, 512, 512}, |
108 | test_params {'n', 't', 5, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, |
109 | test_params {'n', 't', 5, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, |
110 | test_params {'n', 't', 5, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, |
111 | test_params {'n', 't', 7, 512, 512, 0.0f, 1.0f, 512, 512, 512}, |
112 | test_params {'n', 't', 7, 512, 1536, 1.0f, 0.0f, 1536, 1536, 512}, |
113 | test_params {'n', 't', 7, 512, 2048, 0.5f, 0.5f, 2048, 2048, 512}, |
114 | test_params {'n', 't', 7, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, |
115 | test_params {'n', 't', 4, 512, 512, 1.0f, 1.0f, 512, 512, 512}, |
116 | test_params {'n', 't', 4, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, |
117 | test_params {'n', 't', 4, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, |
118 | test_params {'n', 't', 4, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}, |
119 | test_params {'n', 't', 8, 512, 512, 1.0f, 1.0f, 512, 512, 512}, |
120 | test_params {'n', 't', 8, 512, 1536, 1.0f, 1.0f, 1536, 1536, 512}, |
121 | test_params {'n', 't', 8, 512, 2048, 1.0f, 1.0f, 2048, 2048, 512}, |
122 | test_params {'n', 't', 8, 2048, 512, 1.0f, 1.0f, 512, 512, 2048}); |
123 | |
124 | CPU_INST_TEST_CASE(TestGEMM_stkmem, /* Four 'n', 'n' cases with a first size field of 2 and third size fields of 83, 200, 251 and 256. NOTE(review): the name suggests a stack-memory code path is targeted -- confirm against the kernel sources. */ |
125 | test_params {'n', 'n', 2, 48, 83, 1.0f, 0.0f, 83, 48, 48}, |
126 | test_params {'n', 'n', 2, 48, 200, 1.0f, 0.0f, 200, 48, 48}, |
127 | test_params {'n', 'n', 2, 16, 251, 1.0f, 0.0f, 251, 16, 16}, |
128 | test_params {'n', 'n', 2, 16, 256, 1.0f, 0.0f, 256, 16, 16}); |
129 | |
130 | #if defined(FP32) || defined(BF16BF16F32) |
131 | INST_TEST_CASE(TestGEMM_packed, /* Cases carrying a brace pair of booleans, compiled only when FP32 or BF16BF16F32 is defined. The pair is presumably the pack-A / pack-B flags -- TODO confirm against test_params and make_test_params_pack. */ |
132 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {false, true}, |
133 | true, dnnl_invalid_arguments}, |
134 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {true, false}, |
135 | true, dnnl_invalid_arguments}, |
136 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, true}, |
137 | true, dnnl_invalid_arguments}, |
138 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {true, true}, |
139 | true, dnnl_invalid_arguments}, |
140 | /* The four entries above end with true and dnnl_invalid_arguments (presumably expected failures -- TODO confirm); the entries below go through make_test_params_pack with the boolean pair as first argument. */ |
141 | make_test_params_pack( |
142 | {true, false}, 'N', 'n', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), |
143 | make_test_params_pack( |
144 | {false, true}, 'n', 'T', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), |
145 | make_test_params_pack( |
146 | {true, false}, 'T', 'N', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), |
147 | make_test_params_pack( |
148 | {true, true}, 't', 't', 31, 21, 11, 1.0f, 1.5f, 61, 51, 81), |
149 | make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, |
150 | 100, 100, 100), |
151 | make_test_params_pack( |
152 | {true, true}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), |
153 | make_test_params_pack( |
154 | {true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, 10000, 2, 2), |
155 | make_test_params_pack( |
156 | {true, true}, 'n', 'n', 100, 1, 100, 1.0f, 2.0f, 100, 100, 100), |
157 | make_test_params_pack({true, false}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f, |
158 | 100, 100, 100), |
159 | make_test_params_pack({false, true}, 'n', 'n', 1, 100, 100, 1.0f, 2.0f, |
160 | 100, 100, 100), |
161 | /* Problems with 3000 in every size field and the {true, false} pair, one per 'n'/'t' combination. */ |
162 | make_test_params_pack({true, false}, 'n', 'n', 3000, 3000, 3000, 1.0f, |
163 | 2.0f, 3000, 3000, 3000), |
164 | make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f, |
165 | 0.0f, 3000, 3000, 3000), |
166 | make_test_params_pack({true, false}, 'n', 't', 3000, 3000, 3000, 1.0f, |
167 | 1.0f, 3000, 3000, 3000), |
168 | make_test_params_pack({true, false}, 't', 't', 3000, 3000, 3000, 1.0f, |
169 | 2.0f, 3000, 3000, 3000), |
170 | /* Larger problems (size fields between 100 and 20000) in which the second boolean of the pair is always true. */ |
171 | make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f, |
172 | 2.0f, 2000, 20000, 20000), |
173 | make_test_params_pack({false, true}, 'n', 'n', 2000, 2000, 2000, 1.0f, |
174 | 2.0f, 2000, 2000, 2000), |
175 | make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f, |
176 | 2.0f, 2000, 5000, 5000), |
177 | make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f, |
178 | 2.0f, 2000, 100, 100), |
179 | make_test_params_pack({false, true}, 't', 'n', 2000, 2000, 2000, 1.0f, |
180 | 0.0f, 2000, 2000, 2000), |
181 | make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f, |
182 | 2.0f, 2000, 5000, 5000), |
183 | make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f, |
184 | 2.0f, 5000, 100, 100), |
185 | make_test_params_pack({false, true}, 'n', 't', 2000, 2000, 2000, 1.0f, |
186 | 1.0f, 2000, 2000, 2000), |
187 | make_test_params_pack({false, true}, 't', 't', 2000, 2000, 2000, 1.0f, |
188 | 2.0f, 2000, 2000, 2000), |
189 | make_test_params_pack({true, true}, 't', 't', 2000, 5000, 2000, 1.0f, |
190 | 2.0f, 2000, 2000, 5000), |
191 | make_test_params_pack({true, true}, 't', 't', 5000, 100, 2000, 1.0f, |
192 | 2.0f, 5000, 2000, 100), |
193 | /* Cases whose third size field is 8000. */ |
194 | make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f, |
195 | 3.0f, 8000, 150, 150), |
196 | make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f, |
197 | 3.0f, 8000, 8000, 200), |
198 | make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f, |
199 | 3.0f, 200, 300, 300)); |
200 | #endif |
201 | |
202 | #elif defined(BF16BF16BF16) |
203 | |
204 | INST_TEST_CASE(TestGEMM, /* GEMM parameter sets used when BF16BF16BF16 is defined. Presumed test_params field order: transA, transB, M, N, K, alpha, beta, lda, ldb, ldc -- TODO confirm against the test_params definition. */ |
205 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, |
206 | dnnl_invalid_arguments}, |
207 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, |
208 | dnnl_invalid_arguments}, |
209 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, |
210 | dnnl_invalid_arguments}, |
211 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, |
212 | dnnl_invalid_arguments}, |
213 | /* The four entries above end with true and dnnl_invalid_arguments (presumably an expect-failure flag plus the expected status -- TODO confirm); the entries below omit those trailing fields. */ |
214 | test_params {'N', 'N', 1, 1, 1, 1.0, 0.0, 4, 4, 4}, |
215 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
216 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
217 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
218 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80}, |
219 | test_params {'N', 'n', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, |
220 | test_params {'n', 'T', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, |
221 | test_params {'T', 'N', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, |
222 | test_params {'t', 't', 31, 21, 11, 2.5, 1.5, 61, 51, 81}, |
223 | test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100}, |
224 | test_params {'n', 't', 100, 2, 58, 1.0, 2.0, 100, 100, 100}, |
225 | test_params {'t', 'n', 2, 100, 61, 1.0, 2.0, 100, 100, 100}, |
226 | test_params {'t', 't', 2, 100, 60, 1.0, 2.0, 100, 100, 100}, |
227 | test_params {'n', 'n', 2, 2, 11, 1.0, -1.0, 20, 2, 2}, |
228 | test_params {'t', 't', 2, 2, 11, 1.0, -1.0, 2, 20, 2}, |
229 | /* Two cases built by make_test_params_with_offset; the leading brace triple is presumably the per-matrix offsets -- TODO confirm. */ |
230 | make_test_params_with_offset( |
231 | {1, 2, 3}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, 100, 100, 100), |
232 | make_test_params_with_offset( |
233 | {30, 20, 10}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, 100, 100, 100), |
234 | /* Problems with 2000 or 3000 in the first two size fields and 20 or 30 in the third, one pair per 'n'/'t' combination. */ |
235 | test_params {'n', 'n', 2000, 2000, 20, 1.0, 0.0, 20, 2000, 2000}, |
236 | test_params {'n', 'n', 3000, 3000, 30, 1.0, 0.0, 30, 3000, 3000}, |
237 | test_params {'t', 'n', 2000, 2000, 20, 1.0, 0.0, 2000, 2000, 2000}, |
238 | test_params {'t', 'n', 3000, 3000, 30, 1.0, 0.0, 3000, 3000, 3000}, |
239 | test_params {'n', 't', 2000, 2000, 20, 1.0, 0.0, 20, 20, 2000}, |
240 | test_params {'n', 't', 3000, 3000, 30, 1.0, 0.0, 30, 30, 3000}, |
241 | test_params {'t', 't', 2000, 2000, 20, 1.0, 0.0, 2000, 20, 2000}, |
242 | test_params {'t', 't', 3000, 3000, 30, 1.0, 0.0, 3000, 30, 3000}); |
243 | |
244 | #else |
245 | constexpr test_igemm_params fix_use_oc = {'F', false, false, true}; /* Presets whose flags are false, false, true. The leading character is 'F', 'C' or 'R' (presumably fixed, column and row C-offset modes, with the flags enabling the A, B and C offsets in that order -- TODO confirm against test_igemm_params). */ |
246 | constexpr test_igemm_params col_use_oc = {'C', false, false, true}; |
247 | constexpr test_igemm_params row_use_oc = {'R', false, false, true}; |
248 | /* Presets with all three flags true. */ |
249 | constexpr test_igemm_params fix_use_all_offsets = {'F', true, true, true}; |
250 | constexpr test_igemm_params col_use_all_offsets = {'C', true, true, true}; |
251 | constexpr test_igemm_params row_use_all_offsets = {'R', true, true, true}; |
252 | /* Presets with all three flags false. */ |
253 | constexpr test_igemm_params fix_no_offsets = {'F', false, false, false}; |
254 | constexpr test_igemm_params col_no_offsets = {'C', false, false, false}; |
255 | constexpr test_igemm_params row_no_offsets = {'R', false, false, false}; |
256 | |
257 | INST_TEST_CASE(TestGEMM_expected_failures, /* Four argument sets, each ending with true and dnnl_invalid_arguments, repeated under four settings of the eleventh and twelfth fields. The first group leaves both as default {}. */ |
258 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {}, true, |
259 | dnnl_invalid_arguments}, |
260 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {}, true, |
261 | dnnl_invalid_arguments}, |
262 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {}, true, |
263 | dnnl_invalid_arguments}, |
264 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {}, true, |
265 | dnnl_invalid_arguments}, |
266 | /* Same four argument sets with fix_use_oc in the eleventh field. */ |
267 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_oc, {}, true, |
268 | dnnl_invalid_arguments}, |
269 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_oc, {}, true, |
270 | dnnl_invalid_arguments}, |
271 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_oc, {}, true, |
272 | dnnl_invalid_arguments}, |
273 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_oc, {}, true, |
274 | dnnl_invalid_arguments}, |
275 | /* Same four argument sets with fix_use_all_offsets in the eleventh field. */ |
276 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, fix_use_all_offsets, |
277 | {}, true, dnnl_invalid_arguments}, |
278 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, fix_use_all_offsets, |
279 | {}, true, dnnl_invalid_arguments}, |
280 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, fix_use_all_offsets, |
281 | {}, true, dnnl_invalid_arguments}, |
282 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, fix_use_all_offsets, |
283 | {}, true, dnnl_invalid_arguments}, |
284 | /* Same four argument sets with default {} in the eleventh field and an explicit boolean pair in the twelfth (presumably pack flags -- TODO confirm). */ |
285 | test_params {'t', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, {true, true}, |
286 | true, dnnl_invalid_arguments}, |
287 | test_params {'n', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, {false, true}, |
288 | true, dnnl_invalid_arguments}, |
289 | test_params {'n', 't', 3, 2, 2, 1.0, 0.0, 3, 1, 8, {}, {true, false}, |
290 | true, dnnl_invalid_arguments}, |
291 | test_params {'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, {false, true}, |
292 | true, dnnl_invalid_arguments}); |
293 | |
294 | CPU_INST_TEST_CASE(TestGEMM_stkmem, /* Two 'n', 'n' cases with size fields 10, 4000 or 5000, and 2, both using fix_use_all_offsets. NOTE(review): the name suggests a stack-memory code path is targeted -- confirm against the kernel sources. */ |
295 | test_params {'n', 'n', 10, 4000, 2, 1.0, 0.0, 2, 4000, 4000, |
296 | fix_use_all_offsets}, |
297 | test_params {'n', 'n', 10, 5000, 2, 1.0, 0.0, 2, 5000, 5000, |
298 | fix_use_all_offsets}); |
299 | |
300 | INST_TEST_CASE(TestGEMM_general_cases_fix_offset, /* Integer-scale cases using the 'F' presets, in three groups: fix_use_oc, fix_use_all_offsets, fix_no_offsets. */ |
301 | test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, |
302 | test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, |
303 | test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, |
304 | test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, fix_use_oc}, |
305 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, |
306 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, |
307 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, |
308 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc}, |
309 | test_params { |
310 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_oc}, |
311 | test_params { |
312 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, |
313 | test_params { |
314 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, |
315 | test_params { |
316 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc}, |
317 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_use_oc}, |
318 | /* The first group minus its four 1.0 / 0.0 entries, repeated with fix_use_all_offsets. */ |
319 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
320 | fix_use_all_offsets}, |
321 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
322 | fix_use_all_offsets}, |
323 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
324 | fix_use_all_offsets}, |
325 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
326 | fix_use_all_offsets}, |
327 | test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, |
328 | fix_use_all_offsets}, |
329 | test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, |
330 | fix_use_all_offsets}, |
331 | test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
332 | fix_use_all_offsets}, |
333 | test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
334 | fix_use_all_offsets}, |
335 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, |
336 | fix_use_all_offsets}, |
337 | /* The same nine entries repeated with fix_no_offsets. */ |
338 | test_params { |
339 | 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, |
340 | test_params { |
341 | 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, |
342 | test_params { |
343 | 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, |
344 | test_params { |
345 | 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets}, |
346 | test_params { |
347 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, |
348 | test_params { |
349 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, |
350 | test_params { |
351 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, |
352 | test_params { |
353 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets}, |
354 | test_params { |
355 | 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, fix_no_offsets}); |
356 | |
357 | INST_TEST_CASE(TestGEMM_general_cases_col_offset, /* Integer-scale cases using the 'C' presets, in three groups: col_use_oc, col_use_all_offsets, col_no_offsets. */ |
358 | test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, |
359 | test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, |
360 | test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, |
361 | test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, col_use_oc}, |
362 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, |
363 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, |
364 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, |
365 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc}, |
366 | test_params { |
367 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_oc}, |
368 | test_params { |
369 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, |
370 | test_params { |
371 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, |
372 | test_params { |
373 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc}, |
374 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_use_oc}, |
375 | /* The first group minus its four 1.0 / 0.0 entries, repeated with col_use_all_offsets. */ |
376 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
377 | col_use_all_offsets}, |
378 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
379 | col_use_all_offsets}, |
380 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
381 | col_use_all_offsets}, |
382 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
383 | col_use_all_offsets}, |
384 | test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, |
385 | col_use_all_offsets}, |
386 | test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, |
387 | col_use_all_offsets}, |
388 | test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
389 | col_use_all_offsets}, |
390 | test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
391 | col_use_all_offsets}, |
392 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, |
393 | col_use_all_offsets}, |
394 | /* The same nine entries repeated with col_no_offsets. */ |
395 | test_params { |
396 | 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, |
397 | test_params { |
398 | 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, |
399 | test_params { |
400 | 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, |
401 | test_params { |
402 | 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets}, |
403 | test_params { |
404 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_no_offsets}, |
405 | test_params { |
406 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, |
407 | test_params { |
408 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, |
409 | test_params { |
410 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets}, |
411 | test_params { |
412 | 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, col_no_offsets}); |
413 | |
414 | INST_TEST_CASE(TestGEMM_general_cases_row_offset, /* Integer-scale cases using the 'R' presets, in three groups: row_use_oc, row_use_all_offsets, row_no_offsets. */ |
415 | test_params {'N', 'n', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, |
416 | test_params {'n', 'T', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, |
417 | test_params {'T', 'N', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, |
418 | test_params {'t', 't', 30, 20, 10, 1.0, 0.0, 60, 50, 80, row_use_oc}, |
419 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, |
420 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, |
421 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, |
422 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc}, |
423 | test_params { |
424 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_oc}, |
425 | test_params { |
426 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, |
427 | test_params { |
428 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, |
429 | test_params { |
430 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc}, |
431 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_use_oc}, |
432 | /* The first group minus its four 1.0 / 0.0 entries, repeated with row_use_all_offsets. */ |
433 | test_params {'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
434 | row_use_all_offsets}, |
435 | test_params {'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
436 | row_use_all_offsets}, |
437 | test_params {'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
438 | row_use_all_offsets}, |
439 | test_params {'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, |
440 | row_use_all_offsets}, |
441 | test_params {'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, |
442 | row_use_all_offsets}, |
443 | test_params {'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, |
444 | row_use_all_offsets}, |
445 | test_params {'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
446 | row_use_all_offsets}, |
447 | test_params {'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, |
448 | row_use_all_offsets}, |
449 | test_params {'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, |
450 | row_use_all_offsets}, |
451 | /* The same nine entries repeated with row_no_offsets. */ |
452 | test_params { |
453 | 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, |
454 | test_params { |
455 | 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, |
456 | test_params { |
457 | 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, |
458 | test_params { |
459 | 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets}, |
460 | test_params { |
461 | 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_no_offsets}, |
462 | test_params { |
463 | 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, |
464 | test_params { |
465 | 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, |
466 | test_params { |
467 | 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets}, |
468 | test_params { |
469 | 'n', 'n', 2, 2, 10000, 1.0, 2.0, 10000, 2, 2, row_no_offsets}); |
470 | |
471 | CPU_INST_TEST_CASE(TestGEMM_fractional_scales_fix_offset, /* Nine cases repeated under the three 'F' presets: fix_use_oc, fix_use_all_offsets, fix_no_offsets. */ |
472 | /* alpha and beta have non-zero fractional part */ |
473 | test_params { |
474 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_oc}, |
475 | test_params { |
476 | 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_oc}, |
477 | test_params { |
478 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_oc}, |
479 | test_params { |
480 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_oc}, |
481 | test_params { |
482 | 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_oc}, |
483 | test_params { |
484 | 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_oc}, |
485 | test_params { |
486 | 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_oc}, |
487 | test_params { |
488 | 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_oc}, |
489 | test_params { |
490 | 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, fix_use_oc}, |
491 | /* Same nine cases with fix_use_all_offsets. */ |
492 | test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, |
493 | fix_use_all_offsets}, |
494 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
495 | fix_use_all_offsets}, |
496 | test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, |
497 | fix_use_all_offsets}, |
498 | test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, |
499 | fix_use_all_offsets}, |
500 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
501 | fix_use_all_offsets}, |
502 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
503 | fix_use_all_offsets}, |
504 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
505 | fix_use_all_offsets}, |
506 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
507 | fix_use_all_offsets}, |
508 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
509 | fix_use_all_offsets}, |
510 | /* Same nine cases with fix_no_offsets. */ |
511 | test_params { |
512 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_no_offsets}, |
513 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
514 | fix_no_offsets}, |
515 | test_params { |
516 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_no_offsets}, |
517 | test_params { |
518 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_no_offsets}, |
519 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
520 | fix_no_offsets}, |
521 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
522 | fix_no_offsets}, |
523 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
524 | fix_no_offsets}, |
525 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
526 | fix_no_offsets}, |
527 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
528 | fix_no_offsets}); |
529 | |
530 | CPU_INST_TEST_CASE(TestGEMM_fractional_scales_col_offset, /* Nine cases repeated under the three 'C' presets: col_use_oc, col_use_all_offsets, col_no_offsets. */ |
531 | /* alpha and beta have non-zero fractional part */ |
532 | test_params { |
533 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_oc}, |
534 | test_params { |
535 | 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_oc}, |
536 | test_params { |
537 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_oc}, |
538 | test_params { |
539 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_oc}, |
540 | test_params { |
541 | 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_oc}, |
542 | test_params { |
543 | 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_oc}, |
544 | test_params { |
545 | 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_oc}, |
546 | test_params { |
547 | 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_oc}, |
548 | test_params { |
549 | 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, col_use_oc}, |
550 | /* Same nine cases with col_use_all_offsets. */ |
551 | test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, |
552 | col_use_all_offsets}, |
553 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
554 | col_use_all_offsets}, |
555 | test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, |
556 | col_use_all_offsets}, |
557 | test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, |
558 | col_use_all_offsets}, |
559 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
560 | col_use_all_offsets}, |
561 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
562 | col_use_all_offsets}, |
563 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
564 | col_use_all_offsets}, |
565 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
566 | col_use_all_offsets}, |
567 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
568 | col_use_all_offsets}, |
569 | /* Same nine cases with col_no_offsets. */ |
570 | test_params { |
571 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_no_offsets}, |
572 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
573 | col_no_offsets}, |
574 | test_params { |
575 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_no_offsets}, |
576 | test_params { |
577 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_no_offsets}, |
578 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
579 | col_no_offsets}, |
580 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
581 | col_no_offsets}, |
582 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
583 | col_no_offsets}, |
584 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
585 | col_no_offsets}, |
586 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
587 | col_no_offsets}); |
588 | |
589 | CPU_INST_TEST_CASE(TestGEMM_fractional_scales_row_offset, /* Nine cases repeated under the three 'R' presets: row_use_oc, row_use_all_offsets, row_no_offsets. */ |
590 | /* alpha and beta have non-zero fractional part */ |
591 | test_params { |
592 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_oc}, |
593 | test_params { |
594 | 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_oc}, |
595 | test_params { |
596 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_oc}, |
597 | test_params { |
598 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_oc}, |
599 | test_params { |
600 | 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_oc}, |
601 | test_params { |
602 | 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_oc}, |
603 | test_params { |
604 | 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_oc}, |
605 | test_params { |
606 | 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_oc}, |
607 | test_params { |
608 | 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, row_use_oc}, |
609 | /* Same nine cases with row_use_all_offsets. */ |
610 | test_params {'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, |
611 | row_use_all_offsets}, |
612 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
613 | row_use_all_offsets}, |
614 | test_params {'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, |
615 | row_use_all_offsets}, |
616 | test_params {'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, |
617 | row_use_all_offsets}, |
618 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
619 | row_use_all_offsets}, |
620 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
621 | row_use_all_offsets}, |
622 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
623 | row_use_all_offsets}, |
624 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
625 | row_use_all_offsets}, |
626 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
627 | row_use_all_offsets}, |
628 | /* Same nine cases with row_no_offsets. */ |
629 | test_params { |
630 | 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_no_offsets}, |
631 | test_params {'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, |
632 | row_no_offsets}, |
633 | test_params { |
634 | 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_no_offsets}, |
635 | test_params { |
636 | 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_no_offsets}, |
637 | test_params {'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, |
638 | row_no_offsets}, |
639 | test_params {'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, |
640 | row_no_offsets}, |
641 | test_params {'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, |
642 | row_no_offsets}, |
643 | test_params {'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, |
644 | row_no_offsets}, |
645 | test_params {'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 10000, 2, 2, |
646 | row_no_offsets}); |
647 | |
/*
 * TestGEMV: passes 24 test_params entries to CPU_INST_TEST_CASE (the macro
 * is defined outside this excerpt), in three groups of eight.
 * Every entry has the value 1 as either its third or its fourth positional
 * field, and each group walks the flag pairs n/n, t/n, n/t, t/t twice.
 * NOTE(review): the positional fields look like transA, transB, M, N, K,
 * alpha, beta, lda, ldb, ldc, offset configuration, which would make these
 * matrix-vector shapes -- confirm against the test_params definition.
 */
648 | CPU_INST_TEST_CASE(TestGEMV, |
/* Group 1 of 3: scale pair 1.0f / 0.0f, trailing fix_no_offsets. */
649 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 1, |
650 | fix_no_offsets}, |
651 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 3000, |
652 | fix_no_offsets}, |
653 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 1, |
654 | fix_no_offsets}, |
655 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 3000, |
656 | fix_no_offsets}, |
657 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1000, 1, |
658 | fix_no_offsets}, |
659 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 3000, |
660 | fix_no_offsets}, |
661 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 1, |
662 | fix_no_offsets}, |
663 | test_params {'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 3000, |
664 | fix_no_offsets}, |
665 | |
/* Group 2 of 3: same shapes as group 1 with scale pair 1.0f / 1.0f. */
666 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1, |
667 | fix_no_offsets}, |
668 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000, |
669 | fix_no_offsets}, |
670 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1, |
671 | fix_no_offsets}, |
672 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000, |
673 | fix_no_offsets}, |
674 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1, |
675 | fix_no_offsets}, |
676 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000, |
677 | fix_no_offsets}, |
678 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1, |
679 | fix_no_offsets}, |
680 | test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000, |
681 | fix_no_offsets}, |
682 | |
/*
 * Group 3 of 3: same shapes and scale pair as group 2, but the trailing
 * configuration is spelled inline as {'F', bool, bool, bool}.
 * NOTE(review): presumably an offset kind followed by three per-operand
 * "non-zero offset" flags -- confirm against the configuration struct.
 */
683 | test_params {'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 1, |
684 | {'F', true, false, false}}, |
685 | test_params {'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 3000, |
686 | {'F', true, true, false}}, |
687 | test_params {'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 1, |
688 | {'F', false, true, false}}, |
689 | test_params {'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 3000, |
690 | {'F', true, false, true}}, |
691 | test_params {'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1000, 1, |
692 | {'F', false, true, true}}, |
693 | test_params {'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 3000, |
694 | {'F', true, true, false}}, |
695 | test_params {'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 1, |
696 | {'F', true, false, false}}, |
697 | test_params {'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 3000, |
698 | {'F', false, true, false}}); |
699 | |
/*
 * TestGEMV_kblocking: passes 16 test_params entries to CPU_INST_TEST_CASE
 * (the macro is defined outside this excerpt). Every entry has 7000 as its
 * fifth positional field and ends with fix_no_offsets.
 * NOTE(review): the positional fields look like transA, transB, M, N, K,
 * alpha, beta, lda, ldb, ldc, offset configuration, so 7000 would be a
 * large K -- confirm against the test_params definition.
 */
700 | CPU_INST_TEST_CASE(TestGEMV_kblocking, |
/*
 * First eight entries: first flag is 't' and the fourth positional field
 * is 1. Four entries with scale pair 1.0f / 0.0f, then four with
 * 1.0f / 1.0f.
 * NOTE(review): the third entry uses 400 in the 0.0f set but 500 in the
 * 1.0f set -- confirm whether the difference is intentional.
 */
701 | test_params { |
702 | 't', 'n', 20, 1, 7000, 1.0f, 0.0f, 20, 1, 500, fix_no_offsets}, |
703 | test_params {'t', 't', 50, 1, 7000, 1.0f, 0.0f, 50, 7000, 500, |
704 | fix_no_offsets}, |
705 | test_params {'t', 'n', 400, 1, 7000, 1.0f, 0.0f, 400, 1, 500, |
706 | fix_no_offsets}, |
707 | test_params {'t', 't', 500, 1, 7000, 1.0f, 0.0f, 500, 7000, 500, |
708 | fix_no_offsets}, |
709 | test_params { |
710 | 't', 'n', 20, 1, 7000, 1.0f, 1.0f, 20, 1, 500, fix_no_offsets}, |
711 | test_params {'t', 't', 50, 1, 7000, 1.0f, 1.0f, 50, 7000, 500, |
712 | fix_no_offsets}, |
713 | test_params {'t', 'n', 500, 1, 7000, 1.0f, 1.0f, 500, 1, 500, |
714 | fix_no_offsets}, |
715 | test_params {'t', 't', 500, 1, 7000, 1.0f, 1.0f, 500, 7000, 500, |
716 | fix_no_offsets}, |
717 | |
/*
 * Last eight entries: second flag is 'n' and the third positional field
 * is 1. Four entries with scale pair 1.0f / 0.0f, then four with
 * 1.0f / 1.0f.
 * NOTE(review): the two sets are not exact mirrors -- the second entry
 * ends in 10 (0.0f set) versus 500 (1.0f set), and the fourth entry uses
 * 100 / 500 (0.0f set) versus 550 / 550 (1.0f set). Confirm intent.
 */
718 | test_params {'n', 'n', 1, 40, 7000, 1.0f, 0.0f, 7000, 40, 500, |
719 | fix_no_offsets}, |
720 | test_params {'t', 'n', 1, 10, 7000, 1.0f, 0.0f, 7000, 10, 10, |
721 | fix_no_offsets}, |
722 | test_params {'n', 'n', 1, 400, 7000, 1.0f, 0.0f, 7000, 400, 500, |
723 | fix_no_offsets}, |
724 | test_params {'t', 'n', 1, 100, 7000, 1.0f, 0.0f, 7000, 100, 500, |
725 | fix_no_offsets}, |
726 | test_params {'n', 'n', 1, 40, 7000, 1.0f, 1.0f, 7000, 40, 500, |
727 | fix_no_offsets}, |
728 | test_params {'t', 'n', 1, 10, 7000, 1.0f, 1.0f, 7000, 10, 500, |
729 | fix_no_offsets}, |
730 | test_params {'n', 'n', 1, 400, 7000, 1.0f, 1.0f, 7000, 400, 500, |
731 | fix_no_offsets}, |
732 | test_params {'t', 'n', 1, 550, 7000, 1.0f, 1.0f, 7000, 550, 550, |
733 | fix_no_offsets}); |
734 | |
/*
 * TestGEMM_packed: passes entries built by make_test_params_pack to
 * CPU_INST_TEST_CASE (both are defined outside this excerpt). Entries are
 * grouped by the trailing offset configuration; blank lines separate the
 * groups.
 * NOTE(review): the leading {bool, bool} argument presumably selects which
 * of the two input matrices is packed, and the remaining positional
 * arguments look like transA, transB, M, N, K, alpha, beta, lda, ldb, ldc,
 * offset configuration -- confirm against make_test_params_pack.
 */
735 | CPU_INST_TEST_CASE(TestGEMM_packed, |
/* 30 x 20 x 10 shapes, all four flag pairs, fix_use_oc. */
736 | make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, |
737 | 60, 50, 80, fix_use_oc), |
738 | make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, |
739 | 60, 50, 80, fix_use_oc), |
740 | make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, |
741 | 60, 50, 80, fix_use_oc), |
742 | make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, |
743 | 60, 50, 80, fix_use_oc), |
744 | |
/* Same four entries with fix_no_offsets. */
745 | make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, |
746 | 60, 50, 80, fix_no_offsets), |
747 | make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, |
748 | 60, 50, 80, fix_no_offsets), |
749 | make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, |
750 | 60, 50, 80, fix_no_offsets), |
751 | make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, |
752 | 60, 50, 80, fix_no_offsets), |
753 | |
/* Shapes with one dimension equal to 2, plus 2 x 2 x 10000; fix_use_oc. */
754 | make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, |
755 | 100, 100, 100, fix_use_oc), |
756 | make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, |
757 | 100, 100, 100, fix_use_oc), |
758 | make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, |
759 | 100, 100, 100, fix_use_oc), |
760 | make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, |
761 | 100, 100, 100, fix_use_oc), |
762 | make_test_params_pack({true, false}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, |
763 | 10000, 2, 2, fix_use_oc), |
764 | |
/* Shapes with one dimension equal to 2; row_use_oc. */
765 | make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, |
766 | 100, 100, 100, row_use_oc), |
767 | make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, |
768 | 100, 100, 100, row_use_oc), |
769 | make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, |
770 | 100, 100, 100, row_use_oc), |
771 | make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, |
772 | 100, 100, 100, row_use_oc), |
773 | |
/* row_no_offsets; the last three entries use 1 where the group above uses 2. */
774 | make_test_params_pack({false, true}, 'n', 'n', 100, 100, 2, 1.0f, 2.0f, |
775 | 100, 100, 100, row_no_offsets), |
776 | make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f, |
777 | 100, 100, 100, row_no_offsets), |
778 | make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f, |
779 | 100, 100, 100, row_no_offsets), |
780 | make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f, |
781 | 100, 100, 100, row_no_offsets), |
782 | |
/* 30 x 20 x 10 shapes plus 2 x 2 x 10000; row_use_oc. */
783 | make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, |
784 | 60, 50, 80, row_use_oc), |
785 | make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, |
786 | 60, 50, 80, row_use_oc), |
787 | make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, |
788 | 60, 50, 80, row_use_oc), |
789 | make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, |
790 | 60, 50, 80, row_use_oc), |
791 | make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, |
792 | 10000, 2, 2, row_use_oc), |
793 | |
/*
 * Shapes with one dimension equal to 2, plus 2 x 2 x 10000; col_use_oc.
 * NOTE(review): the last entry of this group is repeated verbatim as the
 * last entry of the later col_use_oc group -- confirm whether the
 * duplicate is intended.
 */
794 | make_test_params_pack({true, false}, 'n', 't', 100, 2, 100, 1.0f, 2.0f, |
795 | 100, 100, 100, col_use_oc), |
796 | make_test_params_pack({true, true}, 't', 'n', 2, 100, 100, 1.0f, 2.0f, |
797 | 100, 100, 100, col_use_oc), |
798 | make_test_params_pack({false, true}, 't', 't', 2, 100, 100, 1.0f, 2.0f, |
799 | 100, 100, 100, col_use_oc), |
800 | make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, |
801 | 10000, 2, 2, col_use_oc), |
802 | |
/* Shapes with one dimension equal to 1, plus 2 x 2 x 10000; col_no_offsets. */
803 | make_test_params_pack({true, false}, 'n', 't', 100, 1, 100, 1.0f, 2.0f, |
804 | 100, 100, 100, col_no_offsets), |
805 | make_test_params_pack({true, true}, 't', 'n', 1, 100, 100, 1.0f, 2.0f, |
806 | 100, 100, 100, col_no_offsets), |
807 | make_test_params_pack({false, true}, 't', 't', 1, 100, 100, 1.0f, 2.0f, |
808 | 100, 100, 100, col_no_offsets), |
809 | make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, |
810 | 10000, 2, 2, col_no_offsets), |
811 | |
/* 30 x 20 x 10 shapes plus 2 x 2 x 10000; col_use_oc. */
812 | make_test_params_pack({false, true}, 'N', 'n', 30, 20, 10, 1.0f, 1.0f, |
813 | 60, 50, 80, col_use_oc), |
814 | make_test_params_pack({true, false}, 'n', 'T', 30, 20, 10, 1.0f, 1.0f, |
815 | 60, 50, 80, col_use_oc), |
816 | make_test_params_pack({true, true}, 'T', 'N', 30, 20, 10, 1.0f, 1.0f, |
817 | 60, 50, 80, col_use_oc), |
818 | make_test_params_pack({false, true}, 't', 't', 30, 20, 10, 1.0f, 1.0f, |
819 | 60, 50, 80, col_use_oc), |
820 | make_test_params_pack({true, true}, 'n', 'n', 2, 2, 10000, 1.0f, 2.0f, |
821 | 10000, 2, 2, col_use_oc), |
822 | |
/* Shapes with one of the first two dimensions equal to 1; fix_no_offsets. */
823 | make_test_params_pack({false, true}, 'N', 'n', 200, 1, 200, 1.0f, 1.0f, |
824 | 200, 200, 200, fix_no_offsets), |
825 | make_test_params_pack({true, false}, 't', 'N', 200, 1, 200, 1.0f, 0.0f, |
826 | 200, 200, 200, fix_no_offsets), |
827 | make_test_params_pack({true, true}, 'T', 'N', 1, 200, 200, 1.0f, 1.0f, |
828 | 1, 200, 200, fix_no_offsets), |
829 | make_test_params_pack({false, true}, 'n', 'T', 1, 200, 200, 1.0f, 0.0f, |
830 | 200, 200, 200, fix_no_offsets)); |
831 | |
/*
 * TestGEMM_heavy: passes four test_params entries to CPU_INST_TEST_CASE
 * (the macro is defined outside this excerpt). All numeric size fields are
 * 3000, the scale pair is 1.0 / 0.0, and the trailing configuration is
 * fix_use_oc; only the two leading flags vary (n/n, t/n, n/t, t/t).
 * NOTE(review): presumably 3000 x 3000 x 3000 products with leading
 * dimensions of 3000 -- confirm against the test_params definition.
 */
832 | CPU_INST_TEST_CASE(TestGEMM_heavy, |
833 | test_params {'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, |
834 | fix_use_oc}, |
835 | test_params {'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, |
836 | fix_use_oc}, |
837 | test_params {'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, |
838 | fix_use_oc}, |
839 | test_params {'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, |
840 | fix_use_oc}); |
841 | |
/*
 * TestGEMM_packed_heavy: passes 14 entries built by make_test_params_pack
 * to CPU_INST_TEST_CASE (both are defined outside this excerpt). Blank
 * lines separate groups of related shapes.
 * NOTE(review): the leading {bool, bool} argument presumably selects which
 * of the two input matrices is packed, and the remaining positional
 * arguments look like transA, transB, M, N, K, alpha, beta, lda, ldb, ldc,
 * offset configuration -- confirm against make_test_params_pack.
 */
842 | CPU_INST_TEST_CASE(TestGEMM_packed_heavy, |
/* All size fields 3000, one entry per flag pair; fix_use_oc then row_use_oc. */
843 | make_test_params_pack({false, true}, 'n', 'n', 3000, 3000, 3000, 1.0f, |
844 | 0.0f, 3000, 3000, 3000, fix_use_oc), |
845 | make_test_params_pack({true, false}, 't', 'n', 3000, 3000, 3000, 1.0f, |
846 | 0.0f, 3000, 3000, 3000, fix_use_oc), |
847 | make_test_params_pack({true, true}, 'n', 't', 3000, 3000, 3000, 1.0f, |
848 | 0.0f, 3000, 3000, 3000, row_use_oc), |
849 | make_test_params_pack({true, true}, 't', 't', 3000, 3000, 3000, 1.0f, |
850 | 0.0f, 3000, 3000, 3000, row_use_oc), |
851 | |
/* 2000 x 5000 x 2000 with a fractional second scale value; col_use_oc. */
852 | make_test_params_pack({true, true}, 'n', 'n', 2000, 5000, 2000, 1.0f, |
853 | 1.35f, 2000, 5000, 5000, col_use_oc), |
854 | make_test_params_pack({false, true}, 't', 'n', 2000, 5000, 2000, 1.0f, |
855 | 1.77f, 2000, 5000, 5000, col_use_oc), |
856 | |
/* 200 x 20000 x 2000, once per fix/row/col *_use_oc configuration. */
857 | make_test_params_pack({false, true}, 'n', 'n', 200, 20000, 2000, 1.0f, |
858 | 2.0f, 2000, 20000, 20000, fix_use_oc), |
859 | make_test_params_pack({true, true}, 'n', 'n', 200, 20000, 2000, 1.0f, |
860 | 2.0f, 2000, 20000, 20000, row_use_oc), |
861 | make_test_params_pack({true, false}, 'n', 'n', 200, 20000, 2000, 1.0f, |
862 | 2.0f, 2000, 20000, 20000, col_use_oc), |
863 | |
/* 5000 x 100 x 2000; row_use_oc and col_use_oc. */
864 | make_test_params_pack({true, true}, 'n', 'n', 5000, 100, 2000, 1.0f, |
865 | 2.0f, 2000, 100, 100, row_use_oc), |
866 | make_test_params_pack({false, true}, 't', 'n', 5000, 100, 2000, 1.0f, |
867 | 2.0f, 5000, 100, 100, col_use_oc), |
868 | |
/* Fifth size field 8000 with small first two sizes; one entry per offset kind. */
869 | make_test_params_pack({true, false}, 'n', 'n', 150, 150, 8000, 1.0f, |
870 | 1.7f, 8000, 150, 150, fix_use_oc), |
871 | make_test_params_pack({true, true}, 'n', 't', 200, 200, 8000, 1.0f, |
872 | 3.0f, 8000, 8000, 200, row_use_oc), |
873 | make_test_params_pack({false, true}, 't', 'n', 200, 300, 8000, 1.0f, |
874 | 0.0f, 200, 300, 300, col_use_oc)); |
875 | |
876 | #endif |
877 | |