1 | // Copyright 2022 Google LLC |
2 | // |
3 | // This source code is licensed under the BSD-style license found in the |
4 | // LICENSE file in the root directory of this source tree. |
5 | |
6 | #include "convolution-test-helpers.h" |
7 | |
8 | #include <algorithm> |
9 | #include <cstdint> |
10 | #include <cstddef> |
11 | #include <vector> |
12 | |
13 | namespace xnnpack{ |
14 | |
// Reference implementation of a grouped QS8 convolution that produces raw
// int32 accumulators.
//
// Indexing used by this routine:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride
//                 + g * group_input_channels + ic
//   filter:       (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx)
//                 * group_input_channels + ic
//   accumulators: ((n * output_height + oy) * output_width + ox) * (groups * group_output_channels)
//                 + g * group_output_channels + oc
//   bias:         g * group_output_channels + oc
//
// With has_bias set, the accumulator of every visited output element starts
// from its bias value; otherwise the entire accumulators vector is zero-filled
// up front. Kernel taps that land outside the input contribute nothing.
// input_padding_right and input_padding_bottom are part of the signature but
// are not read.
void compute_convolution_qs8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t groups,
    size_t group_input_channels,
    size_t group_output_channels,
    size_t input_channel_stride,
    int8_t input_zero_point,
    const std::vector<int8_t>& input,
    const std::vector<int8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const size_t output_channels = groups * group_output_channels;
  const int32_t input_zp = int32_t(input_zero_point);

  for (size_t n = 0; n < batch_size; n++) {
    for (size_t out_y = 0; out_y < output_height; out_y++) {
      for (size_t out_x = 0; out_x < output_width; out_x++) {
        // Offset of the first accumulator belonging to this output pixel.
        const size_t output_base = ((n * output_height + out_y) * output_width + out_x) * output_channels;

        // Seed the accumulators of this pixel with the per-channel bias.
        if (has_bias) {
          for (size_t c = 0; c < output_channels; c++) {
            accumulators[output_base + c] = bias[c];
          }
        }

        // Accumulate the contribution of every kernel tap.
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // Unsigned arithmetic: a row above the input wraps around to a huge
          // value, so a single comparison rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; kx++) {
            // Same wrap-around trick for columns left of the input.
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_base = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            for (size_t g = 0; g < groups; g++) {
              const size_t group_input_base = input_base + g * group_input_channels;
              for (size_t oc = 0; oc < group_output_channels; oc++) {
                const size_t output_index = output_base + g * group_output_channels + oc;
                const size_t filter_base =
                  (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) * group_input_channels;
                for (size_t ic = 0; ic < group_input_channels; ic++) {
                  accumulators[output_index] +=
                    (int32_t(input[group_input_base + ic]) - input_zp) * int32_t(filter[filter_base + ic]);
                }
              }
            }
          }
        }
      }
    }
  }
}
84 | |
85 | void compute_convolution_qs8_reference_results( |
86 | size_t batch_size, |
87 | size_t output_height, |
88 | size_t output_width, |
89 | size_t input_height, |
90 | size_t input_width, |
91 | size_t input_padding_top, |
92 | size_t input_padding_right, |
93 | size_t input_padding_bottom, |
94 | size_t input_padding_left, |
95 | size_t kernel_height, |
96 | size_t kernel_width, |
97 | size_t subsampling_height, |
98 | size_t subsampling_width, |
99 | size_t dilation_height, |
100 | size_t dilation_width, |
101 | size_t groups, |
102 | size_t group_input_channels, |
103 | size_t group_output_channels, |
104 | int8_t input_zero_point, |
105 | const std::vector<int8_t>& input, |
106 | const std::vector<int8_t>& filter, |
107 | std::vector<int32_t>& accumulators, |
108 | bool has_bias, |
109 | const std::vector<int32_t>& bias) |
110 | { |
111 | compute_convolution_qs8_reference_results( |
112 | batch_size, |
113 | output_height, |
114 | output_width, |
115 | input_height, |
116 | input_width, |
117 | input_padding_top, |
118 | input_padding_right, |
119 | input_padding_bottom, |
120 | input_padding_left, |
121 | kernel_height, |
122 | kernel_width, |
123 | subsampling_height, |
124 | subsampling_width, |
125 | dilation_height, |
126 | dilation_width, |
127 | groups, |
128 | group_input_channels, |
129 | group_output_channels, |
130 | groups * group_input_channels, |
131 | input_zero_point, |
132 | input, |
133 | filter, |
134 | accumulators, |
135 | has_bias, |
136 | bias); |
137 | } |
138 | |
// Reference implementation of a grouped QU8 convolution that produces raw
// int32 accumulators. Each product is
// (input - input_zero_point) * (filter - kernel_zero_point), evaluated in int32.
//
// Indexing used by this routine:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride
//                 + g * group_input_channels + ic
//   filter:       (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx)
//                 * group_input_channels + ic
//   accumulators: ((n * output_height + oy) * output_width + ox) * (groups * group_output_channels)
//                 + g * group_output_channels + oc
//   bias:         g * group_output_channels + oc
//
// With has_bias set, the accumulator of every visited output element starts
// from its bias value; otherwise the entire accumulators vector is zero-filled
// up front. Kernel taps that land outside the input contribute nothing.
// input_padding_right and input_padding_bottom are part of the signature but
// are not read.
void compute_convolution_qu8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t groups,
    size_t group_input_channels,
    size_t group_output_channels,
    size_t input_channel_stride,
    uint8_t input_zero_point,
    uint8_t kernel_zero_point,
    const std::vector<uint8_t>& input,
    const std::vector<uint8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const size_t output_channels = groups * group_output_channels;
  const int32_t input_zp = int32_t(input_zero_point);
  const int32_t kernel_zp = int32_t(kernel_zero_point);

  for (size_t n = 0; n < batch_size; n++) {
    for (size_t out_y = 0; out_y < output_height; out_y++) {
      for (size_t out_x = 0; out_x < output_width; out_x++) {
        // Offset of the first accumulator belonging to this output pixel.
        const size_t output_base = ((n * output_height + out_y) * output_width + out_x) * output_channels;

        // Seed the accumulators of this pixel with the per-channel bias.
        if (has_bias) {
          for (size_t c = 0; c < output_channels; c++) {
            accumulators[output_base + c] = bias[c];
          }
        }

        // Accumulate the contribution of every kernel tap.
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // Unsigned arithmetic: a row above the input wraps around to a huge
          // value, so a single comparison rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; kx++) {
            // Same wrap-around trick for columns left of the input.
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_base = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            for (size_t g = 0; g < groups; g++) {
              const size_t group_input_base = input_base + g * group_input_channels;
              for (size_t oc = 0; oc < group_output_channels; oc++) {
                const size_t output_index = output_base + g * group_output_channels + oc;
                const size_t filter_base =
                  (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) * group_input_channels;
                for (size_t ic = 0; ic < group_input_channels; ic++) {
                  accumulators[output_index] +=
                    (int32_t(input[group_input_base + ic]) - input_zp) *
                    (int32_t(filter[filter_base + ic]) - kernel_zp);
                }
              }
            }
          }
        }
      }
    }
  }
}
208 | |
209 | void compute_convolution_qu8_reference_results( |
210 | size_t batch_size, |
211 | size_t output_height, |
212 | size_t output_width, |
213 | size_t input_height, |
214 | size_t input_width, |
215 | size_t input_padding_top, |
216 | size_t input_padding_right, |
217 | size_t input_padding_bottom, |
218 | size_t input_padding_left, |
219 | size_t kernel_height, |
220 | size_t kernel_width, |
221 | size_t subsampling_height, |
222 | size_t subsampling_width, |
223 | size_t dilation_height, |
224 | size_t dilation_width, |
225 | size_t groups, |
226 | size_t group_input_channels, |
227 | size_t group_output_channels, |
228 | uint8_t input_zero_point, |
229 | uint8_t kernel_zero_point, |
230 | const std::vector<uint8_t>& input, |
231 | const std::vector<uint8_t>& filter, |
232 | std::vector<int32_t>& accumulators, |
233 | bool has_bias, |
234 | const std::vector<int32_t>& bias) |
235 | { |
236 | compute_convolution_qu8_reference_results( |
237 | batch_size, |
238 | output_height, |
239 | output_width, |
240 | input_height, |
241 | input_width, |
242 | input_padding_top, |
243 | input_padding_right, |
244 | input_padding_bottom, |
245 | input_padding_left, |
246 | kernel_height, |
247 | kernel_width, |
248 | subsampling_height, |
249 | subsampling_width, |
250 | dilation_height, |
251 | dilation_width, |
252 | groups, |
253 | group_input_channels, |
254 | group_output_channels, |
255 | groups * group_input_channels, |
256 | input_zero_point, |
257 | kernel_zero_point, |
258 | input, |
259 | filter, |
260 | accumulators, |
261 | has_bias, |
262 | bias); |
263 | } |
264 | |
// Reference implementation of a QS8 depthwise convolution that produces raw
// int32 accumulators. Every input channel g yields depth_multiplier output
// channels.
//
// Indexing used by this routine:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride + g
//   filter:       ((ky * kernel_width + kx) * input_channels + g) * depth_multiplier + oc
//   accumulators: ((n * output_height + oy) * output_width + ox) * (input_channels * depth_multiplier)
//                 + g * depth_multiplier + oc
//   bias:         g * depth_multiplier + oc
//
// With has_bias set, the accumulator of every visited output element starts
// from its bias value; otherwise the entire accumulators vector is zero-filled
// up front. Kernel taps that land outside the input contribute nothing.
// input_padding_right and input_padding_bottom are part of the signature but
// are not read.
void compute_depthwise_convolution_qs8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t input_channels,
    size_t depth_multiplier,
    size_t input_channel_stride,
    int8_t input_zero_point,
    const std::vector<int8_t>& input,
    const std::vector<int8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const size_t output_channels = input_channels * depth_multiplier;
  const int32_t input_zp = int32_t(input_zero_point);

  for (size_t n = 0; n < batch_size; n++) {
    for (size_t out_y = 0; out_y < output_height; out_y++) {
      for (size_t out_x = 0; out_x < output_width; out_x++) {
        // Offset of the first accumulator belonging to this output pixel.
        const size_t output_base = ((n * output_height + out_y) * output_width + out_x) * output_channels;

        // Seed the accumulators of this pixel with the per-channel bias.
        if (has_bias) {
          for (size_t c = 0; c < output_channels; c++) {
            accumulators[output_base + c] = bias[c];
          }
        }

        // Accumulate the contribution of every kernel tap.
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // Unsigned arithmetic: a row above the input wraps around to a huge
          // value, so a single comparison rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; kx++) {
            // Same wrap-around trick for columns left of the input.
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_base = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            const size_t filter_base = (ky * kernel_width + kx) * output_channels;
            for (size_t g = 0; g < input_channels; g++) {
              for (size_t m = 0; m < depth_multiplier; m++) {
                // Output channel index shared by the accumulator and filter layouts.
                const size_t channel = g * depth_multiplier + m;
                accumulators[output_base + channel] +=
                  (int32_t(input[input_base + g]) - input_zp) * int32_t(filter[filter_base + channel]);
              }
            }
          }
        }
      }
    }
  }
}
329 | |
330 | void compute_depthwise_convolution_qs8_reference_results( |
331 | size_t batch_size, |
332 | size_t output_height, |
333 | size_t output_width, |
334 | size_t input_height, |
335 | size_t input_width, |
336 | size_t input_padding_top, |
337 | size_t input_padding_right, |
338 | size_t input_padding_bottom, |
339 | size_t input_padding_left, |
340 | size_t kernel_height, |
341 | size_t kernel_width, |
342 | size_t subsampling_height, |
343 | size_t subsampling_width, |
344 | size_t dilation_height, |
345 | size_t dilation_width, |
346 | size_t input_channels, |
347 | size_t depth_multiplier, |
348 | int8_t input_zero_point, |
349 | const std::vector<int8_t>& input, |
350 | const std::vector<int8_t>& filter, |
351 | std::vector<int32_t>& accumulators, |
352 | bool has_bias, |
353 | const std::vector<int32_t>& bias) |
354 | { |
355 | compute_depthwise_convolution_qs8_reference_results( |
356 | batch_size, |
357 | output_height, |
358 | output_width, |
359 | input_height, |
360 | input_width, |
361 | input_padding_top, |
362 | input_padding_right, |
363 | input_padding_bottom, |
364 | input_padding_left, |
365 | kernel_height, |
366 | kernel_width, |
367 | subsampling_height, |
368 | subsampling_width, |
369 | dilation_height, |
370 | dilation_width, |
371 | input_channels, |
372 | depth_multiplier, |
373 | input_channels, |
374 | input_zero_point, |
375 | input, |
376 | filter, |
377 | accumulators, |
378 | has_bias, |
379 | bias); |
380 | } |
381 | |
// Reference implementation of a QU8 depthwise convolution that produces raw
// int32 accumulators. Every input channel g yields depth_multiplier output
// channels, and each product is
// (input - input_zero_point) * (filter - kernel_zero_point), evaluated in int32.
//
// Indexing used by this routine:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride + g
//   filter:       ((ky * kernel_width + kx) * input_channels + g) * depth_multiplier + oc
//   accumulators: ((n * output_height + oy) * output_width + ox) * (input_channels * depth_multiplier)
//                 + g * depth_multiplier + oc
//   bias:         g * depth_multiplier + oc
//
// With has_bias set, the accumulator of every visited output element starts
// from its bias value; otherwise the entire accumulators vector is zero-filled
// up front. Kernel taps that land outside the input contribute nothing.
// input_padding_right and input_padding_bottom are part of the signature but
// are not read.
void compute_depthwise_convolution_qu8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t input_channels,
    size_t depth_multiplier,
    size_t input_channel_stride,
    uint8_t input_zero_point,
    uint8_t kernel_zero_point,
    const std::vector<uint8_t>& input,
    const std::vector<uint8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const size_t output_channels = input_channels * depth_multiplier;
  const int32_t input_zp = int32_t(input_zero_point);
  const int32_t kernel_zp = int32_t(kernel_zero_point);

  for (size_t n = 0; n < batch_size; n++) {
    for (size_t out_y = 0; out_y < output_height; out_y++) {
      for (size_t out_x = 0; out_x < output_width; out_x++) {
        // Offset of the first accumulator belonging to this output pixel.
        const size_t output_base = ((n * output_height + out_y) * output_width + out_x) * output_channels;

        // Seed the accumulators of this pixel with the per-channel bias.
        if (has_bias) {
          for (size_t c = 0; c < output_channels; c++) {
            accumulators[output_base + c] = bias[c];
          }
        }

        // Accumulate the contribution of every kernel tap.
        for (size_t ky = 0; ky < kernel_height; ky++) {
          // Unsigned arithmetic: a row above the input wraps around to a huge
          // value, so a single comparison rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; kx++) {
            // Same wrap-around trick for columns left of the input.
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_base = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            const size_t filter_base = (ky * kernel_width + kx) * output_channels;
            for (size_t g = 0; g < input_channels; g++) {
              for (size_t m = 0; m < depth_multiplier; m++) {
                // Output channel index shared by the accumulator and filter layouts.
                const size_t channel = g * depth_multiplier + m;
                accumulators[output_base + channel] +=
                  (int32_t(input[input_base + g]) - input_zp) *
                  (int32_t(filter[filter_base + channel]) - kernel_zp);
              }
            }
          }
        }
      }
    }
  }
}
447 | |
448 | void compute_depthwise_convolution_qu8_reference_results( |
449 | size_t batch_size, |
450 | size_t output_height, |
451 | size_t output_width, |
452 | size_t input_height, |
453 | size_t input_width, |
454 | size_t input_padding_top, |
455 | size_t input_padding_right, |
456 | size_t input_padding_bottom, |
457 | size_t input_padding_left, |
458 | size_t kernel_height, |
459 | size_t kernel_width, |
460 | size_t subsampling_height, |
461 | size_t subsampling_width, |
462 | size_t dilation_height, |
463 | size_t dilation_width, |
464 | size_t input_channels, |
465 | size_t depth_multiplier, |
466 | uint8_t input_zero_point, |
467 | uint8_t kernel_zero_point, |
468 | const std::vector<uint8_t>& input, |
469 | const std::vector<uint8_t>& filter, |
470 | std::vector<int32_t>& accumulators, |
471 | bool has_bias, |
472 | const std::vector<int32_t>& bias) |
473 | { |
474 | compute_depthwise_convolution_qu8_reference_results( |
475 | batch_size, |
476 | output_height, |
477 | output_width, |
478 | input_height, |
479 | input_width, |
480 | input_padding_top, |
481 | input_padding_right, |
482 | input_padding_bottom, |
483 | input_padding_left, |
484 | kernel_height, |
485 | kernel_width, |
486 | subsampling_height, |
487 | subsampling_width, |
488 | dilation_height, |
489 | dilation_width, |
490 | input_channels, |
491 | depth_multiplier, |
492 | input_channels, |
493 | input_zero_point, |
494 | kernel_zero_point, |
495 | input, |
496 | filter, |
497 | accumulators, |
498 | has_bias, |
499 | bias); |
500 | } |
501 | } |
502 | |