1// Copyright 2022 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include "convolution-test-helpers.h"
7
8#include <algorithm>
9#include <cstdint>
10#include <cstddef>
11#include <vector>
12
13namespace xnnpack{
14
// Computes reference int32 accumulators for a signed 8-bit quantized grouped
// 2D convolution.
//
// Element indexing used by this function:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride
//                     + g * group_input_channels + ic
//   filter:       (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx)
//                     * group_input_channels + ic
//   accumulators: (((n * output_height + oy) * output_width + ox) * groups + g)
//                     * group_output_channels + oc
//   bias:         g * group_output_channels + oc
//
// When has_bias is false the whole accumulators vector is zeroed up front and
// bias is never read. When has_bias is true every visited output element is
// overwritten with its bias value before products are added.
//
// Each product is (input - input_zero_point) * filter, evaluated in int32.
// Kernel taps that land outside the input are skipped, so they contribute
// nothing. input_padding_right and input_padding_bottom are accepted but not
// read here.
void compute_convolution_qs8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t groups,
    size_t group_input_channels,
    size_t group_output_channels,
    size_t input_channel_stride,
    int8_t input_zero_point,
    const std::vector<int8_t>& input,
    const std::vector<int8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const int32_t zero_point = static_cast<int32_t>(input_zero_point);

  for (size_t n = 0; n < batch_size; ++n) {
    for (size_t out_y = 0; out_y < output_height; ++out_y) {
      for (size_t out_x = 0; out_x < output_width; ++out_x) {
        // Linear index of the current output pixel, shared by every channel.
        const size_t output_pixel = (n * output_height + out_y) * output_width + out_x;

        // Seed the accumulators of this pixel with the bias.
        if (has_bias) {
          for (size_t g = 0; g < groups; ++g) {
            for (size_t oc = 0; oc < group_output_channels; ++oc) {
              const size_t channel = g * group_output_channels + oc;
              accumulators[(output_pixel * groups + g) * group_output_channels + oc] = bias[channel];
            }
          }
        }

        // Accumulate every kernel tap that overlaps the input.
        for (size_t ky = 0; ky < kernel_height; ++ky) {
          // Unsigned arithmetic: a tap above the input wraps to a huge value,
          // so the single comparison below rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; ++kx) {
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_pixel = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            for (size_t g = 0; g < groups; ++g) {
              for (size_t oc = 0; oc < group_output_channels; ++oc) {
                const size_t acc_index = (output_pixel * groups + g) * group_output_channels + oc;
                const size_t filter_tap =
                    (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) * group_input_channels;
                for (size_t ic = 0; ic < group_input_channels; ++ic) {
                  const int32_t x =
                      static_cast<int32_t>(input[input_pixel + g * group_input_channels + ic]) - zero_point;
                  const int32_t w = static_cast<int32_t>(filter[filter_tap + ic]);
                  accumulators[acc_index] += x * w;
                }
              }
            }
          }
        }
      }
    }
  }
}
84
85void compute_convolution_qs8_reference_results(
86 size_t batch_size,
87 size_t output_height,
88 size_t output_width,
89 size_t input_height,
90 size_t input_width,
91 size_t input_padding_top,
92 size_t input_padding_right,
93 size_t input_padding_bottom,
94 size_t input_padding_left,
95 size_t kernel_height,
96 size_t kernel_width,
97 size_t subsampling_height,
98 size_t subsampling_width,
99 size_t dilation_height,
100 size_t dilation_width,
101 size_t groups,
102 size_t group_input_channels,
103 size_t group_output_channels,
104 int8_t input_zero_point,
105 const std::vector<int8_t>& input,
106 const std::vector<int8_t>& filter,
107 std::vector<int32_t>& accumulators,
108 bool has_bias,
109 const std::vector<int32_t>& bias)
110{
111 compute_convolution_qs8_reference_results(
112 batch_size,
113 output_height,
114 output_width,
115 input_height,
116 input_width,
117 input_padding_top,
118 input_padding_right,
119 input_padding_bottom,
120 input_padding_left,
121 kernel_height,
122 kernel_width,
123 subsampling_height,
124 subsampling_width,
125 dilation_height,
126 dilation_width,
127 groups,
128 group_input_channels,
129 group_output_channels,
130 groups * group_input_channels,
131 input_zero_point,
132 input,
133 filter,
134 accumulators,
135 has_bias,
136 bias);
137}
138
// Computes reference int32 accumulators for an unsigned 8-bit quantized
// grouped 2D convolution.
//
// Element indexing used by this function:
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride
//                     + g * group_input_channels + ic
//   filter:       (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx)
//                     * group_input_channels + ic
//   accumulators: (((n * output_height + oy) * output_width + ox) * groups + g)
//                     * group_output_channels + oc
//   bias:         g * group_output_channels + oc
//
// When has_bias is false the whole accumulators vector is zeroed up front and
// bias is never read. When has_bias is true every visited output element is
// overwritten with its bias value before products are added.
//
// Each product is (input - input_zero_point) * (filter - kernel_zero_point),
// evaluated in int32. Kernel taps that land outside the input are skipped, so
// they contribute nothing. input_padding_right and input_padding_bottom are
// accepted but not read here.
void compute_convolution_qu8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t groups,
    size_t group_input_channels,
    size_t group_output_channels,
    size_t input_channel_stride,
    uint8_t input_zero_point,
    uint8_t kernel_zero_point,
    const std::vector<uint8_t>& input,
    const std::vector<uint8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const int32_t input_offset = static_cast<int32_t>(input_zero_point);
  const int32_t kernel_offset = static_cast<int32_t>(kernel_zero_point);

  for (size_t n = 0; n < batch_size; ++n) {
    for (size_t out_y = 0; out_y < output_height; ++out_y) {
      for (size_t out_x = 0; out_x < output_width; ++out_x) {
        // Linear index of the current output pixel, shared by every channel.
        const size_t output_pixel = (n * output_height + out_y) * output_width + out_x;

        // Seed the accumulators of this pixel with the bias.
        if (has_bias) {
          for (size_t g = 0; g < groups; ++g) {
            for (size_t oc = 0; oc < group_output_channels; ++oc) {
              const size_t channel = g * group_output_channels + oc;
              accumulators[(output_pixel * groups + g) * group_output_channels + oc] = bias[channel];
            }
          }
        }

        // Accumulate every kernel tap that overlaps the input.
        for (size_t ky = 0; ky < kernel_height; ++ky) {
          // Unsigned arithmetic: a tap above the input wraps to a huge value,
          // so the single comparison below rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; ++kx) {
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_pixel = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            for (size_t g = 0; g < groups; ++g) {
              for (size_t oc = 0; oc < group_output_channels; ++oc) {
                const size_t acc_index = (output_pixel * groups + g) * group_output_channels + oc;
                const size_t filter_tap =
                    (((g * group_output_channels + oc) * kernel_height + ky) * kernel_width + kx) * group_input_channels;
                for (size_t ic = 0; ic < group_input_channels; ++ic) {
                  const int32_t x =
                      static_cast<int32_t>(input[input_pixel + g * group_input_channels + ic]) - input_offset;
                  const int32_t w = static_cast<int32_t>(filter[filter_tap + ic]) - kernel_offset;
                  accumulators[acc_index] += x * w;
                }
              }
            }
          }
        }
      }
    }
  }
}
208
209void compute_convolution_qu8_reference_results(
210 size_t batch_size,
211 size_t output_height,
212 size_t output_width,
213 size_t input_height,
214 size_t input_width,
215 size_t input_padding_top,
216 size_t input_padding_right,
217 size_t input_padding_bottom,
218 size_t input_padding_left,
219 size_t kernel_height,
220 size_t kernel_width,
221 size_t subsampling_height,
222 size_t subsampling_width,
223 size_t dilation_height,
224 size_t dilation_width,
225 size_t groups,
226 size_t group_input_channels,
227 size_t group_output_channels,
228 uint8_t input_zero_point,
229 uint8_t kernel_zero_point,
230 const std::vector<uint8_t>& input,
231 const std::vector<uint8_t>& filter,
232 std::vector<int32_t>& accumulators,
233 bool has_bias,
234 const std::vector<int32_t>& bias)
235{
236 compute_convolution_qu8_reference_results(
237 batch_size,
238 output_height,
239 output_width,
240 input_height,
241 input_width,
242 input_padding_top,
243 input_padding_right,
244 input_padding_bottom,
245 input_padding_left,
246 kernel_height,
247 kernel_width,
248 subsampling_height,
249 subsampling_width,
250 dilation_height,
251 dilation_width,
252 groups,
253 group_input_channels,
254 group_output_channels,
255 groups * group_input_channels,
256 input_zero_point,
257 kernel_zero_point,
258 input,
259 filter,
260 accumulators,
261 has_bias,
262 bias);
263}
264
// Computes reference int32 accumulators for a signed 8-bit quantized
// depthwise 2D convolution.
//
// Element indexing used by this function (c = input channel, m = multiplier):
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride + c
//   filter:       ((ky * kernel_width + kx) * input_channels + c) * depth_multiplier + m
//   accumulators: (((n * output_height + oy) * output_width + ox) * input_channels + c)
//                     * depth_multiplier + m
//   bias:         c * depth_multiplier + m
//
// When has_bias is false the whole accumulators vector is zeroed up front and
// bias is never read. When has_bias is true every visited output element is
// overwritten with its bias value before products are added.
//
// Each product is (input - input_zero_point) * filter, evaluated in int32.
// Kernel taps that land outside the input are skipped, so they contribute
// nothing. input_padding_right and input_padding_bottom are accepted but not
// read here.
void compute_depthwise_convolution_qs8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t input_channels,
    size_t depth_multiplier,
    size_t input_channel_stride,
    int8_t input_zero_point,
    const std::vector<int8_t>& input,
    const std::vector<int8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const int32_t zero_point = static_cast<int32_t>(input_zero_point);

  for (size_t n = 0; n < batch_size; ++n) {
    for (size_t out_y = 0; out_y < output_height; ++out_y) {
      for (size_t out_x = 0; out_x < output_width; ++out_x) {
        // Linear index of the current output pixel, shared by every channel.
        const size_t output_pixel = (n * output_height + out_y) * output_width + out_x;

        // Seed the accumulators of this pixel with the bias.
        if (has_bias) {
          for (size_t c = 0; c < input_channels; ++c) {
            for (size_t m = 0; m < depth_multiplier; ++m) {
              const size_t channel = c * depth_multiplier + m;
              accumulators[(output_pixel * input_channels + c) * depth_multiplier + m] = bias[channel];
            }
          }
        }

        // Accumulate every kernel tap that overlaps the input.
        for (size_t ky = 0; ky < kernel_height; ++ky) {
          // Unsigned arithmetic: a tap above the input wraps to a huge value,
          // so the single comparison below rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; ++kx) {
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_pixel = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            const size_t filter_tap = (ky * kernel_width + kx) * input_channels;
            for (size_t c = 0; c < input_channels; ++c) {
              for (size_t m = 0; m < depth_multiplier; ++m) {
                const size_t acc_index = (output_pixel * input_channels + c) * depth_multiplier + m;
                const int32_t x = static_cast<int32_t>(input[input_pixel + c]) - zero_point;
                const int32_t w = static_cast<int32_t>(filter[(filter_tap + c) * depth_multiplier + m]);
                accumulators[acc_index] += x * w;
              }
            }
          }
        }
      }
    }
  }
}
329
330void compute_depthwise_convolution_qs8_reference_results(
331 size_t batch_size,
332 size_t output_height,
333 size_t output_width,
334 size_t input_height,
335 size_t input_width,
336 size_t input_padding_top,
337 size_t input_padding_right,
338 size_t input_padding_bottom,
339 size_t input_padding_left,
340 size_t kernel_height,
341 size_t kernel_width,
342 size_t subsampling_height,
343 size_t subsampling_width,
344 size_t dilation_height,
345 size_t dilation_width,
346 size_t input_channels,
347 size_t depth_multiplier,
348 int8_t input_zero_point,
349 const std::vector<int8_t>& input,
350 const std::vector<int8_t>& filter,
351 std::vector<int32_t>& accumulators,
352 bool has_bias,
353 const std::vector<int32_t>& bias)
354{
355 compute_depthwise_convolution_qs8_reference_results(
356 batch_size,
357 output_height,
358 output_width,
359 input_height,
360 input_width,
361 input_padding_top,
362 input_padding_right,
363 input_padding_bottom,
364 input_padding_left,
365 kernel_height,
366 kernel_width,
367 subsampling_height,
368 subsampling_width,
369 dilation_height,
370 dilation_width,
371 input_channels,
372 depth_multiplier,
373 input_channels,
374 input_zero_point,
375 input,
376 filter,
377 accumulators,
378 has_bias,
379 bias);
380}
381
// Computes reference int32 accumulators for an unsigned 8-bit quantized
// depthwise 2D convolution.
//
// Element indexing used by this function (c = input channel, m = multiplier):
//   input:        ((n * input_height + iy) * input_width + ix) * input_channel_stride + c
//   filter:       ((ky * kernel_width + kx) * input_channels + c) * depth_multiplier + m
//   accumulators: (((n * output_height + oy) * output_width + ox) * input_channels + c)
//                     * depth_multiplier + m
//   bias:         c * depth_multiplier + m
//
// When has_bias is false the whole accumulators vector is zeroed up front and
// bias is never read. When has_bias is true every visited output element is
// overwritten with its bias value before products are added.
//
// Each product is (input - input_zero_point) * (filter - kernel_zero_point),
// evaluated in int32. Kernel taps that land outside the input are skipped, so
// they contribute nothing. input_padding_right and input_padding_bottom are
// accepted but not read here.
void compute_depthwise_convolution_qu8_reference_results(
    size_t batch_size,
    size_t output_height,
    size_t output_width,
    size_t input_height,
    size_t input_width,
    size_t input_padding_top,
    size_t input_padding_right,
    size_t input_padding_bottom,
    size_t input_padding_left,
    size_t kernel_height,
    size_t kernel_width,
    size_t subsampling_height,
    size_t subsampling_width,
    size_t dilation_height,
    size_t dilation_width,
    size_t input_channels,
    size_t depth_multiplier,
    size_t input_channel_stride,
    uint8_t input_zero_point,
    uint8_t kernel_zero_point,
    const std::vector<uint8_t>& input,
    const std::vector<uint8_t>& filter,
    std::vector<int32_t>& accumulators,
    bool has_bias,
    const std::vector<int32_t>& bias)
{
  if (!has_bias) {
    std::fill(accumulators.begin(), accumulators.end(), 0);
  }

  const int32_t input_offset = static_cast<int32_t>(input_zero_point);
  const int32_t kernel_offset = static_cast<int32_t>(kernel_zero_point);

  for (size_t n = 0; n < batch_size; ++n) {
    for (size_t out_y = 0; out_y < output_height; ++out_y) {
      for (size_t out_x = 0; out_x < output_width; ++out_x) {
        // Linear index of the current output pixel, shared by every channel.
        const size_t output_pixel = (n * output_height + out_y) * output_width + out_x;

        // Seed the accumulators of this pixel with the bias.
        if (has_bias) {
          for (size_t c = 0; c < input_channels; ++c) {
            for (size_t m = 0; m < depth_multiplier; ++m) {
              const size_t channel = c * depth_multiplier + m;
              accumulators[(output_pixel * input_channels + c) * depth_multiplier + m] = bias[channel];
            }
          }
        }

        // Accumulate every kernel tap that overlaps the input.
        for (size_t ky = 0; ky < kernel_height; ++ky) {
          // Unsigned arithmetic: a tap above the input wraps to a huge value,
          // so the single comparison below rejects both out-of-range sides.
          const size_t in_y = out_y * subsampling_height + ky * dilation_height - input_padding_top;
          if (in_y >= input_height) {
            continue;
          }
          for (size_t kx = 0; kx < kernel_width; ++kx) {
            const size_t in_x = out_x * subsampling_width + kx * dilation_width - input_padding_left;
            if (in_x >= input_width) {
              continue;
            }
            const size_t input_pixel = ((n * input_height + in_y) * input_width + in_x) * input_channel_stride;
            const size_t filter_tap = (ky * kernel_width + kx) * input_channels;
            for (size_t c = 0; c < input_channels; ++c) {
              for (size_t m = 0; m < depth_multiplier; ++m) {
                const size_t acc_index = (output_pixel * input_channels + c) * depth_multiplier + m;
                const int32_t x = static_cast<int32_t>(input[input_pixel + c]) - input_offset;
                const int32_t w =
                    static_cast<int32_t>(filter[(filter_tap + c) * depth_multiplier + m]) - kernel_offset;
                accumulators[acc_index] += x * w;
              }
            }
          }
        }
      }
    }
  }
}
447
448void compute_depthwise_convolution_qu8_reference_results(
449 size_t batch_size,
450 size_t output_height,
451 size_t output_width,
452 size_t input_height,
453 size_t input_width,
454 size_t input_padding_top,
455 size_t input_padding_right,
456 size_t input_padding_bottom,
457 size_t input_padding_left,
458 size_t kernel_height,
459 size_t kernel_width,
460 size_t subsampling_height,
461 size_t subsampling_width,
462 size_t dilation_height,
463 size_t dilation_width,
464 size_t input_channels,
465 size_t depth_multiplier,
466 uint8_t input_zero_point,
467 uint8_t kernel_zero_point,
468 const std::vector<uint8_t>& input,
469 const std::vector<uint8_t>& filter,
470 std::vector<int32_t>& accumulators,
471 bool has_bias,
472 const std::vector<int32_t>& bias)
473{
474 compute_depthwise_convolution_qu8_reference_results(
475 batch_size,
476 output_height,
477 output_width,
478 input_height,
479 input_width,
480 input_padding_top,
481 input_padding_right,
482 input_padding_bottom,
483 input_padding_left,
484 kernel_height,
485 kernel_width,
486 subsampling_height,
487 subsampling_width,
488 dilation_height,
489 dilation_width,
490 input_channels,
491 depth_multiplier,
492 input_channels,
493 input_zero_point,
494 kernel_zero_point,
495 input,
496 filter,
497 accumulators,
498 has_bias,
499 bias);
500}
501}
502