1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
16
17#include <qnnpack.h>
18#include <qnnpack/operator.h>
19#include <qnnpack/log.h>
20#include <qnnpack/common.h>
21#include <qnnpack/math.h>
22#include <qnnpack/params.h>
23#include <qnnpack/indirection.h>
24
25
/*
 * Number of pooling windows that fit along one spatial dimension.
 *
 * padded_input_dimension - input extent including padding on both sides
 * pooling_dimension      - extent of the pooling window
 * stride_dimension       - distance between consecutive windows
 *
 * The arithmetic is unsigned: the caller must guarantee that
 * padded_input_dimension >= pooling_dimension and stride_dimension != 0.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension)
{
  /* Room left for moving the window after placing it at the first position */
  const size_t slack = padded_input_dimension - pooling_dimension;
  /* Whole strides that fit in that room */
  const size_t extra_positions = slack / stride_dimension;
  /* Count the first position as well */
  return extra_positions + 1;
}
33
34enum qnnp_status qnnp_create_average_pooling2d_nhwc_q8(
35 uint32_t input_padding_top,
36 uint32_t input_padding_right,
37 uint32_t input_padding_bottom,
38 uint32_t input_padding_left,
39 uint32_t pooling_height,
40 uint32_t pooling_width,
41 uint32_t stride_height,
42 uint32_t stride_width,
43 size_t channels,
44 uint8_t input_zero_point,
45 float input_scale,
46 uint8_t output_zero_point,
47 float output_scale,
48 uint8_t output_min,
49 uint8_t output_max,
50 uint32_t flags,
51 qnnp_operator_t* average_pooling_out)
52{
53 qnnp_operator_t average_pooling = NULL;
54 enum qnnp_status status = qnnp_status_uninitialized;
55
56 if (!qnnp_params.initialized) {
57 qnnp_log_error("qnnp_create_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
58 goto error;
59 }
60
61 status = qnnp_status_invalid_parameter;
62
63 const uint32_t pooling_size = pooling_height * pooling_width;
64 if (pooling_size == 0) {
65 qnnp_log_error(
66 "failed to create average pooling with %" PRIu32 "x%" PRIu32 " pooling size: "
67 "pooling size dimensions must be non-zero",
68 pooling_width, pooling_height);
69 goto error;
70 }
71
72 if (pooling_size == 1) {
73 qnnp_log_error(
74 "failed to create average pooling with 1 pooling element: "
75 "1x1 pooling is meaningless");
76 goto error;
77 }
78
79 if (stride_height == 0 || stride_width == 0) {
80 qnnp_log_error(
81 "failed to create average pooling with %" PRIu32 "x%" PRIu32 " stride: "
82 "stride dimensions must be non-zero",
83 stride_width, stride_height);
84 goto error;
85 }
86
87 if (channels == 0) {
88 qnnp_log_error(
89 "failed to create average pooling with %zu channels: "
90 "number of channels must be non-zero",
91 channels);
92 goto error;
93 }
94
95 if (input_scale <= 0.0f || !isnormal(input_scale)) {
96 qnnp_log_error(
97 "failed to create average pooling with %.7g input scale: "
98 "scale must be finite and positive",
99 input_scale);
100 goto error;
101 }
102
103 if (output_scale <= 0.0f || !isnormal(output_scale)) {
104 qnnp_log_error(
105 "failed to create average pooling with %.7g output scale: "
106 "scale must be finite and positive",
107 output_scale);
108 goto error;
109 }
110
111 status = qnnp_status_unsupported_parameter;
112
113 const float input_output_scale = input_scale / output_scale;
114 if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
115 qnnp_log_error(
116 "failed to create average pooling with %.7g input scale and %.7g output scale: "
117 "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
118 input_scale, output_scale, input_output_scale);
119 goto error;
120 }
121
122 if (pooling_size >= 16777216) {
123 qnnp_log_error(
124 "failed to create average pooling with %"PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: "
125 "the number of elements in the pooling area must be below 2**24",
126 pooling_size, pooling_width, pooling_height);
127 goto error;
128 }
129
130 status = qnnp_status_out_of_memory;
131
132 average_pooling = calloc(1, sizeof(struct qnnp_operator));
133 if (average_pooling == NULL) {
134 qnnp_log_error("failed to allocate %zu bytes for qnnp_operator structure", sizeof(struct qnnp_operator));
135 goto error;
136 }
137
138 const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
139 const uint32_t kr = qnnp_params.q8avgpool.kr;
140 const uint32_t mr = qnnp_params.q8avgpool.mr;
141 const uint32_t qr = qnnp_params.q8avgpool.qr;
142 if (any_padding || (channels >= kr || (pooling_size - mr) % qr != 0)) {
143 void* zero_buffer = malloc(channels);
144 if (zero_buffer == NULL) {
145 qnnp_log_error("failed to allocate %zu bytes for zero padding", channels);
146 goto error;
147 }
148 memset(zero_buffer, input_zero_point, channels);
149 average_pooling->zero_buffer = zero_buffer;
150 average_pooling->zero_pointer = zero_buffer;
151 }
152
153 average_pooling->input_padding_top = input_padding_top;
154 average_pooling->input_padding_right = input_padding_right;
155 average_pooling->input_padding_bottom = input_padding_bottom;
156 average_pooling->input_padding_left = input_padding_left;
157
158 average_pooling->kernel_height = pooling_height;
159 average_pooling->kernel_width = pooling_width;
160 average_pooling->stride_height = stride_height;
161 average_pooling->stride_width = stride_width;
162 average_pooling->dilation_height = 1;
163 average_pooling->dilation_width = 1;
164 average_pooling->channels = channels;
165
166 size_t nrows = pooling_height * pooling_width;
167 if (channels >= qnnp_params.q8avgpool.kr) {
168 if (nrows <= mr) {
169 nrows = mr;
170 } else {
171 nrows = round_up(nrows - mr, qr) + mr;
172 }
173 }
174
175 average_pooling->avgpool_quantization_params =
176 qnnp_compute_avgpool_quantization_params(
177 (int32_t) -((uint32_t) input_zero_point * (uint32_t) nrows),
178 input_scale / (output_scale * (float) pooling_size),
179 output_zero_point, output_min, output_max);
180
181 average_pooling->ukernel_type = qnnp_ukernel_type_average_pooling;
182 average_pooling->format = qnnp_format_quint8;
183
184 *average_pooling_out = average_pooling;
185 return qnnp_status_success;
186
187error:
188 qnnp_delete_operator(average_pooling);
189 return status;
190}
191
192enum qnnp_status qnnp_setup_average_pooling2d_nhwc_q8(
193 qnnp_operator_t average_pooling,
194 size_t batch_size,
195 size_t input_height,
196 size_t input_width,
197 const uint8_t* input,
198 size_t input_pixel_stride,
199 uint8_t* output,
200 size_t output_pixel_stride,
201 pthreadpool_t threadpool)
202{
203 if (!qnnp_params.initialized) {
204 qnnp_log_error("qnnp_setup_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
205 return qnnp_status_uninitialized;
206 }
207
208 if (batch_size == 0) {
209 average_pooling->batch_size = 0;
210 return qnnp_status_success;
211 }
212
213 if (input_width == 0 || input_height == 0) {
214 qnnp_log_error(
215 "failed to setup average pooling with %zux%zu input: input dimensions must be non-zero",
216 input_width, input_height);
217 return qnnp_status_invalid_parameter;
218 }
219
220 average_pooling->batch_size = batch_size;
221 average_pooling->input_height = input_height;
222 average_pooling->input_width = input_width;
223 average_pooling->input = input;
224 average_pooling->input_pixel_stride = input_pixel_stride;
225
226 average_pooling->output_height = compute_output_dimension(
227 average_pooling->input_padding_top + input_height + average_pooling->input_padding_bottom,
228 average_pooling->kernel_height,
229 average_pooling->stride_height);
230 average_pooling->output_width = compute_output_dimension(
231 average_pooling->input_padding_left + input_width + average_pooling->input_padding_right,
232 average_pooling->kernel_width,
233 average_pooling->stride_width);
234 average_pooling->output = output;
235 average_pooling->output_pixel_stride = output_pixel_stride;
236
237 size_t valid_batch_size = 0;
238 if (input == average_pooling->last_input &&
239 input_height == average_pooling->last_input_height &&
240 input_width == average_pooling->last_input_width)
241 {
242 valid_batch_size = average_pooling->valid_batch_size;
243 if (batch_size <= valid_batch_size) {
244 return qnnp_status_success;
245 }
246 }
247
248 const size_t pooling_height = average_pooling->kernel_height;
249 const size_t pooling_width = average_pooling->kernel_width;
250 const size_t pooling_size = pooling_height * pooling_width;
251 const size_t output_height = average_pooling->output_height;
252 const size_t output_width = average_pooling->output_width;
253 /* Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer */
254 const uint32_t mr = qnnp_params.q8avgpool.mr;
255
256 const size_t step_width = min(average_pooling->stride_width, pooling_width);
257 const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
258 const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
259
260 const void** indirection_buffer = (const void**) realloc(average_pooling->indirection_buffer, indirection_buffer_size);
261 if (indirection_buffer == NULL) {
262 qnnp_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
263 return qnnp_status_out_of_memory;
264 }
265 average_pooling->indirection_buffer = indirection_buffer;
266
267 qnnp_indirection_init_dwconv2d(average_pooling, valid_batch_size, step_height, step_width);
268
269 average_pooling->last_input = input;
270 average_pooling->last_input_height = input_height;
271 average_pooling->last_input_width = input_width;
272 average_pooling->valid_batch_size = max(valid_batch_size, batch_size);
273
274 return qnnp_status_success;
275}
276