1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under the BSD-style license found in the |
6 | * LICENSE file in the root directory of this source tree. |
7 | */ |
8 | |
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <qnnpack.h>
#include <qnnpack/operator.h>
#include <qnnpack/log.h>
#include <qnnpack/common.h>
#include <qnnpack/math.h>
#include <qnnpack/params.h>
#include <qnnpack/indirection.h>
24 | |
25 | |
/*
 * Number of positions a pooling window can take along one dimension.
 *
 * A window of pooling_dimension elements is placed at the start of a span of
 * padded_input_dimension elements and then advanced by stride_dimension
 * elements at a time; the result is the number of whole steps that fit in
 * the remaining span, plus one for the initial position.
 *
 * No validation is performed here: the subtraction is unsigned, so it wraps
 * if pooling_dimension exceeds padded_input_dimension, and stride_dimension
 * is used as a divisor and therefore must be non-zero.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension)
{
  const size_t remaining_span = padded_input_dimension - pooling_dimension;
  const size_t whole_steps = remaining_span / stride_dimension;
  return whole_steps + 1;
}
33 | |
34 | enum qnnp_status qnnp_create_average_pooling2d_nhwc_q8( |
35 | uint32_t input_padding_top, |
36 | uint32_t input_padding_right, |
37 | uint32_t input_padding_bottom, |
38 | uint32_t input_padding_left, |
39 | uint32_t pooling_height, |
40 | uint32_t pooling_width, |
41 | uint32_t stride_height, |
42 | uint32_t stride_width, |
43 | size_t channels, |
44 | uint8_t input_zero_point, |
45 | float input_scale, |
46 | uint8_t output_zero_point, |
47 | float output_scale, |
48 | uint8_t output_min, |
49 | uint8_t output_max, |
50 | uint32_t flags, |
51 | qnnp_operator_t* average_pooling_out) |
52 | { |
53 | qnnp_operator_t average_pooling = NULL; |
54 | enum qnnp_status status = qnnp_status_uninitialized; |
55 | |
56 | if (!qnnp_params.initialized) { |
57 | qnnp_log_error("qnnp_create_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized" ); |
58 | goto error; |
59 | } |
60 | |
61 | status = qnnp_status_invalid_parameter; |
62 | |
63 | const uint32_t pooling_size = pooling_height * pooling_width; |
64 | if (pooling_size == 0) { |
65 | qnnp_log_error( |
66 | "failed to create average pooling with %" PRIu32 "x%" PRIu32 " pooling size: " |
67 | "pooling size dimensions must be non-zero" , |
68 | pooling_width, pooling_height); |
69 | goto error; |
70 | } |
71 | |
72 | if (pooling_size == 1) { |
73 | qnnp_log_error( |
74 | "failed to create average pooling with 1 pooling element: " |
75 | "1x1 pooling is meaningless" ); |
76 | goto error; |
77 | } |
78 | |
79 | if (stride_height == 0 || stride_width == 0) { |
80 | qnnp_log_error( |
81 | "failed to create average pooling with %" PRIu32 "x%" PRIu32 " stride: " |
82 | "stride dimensions must be non-zero" , |
83 | stride_width, stride_height); |
84 | goto error; |
85 | } |
86 | |
87 | if (channels == 0) { |
88 | qnnp_log_error( |
89 | "failed to create average pooling with %zu channels: " |
90 | "number of channels must be non-zero" , |
91 | channels); |
92 | goto error; |
93 | } |
94 | |
95 | if (input_scale <= 0.0f || !isnormal(input_scale)) { |
96 | qnnp_log_error( |
97 | "failed to create average pooling with %.7g input scale: " |
98 | "scale must be finite and positive" , |
99 | input_scale); |
100 | goto error; |
101 | } |
102 | |
103 | if (output_scale <= 0.0f || !isnormal(output_scale)) { |
104 | qnnp_log_error( |
105 | "failed to create average pooling with %.7g output scale: " |
106 | "scale must be finite and positive" , |
107 | output_scale); |
108 | goto error; |
109 | } |
110 | |
111 | status = qnnp_status_unsupported_parameter; |
112 | |
113 | const float input_output_scale = input_scale / output_scale; |
114 | if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) { |
115 | qnnp_log_error( |
116 | "failed to create average pooling with %.7g input scale and %.7g output scale: " |
117 | "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range" , |
118 | input_scale, output_scale, input_output_scale); |
119 | goto error; |
120 | } |
121 | |
122 | if (pooling_size >= 16777216) { |
123 | qnnp_log_error( |
124 | "failed to create average pooling with %" PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: " |
125 | "the number of elements in the pooling area must be below 2**24" , |
126 | pooling_size, pooling_width, pooling_height); |
127 | goto error; |
128 | } |
129 | |
130 | status = qnnp_status_out_of_memory; |
131 | |
132 | average_pooling = calloc(1, sizeof(struct qnnp_operator)); |
133 | if (average_pooling == NULL) { |
134 | qnnp_log_error("failed to allocate %zu bytes for qnnp_operator structure" , sizeof(struct qnnp_operator)); |
135 | goto error; |
136 | } |
137 | |
138 | const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0; |
139 | const uint32_t kr = qnnp_params.q8avgpool.kr; |
140 | const uint32_t mr = qnnp_params.q8avgpool.mr; |
141 | const uint32_t qr = qnnp_params.q8avgpool.qr; |
142 | if (any_padding || (channels >= kr || (pooling_size - mr) % qr != 0)) { |
143 | void* zero_buffer = malloc(channels); |
144 | if (zero_buffer == NULL) { |
145 | qnnp_log_error("failed to allocate %zu bytes for zero padding" , channels); |
146 | goto error; |
147 | } |
148 | memset(zero_buffer, input_zero_point, channels); |
149 | average_pooling->zero_buffer = zero_buffer; |
150 | average_pooling->zero_pointer = zero_buffer; |
151 | } |
152 | |
153 | average_pooling->input_padding_top = input_padding_top; |
154 | average_pooling->input_padding_right = input_padding_right; |
155 | average_pooling->input_padding_bottom = input_padding_bottom; |
156 | average_pooling->input_padding_left = input_padding_left; |
157 | |
158 | average_pooling->kernel_height = pooling_height; |
159 | average_pooling->kernel_width = pooling_width; |
160 | average_pooling->stride_height = stride_height; |
161 | average_pooling->stride_width = stride_width; |
162 | average_pooling->dilation_height = 1; |
163 | average_pooling->dilation_width = 1; |
164 | average_pooling->channels = channels; |
165 | |
166 | size_t nrows = pooling_height * pooling_width; |
167 | if (channels >= qnnp_params.q8avgpool.kr) { |
168 | if (nrows <= mr) { |
169 | nrows = mr; |
170 | } else { |
171 | nrows = round_up(nrows - mr, qr) + mr; |
172 | } |
173 | } |
174 | |
175 | average_pooling->avgpool_quantization_params = |
176 | qnnp_compute_avgpool_quantization_params( |
177 | (int32_t) -((uint32_t) input_zero_point * (uint32_t) nrows), |
178 | input_scale / (output_scale * (float) pooling_size), |
179 | output_zero_point, output_min, output_max); |
180 | |
181 | average_pooling->ukernel_type = qnnp_ukernel_type_average_pooling; |
182 | average_pooling->format = qnnp_format_quint8; |
183 | |
184 | *average_pooling_out = average_pooling; |
185 | return qnnp_status_success; |
186 | |
187 | error: |
188 | qnnp_delete_operator(average_pooling); |
189 | return status; |
190 | } |
191 | |
192 | enum qnnp_status qnnp_setup_average_pooling2d_nhwc_q8( |
193 | qnnp_operator_t average_pooling, |
194 | size_t batch_size, |
195 | size_t input_height, |
196 | size_t input_width, |
197 | const uint8_t* input, |
198 | size_t input_pixel_stride, |
199 | uint8_t* output, |
200 | size_t output_pixel_stride, |
201 | pthreadpool_t threadpool) |
202 | { |
203 | if (!qnnp_params.initialized) { |
204 | qnnp_log_error("qnnp_setup_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized" ); |
205 | return qnnp_status_uninitialized; |
206 | } |
207 | |
208 | if (batch_size == 0) { |
209 | average_pooling->batch_size = 0; |
210 | return qnnp_status_success; |
211 | } |
212 | |
213 | if (input_width == 0 || input_height == 0) { |
214 | qnnp_log_error( |
215 | "failed to setup average pooling with %zux%zu input: input dimensions must be non-zero" , |
216 | input_width, input_height); |
217 | return qnnp_status_invalid_parameter; |
218 | } |
219 | |
220 | average_pooling->batch_size = batch_size; |
221 | average_pooling->input_height = input_height; |
222 | average_pooling->input_width = input_width; |
223 | average_pooling->input = input; |
224 | average_pooling->input_pixel_stride = input_pixel_stride; |
225 | |
226 | average_pooling->output_height = compute_output_dimension( |
227 | average_pooling->input_padding_top + input_height + average_pooling->input_padding_bottom, |
228 | average_pooling->kernel_height, |
229 | average_pooling->stride_height); |
230 | average_pooling->output_width = compute_output_dimension( |
231 | average_pooling->input_padding_left + input_width + average_pooling->input_padding_right, |
232 | average_pooling->kernel_width, |
233 | average_pooling->stride_width); |
234 | average_pooling->output = output; |
235 | average_pooling->output_pixel_stride = output_pixel_stride; |
236 | |
237 | size_t valid_batch_size = 0; |
238 | if (input == average_pooling->last_input && |
239 | input_height == average_pooling->last_input_height && |
240 | input_width == average_pooling->last_input_width) |
241 | { |
242 | valid_batch_size = average_pooling->valid_batch_size; |
243 | if (batch_size <= valid_batch_size) { |
244 | return qnnp_status_success; |
245 | } |
246 | } |
247 | |
248 | const size_t pooling_height = average_pooling->kernel_height; |
249 | const size_t pooling_width = average_pooling->kernel_width; |
250 | const size_t pooling_size = pooling_height * pooling_width; |
251 | const size_t output_height = average_pooling->output_height; |
252 | const size_t output_width = average_pooling->output_width; |
253 | /* Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer */ |
254 | const uint32_t mr = qnnp_params.q8avgpool.mr; |
255 | |
256 | const size_t step_width = min(average_pooling->stride_width, pooling_width); |
257 | const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height; |
258 | const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height); |
259 | |
260 | const void** indirection_buffer = (const void**) realloc(average_pooling->indirection_buffer, indirection_buffer_size); |
261 | if (indirection_buffer == NULL) { |
262 | qnnp_log_error("failed to allocate %zu bytes for indirection buffer" , indirection_buffer_size); |
263 | return qnnp_status_out_of_memory; |
264 | } |
265 | average_pooling->indirection_buffer = indirection_buffer; |
266 | |
267 | qnnp_indirection_init_dwconv2d(average_pooling, valid_batch_size, step_height, step_width); |
268 | |
269 | average_pooling->last_input = input; |
270 | average_pooling->last_input_height = input_height; |
271 | average_pooling->last_input_width = input_width; |
272 | average_pooling->valid_batch_size = max(valid_batch_size, batch_size); |
273 | |
274 | return qnnp_status_success; |
275 | } |
276 | |