1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under the BSD-style license found in the |
6 | * LICENSE file in the root directory of this source tree. |
7 | */ |
8 | |
9 | #include <assert.h> |
10 | #include <math.h> |
11 | #include <stdbool.h> |
12 | #include <stddef.h> |
13 | #include <stdint.h> |
14 | #include <stdlib.h> |
15 | #include <string.h> |
16 | |
17 | #include <qnnpack.h> |
18 | #include <qnnpack/operator.h> |
19 | #include <qnnpack/log.h> |
20 | #include <qnnpack/common.h> |
21 | #include <qnnpack/math.h> |
22 | #include <qnnpack/params.h> |
23 | #include <qnnpack/indirection.h> |
24 | |
25 | |
/*
 * Computes how many pooling-window positions fit along one spatial axis.
 *
 * padded_input_dimension - input extent along the axis, padding included.
 * kernel_dimension       - pooling window extent along the axis; must be >= 1.
 * dilation_dimension     - spacing between window taps along the axis.
 * stride_dimension       - step between consecutive windows; must be >= 1
 *                          (a zero stride would divide by zero).
 *
 * Returns floor((padded_input - effective_kernel) / stride) + 1, where
 * effective_kernel = (kernel - 1) * dilation + 1 is the dilated window extent.
 * Returns 0 when the dilated window is larger than the padded input.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension)
{
  const size_t effective_kernel_dimension = (kernel_dimension - 1) * dilation_dimension + 1;
  if (padded_input_dimension < effective_kernel_dimension) {
    /*
     * The window does not fit even once. Without this guard the unsigned
     * subtraction below wraps around and produces an enormous bogus size.
     */
    return 0;
  }
  return (padded_input_dimension - effective_kernel_dimension) / stride_dimension + 1;
}
35 | |
36 | enum qnnp_status qnnp_create_max_pooling2d_nhwc_u8( |
37 | uint32_t input_padding_top, |
38 | uint32_t input_padding_right, |
39 | uint32_t input_padding_bottom, |
40 | uint32_t input_padding_left, |
41 | uint32_t pooling_height, |
42 | uint32_t pooling_width, |
43 | uint32_t stride_height, |
44 | uint32_t stride_width, |
45 | uint32_t dilation_height, |
46 | uint32_t dilation_width, |
47 | size_t channels, |
48 | uint8_t output_min, |
49 | uint8_t output_max, |
50 | uint32_t flags, |
51 | qnnp_operator_t* max_pooling_out) |
52 | { |
53 | qnnp_operator_t max_pooling = NULL; |
54 | enum qnnp_status status = qnnp_status_uninitialized; |
55 | |
56 | if (!qnnp_params.initialized) { |
57 | qnnp_log_error("qnnp_create_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized" ); |
58 | goto error; |
59 | } |
60 | |
61 | status = qnnp_status_invalid_parameter; |
62 | |
63 | const uint32_t pooling_size = pooling_height * pooling_width; |
64 | if (pooling_size == 0) { |
65 | qnnp_log_error( |
66 | "failed to create max pooling with %" PRIu32 "x%" PRIu32 " pooling size: " |
67 | "pooling size dimensions must be non-zero" , |
68 | pooling_width, pooling_height); |
69 | goto error; |
70 | } |
71 | |
72 | if (pooling_size == 1) { |
73 | qnnp_log_error( |
74 | "failed to create max pooling with 1 pooling element: " |
75 | "1x1 pooling is meaningless" ); |
76 | goto error; |
77 | } |
78 | |
79 | if (stride_height == 0 || stride_width == 0) { |
80 | qnnp_log_error( |
81 | "failed to create max pooling with %" PRIu32 "x%" PRIu32 " stride: " |
82 | "stride dimensions must be non-zero" , |
83 | stride_width, stride_height); |
84 | goto error; |
85 | } |
86 | |
87 | if (dilation_height == 0 || dilation_width == 0) { |
88 | qnnp_log_error( |
89 | "failed to create max pooling with %" PRIu32 "x%" PRIu32 " dilation: " |
90 | "dilation dimensions must be non-zero" , |
91 | dilation_width, dilation_height); |
92 | goto error; |
93 | } |
94 | |
95 | if (channels == 0) { |
96 | qnnp_log_error( |
97 | "failed to create max pooling with %zu channels: " |
98 | "number of channels must be non-zero" , |
99 | channels); |
100 | goto error; |
101 | } |
102 | |
103 | status = qnnp_status_out_of_memory; |
104 | |
105 | max_pooling = calloc(1, sizeof(struct qnnp_operator)); |
106 | if (max_pooling == NULL) { |
107 | qnnp_log_error("failed to allocate %zu bytes for qnnp_operator structure" , sizeof(struct qnnp_operator)); |
108 | goto error; |
109 | } |
110 | |
111 | max_pooling->input_padding_top = input_padding_top; |
112 | max_pooling->input_padding_right = input_padding_right; |
113 | max_pooling->input_padding_bottom = input_padding_bottom; |
114 | max_pooling->input_padding_left = input_padding_left; |
115 | |
116 | max_pooling->kernel_height = pooling_height; |
117 | max_pooling->kernel_width = pooling_width; |
118 | max_pooling->stride_height = stride_height; |
119 | max_pooling->stride_width = stride_width; |
120 | max_pooling->dilation_height = dilation_height; |
121 | max_pooling->dilation_width = dilation_width; |
122 | max_pooling->channels = channels; |
123 | |
124 | max_pooling->u8_clamping_params = qnnp_compute_u8_clamping_params(output_min, output_max); |
125 | |
126 | max_pooling->ukernel_type = qnnp_ukernel_type_max_pooling; |
127 | max_pooling->format = qnnp_format_quint8; |
128 | |
129 | *max_pooling_out = max_pooling; |
130 | return qnnp_status_success; |
131 | |
132 | error: |
133 | qnnp_delete_operator(max_pooling); |
134 | return status; |
135 | } |
136 | |
137 | enum qnnp_status qnnp_setup_max_pooling2d_nhwc_u8( |
138 | qnnp_operator_t max_pooling, |
139 | size_t batch_size, |
140 | size_t input_height, |
141 | size_t input_width, |
142 | const uint8_t* input, |
143 | size_t input_pixel_stride, |
144 | uint8_t* output, |
145 | size_t output_pixel_stride, |
146 | pthreadpool_t threadpool) |
147 | { |
148 | if (!qnnp_params.initialized) { |
149 | qnnp_log_error("qnnp_setup_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized" ); |
150 | return qnnp_status_uninitialized; |
151 | } |
152 | |
153 | if (batch_size == 0) { |
154 | max_pooling->batch_size = 0; |
155 | return qnnp_status_success; |
156 | } |
157 | |
158 | if (input_width == 0 || input_height == 0) { |
159 | qnnp_log_error( |
160 | "failed to setup max pooling with %zux%zu input: input dimensions must be non-zero" , |
161 | input_width, input_height); |
162 | return qnnp_status_invalid_parameter; |
163 | } |
164 | |
165 | max_pooling->batch_size = batch_size; |
166 | max_pooling->input_height = input_height; |
167 | max_pooling->input_width = input_width; |
168 | max_pooling->input = input; |
169 | max_pooling->input_pixel_stride = input_pixel_stride; |
170 | |
171 | max_pooling->output_height = compute_output_dimension( |
172 | max_pooling->input_padding_top + input_height + max_pooling->input_padding_bottom, |
173 | max_pooling->kernel_height, |
174 | max_pooling->dilation_height, |
175 | max_pooling->stride_height); |
176 | max_pooling->output_width = compute_output_dimension( |
177 | max_pooling->input_padding_left + input_width + max_pooling->input_padding_right, |
178 | max_pooling->kernel_width, |
179 | max_pooling->dilation_width, |
180 | max_pooling->stride_width); |
181 | max_pooling->output = output; |
182 | max_pooling->output_pixel_stride = output_pixel_stride; |
183 | |
184 | size_t valid_batch_size = 0; |
185 | if (input == max_pooling->last_input && |
186 | input_height == max_pooling->last_input_height && |
187 | input_width == max_pooling->last_input_width) |
188 | { |
189 | valid_batch_size = max_pooling->valid_batch_size; |
190 | if (batch_size <= valid_batch_size) { |
191 | return qnnp_status_success; |
192 | } |
193 | } |
194 | |
195 | const size_t pooling_height = max_pooling->kernel_height; |
196 | const size_t pooling_width = max_pooling->kernel_width; |
197 | const size_t pooling_size = pooling_height * pooling_width; |
198 | const size_t output_height = max_pooling->output_height; |
199 | const size_t output_width = max_pooling->output_width; |
200 | /* Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer */ |
201 | const uint32_t mr = qnnp_params.u8maxpool.mr; |
202 | |
203 | const size_t step_width = |
204 | max_pooling->dilation_width > 1 ? pooling_width : min(max_pooling->stride_width, pooling_width); |
205 | const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height; |
206 | const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height); |
207 | |
208 | const void** indirection_buffer = (const void**) realloc(max_pooling->indirection_buffer, indirection_buffer_size); |
209 | if (indirection_buffer == NULL) { |
210 | qnnp_log_error("failed to allocate %zu bytes for indirection buffer" , indirection_buffer_size); |
211 | return qnnp_status_out_of_memory; |
212 | } |
213 | max_pooling->indirection_buffer = indirection_buffer; |
214 | |
215 | qnnp_indirection_init_maxpool2d(max_pooling, valid_batch_size, step_height, step_width); |
216 | |
217 | max_pooling->last_input = input; |
218 | max_pooling->last_input_height = input_height; |
219 | max_pooling->last_input_width = input_width; |
220 | max_pooling->valid_batch_size = max(valid_batch_size, batch_size); |
221 | |
222 | return qnnp_status_success; |
223 | } |
224 | |