1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under the BSD-style license found in the |
6 | * LICENSE file in the root directory of this source tree. |
7 | */ |
8 | |
9 | #include <stddef.h> |
10 | |
11 | #include <fxdiv.h> |
12 | |
13 | #include <qnnpack/indirection.h> |
14 | #include <qnnpack/operator.h> |
15 | #include <qnnpack/math.h> |
16 | |
17 | |
18 | void qnnp_indirection_init_conv2d( |
19 | qnnp_operator_t op, |
20 | size_t output_tile_size, |
21 | size_t tiled_output_size) |
22 | { |
23 | const void** indirection_buffer = op->indirection_buffer; |
24 | const void* input = op->input; |
25 | const size_t input_pixel_stride = op->input_pixel_stride; |
26 | const void* zero = op->zero_pointer; |
27 | const size_t groups = op->groups; |
28 | const size_t group_input_channels = op->group_input_channels; |
29 | const size_t batch_size = op->batch_size; |
30 | const size_t input_height = op->input_height; |
31 | const size_t input_width = op->input_width; |
32 | const size_t output_height = op->output_height; |
33 | const size_t output_width = op->output_width; |
34 | const size_t kernel_height = op->kernel_height; |
35 | const size_t kernel_width = op->kernel_width; |
36 | const size_t stride_height = op->stride_height; |
37 | const size_t stride_width = op->stride_width; |
38 | const size_t dilation_height = op->dilation_height; |
39 | const size_t dilation_width = op->dilation_width; |
40 | const size_t input_padding_top = op->input_padding_top; |
41 | const size_t input_padding_left = op->input_padding_left; |
42 | |
43 | const size_t output_size = output_height * output_width; |
44 | const size_t kernel_size = kernel_height * kernel_width; |
45 | const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width); |
46 | for (size_t group = 0; group < groups; group++) { |
47 | for (size_t image = 0; image < batch_size; image++) { |
48 | for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) { |
49 | for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) { |
50 | const size_t tiled_output_index = output_tile_start + output_tile_offset; |
51 | const size_t output_index = min(tiled_output_index, output_size - 1); |
52 | const struct fxdiv_result_size_t output_index_components = fxdiv_divide_size_t(output_index, output_width_divisor); |
53 | const size_t output_y = output_index_components.quotient; |
54 | const size_t output_x = output_index_components.remainder; |
55 | for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) { |
56 | const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top; |
57 | if (input_y < input_height) { |
58 | for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) { |
59 | const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left; |
60 | const size_t index = (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset; |
61 | if (input_x < input_width) { |
62 | indirection_buffer[index] = (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels; |
63 | } else { |
64 | indirection_buffer[index] = zero; |
65 | } |
66 | } |
67 | } else { |
68 | for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) { |
69 | const size_t index = |
70 | (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset; |
71 | indirection_buffer[index] = zero; |
72 | } |
73 | } |
74 | } |
75 | } |
76 | } |
77 | } |
78 | } |
79 | } |
80 | |
81 | void qnnp_indirection_init_dwconv2d( |
82 | qnnp_operator_t op, |
83 | size_t batch_start, |
84 | size_t step_height, |
85 | size_t step_width) |
86 | { |
87 | const void** indirection_buffer = op->indirection_buffer; |
88 | const void* input = op->input; |
89 | const size_t input_pixel_stride = op->input_pixel_stride; |
90 | const void* zero = op->zero_pointer; |
91 | const size_t batch_size = op->batch_size; |
92 | const size_t input_height = op->input_height; |
93 | const size_t input_width = op->input_width; |
94 | const size_t output_height = op->output_height; |
95 | const size_t output_width = op->output_width; |
96 | const size_t kernel_height = op->kernel_height; |
97 | const size_t kernel_width = op->kernel_width; |
98 | const size_t stride_height = op->stride_height; |
99 | const size_t stride_width = op->stride_width; |
100 | const size_t dilation_height = op->dilation_height; |
101 | const size_t dilation_width = op->dilation_width; |
102 | const size_t input_padding_top = op->input_padding_top; |
103 | const size_t input_padding_left = op->input_padding_left; |
104 | |
105 | for (size_t image = batch_start; image < batch_size; image++) { |
106 | for (size_t output_y = 0; output_y < output_height; output_y++) { |
107 | for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) { |
108 | const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top; |
109 | if (input_y < input_height) { |
110 | for (size_t output_x = 0; output_x < output_width; output_x++) { |
111 | for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) { |
112 | const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left; |
113 | const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y; |
114 | if (input_x < input_width) { |
115 | indirection_buffer[index] = (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride; |
116 | } else { |
117 | indirection_buffer[index] = zero; |
118 | } |
119 | } |
120 | } |
121 | } else { |
122 | for (size_t output_x = 0; output_x < output_width; output_x++) { |
123 | for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) { |
124 | const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y; |
125 | indirection_buffer[index] = zero; |
126 | } |
127 | } |
128 | } |
129 | } |
130 | } |
131 | } |
132 | } |
133 | |
134 | void qnnp_indirection_init_deconv2d( |
135 | qnnp_operator_t op, |
136 | size_t output_tile_size, |
137 | size_t tiled_output_size) |
138 | { |
139 | const void** indirection_buffer = op->indirection_buffer; |
140 | const void* input = op->input; |
141 | const size_t input_pixel_stride = op->input_pixel_stride; |
142 | const void* zero = op->zero_pointer; |
143 | const size_t groups = op->groups; |
144 | const size_t group_input_channels = op->group_input_channels; |
145 | const size_t batch_size = op->batch_size; |
146 | const size_t input_height = op->input_height; |
147 | const size_t input_width = op->input_width; |
148 | const size_t output_height = op->output_height; |
149 | const size_t output_width = op->output_width; |
150 | const size_t kernel_height = op->kernel_height; |
151 | const size_t kernel_width = op->kernel_width; |
152 | const size_t stride_height = op->stride_height; |
153 | const size_t stride_width = op->stride_width; |
154 | const size_t dilation_height = op->dilation_height; |
155 | const size_t dilation_width = op->dilation_width; |
156 | const size_t input_padding_top = op->input_padding_top; |
157 | const size_t input_padding_left = op->input_padding_left; |
158 | |
159 | const size_t output_size = output_height * output_width; |
160 | const size_t kernel_size = kernel_height * kernel_width; |
161 | |
162 | for (size_t group = 0; group < groups; group++) { |
163 | for (size_t image = 0; image < batch_size; image++) { |
164 | for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) { |
165 | for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) { |
166 | const size_t tiled_output_index = output_tile_start + output_tile_offset; |
167 | const size_t output_index = min(tiled_output_index, output_size - 1); |
168 | const size_t output_y = output_index / output_width; |
169 | const size_t output_x = output_index % output_width; |
170 | for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) { |
171 | const size_t y = output_y + input_padding_top - kernel_y * dilation_height; |
172 | const size_t input_y = y / stride_height; |
173 | for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) { |
174 | const size_t x = output_x + input_padding_left - kernel_x * dilation_width; |
175 | const size_t input_x = x / stride_width; |
176 | const size_t index = |
177 | (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset; |
178 | if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) { |
179 | indirection_buffer[index] = |
180 | (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels; |
181 | } else { |
182 | indirection_buffer[index] = zero; |
183 | } |
184 | } |
185 | } |
186 | } |
187 | } |
188 | } |
189 | } |
190 | } |
191 | |
192 | void qnnp_indirection_init_maxpool2d( |
193 | qnnp_operator_t op, |
194 | size_t batch_start, |
195 | size_t step_height, |
196 | size_t step_width) |
197 | { |
198 | const void** indirection_buffer = op->indirection_buffer; |
199 | const void* input = op->input; |
200 | const size_t input_pixel_stride = op->input_pixel_stride; |
201 | const size_t batch_size = op->batch_size; |
202 | const size_t input_height = op->input_height; |
203 | const size_t input_width = op->input_width; |
204 | const size_t output_height = op->output_height; |
205 | const size_t output_width = op->output_width; |
206 | const size_t pooling_height = op->kernel_height; |
207 | const size_t pooling_width = op->kernel_width; |
208 | const size_t stride_height = op->stride_height; |
209 | const size_t stride_width = op->stride_width; |
210 | const size_t dilation_height = op->dilation_height; |
211 | const size_t dilation_width = op->dilation_width; |
212 | const size_t input_padding_top = op->input_padding_top; |
213 | const size_t input_padding_left = op->input_padding_left; |
214 | |
215 | for (size_t image = batch_start; image < batch_size; image++) { |
216 | for (size_t output_y = 0; output_y < output_height; output_y++) { |
217 | for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) { |
218 | const size_t input_y = doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top); |
219 | const size_t clamped_input_y = min(input_y, input_height - 1); |
220 | for (size_t output_x = 0; output_x < output_width; output_x++) { |
221 | for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) { |
222 | const size_t input_x = doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left); |
223 | const size_t clamped_input_x = min(input_x, input_width - 1); |
224 | const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y; |
225 | indirection_buffer[index] = (char*)input + ((image * input_height + clamped_input_y) * input_width + clamped_input_x) * input_pixel_stride; |
226 | } |
227 | } |
228 | } |
229 | } |
230 | } |
231 | } |
232 | |