1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9#include <stddef.h>
10
11#include <fxdiv.h>
12
13#include <qnnpack/indirection.h>
14#include <qnnpack/operator.h>
15#include <qnnpack/math.h>
16
17
18void qnnp_indirection_init_conv2d(
19 qnnp_operator_t op,
20 size_t output_tile_size,
21 size_t tiled_output_size)
22{
23 const void** indirection_buffer = op->indirection_buffer;
24 const void* input = op->input;
25 const size_t input_pixel_stride = op->input_pixel_stride;
26 const void* zero = op->zero_pointer;
27 const size_t groups = op->groups;
28 const size_t group_input_channels = op->group_input_channels;
29 const size_t batch_size = op->batch_size;
30 const size_t input_height = op->input_height;
31 const size_t input_width = op->input_width;
32 const size_t output_height = op->output_height;
33 const size_t output_width = op->output_width;
34 const size_t kernel_height = op->kernel_height;
35 const size_t kernel_width = op->kernel_width;
36 const size_t stride_height = op->stride_height;
37 const size_t stride_width = op->stride_width;
38 const size_t dilation_height = op->dilation_height;
39 const size_t dilation_width = op->dilation_width;
40 const size_t input_padding_top = op->input_padding_top;
41 const size_t input_padding_left = op->input_padding_left;
42
43 const size_t output_size = output_height * output_width;
44 const size_t kernel_size = kernel_height * kernel_width;
45 const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);
46 for (size_t group = 0; group < groups; group++) {
47 for (size_t image = 0; image < batch_size; image++) {
48 for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
49 for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
50 const size_t tiled_output_index = output_tile_start + output_tile_offset;
51 const size_t output_index = min(tiled_output_index, output_size - 1);
52 const struct fxdiv_result_size_t output_index_components = fxdiv_divide_size_t(output_index, output_width_divisor);
53 const size_t output_y = output_index_components.quotient;
54 const size_t output_x = output_index_components.remainder;
55 for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
56 const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
57 if (input_y < input_height) {
58 for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
59 const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
60 const size_t index = (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
61 if (input_x < input_width) {
62 indirection_buffer[index] = (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels;
63 } else {
64 indirection_buffer[index] = zero;
65 }
66 }
67 } else {
68 for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
69 const size_t index =
70 (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
71 indirection_buffer[index] = zero;
72 }
73 }
74 }
75 }
76 }
77 }
78 }
79}
80
81void qnnp_indirection_init_dwconv2d(
82 qnnp_operator_t op,
83 size_t batch_start,
84 size_t step_height,
85 size_t step_width)
86{
87 const void** indirection_buffer = op->indirection_buffer;
88 const void* input = op->input;
89 const size_t input_pixel_stride = op->input_pixel_stride;
90 const void* zero = op->zero_pointer;
91 const size_t batch_size = op->batch_size;
92 const size_t input_height = op->input_height;
93 const size_t input_width = op->input_width;
94 const size_t output_height = op->output_height;
95 const size_t output_width = op->output_width;
96 const size_t kernel_height = op->kernel_height;
97 const size_t kernel_width = op->kernel_width;
98 const size_t stride_height = op->stride_height;
99 const size_t stride_width = op->stride_width;
100 const size_t dilation_height = op->dilation_height;
101 const size_t dilation_width = op->dilation_width;
102 const size_t input_padding_top = op->input_padding_top;
103 const size_t input_padding_left = op->input_padding_left;
104
105 for (size_t image = batch_start; image < batch_size; image++) {
106 for (size_t output_y = 0; output_y < output_height; output_y++) {
107 for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
108 const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
109 if (input_y < input_height) {
110 for (size_t output_x = 0; output_x < output_width; output_x++) {
111 for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
112 const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
113 const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
114 if (input_x < input_width) {
115 indirection_buffer[index] = (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride;
116 } else {
117 indirection_buffer[index] = zero;
118 }
119 }
120 }
121 } else {
122 for (size_t output_x = 0; output_x < output_width; output_x++) {
123 for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
124 const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
125 indirection_buffer[index] = zero;
126 }
127 }
128 }
129 }
130 }
131 }
132}
133
134void qnnp_indirection_init_deconv2d(
135 qnnp_operator_t op,
136 size_t output_tile_size,
137 size_t tiled_output_size)
138{
139 const void** indirection_buffer = op->indirection_buffer;
140 const void* input = op->input;
141 const size_t input_pixel_stride = op->input_pixel_stride;
142 const void* zero = op->zero_pointer;
143 const size_t groups = op->groups;
144 const size_t group_input_channels = op->group_input_channels;
145 const size_t batch_size = op->batch_size;
146 const size_t input_height = op->input_height;
147 const size_t input_width = op->input_width;
148 const size_t output_height = op->output_height;
149 const size_t output_width = op->output_width;
150 const size_t kernel_height = op->kernel_height;
151 const size_t kernel_width = op->kernel_width;
152 const size_t stride_height = op->stride_height;
153 const size_t stride_width = op->stride_width;
154 const size_t dilation_height = op->dilation_height;
155 const size_t dilation_width = op->dilation_width;
156 const size_t input_padding_top = op->input_padding_top;
157 const size_t input_padding_left = op->input_padding_left;
158
159 const size_t output_size = output_height * output_width;
160 const size_t kernel_size = kernel_height * kernel_width;
161
162 for (size_t group = 0; group < groups; group++) {
163 for (size_t image = 0; image < batch_size; image++) {
164 for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
165 for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
166 const size_t tiled_output_index = output_tile_start + output_tile_offset;
167 const size_t output_index = min(tiled_output_index, output_size - 1);
168 const size_t output_y = output_index / output_width;
169 const size_t output_x = output_index % output_width;
170 for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
171 const size_t y = output_y + input_padding_top - kernel_y * dilation_height;
172 const size_t input_y = y / stride_height;
173 for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
174 const size_t x = output_x + input_padding_left - kernel_x * dilation_width;
175 const size_t input_x = x / stride_width;
176 const size_t index =
177 (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
178 if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {
179 indirection_buffer[index] =
180 (char*)input + ((image * input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels;
181 } else {
182 indirection_buffer[index] = zero;
183 }
184 }
185 }
186 }
187 }
188 }
189 }
190}
191
192void qnnp_indirection_init_maxpool2d(
193 qnnp_operator_t op,
194 size_t batch_start,
195 size_t step_height,
196 size_t step_width)
197{
198 const void** indirection_buffer = op->indirection_buffer;
199 const void* input = op->input;
200 const size_t input_pixel_stride = op->input_pixel_stride;
201 const size_t batch_size = op->batch_size;
202 const size_t input_height = op->input_height;
203 const size_t input_width = op->input_width;
204 const size_t output_height = op->output_height;
205 const size_t output_width = op->output_width;
206 const size_t pooling_height = op->kernel_height;
207 const size_t pooling_width = op->kernel_width;
208 const size_t stride_height = op->stride_height;
209 const size_t stride_width = op->stride_width;
210 const size_t dilation_height = op->dilation_height;
211 const size_t dilation_width = op->dilation_width;
212 const size_t input_padding_top = op->input_padding_top;
213 const size_t input_padding_left = op->input_padding_left;
214
215 for (size_t image = batch_start; image < batch_size; image++) {
216 for (size_t output_y = 0; output_y < output_height; output_y++) {
217 for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {
218 const size_t input_y = doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top);
219 const size_t clamped_input_y = min(input_y, input_height - 1);
220 for (size_t output_x = 0; output_x < output_width; output_x++) {
221 for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {
222 const size_t input_x = doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left);
223 const size_t clamped_input_x = min(input_x, input_width - 1);
224 const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;
225 indirection_buffer[index] = (char*)input + ((image * input_height + clamped_input_y) * input_width + clamped_input_x) * input_pixel_stride;
226 }
227 }
228 }
229 }
230 }
231}
232