indirection.c source code [pytorch/third_party/QNNPACK/src/indirection.c]

1	/*
2	* Copyright (c) Facebook, Inc. and its affiliates.
3	* All rights reserved.
4	*
5	* This source code is licensed under the BSD-style license found in the
6	* LICENSE file in the root directory of this source tree.
7	*/
8
9	#include <stddef.h>
10
11	#include <fxdiv.h>
12
13	#include <qnnpack/indirection.h>
14	#include <qnnpack/operator.h>
15	#include <qnnpack/math.h>
16
17
18	void qnnp_indirection_init_conv2d(
19	qnnp_operator_t op,
20	size_t output_tile_size,
21	size_t tiled_output_size)
22	{
23	const void** indirection_buffer = op->indirection_buffer;
24	const void* input = op->input;
25	const size_t input_pixel_stride = op->input_pixel_stride;
26	const void* zero = op->zero_pointer;
27	const size_t groups = op->groups;
28	const size_t group_input_channels = op->group_input_channels;
29	const size_t batch_size = op->batch_size;
30	const size_t input_height = op->input_height;
31	const size_t input_width = op->input_width;
32	const size_t output_height = op->output_height;
33	const size_t output_width = op->output_width;
34	const size_t kernel_height = op->kernel_height;
35	const size_t kernel_width = op->kernel_width;
36	const size_t stride_height = op->stride_height;
37	const size_t stride_width = op->stride_width;
38	const size_t dilation_height = op->dilation_height;
39	const size_t dilation_width = op->dilation_width;
40	const size_t input_padding_top = op->input_padding_top;
41	const size_t input_padding_left = op->input_padding_left;
42
43	const size_t output_size = output_height * output_width;
44	const size_t kernel_size = kernel_height * kernel_width;
45	const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);
46	for (size_t group = `0`; group < groups; group++) {
47	for (size_t image = `0`; image < batch_size; image++) {
48	for (size_t output_tile_start = `0`; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
49	for (size_t output_tile_offset = `0`; output_tile_offset < output_tile_size; output_tile_offset++) {
50	const size_t tiled_output_index = output_tile_start + output_tile_offset;
51	const size_t output_index = min(tiled_output_index, output_size - `1`);
52	const struct fxdiv_result_size_t output_index_components = fxdiv_divide_size_t(output_index, output_width_divisor);
53	const size_t output_y = output_index_components.quotient;
54	const size_t output_x = output_index_components.remainder;
55	for (size_t kernel_y = `0`; kernel_y < kernel_height; kernel_y++) {
56	const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
57	if (input_y < input_height) {
58	for (size_t kernel_x = `0`; kernel_x < kernel_width; kernel_x++) {
59	const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
60	const size_t index = (group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
61	if (input_x < input_width) {
62	indirection_buffer[index] = (char)input + ((image input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels;
63	} else {
64	indirection_buffer[index] = zero;
65	}
66	}
67	} else {
68	for (size_t kernel_x = `0`; kernel_x < kernel_width; kernel_x++) {
69	const size_t index =
70	(group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
71	indirection_buffer[index] = zero;
72	}
73	}
74	}
75	}
76	}
77	}
78	}
79	}
80
81	void qnnp_indirection_init_dwconv2d(
82	qnnp_operator_t op,
83	size_t batch_start,
84	size_t step_height,
85	size_t step_width)
86	{
87	const void** indirection_buffer = op->indirection_buffer;
88	const void* input = op->input;
89	const size_t input_pixel_stride = op->input_pixel_stride;
90	const void* zero = op->zero_pointer;
91	const size_t batch_size = op->batch_size;
92	const size_t input_height = op->input_height;
93	const size_t input_width = op->input_width;
94	const size_t output_height = op->output_height;
95	const size_t output_width = op->output_width;
96	const size_t kernel_height = op->kernel_height;
97	const size_t kernel_width = op->kernel_width;
98	const size_t stride_height = op->stride_height;
99	const size_t stride_width = op->stride_width;
100	const size_t dilation_height = op->dilation_height;
101	const size_t dilation_width = op->dilation_width;
102	const size_t input_padding_top = op->input_padding_top;
103	const size_t input_padding_left = op->input_padding_left;
104
105	for (size_t image = batch_start; image < batch_size; image++) {
106	for (size_t output_y = `0`; output_y < output_height; output_y++) {
107	for (size_t kernel_y = `0`; kernel_y < kernel_height; kernel_y++) {
108	const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
109	if (input_y < input_height) {
110	for (size_t output_x = `0`; output_x < output_width; output_x++) {
111	for (size_t kernel_x = `0`; kernel_x < kernel_width; kernel_x++) {
112	const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
113	const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
114	if (input_x < input_width) {
115	indirection_buffer[index] = (char)input + ((image input_height + input_y) * input_width + input_x) * input_pixel_stride;
116	} else {
117	indirection_buffer[index] = zero;
118	}
119	}
120	}
121	} else {
122	for (size_t output_x = `0`; output_x < output_width; output_x++) {
123	for (size_t kernel_x = `0`; kernel_x < kernel_width; kernel_x++) {
124	const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
125	indirection_buffer[index] = zero;
126	}
127	}
128	}
129	}
130	}
131	}
132	}
133
134	void qnnp_indirection_init_deconv2d(
135	qnnp_operator_t op,
136	size_t output_tile_size,
137	size_t tiled_output_size)
138	{
139	const void** indirection_buffer = op->indirection_buffer;
140	const void* input = op->input;
141	const size_t input_pixel_stride = op->input_pixel_stride;
142	const void* zero = op->zero_pointer;
143	const size_t groups = op->groups;
144	const size_t group_input_channels = op->group_input_channels;
145	const size_t batch_size = op->batch_size;
146	const size_t input_height = op->input_height;
147	const size_t input_width = op->input_width;
148	const size_t output_height = op->output_height;
149	const size_t output_width = op->output_width;
150	const size_t kernel_height = op->kernel_height;
151	const size_t kernel_width = op->kernel_width;
152	const size_t stride_height = op->stride_height;
153	const size_t stride_width = op->stride_width;
154	const size_t dilation_height = op->dilation_height;
155	const size_t dilation_width = op->dilation_width;
156	const size_t input_padding_top = op->input_padding_top;
157	const size_t input_padding_left = op->input_padding_left;
158
159	const size_t output_size = output_height * output_width;
160	const size_t kernel_size = kernel_height * kernel_width;
161
162	for (size_t group = `0`; group < groups; group++) {
163	for (size_t image = `0`; image < batch_size; image++) {
164	for (size_t output_tile_start = `0`; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
165	for (size_t output_tile_offset = `0`; output_tile_offset < output_tile_size; output_tile_offset++) {
166	const size_t tiled_output_index = output_tile_start + output_tile_offset;
167	const size_t output_index = min(tiled_output_index, output_size - `1`);
168	const size_t output_y = output_index / output_width;
169	const size_t output_x = output_index % output_width;
170	for (size_t kernel_y = `0`; kernel_y < kernel_height; kernel_y++) {
171	const size_t y = output_y + input_padding_top - kernel_y * dilation_height;
172	const size_t input_y = y / stride_height;
173	for (size_t kernel_x = `0`; kernel_x < kernel_width; kernel_x++) {
174	const size_t x = output_x + input_padding_left - kernel_x * dilation_width;
175	const size_t input_x = x / stride_width;
176	const size_t index =
177	(group * batch_size + image) * tiled_output_size * kernel_size + output_tile_start * kernel_size + (kernel_y * kernel_width + kernel_x) * output_tile_size + output_tile_offset;
178	if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {
179	indirection_buffer[index] =
180	(char)input + ((image input_height + input_y) * input_width + input_x) * input_pixel_stride + group * group_input_channels;
181	} else {
182	indirection_buffer[index] = zero;
183	}
184	}
185	}
186	}
187	}
188	}
189	}
190	}
191
192	void qnnp_indirection_init_maxpool2d(
193	qnnp_operator_t op,
194	size_t batch_start,
195	size_t step_height,
196	size_t step_width)
197	{
198	const void** indirection_buffer = op->indirection_buffer;
199	const void* input = op->input;
200	const size_t input_pixel_stride = op->input_pixel_stride;
201	const size_t batch_size = op->batch_size;
202	const size_t input_height = op->input_height;
203	const size_t input_width = op->input_width;
204	const size_t output_height = op->output_height;
205	const size_t output_width = op->output_width;
206	const size_t pooling_height = op->kernel_height;
207	const size_t pooling_width = op->kernel_width;
208	const size_t stride_height = op->stride_height;
209	const size_t stride_width = op->stride_width;
210	const size_t dilation_height = op->dilation_height;
211	const size_t dilation_width = op->dilation_width;
212	const size_t input_padding_top = op->input_padding_top;
213	const size_t input_padding_left = op->input_padding_left;
214
215	for (size_t image = batch_start; image < batch_size; image++) {
216	for (size_t output_y = `0`; output_y < output_height; output_y++) {
217	for (size_t pooling_y = `0`; pooling_y < pooling_height; pooling_y++) {
218	const size_t input_y = doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top);
219	const size_t clamped_input_y = min(input_y, input_height - `1`);
220	for (size_t output_x = `0`; output_x < output_width; output_x++) {
221	for (size_t pooling_x = `0`; pooling_x < pooling_width; pooling_x++) {
222	const size_t input_x = doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left);
223	const size_t clamped_input_x = min(input_x, input_width - `1`);
224	const size_t index = (image * output_height + output_y) * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;
225	indirection_buffer[index] = (char)input + ((image input_height + clamped_input_y) * input_width + clamped_input_x) * input_pixel_stride;
226	}
227	}
228	}
229	}
230	}
231	}
232

Browse the source code of pytorch/third_party/QNNPACK/src/indirection.c