/* legacy-api.c source code [pytorch/third_party/pthreadpool/src/legacy-api.c] */

/* Standard C headers */
2	#include <stddef.h>
3
/* Dependencies */
5	#include <fxdiv.h>
6
/* Public library header */
8	#include <pthreadpool.h>
9
/* Internal library headers */
11	#include "threadpool-utils.h"
12
13
14	void pthreadpool_compute_1d(
15	pthreadpool_t threadpool,
16	pthreadpool_function_1d_t function,
17	void* argument,
18	size_t range)
19	{
20	pthreadpool_parallelize_1d(threadpool,
21	(pthreadpool_task_1d_t) function, argument,
22	range, `0` / flags /);
23	}
24
25	void pthreadpool_compute_1d_tiled(
26	pthreadpool_t threadpool,
27	pthreadpool_function_1d_tiled_t function,
28	void* argument,
29	size_t range,
30	size_t tile)
31	{
32	pthreadpool_parallelize_1d_tile_1d(threadpool,
33	(pthreadpool_task_1d_tile_1d_t) function, argument,
34	range, tile, `0` / flags /);
35	}
36
37	void pthreadpool_compute_2d(
38	pthreadpool_t threadpool,
39	pthreadpool_function_2d_t function,
40	void* argument,
41	size_t range_i,
42	size_t range_j)
43	{
44	pthreadpool_parallelize_2d(threadpool,
45	(pthreadpool_task_2d_t) function, argument,
46	range_i, range_j, `0` / flags /);
47	}
48
49	void pthreadpool_compute_2d_tiled(
50	pthreadpool_t threadpool,
51	pthreadpool_function_2d_tiled_t function,
52	void* argument,
53	size_t range_i,
54	size_t range_j,
55	size_t tile_i,
56	size_t tile_j)
57	{
58	pthreadpool_parallelize_2d_tile_2d(threadpool,
59	(pthreadpool_task_2d_tile_2d_t) function, argument,
60	range_i, range_j, tile_i, tile_j, `0` / flags /);
61	}
62
63	struct compute_3d_tiled_context {
64	pthreadpool_function_3d_tiled_t function;
65	void* argument;
66	struct fxdiv_divisor_size_t tile_range_j;
67	struct fxdiv_divisor_size_t tile_range_k;
68	size_t range_i;
69	size_t range_j;
70	size_t range_k;
71	size_t tile_i;
72	size_t tile_j;
73	size_t tile_k;
74	};
75
76	static void compute_3d_tiled(const struct compute_3d_tiled_context* context, size_t linear_index) {
77	const struct fxdiv_divisor_size_t tile_range_k = context->tile_range_k;
78	const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
79	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
80	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);
81	const size_t max_tile_i = context->tile_i;
82	const size_t max_tile_j = context->tile_j;
83	const size_t max_tile_k = context->tile_k;
84	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
85	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
86	const size_t index_k = tile_index_ij_k.remainder * max_tile_k;
87	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
88	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
89	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
90	context->function(context->argument, index_i, index_j, index_k, tile_i, tile_j, tile_k);
91	}
92
93	void pthreadpool_compute_3d_tiled(
94	pthreadpool_t threadpool,
95	pthreadpool_function_3d_tiled_t function,
96	void* argument,
97	size_t range_i,
98	size_t range_j,
99	size_t range_k,
100	size_t tile_i,
101	size_t tile_j,
102	size_t tile_k)
103	{
104	if (pthreadpool_get_threads_count(threadpool) <= `1`) {
105	/ No thread pool used: execute function sequentially on the calling thread /
106	for (size_t i = `0`; i < range_i; i += tile_i) {
107	for (size_t j = `0`; j < range_j; j += tile_j) {
108	for (size_t k = `0`; k < range_k; k += tile_k) {
109	function(argument, i, j, k, min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k));
110	}
111	}
112	}
113	} else {
114	/ Execute in parallel on the thread pool using linearized index /
115	const size_t tile_range_i = divide_round_up(range_i, tile_i);
116	const size_t tile_range_j = divide_round_up(range_j, tile_j);
117	const size_t tile_range_k = divide_round_up(range_k, tile_k);
118	struct compute_3d_tiled_context context = {
119	.function = function,
120	.argument = argument,
121	.tile_range_j = fxdiv_init_size_t(tile_range_j),
122	.tile_range_k = fxdiv_init_size_t(tile_range_k),
123	.range_i = range_i,
124	.range_j = range_j,
125	.range_k = range_k,
126	.tile_i = tile_i,
127	.tile_j = tile_j,
128	.tile_k = tile_k
129	};
130	pthreadpool_parallelize_1d(threadpool,
131	(pthreadpool_task_1d_t) compute_3d_tiled, &context,
132	tile_range_i * tile_range_j * tile_range_k,
133	`0` / flags /);
134	}
135	}
136
137	struct compute_4d_tiled_context {
138	pthreadpool_function_4d_tiled_t function;
139	void* argument;
140	struct fxdiv_divisor_size_t tile_range_kl;
141	struct fxdiv_divisor_size_t tile_range_j;
142	struct fxdiv_divisor_size_t tile_range_l;
143	size_t range_i;
144	size_t range_j;
145	size_t range_k;
146	size_t range_l;
147	size_t tile_i;
148	size_t tile_j;
149	size_t tile_k;
150	size_t tile_l;
151	};
152
153	static void compute_4d_tiled(const struct compute_4d_tiled_context* context, size_t linear_index) {
154	const struct fxdiv_divisor_size_t tile_range_kl = context->tile_range_kl;
155	const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
156	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
157	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, tile_range_j);
158	const struct fxdiv_divisor_size_t tile_range_l = context->tile_range_l;
159	const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);
160	const size_t max_tile_i = context->tile_i;
161	const size_t max_tile_j = context->tile_j;
162	const size_t max_tile_k = context->tile_k;
163	const size_t max_tile_l = context->tile_l;
164	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
165	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
166	const size_t index_k = tile_index_k_l.quotient * max_tile_k;
167	const size_t index_l = tile_index_k_l.remainder * max_tile_l;
168	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
169	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
170	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
171	const size_t tile_l = min(max_tile_l, context->range_l - index_l);
172	context->function(context->argument, index_i, index_j, index_k, index_l, tile_i, tile_j, tile_k, tile_l);
173	}
174
175	void pthreadpool_compute_4d_tiled(
176	pthreadpool_t threadpool,
177	pthreadpool_function_4d_tiled_t function,
178	void* argument,
179	size_t range_i,
180	size_t range_j,
181	size_t range_k,
182	size_t range_l,
183	size_t tile_i,
184	size_t tile_j,
185	size_t tile_k,
186	size_t tile_l)
187	{
188	if (pthreadpool_get_threads_count(threadpool) <= `1`) {
189	/ No thread pool used: execute function sequentially on the calling thread /
190	for (size_t i = `0`; i < range_i; i += tile_i) {
191	for (size_t j = `0`; j < range_j; j += tile_j) {
192	for (size_t k = `0`; k < range_k; k += tile_k) {
193	for (size_t l = `0`; l < range_l; l += tile_l) {
194	function(argument, i, j, k, l,
195	min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k), min(range_l - l, tile_l));
196	}
197	}
198	}
199	}
200	} else {
201	/ Execute in parallel on the thread pool using linearized index /
202	const size_t tile_range_i = divide_round_up(range_i, tile_i);
203	const size_t tile_range_j = divide_round_up(range_j, tile_j);
204	const size_t tile_range_k = divide_round_up(range_k, tile_k);
205	const size_t tile_range_l = divide_round_up(range_l, tile_l);
206	struct compute_4d_tiled_context context = {
207	.function = function,
208	.argument = argument,
209	.tile_range_kl = fxdiv_init_size_t(tile_range_k * tile_range_l),
210	.tile_range_j = fxdiv_init_size_t(tile_range_j),
211	.tile_range_l = fxdiv_init_size_t(tile_range_l),
212	.range_i = range_i,
213	.range_j = range_j,
214	.range_k = range_k,
215	.range_l = range_l,
216	.tile_i = tile_i,
217	.tile_j = tile_j,
218	.tile_k = tile_k,
219	.tile_l = tile_l
220	};
221	pthreadpool_parallelize_1d(threadpool,
222	(pthreadpool_task_1d_t) compute_4d_tiled, &context,
223	tile_range_i * tile_range_j * tile_range_k * tile_range_l,
224	`0` / flags /);
225	}
226	}
227

/* Browse the source code of pytorch/third_party/pthreadpool/src/legacy-api.c */