1 | #ifndef PTHREADPOOL_H_ |
2 | #define PTHREADPOOL_H_ |
3 | |
4 | #include <stddef.h> |
5 | #include <stdint.h> |
6 | |
7 | typedef struct pthreadpool* pthreadpool_t; |
8 | |
9 | typedef void (*pthreadpool_task_1d_t)(void*, size_t); /* (context, i) */ |
10 | typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t); /* (context, i, tile); tile is the extent actually covered, min(range - i, tile) */ |
11 | typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t); /* (context, i, j) */ |
12 | typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t); /* (context, i, j, tile_j) */ |
13 | typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t); /* (context, i, j, tile_i, tile_j) */ |
14 | typedef void (*pthreadpool_task_3d_t)(void*, size_t, size_t, size_t); /* (context, i, j, k) */ |
15 | typedef void (*pthreadpool_task_3d_tile_1d_t)(void*, size_t, size_t, size_t, size_t); /* (context, i, j, k, tile_k) */ |
16 | typedef void (*pthreadpool_task_3d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* (context, i, j, k, tile_j, tile_k) */ |
17 | typedef void (*pthreadpool_task_4d_t)(void*, size_t, size_t, size_t, size_t); /* (context, i, j, k, l) */ |
18 | typedef void (*pthreadpool_task_4d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t); /* (context, i, j, k, l, tile_l) */ |
19 | typedef void (*pthreadpool_task_4d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); /* (context, i, j, k, l, tile_k, tile_l) */ |
20 | typedef void (*pthreadpool_task_5d_t)(void*, size_t, size_t, size_t, size_t, size_t); |
21 | typedef void (*pthreadpool_task_5d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); |
22 | typedef void (*pthreadpool_task_5d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); |
23 | typedef void (*pthreadpool_task_6d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); |
24 | typedef void (*pthreadpool_task_6d_tile_1d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t); |
25 | typedef void (*pthreadpool_task_6d_tile_2d_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); |
26 | |
27 | typedef void (*pthreadpool_task_1d_with_id_t)(void*, uint32_t, size_t); /* (context, uarch_index, i) */ |
28 | typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t); /* (context, uarch_index, i, j, tile_i, tile_j) */ |
29 | typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t); /* (context, uarch_index, i, j, k, tile_j, tile_k) */ |
30 | typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void*, uint32_t, size_t, size_t, size_t, size_t, size_t, size_t); /* presumably (context, uarch_index, i, j, k, l, tile_k, tile_l) -- TODO confirm against the 4D with_uarch declaration */ |
31 | |
32 | |
33 | /** |
34 | * Disable support for denormalized numbers to the maximum extent possible for |
35 | * the duration of the computation. |
36 | * |
37 | * Handling denormalized floating-point numbers is often implemented in |
38 | * microcode, and incurs significant performance degradation. This hint |
39 | * instructs the thread pool to disable support for denormalized numbers before |
40 | * running the computation by manipulating architecture-specific control |
41 | * registers, and restore the initial value of control registers after the |
42 | * computation is complete. The thread pool temporarily disables denormalized |
43 | * numbers on all threads involved in the computation (i.e. the caller threads, |
44 | * and potentially worker threads). |
45 | * |
46 | * Disabling denormalized numbers may have a small negative effect on results' |
47 | * accuracy. As various architectures differ in capabilities to control |
48 | * processing of denormalized numbers, using this flag may also hurt results' |
49 | * reproducibility across different instruction set architectures. |
50 | */ |
51 | #define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001 |
52 | |
53 | /** |
54 | * Yield worker threads to the system scheduler after the operation is finished. |
55 | * |
56 | * Force workers to use kernel wait (instead of active spin-wait by default) for |
57 | * new commands after this command is processed. This flag affects only the |
58 | * immediate next operation on this thread pool. To make the thread pool always |
59 | * use kernel wait, pass this flag to all parallelization functions. |
60 | */ |
61 | #define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002 |
62 | |
63 | #ifdef __cplusplus |
64 | extern "C" { |
65 | #endif |
66 | |
67 | /** |
68 | * Create a thread pool with the specified number of threads. |
69 | * |
70 | * @param threads_count the number of threads in the thread pool. |
71 | * A value of 0 has special interpretation: it creates a thread pool with as |
72 | * many threads as there are logical processors in the system. |
73 | * |
74 | * @returns A pointer to an opaque thread pool object if the call is |
75 | * successful, or NULL pointer if the call failed. |
76 | */ |
77 | pthreadpool_t pthreadpool_create(size_t threads_count); |
78 | |
79 | /** |
80 | * Query the number of threads in a thread pool. |
81 | * |
82 | * @param threadpool the thread pool to query. |
83 | * |
84 | * @returns The number of threads in the thread pool. |
85 | */ |
86 | size_t pthreadpool_get_threads_count(pthreadpool_t threadpool); |
87 | |
88 | /** |
89 | * Process items on a 1D grid. |
90 | * |
91 | * The function implements a parallel version of the following snippet: |
92 | * |
93 | * for (size_t i = 0; i < range; i++) |
94 | * function(context, i); |
95 | * |
96 | * When the function returns, all items have been processed and the thread pool |
97 | * is ready for a new task. |
98 | * |
99 | * @note If multiple threads call this function with the same thread pool, the |
100 | * calls are serialized. |
101 | * |
102 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
103 | * is NULL, all items are processed serially on the calling thread. |
104 | * @param function the function to call for each item. |
105 | * @param context the first argument passed to the specified function. |
106 | * @param range the number of items on the 1D grid to process. The |
107 | * specified function will be called once for each item. |
108 | * @param flags a bitwise combination of zero or more optional flags |
109 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
110 | */ |
111 | void pthreadpool_parallelize_1d( |
112 | pthreadpool_t threadpool, |
113 | pthreadpool_task_1d_t function, |
114 | void* context, |
115 | size_t range, |
116 | uint32_t flags); |
117 | |
118 | /** |
119 | * Process items on a 1D grid using a microarchitecture-aware task function. |
120 | * |
121 | * The function implements a parallel version of the following snippet: |
122 | * |
123 | * uint32_t uarch_index = cpuinfo_initialize() ? |
124 | * cpuinfo_get_current_uarch_index() : default_uarch_index; |
125 | * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; |
126 | * for (size_t i = 0; i < range; i++) |
127 | * function(context, uarch_index, i); |
128 | * |
129 | * When the function returns, all items have been processed and the thread pool |
130 | * is ready for a new task. |
131 | * |
132 | * @note If multiple threads call this function with the same thread pool, the |
133 | * calls are serialized. |
134 | * |
135 | * @param threadpool the thread pool to use for parallelisation. If |
136 | * threadpool is NULL, all items are processed serially on the calling |
137 | * thread. |
138 | * @param function the function to call for each item. |
139 | * @param context the first argument passed to the specified |
140 | * function. |
141 | * @param default_uarch_index the microarchitecture index to use when |
142 | * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, |
143 | * or index returned by cpuinfo_get_current_uarch_index() exceeds the |
144 | * max_uarch_index value. |
145 | * @param max_uarch_index the maximum microarchitecture index expected by |
146 | * the specified function. If the index returned by |
147 | * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index |
148 | * will be used instead. default_uarch_index can exceed max_uarch_index. |
149 | * @param range the number of items on the 1D grid to process. |
150 | * The specified function will be called once for each item. |
151 | * @param flags a bitwise combination of zero or more optional |
152 | * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or |
153 | * PTHREADPOOL_FLAG_YIELD_WORKERS) |
154 | */ |
155 | void pthreadpool_parallelize_1d_with_uarch( |
156 | pthreadpool_t threadpool, |
157 | pthreadpool_task_1d_with_id_t function, |
158 | void* context, |
159 | uint32_t default_uarch_index, |
160 | uint32_t max_uarch_index, |
161 | size_t range, |
162 | uint32_t flags); |
163 | |
164 | /** |
165 | * Process items on a 1D grid with specified maximum tile size. |
166 | * |
167 | * The function implements a parallel version of the following snippet: |
168 | * |
169 | * for (size_t i = 0; i < range; i += tile) |
170 | * function(context, i, min(range - i, tile)); |
171 | * |
172 | * When the call returns, all items have been processed and the thread pool is |
173 | * ready for a new task. |
174 | * |
175 | * @note If multiple threads call this function with the same thread pool, |
176 | * the calls are serialized. |
177 | * |
178 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
179 | * is NULL, all items are processed serially on the calling thread. |
180 | * @param function the function to call for each tile. |
181 | * @param context the first argument passed to the specified function. |
182 | * @param range the number of items on the 1D grid to process. |
183 | * @param tile the maximum number of items on the 1D grid to process in |
184 | * one function call. |
185 | * @param flags a bitwise combination of zero or more optional flags |
186 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
187 | */ |
188 | void pthreadpool_parallelize_1d_tile_1d( |
189 | pthreadpool_t threadpool, |
190 | pthreadpool_task_1d_tile_1d_t function, |
191 | void* context, |
192 | size_t range, |
193 | size_t tile, |
194 | uint32_t flags); |
195 | |
196 | /** |
197 | * Process items on a 2D grid. |
198 | * |
199 | * The function implements a parallel version of the following snippet: |
200 | * |
201 | * for (size_t i = 0; i < range_i; i++) |
202 | * for (size_t j = 0; j < range_j; j++) |
203 | * function(context, i, j); |
204 | * |
205 | * When the function returns, all items have been processed and the thread pool |
206 | * is ready for a new task. |
207 | * |
208 | * @note If multiple threads call this function with the same thread pool, the |
209 | * calls are serialized. |
210 | * |
211 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
212 | * is NULL, all items are processed serially on the calling thread. |
213 | * @param function the function to call for each item. |
214 | * @param context the first argument passed to the specified function. |
215 | * @param range_i the number of items to process along the first dimension |
216 | * of the 2D grid. |
217 | * @param range_j the number of items to process along the second dimension |
218 | * of the 2D grid. |
219 | * @param flags a bitwise combination of zero or more optional flags |
220 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
221 | */ |
222 | void pthreadpool_parallelize_2d( |
223 | pthreadpool_t threadpool, |
224 | pthreadpool_task_2d_t function, |
225 | void* context, |
226 | size_t range_i, |
227 | size_t range_j, |
228 | uint32_t flags); |
229 | |
230 | /** |
231 | * Process items on a 2D grid with the specified maximum tile size along the |
232 | * last grid dimension. |
233 | * |
234 | * The function implements a parallel version of the following snippet: |
235 | * |
236 | * for (size_t i = 0; i < range_i; i++) |
237 | * for (size_t j = 0; j < range_j; j += tile_j) |
238 | * function(context, i, j, min(range_j - j, tile_j)); |
239 | * |
240 | * When the function returns, all items have been processed and the thread pool |
241 | * is ready for a new task. |
242 | * |
243 | * @note If multiple threads call this function with the same thread pool, the |
244 | * calls are serialized. |
245 | * |
246 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
247 | * is NULL, all items are processed serially on the calling thread. |
248 | * @param function the function to call for each tile. |
249 | * @param context the first argument passed to the specified function. |
250 | * @param range_i the number of items to process along the first dimension |
251 | * of the 2D grid. |
252 | * @param range_j the number of items to process along the second dimension |
253 | * of the 2D grid. |
254 | * @param tile_j the maximum number of items along the second dimension of |
255 | * the 2D grid to process in one function call. |
256 | * @param flags a bitwise combination of zero or more optional flags |
257 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
258 | */ |
259 | void pthreadpool_parallelize_2d_tile_1d( |
260 | pthreadpool_t threadpool, |
261 | pthreadpool_task_2d_tile_1d_t function, |
262 | void* context, |
263 | size_t range_i, |
264 | size_t range_j, |
265 | size_t tile_j, |
266 | uint32_t flags); |
267 | |
268 | /** |
269 | * Process items on a 2D grid with the specified maximum tile size along each |
270 | * grid dimension. |
271 | * |
272 | * The function implements a parallel version of the following snippet: |
273 | * |
274 | * for (size_t i = 0; i < range_i; i += tile_i) |
275 | * for (size_t j = 0; j < range_j; j += tile_j) |
276 | * function(context, i, j, |
277 | * min(range_i - i, tile_i), min(range_j - j, tile_j)); |
278 | * |
279 | * When the function returns, all items have been processed and the thread pool |
280 | * is ready for a new task. |
281 | * |
282 | * @note If multiple threads call this function with the same thread pool, the |
283 | * calls are serialized. |
284 | * |
285 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
286 | * is NULL, all items are processed serially on the calling thread. |
287 | * @param function the function to call for each tile. |
288 | * @param context the first argument passed to the specified function. |
289 | * @param range_i the number of items to process along the first dimension |
290 | * of the 2D grid. |
291 | * @param range_j the number of items to process along the second dimension |
292 | * of the 2D grid. |
293 | * @param tile_i the maximum number of items along the first dimension of |
294 | * the 2D grid to process in one function call. |
295 | * @param tile_j the maximum number of items along the second dimension of |
296 | * the 2D grid to process in one function call. |
297 | * @param flags a bitwise combination of zero or more optional flags |
298 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
299 | */ |
300 | void pthreadpool_parallelize_2d_tile_2d( |
301 | pthreadpool_t threadpool, |
302 | pthreadpool_task_2d_tile_2d_t function, |
303 | void* context, |
304 | size_t range_i, |
305 | size_t range_j, |
306 | size_t tile_i, |
307 | size_t tile_j, |
308 | uint32_t flags); |
309 | |
310 | /** |
311 | * Process items on a 2D grid with the specified maximum tile size along each |
312 | * grid dimension using a microarchitecture-aware task function. |
313 | * |
314 | * The function implements a parallel version of the following snippet: |
315 | * |
316 | * uint32_t uarch_index = cpuinfo_initialize() ? |
317 | * cpuinfo_get_current_uarch_index() : default_uarch_index; |
318 | * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; |
319 | * for (size_t i = 0; i < range_i; i += tile_i) |
320 | * for (size_t j = 0; j < range_j; j += tile_j) |
321 | * function(context, uarch_index, i, j, |
322 | * min(range_i - i, tile_i), min(range_j - j, tile_j)); |
323 | * |
324 | * When the function returns, all items have been processed and the thread pool |
325 | * is ready for a new task. |
326 | * |
327 | * @note If multiple threads call this function with the same thread pool, the |
328 | * calls are serialized. |
329 | * |
330 | * @param threadpool the thread pool to use for parallelisation. If |
331 | * threadpool is NULL, all items are processed serially on the calling |
332 | * thread. |
333 | * @param function the function to call for each tile. |
334 | * @param context the first argument passed to the specified |
335 | * function. |
336 | * @param default_uarch_index the microarchitecture index to use when |
337 | * pthreadpool is configured without cpuinfo, |
338 | * cpuinfo initialization failed, or index returned |
339 | * by cpuinfo_get_current_uarch_index() exceeds |
340 | * the max_uarch_index value. |
341 | * @param max_uarch_index the maximum microarchitecture index expected |
342 | * by the specified function. If the index returned |
343 | * by cpuinfo_get_current_uarch_index() exceeds this |
344 | * value, default_uarch_index will be used instead. |
345 | * default_uarch_index can exceed max_uarch_index. |
346 | * @param range_i the number of items to process along the first |
347 | * dimension of the 2D grid. |
348 | * @param range_j the number of items to process along the second |
349 | * dimension of the 2D grid. |
350 | * @param tile_i the maximum number of items along the first |
351 | * dimension of the 2D grid to process in one function call. |
352 | * @param tile_j the maximum number of items along the second |
353 | * dimension of the 2D grid to process in one function call. |
354 | * @param flags a bitwise combination of zero or more optional |
355 | * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or |
356 | * PTHREADPOOL_FLAG_YIELD_WORKERS) |
357 | */ |
358 | void pthreadpool_parallelize_2d_tile_2d_with_uarch( |
359 | pthreadpool_t threadpool, |
360 | pthreadpool_task_2d_tile_2d_with_id_t function, |
361 | void* context, |
362 | uint32_t default_uarch_index, |
363 | uint32_t max_uarch_index, |
364 | size_t range_i, |
365 | size_t range_j, |
366 | size_t tile_i, |
367 | size_t tile_j, |
368 | uint32_t flags); |
369 | |
370 | /** |
371 | * Process items on a 3D grid. |
372 | * |
373 | * The function implements a parallel version of the following snippet: |
374 | * |
375 | * for (size_t i = 0; i < range_i; i++) |
376 | * for (size_t j = 0; j < range_j; j++) |
377 | * for (size_t k = 0; k < range_k; k++) |
378 | * function(context, i, j, k); |
379 | * |
380 | * When the function returns, all items have been processed and the thread pool |
381 | * is ready for a new task. |
382 | * |
383 | * @note If multiple threads call this function with the same thread pool, the |
384 | * calls are serialized. |
385 | * |
386 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
387 | * is NULL, all items are processed serially on the calling thread. |
388 | * @param function the function to call for each item. |
389 | * @param context the first argument passed to the specified function. |
390 | * @param range_i the number of items to process along the first dimension |
391 | * of the 3D grid. |
392 | * @param range_j the number of items to process along the second dimension |
393 | * of the 3D grid. |
394 | * @param range_k the number of items to process along the third dimension |
395 | * of the 3D grid. |
396 | * @param flags a bitwise combination of zero or more optional flags |
397 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
398 | */ |
399 | void pthreadpool_parallelize_3d( |
400 | pthreadpool_t threadpool, |
401 | pthreadpool_task_3d_t function, |
402 | void* context, |
403 | size_t range_i, |
404 | size_t range_j, |
405 | size_t range_k, |
406 | uint32_t flags); |
407 | |
408 | /** |
409 | * Process items on a 3D grid with the specified maximum tile size along the |
410 | * last grid dimension. |
411 | * |
412 | * The function implements a parallel version of the following snippet: |
413 | * |
414 | * for (size_t i = 0; i < range_i; i++) |
415 | * for (size_t j = 0; j < range_j; j++) |
416 | * for (size_t k = 0; k < range_k; k += tile_k) |
417 | * function(context, i, j, k, min(range_k - k, tile_k)); |
418 | * |
419 | * When the function returns, all items have been processed and the thread pool |
420 | * is ready for a new task. |
421 | * |
422 | * @note If multiple threads call this function with the same thread pool, the |
423 | * calls are serialized. |
424 | * |
425 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
426 | * is NULL, all items are processed serially on the calling thread. |
427 | * @param function the function to call for each tile. |
428 | * @param context the first argument passed to the specified function. |
429 | * @param range_i the number of items to process along the first dimension |
430 | * of the 3D grid. |
431 | * @param range_j the number of items to process along the second dimension |
432 | * of the 3D grid. |
433 | * @param range_k the number of items to process along the third dimension |
434 | * of the 3D grid. |
435 | * @param tile_k the maximum number of items along the third dimension of |
436 | * the 3D grid to process in one function call. |
437 | * @param flags a bitwise combination of zero or more optional flags |
438 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
439 | */ |
440 | void pthreadpool_parallelize_3d_tile_1d( |
441 | pthreadpool_t threadpool, |
442 | pthreadpool_task_3d_tile_1d_t function, |
443 | void* context, |
444 | size_t range_i, |
445 | size_t range_j, |
446 | size_t range_k, |
447 | size_t tile_k, |
448 | uint32_t flags); |
449 | |
450 | /** |
451 | * Process items on a 3D grid with the specified maximum tile size along the |
452 | * last two grid dimensions. |
453 | * |
454 | * The function implements a parallel version of the following snippet: |
455 | * |
456 | * for (size_t i = 0; i < range_i; i++) |
457 | * for (size_t j = 0; j < range_j; j += tile_j) |
458 | * for (size_t k = 0; k < range_k; k += tile_k) |
459 | * function(context, i, j, k, |
460 | * min(range_j - j, tile_j), min(range_k - k, tile_k)); |
461 | * |
462 | * When the function returns, all items have been processed and the thread pool |
463 | * is ready for a new task. |
464 | * |
465 | * @note If multiple threads call this function with the same thread pool, the |
466 | * calls are serialized. |
467 | * |
468 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
469 | * is NULL, all items are processed serially on the calling thread. |
470 | * @param function the function to call for each tile. |
471 | * @param context the first argument passed to the specified function. |
472 | * @param range_i the number of items to process along the first dimension |
473 | * of the 3D grid. |
474 | * @param range_j the number of items to process along the second dimension |
475 | * of the 3D grid. |
476 | * @param range_k the number of items to process along the third dimension |
477 | * of the 3D grid. |
478 | * @param tile_j the maximum number of items along the second dimension of |
479 | * the 3D grid to process in one function call. |
480 | * @param tile_k the maximum number of items along the third dimension of |
481 | * the 3D grid to process in one function call. |
482 | * @param flags a bitwise combination of zero or more optional flags |
483 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
484 | */ |
485 | void pthreadpool_parallelize_3d_tile_2d( |
486 | pthreadpool_t threadpool, |
487 | pthreadpool_task_3d_tile_2d_t function, |
488 | void* context, |
489 | size_t range_i, |
490 | size_t range_j, |
491 | size_t range_k, |
492 | size_t tile_j, |
493 | size_t tile_k, |
494 | uint32_t flags); |
495 | |
496 | /** |
497 | * Process items on a 3D grid with the specified maximum tile size along the |
498 | * last two grid dimensions using a microarchitecture-aware task function. |
499 | * |
500 | * The function implements a parallel version of the following snippet: |
501 | * |
502 | * uint32_t uarch_index = cpuinfo_initialize() ? |
503 | * cpuinfo_get_current_uarch_index() : default_uarch_index; |
504 | * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; |
505 | * for (size_t i = 0; i < range_i; i++) |
506 | * for (size_t j = 0; j < range_j; j += tile_j) |
507 | * for (size_t k = 0; k < range_k; k += tile_k) |
508 | * function(context, uarch_index, i, j, k, |
509 | * min(range_j - j, tile_j), min(range_k - k, tile_k)); |
510 | * |
511 | * When the function returns, all items have been processed and the thread pool |
512 | * is ready for a new task. |
513 | * |
514 | * @note If multiple threads call this function with the same thread pool, the |
515 | * calls are serialized. |
516 | * |
517 | * @param threadpool the thread pool to use for parallelisation. If |
518 | * threadpool is NULL, all items are processed serially on the calling |
519 | * thread. |
520 | * @param function the function to call for each tile. |
521 | * @param context the first argument passed to the specified |
522 | * function. |
523 | * @param default_uarch_index the microarchitecture index to use when |
524 | * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, |
525 | * or index returned by cpuinfo_get_current_uarch_index() exceeds the |
526 | * max_uarch_index value. |
527 | * @param max_uarch_index the maximum microarchitecture index expected by |
528 | * the specified function. If the index returned by |
529 | * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index |
530 | * will be used instead. default_uarch_index can exceed max_uarch_index. |
531 | * @param range_i the number of items to process along the first |
532 | * dimension of the 3D grid. |
533 | * @param range_j the number of items to process along the second |
534 | * dimension of the 3D grid. |
535 | * @param range_k the number of items to process along the third |
536 | * dimension of the 3D grid. |
537 | * @param tile_j the maximum number of items along the second |
538 | * dimension of the 3D grid to process in one function call. |
539 | * @param tile_k the maximum number of items along the third |
540 | * dimension of the 3D grid to process in one function call. |
541 | * @param flags a bitwise combination of zero or more optional |
542 | * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or |
543 | * PTHREADPOOL_FLAG_YIELD_WORKERS) |
544 | */ |
545 | void pthreadpool_parallelize_3d_tile_2d_with_uarch( |
546 | pthreadpool_t threadpool, |
547 | pthreadpool_task_3d_tile_2d_with_id_t function, |
548 | void* context, |
549 | uint32_t default_uarch_index, |
550 | uint32_t max_uarch_index, |
551 | size_t range_i, |
552 | size_t range_j, |
553 | size_t range_k, |
554 | size_t tile_j, |
555 | size_t tile_k, |
556 | uint32_t flags); |
557 | |
558 | /** |
559 | * Process items on a 4D grid. |
560 | * |
561 | * The function implements a parallel version of the following snippet: |
562 | * |
563 | * for (size_t i = 0; i < range_i; i++) |
564 | * for (size_t j = 0; j < range_j; j++) |
565 | * for (size_t k = 0; k < range_k; k++) |
566 | * for (size_t l = 0; l < range_l; l++) |
567 | * function(context, i, j, k, l); |
568 | * |
569 | * When the function returns, all items have been processed and the thread pool |
570 | * is ready for a new task. |
571 | * |
572 | * @note If multiple threads call this function with the same thread pool, the |
573 | * calls are serialized. |
574 | * |
575 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
576 | * is NULL, all items are processed serially on the calling thread. |
577 | * @param function the function to call for each item. |
578 | * @param context the first argument passed to the specified function. |
579 | * @param range_i the number of items to process along the first dimension |
580 | * of the 4D grid. |
581 | * @param range_j the number of items to process along the second dimension |
582 | * of the 4D grid. |
583 | * @param range_k the number of items to process along the third dimension |
584 | * of the 4D grid. |
585 | * @param range_l the number of items to process along the fourth dimension |
586 | * of the 4D grid. |
587 | * @param flags a bitwise combination of zero or more optional flags |
588 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
589 | */ |
590 | void pthreadpool_parallelize_4d( |
591 | pthreadpool_t threadpool, |
592 | pthreadpool_task_4d_t function, |
593 | void* context, |
594 | size_t range_i, |
595 | size_t range_j, |
596 | size_t range_k, |
597 | size_t range_l, |
598 | uint32_t flags); |
599 | |
600 | /** |
601 | * Process items on a 4D grid with the specified maximum tile size along the |
602 | * last grid dimension. |
603 | * |
604 | * The function implements a parallel version of the following snippet: |
605 | * |
606 | * for (size_t i = 0; i < range_i; i++) |
607 | * for (size_t j = 0; j < range_j; j++) |
608 | * for (size_t k = 0; k < range_k; k++) |
609 | * for (size_t l = 0; l < range_l; l += tile_l) |
610 | * function(context, i, j, k, l, min(range_l - l, tile_l)); |
611 | * |
612 | * When the function returns, all items have been processed and the thread pool |
613 | * is ready for a new task. |
614 | * |
615 | * @note If multiple threads call this function with the same thread pool, the |
616 | * calls are serialized. |
617 | * |
618 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
619 | * is NULL, all items are processed serially on the calling thread. |
620 | * @param function the function to call for each tile. |
621 | * @param context the first argument passed to the specified function. |
622 | * @param range_i the number of items to process along the first dimension |
623 | * of the 4D grid. |
624 | * @param range_j the number of items to process along the second dimension |
625 | * of the 4D grid. |
626 | * @param range_k the number of items to process along the third dimension |
627 | * of the 4D grid. |
628 | * @param range_l the number of items to process along the fourth dimension |
629 | * of the 4D grid. |
630 | * @param tile_l the maximum number of items along the fourth dimension of |
631 | * the 4D grid to process in one function call. |
632 | * @param flags a bitwise combination of zero or more optional flags |
633 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
634 | */ |
635 | void pthreadpool_parallelize_4d_tile_1d( |
636 | pthreadpool_t threadpool, |
637 | pthreadpool_task_4d_tile_1d_t function, |
638 | void* context, |
639 | size_t range_i, |
640 | size_t range_j, |
641 | size_t range_k, |
642 | size_t range_l, |
643 | size_t tile_l, |
644 | uint32_t flags); |
645 | |
646 | /** |
647 | * Process items on a 4D grid with the specified maximum tile size along the |
648 | * last two grid dimensions. |
649 | * |
650 | * The function implements a parallel version of the following snippet: |
651 | * |
652 | * for (size_t i = 0; i < range_i; i++) |
653 | * for (size_t j = 0; j < range_j; j++) |
654 | * for (size_t k = 0; k < range_k; k += tile_k) |
655 | * for (size_t l = 0; l < range_l; l += tile_l) |
656 | * function(context, i, j, k, l, |
657 | * min(range_k - k, tile_k), min(range_l - l, tile_l)); |
658 | * |
659 | * When the function returns, all items have been processed and the thread pool |
660 | * is ready for a new task. |
661 | * |
662 | * @note If multiple threads call this function with the same thread pool, the |
663 | * calls are serialized. |
664 | * |
665 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
666 | * is NULL, all items are processed serially on the calling thread. |
667 | * @param function the function to call for each tile; it receives context, the tile origin (i, j, k, l) and the actual tile sizes along the third and fourth dimensions. |
668 | * @param context the first argument passed to the specified function. |
669 | * @param range_i the number of items to process along the first dimension |
670 | * of the 4D grid. |
671 | * @param range_j the number of items to process along the second dimension |
672 | * of the 4D grid. |
673 | * @param range_k the number of items to process along the third dimension |
674 | * of the 4D grid. |
675 | * @param range_l the number of items to process along the fourth dimension |
676 | * of the 4D grid. |
677 | * @param tile_k the maximum number of items along the third dimension of |
678 | * the 4D grid to process in one function call; the last tile along this dimension may be smaller. |
679 | * @param tile_l the maximum number of items along the fourth dimension of |
680 | * the 4D grid to process in one function call; the last tile along this dimension may be smaller. |
681 | * @param flags a bitwise combination of zero or more optional flags |
682 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
683 | */ |
684 | void pthreadpool_parallelize_4d_tile_2d( |
685 | pthreadpool_t threadpool, |
686 | pthreadpool_task_4d_tile_2d_t function, |
687 | void* context, |
688 | size_t range_i, |
689 | size_t range_j, |
690 | size_t range_k, |
691 | size_t range_l, |
692 | size_t tile_k, |
693 | size_t tile_l, |
694 | uint32_t flags); |
695 | |
696 | /** |
697 | * Process items on a 4D grid with the specified maximum tile size along the |
698 | * last two grid dimensions using a microarchitecture-aware task function. |
699 | * |
700 | * The function implements a parallel version of the following snippet: |
701 | * |
702 | * uint32_t uarch_index = cpuinfo_initialize() ? |
703 | * cpuinfo_get_current_uarch_index() : default_uarch_index; |
704 | * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; |
705 | * for (size_t i = 0; i < range_i; i++) |
706 | * for (size_t j = 0; j < range_j; j++) |
707 | * for (size_t k = 0; k < range_k; k += tile_k) |
708 | * for (size_t l = 0; l < range_l; l += tile_l) |
709 | * function(context, uarch_index, i, j, k, l, |
710 | * min(range_k - k, tile_k), min(range_l - l, tile_l)); |
711 | * |
712 | * When the function returns, all items have been processed and the thread pool |
713 | * is ready for a new task. |
714 | * |
715 | * @note If multiple threads call this function with the same thread pool, the |
716 | * calls are serialized. |
717 | * |
718 | * @param threadpool the thread pool to use for parallelisation. If |
719 | * threadpool is NULL, all items are processed serially on the calling |
720 | * thread. |
721 | * @param function the function to call for each tile; the selected microarchitecture index is passed as its second argument. |
722 | * @param context the first argument passed to the specified |
723 | * function. |
724 | * @param default_uarch_index the microarchitecture index to use when |
725 | * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, |
726 | * or the index returned by cpuinfo_get_current_uarch_index() exceeds the |
727 | * max_uarch_index value. |
728 | * @param max_uarch_index the maximum microarchitecture index expected by |
729 | * the specified function. If the index returned by |
730 | * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index |
731 | * will be used instead. default_uarch_index can exceed max_uarch_index. |
732 | * @param range_i the number of items to process along the first |
733 | * dimension of the 4D grid. |
734 | * @param range_j the number of items to process along the second |
735 | * dimension of the 4D grid. |
736 | * @param range_k the number of items to process along the third |
737 | * dimension of the 4D grid. |
738 | * @param range_l the number of items to process along the fourth |
739 | * dimension of the 4D grid. |
740 | * @param tile_k the maximum number of items along the third |
741 | * dimension of the 4D grid to process in one function call; the last tile along this dimension may be smaller. |
742 | * @param tile_l the maximum number of items along the fourth |
743 | * dimension of the 4D grid to process in one function call; the last tile along this dimension may be smaller. |
744 | * @param flags a bitwise combination of zero or more optional |
745 | * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or |
746 | * PTHREADPOOL_FLAG_YIELD_WORKERS) |
747 | */ |
748 | void pthreadpool_parallelize_4d_tile_2d_with_uarch( |
749 | pthreadpool_t threadpool, |
750 | pthreadpool_task_4d_tile_2d_with_id_t function, |
751 | void* context, |
752 | uint32_t default_uarch_index, |
753 | uint32_t max_uarch_index, |
754 | size_t range_i, |
755 | size_t range_j, |
756 | size_t range_k, |
757 | size_t range_l, |
758 | size_t tile_k, |
759 | size_t tile_l, |
760 | uint32_t flags); |
761 | |
762 | /** |
763 | * Process items on a 5D grid. |
764 | * |
765 | * The function implements a parallel version of the following snippet: |
766 | * |
767 | * for (size_t i = 0; i < range_i; i++) |
768 | * for (size_t j = 0; j < range_j; j++) |
769 | * for (size_t k = 0; k < range_k; k++) |
770 | * for (size_t l = 0; l < range_l; l++) |
771 | * for (size_t m = 0; m < range_m; m++) |
772 | * function(context, i, j, k, l, m); |
773 | * |
774 | * When the function returns, all items have been processed and the thread pool |
775 | * is ready for a new task. |
776 | * |
777 | * @note If multiple threads call this function with the same thread pool, the |
778 | * calls are serialized. |
779 | * |
780 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
781 | * is NULL, all items are processed serially on the calling thread. |
782 | * @param function the function to call for each item; it receives context and the item's indices (i, j, k, l, m). |
783 | * @param context the first argument passed to the specified function. |
784 | * @param range_i the number of items to process along the first dimension |
785 | * of the 5D grid. |
786 | * @param range_j the number of items to process along the second dimension |
787 | * of the 5D grid. |
788 | * @param range_k the number of items to process along the third dimension |
789 | * of the 5D grid. |
790 | * @param range_l the number of items to process along the fourth dimension |
791 | * of the 5D grid. |
792 | * @param range_m the number of items to process along the fifth dimension |
793 | * of the 5D grid. |
794 | * @param flags a bitwise combination of zero or more optional flags |
795 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
796 | */ |
797 | void pthreadpool_parallelize_5d( |
798 | pthreadpool_t threadpool, |
799 | pthreadpool_task_5d_t function, |
800 | void* context, |
801 | size_t range_i, |
802 | size_t range_j, |
803 | size_t range_k, |
804 | size_t range_l, |
805 | size_t range_m, |
806 | uint32_t flags); |
807 | |
808 | /** |
809 | * Process items on a 5D grid with the specified maximum tile size along the |
810 | * last grid dimension. |
811 | * |
812 | * The function implements a parallel version of the following snippet: |
813 | * |
814 | * for (size_t i = 0; i < range_i; i++) |
815 | * for (size_t j = 0; j < range_j; j++) |
816 | * for (size_t k = 0; k < range_k; k++) |
817 | * for (size_t l = 0; l < range_l; l++) |
818 | * for (size_t m = 0; m < range_m; m += tile_m) |
819 | * function(context, i, j, k, l, m, min(range_m - m, tile_m)); |
820 | * |
821 | * When the function returns, all items have been processed and the thread pool |
822 | * is ready for a new task. |
823 | * |
824 | * @note If multiple threads call this function with the same thread pool, the |
825 | * calls are serialized. |
826 | * |
827 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
828 | * is NULL, all items are processed serially on the calling thread. |
829 | * @param function the function to call for each tile; it receives context, the tile origin (i, j, k, l, m) and the actual tile size along the fifth dimension. |
830 | * @param context the first argument passed to the specified function. |
831 | * @param range_i the number of items to process along the first dimension |
832 | * of the 5D grid. |
833 | * @param range_j the number of items to process along the second dimension |
834 | * of the 5D grid. |
835 | * @param range_k the number of items to process along the third dimension |
836 | * of the 5D grid. |
837 | * @param range_l the number of items to process along the fourth dimension |
838 | * of the 5D grid. |
839 | * @param range_m the number of items to process along the fifth dimension |
840 | * of the 5D grid. |
841 | * @param tile_m the maximum number of items along the fifth dimension of |
842 | * the 5D grid to process in one function call; the last tile along this dimension may be smaller. |
843 | * @param flags a bitwise combination of zero or more optional flags |
844 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
845 | */ |
846 | void pthreadpool_parallelize_5d_tile_1d( |
847 | pthreadpool_t threadpool, |
848 | pthreadpool_task_5d_tile_1d_t function, |
849 | void* context, |
850 | size_t range_i, |
851 | size_t range_j, |
852 | size_t range_k, |
853 | size_t range_l, |
854 | size_t range_m, |
855 | size_t tile_m, |
856 | uint32_t flags); |
857 | |
858 | /** |
859 | * Process items on a 5D grid with the specified maximum tile size along the |
860 | * last two grid dimensions. |
861 | * |
862 | * The function implements a parallel version of the following snippet: |
863 | * |
864 | * for (size_t i = 0; i < range_i; i++) |
865 | * for (size_t j = 0; j < range_j; j++) |
866 | * for (size_t k = 0; k < range_k; k++) |
867 | * for (size_t l = 0; l < range_l; l += tile_l) |
868 | * for (size_t m = 0; m < range_m; m += tile_m) |
869 | * function(context, i, j, k, l, m, |
870 | * min(range_l - l, tile_l), min(range_m - m, tile_m)); |
871 | * |
872 | * When the function returns, all items have been processed and the thread pool |
873 | * is ready for a new task. |
874 | * |
875 | * @note If multiple threads call this function with the same thread pool, the |
876 | * calls are serialized. |
877 | * |
878 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
879 | * is NULL, all items are processed serially on the calling thread. |
880 | * @param function the function to call for each tile; it receives context, the tile origin (i, j, k, l, m) and the actual tile sizes along the fourth and fifth dimensions. |
881 | * @param context the first argument passed to the specified function. |
882 | * @param range_i the number of items to process along the first dimension |
883 | * of the 5D grid. |
884 | * @param range_j the number of items to process along the second dimension |
885 | * of the 5D grid. |
886 | * @param range_k the number of items to process along the third dimension |
887 | * of the 5D grid. |
888 | * @param range_l the number of items to process along the fourth dimension |
889 | * of the 5D grid. |
890 | * @param range_m the number of items to process along the fifth dimension |
891 | * of the 5D grid. |
892 | * @param tile_l the maximum number of items along the fourth dimension of |
893 | * the 5D grid to process in one function call; the last tile along this dimension may be smaller. |
894 | * @param tile_m the maximum number of items along the fifth dimension of |
895 | * the 5D grid to process in one function call; the last tile along this dimension may be smaller. |
896 | * @param flags a bitwise combination of zero or more optional flags |
897 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
898 | */ |
899 | void pthreadpool_parallelize_5d_tile_2d( |
900 | pthreadpool_t threadpool, |
901 | pthreadpool_task_5d_tile_2d_t function, |
902 | void* context, |
903 | size_t range_i, |
904 | size_t range_j, |
905 | size_t range_k, |
906 | size_t range_l, |
907 | size_t range_m, |
908 | size_t tile_l, |
909 | size_t tile_m, |
910 | uint32_t flags); |
911 | |
912 | /** |
913 | * Process items on a 6D grid. |
914 | * |
915 | * The function implements a parallel version of the following snippet: |
916 | * |
917 | * for (size_t i = 0; i < range_i; i++) |
918 | * for (size_t j = 0; j < range_j; j++) |
919 | * for (size_t k = 0; k < range_k; k++) |
920 | * for (size_t l = 0; l < range_l; l++) |
921 | * for (size_t m = 0; m < range_m; m++) |
922 | * for (size_t n = 0; n < range_n; n++) |
923 | * function(context, i, j, k, l, m, n); |
924 | * |
925 | * When the function returns, all items have been processed and the thread pool |
926 | * is ready for a new task. |
927 | * |
928 | * @note If multiple threads call this function with the same thread pool, the |
929 | * calls are serialized. |
930 | * |
931 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
932 | * is NULL, all items are processed serially on the calling thread. |
933 | * @param function the function to call for each item; it receives context and the item's indices (i, j, k, l, m, n). |
934 | * @param context the first argument passed to the specified function. |
935 | * @param range_i the number of items to process along the first dimension |
936 | * of the 6D grid. |
937 | * @param range_j the number of items to process along the second dimension |
938 | * of the 6D grid. |
939 | * @param range_k the number of items to process along the third dimension |
940 | * of the 6D grid. |
941 | * @param range_l the number of items to process along the fourth dimension |
942 | * of the 6D grid. |
943 | * @param range_m the number of items to process along the fifth dimension |
944 | * of the 6D grid. |
945 | * @param range_n the number of items to process along the sixth dimension |
946 | * of the 6D grid. |
949 | * @param flags a bitwise combination of zero or more optional flags |
950 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
951 | */ |
952 | void pthreadpool_parallelize_6d( |
953 | pthreadpool_t threadpool, |
954 | pthreadpool_task_6d_t function, |
955 | void* context, |
956 | size_t range_i, |
957 | size_t range_j, |
958 | size_t range_k, |
959 | size_t range_l, |
960 | size_t range_m, |
961 | size_t range_n, |
962 | uint32_t flags); |
963 | |
964 | /** |
965 | * Process items on a 6D grid with the specified maximum tile size along the |
966 | * last grid dimension. |
967 | * |
968 | * The function implements a parallel version of the following snippet: |
969 | * |
970 | * for (size_t i = 0; i < range_i; i++) |
971 | * for (size_t j = 0; j < range_j; j++) |
972 | * for (size_t k = 0; k < range_k; k++) |
973 | * for (size_t l = 0; l < range_l; l++) |
974 | * for (size_t m = 0; m < range_m; m++) |
975 | * for (size_t n = 0; n < range_n; n += tile_n) |
976 | * function(context, i, j, k, l, m, n, min(range_n - n, tile_n)); |
977 | * |
978 | * When the function returns, all items have been processed and the thread pool |
979 | * is ready for a new task. |
980 | * |
981 | * @note If multiple threads call this function with the same thread pool, the |
982 | * calls are serialized. |
983 | * |
984 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
985 | * is NULL, all items are processed serially on the calling thread. |
986 | * @param function the function to call for each tile; it receives context, the tile origin (i, j, k, l, m, n) and the actual tile size along the sixth dimension. |
987 | * @param context the first argument passed to the specified function. |
988 | * @param range_i the number of items to process along the first dimension |
989 | * of the 6D grid. |
990 | * @param range_j the number of items to process along the second dimension |
991 | * of the 6D grid. |
992 | * @param range_k the number of items to process along the third dimension |
993 | * of the 6D grid. |
994 | * @param range_l the number of items to process along the fourth dimension |
995 | * of the 6D grid. |
996 | * @param range_m the number of items to process along the fifth dimension |
997 | * of the 6D grid. |
998 | * @param range_n the number of items to process along the sixth dimension |
999 | * of the 6D grid. |
1000 | * @param tile_n the maximum number of items along the sixth dimension of |
1001 | * the 6D grid to process in one function call; the last tile along this dimension may be smaller. |
1002 | * @param flags a bitwise combination of zero or more optional flags |
1003 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
1004 | */ |
1005 | void pthreadpool_parallelize_6d_tile_1d( |
1006 | pthreadpool_t threadpool, |
1007 | pthreadpool_task_6d_tile_1d_t function, |
1008 | void* context, |
1009 | size_t range_i, |
1010 | size_t range_j, |
1011 | size_t range_k, |
1012 | size_t range_l, |
1013 | size_t range_m, |
1014 | size_t range_n, |
1015 | size_t tile_n, |
1016 | uint32_t flags); |
1017 | |
1018 | /** |
1019 | * Process items on a 6D grid with the specified maximum tile size along the |
1020 | * last two grid dimensions. |
1021 | * |
1022 | * The function implements a parallel version of the following snippet: |
1023 | * |
1024 | * for (size_t i = 0; i < range_i; i++) |
1025 | * for (size_t j = 0; j < range_j; j++) |
1026 | * for (size_t k = 0; k < range_k; k++) |
1027 | * for (size_t l = 0; l < range_l; l++) |
1028 | * for (size_t m = 0; m < range_m; m += tile_m) |
1029 | * for (size_t n = 0; n < range_n; n += tile_n) |
1030 | * function(context, i, j, k, l, m, n, |
1031 | * min(range_m - m, tile_m), min(range_n - n, tile_n)); |
1032 | * |
1033 | * When the function returns, all items have been processed and the thread pool |
1034 | * is ready for a new task. |
1035 | * |
1036 | * @note If multiple threads call this function with the same thread pool, the |
1037 | * calls are serialized. |
1038 | * |
1039 | * @param threadpool the thread pool to use for parallelisation. If threadpool |
1040 | * is NULL, all items are processed serially on the calling thread. |
1041 | * @param function the function to call for each tile; it receives context, the tile origin (i, j, k, l, m, n) and the actual tile sizes along the fifth and sixth dimensions. |
1042 | * @param context the first argument passed to the specified function. |
1043 | * @param range_i the number of items to process along the first dimension |
1044 | * of the 6D grid. |
1045 | * @param range_j the number of items to process along the second dimension |
1046 | * of the 6D grid. |
1047 | * @param range_k the number of items to process along the third dimension |
1048 | * of the 6D grid. |
1049 | * @param range_l the number of items to process along the fourth dimension |
1050 | * of the 6D grid. |
1051 | * @param range_m the number of items to process along the fifth dimension |
1052 | * of the 6D grid. |
1053 | * @param range_n the number of items to process along the sixth dimension |
1054 | * of the 6D grid. |
1055 | * @param tile_m the maximum number of items along the fifth dimension of |
1056 | * the 6D grid to process in one function call; the last tile along this dimension may be smaller. |
1057 | * @param tile_n the maximum number of items along the sixth dimension of |
1058 | * the 6D grid to process in one function call; the last tile along this dimension may be smaller. |
1059 | * @param flags a bitwise combination of zero or more optional flags |
1060 | * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) |
1061 | */ |
1062 | void pthreadpool_parallelize_6d_tile_2d( |
1063 | pthreadpool_t threadpool, |
1064 | pthreadpool_task_6d_tile_2d_t function, |
1065 | void* context, |
1066 | size_t range_i, |
1067 | size_t range_j, |
1068 | size_t range_k, |
1069 | size_t range_l, |
1070 | size_t range_m, |
1071 | size_t range_n, |
1072 | size_t tile_m, |
1073 | size_t tile_n, |
1074 | uint32_t flags); |
1075 | |
1076 | /** |
1077 | * Terminates threads in the thread pool and releases associated resources. |
1078 | * |
1079 | * @warning Accessing the thread pool after a call to this function constitutes |
1080 | * undefined behaviour and may cause data corruption. |
1081 | * |
1082 | * @param[in,out] threadpool The thread pool to destroy; the handle must not be used again after this call returns. |
1083 | */ |
1084 | void pthreadpool_destroy(pthreadpool_t threadpool); |
1085 | |
1086 | |
1087 | #ifndef PTHREADPOOL_NO_DEPRECATED_API |
1088 | /* Everything up to the matching #endif is skipped when PTHREADPOOL_NO_DEPRECATED_API is defined. */ |
1089 | /* Legacy API for compatibility with pre-existing users (e.g. NNPACK). PTHREADPOOL_DEPRECATED expands to the GNU deprecated attribute when __GNUC__ is defined and to nothing otherwise. */ |
1090 | #if defined(__GNUC__) |
1091 | #define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__)) |
1092 | #else |
1093 | #define PTHREADPOOL_DEPRECATED |
1094 | #endif |
1095 | /* Callback types for the legacy functions below. The 1d, 1d_tiled, 2d and 2d_tiled types have the same signatures as pthreadpool_task_1d_t, pthreadpool_task_1d_tile_1d_t, pthreadpool_task_2d_t and pthreadpool_task_2d_tile_2d_t respectively. */ |
1096 | typedef void (*pthreadpool_function_1d_t)(void*, size_t); |
1097 | typedef void (*pthreadpool_function_1d_tiled_t)(void*, size_t, size_t); |
1098 | typedef void (*pthreadpool_function_2d_t)(void*, size_t, size_t); |
1099 | typedef void (*pthreadpool_function_2d_tiled_t)(void*, size_t, size_t, size_t, size_t); |
1100 | typedef void (*pthreadpool_function_3d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t); |
1101 | typedef void (*pthreadpool_function_4d_tiled_t)(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t); |
1102 | /* Legacy entry points, each declared with PTHREADPOOL_DEPRECATED. NOTE(review): the argument pointer is presumably forwarded to the callback as its first parameter - confirm against the implementation. */ |
1103 | void pthreadpool_compute_1d( |
1104 | pthreadpool_t threadpool, |
1105 | pthreadpool_function_1d_t function, |
1106 | void* argument, |
1107 | size_t range) PTHREADPOOL_DEPRECATED; |
1108 | |
1109 | void pthreadpool_compute_1d_tiled( |
1110 | pthreadpool_t threadpool, |
1111 | pthreadpool_function_1d_tiled_t function, |
1112 | void* argument, |
1113 | size_t range, |
1114 | size_t tile) PTHREADPOOL_DEPRECATED; |
1115 | |
1116 | void pthreadpool_compute_2d( |
1117 | pthreadpool_t threadpool, |
1118 | pthreadpool_function_2d_t function, |
1119 | void* argument, |
1120 | size_t range_i, |
1121 | size_t range_j) PTHREADPOOL_DEPRECATED; |
1122 | |
1123 | void pthreadpool_compute_2d_tiled( |
1124 | pthreadpool_t threadpool, |
1125 | pthreadpool_function_2d_tiled_t function, |
1126 | void* argument, |
1127 | size_t range_i, |
1128 | size_t range_j, |
1129 | size_t tile_i, |
1130 | size_t tile_j) PTHREADPOOL_DEPRECATED; |
1131 | |
1132 | void pthreadpool_compute_3d_tiled( |
1133 | pthreadpool_t threadpool, |
1134 | pthreadpool_function_3d_tiled_t function, |
1135 | void* argument, |
1136 | size_t range_i, |
1137 | size_t range_j, |
1138 | size_t range_k, |
1139 | size_t tile_i, |
1140 | size_t tile_j, |
1141 | size_t tile_k) PTHREADPOOL_DEPRECATED; |
1142 | |
1143 | void pthreadpool_compute_4d_tiled( |
1144 | pthreadpool_t threadpool, |
1145 | pthreadpool_function_4d_tiled_t function, |
1146 | void* argument, |
1147 | size_t range_i, |
1148 | size_t range_j, |
1149 | size_t range_k, |
1150 | size_t range_l, |
1151 | size_t tile_i, |
1152 | size_t tile_j, |
1153 | size_t tile_k, |
1154 | size_t tile_l) PTHREADPOOL_DEPRECATED; |
1155 | |
1156 | #endif /* PTHREADPOOL_NO_DEPRECATED_API */ |
1157 | |
1158 | #ifdef __cplusplus |
1159 | } /* extern "C" */ |
1160 | #endif |
1161 | |
1162 | #endif /* PTHREADPOOL_H_ */ |
1163 | |