1#pragma once
2
3/* Standard C headers */
4#include <stddef.h>
5#include <stdint.h>
6
7/* Internal headers */
8#include "threadpool-common.h"
9#include "threadpool-atomics.h"
10
11/* POSIX headers */
12#if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
13#include <pthread.h>
14#endif
15
16/* Mach headers */
17#if PTHREADPOOL_USE_GCD
18#include <dispatch/dispatch.h>
19#endif
20
21/* Windows headers */
22#if PTHREADPOOL_USE_EVENT
23#ifndef WIN32_LEAN_AND_MEAN
24#define WIN32_LEAN_AND_MEAN
25#endif
26#include <windows.h>
27#endif
28
29/* Dependencies */
30#include <fxdiv.h>
31
32/* Library header */
33#include <pthreadpool.h>
34
35
/*
 * Mask selecting the low 31 bits of a 32-bit command word.
 * NOTE(review): the comments on completion_event/command_event in struct pthreadpool say the high bit of the
 * command word selects the event in use; this mask presumably strips that bit to recover the
 * enum threadpool_command value - confirm against the implementation.
 */
#define THREADPOOL_COMMAND_MASK UINT32_C(0x7FFFFFFF)
37
/**
 * Commands that can be submitted to the thread pool (see the command field of struct pthreadpool).
 * The numeric values are written out explicitly; they are the same values the compiler
 * assigns implicitly (0, 1, 2).
 */
enum threadpool_command {
	/* NOTE(review): presumably the state before any command has been submitted - confirm. */
	threadpool_command_init = 0,
	/* NOTE(review): presumably asks the worker threads to run a parallelization task - confirm. */
	threadpool_command_parallelize = 1,
	/* NOTE(review): presumably asks the worker threads to terminate - confirm. */
	threadpool_command_shutdown = 2,
};
43
/**
 * Per-thread state of a thread pool: the thread's work range, its index, a pointer to the
 * owning pool, and the platform-specific thread object or handle.
 * The structure is declared with PTHREADPOOL_CACHELINE_ALIGNED, and a static assertion after it
 * requires its size to be a multiple of PTHREADPOOL_CACHELINE_SIZE.
 */
struct PTHREADPOOL_CACHELINE_ALIGNED thread_info {
	/**
	 * Index of the first element in the work range.
	 * Before processing a new element the owning worker thread increments this value.
	 */
	pthreadpool_atomic_size_t range_start;
	/**
	 * Index of the element after the last element of the work range.
	 * Before processing a new element the stealing worker thread decrements this value.
	 */
	pthreadpool_atomic_size_t range_end;
	/**
	 * The number of elements in the work range.
	 * Due to race conditions range_length <= range_end - range_start.
	 * The owning worker thread must decrement this value before incrementing @a range_start.
	 * The stealing worker thread must decrement this value before decrementing @a range_end.
	 */
	pthreadpool_atomic_size_t range_length;
	/**
	 * Thread number in the 0..threads_count-1 range.
	 */
	size_t thread_number;
	/**
	 * Thread pool which owns the thread.
	 */
	struct pthreadpool* threadpool;
#if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
	/**
	 * The pthread object corresponding to the thread.
	 * Present only in the condition-variable and futex configurations.
	 */
	pthread_t thread_object;
#endif
#if PTHREADPOOL_USE_EVENT
	/**
	 * The Windows thread handle corresponding to the thread.
	 * Present only in the Windows event configuration.
	 */
	HANDLE thread_handle;
#endif
};
83
/*
 * Compile-time check that sizeof(struct thread_info) is a whole multiple of PTHREADPOOL_CACHELINE_SIZE,
 * so consecutive elements of a thread_info array keep the same offset within a cache line.
 * NOTE(review): the message hard-codes "64 bytes"; confirm PTHREADPOOL_CACHELINE_SIZE is 64 on every
 * supported target, otherwise the message is misleading.
 */
PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZE == 0,
	"thread_info structure must occupy an integer number of cache lines (64 bytes)");
86
/**
 * Parameters specific to the pthreadpool_parallelize_1d_with_uarch operation.
 * Stored as the parallelize_1d_with_uarch member of the params union in struct pthreadpool.
 */
struct pthreadpool_1d_with_uarch_params {
	/**
	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function.
	 */
	uint32_t default_uarch_index;
	/**
	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function.
	 */
	uint32_t max_uarch_index;
};
97
/**
 * Parameters specific to the pthreadpool_parallelize_1d_tile_1d operation.
 * Stored as the parallelize_1d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_1d_tile_1d_params {
	/**
	 * Copy of the range argument passed to the pthreadpool_parallelize_1d_tile_1d function.
	 */
	size_t range;
	/**
	 * Copy of the tile argument passed to the pthreadpool_parallelize_1d_tile_1d function.
	 */
	size_t tile;
};
108
/**
 * Parameters specific to the pthreadpool_parallelize_2d operation.
 * Stored as the parallelize_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_2d_params {
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function.
	 */
	struct fxdiv_divisor_size_t range_j;
};
115
/**
 * Parameters specific to the pthreadpool_parallelize_2d_tile_1d operation.
 * Stored as the parallelize_2d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_2d_tile_1d_params {
	/**
	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
	 */
	size_t range_j;
	/**
	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
	 */
	size_t tile_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
	 */
	struct fxdiv_divisor_size_t tile_range_j;
};
130
/**
 * Parameters specific to the pthreadpool_parallelize_2d_tile_2d operation.
 * Stored as the parallelize_2d_tile_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_2d_tile_2d_params {
	/**
	 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
	 */
	size_t range_i;
	/**
	 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
	 */
	size_t tile_i;
	/**
	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
	 */
	size_t range_j;
	/**
	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
	 */
	size_t tile_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
	 */
	struct fxdiv_divisor_size_t tile_range_j;
};
153
/**
 * Parameters specific to the pthreadpool_parallelize_2d_tile_2d_with_uarch operation.
 * Stored as the parallelize_2d_tile_2d_with_uarch member of the params union in struct pthreadpool.
 */
struct pthreadpool_2d_tile_2d_with_uarch_params {
	/**
	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	uint32_t default_uarch_index;
	/**
	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	uint32_t max_uarch_index;
	/**
	 * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	size_t range_i;
	/**
	 * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	size_t tile_i;
	/**
	 * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	size_t range_j;
	/**
	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
	 */
	size_t tile_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
	 */
	struct fxdiv_divisor_size_t tile_range_j;
};
184
/**
 * Parameters specific to the pthreadpool_parallelize_3d operation.
 * Stored as the parallelize_3d member of the params union in struct pthreadpool.
 */
struct pthreadpool_3d_params {
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_3d function.
	 */
	struct fxdiv_divisor_size_t range_k;
};
195
/**
 * Parameters specific to the pthreadpool_parallelize_3d_tile_1d operation.
 * Stored as the parallelize_3d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_3d_tile_1d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_1d function.
	 */
	size_t range_k;
	/**
	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_1d function.
	 */
	size_t tile_k;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
	 */
	struct fxdiv_divisor_size_t tile_range_k;
};
214
/**
 * Parameters specific to the pthreadpool_parallelize_3d_tile_2d operation.
 * Stored as the parallelize_3d_tile_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_3d_tile_2d_params {
	/**
	 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
	 */
	size_t range_j;
	/**
	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
	 */
	size_t tile_j;
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
	 */
	size_t range_k;
	/**
	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
	 */
	size_t tile_k;
	/**
	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
	 */
	struct fxdiv_divisor_size_t tile_range_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
	 */
	struct fxdiv_divisor_size_t tile_range_k;
};
241
/**
 * Parameters specific to the pthreadpool_parallelize_3d_tile_2d_with_uarch operation.
 * Stored as the parallelize_3d_tile_2d_with_uarch member of the params union in struct pthreadpool.
 */
struct pthreadpool_3d_tile_2d_with_uarch_params {
	/**
	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	uint32_t default_uarch_index;
	/**
	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	uint32_t max_uarch_index;
	/**
	 * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	size_t range_j;
	/**
	 * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	size_t tile_j;
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	size_t range_k;
	/**
	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
	 */
	size_t tile_k;
	/**
	 * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
	 */
	struct fxdiv_divisor_size_t tile_range_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
	 */
	struct fxdiv_divisor_size_t tile_range_k;
};
276
/**
 * Parameters specific to the pthreadpool_parallelize_4d operation.
 * Stored as the parallelize_4d member of the params union in struct pthreadpool.
 */
struct pthreadpool_4d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d function.
	 */
	size_t range_k;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k * range_l value.
	 */
	struct fxdiv_divisor_size_t range_kl;
	/**
	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_4d function.
	 */
	struct fxdiv_divisor_size_t range_l;
};
295
/**
 * Parameters specific to the pthreadpool_parallelize_4d_tile_1d operation.
 * Stored as the parallelize_4d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_4d_tile_1d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_1d function.
	 */
	size_t range_k;
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_1d function.
	 */
	size_t range_l;
	/**
	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_1d function.
	 */
	size_t tile_l;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k * divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_kl;
	/**
	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_l;
};
322
/**
 * Parameters specific to the pthreadpool_parallelize_4d_tile_2d operation.
 * Stored as the parallelize_4d_tile_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_4d_tile_2d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
	 */
	size_t range_k;
	/**
	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
	 */
	size_t tile_k;
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
	 */
	size_t range_l;
	/**
	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
	 */
	size_t tile_l;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_kl;
	/**
	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_l;
};
353
/**
 * Parameters specific to the pthreadpool_parallelize_4d_tile_2d_with_uarch operation.
 * Stored as the parallelize_4d_tile_2d_with_uarch member of the params union in struct pthreadpool.
 */
struct pthreadpool_4d_tile_2d_with_uarch_params {
	/**
	 * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	uint32_t default_uarch_index;
	/**
	 * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	uint32_t max_uarch_index;
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	size_t range_k;
	/**
	 * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	size_t tile_k;
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	size_t range_l;
	/**
	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	size_t tile_l;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_kl;
	/**
	 * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
	 */
	struct fxdiv_divisor_size_t tile_range_l;
};
392
/**
 * Parameters specific to the pthreadpool_parallelize_5d operation.
 * Stored as the parallelize_5d member of the params union in struct pthreadpool.
 */
struct pthreadpool_5d_params {
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d function.
	 */
	size_t range_l;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d function.
	 */
	struct fxdiv_divisor_size_t range_k;
	/**
	 * FXdiv divisor for the range_l * range_m value.
	 */
	struct fxdiv_divisor_size_t range_lm;
	/**
	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_5d function.
	 */
	struct fxdiv_divisor_size_t range_m;
};
415
/**
 * Parameters specific to the pthreadpool_parallelize_5d_tile_1d operation.
 * Stored as the parallelize_5d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_5d_tile_1d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_5d_tile_1d function.
	 */
	size_t range_k;
	/**
	 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_1d function.
	 */
	size_t range_m;
	/**
	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_1d function.
	 */
	size_t tile_m;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k * range_l value.
	 */
	struct fxdiv_divisor_size_t range_kl;
	/**
	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_5d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_l;
	/**
	 * FXdiv divisor for the divide_round_up(range_m, tile_m) value.
	 */
	struct fxdiv_divisor_size_t tile_range_m;
};
446
/**
 * Parameters specific to the pthreadpool_parallelize_5d_tile_2d operation.
 * Stored as the parallelize_5d_tile_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_5d_tile_2d_params {
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	size_t range_l;
	/**
	 * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	size_t tile_l;
	/**
	 * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	size_t range_m;
	/**
	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	size_t tile_m;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function.
	 */
	struct fxdiv_divisor_size_t range_k;
	/**
	 * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value.
	 */
	struct fxdiv_divisor_size_t tile_range_lm;
	/**
	 * FXdiv divisor for the divide_round_up(range_m, tile_m) value.
	 */
	struct fxdiv_divisor_size_t tile_range_m;
};
481
/**
 * Parameters specific to the pthreadpool_parallelize_6d operation.
 * Stored as the parallelize_6d member of the params union in struct pthreadpool.
 */
struct pthreadpool_6d_params {
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d function.
	 */
	size_t range_l;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d function.
	 */
	struct fxdiv_divisor_size_t range_k;
	/**
	 * FXdiv divisor for the range_l * range_m * range_n value.
	 */
	struct fxdiv_divisor_size_t range_lmn;
	/**
	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d function.
	 */
	struct fxdiv_divisor_size_t range_m;
	/**
	 * FXdiv divisor for the range_n argument passed to the pthreadpool_parallelize_6d function.
	 */
	struct fxdiv_divisor_size_t range_n;
};
508
/**
 * Parameters specific to the pthreadpool_parallelize_6d_tile_1d operation.
 * Stored as the parallelize_6d_tile_1d member of the params union in struct pthreadpool.
 */
struct pthreadpool_6d_tile_1d_params {
	/**
	 * Copy of the range_l argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	size_t range_l;
	/**
	 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	size_t range_n;
	/**
	 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	size_t tile_n;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_k;
	/**
	 * FXdiv divisor for the range_l * range_m * divide_round_up(range_n, tile_n) value.
	 */
	struct fxdiv_divisor_size_t tile_range_lmn;
	/**
	 * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d_tile_1d function.
	 */
	struct fxdiv_divisor_size_t range_m;
	/**
	 * FXdiv divisor for the divide_round_up(range_n, tile_n) value.
	 */
	struct fxdiv_divisor_size_t tile_range_n;
};
543
/**
 * Parameters specific to the pthreadpool_parallelize_6d_tile_2d operation.
 * Stored as the parallelize_6d_tile_2d member of the params union in struct pthreadpool.
 */
struct pthreadpool_6d_tile_2d_params {
	/**
	 * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	size_t range_k;
	/**
	 * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	size_t range_m;
	/**
	 * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	size_t tile_m;
	/**
	 * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	size_t range_n;
	/**
	 * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	size_t tile_n;
	/**
	 * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	struct fxdiv_divisor_size_t range_j;
	/**
	 * FXdiv divisor for the range_k * range_l value.
	 */
	struct fxdiv_divisor_size_t range_kl;
	/**
	 * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function.
	 */
	struct fxdiv_divisor_size_t range_l;
	/**
	 * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value.
	 */
	struct fxdiv_divisor_size_t tile_range_mn;
	/**
	 * FXdiv divisor for the divide_round_up(range_n, tile_n) value.
	 */
	struct fxdiv_divisor_size_t tile_range_n;
};
586
/**
 * The thread pool object.
 * Holds the state of the current operation (command, thread function, task, argument, parameters, flags),
 * the synchronization primitives of the selected backend (PTHREADPOOL_USE_CONDVAR, PTHREADPOOL_USE_FUTEX,
 * PTHREADPOOL_USE_GCD or PTHREADPOOL_USE_EVENT), the thread count, and a trailing array with one
 * thread_info structure per thread.
 */
struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool {
#if !PTHREADPOOL_USE_GCD
	/**
	 * The number of threads that are processing an operation.
	 */
	pthreadpool_atomic_size_t active_threads;
#endif
#if PTHREADPOOL_USE_FUTEX
	/**
	 * Indicates if there are active threads.
	 * Only two values are possible:
	 * - has_active_threads == 0 if active_threads == 0
	 * - has_active_threads == 1 if active_threads != 0
	 */
	pthreadpool_atomic_uint32_t has_active_threads;
#endif
#if !PTHREADPOOL_USE_GCD
	/**
	 * The last command submitted to the thread pool.
	 */
	pthreadpool_atomic_uint32_t command;
#endif
	/**
	 * The entry point function to call for each thread in the thread pool for parallelization tasks.
	 */
	pthreadpool_atomic_void_p thread_function;
	/**
	 * The function to call for each item.
	 */
	pthreadpool_atomic_void_p task;
	/**
	 * The first argument to the item processing function.
	 */
	pthreadpool_atomic_void_p argument;
	/**
	 * Additional parallelization parameters.
	 * These parameters are specific for each thread_function.
	 * Only one member is meaningful at a time because the members share storage.
	 */
	union {
		struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch;
		struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d;
		struct pthreadpool_2d_params parallelize_2d;
		struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d;
		struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d;
		struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch;
		struct pthreadpool_3d_params parallelize_3d;
		struct pthreadpool_3d_tile_1d_params parallelize_3d_tile_1d;
		struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d;
		struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch;
		struct pthreadpool_4d_params parallelize_4d;
		struct pthreadpool_4d_tile_1d_params parallelize_4d_tile_1d;
		struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d;
		struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch;
		struct pthreadpool_5d_params parallelize_5d;
		struct pthreadpool_5d_tile_1d_params parallelize_5d_tile_1d;
		struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d;
		struct pthreadpool_6d_params parallelize_6d;
		struct pthreadpool_6d_tile_1d_params parallelize_6d_tile_1d;
		struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d;
	} params;
	/**
	 * Copy of the flags passed to a parallelization function.
	 */
	pthreadpool_atomic_uint32_t flags;
#if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX
	/**
	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
	 */
	pthread_mutex_t execution_mutex;
#endif
#if PTHREADPOOL_USE_GCD
	/**
	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
	 */
	dispatch_semaphore_t execution_semaphore;
#endif
#if PTHREADPOOL_USE_EVENT
	/**
	 * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads.
	 * NOTE(review): this member has the same name as the pthread_mutex_t member above; the declaration is
	 * only valid if PTHREADPOOL_USE_EVENT is never enabled together with PTHREADPOOL_USE_CONDVAR or
	 * PTHREADPOOL_USE_FUTEX - confirm the backends are mutually exclusive.
	 */
	HANDLE execution_mutex;
#endif
#if PTHREADPOOL_USE_CONDVAR
	/**
	 * Guards access to the @a active_threads variable.
	 */
	pthread_mutex_t completion_mutex;
	/**
	 * Condition variable to wait until all threads complete an operation (until @a active_threads is zero).
	 */
	pthread_cond_t completion_condvar;
	/**
	 * Guards access to the @a command variable.
	 */
	pthread_mutex_t command_mutex;
	/**
	 * Condition variable to wait for change of the @a command variable.
	 */
	pthread_cond_t command_condvar;
#endif
#if PTHREADPOOL_USE_EVENT
	/**
	 * Events to wait on until all threads complete an operation (until @a active_threads is zero).
	 * To avoid race conditions due to spin-lock synchronization, we use two events and switch the event in use
	 * after every submitted command according to the high bit of the command word.
	 */
	HANDLE completion_event[2];
	/**
	 * Events to wait on for a change of the @a command variable.
	 * To avoid race conditions due to spin-lock synchronization, we use two events and switch the event in use
	 * after every submitted command according to the high bit of the command word.
	 */
	HANDLE command_event[2];
#endif
	/**
	 * FXdiv divisor for the number of threads in the thread pool.
	 * This structure never changes after pthreadpool_create.
	 */
	struct fxdiv_divisor_size_t threads_count;
	/**
	 * Thread information structures that immediately follow this structure.
	 * Declared as a flexible array member, so it contributes no elements to sizeof(struct pthreadpool);
	 * storage for the elements must be provided by whoever allocates the object.
	 */
	struct thread_info threads[];
};
711
/*
 * Compile-time check that sizeof(struct pthreadpool) is a whole multiple of PTHREADPOOL_CACHELINE_SIZE.
 * NOTE(review): the message hard-codes "64 bytes"; confirm PTHREADPOOL_CACHELINE_SIZE is 64 on every
 * supported target, otherwise the message is misleading.
 */
PTHREADPOOL_STATIC_ASSERT(sizeof(struct pthreadpool) % PTHREADPOOL_CACHELINE_SIZE == 0,
	"pthreadpool structure must occupy an integer number of cache lines (64 bytes)");
714
/**
 * Allocates a thread pool object for the given number of threads.
 * NOTE(review): because struct pthreadpool ends in the flexible array threads[], the allocation presumably
 * includes room for threads_count thread_info entries; the failure behavior (e.g. a NULL return) is not
 * visible from this header - confirm in the implementation.
 *
 * @param threads_count  the number of threads the pool is allocated for.
 * @return pointer to the allocated thread pool object.
 */
PTHREADPOOL_INTERNAL struct pthreadpool* pthreadpool_allocate(
	size_t threads_count);
717
/**
 * Releases a thread pool object.
 * NOTE(review): presumably the counterpart of pthreadpool_allocate; whether it also releases
 * synchronization objects or only memory is not visible from this header - confirm in the implementation.
 *
 * @param threadpool  the thread pool object to release.
 */
PTHREADPOOL_INTERNAL void pthreadpool_deallocate(
	struct pthreadpool* threadpool);
720
/**
 * Pointer to a per-thread entry point: a function taking the thread pool and a thread_info structure
 * and returning nothing. The pthreadpool_thread_parallelize_*_fastpath functions declared in this header
 * have this signature, and pthreadpool_parallelize takes an argument of this type.
 */
typedef void (*thread_function_t)(struct pthreadpool* threadpool, struct thread_info* thread);
722
/**
 * Internal entry point for submitting a parallelization operation to a thread pool.
 * NOTE(review): the parameter descriptions below are inferred from the parameter names and from the
 * fields of struct pthreadpool; confirm them against the implementation.
 *
 * @param threadpool       the thread pool to run the operation on.
 * @param thread_function  per-thread entry point (presumably stored in the thread_function field).
 * @param params           pointer to operation-specific parameters (presumably copied into the params union).
 * @param params_size      size of the object pointed to by @a params, presumably in bytes.
 * @param task             the item processing function (presumably stored in the task field).
 * @param context          presumably the first argument to the item processing function (the argument field).
 * @param linear_range     presumably the total number of items, to be split among the threads' work ranges.
 * @param flags            flags of the operation (presumably stored in the flags field).
 */
PTHREADPOOL_INTERNAL void pthreadpool_parallelize(
	struct pthreadpool* threadpool,
	thread_function_t thread_function,
	const void* params,
	size_t params_size,
	void* task,
	void* context,
	size_t linear_range,
	uint32_t flags);
732
/**
 * Fast-path per-thread entry point, judging by its name for the 1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
736
/**
 * Fast-path per-thread entry point, judging by its name for the 1d_with_uarch parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
740
/**
 * Fast-path per-thread entry point, judging by its name for the 1d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
744
/**
 * Fast-path per-thread entry point, judging by its name for the 2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
748
/**
 * Fast-path per-thread entry point, judging by its name for the 2d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
752
/**
 * Fast-path per-thread entry point, judging by its name for the 2d_tile_2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
756
/**
 * Fast-path per-thread entry point, judging by its name for the 2d_tile_2d_with_uarch parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
760
/**
 * Fast-path per-thread entry point, judging by its name for the 3d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
764
/**
 * Fast-path per-thread entry point, judging by its name for the 3d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
768
/**
 * Fast-path per-thread entry point, judging by its name for the 3d_tile_2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
772
/**
 * Fast-path per-thread entry point, judging by its name for the 3d_tile_2d_with_uarch parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
776
/**
 * Fast-path per-thread entry point, judging by its name for the 4d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
780
/**
 * Fast-path per-thread entry point, judging by its name for the 4d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
784
/**
 * Fast-path per-thread entry point, judging by its name for the 4d_tile_2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
788
/**
 * Fast-path per-thread entry point, judging by its name for the 4d_tile_2d_with_uarch parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
792
/**
 * Fast-path per-thread entry point, judging by its name for the 5d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
796
/**
 * Fast-path per-thread entry point, judging by its name for the 5d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
800
/**
 * Fast-path per-thread entry point, judging by its name for the 5d_tile_2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
804
/**
 * Fast-path per-thread entry point, judging by its name for the 6d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
808
/**
 * Fast-path per-thread entry point, judging by its name for the 6d_tile_1d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
812
/**
 * Fast-path per-thread entry point, judging by its name for the 6d_tile_2d parallelization.
 * Has the thread_function_t signature. When the fast path is chosen is not visible from this header.
 *
 * @param threadpool  the thread pool.
 * @param thread      the thread_info structure the function operates on.
 */
PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
816