1 | #pragma once |
2 | |
3 | /* Standard C headers */ |
4 | #include <stddef.h> |
5 | #include <stdint.h> |
6 | |
7 | /* Internal headers */ |
8 | #include "threadpool-common.h" |
9 | #include "threadpool-atomics.h" |
10 | |
11 | /* POSIX headers */ |
12 | #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX |
13 | #include <pthread.h> |
14 | #endif |
15 | |
16 | /* Mach headers */ |
17 | #if PTHREADPOOL_USE_GCD |
18 | #include <dispatch/dispatch.h> |
19 | #endif |
20 | |
21 | /* Windows headers */ |
22 | #if PTHREADPOOL_USE_EVENT |
23 | #ifndef WIN32_LEAN_AND_MEAN |
24 | #define WIN32_LEAN_AND_MEAN |
25 | #endif |
26 | #include <windows.h> |
27 | #endif |
28 | |
29 | /* Dependencies */ |
30 | #include <fxdiv.h> |
31 | |
32 | /* Library header */ |
33 | #include <pthreadpool.h> |
34 | |
35 | |
/**
 * Mask that keeps the low 31 bits of a 32-bit command word and clears its high bit.
 * NOTE(review): the event-based build documents the high bit of the command word as the selector
 * between paired events, so this mask presumably extracts the command itself -- confirm at use sites.
 */
#define THREADPOOL_COMMAND_MASK UINT32_C(0x7FFFFFFF)
37 | |
/**
 * Commands understood by the thread pool.
 * NOTE(review): presumably these are the values carried in the command field of struct pthreadpool
 * (below the bit excluded by THREADPOOL_COMMAND_MASK) -- confirm against the implementation.
 */
enum threadpool_command {
	threadpool_command_init = 0,
	threadpool_command_parallelize = 1,
	threadpool_command_shutdown = 2,
};
43 | |
44 | struct PTHREADPOOL_CACHELINE_ALIGNED thread_info { |
45 | /** |
46 | * Index of the first element in the work range. |
47 | * Before processing a new element the owning worker thread increments this value. |
48 | */ |
49 | pthreadpool_atomic_size_t range_start; |
50 | /** |
51 | * Index of the element after the last element of the work range. |
52 | * Before processing a new element the stealing worker thread decrements this value. |
53 | */ |
54 | pthreadpool_atomic_size_t range_end; |
55 | /** |
56 | * The number of elements in the work range. |
57 | * Due to race conditions range_length <= range_end - range_start. |
58 | * The owning worker thread must decrement this value before incrementing @a range_start. |
59 | * The stealing worker thread must decrement this value before decrementing @a range_end. |
60 | */ |
61 | pthreadpool_atomic_size_t range_length; |
62 | /** |
63 | * Thread number in the 0..threads_count-1 range. |
64 | */ |
65 | size_t thread_number; |
66 | /** |
67 | * Thread pool which owns the thread. |
68 | */ |
69 | struct pthreadpool* threadpool; |
70 | #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX |
71 | /** |
72 | * The pthread object corresponding to the thread. |
73 | */ |
74 | pthread_t thread_object; |
75 | #endif |
76 | #if PTHREADPOOL_USE_EVENT |
77 | /** |
78 | * The Windows thread handle corresponding to the thread. |
79 | */ |
80 | HANDLE thread_handle; |
81 | #endif |
82 | }; |
83 | |
84 | PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZE == 0, |
85 | "thread_info structure must occupy an integer number of cache lines (64 bytes)" ); |
86 | |
/** Extra parameters kept for pthreadpool_parallelize_1d_with_uarch. */
struct pthreadpool_1d_with_uarch_params {
	/** default_uarch_index as passed to pthreadpool_parallelize_1d_with_uarch. */
	uint32_t default_uarch_index;
	/** max_uarch_index as passed to pthreadpool_parallelize_1d_with_uarch. */
	uint32_t max_uarch_index;
};
97 | |
/** Extra parameters kept for pthreadpool_parallelize_1d_tile_1d. */
struct pthreadpool_1d_tile_1d_params {
	/** range as passed to pthreadpool_parallelize_1d_tile_1d. */
	size_t range;
	/** tile as passed to pthreadpool_parallelize_1d_tile_1d. */
	size_t tile;
};
108 | |
109 | struct pthreadpool_2d_params { |
110 | /** |
111 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function. |
112 | */ |
113 | struct fxdiv_divisor_size_t range_j; |
114 | }; |
115 | |
116 | struct pthreadpool_2d_tile_1d_params { |
117 | /** |
118 | * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. |
119 | */ |
120 | size_t range_j; |
121 | /** |
122 | * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function. |
123 | */ |
124 | size_t tile_j; |
125 | /** |
126 | * FXdiv divisor for the divide_round_up(range_j, tile_j) value. |
127 | */ |
128 | struct fxdiv_divisor_size_t tile_range_j; |
129 | }; |
130 | |
131 | struct pthreadpool_2d_tile_2d_params { |
132 | /** |
133 | * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. |
134 | */ |
135 | size_t range_i; |
136 | /** |
137 | * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function. |
138 | */ |
139 | size_t tile_i; |
140 | /** |
141 | * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. |
142 | */ |
143 | size_t range_j; |
144 | /** |
145 | * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function. |
146 | */ |
147 | size_t tile_j; |
148 | /** |
149 | * FXdiv divisor for the divide_round_up(range_j, tile_j) value. |
150 | */ |
151 | struct fxdiv_divisor_size_t tile_range_j; |
152 | }; |
153 | |
154 | struct pthreadpool_2d_tile_2d_with_uarch_params { |
155 | /** |
156 | * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
157 | */ |
158 | uint32_t default_uarch_index; |
159 | /** |
160 | * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
161 | */ |
162 | uint32_t max_uarch_index; |
163 | /** |
164 | * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
165 | */ |
166 | size_t range_i; |
167 | /** |
168 | * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
169 | */ |
170 | size_t tile_i; |
171 | /** |
172 | * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
173 | */ |
174 | size_t range_j; |
175 | /** |
176 | * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function. |
177 | */ |
178 | size_t tile_j; |
179 | /** |
180 | * FXdiv divisor for the divide_round_up(range_j, tile_j) value. |
181 | */ |
182 | struct fxdiv_divisor_size_t tile_range_j; |
183 | }; |
184 | |
185 | struct pthreadpool_3d_params { |
186 | /** |
187 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d function. |
188 | */ |
189 | struct fxdiv_divisor_size_t range_j; |
190 | /** |
191 | * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_3d function. |
192 | */ |
193 | struct fxdiv_divisor_size_t range_k; |
194 | }; |
195 | |
196 | struct pthreadpool_3d_tile_1d_params { |
197 | /** |
198 | * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. |
199 | */ |
200 | size_t range_k; |
201 | /** |
202 | * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_1d function. |
203 | */ |
204 | size_t tile_k; |
205 | /** |
206 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_3d_tile_1d function. |
207 | */ |
208 | struct fxdiv_divisor_size_t range_j; |
209 | /** |
210 | * FXdiv divisor for the divide_round_up(range_k, tile_k) value. |
211 | */ |
212 | struct fxdiv_divisor_size_t tile_range_k; |
213 | }; |
214 | |
215 | struct pthreadpool_3d_tile_2d_params { |
216 | /** |
217 | * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. |
218 | */ |
219 | size_t range_j; |
220 | /** |
221 | * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function. |
222 | */ |
223 | size_t tile_j; |
224 | /** |
225 | * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. |
226 | */ |
227 | size_t range_k; |
228 | /** |
229 | * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function. |
230 | */ |
231 | size_t tile_k; |
232 | /** |
233 | * FXdiv divisor for the divide_round_up(range_j, tile_j) value. |
234 | */ |
235 | struct fxdiv_divisor_size_t tile_range_j; |
236 | /** |
237 | * FXdiv divisor for the divide_round_up(range_k, tile_k) value. |
238 | */ |
239 | struct fxdiv_divisor_size_t tile_range_k; |
240 | }; |
241 | |
242 | struct pthreadpool_3d_tile_2d_with_uarch_params { |
243 | /** |
244 | * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
245 | */ |
246 | uint32_t default_uarch_index; |
247 | /** |
248 | * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
249 | */ |
250 | uint32_t max_uarch_index; |
251 | /** |
252 | * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
253 | */ |
254 | size_t range_j; |
255 | /** |
256 | * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
257 | */ |
258 | size_t tile_j; |
259 | /** |
260 | * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
261 | */ |
262 | size_t range_k; |
263 | /** |
264 | * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function. |
265 | */ |
266 | size_t tile_k; |
267 | /** |
268 | * FXdiv divisor for the divide_round_up(range_j, tile_j) value. |
269 | */ |
270 | struct fxdiv_divisor_size_t tile_range_j; |
271 | /** |
272 | * FXdiv divisor for the divide_round_up(range_k, tile_k) value. |
273 | */ |
274 | struct fxdiv_divisor_size_t tile_range_k; |
275 | }; |
276 | |
277 | struct pthreadpool_4d_params { |
278 | /** |
279 | * Copy of the range_k argument passed to the pthreadpool_parallelize_4d function. |
280 | */ |
281 | size_t range_k; |
282 | /** |
283 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d function. |
284 | */ |
285 | struct fxdiv_divisor_size_t range_j; |
286 | /** |
287 | * FXdiv divisor for the range_k * range_l value. |
288 | */ |
289 | struct fxdiv_divisor_size_t range_kl; |
290 | /** |
291 | * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_4d function. |
292 | */ |
293 | struct fxdiv_divisor_size_t range_l; |
294 | }; |
295 | |
296 | struct pthreadpool_4d_tile_1d_params { |
297 | /** |
298 | * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_1d function. |
299 | */ |
300 | size_t range_k; |
301 | /** |
302 | * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. |
303 | */ |
304 | size_t range_l; |
305 | /** |
306 | * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_1d function. |
307 | */ |
308 | size_t tile_l; |
309 | /** |
310 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_1d function. |
311 | */ |
312 | struct fxdiv_divisor_size_t range_j; |
313 | /** |
314 | * FXdiv divisor for the range_k * divide_round_up(range_l, tile_l) value. |
315 | */ |
316 | struct fxdiv_divisor_size_t tile_range_kl; |
317 | /** |
318 | * FXdiv divisor for the divide_round_up(range_l, tile_l) value. |
319 | */ |
320 | struct fxdiv_divisor_size_t tile_range_l; |
321 | }; |
322 | |
323 | struct pthreadpool_4d_tile_2d_params { |
324 | /** |
325 | * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. |
326 | */ |
327 | size_t range_k; |
328 | /** |
329 | * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function. |
330 | */ |
331 | size_t tile_k; |
332 | /** |
333 | * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. |
334 | */ |
335 | size_t range_l; |
336 | /** |
337 | * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function. |
338 | */ |
339 | size_t tile_l; |
340 | /** |
341 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function. |
342 | */ |
343 | struct fxdiv_divisor_size_t range_j; |
344 | /** |
345 | * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. |
346 | */ |
347 | struct fxdiv_divisor_size_t tile_range_kl; |
348 | /** |
349 | * FXdiv divisor for the divide_round_up(range_l, tile_l) value. |
350 | */ |
351 | struct fxdiv_divisor_size_t tile_range_l; |
352 | }; |
353 | |
354 | struct pthreadpool_4d_tile_2d_with_uarch_params { |
355 | /** |
356 | * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
357 | */ |
358 | uint32_t default_uarch_index; |
359 | /** |
360 | * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
361 | */ |
362 | uint32_t max_uarch_index; |
363 | /** |
364 | * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
365 | */ |
366 | size_t range_k; |
367 | /** |
368 | * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
369 | */ |
370 | size_t tile_k; |
371 | /** |
372 | * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
373 | */ |
374 | size_t range_l; |
375 | /** |
376 | * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
377 | */ |
378 | size_t tile_l; |
379 | /** |
380 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function. |
381 | */ |
382 | struct fxdiv_divisor_size_t range_j; |
383 | /** |
384 | * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value. |
385 | */ |
386 | struct fxdiv_divisor_size_t tile_range_kl; |
387 | /** |
388 | * FXdiv divisor for the divide_round_up(range_l, tile_l) value. |
389 | */ |
390 | struct fxdiv_divisor_size_t tile_range_l; |
391 | }; |
392 | |
393 | struct pthreadpool_5d_params { |
394 | /** |
395 | * Copy of the range_l argument passed to the pthreadpool_parallelize_5d function. |
396 | */ |
397 | size_t range_l; |
398 | /** |
399 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d function. |
400 | */ |
401 | struct fxdiv_divisor_size_t range_j; |
402 | /** |
403 | * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d function. |
404 | */ |
405 | struct fxdiv_divisor_size_t range_k; |
406 | /** |
407 | * FXdiv divisor for the range_l * range_m value. |
408 | */ |
409 | struct fxdiv_divisor_size_t range_lm; |
410 | /** |
411 | * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_5d function. |
412 | */ |
413 | struct fxdiv_divisor_size_t range_m; |
414 | }; |
415 | |
416 | struct pthreadpool_5d_tile_1d_params { |
417 | /** |
418 | * Copy of the range_k argument passed to the pthreadpool_parallelize_5d_tile_1d function. |
419 | */ |
420 | size_t range_k; |
421 | /** |
422 | * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. |
423 | */ |
424 | size_t range_m; |
425 | /** |
426 | * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_1d function. |
427 | */ |
428 | size_t tile_m; |
429 | /** |
430 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_1d function. |
431 | */ |
432 | struct fxdiv_divisor_size_t range_j; |
433 | /** |
434 | * FXdiv divisor for the range_k * range_l value. |
435 | */ |
436 | struct fxdiv_divisor_size_t range_kl; |
437 | /** |
438 | * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_5d_tile_1d function. |
439 | */ |
440 | struct fxdiv_divisor_size_t range_l; |
441 | /** |
442 | * FXdiv divisor for the divide_round_up(range_m, tile_m) value. |
443 | */ |
444 | struct fxdiv_divisor_size_t tile_range_m; |
445 | }; |
446 | |
447 | struct pthreadpool_5d_tile_2d_params { |
448 | /** |
449 | * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
450 | */ |
451 | size_t range_l; |
452 | /** |
453 | * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
454 | */ |
455 | size_t tile_l; |
456 | /** |
457 | * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
458 | */ |
459 | size_t range_m; |
460 | /** |
461 | * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
462 | */ |
463 | size_t tile_m; |
464 | /** |
465 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
466 | */ |
467 | struct fxdiv_divisor_size_t range_j; |
468 | /** |
469 | * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function. |
470 | */ |
471 | struct fxdiv_divisor_size_t range_k; |
472 | /** |
473 | * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value. |
474 | */ |
475 | struct fxdiv_divisor_size_t tile_range_lm; |
476 | /** |
477 | * FXdiv divisor for the divide_round_up(range_m, tile_m) value. |
478 | */ |
479 | struct fxdiv_divisor_size_t tile_range_m; |
480 | }; |
481 | |
482 | struct pthreadpool_6d_params { |
483 | /** |
484 | * Copy of the range_l argument passed to the pthreadpool_parallelize_6d function. |
485 | */ |
486 | size_t range_l; |
487 | /** |
488 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d function. |
489 | */ |
490 | struct fxdiv_divisor_size_t range_j; |
491 | /** |
492 | * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d function. |
493 | */ |
494 | struct fxdiv_divisor_size_t range_k; |
495 | /** |
496 | * FXdiv divisor for the range_l * range_m * range_n value. |
497 | */ |
498 | struct fxdiv_divisor_size_t range_lmn; |
499 | /** |
500 | * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d function. |
501 | */ |
502 | struct fxdiv_divisor_size_t range_m; |
503 | /** |
504 | * FXdiv divisor for the range_n argument passed to the pthreadpool_parallelize_6d function. |
505 | */ |
506 | struct fxdiv_divisor_size_t range_n; |
507 | }; |
508 | |
509 | struct pthreadpool_6d_tile_1d_params { |
510 | /** |
511 | * Copy of the range_l argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
512 | */ |
513 | size_t range_l; |
514 | /** |
515 | * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
516 | */ |
517 | size_t range_n; |
518 | /** |
519 | * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
520 | */ |
521 | size_t tile_n; |
522 | /** |
523 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
524 | */ |
525 | struct fxdiv_divisor_size_t range_j; |
526 | /** |
527 | * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
528 | */ |
529 | struct fxdiv_divisor_size_t range_k; |
530 | /** |
531 | * FXdiv divisor for the range_l * range_m * divide_round_up(range_n, tile_n) value. |
532 | */ |
533 | struct fxdiv_divisor_size_t tile_range_lmn; |
534 | /** |
535 | * FXdiv divisor for the range_m argument passed to the pthreadpool_parallelize_6d_tile_1d function. |
536 | */ |
537 | struct fxdiv_divisor_size_t range_m; |
538 | /** |
539 | * FXdiv divisor for the divide_round_up(range_n, tile_n) value. |
540 | */ |
541 | struct fxdiv_divisor_size_t tile_range_n; |
542 | }; |
543 | |
544 | struct pthreadpool_6d_tile_2d_params { |
545 | /** |
546 | * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
547 | */ |
548 | size_t range_k; |
549 | /** |
550 | * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
551 | */ |
552 | size_t range_m; |
553 | /** |
554 | * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
555 | */ |
556 | size_t tile_m; |
557 | /** |
558 | * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
559 | */ |
560 | size_t range_n; |
561 | /** |
562 | * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
563 | */ |
564 | size_t tile_n; |
565 | /** |
566 | * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
567 | */ |
568 | struct fxdiv_divisor_size_t range_j; |
569 | /** |
570 | * FXdiv divisor for the range_k * range_l value. |
571 | */ |
572 | struct fxdiv_divisor_size_t range_kl; |
573 | /** |
574 | * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function. |
575 | */ |
576 | struct fxdiv_divisor_size_t range_l; |
577 | /** |
578 | * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value. |
579 | */ |
580 | struct fxdiv_divisor_size_t tile_range_mn; |
581 | /** |
582 | * FXdiv divisor for the divide_round_up(range_n, tile_n) value. |
583 | */ |
584 | struct fxdiv_divisor_size_t tile_range_n; |
585 | }; |
586 | |
587 | struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool { |
588 | #if !PTHREADPOOL_USE_GCD |
589 | /** |
590 | * The number of threads that are processing an operation. |
591 | */ |
592 | pthreadpool_atomic_size_t active_threads; |
593 | #endif |
594 | #if PTHREADPOOL_USE_FUTEX |
595 | /** |
596 | * Indicates if there are active threads. |
597 | * Only two values are possible: |
598 | * - has_active_threads == 0 if active_threads == 0 |
599 | * - has_active_threads == 1 if active_threads != 0 |
600 | */ |
601 | pthreadpool_atomic_uint32_t has_active_threads; |
602 | #endif |
603 | #if !PTHREADPOOL_USE_GCD |
604 | /** |
605 | * The last command submitted to the thread pool. |
606 | */ |
607 | pthreadpool_atomic_uint32_t command; |
608 | #endif |
609 | /** |
610 | * The entry point function to call for each thread in the thread pool for parallelization tasks. |
611 | */ |
612 | pthreadpool_atomic_void_p thread_function; |
613 | /** |
614 | * The function to call for each item. |
615 | */ |
616 | pthreadpool_atomic_void_p task; |
617 | /** |
618 | * The first argument to the item processing function. |
619 | */ |
620 | pthreadpool_atomic_void_p argument; |
621 | /** |
622 | * Additional parallelization parameters. |
623 | * These parameters are specific for each thread_function. |
624 | */ |
625 | union { |
626 | struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch; |
627 | struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d; |
628 | struct pthreadpool_2d_params parallelize_2d; |
629 | struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d; |
630 | struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d; |
631 | struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch; |
632 | struct pthreadpool_3d_params parallelize_3d; |
633 | struct pthreadpool_3d_tile_1d_params parallelize_3d_tile_1d; |
634 | struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d; |
635 | struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch; |
636 | struct pthreadpool_4d_params parallelize_4d; |
637 | struct pthreadpool_4d_tile_1d_params parallelize_4d_tile_1d; |
638 | struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d; |
639 | struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch; |
640 | struct pthreadpool_5d_params parallelize_5d; |
641 | struct pthreadpool_5d_tile_1d_params parallelize_5d_tile_1d; |
642 | struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d; |
643 | struct pthreadpool_6d_params parallelize_6d; |
644 | struct pthreadpool_6d_tile_1d_params parallelize_6d_tile_1d; |
645 | struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d; |
646 | } params; |
647 | /** |
648 | * Copy of the flags passed to a parallelization function. |
649 | */ |
650 | pthreadpool_atomic_uint32_t flags; |
651 | #if PTHREADPOOL_USE_CONDVAR || PTHREADPOOL_USE_FUTEX |
652 | /** |
653 | * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. |
654 | */ |
655 | pthread_mutex_t execution_mutex; |
656 | #endif |
657 | #if PTHREADPOOL_USE_GCD |
658 | /** |
659 | * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. |
660 | */ |
661 | dispatch_semaphore_t execution_semaphore; |
662 | #endif |
663 | #if PTHREADPOOL_USE_EVENT |
664 | /** |
665 | * Serializes concurrent calls to @a pthreadpool_parallelize_* from different threads. |
666 | */ |
667 | HANDLE execution_mutex; |
668 | #endif |
669 | #if PTHREADPOOL_USE_CONDVAR |
670 | /** |
671 | * Guards access to the @a active_threads variable. |
672 | */ |
673 | pthread_mutex_t completion_mutex; |
674 | /** |
675 | * Condition variable to wait until all threads complete an operation (until @a active_threads is zero). |
676 | */ |
677 | pthread_cond_t completion_condvar; |
678 | /** |
679 | * Guards access to the @a command variable. |
680 | */ |
681 | pthread_mutex_t command_mutex; |
682 | /** |
683 | * Condition variable to wait for change of the @a command variable. |
684 | */ |
685 | pthread_cond_t command_condvar; |
686 | #endif |
687 | #if PTHREADPOOL_USE_EVENT |
688 | /** |
689 | * Events to wait on until all threads complete an operation (until @a active_threads is zero). |
690 | * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every |
691 | * submitted command according to the high bit of the command word. |
692 | */ |
693 | HANDLE completion_event[2]; |
694 | /** |
695 | * Events to wait on for change of the @a command variable. |
696 | * To avoid race conditions due to spin-lock synchronization, we use two events and switch event in use after every |
697 | * submitted command according to the high bit of the command word. |
698 | */ |
699 | HANDLE command_event[2]; |
700 | #endif |
701 | /** |
702 | * FXdiv divisor for the number of threads in the thread pool. |
703 | * This struct never change after pthreadpool_create. |
704 | */ |
705 | struct fxdiv_divisor_size_t threads_count; |
706 | /** |
707 | * Thread information structures that immediately follow this structure. |
708 | */ |
709 | struct thread_info threads[]; |
710 | }; |
711 | |
712 | PTHREADPOOL_STATIC_ASSERT(sizeof(struct pthreadpool) % PTHREADPOOL_CACHELINE_SIZE == 0, |
713 | "pthreadpool structure must occupy an integer number of cache lines (64 bytes)" ); |
714 | |
715 | PTHREADPOOL_INTERNAL struct pthreadpool* pthreadpool_allocate( |
716 | size_t threads_count); |
717 | |
718 | PTHREADPOOL_INTERNAL void pthreadpool_deallocate( |
719 | struct pthreadpool* threadpool); |
720 | |
/**
 * Pointer to a per-thread routine that takes a thread pool pointer and a thread_info pointer
 * and returns nothing.
 */
typedef void (*thread_function_t)(
	struct pthreadpool* threadpool,
	struct thread_info* thread);
722 | |
723 | PTHREADPOOL_INTERNAL void pthreadpool_parallelize( |
724 | struct pthreadpool* threadpool, |
725 | thread_function_t thread_function, |
726 | const void* params, |
727 | size_t params_size, |
728 | void* task, |
729 | void* context, |
730 | size_t linear_range, |
731 | uint32_t flags); |
732 | |
733 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath( |
734 | struct pthreadpool* threadpool, |
735 | struct thread_info* thread); |
736 | |
737 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath( |
738 | struct pthreadpool* threadpool, |
739 | struct thread_info* thread); |
740 | |
741 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath( |
742 | struct pthreadpool* threadpool, |
743 | struct thread_info* thread); |
744 | |
745 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath( |
746 | struct pthreadpool* threadpool, |
747 | struct thread_info* thread); |
748 | |
749 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath( |
750 | struct pthreadpool* threadpool, |
751 | struct thread_info* thread); |
752 | |
753 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath( |
754 | struct pthreadpool* threadpool, |
755 | struct thread_info* thread); |
756 | |
757 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath( |
758 | struct pthreadpool* threadpool, |
759 | struct thread_info* thread); |
760 | |
761 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath( |
762 | struct pthreadpool* threadpool, |
763 | struct thread_info* thread); |
764 | |
765 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath( |
766 | struct pthreadpool* threadpool, |
767 | struct thread_info* thread); |
768 | |
769 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath( |
770 | struct pthreadpool* threadpool, |
771 | struct thread_info* thread); |
772 | |
773 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath( |
774 | struct pthreadpool* threadpool, |
775 | struct thread_info* thread); |
776 | |
777 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath( |
778 | struct pthreadpool* threadpool, |
779 | struct thread_info* thread); |
780 | |
781 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath( |
782 | struct pthreadpool* threadpool, |
783 | struct thread_info* thread); |
784 | |
785 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath( |
786 | struct pthreadpool* threadpool, |
787 | struct thread_info* thread); |
788 | |
789 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath( |
790 | struct pthreadpool* threadpool, |
791 | struct thread_info* thread); |
792 | |
793 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath( |
794 | struct pthreadpool* threadpool, |
795 | struct thread_info* thread); |
796 | |
797 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath( |
798 | struct pthreadpool* threadpool, |
799 | struct thread_info* thread); |
800 | |
801 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath( |
802 | struct pthreadpool* threadpool, |
803 | struct thread_info* thread); |
804 | |
805 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath( |
806 | struct pthreadpool* threadpool, |
807 | struct thread_info* thread); |
808 | |
809 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath( |
810 | struct pthreadpool* threadpool, |
811 | struct thread_info* thread); |
812 | |
813 | PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath( |
814 | struct pthreadpool* threadpool, |
815 | struct thread_info* thread); |
816 | |