#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
  #include <xmmintrin.h>
#endif

/* ARM-specific headers */
#if defined(__ARM_ACLE)
  #include <arm_acle.h>
#endif

/* MSVC-specific headers */
#ifdef _MSC_VER
  #include <intrin.h>
#endif

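/*
 * This header provides a common interface to the atomic operations used by
 * pthreadpool: relaxed/acquire loads, relaxed/release stores,
 * decrement-and-fetch in several memory orders, a non-blocking
 * try-decrement, and acquire/release fences, each implemented on top of
 * whatever primitives the compiler and target support. A spin-wait hint
 * (pthreadpool_yield) is defined at the end of the file.
 */
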
#if defined(__wasm__) && defined(__clang__)
  /*
   * Clang for the WebAssembly target lacks the stdatomic.h header, even
   * though it supports the necessary low-level intrinsics. Thus, we
   * implement pthreadpool atomic functions on top of low-level
   * Clang-specific interfaces for this target.
   */

  typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
  typedef _Atomic(size_t) pthreadpool_atomic_size_t;
  typedef _Atomic(void*) pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return __c11_atomic_load(address, __ATOMIC_RELAXED);
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __c11_atomic_load(address, __ATOMIC_RELAXED);
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return __c11_atomic_load(address, __ATOMIC_RELAXED);
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    __c11_atomic_store(address, value, __ATOMIC_RELAXED);
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    __c11_atomic_store(address, value, __ATOMIC_RELAXED);
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    __c11_atomic_store(address, value, __ATOMIC_RELAXED);
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    __c11_atomic_store(address, value, __ATOMIC_RELEASE);
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    __c11_atomic_store(address, value, __ATOMIC_RELEASE);
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __c11_atomic_fetch_sub(address, 1, __ATOMIC_ACQ_REL) - 1;
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
    while (actual_value != 0) {
      if (__c11_atomic_compare_exchange_weak(
          value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
      {
        return true;
      }
    }
    return false;
  }

  static inline void pthreadpool_fence_acquire() {
    __c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
  }

  static inline void pthreadpool_fence_release() {
    __c11_atomic_thread_fence(__ATOMIC_RELEASE);
  }
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
  #include <stdatomic.h>
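
  /*
   * Standard C11 atomics from stdatomic.h: the preferred implementation
   * whenever the compiler advertises C11 with atomics support.
   */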

  typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
  typedef _Atomic(size_t) pthreadpool_atomic_size_t;
  typedef _Atomic(void*) pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return atomic_load_explicit(address, memory_order_relaxed);
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return atomic_load_explicit(address, memory_order_relaxed);
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return atomic_load_explicit(address, memory_order_relaxed);
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return atomic_load_explicit(address, memory_order_acquire);
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return atomic_load_explicit(address, memory_order_acquire);
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    atomic_store_explicit(address, value, memory_order_relaxed);
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    atomic_store_explicit(address, value, memory_order_relaxed);
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    atomic_store_explicit(address, value, memory_order_relaxed);
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    atomic_store_explicit(address, value, memory_order_release);
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    atomic_store_explicit(address, value, memory_order_release);
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return atomic_fetch_sub_explicit(address, 1, memory_order_acq_rel) - 1;
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    #if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
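      /*
       * Clang on ARM exposes the exclusive load/store primitives
       * (LDREX/STREX) directly. Using them lets the loop observe a zero
       * value and give up via CLREX instead of retrying a compare-exchange.
       */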
      size_t actual_value;
      do {
        actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
        if (actual_value == 0) {
          __builtin_arm_clrex();
          return false;
        }
      } while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
      return true;
    #else
      size_t actual_value = pthreadpool_load_relaxed_size_t(value);
      while (actual_value != 0) {
        if (atomic_compare_exchange_weak_explicit(
            value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
        {
          return true;
        }
      }
      return false;
    #endif
  }

  static inline void pthreadpool_fence_acquire() {
    atomic_thread_fence(memory_order_acquire);
  }

  static inline void pthreadpool_fence_release() {
    atomic_thread_fence(memory_order_release);
  }
#elif defined(__GNUC__)
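  /*
   * Pre-C11 GCC-compatible compilers: plain volatile accesses stand in for
   * the atomic loads and stores, while the legacy __sync built-ins provide
   * the atomic decrements, compare-and-swap, and memory barriers.
   */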
  typedef uint32_t volatile pthreadpool_atomic_uint32_t;
  typedef size_t volatile pthreadpool_atomic_size_t;
  typedef void* volatile pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return *address;
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return *address;
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return *address;
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return *address;
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return *address;
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    *address = value;
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __sync_sub_and_fetch(address, 1);
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __sync_sub_and_fetch(address, 1);
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return __sync_sub_and_fetch(address, 1);
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = *value;
    while (actual_value != 0) {
      const size_t new_value = actual_value - 1;
      const size_t expected_value = actual_value;
      actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
      if (actual_value == expected_value) {
        return true;
      }
    }
    return false;
  }

  /* The __sync built-ins have no acquire- or release-only fence; use a full barrier for both */
  static inline void pthreadpool_fence_acquire() {
    __sync_synchronize();
  }

  static inline void pthreadpool_fence_release() {
    __sync_synchronize();
  }
#elif defined(_MSC_VER) && defined(_M_X64)
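  /*
   * MSVC on x86-64: plain volatile accesses provide relaxed loads and
   * stores, Interlocked intrinsics provide the read-modify-write
   * operations, and compiler barriers (_ReadBarrier/_WriteBarrier) are
   * enough for acquire/release ordering because the strongly ordered
   * x86-64 memory model already guarantees it at the hardware level.
   */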
  typedef volatile uint32_t pthreadpool_atomic_uint32_t;
  typedef volatile size_t pthreadpool_atomic_size_t;
  typedef void *volatile pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return *address;
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return *address;
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return *address;
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    /* x86-64 loads always have acquire semantics; use only a compiler barrier */
    const uint32_t value = *address;
    _ReadBarrier();
    return value;
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    /* x86-64 loads always have acquire semantics; use only a compiler barrier */
    const size_t value = *address;
    _ReadBarrier();
    return value;
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    /* x86-64 stores always have release semantics; use only a compiler barrier */
    _WriteBarrier();
    *address = value;
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    /* x86-64 stores always have release semantics; use only a compiler barrier */
    _WriteBarrier();
    *address = value;
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64((volatile __int64*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64((volatile __int64*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64((volatile __int64*) address);
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = *value;
    while (actual_value != 0) {
      const size_t new_value = actual_value - 1;
      const size_t expected_value = actual_value;
      actual_value = _InterlockedCompareExchange64(
          (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
      if (actual_value == expected_value) {
        return true;
      }
    }
    return false;
  }

  static inline void pthreadpool_fence_acquire() {
    _mm_lfence();
    _ReadBarrier();
  }

  static inline void pthreadpool_fence_release() {
    _WriteBarrier();
    _mm_sfence();
  }
#elif defined(_MSC_VER) && defined(_M_IX86)
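  /*
   * MSVC on 32-bit x86: same approach as the x86-64 implementation above,
   * using the 32-bit Interlocked intrinsics since size_t is 32 bits wide.
   */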
  typedef volatile uint32_t pthreadpool_atomic_uint32_t;
  typedef volatile size_t pthreadpool_atomic_size_t;
  typedef void *volatile pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return *address;
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return *address;
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return *address;
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    /* x86 loads always have acquire semantics; use only a compiler barrier */
    const uint32_t value = *address;
    _ReadBarrier();
    return value;
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    /* x86 loads always have acquire semantics; use only a compiler barrier */
    const size_t value = *address;
    _ReadBarrier();
    return value;
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    *address = value;
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    /* x86 stores always have release semantics; use only a compiler barrier */
    _WriteBarrier();
    *address = value;
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    /* x86 stores always have release semantics; use only a compiler barrier */
    _WriteBarrier();
    *address = value;
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement((volatile long*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement((volatile long*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement((volatile long*) address);
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = *value;
    while (actual_value != 0) {
      const size_t new_value = actual_value - 1;
      const size_t expected_value = actual_value;
      actual_value = _InterlockedCompareExchange(
          (volatile long*) value, (long) new_value, (long) expected_value);
      if (actual_value == expected_value) {
        return true;
      }
    }
    return false;
  }

  static inline void pthreadpool_fence_acquire() {
    /* Pair the hardware fence with a compiler barrier, as in the x86-64 branch */
    _mm_lfence();
    _ReadBarrier();
  }

  static inline void pthreadpool_fence_release() {
    _WriteBarrier();
    _mm_sfence();
  }
#elif defined(_MSC_VER) && defined(_M_ARM64)
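  /*
   * MSVC on ARM64: relaxed loads and stores map to the __iso_volatile
   * intrinsics, acquire loads and release stores map to the LDAR/STLR
   * instructions (__ldar/__stlr), and the _nf ("no fence") and _rel
   * Interlocked variants select the memory order of the atomic decrements.
   */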
  typedef volatile uint32_t pthreadpool_atomic_uint32_t;
  typedef volatile size_t pthreadpool_atomic_size_t;
  typedef void *volatile pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) __iso_volatile_load64((const volatile __int64*) address);
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return (void*) __iso_volatile_load64((const volatile __int64*) address);
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return (uint32_t) __ldar32((volatile unsigned __int32*) address);
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) __ldar64((volatile unsigned __int64*) address);
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    __iso_volatile_store64((volatile __int64*) address, (__int64) value);
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    __iso_volatile_store64((volatile __int64*) address, (__int64) value);
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    _WriteBarrier();
    __stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    _WriteBarrier();
    __stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement64((volatile __int64*) address);
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
    while (actual_value != 0) {
      const size_t new_value = actual_value - 1;
      const size_t expected_value = actual_value;
      actual_value = _InterlockedCompareExchange64_nf(
          (volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
      if (actual_value == expected_value) {
        return true;
      }
    }
    return false;
  }

  static inline void pthreadpool_fence_acquire() {
    __dmb(_ARM64_BARRIER_ISHLD);
    _ReadBarrier();
  }

  static inline void pthreadpool_fence_release() {
    _WriteBarrier();
    __dmb(_ARM64_BARRIER_ISH);
  }
#elif defined(_MSC_VER) && defined(_M_ARM)
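  /*
   * MSVC on 32-bit ARM: ARMv7 has no load-acquire/store-release
   * instructions, so acquire loads and release stores are built from
   * __iso_volatile accesses combined with DMB ISH barriers.
   */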
  typedef volatile uint32_t pthreadpool_atomic_uint32_t;
  typedef volatile size_t pthreadpool_atomic_size_t;
  typedef void *volatile pthreadpool_atomic_void_p;

  static inline uint32_t pthreadpool_load_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
  }

  static inline size_t pthreadpool_load_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) __iso_volatile_load32((const volatile __int32*) address);
  }

  static inline void* pthreadpool_load_relaxed_void_p(
      pthreadpool_atomic_void_p* address)
  {
    return (void*) __iso_volatile_load32((const volatile __int32*) address);
  }

  static inline uint32_t pthreadpool_load_acquire_uint32_t(
      pthreadpool_atomic_uint32_t* address)
  {
    const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
    __dmb(_ARM_BARRIER_ISH);
    _ReadBarrier();
    return value;
  }

  static inline size_t pthreadpool_load_acquire_size_t(
      pthreadpool_atomic_size_t* address)
  {
    const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
    __dmb(_ARM_BARRIER_ISH);
    _ReadBarrier();
    return value;
  }

  static inline void pthreadpool_store_relaxed_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline void pthreadpool_store_relaxed_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline void pthreadpool_store_relaxed_void_p(
      pthreadpool_atomic_void_p* address,
      void* value)
  {
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline void pthreadpool_store_release_uint32_t(
      pthreadpool_atomic_uint32_t* address,
      uint32_t value)
  {
    _WriteBarrier();
    __dmb(_ARM_BARRIER_ISH);
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline void pthreadpool_store_release_size_t(
      pthreadpool_atomic_size_t* address,
      size_t value)
  {
    _WriteBarrier();
    __dmb(_ARM_BARRIER_ISH);
    __iso_volatile_store32((volatile __int32*) address, (__int32) value);
  }

  static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement_nf((volatile long*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement_rel((volatile long*) address);
  }

  static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
      pthreadpool_atomic_size_t* address)
  {
    return (size_t) _InterlockedDecrement((volatile long*) address);
  }

  static inline bool pthreadpool_try_decrement_relaxed_size_t(
      pthreadpool_atomic_size_t* value)
  {
    size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
    while (actual_value != 0) {
      const size_t new_value = actual_value - 1;
      const size_t expected_value = actual_value;
      actual_value = _InterlockedCompareExchange_nf(
          (volatile long*) value, (long) new_value, (long) expected_value);
      if (actual_value == expected_value) {
        return true;
      }
    }
    return false;
  }

  static inline void pthreadpool_fence_acquire() {
    __dmb(_ARM_BARRIER_ISH);
    _ReadBarrier();
  }

  static inline void pthreadpool_fence_release() {
    _WriteBarrier();
    __dmb(_ARM_BARRIER_ISH);
  }
#else
  #error "Platform-specific implementation of threadpool-atomics.h required"
#endif

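/*
 * pthreadpool_yield is a spin-wait hint: PAUSE on x86, YIELD on ARM, and a
 * fallback acquire fence on targets with no dedicated hint instruction.
 */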
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
  static inline void pthreadpool_yield() {
    _mm_pause();
  }
#elif defined(__ARM_ACLE) || (defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)))
  static inline void pthreadpool_yield() {
    __yield();
  }
#elif defined(__GNUC__) && ((defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || ((defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)))
  static inline void pthreadpool_yield() {
    __asm__ __volatile__("yield");
  }
#else
  static inline void pthreadpool_yield() {
    pthreadpool_fence_acquire();
  }
#endif
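
/*
 * Illustrative only: a minimal sketch of how these primitives are meant to
 * compose. One thread publishes a flag with a release store; another
 * spin-waits on it with acquire loads plus the yield hint. The names
 * (g_ready, signal_ready, wait_until_ready) are hypothetical and not part
 * of this header.
 *
 *   static pthreadpool_atomic_uint32_t g_ready;
 *
 *   static void signal_ready(void) {
 *     pthreadpool_store_release_uint32_t(&g_ready, 1);
 *   }
 *
 *   static void wait_until_ready(void) {
 *     while (pthreadpool_load_acquire_uint32_t(&g_ready) == 0) {
 *       pthreadpool_yield();
 *     }
 *   }
 */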