#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	#include <xmmintrin.h>
#endif

/* ARM-specific headers */
#if defined(__ARM_ACLE)
	#include <arm_acle.h>
#endif

/* MSVC-specific headers */
#ifdef _MSC_VER
	#include <intrin.h>
#endif


#if defined(__wasm__) && defined(__clang__)
	/*
	 * Clang for the WebAssembly target lacks the stdatomic.h header,
	 * even though it supports the necessary low-level intrinsics.
	 * Thus, we implement pthreadpool atomic functions on top of
	 * low-level Clang-specific interfaces for this target.
	 */

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t) pthreadpool_atomic_size_t;
	typedef _Atomic(void*) pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
	}

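	/*
	 * Decrement the value if and only if it is non-zero: the weak
	 * compare-exchange loop below reloads the observed value on each failed
	 * exchange, so this returns true once a decrement succeeds and false
	 * only if the counter is observed to be zero.
	 */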
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
		while (actual_value != 0) {
			if (__c11_atomic_compare_exchange_weak(
				value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
			{
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_fence_release() {
		__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	}
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
	#include <stdatomic.h>

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t) pthreadpool_atomic_size_t;
	typedef _Atomic(void*) pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
	}

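	/*
	 * On 32- and 64-bit ARM, Clang exposes the exclusive load/store
	 * intrinsics directly; the LDREX/STREX-style loop below checks for zero
	 * between the exclusive load and the exclusive store, and clears the
	 * exclusive monitor when no decrement is performed. Other targets fall
	 * back to a portable weak compare-exchange loop.
	 */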
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
			size_t actual_value;
			do {
				actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
				if (actual_value == 0) {
					__builtin_arm_clrex();
					return false;
				}
			} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
			return true;
		#else
			size_t actual_value = pthreadpool_load_relaxed_size_t(value);
			while (actual_value != 0) {
				if (atomic_compare_exchange_weak_explicit(
					value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
				{
					return true;
				}
			}
			return false;
		#endif
	}

	static inline void pthreadpool_fence_acquire() {
		atomic_thread_fence(memory_order_acquire);
	}

	static inline void pthreadpool_fence_release() {
		atomic_thread_fence(memory_order_release);
	}
#elif defined(__GNUC__)
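	/*
	 * Pre-C11 GCC fallback: plain accesses to volatile-qualified variables
	 * stand in for the relaxed and acquire/release loads and stores, while
	 * the legacy __sync builtins (which imply full barriers) provide the
	 * read-modify-write operations and fences.
	 */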
	typedef uint32_t volatile pthreadpool_atomic_uint32_t;
	typedef size_t volatile pthreadpool_atomic_size_t;
	typedef void* volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		__sync_synchronize();
	}

	static inline void pthreadpool_fence_release() {
		__sync_synchronize();
	}
#elif defined(_MSC_VER) && defined(_M_X64)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

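	/*
	 * On x86-64, the Interlocked intrinsics compile to LOCK-prefixed
	 * instructions, which act as full memory barriers, so the relaxed and
	 * release variants below are necessarily implemented identically.
	 */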
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange64(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		_mm_lfence();
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release() {
		_WriteBarrier();
		_mm_sfence();
	}
#elif defined(_MSC_VER) && defined(_M_IX86)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		_mm_lfence();
	}

	static inline void pthreadpool_fence_release() {
		_mm_sfence();
	}
#elif defined(_MSC_VER) && defined(_M_ARM64)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;
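	/*
	 * In this branch, relaxed accesses use the __iso_volatile_* intrinsics
	 * (plain loads/stores that the compiler will not reorder against other
	 * volatile accesses), while acquire loads and release stores map to the
	 * dedicated ARM64 LDAR and STLR instructions via __ldar*/__stlr*.
	 */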

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load64((const volatile __int64*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load64((const volatile __int64*) address);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __ldar32((volatile unsigned __int32*) address);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __ldar64((volatile unsigned __int64*) address);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		_WriteBarrier();
		__stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
	}

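	/*
	 * The _nf ("no fence") and _rel ("release") suffixes select the
	 * memory-ordering flavor of the ARM64 Interlocked intrinsics, matching
	 * the relaxed and release variants of this operation.
	 */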
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
	}

	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange64_nf(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		__dmb(_ARM64_BARRIER_ISHLD);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release() {
		_WriteBarrier();
		__dmb(_ARM64_BARRIER_ISH);
	}
#elif defined(_MSC_VER) && defined(_M_ARM)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load32((const volatile __int32*) address);
	}

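	/*
	 * ARMv7 has no dedicated load-acquire or store-release instructions, so
	 * acquire loads place a DMB ISH barrier after the plain load, and
	 * release stores place one before the plain store.
	 */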
	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_nf((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_rel((volatile long*) address);
	}

	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange_nf(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire() {
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release() {
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
	}
#else
	#error "Platform-specific implementation of threadpool-atomics.h required"
#endif
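
/*
 * Illustrative sketch (not part of the pthreadpool API): how the primitives
 * above compose into a work-draining loop. The function name and the
 * remaining_items counter are hypothetical and exist only for this example;
 * such a counter would be initialized elsewhere, e.g. with
 * pthreadpool_store_release_size_t, before workers start consuming it.
 */
static inline void pthreadpool_example_drain_work(
	pthreadpool_atomic_size_t* remaining_items)
{
	/* Claim work items one at a time; the loop exits once the last item
	 * has been claimed by some thread. */
	while (pthreadpool_try_decrement_relaxed_size_t(remaining_items)) {
		/* ... process one work item ... */
	}
}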

#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	static inline void pthreadpool_yield() {
		_mm_pause();
	}
#elif defined(__ARM_ACLE) || (defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)))
	static inline void pthreadpool_yield() {
		__yield();
	}
#elif defined(__GNUC__) && ((defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || ((defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)))
	static inline void pthreadpool_yield() {
		__asm__ __volatile__("yield");
	}
#else
	static inline void pthreadpool_yield() {
		pthreadpool_fence_acquire();
	}
#endif
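
/*
 * Illustrative sketch (not part of the pthreadpool API): a spin-wait built
 * from the primitives above. The function name and the flag parameter are
 * hypothetical; the acquire load pairs with a
 * pthreadpool_store_release_uint32_t performed by the thread that publishes
 * the flag, and pthreadpool_yield() eases contention while spinning.
 */
static inline void pthreadpool_example_spin_wait(
	pthreadpool_atomic_uint32_t* flag)
{
	/* Spin until the flag becomes non-zero. */
	while (pthreadpool_load_acquire_uint32_t(flag) == 0) {
		pthreadpool_yield();
	}
}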