1 | #pragma once |
2 | |
3 | #include <stdbool.h> |
4 | #include <stddef.h> |
5 | #include <stdint.h> |
6 | |
7 | /* SSE-specific headers */ |
8 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) |
9 | #include <xmmintrin.h> |
10 | #endif |
11 | |
12 | /* ARM-specific headers */ |
13 | #if defined(__ARM_ACLE) |
14 | #include <arm_acle.h> |
15 | #endif |
16 | |
17 | /* MSVC-specific headers */ |
18 | #ifdef _MSC_VER |
19 | #include <intrin.h> |
20 | #endif |
21 | |
22 | |
23 | #if defined(__wasm__) && defined(__clang__) |
24 | /* |
25 | * Clang for WebAssembly target lacks stdatomic.h header, |
26 | * even though it supports the necessary low-level intrinsics. |
27 | * Thus, we implement pthreadpool atomic functions on top of |
28 | * low-level Clang-specific interfaces for this target. |
29 | */ |
30 | |
typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
typedef _Atomic(size_t) pthreadpool_atomic_size_t;
typedef _Atomic(void*) pthreadpool_atomic_void_p;

/* Atomically load a uint32_t; no ordering guarantee beyond atomicity. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Atomically load a size_t; no ordering guarantee beyond atomicity. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Atomically load a pointer; no ordering guarantee beyond atomicity. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return __c11_atomic_load(address, __ATOMIC_RELAXED);
}

/* Atomically load a uint32_t with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
}

/* Atomically load a size_t with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
}

/* Atomically store a uint32_t; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Atomically store a size_t; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Atomically store a pointer; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELAXED);
}

/* Atomically store a uint32_t with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELEASE);
}

/* Atomically store a size_t with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__c11_atomic_store(address, value, __ATOMIC_RELEASE);
}

/* Atomically decrement; returns the new (decremented) value. Relaxed ordering. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
}

/* Atomically decrement; returns the new (decremented) value. Release ordering. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
}

/*
 * Atomically decrement *value unless it is zero.
 * Returns true if the value was decremented, false if it was already zero.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
	while (actual_value != 0) {
		/* On failure the CAS reloads actual_value, so zero is re-checked. */
		if (__c11_atomic_compare_exchange_weak(
			value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		{
			return true;
		}
	}
	return false;
}

/* Acquire fence: later memory operations may not be hoisted above prior loads. */
static inline void pthreadpool_fence_acquire(void) {
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
}

/* Release fence: earlier memory operations may not be sunk below later stores. */
static inline void pthreadpool_fence_release(void) {
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
}
133 | #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) |
#include <stdatomic.h>

typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
typedef _Atomic(size_t) pthreadpool_atomic_size_t;
typedef _Atomic(void*) pthreadpool_atomic_void_p;

/* Atomically load a uint32_t; no ordering guarantee beyond atomicity. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Atomically load a size_t; no ordering guarantee beyond atomicity. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Atomically load a pointer; no ordering guarantee beyond atomicity. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Atomically load a uint32_t with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Atomically load a size_t with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Atomically store a uint32_t; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically store a size_t; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically store a pointer; no ordering guarantee beyond atomicity. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically store a uint32_t with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/* Atomically store a size_t with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/* Atomically decrement; returns the new (decremented) value. Relaxed ordering. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
}

/* Atomically decrement; returns the new (decremented) value. Release ordering. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
}

/*
 * Atomically decrement *value unless it is zero.
 * Returns true if the value was decremented, false if it was already zero.
 * Relaxed ordering: a successful decrement implies no synchronization.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
	/*
	 * Clang-on-ARM: implement check-and-decrement directly with
	 * load-exclusive/store-exclusive, avoiding a separate CAS retry loop.
	 * NOTE(review): the casts access the _Atomic object through a plain
	 * size_t lvalue; this assumes Clang keeps _Atomic(size_t) layout-
	 * compatible with size_t — confirm before changing the typedefs.
	 */
	size_t actual_value;
	do {
		actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
		if (actual_value == 0) {
			/* Clear the exclusive monitor before bailing out. */
			__builtin_arm_clrex();
			return false;
		}
	} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
	return true;
#else
	size_t actual_value = pthreadpool_load_relaxed_size_t(value);
	while (actual_value != 0) {
		/* On failure the CAS updates actual_value, so zero is re-checked. */
		if (atomic_compare_exchange_weak_explicit(
			value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
		{
			return true;
		}
	}
	return false;
#endif
}

/* Acquire fence: later memory operations may not be hoisted above prior loads. */
static inline void pthreadpool_fence_acquire(void) {
	atomic_thread_fence(memory_order_acquire);
}

/* Release fence: earlier memory operations may not be sunk below later stores. */
static inline void pthreadpool_fence_release(void) {
	atomic_thread_fence(memory_order_release);
}
250 | #elif defined(__GNUC__) |
/*
 * Pre-C11 GCC fallback built on volatile accesses and legacy __sync builtins.
 * NOTE(review): plain volatile loads/stores provide no atomicity guarantee in
 * ISO C; this path assumes aligned word-sized accesses are atomic on the
 * targets that reach it — confirm per target. The __sync builtins imply a
 * full barrier, i.e. stronger ordering than the relaxed/release names suggest.
 */
typedef uint32_t volatile pthreadpool_atomic_uint32_t;
typedef size_t volatile pthreadpool_atomic_size_t;
typedef void* volatile pthreadpool_atomic_void_p;

/* Load a uint32_t; volatile access, no ordering guarantee. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Load a size_t; volatile access, no ordering guarantee. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Load a pointer; volatile access, no ordering guarantee. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/* Load a uint32_t intended as acquire; implemented as a plain volatile load. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Load a size_t intended as acquire; implemented as a plain volatile load. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Store a uint32_t; volatile access, no ordering guarantee. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Store a size_t; volatile access, no ordering guarantee. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Store a pointer; volatile access, no ordering guarantee. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/* Store a uint32_t intended as release; implemented as a plain volatile store. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Store a size_t intended as release; implemented as a plain volatile store. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Atomically decrement; returns the new (decremented) value (full barrier). */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/* Atomically decrement; returns the new (decremented) value (full barrier). */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/*
 * Atomically decrement *value unless it is zero.
 * Returns true if the value was decremented, false if it was already zero.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* CAS returns the value observed; equality means the swap happened. */
		actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence; __sync_synchronize is a full barrier (stronger than needed). */
static inline void pthreadpool_fence_acquire(void) {
	__sync_synchronize();
}

/* Release fence; __sync_synchronize is a full barrier (stronger than needed). */
static inline void pthreadpool_fence_release(void) {
	__sync_synchronize();
}
354 | #elif defined(_MSC_VER) && defined(_M_X64) |
355 | typedef volatile uint32_t pthreadpool_atomic_uint32_t; |
356 | typedef volatile size_t pthreadpool_atomic_size_t; |
357 | typedef void *volatile pthreadpool_atomic_void_p; |
358 | |
359 | static inline uint32_t pthreadpool_load_relaxed_uint32_t( |
360 | pthreadpool_atomic_uint32_t* address) |
361 | { |
362 | return *address; |
363 | } |
364 | |
365 | static inline size_t pthreadpool_load_relaxed_size_t( |
366 | pthreadpool_atomic_size_t* address) |
367 | { |
368 | return *address; |
369 | } |
370 | |
371 | static inline void* pthreadpool_load_relaxed_void_p( |
372 | pthreadpool_atomic_void_p* address) |
373 | { |
374 | return *address; |
375 | } |
376 | |
377 | static inline uint32_t pthreadpool_load_acquire_uint32_t( |
378 | pthreadpool_atomic_uint32_t* address) |
379 | { |
380 | /* x86-64 loads always have acquire semantics; use only a compiler barrier */ |
381 | const uint32_t value = *address; |
382 | _ReadBarrier(); |
383 | return value; |
384 | } |
385 | |
386 | static inline size_t pthreadpool_load_acquire_size_t( |
387 | pthreadpool_atomic_size_t* address) |
388 | { |
389 | /* x86-64 loads always have acquire semantics; use only a compiler barrier */ |
390 | const size_t value = *address; |
391 | _ReadBarrier(); |
392 | return value; |
393 | } |
394 | |
395 | static inline void pthreadpool_store_relaxed_uint32_t( |
396 | pthreadpool_atomic_uint32_t* address, |
397 | uint32_t value) |
398 | { |
399 | *address = value; |
400 | } |
401 | |
402 | static inline void pthreadpool_store_relaxed_size_t( |
403 | pthreadpool_atomic_size_t* address, |
404 | size_t value) |
405 | { |
406 | *address = value; |
407 | } |
408 | |
409 | static inline void pthreadpool_store_relaxed_void_p( |
410 | pthreadpool_atomic_void_p* address, |
411 | void* value) |
412 | { |
413 | *address = value; |
414 | } |
415 | |
416 | static inline void pthreadpool_store_release_uint32_t( |
417 | pthreadpool_atomic_uint32_t* address, |
418 | uint32_t value) |
419 | { |
420 | /* x86-64 stores always have release semantics; use only a compiler barrier */ |
421 | _WriteBarrier(); |
422 | *address = value; |
423 | } |
424 | |
425 | static inline void pthreadpool_store_release_size_t( |
426 | pthreadpool_atomic_size_t* address, |
427 | size_t value) |
428 | { |
429 | /* x86-64 stores always have release semantics; use only a compiler barrier */ |
430 | _WriteBarrier(); |
431 | *address = value; |
432 | } |
433 | |
434 | static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( |
435 | pthreadpool_atomic_size_t* address) |
436 | { |
437 | return (size_t) _InterlockedDecrement64((volatile __int64*) address); |
438 | } |
439 | |
440 | static inline size_t pthreadpool_decrement_fetch_release_size_t( |
441 | pthreadpool_atomic_size_t* address) |
442 | { |
443 | return (size_t) _InterlockedDecrement64((volatile __int64*) address); |
444 | } |
445 | |
446 | static inline bool pthreadpool_try_decrement_relaxed_size_t( |
447 | pthreadpool_atomic_size_t* value) |
448 | { |
449 | size_t actual_value = *value; |
450 | while (actual_value != 0) { |
451 | const size_t new_value = actual_value - 1; |
452 | const size_t expected_value = actual_value; |
453 | actual_value = _InterlockedCompareExchange64( |
454 | (volatile __int64*) value, (__int64) new_value, (__int64) expected_value); |
455 | if (actual_value == expected_value) { |
456 | return true; |
457 | } |
458 | } |
459 | return false; |
460 | } |
461 | |
462 | static inline void pthreadpool_fence_acquire() { |
463 | _mm_lfence(); |
464 | _ReadBarrier(); |
465 | } |
466 | |
467 | static inline void pthreadpool_fence_release() { |
468 | _WriteBarrier(); |
469 | _mm_sfence(); |
470 | } |
471 | #elif defined(_MSC_VER) && defined(_M_IX86) |
472 | typedef volatile uint32_t pthreadpool_atomic_uint32_t; |
473 | typedef volatile size_t pthreadpool_atomic_size_t; |
474 | typedef void *volatile pthreadpool_atomic_void_p; |
475 | |
476 | static inline uint32_t pthreadpool_load_relaxed_uint32_t( |
477 | pthreadpool_atomic_uint32_t* address) |
478 | { |
479 | return *address; |
480 | } |
481 | |
482 | static inline size_t pthreadpool_load_relaxed_size_t( |
483 | pthreadpool_atomic_size_t* address) |
484 | { |
485 | return *address; |
486 | } |
487 | |
488 | static inline void* pthreadpool_load_relaxed_void_p( |
489 | pthreadpool_atomic_void_p* address) |
490 | { |
491 | return *address; |
492 | } |
493 | |
494 | static inline uint32_t pthreadpool_load_acquire_uint32_t( |
495 | pthreadpool_atomic_uint32_t* address) |
496 | { |
497 | /* x86 loads always have acquire semantics; use only a compiler barrier */ |
498 | const uint32_t value = *address; |
499 | _ReadBarrier(); |
500 | return value; |
501 | } |
502 | |
503 | static inline size_t pthreadpool_load_acquire_size_t( |
504 | pthreadpool_atomic_size_t* address) |
505 | { |
506 | /* x86 loads always have acquire semantics; use only a compiler barrier */ |
507 | const size_t value = *address; |
508 | _ReadBarrier(); |
509 | return value; |
510 | } |
511 | |
512 | static inline void pthreadpool_store_relaxed_uint32_t( |
513 | pthreadpool_atomic_uint32_t* address, |
514 | uint32_t value) |
515 | { |
516 | *address = value; |
517 | } |
518 | |
519 | static inline void pthreadpool_store_relaxed_size_t( |
520 | pthreadpool_atomic_size_t* address, |
521 | size_t value) |
522 | { |
523 | *address = value; |
524 | } |
525 | |
526 | static inline void pthreadpool_store_relaxed_void_p( |
527 | pthreadpool_atomic_void_p* address, |
528 | void* value) |
529 | { |
530 | *address = value; |
531 | } |
532 | |
533 | static inline void pthreadpool_store_release_uint32_t( |
534 | pthreadpool_atomic_uint32_t* address, |
535 | uint32_t value) |
536 | { |
537 | /* x86 stores always have release semantics; use only a compiler barrier */ |
538 | _WriteBarrier(); |
539 | *address = value; |
540 | } |
541 | |
542 | static inline void pthreadpool_store_release_size_t( |
543 | pthreadpool_atomic_size_t* address, |
544 | size_t value) |
545 | { |
546 | /* x86 stores always have release semantics; use only a compiler barrier */ |
547 | _WriteBarrier(); |
548 | *address = value; |
549 | } |
550 | |
551 | static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( |
552 | pthreadpool_atomic_size_t* address) |
553 | { |
554 | return (size_t) _InterlockedDecrement((volatile long*) address); |
555 | } |
556 | |
557 | static inline size_t pthreadpool_decrement_fetch_release_size_t( |
558 | pthreadpool_atomic_size_t* address) |
559 | { |
560 | return (size_t) _InterlockedDecrement((volatile long*) address); |
561 | } |
562 | |
563 | static inline bool pthreadpool_try_decrement_relaxed_size_t( |
564 | pthreadpool_atomic_size_t* value) |
565 | { |
566 | size_t actual_value = *value; |
567 | while (actual_value != 0) { |
568 | const size_t new_value = actual_value - 1; |
569 | const size_t expected_value = actual_value; |
570 | actual_value = _InterlockedCompareExchange( |
571 | (volatile long*) value, (long) new_value, (long) expected_value); |
572 | if (actual_value == expected_value) { |
573 | return true; |
574 | } |
575 | } |
576 | return false; |
577 | } |
578 | |
579 | static inline void pthreadpool_fence_acquire() { |
580 | _mm_lfence(); |
581 | } |
582 | |
583 | static inline void pthreadpool_fence_release() { |
584 | _mm_sfence(); |
585 | } |
586 | #elif defined(_MSC_VER) && defined(_M_ARM64) |
587 | typedef volatile uint32_t pthreadpool_atomic_uint32_t; |
588 | typedef volatile size_t pthreadpool_atomic_size_t; |
589 | typedef void *volatile pthreadpool_atomic_void_p; |
590 | |
591 | static inline uint32_t pthreadpool_load_relaxed_uint32_t( |
592 | pthreadpool_atomic_uint32_t* address) |
593 | { |
594 | return (uint32_t) __iso_volatile_load32((const volatile __int32*) address); |
595 | } |
596 | |
597 | static inline size_t pthreadpool_load_relaxed_size_t( |
598 | pthreadpool_atomic_size_t* address) |
599 | { |
600 | return (size_t) __iso_volatile_load64((const volatile __int64*) address); |
601 | } |
602 | |
603 | static inline void* pthreadpool_load_relaxed_void_p( |
604 | pthreadpool_atomic_void_p* address) |
605 | { |
606 | return (void*) __iso_volatile_load64((const volatile __int64*) address); |
607 | } |
608 | |
609 | static inline uint32_t pthreadpool_load_acquire_uint32_t( |
610 | pthreadpool_atomic_uint32_t* address) |
611 | { |
612 | return (uint32_t) __ldar32((volatile unsigned __int32*) address); |
613 | } |
614 | |
615 | static inline size_t pthreadpool_load_acquire_size_t( |
616 | pthreadpool_atomic_size_t* address) |
617 | { |
618 | return (size_t) __ldar64((volatile unsigned __int64*) address); |
619 | } |
620 | |
621 | static inline void pthreadpool_store_relaxed_uint32_t( |
622 | pthreadpool_atomic_uint32_t* address, |
623 | uint32_t value) |
624 | { |
625 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
626 | } |
627 | |
628 | static inline void pthreadpool_store_relaxed_size_t( |
629 | pthreadpool_atomic_size_t* address, |
630 | size_t value) |
631 | { |
632 | __iso_volatile_store64((volatile __int64*) address, (__int64) value); |
633 | } |
634 | |
635 | static inline void pthreadpool_store_relaxed_void_p( |
636 | pthreadpool_atomic_void_p* address, |
637 | void* value) |
638 | { |
639 | __iso_volatile_store64((volatile __int64*) address, (__int64) value); |
640 | } |
641 | |
642 | static inline void pthreadpool_store_release_uint32_t( |
643 | pthreadpool_atomic_uint32_t* address, |
644 | uint32_t value) |
645 | { |
646 | _WriteBarrier(); |
647 | __stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value); |
648 | } |
649 | |
650 | static inline void pthreadpool_store_release_size_t( |
651 | pthreadpool_atomic_size_t* address, |
652 | size_t value) |
653 | { |
654 | _WriteBarrier(); |
655 | __stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value); |
656 | } |
657 | |
658 | static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( |
659 | pthreadpool_atomic_size_t* address) |
660 | { |
661 | return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address); |
662 | } |
663 | |
664 | static inline size_t pthreadpool_decrement_fetch_release_size_t( |
665 | pthreadpool_atomic_size_t* address) |
666 | { |
667 | return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address); |
668 | } |
669 | |
670 | static inline bool pthreadpool_try_decrement_relaxed_size_t( |
671 | pthreadpool_atomic_size_t* value) |
672 | { |
673 | size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value); |
674 | while (actual_value != 0) { |
675 | const size_t new_value = actual_value - 1; |
676 | const size_t expected_value = actual_value; |
677 | actual_value = _InterlockedCompareExchange64_nf( |
678 | (volatile __int64*) value, (__int64) new_value, (__int64) expected_value); |
679 | if (actual_value == expected_value) { |
680 | return true; |
681 | } |
682 | } |
683 | return false; |
684 | } |
685 | |
686 | static inline void pthreadpool_fence_acquire() { |
687 | __dmb(_ARM64_BARRIER_ISHLD); |
688 | _ReadBarrier(); |
689 | } |
690 | |
691 | static inline void pthreadpool_fence_release() { |
692 | _WriteBarrier(); |
693 | __dmb(_ARM64_BARRIER_ISH); |
694 | } |
695 | #elif defined(_MSC_VER) && defined(_M_ARM) |
696 | typedef volatile uint32_t pthreadpool_atomic_uint32_t; |
697 | typedef volatile size_t pthreadpool_atomic_size_t; |
698 | typedef void *volatile pthreadpool_atomic_void_p; |
699 | |
700 | static inline uint32_t pthreadpool_load_relaxed_uint32_t( |
701 | pthreadpool_atomic_uint32_t* address) |
702 | { |
703 | return (uint32_t) __iso_volatile_load32((const volatile __int32*) address); |
704 | } |
705 | |
706 | static inline size_t pthreadpool_load_relaxed_size_t( |
707 | pthreadpool_atomic_size_t* address) |
708 | { |
709 | return (size_t) __iso_volatile_load32((const volatile __int32*) address); |
710 | } |
711 | |
712 | static inline void* pthreadpool_load_relaxed_void_p( |
713 | pthreadpool_atomic_void_p* address) |
714 | { |
715 | return (void*) __iso_volatile_load32((const volatile __int32*) address); |
716 | } |
717 | |
718 | static inline uint32_t pthreadpool_load_acquire_uint32_t( |
719 | pthreadpool_atomic_uint32_t* address) |
720 | { |
721 | const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address); |
722 | __dmb(_ARM_BARRIER_ISH); |
723 | _ReadBarrier(); |
724 | return value; |
725 | } |
726 | |
727 | static inline size_t pthreadpool_load_acquire_size_t( |
728 | pthreadpool_atomic_size_t* address) |
729 | { |
730 | const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address); |
731 | __dmb(_ARM_BARRIER_ISH); |
732 | _ReadBarrier(); |
733 | return value; |
734 | } |
735 | |
736 | static inline void pthreadpool_store_relaxed_uint32_t( |
737 | pthreadpool_atomic_uint32_t* address, |
738 | uint32_t value) |
739 | { |
740 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
741 | } |
742 | |
743 | static inline void pthreadpool_store_relaxed_size_t( |
744 | pthreadpool_atomic_size_t* address, |
745 | size_t value) |
746 | { |
747 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
748 | } |
749 | |
750 | static inline void pthreadpool_store_relaxed_void_p( |
751 | pthreadpool_atomic_void_p* address, |
752 | void* value) |
753 | { |
754 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
755 | } |
756 | |
757 | static inline void pthreadpool_store_release_uint32_t( |
758 | pthreadpool_atomic_uint32_t* address, |
759 | uint32_t value) |
760 | { |
761 | _WriteBarrier(); |
762 | __dmb(_ARM_BARRIER_ISH); |
763 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
764 | } |
765 | |
766 | static inline void pthreadpool_store_release_size_t( |
767 | pthreadpool_atomic_size_t* address, |
768 | size_t value) |
769 | { |
770 | _WriteBarrier(); |
771 | __dmb(_ARM_BARRIER_ISH); |
772 | __iso_volatile_store32((volatile __int32*) address, (__int32) value); |
773 | } |
774 | |
775 | static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( |
776 | pthreadpool_atomic_size_t* address) |
777 | { |
778 | return (size_t) _InterlockedDecrement_nf((volatile long*) address); |
779 | } |
780 | |
781 | static inline size_t pthreadpool_decrement_fetch_release_size_t( |
782 | pthreadpool_atomic_size_t* address) |
783 | { |
784 | return (size_t) _InterlockedDecrement_rel((volatile long*) address); |
785 | } |
786 | |
787 | static inline bool pthreadpool_try_decrement_relaxed_size_t( |
788 | pthreadpool_atomic_size_t* value) |
789 | { |
790 | size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value); |
791 | while (actual_value != 0) { |
792 | const size_t new_value = actual_value - 1; |
793 | const size_t expected_value = actual_value; |
794 | actual_value = _InterlockedCompareExchange_nf( |
795 | (volatile long*) value, (long) new_value, (long) expected_value); |
796 | if (actual_value == expected_value) { |
797 | return true; |
798 | } |
799 | } |
800 | return false; |
801 | } |
802 | |
803 | static inline void pthreadpool_fence_acquire() { |
804 | __dmb(_ARM_BARRIER_ISH); |
805 | _ReadBarrier(); |
806 | } |
807 | |
808 | static inline void pthreadpool_fence_release() { |
809 | _WriteBarrier(); |
810 | __dmb(_ARM_BARRIER_ISH); |
811 | } |
812 | #else |
813 | #error "Platform-specific implementation of threadpool-atomics.h required" |
814 | #endif |
815 | |
816 | #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) |
/* Spin-wait hint: the x86 PAUSE instruction reduces power and contention. */
static inline void pthreadpool_yield(void) {
	_mm_pause();
}
820 | #elif defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) |
/* Spin-wait hint: the ARM YIELD instruction via the ACLE/MSVC intrinsic. */
static inline void pthreadpool_yield(void) {
	__yield();
}
824 | #elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) || (defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)) |
/* Spin-wait hint: inline-assembly YIELD for GCC-compatible ARM compilers. */
static inline void pthreadpool_yield(void) {
	__asm__ __volatile__("yield");
}
828 | #else |
/* Fallback spin-wait hint: no yield instruction; an acquire fence throttles the spin. */
static inline void pthreadpool_yield(void) {
	pthreadpool_fence_acquire();
}
832 | #endif |
833 | |