#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h" /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
# include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
# include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */

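/* Example (illustrative only, not part of this header): because the names
 * and semantics mirror <stdatomic.h>, a release/acquire publish pattern
 * looks just like the standard one.  The variable name `ready` is made up
 * for the sketch:
 *
 *     static _Py_atomic_int ready;
 *
 *     // writer: publish with release ordering
 *     _Py_atomic_store_explicit(&ready, 1, _Py_memory_order_release);
 *
 *     // reader: observe with acquire ordering
 *     if (_Py_atomic_load_explicit(&ready, _Py_memory_order_acquire)) {
 *         // writes made before the store above are visible here
 *     }
 */
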
#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard to measure scenario.
*/
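/* Illustrative sketch (not part of the header): with the macros below, a
 * sequentially consistent 32-bit store such as
 *
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_seq_cst);
 *
 * takes the default branch of _Py_atomic_store_32bit and issues a single
 * full-barrier _InterlockedExchange, while acquire/release orders pick the
 * _HLEAcquire/_HLERelease variants instead.  `flag` here is a hypothetical
 * _Py_atomic_int used only for the example.
 */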
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
        break; \
    }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
        break;
    }
    }
    return old;
}
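
/* Why intptr_t matters (illustrative sketch, not part of the header): the
 * GIL code stores -1 as a "not created yet" sentinel, so callers perform a
 * signed comparison on the loaded value, roughly:
 *
 *     if (_Py_atomic_load_relaxed(&some_state) < 0) {
 *         // sentinel: not initialized yet
 *     }
 *
 * If this helper returned uintptr_t instead, -1 would become UINTPTR_MAX,
 * the comparison would be unsigned, and the test could never succeed.
 * `some_state` is a made-up name used only for this example.
 */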

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
        break; \
    }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
        _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    case _Py_memory_order_release: \
        _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    default: \
        _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
        break; \
    }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t, it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange64(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while(_InterlockedCompareExchange(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
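
/* Example (illustrative only, not part of this header): the shortcuts above
 * simply wrap the _explicit forms with a fixed memory order.  `counter` is a
 * made-up name used only for this sketch:
 *
 *     static _Py_atomic_int counter;
 *
 *     _Py_atomic_store(&counter, 0);                  // seq_cst store
 *     int snapshot = _Py_atomic_load(&counter);       // seq_cst load
 *     _Py_atomic_store_relaxed(&counter, snapshot);   // relaxed store
 *     int quick = _Py_atomic_load_relaxed(&counter);  // relaxed load
 */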

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */