1/* ----------------------------------------------------------------------------
2Copyright (c) 2018-2021 Microsoft Research, Daan Leijen
3This is free software; you can redistribute it and/or modify it under the
4terms of the MIT license. A copy of the license can be found in the file
5"LICENSE" at the root of this distribution.
6-----------------------------------------------------------------------------*/
7#pragma once
8#ifndef MIMALLOC_ATOMIC_H
9#define MIMALLOC_ATOMIC_H
10
11// --------------------------------------------------------------------------------------------
12// Atomics
13// We need to be portable between C, C++, and MSVC.
We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
15// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
16// To gain better insight in the range of used atomics, we use explicitly named memory order operations
17// instead of passing the memory order as a parameter.
18// -----------------------------------------------------------------------------------------------
19
#if defined(__cplusplus)
// Use C++ atomics
#include <atomic>
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
// ATOMIC_VAR_INIT is deprecated in C++20; plain initialization suffices there.
#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
 #define MI_ATOMIC_VAR_INIT(x) x
#else
 #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics (defined further below in this header)
#define _Atomic(tp) tp
#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
// Use C11 atomics
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
44
// Various defines for all used memory orders in mimalloc
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
 mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)

#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
 mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)

#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
// On CAS failure the load ordering is relaxed/acquire respectively
// (never stronger than the success ordering, as C11/C++ requires).
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))

#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))

#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1)
#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1)
#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1)

// Forward declarations; the definitions differ per platform and appear below.
static inline void mi_atomic_yield(void);
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
78
79
#if defined(__cplusplus) || !defined(_MSC_VER)

// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value)
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)

// In C++ we need to add casts to help resolve templates if NULL is passed
#if defined(__cplusplus)
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
#else
// In C11 the atomics are polymorphic, so no casts are needed.
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
#endif
105
106// These are used by the statistics
107static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
108 return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
109}
110static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
111 int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
112 while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
113}
114
// Used by timers
// (In C++/C11 mode the generic polymorphic load/store handle int64_t directly.)
#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
120
121
122
#elif defined(_MSC_VER)

// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>
// MI_64(f) selects the 64-bit variant of an Interlocked intrinsic on Win64,
// so the wrappers below operate on pointer-sized values on both targets.
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
#define MI_64(f) f##64
#else
typedef LONG msc_intptr_t;
#define MI_64(f) f
#endif

// Mirrors C11 `memory_order`; the wrapper functions below accept it but mostly
// ignore it (the Interlocked intrinsics come with their own fixed ordering).
typedef enum mi_memory_order_e {
  mi_memory_order_relaxed,
  mi_memory_order_consume,
  mi_memory_order_acquire,
  mi_memory_order_release,
  mi_memory_order_acq_rel,
  mi_memory_order_seq_cst
} mi_memory_order;
145
// Fetch-and-add; returns the previous value. The requested memory order is
// ignored: the Interlocked intrinsic provides its own (full-barrier) ordering.
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
// Fetch-and-subtract; returns the previous value (adds the negation).
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
// Fetch-and-AND; returns the previous value.
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
// Fetch-and-OR; returns the previous value.
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
162static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
163 (void)(mo1); (void)(mo2);
164 uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
165 if (read == *expected) {
166 return true;
167 }
168 else {
169 *expected = read;
170 return false;
171 }
172}
// Weak CAS: an Interlocked compare-exchange never fails spuriously, so the
// weak variant can simply forward to the strong one.
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
// Atomic exchange; returns the previous value.
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
// Thread fence, modeled by an interlocked exchange on a dummy stack variable.
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
  (void)(mo);
  _Atomic(uintptr_t) x = 0;
  mi_atomic_exchange_explicit(&x, 1, mo);
}
185static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
186 (void)(mo);
187#if defined(_M_IX86) || defined(_M_X64)
188 return *p;
189#else
190 uintptr_t x = *p;
191 if (mo > mi_memory_order_relaxed) {
192 while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
193 }
194 return x;
195#endif
196}
// Atomic store. On x86/x64 an aligned native-width store cannot tear, so a
// plain store is used; elsewhere fall back to an atomic exchange (old value
// is discarded).
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
  *p = x;
#else
  mi_atomic_exchange_explicit(p, x, mo);
#endif
}
// Atomic 64-bit load. A plain load is only atomic on 64-bit targets (x64);
// on 32-bit Windows emulate it with a compare-exchange loop that re-reads
// until a stable value is observed.
static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_X64)
  return *p;
#else
  int64_t old = *p;
  int64_t x = old;
  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
    x = old;
  }
  return x;
#endif
}
218static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
219 (void)(mo);
220#if defined(x_M_IX86) || defined(_M_X64)
221 *p = x;
222#else
223 InterlockedExchange64(p, x);
224#endif
225}
226
// These are used by the statistics
// 64-bit fetch-and-add (relaxed); returns the previous value. On Win64 a
// pointer-sized atomic add suffices; on 32-bit Windows use a 64-bit
// compare-exchange loop.
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
#ifdef _WIN64
  return (int64_t)mi_atomic_addi((int64_t*)p, add);
#else
  int64_t current;
  int64_t sum;
  do {
    current = *p;
    sum = current + add;
  } while (_InterlockedCompareExchange64(p, sum, current) != current);
  return current;
#endif
}
// Atomically raise `*p` to at least `x` via a 64-bit compare-exchange loop.
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
  int64_t current;
  do {
    current = *p;
  } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}
247
// The pointer macros cast to `uintptr_t`.
// (MSVC C-mode atomics are not polymorphic, so all pointer-typed operations
// are routed through the uintptr_t primitives above.)
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)

// 64-bit load/store go through the dedicated i64 wrappers defined above.
#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))
263
264
265#endif
266
267
// Atomically add a signed value; returns the previous value.
// (Implemented on the unsigned primitive; the signed<->unsigned casts rely on
// two's-complement round-tripping.)
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
  return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
}

// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
  return (intptr_t)mi_atomic_addi(p, -sub);
}
277
// Yield the processor inside spin loops, per platform.
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
  std::this_thread::yield();
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline void mi_atomic_yield(void) {
  YieldProcessor();
}
#elif defined(__SSE2__)
#include <emmintrin.h>
static inline void mi_atomic_yield(void) {
  _mm_pause();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
      defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
#if defined(__x86_64__) || defined(__i386__)
// The "memory" clobber keeps the compiler from caching values across the pause.
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("pause" ::: "memory");
}
302#elif defined(__aarch64__)
// ARM64 spin-wait hint. Fix: add the "memory" clobber that every other
// inline-asm variant in this file carries, so the compiler re-reads shared
// memory after the wait instead of caching it across the asm statement.
static inline void mi_atomic_yield(void) {
  __asm__ volatile("wfe" ::: "memory");
}
#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
// NOTE(review): GCC/clang predefine __ARM_ARCH (no trailing underscores);
// confirm this `__ARM_ARCH__` branch is ever selected on ARMv7 builds.
static inline void mi_atomic_yield(void) {
  __asm__ volatile("yield" ::: "memory");
}
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
// "or 27,27,27" — Power thread-priority/yield hint encoding.
static inline void mi_atomic_yield(void) {
  __asm__ __volatile__ ("or 27,27,27" ::: "memory");
}
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("nop" ::: "memory");
}
#endif
#elif defined(__sun)
// Solaris: spin-loop pause from <synch.h>
#include <synch.h>
static inline void mi_atomic_yield(void) {
  smt_pause();
}
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
  sched_yield();
}
#else
// Portable fallback: sleep(0) relinquishes the current time slice.
#include <unistd.h>
static inline void mi_atomic_yield(void) {
  sleep(0);
}
#endif
336
337
#endif // MIMALLOC_ATOMIC_H
339