1 | /* ---------------------------------------------------------------------------- |
2 | Copyright (c) 2018-2021 Microsoft Research, Daan Leijen |
3 | This is free software; you can redistribute it and/or modify it under the |
4 | terms of the MIT license. A copy of the license can be found in the file |
5 | "LICENSE" at the root of this distribution. |
6 | -----------------------------------------------------------------------------*/ |
7 | #pragma once |
8 | #ifndef MIMALLOC_ATOMIC_H |
9 | #define MIMALLOC_ATOMIC_H |
10 | |
11 | // -------------------------------------------------------------------------------------------- |
12 | // Atomics |
13 | // We need to be portable between C, C++, and MSVC. |
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
15 | // This is why we try to use only `uintptr_t` and `<type>*` as atomic types. |
16 | // To gain better insight in the range of used atomics, we use explicitly named memory order operations |
17 | // instead of passing the memory order as a parameter. |
18 | // ----------------------------------------------------------------------------------------------- |
19 | |
20 | #if defined(__cplusplus) |
21 | // Use C++ atomics |
22 | #include <atomic> |
23 | #define _Atomic(tp) std::atomic<tp> |
24 | #define mi_atomic(name) std::atomic_##name |
25 | #define mi_memory_order(name) std::memory_order_##name |
26 | #if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 |
27 | #define MI_ATOMIC_VAR_INIT(x) x |
28 | #else |
29 | #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) |
30 | #endif |
31 | #elif defined(_MSC_VER) |
32 | // Use MSVC C wrapper for C11 atomics |
33 | #define _Atomic(tp) tp |
34 | #define MI_ATOMIC_VAR_INIT(x) x |
35 | #define mi_atomic(name) mi_atomic_##name |
36 | #define mi_memory_order(name) mi_memory_order_##name |
37 | #else |
38 | // Use C11 atomics |
39 | #include <stdatomic.h> |
40 | #define mi_atomic(name) atomic_##name |
41 | #define mi_memory_order(name) memory_order_##name |
42 | #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) |
43 | #endif |
44 | |
45 | // Various defines for all used memory orders in mimalloc |
46 | #define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ |
47 | mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) |
48 | |
49 | #define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ |
50 | mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) |
51 | |
52 | #define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) |
53 | #define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) |
54 | #define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) |
55 | #define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) |
56 | #define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) |
57 | #define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) |
58 | #define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) |
59 | #define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) |
60 | #define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) |
61 | #define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) |
62 | |
63 | #define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) |
64 | #define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) |
65 | #define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) |
66 | #define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) |
67 | #define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) |
68 | #define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) |
69 | |
70 | #define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) |
71 | #define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1) |
72 | #define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1) |
73 | #define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) |
74 | |
75 | static inline void mi_atomic_yield(void); |
76 | static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); |
77 | static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); |
78 | |
79 | |
80 | #if defined(__cplusplus) || !defined(_MSC_VER) |
81 | |
82 | // In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value) |
83 | // We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well |
84 | #define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) |
85 | #define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) |
86 | |
87 | // In C++ we need to add casts to help resolve templates if NULL is passed |
88 | #if defined(__cplusplus) |
89 | #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x) |
90 | #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x) |
91 | #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) |
92 | #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) |
93 | #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) |
94 | #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) |
95 | #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) |
96 | #else |
97 | #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) |
98 | #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) |
99 | #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) |
100 | #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) |
101 | #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) |
102 | #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) |
103 | #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) |
104 | #endif |
105 | |
106 | // These are used by the statistics |
107 | static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { |
108 | return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); |
109 | } |
110 | static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { |
111 | int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); |
112 | while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; |
113 | } |
114 | |
115 | // Used by timers |
116 | #define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) |
117 | #define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) |
118 | #define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) |
119 | #define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) |
120 | |
121 | |
122 | |
123 | #elif defined(_MSC_VER) |
124 | |
125 | // MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. |
126 | #define WIN32_LEAN_AND_MEAN |
127 | #include <windows.h> |
128 | #include <intrin.h> |
129 | #ifdef _WIN64 |
130 | typedef LONG64 msc_intptr_t; |
131 | #define MI_64(f) f##64 |
132 | #else |
133 | typedef LONG msc_intptr_t; |
134 | #define MI_64(f) f |
135 | #endif |
136 | |
// Memory orderings for the MSVC C wrapper, mirroring C11 `memory_order`.
// Declared in ascending strength so orders can be compared numerically
// (e.g. `mo > mi_memory_order_relaxed` in `mi_atomic_load_explicit`).
typedef enum mi_memory_order_e {
  mi_memory_order_relaxed,
  mi_memory_order_consume,
  mi_memory_order_acquire,
  mi_memory_order_release,
  mi_memory_order_acq_rel,
  mi_memory_order_seq_cst
} mi_memory_order;
145 | |
// C11 `atomic_fetch_add_explicit` equivalent: atomically add `add` to `*p`
// and return the previous value. The memory-order argument is ignored; the
// Interlocked intrinsics provide their own ordering (presumed sufficient).
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
// Fetch-and-subtract: implemented as a fetch-add of the negated operand.
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
// Fetch-and-AND; returns the previous value.
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
// Fetch-and-OR; returns the previous value.
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
// C11 `atomic_compare_exchange_strong_explicit` equivalent: if `*p` equals
// `*expected`, store `desired` and return true; otherwise copy the observed
// value back into `*expected` and return false. Memory orders are ignored.
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  (void)(mo1); (void)(mo2);
  uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
  if (read == *expected) {
    return true;
  }
  else {
    *expected = read;
    return false;
  }
}
// The weak CAS forwards to the strong one: the Interlocked compare-exchange
// does not fail spuriously, so weak and strong coincide here.
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
// C11 `atomic_exchange_explicit` equivalent: atomically replace `*p` with
// `exchange` and return the previous value.
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
// C11 `atomic_thread_fence` equivalent: performs an atomic exchange on a
// dummy local so that the Interlocked operation serves as the fence.
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
  (void)(mo);
  _Atomic(uintptr_t) x = 0;
  mi_atomic_exchange_explicit(&x, 1, mo);
}
185 | static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { |
186 | (void)(mo); |
187 | #if defined(_M_IX86) || defined(_M_X64) |
188 | return *p; |
189 | #else |
190 | uintptr_t x = *p; |
191 | if (mo > mi_memory_order_relaxed) { |
192 | while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; |
193 | } |
194 | return x; |
195 | #endif |
196 | } |
// C11 `atomic_store_explicit` equivalent.
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
  // x86/x64: a plain aligned pointer-sized write is used here
  // (mirrors the direct read in `mi_atomic_load_explicit`).
  *p = x;
#else
  // Other architectures: store via an atomic exchange, result discarded.
  mi_atomic_exchange_explicit(p, x, mo);
#endif
}
// C11 `atomic_load_explicit` equivalent for 64-bit values.
static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_X64)
  // Only x64 uses a plain read; note the 32-bit x86 path is deliberately
  // excluded here (compare the pointer-sized load, which allows _M_IX86).
  return *p;
#else
  // Emulate an atomic 64-bit load by CAS-ing the location with its own
  // value until the value read back matches the value we swapped in.
  int64_t old = *p;
  int64_t x = old;
  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
    x = old;
  }
  return x;
#endif
}
// C11 `atomic_store_explicit` equivalent for 64-bit values.
// NOTE(review): `x_M_IX86` (with the `x` prefix) is never defined, so only
// x64 takes the plain-store path. This looks deliberate -- a plain 64-bit
// store is not atomic on 32-bit x86, and `mi_atomic_loadi64_explicit` above
// likewise tests only `_M_X64` -- but confirm against upstream before
// "fixing" the spelling.
static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(x_M_IX86) || defined(_M_X64)
  *p = x;
#else
  InterlockedExchange64(p, x);
#endif
}
226 | |
227 | // These are used by the statistics |
228 | static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { |
229 | #ifdef _WIN64 |
230 | return (int64_t)mi_atomic_addi((int64_t*)p, add); |
231 | #else |
232 | int64_t current; |
233 | int64_t sum; |
234 | do { |
235 | current = *p; |
236 | sum = current + add; |
237 | } while (_InterlockedCompareExchange64(p, sum, current) != current); |
238 | return current; |
239 | #endif |
240 | } |
// Atomically raise `*p` to at least `x`: the CAS loop retries while the
// observed value is still below `x` and the swap has not yet landed.
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
  int64_t current;
  do {
    current = *p;
  } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}
247 | |
248 | // The pointer macros cast to `uintptr_t`. |
249 | #define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) |
250 | #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) |
251 | #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) |
252 | #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) |
253 | #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) |
254 | #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) |
255 | #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) |
256 | #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) |
257 | #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) |
258 | |
259 | #define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) |
260 | #define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) |
261 | #define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) |
262 | #define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) |
263 | |
264 | |
265 | #endif |
266 | |
267 | |
268 | // Atomically add a signed value; returns the previous value. |
269 | static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { |
270 | return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); |
271 | } |
272 | |
273 | // Atomically subtract a signed value; returns the previous value. |
274 | static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { |
275 | return (intptr_t)mi_atomic_addi(p, -sub); |
276 | } |
277 | |
// Yield: briefly give up the processor inside spin/backoff loops.
// Selected per platform: C++ std::this_thread::yield, Windows
// YieldProcessor, SSE2 `pause`, inline assembly for gcc/clang targets,
// then OS-level yields as a last resort.
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
  std::this_thread::yield();
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline void mi_atomic_yield(void) {
  YieldProcessor();
}
#elif defined(__SSE2__)
#include <emmintrin.h>
static inline void mi_atomic_yield(void) {
  _mm_pause();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
       defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
#if defined(__x86_64__) || defined(__i386__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("pause" ::: "memory" );
}
#elif defined(__aarch64__)
// NOTE(review): "wfe" waits for an event and can stall longer than a plain
// "yield"/"isb" hint -- confirm this is the intended spin-wait behavior.
static inline void mi_atomic_yield(void) {
  __asm__ volatile("wfe" );
}
#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
static inline void mi_atomic_yield(void) {
  __asm__ volatile("yield" ::: "memory" );
}
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
// "or 27,27,27" is the PowerPC low-priority no-op used as a yield hint.
static inline void mi_atomic_yield(void) {
  __asm__ __volatile__ ("or 27,27,27" ::: "memory" );
}
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("nop" ::: "memory" );
}
#endif
#elif defined(__sun)
// Solaris: SMT pause hint
#include <synch.h>
static inline void mi_atomic_yield(void) {
  smt_pause();
}
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
  sched_yield();
}
#else
// Generic fallback: yield the time slice via sleep(0).
#include <unistd.h>
static inline void mi_atomic_yield(void) {
  sleep(0);
}
#endif
336 | |
337 | |
#endif // MIMALLOC_ATOMIC_H
339 | |