1 | /* ---------------------------------------------------------------------------- |
2 | Copyright (c) 2018-2020 Microsoft Research, Daan Leijen |
3 | This is free software; you can redistribute it and/or modify it under the |
4 | terms of the MIT license. |
5 | -----------------------------------------------------------------------------*/ |
6 | |
7 | /* This is a stress test for the allocator, using multiple threads and |
8 | transferring objects between threads. It tries to reflect real-world workloads: |
9 | - allocation size is distributed linearly in powers of two |
10 | - with some fraction extra large (and some extra extra large) |
11 | - the allocations are initialized and read again at free |
12 | - pointers transfer between threads |
13 | - threads are terminated and recreated with some objects surviving in between |
14 | - uses deterministic "randomness", but execution can still depend on |
15 | (random) thread scheduling. Do not use this test as a benchmark! |
16 | */ |
17 | |
18 | #include <stdio.h> |
19 | #include <stdlib.h> |
20 | #include <stdint.h> |
21 | #include <stdbool.h> |
22 | #include <string.h> |
23 | |
24 | // > mimalloc-test-stress [THREADS] [SCALE] [ITER] |
25 | // |
26 | // argument defaults |
27 | static int THREADS = 32; // more repeatable if THREADS <= #processors |
28 | static int SCALE = 25; // scaling factor |
29 | static int ITER = 50; // N full iterations destructing and re-creating all threads |
30 | |
31 | // static int THREADS = 8; // more repeatable if THREADS <= #processors |
32 | // static int SCALE = 100; // scaling factor |
33 | |
34 | #define STRESS // undefine for leak test |
35 | |
36 | static bool allow_large_objects = true; // allow very large objects? |
37 | static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? |
38 | |
39 | |
40 | // #define USE_STD_MALLOC |
41 | #ifdef USE_STD_MALLOC |
42 | #define custom_calloc(n,s) malloc(n*s) |
43 | #define custom_realloc(p,s) realloc(p,s) |
44 | #define custom_free(p) free(p) |
45 | #else |
46 | #include <mimalloc.h> |
47 | #define custom_calloc(n,s) mi_malloc(n*s) |
48 | #define custom_realloc(p,s) mi_realloc(p,s) |
49 | #define custom_free(p) mi_free(p) |
50 | #endif |
51 | |
52 | // transfer pointer between threads |
53 | #define TRANSFERS (1000) |
54 | static volatile void* transfer[TRANSFERS]; |
55 | |
56 | |
57 | #if (UINTPTR_MAX != UINT32_MAX) |
58 | const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; |
59 | #else |
60 | const uintptr_t cookie = 0x1ce4e5b9UL; |
61 | #endif |
62 | |
63 | static void* atomic_exchange_ptr(volatile void** p, void* newval); |
64 | |
65 | typedef uintptr_t* random_t; |
66 | |
67 | static uintptr_t pick(random_t r) { |
68 | uintptr_t x = *r; |
69 | #if (UINTPTR_MAX > UINT32_MAX) |
70 | // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c> |
71 | x ^= x >> 30; |
72 | x *= 0xbf58476d1ce4e5b9UL; |
73 | x ^= x >> 27; |
74 | x *= 0x94d049bb133111ebUL; |
75 | x ^= x >> 31; |
76 | #else |
77 | // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/> |
78 | x ^= x >> 16; |
79 | x *= 0x7feb352dUL; |
80 | x ^= x >> 15; |
81 | x *= 0x846ca68bUL; |
82 | x ^= x >> 16; |
83 | #endif |
84 | *r = x; |
85 | return x; |
86 | } |
87 | |
88 | static bool chance(size_t perc, random_t r) { |
89 | return (pick(r) % 100 <= perc); |
90 | } |
91 | |
92 | static void* alloc_items(size_t items, random_t r) { |
93 | if (chance(1, r)) { |
94 | if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant |
95 | else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge |
96 | else items *= 100; // 1% large objects; |
97 | } |
98 | if (items == 40) items++; // pthreads uses that size for stack increases |
99 | if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); |
100 | if (items==0) items = 1; |
101 | uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); |
102 | if (p != NULL) { |
103 | for (uintptr_t i = 0; i < items; i++) { |
104 | p[i] = (items - i) ^ cookie; |
105 | } |
106 | } |
107 | return p; |
108 | } |
109 | |
110 | static void free_items(void* p) { |
111 | if (p != NULL) { |
112 | uintptr_t* q = (uintptr_t*)p; |
113 | uintptr_t items = (q[0] ^ cookie); |
114 | for (uintptr_t i = 0; i < items; i++) { |
115 | if ((q[i] ^ cookie) != items - i) { |
116 | fprintf(stderr, "memory corruption at block %p at %zu\n" , p, i); |
117 | abort(); |
118 | } |
119 | } |
120 | } |
121 | custom_free(p); |
122 | } |
123 | |
124 | |
125 | static void stress(intptr_t tid) { |
126 | //bench_start_thread(); |
127 | uintptr_t r = ((tid + 1) * 43); // rand(); |
128 | const size_t max_item_shift = 5; // 128 |
129 | const size_t max_item_retained_shift = max_item_shift + 2; |
130 | size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more |
131 | size_t retain = allocs / 2; |
132 | void** data = NULL; |
133 | size_t data_size = 0; |
134 | size_t data_top = 0; |
135 | void** retained = (void**)custom_calloc(retain,sizeof(void*)); |
136 | size_t retain_top = 0; |
137 | |
138 | while (allocs > 0 || retain > 0) { |
139 | if (retain == 0 || (chance(50, &r) && allocs > 0)) { |
140 | // 50%+ alloc |
141 | allocs--; |
142 | if (data_top >= data_size) { |
143 | data_size += 100000; |
144 | data = (void**)custom_realloc(data, data_size * sizeof(void*)); |
145 | } |
146 | data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); |
147 | } |
148 | else { |
149 | // 25% retain |
150 | retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r); |
151 | retain--; |
152 | } |
153 | if (chance(66, &r) && data_top > 0) { |
154 | // 66% free previous alloc |
155 | size_t idx = pick(&r) % data_top; |
156 | free_items(data[idx]); |
157 | data[idx] = NULL; |
158 | } |
159 | if (chance(25, &r) && data_top > 0) { |
160 | // 25% exchange a local pointer with the (shared) transfer buffer. |
161 | size_t data_idx = pick(&r) % data_top; |
162 | size_t transfer_idx = pick(&r) % TRANSFERS; |
163 | void* p = data[data_idx]; |
164 | void* q = atomic_exchange_ptr(&transfer[transfer_idx], p); |
165 | data[data_idx] = q; |
166 | } |
167 | } |
168 | // free everything that is left |
169 | for (size_t i = 0; i < retain_top; i++) { |
170 | free_items(retained[i]); |
171 | } |
172 | for (size_t i = 0; i < data_top; i++) { |
173 | free_items(data[i]); |
174 | } |
175 | custom_free(retained); |
176 | custom_free(data); |
177 | //bench_end_thread(); |
178 | } |
179 | |
180 | static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); |
181 | |
182 | static void test_stress(void) { |
183 | uintptr_t r = rand(); |
184 | for (int n = 0; n < ITER; n++) { |
185 | run_os_threads(THREADS, &stress); |
186 | for (int i = 0; i < TRANSFERS; i++) { |
187 | if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers |
188 | void* p = atomic_exchange_ptr(&transfer[i], NULL); |
189 | free_items(p); |
190 | } |
191 | } |
192 | #ifndef NDEBUG |
193 | //mi_collect(false); |
194 | //mi_debug_show_arenas(); |
195 | #endif |
196 | #if !defined(NDEBUG) || defined(MI_TSAN) |
197 | if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n" , ITER - (n + 1)); } |
198 | #endif |
199 | } |
200 | } |
201 | |
202 | #ifndef STRESS |
203 | static void leak(intptr_t tid) { |
204 | uintptr_t r = rand(); |
205 | void* p = alloc_items(1 /*pick(&r)%128*/, &r); |
206 | if (chance(50, &r)) { |
207 | intptr_t i = (pick(&r) % TRANSFERS); |
208 | void* q = atomic_exchange_ptr(&transfer[i], p); |
209 | free_items(q); |
210 | } |
211 | } |
212 | |
213 | static void test_leak(void) { |
214 | for (int n = 0; n < ITER; n++) { |
215 | run_os_threads(THREADS, &leak); |
216 | mi_collect(false); |
217 | #ifndef NDEBUG |
218 | if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n" , ITER - (n + 1)); } |
219 | #endif |
220 | } |
221 | } |
222 | #endif |
223 | |
224 | int main(int argc, char** argv) { |
225 | // > mimalloc-test-stress [THREADS] [SCALE] [ITER] |
226 | if (argc >= 2) { |
227 | char* end; |
228 | long n = strtol(argv[1], &end, 10); |
229 | if (n > 0) THREADS = n; |
230 | } |
231 | if (argc >= 3) { |
232 | char* end; |
233 | long n = (strtol(argv[2], &end, 10)); |
234 | if (n > 0) SCALE = n; |
235 | } |
236 | if (argc >= 4) { |
237 | char* end; |
238 | long n = (strtol(argv[3], &end, 10)); |
239 | if (n > 0) ITER = n; |
240 | } |
241 | printf("Using %d threads with a %d%% load-per-thread and %d iterations\n" , THREADS, SCALE, ITER); |
242 | //mi_reserve_os_memory(1024*1024*1024ULL, false, true); |
243 | //int res = mi_reserve_huge_os_pages(4,1); |
244 | //printf("(reserve huge: %i\n)", res); |
245 | |
246 | //bench_start_program(); |
247 | |
248 | // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. |
249 | srand(0x7feb352d); |
250 | |
251 | //mi_reserve_os_memory(512ULL << 20, true, true); |
252 | |
253 | #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) |
254 | mi_stats_reset(); |
255 | #endif |
256 | |
257 | #ifdef STRESS |
258 | test_stress(); |
259 | #else |
260 | test_leak(); |
261 | #endif |
262 | |
263 | #ifndef USE_STD_MALLOC |
264 | #ifndef NDEBUG |
265 | mi_collect(true); |
266 | //mi_debug_show_arenas(); |
267 | #endif |
268 | mi_stats_print(NULL); |
269 | #endif |
270 | //bench_end_program(); |
271 | return 0; |
272 | } |
273 | |
274 | |
275 | static void (*thread_entry_fun)(intptr_t) = &stress; |
276 | |
277 | #ifdef _WIN32 |
278 | |
279 | #include <Windows.h> |
280 | |
281 | static DWORD WINAPI thread_entry(LPVOID param) { |
282 | thread_entry_fun((intptr_t)param); |
283 | return 0; |
284 | } |
285 | |
286 | static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { |
287 | thread_entry_fun = fun; |
288 | DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD)); |
289 | HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE)); |
290 | for (uintptr_t i = 0; i < nthreads; i++) { |
291 | thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]); |
292 | } |
293 | for (size_t i = 0; i < nthreads; i++) { |
294 | WaitForSingleObject(thandles[i], INFINITE); |
295 | } |
296 | for (size_t i = 0; i < nthreads; i++) { |
297 | CloseHandle(thandles[i]); |
298 | } |
299 | custom_free(tids); |
300 | custom_free(thandles); |
301 | } |
302 | |
303 | static void* atomic_exchange_ptr(volatile void** p, void* newval) { |
304 | #if (INTPTR_MAX == INT32_MAX) |
305 | return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); |
306 | #else |
307 | return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); |
308 | #endif |
309 | } |
310 | #else |
311 | |
312 | #include <pthread.h> |
313 | |
314 | static void* thread_entry(void* param) { |
315 | thread_entry_fun((uintptr_t)param); |
316 | return NULL; |
317 | } |
318 | |
319 | static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { |
320 | thread_entry_fun = fun; |
321 | pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t)); |
322 | memset(threads, 0, sizeof(pthread_t) * nthreads); |
323 | //pthread_setconcurrency(nthreads); |
324 | for (size_t i = 0; i < nthreads; i++) { |
325 | pthread_create(&threads[i], NULL, &thread_entry, (void*)i); |
326 | } |
327 | for (size_t i = 0; i < nthreads; i++) { |
328 | pthread_join(threads[i], NULL); |
329 | } |
330 | custom_free(threads); |
331 | } |
332 | |
333 | #ifdef __cplusplus |
334 | #include <atomic> |
335 | static void* atomic_exchange_ptr(volatile void** p, void* newval) { |
336 | return std::atomic_exchange((volatile std::atomic<void*>*)p, newval); |
337 | } |
338 | #else |
339 | #include <stdatomic.h> |
340 | static void* atomic_exchange_ptr(volatile void** p, void* newval) { |
341 | return atomic_exchange((volatile _Atomic(void*)*)p, newval); |
342 | } |
343 | #endif |
344 | |
345 | #endif |
346 | |