/* ----------------------------------------------------------------------------
Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license.
-----------------------------------------------------------------------------*/

/* This is a stress test for the allocator, using multiple threads and
   transferring objects between threads. It tries to reflect real-world workloads:
   - allocation size is distributed linearly in powers of two
   - with some fraction extra large (and some extra extra large)
   - the allocations are initialized and read again at free
   - pointers transfer between threads
   - threads are terminated and recreated with some objects surviving in between
   - uses deterministic "randomness", but execution can still depend on
     (random) thread scheduling. Do not use this test as a benchmark!
*/

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>

// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
// argument defaults
static int THREADS = 32;   // more repeatable if THREADS <= #processors
static int SCALE   = 25;   // scaling factor
static int ITER    = 50;   // N full iterations destructing and re-creating all threads

// static int THREADS = 8;    // more repeatable if THREADS <= #processors
// static int SCALE   = 100;  // scaling factor

#define STRESS   // undefine for leak test

static bool   allow_large_objects = true;  // allow very large objects?
static size_t use_one_size = 0;            // use single object size of `N * sizeof(uintptr_t)`?


// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
#define custom_calloc(n,s)   calloc(n,s)
#define custom_realloc(p,s)  realloc(p,s)
#define custom_free(p)       free(p)
#else
#include <mimalloc.h>
#define custom_calloc(n,s)   mi_calloc(n,s)
#define custom_realloc(p,s)  mi_realloc(p,s)
#define custom_free(p)       mi_free(p)
#endif
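
// Both allocator back-ends are driven through the same `custom_*` macros.
// To test the system allocator instead of mimalloc, uncomment USE_STD_MALLOC
// above or define it when compiling (e.g. `cc -DUSE_STD_MALLOC ...`; the exact
// flag syntax depends on your toolchain).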

// transfer pointer between threads
#define TRANSFERS  (1000)
static volatile void* transfer[TRANSFERS];
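// Threads atomically swap pointers into random slots of `transfer` and take
// ownership of whatever was stored there before. As a result, blocks are
// routinely freed on a different thread than the one that allocated them,
// which exercises the allocator's cross-thread free path.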


#if (UINTPTR_MAX != UINT32_MAX)
const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
#else
const uintptr_t cookie = 0x1ce4e5b9UL;
#endif
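// (The 64-bit cookie value happens to be the same constant as the splitmix64
// multiplier used in `pick` below; since it is only XOR-ed into stored words,
// any fixed constant would work.)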

static void* atomic_exchange_ptr(volatile void** p, void* newval);

typedef uintptr_t* random_t;

static uintptr_t pick(random_t r) {
  uintptr_t x = *r;
#if (UINTPTR_MAX > UINT32_MAX)
  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
  x ^= x >> 30;
  x *= 0xbf58476d1ce4e5b9UL;
  x ^= x >> 27;
  x *= 0x94d049bb133111ebUL;
  x ^= x >> 31;
#else
  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
  x ^= x >> 16;
  x *= 0x7feb352dUL;
  x ^= x >> 15;
  x *= 0x846ca68bUL;
  x ^= x >> 16;
#endif
  *r = x;
  return x;
}

// returns true with (approximately) `perc` percent probability
static bool chance(size_t perc, random_t r) {
  return (pick(r) % 100 <= perc);
}
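
// Illustrative usage (for exposition only): each thread seeds its own state
// and draws bounded values from the stream, e.g.
//   uintptr_t r = 42;
//   size_t idx = pick(&r) % TRANSFERS;  // pseudo-random slot index
//   if (chance(25, &r)) { /* taken roughly 25% of the time */ }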

static void* alloc_items(size_t items, random_t r) {
  if (chance(1, r)) {
    if (chance(1, r) && allow_large_objects) items *= 10000;       // 0.01% giant
    else if (chance(10, r) && allow_large_objects) items *= 1000;  // 0.1% huge
    else items *= 100;                                             // 1% large objects;
  }
  if (items == 40) items++;  // pthreads uses that size for stack increases
  if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
  if (items == 0) items = 1;
  uintptr_t* p = (uintptr_t*)custom_calloc(items, sizeof(uintptr_t));
  if (p != NULL) {
    for (uintptr_t i = 0; i < items; i++) {
      p[i] = (items - i) ^ cookie;
    }
  }
  return p;
}

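// Every word of a block was written as `(items - i) ^ cookie`, so q[0] decodes
// back to the block length and each word can be re-checked on free; a mismatch
// means the block was corrupted in between.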
static void free_items(void* p) {
  if (p != NULL) {
    uintptr_t* q = (uintptr_t*)p;
    uintptr_t items = (q[0] ^ cookie);
    for (uintptr_t i = 0; i < items; i++) {
      if ((q[i] ^ cookie) != items - i) {
        fprintf(stderr, "memory corruption at block %p at %zu\n", p, (size_t)i);
        abort();
      }
    }
  }
  custom_free(p);
}


static void stress(intptr_t tid) {
  //bench_start_thread();
  uintptr_t r = ((tid + 1) * 43);  // rand();
  const size_t max_item_shift = 5; // 128
  const size_t max_item_retained_shift = max_item_shift + 2;
  size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1);  // some threads do more
  size_t retain = allocs / 2;
  void** data = NULL;
  size_t data_size = 0;
  size_t data_top = 0;
  void** retained = (void**)custom_calloc(retain, sizeof(void*));
  size_t retain_top = 0;
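
  // Main loop: each step either allocates into the short-lived `data` array or
  // into the `retained` array (freed only when the thread exits); afterwards it
  // may free a random earlier allocation and/or swap a pointer into the shared
  // transfer buffer.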
  while (allocs > 0 || retain > 0) {
    if (retain == 0 || (chance(50, &r) && allocs > 0)) {
      // 50%+ alloc
      allocs--;
      if (data_top >= data_size) {
        data_size += 100000;
        data = (void**)custom_realloc(data, data_size * sizeof(void*));
      }
      data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
    }
    else {
      // 25% retain
      retained[retain_top++] = alloc_items(1ULL << (pick(&r) % max_item_retained_shift), &r);
      retain--;
    }
    if (chance(66, &r) && data_top > 0) {
      // 66% free previous alloc
      size_t idx = pick(&r) % data_top;
      free_items(data[idx]);
      data[idx] = NULL;
    }
    if (chance(25, &r) && data_top > 0) {
      // 25% exchange a local pointer with the (shared) transfer buffer.
      size_t data_idx = pick(&r) % data_top;
      size_t transfer_idx = pick(&r) % TRANSFERS;
      void* p = data[data_idx];
      void* q = atomic_exchange_ptr(&transfer[transfer_idx], p);
      data[data_idx] = q;
    }
  }
  // free everything that is left
  for (size_t i = 0; i < retain_top; i++) {
    free_items(retained[i]);
  }
  for (size_t i = 0; i < data_top; i++) {
    free_items(data[i]);
  }
  custom_free(retained);
  custom_free(data);
  //bench_end_thread();
}

static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));

static void test_stress(void) {
  uintptr_t r = rand();
  for (int n = 0; n < ITER; n++) {
    run_os_threads(THREADS, &stress);
    for (int i = 0; i < TRANSFERS; i++) {
      if (chance(50, &r) || n + 1 == ITER) {  // free all on last run, otherwise free half of the transfers
        void* p = atomic_exchange_ptr(&transfer[i], NULL);
        free_items(p);
      }
    }
  #ifndef NDEBUG
    //mi_collect(false);
    //mi_debug_show_arenas();
  #endif
  #if !defined(NDEBUG) || defined(MI_TSAN)
    if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
  #endif
  }
}

#ifndef STRESS
static void leak(intptr_t tid) {
  (void)tid;  // thread id is unused in the leak test
  uintptr_t r = rand();
  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
  if (chance(50, &r)) {
    intptr_t i = (pick(&r) % TRANSFERS);
    void* q = atomic_exchange_ptr(&transfer[i], p);
    free_items(q);
  }
}

static void test_leak(void) {
  for (int n = 0; n < ITER; n++) {
    run_os_threads(THREADS, &leak);
#ifndef USE_STD_MALLOC
    mi_collect(false);
#endif
#ifndef NDEBUG
    if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
  }
}
#endif

int main(int argc, char** argv) {
  // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
  if (argc >= 2) {
    char* end;
    long n = strtol(argv[1], &end, 10);
    if (n > 0) THREADS = (int)n;
  }
  if (argc >= 3) {
    char* end;
    long n = strtol(argv[2], &end, 10);
    if (n > 0) SCALE = (int)n;
  }
  if (argc >= 4) {
    char* end;
    long n = strtol(argv[3], &end, 10);
    if (n > 0) ITER = (int)n;
  }
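
  // Example invocation (illustrative values): `mimalloc-test-stress 16 10 10`
  // runs 16 threads at a lighter load for 10 iterations.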
  printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
  //mi_reserve_os_memory(1024*1024*1024ULL, false, true);
  //int res = mi_reserve_huge_os_pages(4,1);
  //printf("(reserve huge: %i)\n", res);

  //bench_start_program();

  // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
  srand(0x7feb352d);

  //mi_reserve_os_memory(512ULL << 20, true, true);

#if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
  mi_stats_reset();
#endif

#ifdef STRESS
  test_stress();
#else
  test_leak();
#endif

#ifndef USE_STD_MALLOC
  #ifndef NDEBUG
  mi_collect(true);
  //mi_debug_show_arenas();
  #endif
  mi_stats_print(NULL);
#endif
  //bench_end_program();
  return 0;
}


static void (*thread_entry_fun)(intptr_t) = &stress;

#ifdef _WIN32

#include <Windows.h>

static DWORD WINAPI thread_entry(LPVOID param) {
  thread_entry_fun((intptr_t)param);
  return 0;
}

static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
  thread_entry_fun = fun;
  DWORD* tids = (DWORD*)custom_calloc(nthreads, sizeof(DWORD));
  HANDLE* thandles = (HANDLE*)custom_calloc(nthreads, sizeof(HANDLE));
  for (uintptr_t i = 0; i < nthreads; i++) {
    thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
  }
  for (size_t i = 0; i < nthreads; i++) {
    WaitForSingleObject(thandles[i], INFINITE);
  }
  for (size_t i = 0; i < nthreads; i++) {
    CloseHandle(thandles[i]);
  }
  custom_free(tids);
  custom_free(thandles);
}

static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == INT32_MAX)
  return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else
  return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
#endif
}
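// Note: InterlockedExchangePointer would express the same exchange without
// dispatching on pointer width; the explicit 32/64-bit variants are equivalent.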
#else

#include <pthread.h>

static void* thread_entry(void* param) {
  thread_entry_fun((uintptr_t)param);
  return NULL;
}

static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
  thread_entry_fun = fun;
  pthread_t* threads = (pthread_t*)custom_calloc(nthreads, sizeof(pthread_t));  // zero-initialized by custom_calloc
  //pthread_setconcurrency(nthreads);
  for (size_t i = 0; i < nthreads; i++) {
    pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
  }
  for (size_t i = 0; i < nthreads; i++) {
    pthread_join(threads[i], NULL);
  }
  custom_free(threads);
}

#ifdef __cplusplus
#include <atomic>
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
  return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
}
#else
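// The plain-C path assumes a C11 toolchain for <stdatomic.h>; compile with
// -std=c11 (or newer) if your compiler does not default to it.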
#include <stdatomic.h>
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
  return atomic_exchange((volatile _Atomic(void*)*)p, newval);
}
#endif

#endif