1 | /* ---------------------------------------------------------------------------- |
2 | Copyright (c) 2018-2022, Microsoft Research, Daan Leijen |
3 | This is free software; you can redistribute it and/or modify it under the |
4 | terms of the MIT license. A copy of the license can be found in the file |
5 | "LICENSE" at the root of this distribution. |
6 | -----------------------------------------------------------------------------*/ |
7 | #include "mimalloc.h" |
8 | #include "mimalloc-internal.h" |
9 | |
10 | #include <string.h> // memcpy, memset |
11 | #include <stdlib.h> // atexit |
12 | |
13 | // Empty page used to initialize the small free pages array |
// Empty page used to initialize the small free pages array.
// It is never allocated from; `MI_PAGE_EMPTY()` below casts away the `const`.
const mi_page_t _mi_page_empty = {
  0, false, false, false, false,  // segment index/usage flags -- NOTE(review): confirm field order against `mi_page_t`
  0,       // capacity
  0,       // reserved capacity
  { 0 },   // flags
  false,   // is_zero
  0,       // retire_expire
  NULL,    // free
  #if MI_ENCODE_FREELIST
  { 0, 0 },  // keys for free-list encoding (zero in the empty page)
  #endif
  0,       // used
  0,       // xblock_size
  NULL,    // local_free
  MI_ATOMIC_VAR_INIT(0), // xthread_free
  MI_ATOMIC_VAR_INIT(0), // xheap
  NULL, NULL             // next, prev queue links
  #if MI_INTPTR_SIZE==8
  , { 0 }  // padding
  #endif
};
35 | |
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)

// Initializer for a heap's small-page direct-lookup array: 128 entries plus
// one or more trailing sentinel entries. With padding enabled the array needs
// extra entries (presumably to match `MI_PAGES_DIRECT` in the headers -- the
// count depends on the padding size relative to the word size).
#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8)
#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#elif (MI_PADDING>0)
#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#else
#define MI_SMALL_PAGES_EMPTY  { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
#endif
45 | |
46 | |
47 | // Empty page queues for every bin |
// An empty page queue for block size `sz` machine words
// (the stored block size is in bytes: `(sz)*sizeof(uintptr_t)`).
#define QNULL(sz)  { NULL, NULL, (sz)*sizeof(uintptr_t) }

// Initializer for a heap's page-queue array: one queue per size bin
// (bin sizes are in words, roughly geometric with 8 sub-bins per doubling),
// followed by the huge queue and the full queue sentinels.
#define MI_PAGE_QUEUES_EMPTY \
  { QNULL(1), \
    QNULL(     1), QNULL(     2), QNULL(     3), QNULL(     4), QNULL(     5), QNULL(     6), QNULL(     7), QNULL(     8), /* 8 */ \
    QNULL(    10), QNULL(    12), QNULL(    14), QNULL(    16), QNULL(    20), QNULL(    24), QNULL(    28), QNULL(    32), /* 16 */ \
    QNULL(    40), QNULL(    48), QNULL(    56), QNULL(    64), QNULL(    80), QNULL(    96), QNULL(   112), QNULL(   128), /* 24 */ \
    QNULL(   160), QNULL(   192), QNULL(   224), QNULL(   256), QNULL(   320), QNULL(   384), QNULL(   448), QNULL(   512), /* 32 */ \
    QNULL(   640), QNULL(   768), QNULL(   896), QNULL(  1024), QNULL(  1280), QNULL(  1536), QNULL(  1792), QNULL(  2048), /* 40 */ \
    QNULL(  2560), QNULL(  3072), QNULL(  3584), QNULL(  4096), QNULL(  5120), QNULL(  6144), QNULL(  7168), QNULL(  8192), /* 48 */ \
    QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
    QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
    QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
    QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1  /* 655360, Huge queue */), \
    QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ }
62 | |
// A zeroed statistics counter (current/peak/allocated/freed all zero).
#define MI_STAT_COUNT_NULL()  {0,0,0,0}

// Empty statistics: with detailed stats (MI_STAT>1) the per-bin counters
// are appended at the end of `mi_stats_t`, otherwise they are absent.
#if MI_STAT>1
#define MI_STAT_COUNT_END_NULL()  , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
#else
#define MI_STAT_COUNT_END_NULL()
#endif

// Initializer for a zeroed `mi_stats_t`: fourteen full counters, eight
// simple pair counters, and (optionally) the per-bin counters.
#define MI_STATS_NULL  \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
  MI_STAT_COUNT_END_NULL()
83 | |
84 | |
85 | // Empty slice span queues for every bin |
// An empty slice-span queue for spans of `sz` slices.
#define SQNULL(sz)  { NULL, NULL, sz }

// Initializer for the segment slice-span queues: one queue per span bin
// (bin sizes in slices), 36 entries total.
#define MI_SEGMENT_SPAN_QUEUES_EMPTY \
  { SQNULL(1), \
    SQNULL(     1), SQNULL(     2), SQNULL(     3), SQNULL(     4), SQNULL(     5), SQNULL(     6), SQNULL(     7), SQNULL(    10), /* 8 */ \
    SQNULL(    12), SQNULL(    14), SQNULL(    16), SQNULL(    20), SQNULL(    24), SQNULL(    28), SQNULL(    32), SQNULL(    40), /* 16 */ \
    SQNULL(    48), SQNULL(    56), SQNULL(    64), SQNULL(    80), SQNULL(    96), SQNULL(   112), SQNULL(   128), SQNULL(   160), /* 24 */ \
    SQNULL(   192), SQNULL(   224), SQNULL(   256), SQNULL(   320), SQNULL(   384), SQNULL(   448), SQNULL(   512), SQNULL(   640), /* 32 */ \
    SQNULL(   768), SQNULL(   896), SQNULL(  1024) /* 35 */ }
94 | |
95 | |
96 | // -------------------------------------------------------- |
97 | // Statically allocate an empty heap as the initial |
98 | // thread local value for the default heap, |
99 | // and statically allocate the backing heap for the main |
100 | // thread so it can function without doing any allocation |
101 | // itself (as accessing a thread local for the first time |
102 | // may lead to allocation itself on some platforms) |
103 | // -------------------------------------------------------- |
104 | |
// The empty heap: the initial value of the thread-local default heap
// before `_mi_heap_init` runs. Never allocated from (cookie and keys are 0).
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
  NULL,                   // tld
  MI_SMALL_PAGES_EMPTY,   // small free pages direct lookup
  MI_PAGE_QUEUES_EMPTY,   // page queues per size bin
  MI_ATOMIC_VAR_INIT(NULL), // thread delayed free list
  0,                      // tid
  0,                      // cookie
  0,                      // arena id
  { 0, 0 },               // keys
  { {0}, {0}, 0 },        // random context (uninitialized)
  0,                      // page count
  MI_BIN_FULL, 0,         // page retired min/max
  NULL,                   // next
  false                   // can reclaim
};
120 | |
// Constant-expression pointers to `tld_empty`'s own `stats`/`os` members.
// Computed via offsetof so they can appear in the static initializer below
// (taking `&tld_empty.stats` directly would not be a valid constant there
// while `tld_empty` is still being declared).
#define tld_empty_stats  ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
#define tld_empty_os     ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))

// Empty thread-local data; copied into fresh thread metadata in `_mi_heap_init`.
mi_decl_cache_align static const mi_tld_t tld_empty = {
  0,            // NOTE(review): presumably the heartbeat counter -- confirm against mi_tld_t
  false,        // recursion guard flag (presumably) -- confirm against mi_tld_t
  NULL, NULL,   // heap_backing, heaps (set in _mi_heap_init)
  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
  { 0, tld_empty_stats }, // os
  { MI_STATS_NULL }       // stats
};
132 | |
// The thread-local default heap for allocation; starts as the (read-only)
// empty heap so the very first allocation falls into the generic path,
// which then calls `mi_thread_init`/`_mi_heap_init` to install a real heap.
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;

extern mi_heap_t _mi_heap_main;  // defined below; mutually references tld_main
137 | |
// Statically allocated thread-local data for the main thread; its
// stats/os sub-structures point back into itself.
static mi_tld_t tld_main = {
  0, false,
  &_mi_heap_main, & _mi_heap_main,  // heap_backing, heaps
  { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
  { 0, &tld_main.stats },  // os
  { MI_STATS_NULL }        // stats
};
145 | |
// The statically allocated main-thread heap. Its cookie/keys/random state
// are filled in lazily by `mi_heap_main_init` (cookie==0 means uninitialized).
mi_heap_t _mi_heap_main = {
  &tld_main,
  MI_SMALL_PAGES_EMPTY,
  MI_PAGE_QUEUES_EMPTY,
  MI_ATOMIC_VAR_INIT(NULL),
  0,                // thread id (set in mi_heap_main_init)
  0,                // initial cookie (0 marks "not yet initialized")
  0,                // arena id
  { 0, 0 },         // the key of the main heap can be fixed (unlike page keys that need to be secure!)
  { {0x846ca68b}, {0}, 0 },  // random
  0,                // page count
  MI_BIN_FULL, 0,   // page retired min/max
  NULL,             // next heap
  false             // can reclaim
};
161 | |
bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.

// Main statistics; per-thread stats are merged into this in `_mi_stats_done`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL };
165 | |
166 | |
167 | static void mi_heap_main_init(void) { |
168 | if (_mi_heap_main.cookie == 0) { |
169 | _mi_heap_main.thread_id = _mi_thread_id(); |
170 | _mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init); |
171 | _mi_random_init(&_mi_heap_main.random); |
172 | _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); |
173 | _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); |
174 | } |
175 | } |
176 | |
// Return the main-thread heap, initializing it first if needed.
mi_heap_t* _mi_heap_main_get(void) {
  mi_heap_main_init();
  return &_mi_heap_main;
}
181 | |
182 | |
183 | /* ----------------------------------------------------------- |
184 | Initialization and freeing of the thread local heaps |
185 | ----------------------------------------------------------- */ |
186 | |
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
typedef struct mi_thread_data_s {
  mi_heap_t  heap;  // must come first due to cast in `_mi_heap_done`
  mi_tld_t   tld;
} mi_thread_data_t;


// Thread meta-data is allocated directly from the OS. For
// some programs that do not use thread pools and allocate and
// destroy many OS threads, this may causes too much overhead
// per thread so we maintain a small cache of recently freed metadata.

#define TD_CACHE_SIZE (8)
// Small lock-free cache of freed thread metadata; NULL slots are empty.
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
201 | |
202 | static mi_thread_data_t* mi_thread_data_alloc(void) { |
203 | // try to find thread metadata in the cache |
204 | mi_thread_data_t* td; |
205 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
206 | td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
207 | if (td != NULL) { |
208 | td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); |
209 | if (td != NULL) { |
210 | return td; |
211 | } |
212 | } |
213 | } |
214 | // if that fails, allocate directly from the OS |
215 | td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); |
216 | if (td == NULL) { |
217 | // if this fails, try once more. (issue #257) |
218 | td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); |
219 | if (td == NULL) { |
220 | // really out of memory |
221 | _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n" , sizeof(mi_thread_data_t)); |
222 | } |
223 | } |
224 | return td; |
225 | } |
226 | |
227 | static void mi_thread_data_free( mi_thread_data_t* tdfree ) { |
228 | // try to add the thread metadata to the cache |
229 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
230 | mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
231 | if (td == NULL) { |
232 | mi_thread_data_t* expected = NULL; |
233 | if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { |
234 | return; |
235 | } |
236 | } |
237 | } |
238 | // if that fails, just free it directly |
239 | _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); |
240 | } |
241 | |
242 | static void mi_thread_data_collect(void) { |
243 | // free all thread metadata from the cache |
244 | for (int i = 0; i < TD_CACHE_SIZE; i++) { |
245 | mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); |
246 | if (td != NULL) { |
247 | td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); |
248 | if (td != NULL) { |
249 | _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); |
250 | } |
251 | } |
252 | } |
253 | } |
254 | |
255 | // Initialize the thread local default heap, called from `mi_thread_init` |
256 | static bool _mi_heap_init(void) { |
257 | if (mi_heap_is_initialized(mi_get_default_heap())) return true; |
258 | if (_mi_is_main_thread()) { |
259 | // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization |
260 | // the main heap is statically allocated |
261 | mi_heap_main_init(); |
262 | _mi_heap_set_default_direct(&_mi_heap_main); |
263 | //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); |
264 | } |
265 | else { |
266 | // use `_mi_os_alloc` to allocate directly from the OS |
267 | mi_thread_data_t* td = mi_thread_data_alloc(); |
268 | if (td == NULL) return false; |
269 | |
270 | // OS allocated so already zero initialized |
271 | mi_tld_t* tld = &td->tld; |
272 | mi_heap_t* heap = &td->heap; |
273 | _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); |
274 | _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); |
275 | heap->thread_id = _mi_thread_id(); |
276 | _mi_random_init(&heap->random); |
277 | heap->cookie = _mi_heap_random_next(heap) | 1; |
278 | heap->keys[0] = _mi_heap_random_next(heap); |
279 | heap->keys[1] = _mi_heap_random_next(heap); |
280 | heap->tld = tld; |
281 | tld->heap_backing = heap; |
282 | tld->heaps = heap; |
283 | tld->segments.stats = &tld->stats; |
284 | tld->segments.os = &tld->os; |
285 | tld->os.stats = &tld->stats; |
286 | _mi_heap_set_default_direct(heap); |
287 | } |
288 | return false; |
289 | } |
290 | |
291 | // Free the thread local default heap (called from `mi_thread_done`) |
292 | static bool _mi_heap_done(mi_heap_t* heap) { |
293 | if (!mi_heap_is_initialized(heap)) return true; |
294 | |
295 | // reset default heap |
296 | _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); |
297 | |
298 | // switch to backing heap |
299 | heap = heap->tld->heap_backing; |
300 | if (!mi_heap_is_initialized(heap)) return false; |
301 | |
302 | // delete all non-backing heaps in this thread |
303 | mi_heap_t* curr = heap->tld->heaps; |
304 | while (curr != NULL) { |
305 | mi_heap_t* next = curr->next; // save `next` as `curr` will be freed |
306 | if (curr != heap) { |
307 | mi_assert_internal(!mi_heap_is_backing(curr)); |
308 | mi_heap_delete(curr); |
309 | } |
310 | curr = next; |
311 | } |
312 | mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL); |
313 | mi_assert_internal(mi_heap_is_backing(heap)); |
314 | |
315 | // collect if not the main thread |
316 | if (heap != &_mi_heap_main) { |
317 | _mi_heap_collect_abandon(heap); |
318 | } |
319 | |
320 | // merge stats |
321 | _mi_stats_done(&heap->tld->stats); |
322 | |
323 | // free if not the main thread |
324 | if (heap != &_mi_heap_main) { |
325 | // the following assertion does not always hold for huge segments as those are always treated |
326 | // as abondened: one may allocate it in one thread, but deallocate in another in which case |
327 | // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363 |
328 | // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); |
329 | mi_thread_data_free((mi_thread_data_t*)heap); |
330 | } |
331 | else { |
332 | mi_thread_data_collect(); // free cached thread metadata |
333 | #if 0 |
334 | // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, |
335 | // there may still be delete/free calls after the mi_fls_done is called. Issue #207 |
336 | _mi_heap_destroy_pages(heap); |
337 | mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); |
338 | #endif |
339 | } |
340 | return false; |
341 | } |
342 | |
343 | |
344 | |
345 | // -------------------------------------------------------- |
346 | // Try to run `mi_thread_done()` automatically so any memory |
347 | // owned by the thread but not yet released can be abandoned |
348 | // and re-owned by another thread. |
349 | // |
350 | // 1. windows dynamic library: |
351 | // call from DllMain on DLL_THREAD_DETACH |
352 | // 2. windows static library: |
353 | // use `FlsAlloc` to call a destructor when the thread is done |
354 | // 3. unix, pthreads: |
355 | // use a pthread key to call a destructor when a pthread is done |
356 | // |
357 | // In the last two cases we also need to call `mi_process_init` |
358 | // to set up the thread local keys. |
359 | // -------------------------------------------------------- |
360 | |
static void _mi_thread_done(mi_heap_t* default_heap);

#if defined(_WIN32) && defined(MI_SHARED_LIB)
  // nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
  // use fiber-local storage (FLS) keys to detect thread ending; FLS callbacks
  // run on thread exit even for a static library
  #include <windows.h>
  #include <fibersapi.h>
  #if (_WIN32_WINNT < 0x600)  // before Windows Vista
  // declare the FLS API ourselves as the SDK headers hide it pre-Vista
  WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
  WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
  WINBASEAPI BOOL  WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
  WINBASEAPI BOOL  WINAPI FlsFree(_In_ DWORD dwFlsIndex);
  #endif
  static DWORD mi_fls_key = (DWORD)(-1);  // allocated in mi_process_setup_auto_thread_done
  // FLS callback: invoked with the stored heap pointer when a thread exits
  static void NTAPI mi_fls_done(PVOID value) {
    if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
  }
#elif defined(MI_USE_PTHREADS)
  // use pthread local storage keys to detect thread ending
  // (and used with MI_TLS_PTHREADS for the default heap)
  pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
  // pthread destructor: invoked with the stored heap pointer when a thread exits
  static void mi_pthread_done(void* value) {
    if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
  }
#elif defined(__wasi__)
// no pthreads in the WebAssembly Standard Interface
#else
  #pragma message("define a way to call mi_thread_done when a thread is done")
#endif
391 | |
392 | // Set up handlers so `mi_thread_done` is called automatically |
// Set up handlers so `mi_thread_done` is called automatically when a
// thread exits (FLS callback on Windows static lib, pthread key otherwise).
static void mi_process_setup_auto_thread_done(void) {
  static bool tls_initialized = false;  // fine if it races
  if (tls_initialized) return;
  tls_initialized = true;
  #if defined(_WIN32) && defined(MI_SHARED_LIB)
  // nothing to do as it is done in DllMain
  #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
  // NOTE(review): FlsAlloc can fail (FLS_OUT_OF_INDEXES); return value is not checked here
  mi_fls_key = FlsAlloc(&mi_fls_done);
  #elif defined(MI_USE_PTHREADS)
  mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
  pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
  #endif
  // install the main heap as the default (also registers it for thread-done)
  _mi_heap_set_default_direct(&_mi_heap_main);
}
407 | |
408 | |
409 | bool _mi_is_main_thread(void) { |
410 | return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); |
411 | } |
412 | |
// Number of live threads known to mimalloc; starts at 1 for the main thread.
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);

// Return the current (approximate) live-thread count.
size_t  _mi_current_thread_count(void) {
  return mi_atomic_load_relaxed(&thread_count);
}
418 | |
419 | // This is called from the `mi_malloc_generic` |
420 | void mi_thread_init(void) mi_attr_noexcept |
421 | { |
422 | // ensure our process has started already |
423 | mi_process_init(); |
424 | |
425 | // initialize the thread local default heap |
426 | // (this will call `_mi_heap_set_default_direct` and thus set the |
427 | // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) |
428 | if (_mi_heap_init()) return; // returns true if already initialized |
429 | |
430 | _mi_stat_increase(&_mi_stats_main.threads, 1); |
431 | mi_atomic_increment_relaxed(&thread_count); |
432 | //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); |
433 | } |
434 | |
// Public per-thread teardown; releases this thread's default heap.
void mi_thread_done(void) mi_attr_noexcept {
  _mi_thread_done(mi_get_default_heap());
}
438 | |
// Internal thread teardown: decrement counters and release the heap,
// but only when called on the thread that owns it.
static void _mi_thread_done(mi_heap_t* heap) {
  mi_atomic_decrement_relaxed(&thread_count);
  _mi_stat_decrease(&_mi_stats_main.threads, 1);

  // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
  if (heap->thread_id != _mi_thread_id()) return;

  // abandon the thread local heap
  if (_mi_heap_done(heap)) return;  // returns true if already ran
}
449 | |
// Install `heap` as this thread's default heap. The storage mechanism
// depends on configuration (TLS slot, pthread slot, pthread key, or a
// plain thread-local variable), and the heap is additionally registered
// with the platform thread-exit hook so `_mi_thread_done` gets called.
void _mi_heap_set_default_direct(mi_heap_t* heap)  {
  mi_assert_internal(heap != NULL);
  #if defined(MI_TLS_SLOT)
  mi_tls_slot_set(MI_TLS_SLOT,heap);
  #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
  *mi_tls_pthread_heap_slot() = heap;
  #elif defined(MI_TLS_PTHREAD)
  // we use _mi_heap_default_key
  #else
  _mi_heap_default = heap;
  #endif

  // ensure the default heap is passed to `_mi_thread_done`
  // setting to a non-NULL value also ensures `mi_thread_done` is called.
  #if defined(_WIN32) && defined(MI_SHARED_LIB)
  // nothing to do as it is done in DllMain
  #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
  mi_assert_internal(mi_fls_key != 0);
  FlsSetValue(mi_fls_key, heap);
  #elif defined(MI_USE_PTHREADS)
  if (_mi_heap_default_key != (pthread_key_t)(-1)) {  // can happen during recursive invocation on freeBSD
    pthread_setspecific(_mi_heap_default_key, heap);
  }
  #endif
}
475 | |
476 | |
477 | // -------------------------------------------------------- |
478 | // Run functions on process init/done, and thread init/done |
479 | // -------------------------------------------------------- |
static void mi_cdecl mi_process_done(void);

static bool os_preloading = true;    // true until this module is initialized
static bool mi_redirected = false;   // true if malloc redirects to mi_malloc

// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool _mi_preloading(void) {
  return os_preloading;
}

// Returns true when standard malloc is redirected to mi_malloc
// (only possible on Windows with the redirection DLL, see below).
mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept {
  return mi_redirected;
}
493 | |
494 | // Communicate with the redirection module on Windows |
// Communicate with the redirection module on Windows; on other platforms
// provide no-op stand-ins so callers need no conditional code.
#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
#ifdef __cplusplus
extern "C" {
#endif
// Entry point invoked by the redirection DLL with DllMain-style reasons.
mi_decl_export void _mi_redirect_entry(DWORD reason) {
  // called on redirection; careful as this may be called before DllMain
  if (reason == DLL_PROCESS_ATTACH) {
    mi_redirected = true;
  }
  else if (reason == DLL_PROCESS_DETACH) {
    mi_redirected = false;
  }
  else if (reason == DLL_THREAD_DETACH) {
    mi_thread_done();
  }
}
__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message);
__declspec(dllimport) void mi_cdecl mi_allocator_done(void);
#ifdef __cplusplus
}
#endif
#else
// no redirection module: init always succeeds with no message
static bool mi_allocator_init(const char** message) {
  if (message != NULL) *message = NULL;
  return true;
}
static void mi_allocator_done(void) {
  // nothing to do
}
#endif
525 | |
526 | // Called once by the process loader |
527 | static void mi_process_load(void) { |
528 | mi_heap_main_init(); |
529 | #if defined(MI_TLS_RECURSE_GUARD) |
530 | volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; |
531 | MI_UNUSED(dummy); |
532 | #endif |
533 | os_preloading = false; |
534 | #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) |
535 | atexit(&mi_process_done); |
536 | #endif |
537 | _mi_options_init(); |
538 | mi_process_init(); |
539 | //mi_stats_reset();- |
540 | if (mi_redirected) _mi_verbose_message("malloc is redirected.\n" ); |
541 | |
542 | // show message from the redirector (if present) |
543 | const char* msg = NULL; |
544 | mi_allocator_init(&msg); |
545 | if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { |
546 | _mi_fputs(NULL,NULL,NULL,msg); |
547 | } |
548 | } |
549 | |
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;

// Detect "fast short rep movsb" (FSRM) support: CPUID leaf 7, sub-leaf 0,
// bit 4 of EDX (see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>).
// Available on AMD Zen3+ (~2020) and Intel Ice Lake+ (~2019).
static void mi_detect_cpu_features(void) {
  int32_t cpu_info[4];
  // leaf 0 reports the highest supported leaf in EAX; querying an
  // unsupported leaf returns data from the highest supported one, which
  // could set the flag from unrelated bits on very old CPUs.
  __cpuid(cpu_info, 0);
  if (cpu_info[0] >= 7) {
    __cpuidex(cpu_info, 7, 0);  // leaf 7 with explicit sub-leaf 0
    _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0);  // EDX bit 4
  }
}
#else
static void mi_detect_cpu_features(void) {
  // nothing to detect on this platform
}
#endif
565 | |
566 | // Initialize the process; called by thread_init or the process loader |
567 | void mi_process_init(void) mi_attr_noexcept { |
568 | // ensure we are called once |
569 | if (_mi_process_is_initialized) return; |
570 | _mi_verbose_message("process init: 0x%zx\n" , _mi_thread_id()); |
571 | _mi_process_is_initialized = true; |
572 | mi_process_setup_auto_thread_done(); |
573 | |
574 | |
575 | mi_detect_cpu_features(); |
576 | _mi_os_init(); |
577 | mi_heap_main_init(); |
578 | #if (MI_DEBUG) |
579 | _mi_verbose_message("debug level : %d\n" , MI_DEBUG); |
580 | #endif |
581 | _mi_verbose_message("secure level: %d\n" , MI_SECURE); |
582 | mi_thread_init(); |
583 | |
584 | #if defined(_WIN32) && !defined(MI_SHARED_LIB) |
585 | // When building as a static lib the FLS cleanup happens to early for the main thread. |
586 | // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup |
587 | // will not call _mi_thread_done on the (still executing) main thread. See issue #508. |
588 | FlsSetValue(mi_fls_key, NULL); |
589 | #endif |
590 | |
591 | mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) |
592 | |
593 | if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { |
594 | size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); |
595 | long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); |
596 | if (reserve_at != -1) { |
597 | mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); |
598 | } else { |
599 | mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); |
600 | } |
601 | } |
602 | if (mi_option_is_enabled(mi_option_reserve_os_memory)) { |
603 | long ksize = mi_option_get(mi_option_reserve_os_memory); |
604 | if (ksize > 0) { |
605 | mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); |
606 | } |
607 | } |
608 | } |
609 | |
610 | // Called when the process is done (through `at_exit`) |
611 | static void mi_cdecl mi_process_done(void) { |
612 | // only shutdown if we were initialized |
613 | if (!_mi_process_is_initialized) return; |
614 | // ensure we are called once |
615 | static bool process_done = false; |
616 | if (process_done) return; |
617 | process_done = true; |
618 | |
619 | #if defined(_WIN32) && !defined(MI_SHARED_LIB) |
620 | FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 |
621 | #endif |
622 | |
623 | #ifndef MI_SKIP_COLLECT_ON_EXIT |
624 | #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) |
625 | // free all memory if possible on process exit. This is not needed for a stand-alone process |
626 | // but should be done if mimalloc is statically linked into another shared library which |
627 | // is repeatedly loaded/unloaded, see issue #281. |
628 | mi_collect(true /* force */ ); |
629 | #endif |
630 | #endif |
631 | |
632 | if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { |
633 | mi_stats_print(NULL); |
634 | } |
635 | mi_allocator_done(); |
636 | _mi_verbose_message("process done: 0x%zx\n" , _mi_heap_main.thread_id); |
637 | os_preloading = true; // don't call the C runtime anymore |
638 | } |
639 | |
640 | |
641 | |
// Platform-specific hooks that arrange for `mi_process_load` to run at
// process start (and, on Windows DLLs, for process/thread teardown).
#if defined(_WIN32) && defined(MI_SHARED_LIB)
  // Windows DLL: easy to hook into process_init and thread_done
  __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) {
    MI_UNUSED(reserved);
    MI_UNUSED(inst);
    if (reason==DLL_PROCESS_ATTACH) {
      mi_process_load();
    }
    else if (reason==DLL_PROCESS_DETACH) {
      mi_process_done();
    }
    else if (reason==DLL_THREAD_DETACH) {
      // when redirected, the redirection module calls mi_thread_done (see _mi_redirect_entry)
      if (!mi_is_redirected()) {
        mi_thread_done();
      }
    }
    return TRUE;
  }

#elif defined(_MSC_VER)
  // MSVC: use data section magic for static libraries
  // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
  static int _mi_process_init(void) {
    mi_process_load();
    return 0;
  }
  typedef int(*_mi_crt_callback_t)(void);
  // place a pointer to the init function in the CRT user-initializer section
  // (symbol name differs by decoration: no leading underscore on x64/arm64)
  #if defined(_M_X64) || defined(_M_ARM64)
    __pragma(comment(linker, "/include:" "_mi_msvc_initu"))
    #pragma section(".CRT$XIU", long, read)
  #else
    __pragma(comment(linker, "/include:" "__mi_msvc_initu"))
  #endif
  #pragma data_seg(".CRT$XIU")
  mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init };
  #pragma data_seg()

#elif defined(__cplusplus)
  // C++: use static initialization to detect process start
  static bool _mi_process_init(void) {
    mi_process_load();
    return (_mi_heap_main.thread_id != 0);
  }
  static bool mi_initialized = _mi_process_init();

#elif defined(__GNUC__) || defined(__clang__)
  // GCC,Clang: use the constructor attribute
  static void __attribute__((constructor)) _mi_process_init(void) {
    mi_process_load();
  }

#else
#pragma message("define a way to call mi_process_load on your platform")
#endif
696 | |