#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for
	 * us to return to the user, have the user synchronize externally with
	 * some other thread, and then have that other thread not have picked
	 * up the update yet (since the original incrementing thread might
	 * still be making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}
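
/*
 * Illustrative pairing (a sketch, not an actual caller in this file): a
 * feature that needs every thread on the slow path while it is active would
 * bracket its lifetime with tsd_global_slow_inc()/tsd_global_slow_dec(), so
 * that all nominal tsds are pushed into (and can later leave)
 * tsd_state_nominal_slow.
 */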

static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
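	/*
	 * Install the freshly computed state.  If the exchange returns
	 * tsd_state_nominal_recompute, a recompute request raced with us and
	 * the inputs to tsd_state_compute() may have changed after we read
	 * them, so compute and install again until we win the race.
	 */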
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability.  For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
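			/*
			 * tsd_data_init_nocleanup() set the reentrancy level
			 * to 1; drop it back down now that the thread is
			 * becoming fully nominal.
			 */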
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

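	/*
	 * Run the registered cleanups until none reports pending work; a
	 * cleanup returns true when it needs to be invoked again (e.g.
	 * because another cleanup re-created state it had already torn down).
	 */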
	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its lifetime. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called.  Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd.  This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

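/*
 * Boot the tsd subsystem for the calling thread: initialize the nominal-list
 * lock and the underlying TLS/TSD machinery (tsd_boot0), then fetch (and
 * thereby initialize) this thread's tsd.
 */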
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly.  Update tsd_slow. */
	tsd_slow_update(tsd);
}

#ifdef _WIN32
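/*
 * The classic Windows TLS API provides no per-thread destructor hook, so a
 * TLS callback (registered via the .CRT$XLY section below) runs the cleanups
 * on thread detach and, under lazy locking, marks the process as threaded on
 * thread attach.
 */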
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
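/*
 * Recursion guard for the pthreads fallback: tsd initialization can itself
 * trigger allocation, which may re-enter tsd initialization on the same
 * thread.  If this thread already has an initialization in progress, return
 * that block's data so the caller can reuse it; otherwise register 'block'
 * and return NULL.
 */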
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
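	/*
	 * Only the forking thread exists in the child, so the nominal list
	 * inherited from the parent is stale: reset it, then re-add this tsd
	 * if it is still in a nominal state.
	 */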
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}