1 | #include "jemalloc/internal/jemalloc_preamble.h" |
2 | #include "jemalloc/internal/jemalloc_internal_includes.h" |
3 | |
4 | #include "jemalloc/internal/assert.h" |
5 | #include "jemalloc/internal/san.h" |
6 | #include "jemalloc/internal/mutex.h" |
7 | #include "jemalloc/internal/rtree.h" |
8 | |
9 | /******************************************************************************/ |
10 | /* Data. */ |
11 | |
/* TSD_INITIALIZER triggers "-Wmissing-field-initializers". */
13 | JEMALLOC_DIAGNOSTIC_PUSH |
14 | JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS |
15 | |
/*
 * Storage backing the tsd implementation.  Exactly one of the four variants
 * below is compiled, selected by JEMALLOC_MALLOC_THREAD_CLEANUP, JEMALLOC_TLS
 * and _WIN32.  Every variant defines tsd_booted, initially false.
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
/* The tsd object itself plus a separate tsd_initialized flag. */
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
/* The tsd object itself plus a pthread key. */
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
/*
 * A DWORD handle (NOTE(review): presumably a TLS index -- confirm against the
 * header) plus a statically initialized boot wrapper.
 */
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	/* List of tsd_init_block_t entries; see tsd_init_check_recursion(). */
	ql_head(tsd_init_block_t) blocks;
	/* Held while the blocks list is walked or modified. */
	malloc_mutex_t	lock;
};

/* A pthread key, the statically initialized init-block list, and a boot wrapper. */
pthread_key_t tsd_tsd;
tsd_init_head_t	tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif
52 | |
53 | JEMALLOC_DIAGNOSTIC_POP |
54 | |
55 | /******************************************************************************/ |
56 | |
/*
 * A list of all the tsds in the nominal state.  Traversal, insertion and
 * removal in this file happen with tsd_nominal_tsds_lock held; the lock itself
 * is initialized in malloc_tsd_boot0().
 */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/*
 * How many slow-path-enabling features are turned on.  Adjusted by
 * tsd_global_slow_inc() / tsd_global_slow_dec() and queried by
 * tsd_global_slow().
 */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);
64 | |
65 | static bool |
66 | tsd_in_nominal_list(tsd_t *tsd) { |
67 | tsd_t *tsd_list; |
68 | bool found = false; |
69 | /* |
70 | * We don't know that tsd is nominal; it might not be safe to get data |
71 | * out of it here. |
72 | */ |
73 | malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); |
74 | ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { |
75 | if (tsd == tsd_list) { |
76 | found = true; |
77 | break; |
78 | } |
79 | } |
80 | malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock); |
81 | return found; |
82 | } |
83 | |
/*
 * Append tsd to the tail of tsd_nominal_tsds.
 *
 * Preconditions (checked with assert): tsd is not already on the list, and its
 * state is one of the nominal states (<= tsd_state_nominal_max).
 */
static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	/* Initialize the link element before taking the list lock. */
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
93 | |
/*
 * Unlink tsd from tsd_nominal_tsds.
 *
 * Preconditions (checked with assert): tsd is on the list, and its state is
 * still one of the nominal states (<= tsd_state_nominal_max); callers store the
 * new, non-nominal state only after this returns (see tsd_state_set()).
 */
static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
102 | |
/*
 * Mark every tsd on the nominal list as needing a state recompute.
 *
 * With tsd_nominal_tsds_lock held, each listed tsd has its state overwritten
 * with tsd_state_nominal_recompute and then has
 * te_next_event_fast_set_non_nominal() applied to it.  The tsds touched here
 * may belong to other threads, which is why the state is accessed through the
 * tsd_atomic_* helpers.
 */
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		/* Anything on the list must currently be in a nominal state. */
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}
123 | |
/*
 * Register one more globally slow-path-enabling feature: bump
 * tsd_global_slow_count and force every nominal tsd to recompute its state.
 */
void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}
137 | |
138 | void tsd_global_slow_dec(tsdn_t *tsdn) { |
139 | atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); |
140 | /* See the note in ..._inc(). */ |
141 | tsd_force_recompute(tsdn); |
142 | } |
143 | |
144 | static bool |
145 | tsd_local_slow(tsd_t *tsd) { |
146 | return !tsd_tcache_enabled_get(tsd) |
147 | || tsd_reentrancy_level_get(tsd) > 0; |
148 | } |
149 | |
150 | bool |
151 | tsd_global_slow() { |
152 | return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; |
153 | } |
154 | |
155 | /******************************************************************************/ |
156 | |
157 | static uint8_t |
158 | tsd_state_compute(tsd_t *tsd) { |
159 | if (!tsd_nominal(tsd)) { |
160 | return tsd_state_get(tsd); |
161 | } |
162 | /* We're in *a* nominal state; but which one? */ |
163 | if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) { |
164 | return tsd_state_nominal_slow; |
165 | } else { |
166 | return tsd_state_nominal; |
167 | } |
168 | } |
169 | |
170 | void |
171 | tsd_slow_update(tsd_t *tsd) { |
172 | uint8_t old_state; |
173 | do { |
174 | uint8_t new_state = tsd_state_compute(tsd); |
175 | old_state = tsd_atomic_exchange(&tsd->state, new_state, |
176 | ATOMIC_ACQUIRE); |
177 | } while (old_state == tsd_state_nominal_recompute); |
178 | |
179 | te_recompute_fast_threshold(tsd); |
180 | } |
181 | |
182 | void |
183 | tsd_state_set(tsd_t *tsd, uint8_t new_state) { |
184 | /* Only the tsd module can change the state *to* recompute. */ |
185 | assert(new_state != tsd_state_nominal_recompute); |
186 | uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED); |
187 | if (old_state > tsd_state_nominal_max) { |
188 | /* |
189 | * Not currently in the nominal list, but it might need to be |
190 | * inserted there. |
191 | */ |
192 | assert(!tsd_in_nominal_list(tsd)); |
193 | tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED); |
194 | if (new_state <= tsd_state_nominal_max) { |
195 | tsd_add_nominal(tsd); |
196 | } |
197 | } else { |
198 | /* |
199 | * We're currently nominal. If the new state is non-nominal, |
200 | * great; we take ourselves off the list and just enter the new |
201 | * state. |
202 | */ |
203 | assert(tsd_in_nominal_list(tsd)); |
204 | if (new_state > tsd_state_nominal_max) { |
205 | tsd_remove_nominal(tsd); |
206 | tsd_atomic_store(&tsd->state, new_state, |
207 | ATOMIC_RELAXED); |
208 | } else { |
209 | /* |
210 | * This is the tricky case. We're transitioning from |
211 | * one nominal state to another. The caller can't know |
212 | * about any races that are occurring at the same time, |
213 | * so we always have to recompute no matter what. |
214 | */ |
215 | tsd_slow_update(tsd); |
216 | } |
217 | } |
218 | te_recompute_fast_threshold(tsd); |
219 | } |
220 | |
221 | static void |
222 | tsd_prng_state_init(tsd_t *tsd) { |
223 | /* |
224 | * A nondeterministic seed based on the address of tsd reduces |
225 | * the likelihood of lockstep non-uniform cache index |
226 | * utilization among identical concurrent processes, but at the |
227 | * cost of test repeatability. For debug builds, instead use a |
228 | * deterministic seed. |
229 | */ |
230 | *tsd_prng_statep_get(tsd) = config_debug ? 0 : |
231 | (uint64_t)(uintptr_t)tsd; |
232 | } |
233 | |
/*
 * Full data initialization for a tsd: rtree context, PRNG state, thread-event
 * state, sanitizer state and finally the tcache-enabled data.
 *
 * Returns whatever tsd_tcache_enabled_data_init() returns.
 */
static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}
246 | |
/*
 * Debug check that tsd holds nothing that would need cleanup: it is not
 * nominal and not on the nominal list, its arena, iarena and prof_tdata
 * pointers are NULL, and its tcache is disabled.
 */
static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}
256 | |
/*
 * Data initialization for a tsd in the reincarnated or minimal_initialized
 * state (asserted).  The tsd is left with the tcache disabled and the
 * reentrancy level set to 1, and the function asserts that nothing needing
 * cleanup is attached to it afterwards.
 *
 * Always returns false.
 */
static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}
276 | |
/*
 * Slow path of tsd fetch; tsd must not be in the fast state (asserted).
 *
 * Dispatches on the current state:
 *   - nominal_slow: nothing to do.
 *   - nominal_recompute: recompute the state via tsd_slow_update().
 *   - uninitialized: with minimal == false and tsd_booted set, become nominal,
 *     register the tsd via tsd_set() and run the full data init; with
 *     minimal == false and tsd_booted unset, do nothing; with minimal == true,
 *     become minimal_initialized and run the no-cleanup init.
 *   - minimal_initialized: with minimal == false, upgrade to nominal (dropping
 *     the reentrancy level by one) and run the full data init; otherwise only
 *     assert that no cleanup is pending.
 *   - purgatory: become reincarnated and run the no-cleanup init.
 *   - anything else must be reincarnated (asserted).
 *
 * Always returns the tsd pointer it was given.
 * NOTE(review): the return values of tsd_data_init() and
 * tsd_data_init_nocleanup() are ignored here -- confirm that is intended.
 */
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			/*
			 * tsd_data_init_nocleanup() set the level to 1; undo
			 * that before recomputing the state.
			 */
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}
325 | |
/*
 * Allocate memory through a0malloc().  The requested size is first passed
 * through CACHELINE_CEILING() (presumably rounding it up to a cache-line
 * multiple -- confirm against the macro definition).
 */
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}
330 | |
/* Release memory via a0dalloc(); the counterpart of malloc_tsd_malloc(). */
void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}
335 | |
336 | #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) |
/*
 * Registered thread-cleanup callbacks: ncleanups is the number of valid entries
 * in cleanups[].  Filled by _malloc_tsd_cleanup_register(), run by
 * _malloc_thread_cleanup(), and reset to empty by malloc_tsd_boot0().
 */
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
339 | |
340 | #ifndef _WIN32 |
341 | JEMALLOC_EXPORT |
342 | #endif |
343 | void |
344 | _malloc_thread_cleanup(void) { |
345 | bool pending[MALLOC_TSD_CLEANUPS_MAX], again; |
346 | unsigned i; |
347 | |
348 | for (i = 0; i < ncleanups; i++) { |
349 | pending[i] = true; |
350 | } |
351 | |
352 | do { |
353 | again = false; |
354 | for (i = 0; i < ncleanups; i++) { |
355 | if (pending[i]) { |
356 | pending[i] = cleanups[i](); |
357 | if (pending[i]) { |
358 | again = true; |
359 | } |
360 | } |
361 | } |
362 | } while (again); |
363 | } |
364 | |
365 | #ifndef _WIN32 |
366 | JEMALLOC_EXPORT |
367 | #endif |
368 | void |
369 | _malloc_tsd_cleanup_register(bool (*f)(void)) { |
370 | assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); |
371 | cleanups[ncleanups] = f; |
372 | ncleanups++; |
373 | } |
374 | |
375 | #endif |
376 | |
/*
 * Tear down the data hanging off tsd: prof tdata, iarena, arena, tcache and
 * witness state, in that order.  Afterwards the reentrancy level is set to 1.
 */
static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}
386 | |
387 | void |
388 | tsd_cleanup(void *arg) { |
389 | tsd_t *tsd = (tsd_t *)arg; |
390 | |
391 | switch (tsd_state_get(tsd)) { |
392 | case tsd_state_uninitialized: |
393 | /* Do nothing. */ |
394 | break; |
395 | case tsd_state_minimal_initialized: |
396 | /* This implies the thread only did free() in its life time. */ |
397 | /* Fall through. */ |
398 | case tsd_state_reincarnated: |
399 | /* |
400 | * Reincarnated means another destructor deallocated memory |
401 | * after the destructor was called. Cleanup isn't required but |
402 | * is still called for testing and completeness. |
403 | */ |
404 | assert_tsd_data_cleanup_done(tsd); |
405 | JEMALLOC_FALLTHROUGH; |
406 | case tsd_state_nominal: |
407 | case tsd_state_nominal_slow: |
408 | tsd_do_data_cleanup(tsd); |
409 | tsd_state_set(tsd, tsd_state_purgatory); |
410 | tsd_set(tsd); |
411 | break; |
412 | case tsd_state_purgatory: |
413 | /* |
414 | * The previous time this destructor was called, we set the |
415 | * state to tsd_state_purgatory so that other destructors |
416 | * wouldn't cause re-creation of the tsd. This time, do |
417 | * nothing, and do not request another callback. |
418 | */ |
419 | break; |
420 | default: |
421 | not_reached(); |
422 | } |
423 | #ifdef JEMALLOC_JET |
424 | test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); |
425 | int *data = tsd_test_datap_get_unsafe(tsd); |
426 | if (test_callback != NULL) { |
427 | test_callback(data); |
428 | } |
429 | #endif |
430 | } |
431 | |
432 | tsd_t * |
433 | malloc_tsd_boot0(void) { |
434 | tsd_t *tsd; |
435 | |
436 | #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) |
437 | ncleanups = 0; |
438 | #endif |
439 | if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock" , |
440 | WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { |
441 | return NULL; |
442 | } |
443 | if (tsd_boot0()) { |
444 | return NULL; |
445 | } |
446 | tsd = tsd_fetch(); |
447 | return tsd; |
448 | } |
449 | |
/*
 * Second stage of tsd bootstrapping: run tsd_boot1(), then refresh the calling
 * thread's tsd state.
 */
void
malloc_tsd_boot1(void) {
	tsd_boot1();
	/*
	 * malloc_slow has been set properly by now, so recompute this thread's
	 * slow/fast state from it.
	 */
	tsd_slow_update(tsd_fetch());
}
457 | |
458 | #ifdef _WIN32 |
/*
 * Windows TLS callback.  On DLL_THREAD_DETACH it runs the registered thread
 * cleanups; with JEMALLOC_LAZY_LOCK, DLL_THREAD_ATTACH additionally sets
 * isthreaded.  Every other reason is ignored.  hinstDLL and lpvReserved are
 * unused, and the callback always returns true.
 */
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}
475 | |
/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

/*
 * Under MSVC, ask the linker to keep the TLS-used symbol and the callback
 * pointer defined below; the symbol spellings differ between x86 (_M_IX86) and
 * the other targets.  Then declare the .CRT$XLY section the pointer goes into.
 */
#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
/* Pointer to _tls_callback, placed in the .CRT$XLY section. */
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
498 | #endif |
499 | |
500 | #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ |
501 | !defined(_WIN32)) |
502 | void * |
503 | tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { |
504 | pthread_t self = pthread_self(); |
505 | tsd_init_block_t *iter; |
506 | |
507 | /* Check whether this thread has already inserted into the list. */ |
508 | malloc_mutex_lock(TSDN_NULL, &head->lock); |
509 | ql_foreach(iter, &head->blocks, link) { |
510 | if (iter->thread == self) { |
511 | malloc_mutex_unlock(TSDN_NULL, &head->lock); |
512 | return iter->data; |
513 | } |
514 | } |
515 | /* Insert block into list. */ |
516 | ql_elm_new(block, link); |
517 | block->thread = self; |
518 | ql_tail_insert(&head->blocks, block, link); |
519 | malloc_mutex_unlock(TSDN_NULL, &head->lock); |
520 | return NULL; |
521 | } |
522 | |
/*
 * Remove block from head's block list under head->lock; undoes the insertion
 * performed by tsd_init_check_recursion().
 */
void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
529 | #endif |
530 | |
/* Pre-fork hook: hand tsd_nominal_tsds_lock to malloc_mutex_prefork(). */
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
535 | |
/*
 * Post-fork hook for the parent: hand tsd_nominal_tsds_lock to
 * malloc_mutex_postfork_parent().
 */
void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
540 | |
/*
 * Post-fork hook for the child: hand tsd_nominal_tsds_lock to
 * malloc_mutex_postfork_child(), then rebuild the nominal list from scratch so
 * that it contains at most the calling thread's tsd.
 */
void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	/* Drop every entry inherited from the parent. */
	ql_new(&tsd_nominal_tsds);

	/* Re-add this tsd only if it is in one of the nominal states. */
	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}
550 | |