#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/prof_data.h"

/*
 * This file defines and manages the core profiling data structures.
 *
 * Conceptually, profiling data can be imagined as a table with three columns:
 * thread, stack trace, and current allocation size.  (When prof_accum is on,
 * there's one additional column, the cumulative allocation size.)
 *
 * Implementation-wise, each thread maintains a hash recording the stack trace
 * to allocation size correspondences, which are basically the individual rows
 * in the table.  In addition, two global "indices" are built to make data
 * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically
 * the "grouped by stack trace" and "grouped by thread" views of the same table,
 * respectively.  Note that the allocation size is only aggregated to the two
 * indices at dumping time, so as to optimize for performance.
 */
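
/*
 * An illustrative sketch of that conceptual table (the values below are made
 * up for exposition, not taken from a real profile):
 *
 *	thread	stack trace	current bytes	(cumulative bytes)
 *	------	-----------	-------------	------------------
 *	t1	bt_A		4096		(8192)
 *	t1	bt_B		0		(2048)
 *	t2	bt_A		1024		(1024)
 *
 * bt2gctx corresponds to grouping these rows by stack trace (bt_A, bt_B),
 * while tdatas corresponds to grouping them by thread (t1, t2).
 */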

/******************************************************************************/

malloc_mutex_t bt2gctx_mtx;
malloc_mutex_t tdatas_mtx;
malloc_mutex_t prof_dump_mtx;

/*
 * Table of mutexes that are shared among gctx's.  These are leaf locks, so
 * there is no problem with using them for more than one gctx at the same time.
 * The primary motivation for this sharing though is that gctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
malloc_mutex_t *gctx_locks;
static atomic_u_t cum_gctxs; /* Atomic counter. */

/*
 * Table of mutexes that are shared among tdata's.  No operations require
 * holding multiple tdata locks, so there is no problem with using them for more
 * than one tdata at the same time, even though a gctx lock may be acquired
 * while holding a tdata lock.
 */
malloc_mutex_t *tdata_locks;

/*
 * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t bt2gctx;

/*
 * Tree of all extant prof_tdata_t structures, regardless of state,
 * {attached,detached,expired}.
 */
static prof_tdata_tree_t tdatas;

size_t prof_unbiased_sz[PROF_SC_NSIZES];
size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES];

/******************************************************************************/
/* Red-black trees. */

static int
prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
	uint64_t a_thr_uid = a->thr_uid;
	uint64_t b_thr_uid = b->thr_uid;
	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
	if (ret == 0) {
		uint64_t a_thr_discrim = a->thr_discrim;
		uint64_t b_thr_discrim = b->thr_discrim;
		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
		    b_thr_discrim);
		if (ret == 0) {
			uint64_t a_tctx_uid = a->tctx_uid;
			uint64_t b_tctx_uid = b->tctx_uid;
			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
			    b_tctx_uid);
		}
	}
	return ret;
}

rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
    tctx_link, prof_tctx_comp)

static int
prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
	unsigned a_len = a->bt.len;
	unsigned b_len = b->bt.len;
	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
	if (ret == 0) {
		ret = (a_len > b_len) - (a_len < b_len);
	}
	return ret;
}

rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
    prof_gctx_comp)

static int
prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
	int ret;
	uint64_t a_uid = a->thr_uid;
	uint64_t b_uid = b->thr_uid;

	ret = ((a_uid > b_uid) - (a_uid < b_uid));
	if (ret == 0) {
		uint64_t a_discrim = a->thr_discrim;
		uint64_t b_discrim = b->thr_discrim;

		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
	}
	return ret;
}

rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
    prof_tdata_comp)
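
/*
 * Each rb_gen() invocation above expands into a static red-black tree
 * implementation specialized to the given node type, link field, and
 * comparator.  For example, the last invocation generates the
 * tdata_tree_new(), tdata_tree_insert(), tdata_tree_remove(), and
 * tdata_tree_iter() functions used throughout this file, with nodes linked
 * through the tdata_link field of prof_tdata_t.
 */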

/******************************************************************************/

static malloc_mutex_t *
prof_gctx_mutex_choose(void) {
	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);

	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
}

static malloc_mutex_t *
prof_tdata_mutex_choose(uint64_t thr_uid) {
	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
}

bool
prof_data_init(tsd_t *tsd) {
	tdata_tree_new(&tdatas);
	return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp);
}

static void
prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	if (tdata != NULL) {
		assert(!tdata->enq);
		tdata->enq = true;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
}

static void
prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	if (tdata != NULL) {
		bool idump, gdump;

		assert(tdata->enq);
		tdata->enq = false;
		idump = tdata->enq_idump;
		tdata->enq_idump = false;
		gdump = tdata->enq_gdump;
		tdata->enq_gdump = false;

		if (idump) {
			prof_idump(tsd_tsdn(tsd));
		}
		if (gdump) {
			prof_gdump(tsd_tsdn(tsd));
		}
	}
}

static prof_gctx_t *
prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
	/*
	 * Create a single allocation that has space for vec of length bt->len.
	 */
	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
	    true);
	if (gctx == NULL) {
		return NULL;
	}
	gctx->lock = prof_gctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_tctx_destroy()/prof_gctx_try_destroy().
	 */
	gctx->nlimbo = 1;
	tctx_tree_new(&gctx->tctxs);
	/* Duplicate bt. */
	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
	gctx->bt.vec = gctx->vec;
	gctx->bt.len = bt->len;
	return gctx;
}
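
/*
 * A sketch of the layout produced by prof_gctx_create() (editorial, not a
 * separate structure): the backtrace vector is carved out of the tail of the
 * same allocation as the gctx header, and gctx->bt.vec is pointed at it:
 *
 *	[ prof_gctx_t fields ... | vec[0] | vec[1] | ... | vec[bt->len - 1] ]
 *	                           ^
 *	                           gctx->bt.vec == gctx->vec
 *
 * This keeps the gctx and its copied backtrace in one allocation, so a single
 * idalloctm() call frees both.
 */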

static void
prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self,
    prof_gctx_t *gctx) {
	cassert(config_prof);

	/*
	 * Check that gctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_tctx_destroy() in order to
	 * avoid a race between the main body of prof_tctx_destroy() and entry
	 * into this function.
	 */
	prof_enter(tsd, tdata_self);
	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	assert(gctx->nlimbo != 0);
	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
		/* Remove gctx from bt2gctx. */
		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
			not_reached();
		}
		prof_leave(tsd, tdata_self);
		/* Destroy gctx. */
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
	} else {
		/*
		 * Compensate for increment in prof_tctx_destroy() or
		 * prof_lookup().
		 */
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		prof_leave(tsd, tdata_self);
	}
}

static bool
prof_gctx_should_destroy(prof_gctx_t *gctx) {
	if (opt_prof_accum) {
		return false;
	}
	if (!tctx_tree_empty(&gctx->tctxs)) {
		return false;
	}
	if (gctx->nlimbo != 0) {
		return false;
	}
	return true;
}

static bool
prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
    void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
	union {
		prof_gctx_t *p;
		void *v;
	} gctx, tgctx;
	union {
		prof_bt_t *p;
		void *v;
	} btkey;
	bool new_gctx;

	prof_enter(tsd, tdata);
	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
		/* bt has never been seen before.  Insert it. */
		prof_leave(tsd, tdata);
		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
		if (tgctx.v == NULL) {
			return true;
		}
		prof_enter(tsd, tdata);
		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
			gctx.p = tgctx.p;
			btkey.p = &gctx.p->bt;
			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
				/* OOM. */
				prof_leave(tsd, tdata);
				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
				    true, true);
				return true;
			}
			new_gctx = true;
		} else {
			new_gctx = false;
		}
	} else {
		tgctx.v = NULL;
		new_gctx = false;
	}

	if (!new_gctx) {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_tctx_destroy()/prof_gctx_try_destroy().
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
		gctx.p->nlimbo++;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
		new_gctx = false;

		if (tgctx.v != NULL) {
			/* Lost race to insert. */
			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
			    true);
		}
	}
	prof_leave(tsd, tdata);

	*p_btkey = btkey.v;
	*p_gctx = gctx.p;
	*p_new_gctx = new_gctx;
	return false;
}

prof_tctx_t *
prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
	union {
		prof_tctx_t *p;
		void *v;
	} ret;
	prof_tdata_t *tdata;
	bool not_found;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, false);
	assert(tdata != NULL);

	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
	if (!not_found) { /* Note double negative! */
		ret.p->prepared = true;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (not_found) {
		void *btkey;
		prof_gctx_t *gctx;
		bool new_gctx, error;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
		    &new_gctx)) {
			return NULL;
		}

		/* Link a prof_tctx_t into gctx for this thread. */
		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
		    arena_ichoose(tsd, NULL), true);
		if (ret.p == NULL) {
			if (new_gctx) {
				prof_gctx_try_destroy(tsd, tdata, gctx);
			}
			return NULL;
		}
		ret.p->tdata = tdata;
		ret.p->thr_uid = tdata->thr_uid;
		ret.p->thr_discrim = tdata->thr_discrim;
		ret.p->recent_count = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		ret.p->gctx = gctx;
		ret.p->tctx_uid = tdata->tctx_uid_next++;
		ret.p->prepared = true;
		ret.p->state = prof_tctx_state_initializing;
		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
		if (error) {
			if (new_gctx) {
				prof_gctx_try_destroy(tsd, tdata, gctx);
			}
			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
			return NULL;
		}
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		ret.p->state = prof_tctx_state_nominal;
		tctx_tree_insert(&gctx->tctxs, ret.p);
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	}

	return ret.p;
}

/* Used in unit tests. */
static prof_tdata_t *
prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    void *arg) {
	size_t *tdata_count = (size_t *)arg;

	(*tdata_count)++;

	return NULL;
}

/* Used in unit tests. */
size_t
prof_tdata_count(void) {
	size_t tdata_count = 0;
	tsdn_t *tsdn;

	tsdn = tsdn_fetch();
	malloc_mutex_lock(tsdn, &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
	    (void *)&tdata_count);
	malloc_mutex_unlock(tsdn, &tdatas_mtx);

	return tdata_count;
}

/* Used in unit tests. */
size_t
prof_bt_count(void) {
	size_t bt_count;
	tsd_t *tsd;
	prof_tdata_t *tdata;

	tsd = tsd_fetch();
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		return 0;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
	bt_count = ckh_count(&bt2gctx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	return bt_count;
}

char *
prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) {
	char *ret;
	size_t size;

	if (thread_name == NULL) {
		return NULL;
	}

	size = strlen(thread_name) + 1;
	if (size == 1) {
		return "";
	}

	ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL,
	    true, arena_get(TSDN_NULL, 0, true), true);
	if (ret == NULL) {
		return NULL;
	}
	memcpy(ret, thread_name, size);
	return ret;
}

int
prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) {
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata;
	unsigned i;
	char *s;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return EAGAIN;
	}

	/* Validate input. */
	if (thread_name == NULL) {
		return EFAULT;
	}
	for (i = 0; thread_name[i] != '\0'; i++) {
		char c = thread_name[i];
		if (!isgraph(c) && !isblank(c)) {
			return EFAULT;
		}
	}

	s = prof_thread_name_alloc(tsd, thread_name);
	if (s == NULL) {
		return EAGAIN;
	}

	if (tdata->thread_name != NULL) {
		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
		    true);
		tdata->thread_name = NULL;
	}
	if (strlen(s) > 0) {
		tdata->thread_name = s;
	}
	return 0;
}

JEMALLOC_FORMAT_PRINTF(3, 4)
static void
prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque,
    const char *format, ...) {
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	prof_dump_write(cbopaque, buf);
}

/*
 * Casting a double to a uint64_t may not necessarily be in range; this can be
 * UB.  I don't think this is practically possible with the cur counters, but
 * plausibly could be with the accum counters.
 */
#ifdef JEMALLOC_PROF
static uint64_t
prof_double_uint64_cast(double d) {
	/*
	 * Note: UINT64_MAX + 1 is exactly representable as a double on all
	 * reasonable platforms (certainly those we'll support).  Writing this
	 * as !(a < b) instead of (a >= b) means that we're NaN-safe.
	 */
	double rounded = round(d);
	if (!(rounded < (double)UINT64_MAX)) {
		return UINT64_MAX;
	}
	return (uint64_t)rounded;
}
#endif

void
prof_unbias_map_init(void) {
	/* See the comment in prof_sample_new_event_wait(). */
#ifdef JEMALLOC_PROF
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		double sz = (double)sz_index2size(i);
		double rate = (double)(ZU(1) << lg_prof_sample);
		double div_val = 1.0 - exp(-sz / rate);
		double unbiased_sz = sz / div_val;
		/*
		 * The true value for the unbiased count is
		 * 1.0/(1 - exp(-sz/rate)).  The problem is, we keep the counts
		 * as integers (for a variety of reasons -- rounding errors
		 * could trigger asserts, and not all libcs can properly handle
		 * floating point arithmetic during malloc calls inside libc).
		 * Rounding to an integer, though, can lead to rounding errors
		 * of over 30% for sizes close to the sampling rate.  So
		 * instead, we multiply by a constant, dividing the maximum
		 * possible roundoff error by that constant.  To avoid overflow
		 * in summing up size_t values, the largest safe constant we can
		 * pick is the size of the smallest allocation.
		 */
		double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN);
		double shifted_unbiased_cnt = cnt_shift / div_val;
		prof_unbiased_sz[i] = (size_t)round(unbiased_sz);
		prof_shifted_unbiased_cnt[i] = (size_t)round(
		    shifted_unbiased_cnt);
	}
#else
	unreachable();
#endif
}
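
/*
 * A worked instance of the math above (editorial): for a size class equal to
 * the sampling rate, div_val = 1 - exp(-1) ~= 0.632, so the unbiased size is
 * roughly 1.58 * sz.  For sizes much smaller than the rate, div_val ~=
 * sz / rate, so the unbiased size approaches the rate itself -- each sampled
 * small object stands in for roughly one sampling period's worth of bytes.
 */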

/*
 * The unbiasing story is long.  The jeprof unbiasing logic was copied from
 * pprof.  Both shared an issue: they unbiased using the average size of the
 * allocations at a particular stack trace.  This can work out OK if allocations
 * are mostly of the same size given some stack, but not otherwise.  We now
 * internally track what the unbiased results ought to be.  We can't just report
 * them as they are though; they'll still go through the jeprof unbiasing
 * process.  Instead, we figure out what values we can feed *into* jeprof's
 * unbiasing mechanism that will lead to getting the right values out.
 *
 * It'll unbias count and aggregate size as:
 *
 *   c_out = c_in * 1/(1-exp(-s_in/c_in/R))
 *   s_out = s_in * 1/(1-exp(-s_in/c_in/R))
 *
 * We want to solve for the values of c_in and s_in that will
 * give the c_out and s_out that we've computed internally.
 *
 * Let's do a change of variables (both to make the math easier and to make it
 * easier to write):
 *   x = s_in / c_in
 *   y = s_in
 *   k = 1/R.
 *
 * Then
 *   c_out = y/x * 1/(1-exp(-k*x))
 *   s_out = y * 1/(1-exp(-k*x))
 *
 * The first equation gives:
 *   y = x * c_out * (1-exp(-k*x))
 * The second gives:
 *   y = s_out * (1-exp(-k*x))
 * So we have
 *   x = s_out / c_out.
 * And all the other values fall out from that.
 *
 * This is all a fair bit of work.  The thing we get out of it is that we don't
 * break backwards compatibility with jeprof (and the various tools that have
 * copied its unbiasing logic).  Eventually, we anticipate a v3 heap profile
 * dump format based on JSON, at which point I think much of this logic can get
 * cleaned up (since we'll be taking a compatibility break there anyways).
 */
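
/*
 * Sanity check of that solution (editorial): take x = s_out/c_out and
 * y = s_out * (1-exp(-k*x)), i.e. s_in = y and c_in = y/x.  Then
 * s_in/c_in = x, so jeprof computes
 *
 *   c_out' = (y/x) * 1/(1-exp(-k*x)) = s_out/x = c_out
 *   s_out' = y * 1/(1-exp(-k*x)) = s_out
 *
 * recovering exactly the internally tracked values, as intended.
 */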
static void
prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in,
    uint64_t *r_s_in) {
#ifdef JEMALLOC_PROF
	if (c_out_shifted_i == 0 || s_out_i == 0) {
		*r_c_in = 0;
		*r_s_in = 0;
		return;
	}
	/*
	 * See the note in prof_unbias_map_init() to see why we take c_out in a
	 * shifted form.
	 */
	double c_out = (double)c_out_shifted_i
	    / (double)(ZU(1) << SC_LG_TINY_MIN);
	double s_out = (double)s_out_i;
	double R = (double)(ZU(1) << lg_prof_sample);

	double x = s_out / c_out;
	double y = s_out * (1.0 - exp(-x / R));

	double c_in = y / x;
	double s_in = y;

	*r_c_in = prof_double_uint64_cast(c_in);
	*r_s_in = prof_double_uint64_cast(s_in);
#else
	unreachable();
#endif
}

static void
prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque,
    const prof_cnt_t *cnts) {
	uint64_t curobjs;
	uint64_t curbytes;
	uint64_t accumobjs;
	uint64_t accumbytes;
	if (opt_prof_unbias) {
		prof_do_unbias(cnts->curobjs_shifted_unbiased,
		    cnts->curbytes_unbiased, &curobjs, &curbytes);
		prof_do_unbias(cnts->accumobjs_shifted_unbiased,
		    cnts->accumbytes_unbiased, &accumobjs, &accumbytes);
	} else {
		curobjs = cnts->curobjs;
		curbytes = cnts->curbytes;
		accumobjs = cnts->accumobjs;
		accumbytes = cnts->accumbytes;
	}
	prof_dump_printf(prof_dump_write, cbopaque,
	    "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]",
	    curobjs, curbytes, accumobjs, accumbytes);
}

static void
prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);

	malloc_mutex_lock(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
		return;
	case prof_tctx_state_nominal:
		tctx->state = prof_tctx_state_dumping;
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);

		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));

		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
		tdata->cnt_summed.curobjs_shifted_unbiased
		    += tctx->dump_cnts.curobjs_shifted_unbiased;
		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
		tdata->cnt_summed.curbytes_unbiased
		    += tctx->dump_cnts.curbytes_unbiased;
		if (opt_prof_accum) {
			tdata->cnt_summed.accumobjs +=
			    tctx->dump_cnts.accumobjs;
			tdata->cnt_summed.accumobjs_shifted_unbiased +=
			    tctx->dump_cnts.accumobjs_shifted_unbiased;
			tdata->cnt_summed.accumbytes +=
			    tctx->dump_cnts.accumbytes;
			tdata->cnt_summed.accumbytes_unbiased +=
			    tctx->dump_cnts.accumbytes_unbiased;
		}
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		not_reached();
	}
}

static void
prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
	malloc_mutex_assert_owner(tsdn, gctx->lock);

	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
	gctx->cnt_summed.curobjs_shifted_unbiased
	    += tctx->dump_cnts.curobjs_shifted_unbiased;
	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
	gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased;
	if (opt_prof_accum) {
		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
		gctx->cnt_summed.accumobjs_shifted_unbiased
		    += tctx->dump_cnts.accumobjs_shifted_unbiased;
		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
		gctx->cnt_summed.accumbytes_unbiased
		    += tctx->dump_cnts.accumbytes_unbiased;
	}
}

static prof_tctx_t *
prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
	tsdn_t *tsdn = (tsdn_t *)arg;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
		break;
	default:
		not_reached();
	}

	return NULL;
}

typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t;
struct prof_dump_iter_arg_s {
	tsdn_t *tsdn;
	write_cb_t *prof_dump_write;
	void *cbopaque;
};

static prof_tctx_t *
prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
	case prof_tctx_state_nominal:
		/* Not captured by this dump. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
		    "  t%"FMTu64": ", tctx->thr_uid);
		prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
		    &tctx->dump_cnts);
		arg->prof_dump_write(arg->cbopaque, "\n");
		break;
	default:
		not_reached();
	}
	return NULL;
}

static prof_tctx_t *
prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
	tsdn_t *tsdn = (tsdn_t *)arg;
	prof_tctx_t *ret;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
		tctx->state = prof_tctx_state_nominal;
		break;
	case prof_tctx_state_purgatory:
		ret = tctx;
		goto label_return;
	default:
		not_reached();
	}

	ret = NULL;
label_return:
	return ret;
}

static void
prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
	cassert(config_prof);

	malloc_mutex_lock(tsdn, gctx->lock);

	/*
	 * Increment nlimbo so that gctx won't go away before dump.
	 * Additionally, link gctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
	 */
	gctx->nlimbo++;
	gctx_tree_insert(gctxs, gctx);

	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));

	malloc_mutex_unlock(tsdn, gctx->lock);
}

typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t;
struct prof_gctx_merge_iter_arg_s {
	tsdn_t *tsdn;
	size_t *leak_ngctx;
};

static prof_gctx_t *
prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
	prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque;

	malloc_mutex_lock(arg->tsdn, gctx->lock);
	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
	    (void *)arg->tsdn);
	if (gctx->cnt_summed.curobjs != 0) {
		(*arg->leak_ngctx)++;
	}
	malloc_mutex_unlock(arg->tsdn, gctx->lock);

	return NULL;
}

static void
prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
	prof_gctx_t *gctx;

	/*
	 * Standard tree iteration won't work here, because as soon as we
	 * decrement gctx->nlimbo and unlock gctx, another thread can
	 * concurrently destroy it, which will corrupt the tree.  Therefore,
	 * tear down the tree one node at a time during iteration.
	 */
	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
		gctx_tree_remove(gctxs, gctx);
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		{
			prof_tctx_t *next;

			next = NULL;
			do {
				prof_tctx_t *to_destroy =
				    tctx_tree_iter(&gctx->tctxs, next,
				    prof_tctx_finish_iter,
				    (void *)tsd_tsdn(tsd));
				if (to_destroy != NULL) {
					next = tctx_tree_next(&gctx->tctxs,
					    to_destroy);
					tctx_tree_remove(&gctx->tctxs,
					    to_destroy);
					idalloctm(tsd_tsdn(tsd), to_destroy,
					    NULL, NULL, true, true);
				} else {
					next = NULL;
				}
			} while (next != NULL);
		}
		gctx->nlimbo--;
		if (prof_gctx_should_destroy(gctx)) {
			gctx->nlimbo++;
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
			prof_gctx_try_destroy(tsd, tdata, gctx);
		} else {
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		}
	}
}

typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t;
struct prof_tdata_merge_iter_arg_s {
	tsdn_t *tsdn;
	prof_cnt_t *cnt_all;
};

static prof_tdata_t *
prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    void *opaque) {
	prof_tdata_merge_iter_arg_t *arg =
	    (prof_tdata_merge_iter_arg_t *)opaque;

	malloc_mutex_lock(arg->tsdn, tdata->lock);
	if (!tdata->expired) {
		size_t tabind;
		union {
			prof_tctx_t *p;
			void *v;
		} tctx;

		tdata->dumping = true;
		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
		    &tctx.v);) {
			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
		}

		arg->cnt_all->curobjs += tdata->cnt_summed.curobjs;
		arg->cnt_all->curobjs_shifted_unbiased
		    += tdata->cnt_summed.curobjs_shifted_unbiased;
		arg->cnt_all->curbytes += tdata->cnt_summed.curbytes;
		arg->cnt_all->curbytes_unbiased
		    += tdata->cnt_summed.curbytes_unbiased;
		if (opt_prof_accum) {
			arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
			arg->cnt_all->accumobjs_shifted_unbiased
			    += tdata->cnt_summed.accumobjs_shifted_unbiased;
			arg->cnt_all->accumbytes +=
			    tdata->cnt_summed.accumbytes;
			arg->cnt_all->accumbytes_unbiased +=
			    tdata->cnt_summed.accumbytes_unbiased;
		}
	} else {
		tdata->dumping = false;
	}
	malloc_mutex_unlock(arg->tsdn, tdata->lock);

	return NULL;
}

static prof_tdata_t *
prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    void *opaque) {
	if (!tdata->dumping) {
		return NULL;
	}

	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
	prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "  t%"FMTu64": ",
	    tdata->thr_uid);
	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
	    &tdata->cnt_summed);
	if (tdata->thread_name != NULL) {
		arg->prof_dump_write(arg->cbopaque, " ");
		arg->prof_dump_write(arg->cbopaque, tdata->thread_name);
	}
	arg->prof_dump_write(arg->cbopaque, "\n");
	return NULL;
}

static void
prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) {
	prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
	    "heap_v2/%"FMTu64"\n  t*: ", ((uint64_t)1U << lg_prof_sample));
	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all);
	arg->prof_dump_write(arg->cbopaque, "\n");

	malloc_mutex_lock(arg->tsdn, &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg);
	malloc_mutex_unlock(arg->tsdn, &tdatas_mtx);
}
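
/*
 * Putting the pieces together, the header emitted above looks roughly like
 * the following (counts shown as <...> placeholders; the bracketed pair only
 * carries real data when prof_accum is on):
 *
 *	heap_v2/<2^lg_prof_sample>
 *	  t*: <curobjs>: <curbytes> [<accumobjs>: <accumbytes>]
 *	  t<thr_uid>: <curobjs>: <curbytes> [...] <thread name, if set>
 *	  ...
 */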

static void
prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx,
    const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
	cassert(config_prof);
	malloc_mutex_assert_owner(arg->tsdn, gctx->lock);

	/* Avoid dumping gctx's that have no useful data. */
	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
		assert(gctx->cnt_summed.curobjs == 0);
		assert(gctx->cnt_summed.curbytes == 0);
		/*
		 * These asserts would not be correct -- see the comment on
		 * races in prof.c
		 * assert(gctx->cnt_summed.curobjs_unbiased == 0);
		 * assert(gctx->cnt_summed.curbytes_unbiased == 0);
		 */
		assert(gctx->cnt_summed.accumobjs == 0);
		assert(gctx->cnt_summed.accumobjs_shifted_unbiased == 0);
		assert(gctx->cnt_summed.accumbytes == 0);
		assert(gctx->cnt_summed.accumbytes_unbiased == 0);
		return;
	}

	arg->prof_dump_write(arg->cbopaque, "@");
	for (unsigned i = 0; i < bt->len; i++) {
		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
		    " %#"FMTxPTR, (uintptr_t)bt->vec[i]);
	}

	arg->prof_dump_write(arg->cbopaque, "\n  t*: ");
	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
	    &gctx->cnt_summed);
	arg->prof_dump_write(arg->cbopaque, "\n");

	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg);
}
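
/*
 * Each record emitted above therefore looks roughly like (addresses and
 * counts are placeholders):
 *
 *	@ 0x<frame0> 0x<frame1> ...
 *	  t*: <curobjs>: <curbytes> [<accumobjs>: <accumbytes>]
 *	  t<thr_uid>: <curobjs>: <curbytes> [<accumobjs>: <accumbytes>]
 *	  ...
 *
 * i.e. one "@" line per backtrace, a per-gctx total line, then one line per
 * dumping tctx.
 */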

/*
 * See prof_sample_new_event_wait() comment for why the body of this function
 * is conditionally compiled.
 */
static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) {
#ifdef JEMALLOC_PROF
	/*
	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result
	 * may differ slightly from what jeprof reports, because here we scale
	 * the summary values, whereas jeprof scales each context individually
	 * and reports the sums of the scaled values.
	 */
	if (cnt_all->curbytes != 0) {
		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
		double ratio = (((double)cnt_all->curbytes) /
		    (double)cnt_all->curobjs) / sample_period;
		double scale_factor = 1.0 / (1.0 - exp(-ratio));
		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
		    * scale_factor);
		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
		    scale_factor);

		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run jeprof on dump output for leak detail\n");
		if (opt_prof_leak_error) {
			malloc_printf(
			    "<jemalloc>: Exiting with error code because memory"
			    " leaks were detected\n");
			/*
			 * Use _exit() with underscore to avoid calling atexit()
			 * and entering an endless cycle.
			 */
			_exit(1);
		}
	}
#endif
}

static prof_gctx_t *
prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
	malloc_mutex_lock(arg->tsdn, gctx->lock);
	prof_dump_gctx(arg, gctx, &gctx->bt, gctxs);
	malloc_mutex_unlock(arg->tsdn, gctx->lock);
	return NULL;
}

static void
prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all,
    size_t *leak_ngctx, prof_gctx_tree_t *gctxs) {
	size_t tabind;
	union {
		prof_gctx_t *p;
		void *v;
	} gctx;

	prof_enter(tsd, tdata);

	/*
	 * Put gctx's in limbo and clear their counters in preparation for
	 * summing.
	 */
	gctx_tree_new(gctxs);
	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
	}

	/*
	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
	 * stats and merge them into the associated gctx's.
	 */
	memset(cnt_all, 0, sizeof(prof_cnt_t));
	prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd),
	    cnt_all};
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
	    &prof_tdata_merge_iter_arg);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);

	/* Merge tctx stats into gctx's. */
	*leak_ngctx = 0;
	prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd),
	    leak_ngctx};
	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
	    &prof_gctx_merge_iter_arg);

	prof_leave(tsd, tdata);
}

void
prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
    prof_tdata_t *tdata, bool leakcheck) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx);
	prof_cnt_t cnt_all;
	size_t leak_ngctx;
	prof_gctx_tree_t gctxs;
	prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs);
	prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd),
	    prof_dump_write, cbopaque};
	prof_dump_header(&prof_dump_iter_arg, &cnt_all);
	gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg);
	prof_gctx_finish(tsd, &gctxs);
	if (leakcheck) {
		prof_leakcheck(&cnt_all, leak_ngctx);
	}
}

/* Used in unit tests. */
void
prof_cnt_all(prof_cnt_t *cnt_all) {
	tsd_t *tsd = tsd_fetch();
	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL) {
		memset(cnt_all, 0, sizeof(prof_cnt_t));
	} else {
		size_t leak_ngctx;
		prof_gctx_tree_t gctxs;
		prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs);
		prof_gctx_finish(tsd, &gctxs);
	}
}

void
prof_bt_hash(const void *key, size_t r_hash[2]) {
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

bool
prof_bt_keycomp(const void *k1, const void *k2) {
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len) {
		return false;
	}
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
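
/*
 * prof_bt_hash() and prof_bt_keycomp() are the hash/equality pair handed to
 * ckh_new() for both the global bt2gctx table (see prof_data_init()) and each
 * thread's bt2tctx table (see prof_tdata_init_impl()), so a backtrace hashes
 * identically in both.  Two backtraces compare equal only if they have the
 * same length and identical frame addresses.
 */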

prof_tdata_t *
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
    char *thread_name, bool active) {
	assert(tsd_reentrancy_level_get(tsd) == 0);

	prof_tdata_t *tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (tdata == NULL) {
		return NULL;
	}

	tdata->lock = prof_tdata_mutex_choose(thr_uid);
	tdata->thr_uid = thr_uid;
	tdata->thr_discrim = thr_discrim;
	tdata->thread_name = thread_name;
	tdata->attached = true;
	tdata->expired = false;
	tdata->tctx_uid_next = 0;

	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
	    prof_bt_keycomp)) {
		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
		return NULL;
	}

	tdata->enq = false;
	tdata->enq_idump = false;
	tdata->enq_gdump = false;

	tdata->dumping = false;
	tdata->active = active;

	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	tdata_tree_insert(&tdatas, tdata);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);

	return tdata;
}

static bool
prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
	if (tdata->attached && !even_if_attached) {
		return false;
	}
	if (ckh_count(&tdata->bt2tctx) != 0) {
		return false;
	}
	return true;
}

static bool
prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
    bool even_if_attached) {
	malloc_mutex_assert_owner(tsdn, tdata->lock);

	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
}

static void
prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
    bool even_if_attached) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock);

	tdata_tree_remove(&tdatas, tdata);

	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));

	if (tdata->thread_name != NULL) {
		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
		    true);
	}
	ckh_delete(tsd, &tdata->bt2tctx);
	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
}

static void
prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
}

void
prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
	bool destroy_tdata;

	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	if (tdata->attached) {
		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
		    true);
		/*
		 * Only detach if !destroy_tdata, because detaching would allow
		 * another thread to win the race to destroy tdata.
		 */
		if (!destroy_tdata) {
			tdata->attached = false;
		}
		tsd_prof_tdata_set(tsd, NULL);
	} else {
		destroy_tdata = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (destroy_tdata) {
		prof_tdata_destroy(tsd, tdata, true);
	}
}

static bool
prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
	bool destroy_tdata;

	malloc_mutex_lock(tsdn, tdata->lock);
	if (!tdata->expired) {
		tdata->expired = true;
		destroy_tdata = prof_tdata_should_destroy(tsdn, tdata, false);
	} else {
		destroy_tdata = false;
	}
	malloc_mutex_unlock(tsdn, tdata->lock);

	return destroy_tdata;
}

static prof_tdata_t *
prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    void *arg) {
	tsdn_t *tsdn = (tsdn_t *)arg;

	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
}

void
prof_reset(tsd_t *tsd, size_t lg_sample) {
	prof_tdata_t *next;

	assert(lg_sample < (sizeof(uint64_t) << 3));

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);

	lg_prof_sample = lg_sample;
	prof_unbias_map_init();

	next = NULL;
	do {
		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
		    prof_tdata_reset_iter, (void *)tsd);
		if (to_destroy != NULL) {
			next = tdata_tree_next(&tdatas, to_destroy);
			prof_tdata_destroy_locked(tsd, to_destroy, false);
		} else {
			next = NULL;
		}
	} while (next != NULL);

	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
}

static bool
prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	if (opt_prof_accum) {
		return false;
	}
	if (tctx->cnts.curobjs != 0) {
		return false;
	}
	if (tctx->prepared) {
		return false;
	}
	if (tctx->recent_count != 0) {
		return false;
	}
	return true;
}

static void
prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	assert(tctx->cnts.curobjs == 0);
	assert(tctx->cnts.curbytes == 0);
	/*
	 * These asserts are not correct -- see the comment about races in
	 * prof.c
	 *
	 * assert(tctx->cnts.curobjs_shifted_unbiased == 0);
	 * assert(tctx->cnts.curbytes_unbiased == 0);
	 */
	assert(!opt_prof_accum);
	assert(tctx->cnts.accumobjs == 0);
	assert(tctx->cnts.accumbytes == 0);
	/*
	 * These ones are, since accumbyte counts never go down.  Either
	 * prof_accum is off (in which case these should never have changed from
	 * their initial value of zero), or it's on (in which case we shouldn't
	 * be destroying this tctx).
	 */
	assert(tctx->cnts.accumobjs_shifted_unbiased == 0);
	assert(tctx->cnts.accumbytes_unbiased == 0);

	prof_gctx_t *gctx = tctx->gctx;

	{
		prof_tdata_t *tdata = tctx->tdata;
		tctx->tdata = NULL;
		ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
		bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd),
		    tdata, false);
		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
		if (destroy_tdata) {
			prof_tdata_destroy(tsd, tdata, false);
		}
	}

	bool destroy_tctx, destroy_gctx;

	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	switch (tctx->state) {
	case prof_tctx_state_nominal:
		tctx_tree_remove(&gctx->tctxs, tctx);
		destroy_tctx = true;
		if (prof_gctx_should_destroy(gctx)) {
			/*
			 * Increment gctx->nlimbo in order to keep another
			 * thread from winning the race to destroy gctx while
			 * this one has gctx->lock dropped.  Without this, it
			 * would be possible for another thread to:
			 *
			 * 1) Sample an allocation associated with gctx.
			 * 2) Deallocate the sampled object.
			 * 3) Successfully prof_gctx_try_destroy(gctx).
			 *
			 * The result would be that gctx no longer exists by the
			 * time this thread accesses it in
			 * prof_gctx_try_destroy().
			 */
			gctx->nlimbo++;
			destroy_gctx = true;
		} else {
			destroy_gctx = false;
		}
		break;
	case prof_tctx_state_dumping:
		/*
		 * A dumping thread needs tctx to remain valid until dumping
		 * has finished.  Change state such that the dumping thread will
		 * complete destruction during a late dump iteration phase.
		 */
		tctx->state = prof_tctx_state_purgatory;
		destroy_tctx = false;
		destroy_gctx = false;
		break;
	default:
		not_reached();
		destroy_tctx = false;
		destroy_gctx = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	if (destroy_gctx) {
		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx);
	}
	if (destroy_tctx) {
		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
	}
}
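
/*
 * For reference, the tctx state machine implied by the code above and by the
 * dump path (an editorial summary, not a separate mechanism):
 *
 *	initializing --> nominal	(prof_lookup())
 *	nominal      --> dumping	(prof_tctx_merge_tdata())
 *	dumping      --> nominal	(prof_tctx_finish_iter())
 *	dumping      --> purgatory	(prof_tctx_destroy())
 *	purgatory    --> freed		(prof_gctx_finish())
 *
 * A tctx in purgatory has already been logically destroyed; the dumping
 * thread is responsible for unlinking and freeing it once the dump no longer
 * needs it.
 */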

void
prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	if (prof_tctx_should_destroy(tsd, tctx)) {
		/* tctx->tdata->lock will be released in prof_tctx_destroy(). */
		prof_tctx_destroy(tsd, tctx);
	} else {
		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
	}
}

/******************************************************************************/