#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/sec.h"

static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
    bool *deferred_work_generated);
static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool *deferred_work_generated);
static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated);

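/* Initialize a bin: no fill in flight, no cached bytes, empty freelist. */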
static void
sec_bin_init(sec_bin_t *bin) {
	bin->being_batch_filled = false;
	bin->bytes_cur = 0;
	edata_list_active_init(&bin->freelist);
}

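/*
 * Carve the shards and their bins out of a single base allocation, then
 * initialize each shard.  Returns true on failure (base allocation or mutex
 * initialization), false on success.
 */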
bool
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
    const sec_opts_t *opts) {
	assert(opts->max_alloc >= PAGE);

	size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
	pszind_t npsizes = sz_psz2ind(max_alloc) + 1;

	size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
	size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
	size_t sz_alloc = sz_shards + sz_bins;
	void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
	if (dynalloc == NULL) {
		return true;
	}
	sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
	sec->shards = shard_cur;
	sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
	/* Just for asserts, below. */
	sec_bin_t *bin_start = bin_cur;

	for (size_t i = 0; i < opts->nshards; i++) {
		sec_shard_t *shard = shard_cur;
		shard_cur++;
		bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
		    WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
		if (err) {
			return true;
		}
		shard->enabled = true;
		shard->bins = bin_cur;
		for (pszind_t j = 0; j < npsizes; j++) {
			sec_bin_init(&shard->bins[j]);
			bin_cur++;
		}
		shard->bytes_cur = 0;
		shard->to_flush_next = 0;
	}
	/*
	 * Should have exactly matched the bin_start to the first unused byte
	 * after the shards.
	 */
	assert((void *)shard_cur == (void *)bin_start);
	/* And the last bin to use up the last bytes of the allocation. */
	assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));
	sec->fallback = fallback;

	sec->opts = *opts;
	sec->npsizes = npsizes;

	/*
	 * Initialize these last so that an improper use of an SEC whose
	 * initialization failed will segfault in an easy-to-spot way.
	 */
	sec->pai.alloc = &sec_alloc;
	sec->pai.alloc_batch = &pai_alloc_batch_default;
	sec->pai.expand = &sec_expand;
	sec->pai.shrink = &sec_shrink;
	sec->pai.dalloc = &sec_dalloc;
	sec->pai.dalloc_batch = &pai_dalloc_batch_default;

	return false;
}

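/*
 * Pick the shard the current thread should use, lazily choosing one at random
 * and caching the choice in TSD so that a given thread always hits the same
 * shard.
 */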
static sec_shard_t *
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
	/*
	 * Eventually, we should implement affinity, tracking the source shard
	 * using the edata_t's newly freed up fields.  For now, just randomly
	 * distribute across all shards.
	 */
	if (tsdn_null(tsdn)) {
		return &sec->shards[0];
	}
	tsd_t *tsd = tsdn_tsd(tsdn);
	uint8_t *idxp = tsd_sec_shardp_get(tsd);
	if (*idxp == (uint8_t)-1) {
		/*
		 * First use; initialize using the trick from Daniel Lemire's
		 * "A fast alternative to the modulo reduction".  Use a 64-bit
		 * number to store 32 bits, since we'll deliberately overflow
		 * when we multiply by the number of shards.
		 */
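		/*
		 * For example, with nshards == 3, a rand32 drawn from
		 * [0, 2^32) maps to (rand32 * 3) >> 32, which lands in
		 * [0, 3) without any division.
		 */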
		uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd),
		    32);
		uint32_t idx =
		    (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
		assert(idx < (uint32_t)sec->opts.nshards);
		*idxp = (uint8_t)idx;
	}
	return &sec->shards[*idxp];
}

/*
 * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
 * empty cache, we'll try to fill it, which can push the shard over its limit.
 */
static void
sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	while (shard->bytes_cur > sec->opts.bytes_after_flush) {
		/* Pick a victim. */
		sec_bin_t *bin = &shard->bins[shard->to_flush_next];

		/* Update our victim-picking state. */
		shard->to_flush_next++;
		if (shard->to_flush_next == sec->npsizes) {
			shard->to_flush_next = 0;
		}

		assert(shard->bytes_cur >= bin->bytes_cur);
		if (bin->bytes_cur != 0) {
			shard->bytes_cur -= bin->bytes_cur;
			bin->bytes_cur = 0;
			edata_list_active_concat(&to_flush, &bin->freelist);
		}
		/*
		 * Either bin->bytes_cur was 0, in which case we didn't touch
		 * the bin list but it should be empty anyways (or else we
		 * missed a bytes_cur update on a list modification), or it
		 * *wasn't* 0 and we emptied it ourselves.  Either way, it
		 * should be empty now.
		 */
		assert(edata_list_active_empty(&bin->freelist));
	}

	malloc_mutex_unlock(tsdn, &shard->mtx);
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

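/*
 * Try to satisfy an allocation from the bin's freelist, with the shard mutex
 * held.  Returns NULL if the shard has been disabled or the bin is empty.
 */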
static edata_t *
sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	if (!shard->enabled) {
		return NULL;
	}
	edata_t *edata = edata_list_active_first(&bin->freelist);
	if (edata != NULL) {
		edata_list_active_remove(&bin->freelist, edata);
		assert(edata_size_get(edata) <= bin->bytes_cur);
		bin->bytes_cur -= edata_size_get(edata);
		assert(edata_size_get(edata) <= shard->bytes_cur);
		shard->bytes_cur -= edata_size_get(edata);
	}
	return edata;
}

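/*
 * The bin was empty: ask the fallback for 1 + batch_fill_extra extents in a
 * single batch, return the first one to the caller, and cache the rest.
 * Caching them may push the shard over max_bytes, in which case some bins get
 * flushed before we return.
 */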
static edata_t *
sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin, size_t size) {
	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);

	edata_list_active_t result;
	edata_list_active_init(&result);
	bool deferred_work_generated = false;
	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);

	edata_t *ret = edata_list_active_first(&result);
	if (ret != NULL) {
		edata_list_active_remove(&result, ret);
	}

	malloc_mutex_lock(tsdn, &shard->mtx);
	bin->being_batch_filled = false;
	/*
	 * Handle the easy case first: nothing to cache.  Note that this can
	 * only happen in case of OOM, since sec_alloc checks the expected
	 * number of allocs, and doesn't bother going down the batch_fill
	 * pathway if there won't be anything left to cache.  So to be in this
	 * code path, we must have asked for > 1 alloc, but gotten at most 1
	 * back.
	 */
	if (nalloc <= 1) {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return ret;
	}

	size_t new_cached_bytes = (nalloc - 1) * size;

	edata_list_active_concat(&bin->freelist, &result);
	bin->bytes_cur += new_cached_bytes;
	shard->bytes_cur += new_cached_bytes;

	if (shard->bytes_cur > sec->opts.max_bytes) {
		sec_flush_some_and_unlock(tsdn, sec, shard);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}

	return ret;
}

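/*
 * pai_t alloc hook.  Requests the SEC can't serve (zeroed, aligned beyond a
 * page, larger than max_alloc, or when there are no shards) go straight to the
 * fallback.  Otherwise we try the picked shard's bin for this size class; on a
 * miss we either batch-fill the bin (if batch_fill_extra is enabled) or do a
 * plain fallback allocation.
 */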
static edata_t *
sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
	assert((size & PAGE_MASK) == 0);
	assert(!guarded);

	sec_t *sec = (sec_t *)self;

	if (zero || alignment > PAGE || sec->opts.nshards == 0
	    || size > sec->opts.max_alloc) {
		return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
		    /* guarded */ false, frequent_reuse,
		    deferred_work_generated);
	}
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);

	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	sec_bin_t *bin = &shard->bins[pszind];
	bool do_batch_fill = false;

	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
	if (edata == NULL) {
		if (!bin->being_batch_filled
		    && sec->opts.batch_fill_extra > 0) {
			bin->being_batch_filled = true;
			do_batch_fill = true;
		}
	}
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (edata == NULL) {
		if (do_batch_fill) {
			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
			    size);
		} else {
			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
			    zero, /* guarded */ false, frequent_reuse,
			    deferred_work_generated);
		}
	}
	return edata;
}

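/*
 * Expand and shrink are pure pass-throughs to the fallback; the SEC only
 * caches and reuses whole extents.
 */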
static bool
sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool zero, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
	    deferred_work_generated);
}

static bool
sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
	    deferred_work_generated);
}

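/*
 * Empty every bin in the shard and hand the extents back to the fallback in a
 * single batch.  Unlike the common flush path, this runs with the shard mutex
 * held throughout; see the comment below for why that's acceptable.
 */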
static void
sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	shard->bytes_cur = 0;
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	for (pszind_t i = 0; i < sec->npsizes; i++) {
		sec_bin_t *bin = &shard->bins[i];
		bin->bytes_cur = 0;
		edata_list_active_concat(&to_flush, &bin->freelist);
	}

	/*
	 * Ordinarily we would try to avoid doing the batch deallocation while
	 * holding the shard mutex, but the flush_all pathways only happen when
	 * we're disabling the HPA or resetting the arena, both of which are
	 * rare pathways.
	 */
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

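/*
 * Cache a freed extent in its size class's bin; if that pushes the shard over
 * max_bytes, flush some bins back to the fallback.  The shard mutex is dropped
 * before returning in either case.
 */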
static void
sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    edata_t *edata) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	assert(shard->bytes_cur <= sec->opts.max_bytes);
	size_t size = edata_size_get(edata);
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);
	/*
	 * Prepending here results in LIFO allocation per bin, which seems
	 * reasonable.
	 */
	sec_bin_t *bin = &shard->bins[pszind];
	edata_list_active_prepend(&bin->freelist, edata);
	bin->bytes_cur += size;
	shard->bytes_cur += size;
	if (shard->bytes_cur > sec->opts.max_bytes) {
		/*
		 * We've exceeded the shard limit.  As a nod in the direction
		 * of fragmentation avoidance, we evict whole bins at a time
		 * (cycling through them round-robin) rather than individual
		 * extents, and we keep going until the shard drops below
		 * bytes_after_flush so that we don't flush again on every
		 * subsequent dalloc.  The evicted extents are handed back to
		 * the fallback only after the mutex has been dropped, inside
		 * sec_flush_some_and_unlock.  This also has the advantage of
		 * not requiring advanced cache balancing strategies.
		 */
		sec_flush_some_and_unlock(tsdn, sec, shard);
		malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}
}

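/*
 * pai_t dalloc hook.  Extents too large to cache, or belonging to an SEC with
 * no shards, go straight back to the fallback; so does everything else once
 * the chosen shard has been disabled.
 */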
static void
sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	if (sec->opts.nshards == 0
	    || edata_size_get(edata) > sec->opts.max_alloc) {
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
		return;
	}
	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	malloc_mutex_lock(tsdn, &shard->mtx);
	if (shard->enabled) {
		sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
	}
}

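/* Flush every shard's cached extents back to the fallback. */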
void
sec_flush(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

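/*
 * Stop each shard from caching new extents, then flush whatever it already
 * holds.
 */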
void
sec_disable(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec->shards[i].enabled = false;
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

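/* Accumulate the bytes currently cached across all shards into stats->bytes. */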
void
sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
	size_t sum = 0;
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		/*
		 * We could save these lock acquisitions by making bytes_cur
		 * atomic, but stats collection is rare anyways and we expect
		 * the number and type of stats to get more interesting.
		 */
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sum += sec->shards[i].bytes_cur;
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
	stats->bytes += sum;
}

void
sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
    mutex_prof_data_t *mutex_prof_data) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		malloc_mutex_prof_accum(tsdn, mutex_prof_data,
		    &sec->shards[i].mtx);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
	}
}