#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/*
 * In auto mode, arenas switch to huge pages for the base allocator on the
 * second base block.  a0 switches to thp on the 5th block (after 20 megabytes
 * of metadata), since more metadata (e.g. rtree nodes) comes from a0's base.
 */
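/*
 * Illustration, assuming 2 MiB huge pages: block sizes grow along the page
 * size class series rounded up to HUGEPAGE (roughly 2M, 4M, 6M, 8M, ...), so
 * a0 accumulates about 2 + 4 + 6 + 8 = 20 MiB across its first four blocks
 * before adding the 5th block triggers the switch.
 */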

#define BASE_AUTO_THP_THRESHOLD    2
#define BASE_AUTO_THP_THRESHOLD_A0 5

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

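/*
 * True when base blocks should be madvised to (and away from) huge pages
 * explicitly: metadata THP is enabled, and init_system_thp_mode indicates a
 * system THP setting that relies on madvise() rather than applying huge pages
 * on its own.
 */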
static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

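/*
 * Map a huge-page-aligned block of virtual memory for base metadata.  With
 * default extent hooks this goes straight to mmap (recording THP state when
 * madvise-based huge pages are available); otherwise the supplied extent hooks
 * are used.
 */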
static void *
base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (ehooks_are_default(ehooks)) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
		if (have_madvise_huge && addr) {
			pages_set_thp_state(addr, size);
		}
	} else {
		addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
		    &commit);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success. This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc. This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to be
	 * in some consistent-but-allocated state.
	 */
	if (ehooks_are_default(ehooks)) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked. This should never happen. */
		not_reached();
	} else {
		if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) {
			goto label_done;
		}
		if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		/* Nothing worked. That's the application's problem. */
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

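/*
 * Initialize an edata to track a block's usable space, stamping it with the
 * next extent serial number.
 */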
static void
base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	edata_binit(edata, addr, size, sn);
}

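/*
 * Number of blocks currently linked into the base; with_new_block additionally
 * counts a block that is about to be added.
 */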
static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

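/*
 * In auto mode, once the base reaches its block-count threshold, mark all
 * existing blocks as huge and remember the switch, so that blocks added later
 * are madvised at allocation time (see base_block_alloc()).
 */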
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

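/*
 * Bump-allocate size bytes with the requested alignment from the front of
 * edata, trimming the extent to the remainder.  For example (hypothetical
 * numbers), with an extent starting at 0x7030, alignment == 64 and
 * size == 128: the gap is 0x7040 - 0x7030 == 16 bytes, the returned pointer is
 * 0x7040, and the extent shrinks by 16 + 128 bytes.
 */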
static void *
base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
	    alignment) - (uintptr_t)edata_addr_get(edata);
	ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
	assert(edata_bsize_get(edata) >= *gap_size + size);
	edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
	    *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
	    edata_sn_get(edata));
	return ret;
}

static void
base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
    void *addr, size_t size) {
	if (edata_bsize_get(edata) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(edata_bsize_get(edata) + 1) - 1;
		edata_heap_insert(&base->avail[index_floor], edata);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that is
		 * crossed by the new allocation. Adjust n_thp similarly when
		 * metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

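/*
 * Carve an aligned allocation out of edata, then reinsert any remainder into
 * the avail heaps and update stats.
 */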
static void *
base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges. Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
	    *pind_last + 1 : *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_edata_init(extent_sn_next, &block->edata,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static edata_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, ehooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		    && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->edata;
}

base_t *
b0get(void) {
	return b0;
}

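/*
 * Create a new base.  The base_t itself is carved out of the first block.
 * When metadata_use_hooks is false, metadata blocks are managed through the
 * default (mmap-based) hooks via ehooks_base, even though the caller's
 * extent_hooks are still recorded in ehooks for regular extent management.
 */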
base_t *
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
    bool metadata_use_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;

	/*
	 * The base will contain the ehooks eventually, but it itself is
	 * allocated using them. So we use some stack ehooks to bootstrap its
	 * memory, and then initialize the ehooks within the base_t.
	 */
	ehooks_t fake_ehooks;
	ehooks_init(&fake_ehooks, metadata_use_hooks ?
	    (extent_hooks_t *)extent_hooks :
	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);

	base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
	    &gap_size, base_size, base_alignment);
	ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind);
	ehooks_init(&base->ehooks_base, metadata_use_hooks ?
	    (extent_hooks_t *)extent_hooks :
	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		edata_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
		    >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
	    base_size);

	return base;
}

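/*
 * Unmap all blocks owned by the base.  The base_t itself lives inside one of
 * those blocks, so it must not be used afterwards; no locking is performed, so
 * the caller must guarantee the base is no longer in use.
 */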
void
base_delete(tsdn_t *tsdn, base_t *base) {
	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, ehooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

ehooks_t *
base_ehooks_get(base_t *base) {
	return &base->ehooks;
}

ehooks_t *
base_ehooks_get_for_metadata(base_t *base) {
	return &base->ehooks_base;
}

extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks =
	    ehooks_get_extent_hooks_ptr(&base->ehooks);
	ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
	return old_extent_hooks;
}

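/*
 * Core allocation path: round size up to the (quantum-aligned) alignment, then
 * search the per-size-class avail heaps for an extent of at least
 * usize + alignment - QUANTUM bytes, which is enough to satisfy the alignment
 * from any quantum-aligned starting address.  Fall back to allocating a new
 * block if nothing suitable is cached.
 */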
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;

	edata_t *edata = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
		edata = edata_heap_remove_first(&base->avail[i]);
		if (edata != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (edata == NULL) {
		/* Try to allocate more space. */
		edata = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (edata == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, edata, usize, alignment);
	if (esn != NULL) {
		*esn = (size_t)edata_sn_get(edata);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage. Upon success a
 * pointer to at least size bytes with specified alignment is returned. Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

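/*
 * Allocate an edata_t for extent bookkeeping, recording the extent serial
 * number of the space it was carved from.
 */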
edata_t *
base_alloc_edata(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
	    EDATA_ALIGNMENT, &esn);
	if (edata == NULL) {
		return NULL;
	}
	edata_esn_set(edata, esn);
	return edata;
}

void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks,
	    /* metadata_use_hooks */ true);
	return (b0 == NULL);
}