#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/*
 * In auto mode, arenas switch to huge pages for the base allocator on the
 * second base block. a0 switches to thp on the 5th block (after 20 megabytes
 * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
 */

#define BASE_AUTO_THP_THRESHOLD 2
#define BASE_AUTO_THP_THRESHOLD_A0 5

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

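/*
 * Map a new hugepage-aligned region for base metadata, either directly via
 * mmap (default hooks) or through the user-supplied extent hooks.
 */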
static void *
base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (ehooks_are_default(ehooks)) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
		if (have_madvise_huge && addr) {
			pages_set_thp_state(addr, size);
		}
	} else {
		addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
		    &commit);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success. This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc. This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to be
	 * in some consistent-but-allocated state.
	 */
	if (ehooks_are_default(ehooks)) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked. This should never happen. */
		not_reached();
	} else {
		if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) {
			goto label_done;
		}
		if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		/* Nothing worked. That's the application's problem. */
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

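/*
 * Initialize an edata_t that describes the usable space of a base block,
 * assigning it the next extent serial number.
 */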
static void
base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	edata_binit(edata, addr, size, sn);
}

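/*
 * Count the blocks currently linked into the base, optionally counting one
 * extra for a block that is about to be added.
 */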
static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

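/*
 * In metadata_thp "auto" mode, switch the base to transparent huge pages once
 * the block count reaches the threshold, and retroactively madvise the
 * existing blocks huge.
 */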
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

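/*
 * Carve an aligned allocation of the requested size off the front of the
 * extent's remaining space, shrinking the extent past the allocation (plus
 * any alignment gap) and returning a pointer to the allocated region.
 */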
static void *
base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
	    alignment) - (uintptr_t)edata_addr_get(edata);
	ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
	assert(edata_bsize_get(edata) >= *gap_size + size);
	edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
	    *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
	    edata_sn_get(edata));
	return ret;
}

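/*
 * After a bump allocation, reinsert any leftover space into the avail heaps
 * and update the base's stats counters.
 */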
static void
base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
    void *addr, size_t size) {
	if (edata_bsize_get(edata) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(edata_bsize_get(edata) + 1) - 1;
		edata_heap_insert(&base->avail[index_floor], edata);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that is
		 * crossed by the new allocation. Adjust n_thp similarly when
		 * metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

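/* Bump-allocate from an extent and record the result in the base's stats. */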
static void *
base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges. Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
	    *pind_last + 1 : *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_edata_init(extent_sn_next, &block->edata,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static edata_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, ehooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		    && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->edata;
}

base_t *
b0get(void) {
	return b0;
}

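/*
 * Create a new base, bootstrapping its own metadata out of the first block it
 * maps (the base_t itself lives inside that block).
 */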
base_t *
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
    bool metadata_use_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;

	/*
	 * The base will contain the ehooks eventually, but it itself is
	 * allocated using them. So we use some stack ehooks to bootstrap its
	 * memory, and then initialize the ehooks within the base_t.
	 */
	ehooks_t fake_ehooks;
	ehooks_init(&fake_ehooks, metadata_use_hooks ?
	    (extent_hooks_t *)extent_hooks :
	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);

	base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
	    &gap_size, base_size, base_alignment);
	ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind);
	ehooks_init(&base->ehooks_base, metadata_use_hooks ?
	    (extent_hooks_t *)extent_hooks :
	    (extent_hooks_t *)&ehooks_default_extent_hooks, ind);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		edata_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
		    >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
	    base_size);

	return base;
}

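/* Destroy a base: unmap every block it owns, including the one holding it. */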
void
base_delete(tsdn_t *tsdn, base_t *base) {
	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, ehooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

ehooks_t *
base_ehooks_get(base_t *base) {
	return &base->ehooks;
}

ehooks_t *
base_ehooks_get_for_metadata(base_t *base) {
	return &base->ehooks_base;
}

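/*
 * Replace the extent hooks used for user extent allocation, returning the
 * previous hooks. Metadata allocation (ehooks_base) is not affected.
 */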
extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks =
	    ehooks_get_extent_hooks_ptr(&base->ehooks);
	ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
	return old_extent_hooks;
}

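/*
 * Common allocation path: search the avail heaps for an extent large enough
 * to hold the worst-case aligned size, falling back to mapping a new block,
 * then bump-allocate from the chosen extent.
 */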
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;

	edata_t *edata = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
		edata = edata_heap_remove_first(&base->avail[i]);
		if (edata != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (edata == NULL) {
		/* Try to allocate more space. */
		edata = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (edata == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, edata, usize, alignment);
	if (esn != NULL) {
		*esn = (size_t)edata_sn_get(edata);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage. Upon success a
 * pointer to at least size bytes with specified alignment is returned. Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

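/*
 * Allocate an edata_t from base metadata space and tag it with its extent
 * serial number.
 */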
edata_t *
base_alloc_edata(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
	    EDATA_ALIGNMENT, &esn);
	if (edata == NULL) {
		return NULL;
	}
	edata_esn_set(edata, esn);
	return edata;
}

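/* Snapshot the base's stats counters under the base mutex. */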
void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

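/*
 * Bootstrap the global base (b0) with the default extent hooks; returns true
 * on failure.
 */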
bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks,
	    /* metadata_use_hooks */ true);
	return (b0 == NULL);
}