#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/san.h"

static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size,
    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
    bool *deferred_work_generated);
static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool *deferred_work_generated);
static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated);
static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);

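/*
 * Map an extent state (dirty or muzzy) to the matching decay policy, decay
 * stats, and ecache within the pac.
 */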
static inline void
pac_decay_data_get(pac_t *pac, extent_state_t state,
    decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
	switch (state) {
	case extent_state_dirty:
		*r_decay = &pac->decay_dirty;
		*r_decay_stats = &pac->stats->decay_dirty;
		*r_ecache = &pac->ecache_dirty;
		return;
	case extent_state_muzzy:
		*r_decay = &pac->decay_muzzy;
		*r_decay_stats = &pac->stats->decay_muzzy;
		*r_ecache = &pac->ecache_muzzy;
		return;
	default:
		unreachable();
	}
}

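/*
 * Initialize the page allocator cache: its three ecaches (dirty, muzzy,
 * retained), the growth state, and the dirty/muzzy decay policies. Returns
 * true on error, false on success.
 */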
bool
pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
    edata_cache_t *edata_cache, nstime_t *cur_time,
    size_t pac_oversize_threshold, ssize_t dirty_decay_ms,
    ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) {
	unsigned ind = base_ind_get(base);
	/*
	 * Delay coalescing for dirty extents despite the disruptive effect on
	 * memory layout for best-fit extent allocation, since cached extents
	 * are likely to be reused soon after deallocation, and the cost of
	 * merging/splitting extents is non-trivial.
	 */
	if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind,
	    /* delay_coalesce */ true)) {
		return true;
	}
	/*
	 * Coalesce muzzy extents immediately, because operations on them are
	 * in the critical path much less often than for dirty extents.
	 */
	if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind,
	    /* delay_coalesce */ false)) {
		return true;
	}
	/*
	 * Coalesce retained extents immediately, in part because they will
	 * never be evicted (and therefore there's no opportunity for delayed
	 * coalescing), but also because operations on retained extents are not
	 * in the critical path.
	 */
	if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained,
	    ind, /* delay_coalesce */ false)) {
		return true;
	}
	exp_grow_init(&pac->exp_grow);
	if (malloc_mutex_init(&pac->grow_mtx, "extent_grow",
	    WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) {
		return true;
	}
	atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold,
	    ATOMIC_RELAXED);
	if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) {
		return true;
	}
	if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) {
		return true;
	}
	if (san_bump_alloc_init(&pac->sba)) {
		return true;
	}

	pac->base = base;
	pac->emap = emap;
	pac->edata_cache = edata_cache;
	pac->stats = pac_stats;
	pac->stats_mtx = stats_mtx;
	atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);

	pac->pai.alloc = &pac_alloc_impl;
	pac->pai.alloc_batch = &pai_alloc_batch_default;
	pac->pai.expand = &pac_expand_impl;
	pac->pai.shrink = &pac_shrink_impl;
	pac->pai.dalloc = &pac_dalloc_impl;
	pac->pai.dalloc_batch = &pai_dalloc_batch_default;
	pac->pai.time_until_deferred_work = &pac_time_until_deferred_work;

	return false;
}

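/*
 * When muzzy decay is disabled (decay time of zero), dirty extents are never
 * demoted to the muzzy state, so the muzzy ecache stays empty and can be
 * skipped on the allocation path.
 */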
static inline bool
pac_may_have_muzzy(pac_t *pac) {
	return pac_decay_ms_get(pac, extent_state_muzzy) != 0;
}

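/*
 * Try the dirty ecache first, then (if muzzy decay is enabled) the muzzy
 * ecache, and finally fall back to growing from retained extents. Newly
 * mapped memory from the retained path is accounted in pac_mapped.
 */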
static edata_t *
pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
    size_t alignment, bool zero, bool guarded) {
	assert(!guarded || alignment <= PAGE);

	edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty,
	    NULL, size, alignment, zero, guarded);

	if (edata == NULL && pac_may_have_muzzy(pac)) {
		edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy,
		    NULL, size, alignment, zero, guarded);
	}
	if (edata == NULL) {
		edata = ecache_alloc_grow(tsdn, pac, ehooks,
		    &pac->ecache_retained, NULL, size, alignment, zero,
		    guarded);
		if (config_stats && edata != NULL) {
			atomic_fetch_add_zu(&pac->stats->pac_mapped, size,
			    ATOMIC_RELAXED);
		}
	}

	return edata;
}

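/*
 * Create a fresh guarded extent. Frequently reused allocations go through the
 * san bump allocator when it is enabled; otherwise an unguarded extent large
 * enough to hold the guards is allocated and guard pages are installed on
 * both sides.
 */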
static edata_t *
pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
    size_t alignment, bool zero, bool frequent_reuse) {
	assert(alignment <= PAGE);

	edata_t *edata;
	if (san_bump_enabled() && frequent_reuse) {
		edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size,
		    zero);
	} else {
		size_t size_with_guards = san_two_side_guarded_sz(size);
		/* Alloc a non-guarded extent first. */
		edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards,
		    /* alignment */ PAGE, zero, /* guarded */ false);
		if (edata != NULL) {
			/* Add guards around it. */
			assert(edata_size_get(edata) == size_with_guards);
			san_guard_pages_two_sided(tsdn, ehooks, edata,
			    pac->emap, true);
		}
	}
	assert(edata == NULL || (edata_guarded_get(edata) &&
	    edata_size_get(edata) == size));

	return edata;
}

static edata_t *
pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
    bool zero, bool guarded, bool frequent_reuse,
    bool *deferred_work_generated) {
	pac_t *pac = (pac_t *)self;
	ehooks_t *ehooks = pac_ehooks_get(pac);

	edata_t *edata = NULL;
	/*
	 * The condition is an optimization: guarded allocations that are not
	 * frequently reused are never put in the ecache, and pac_alloc_real
	 * does not grow retained for guarded allocations, so pac_alloc_real
	 * would always return NULL for them.
	 */
	if (!guarded || frequent_reuse) {
		edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment,
		    zero, guarded);
	}
	if (edata == NULL && guarded) {
		/* No cached guarded extents; creating a new one. */
		edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size,
		    alignment, zero, frequent_reuse);
	}

	return edata;
}

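/*
 * Grow edata in place by allocating a physically contiguous trail (from the
 * dirty, then muzzy, then retained ecaches) and merging it onto the end.
 * Returns true on failure. Newly mapped memory from the retained path is
 * accounted in pac_mapped.
 */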
static bool
pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool zero, bool *deferred_work_generated) {
	pac_t *pac = (pac_t *)self;
	ehooks_t *ehooks = pac_ehooks_get(pac);

	size_t mapped_add = 0;
	size_t expand_amount = new_size - old_size;

	if (ehooks_merge_will_fail(ehooks)) {
		return true;
	}
	edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty,
	    edata, expand_amount, PAGE, zero, /* guarded */ false);
	if (trail == NULL) {
		trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy,
		    edata, expand_amount, PAGE, zero, /* guarded */ false);
	}
	if (trail == NULL) {
		trail = ecache_alloc_grow(tsdn, pac, ehooks,
		    &pac->ecache_retained, edata, expand_amount, PAGE, zero,
		    /* guarded */ false);
		mapped_add = expand_amount;
	}
	if (trail == NULL) {
		return true;
	}
	if (extent_merge_wrapper(tsdn, pac, ehooks, edata, trail)) {
		extent_dalloc_wrapper(tsdn, pac, ehooks, trail);
		return true;
	}
	if (config_stats && mapped_add > 0) {
		atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add,
		    ATOMIC_RELAXED);
	}
	return false;
}

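/*
 * Shrink edata in place by splitting off the excess and returning it to the
 * dirty ecache. Returns true on failure. Purging of the trail is deferred to
 * the decay machinery, hence *deferred_work_generated is set.
 */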
static bool
pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool *deferred_work_generated) {
	pac_t *pac = (pac_t *)self;
	ehooks_t *ehooks = pac_ehooks_get(pac);

	size_t shrink_amount = old_size - new_size;

	if (ehooks_split_will_fail(ehooks)) {
		return true;
	}

	edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata,
	    new_size, shrink_amount, /* holding_core_locks */ false);
	if (trail == NULL) {
		return true;
	}
	ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail);
	*deferred_work_generated = true;
	return false;
}

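/*
 * Return an extent to the dirty ecache; actual purging is deferred to the
 * decay machinery. Guarded extents may first have their guard pages removed
 * (see the comment below).
 */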
static void
pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated) {
	pac_t *pac = (pac_t *)self;
	ehooks_t *ehooks = pac_ehooks_get(pac);

	if (edata_guarded_get(edata)) {
		/*
		 * Because cached guarded extents do exact fit only, large
		 * guarded extents are restored on dalloc eagerly (otherwise
		 * they will not be reused efficiently). Slab sizes have a
		 * limited number of size classes, and tend to cycle faster.
		 *
		 * In the case where coalesce is restrained (VirtualFree on
		 * Windows), guarded extents are also not cached -- otherwise
		 * during arena destroy / reset, the retained extents would not
		 * be whole regions (i.e. they are split between regular and
		 * guarded).
		 */
		if (!edata_slab_get(edata) || !maps_coalesce) {
			assert(edata_size_get(edata) >= SC_LARGE_MINCLASS ||
			    !maps_coalesce);
			san_unguard_pages_two_sided(tsdn, ehooks, edata,
			    pac->emap);
		}
	}

	ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata);
	/* Purging of deallocated pages is deferred. */
	*deferred_work_generated = true;
}

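/*
 * Compute the time (in ns) until the next purge is due for the given decay
 * policy. If the decay mutex is contended, report the minimum deferral
 * interval rather than blocking.
 */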
static inline uint64_t
pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) {
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use minimal interval if decay is contended. */
		return BACKGROUND_THREAD_DEFERRED_MIN;
	}
	uint64_t result = decay_ns_until_purge(decay, npages,
	    ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD);

	malloc_mutex_unlock(tsdn, &decay->mtx);
	return result;
}

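/*
 * Time until deferred work is the sooner of the dirty and muzzy purge
 * deadlines; short-circuit if the dirty deadline is already the minimum.
 */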
static uint64_t
pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
	uint64_t time;
	pac_t *pac = (pac_t *)self;

	time = pac_ns_until_purge(tsdn,
	    &pac->decay_dirty,
	    ecache_npages_get(&pac->ecache_dirty));
	if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
		return time;
	}

	uint64_t muzzy = pac_ns_until_purge(tsdn,
	    &pac->decay_muzzy,
	    ecache_npages_get(&pac->ecache_muzzy));
	if (muzzy < time) {
		time = muzzy;
	}
	return time;
}

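/*
 * Read and/or update the limit on how far retained-extent growth can expand.
 * A new limit is rounded down to a page size class; returns true if it is too
 * large to be represented.
 */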
bool
pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit,
    size_t *new_limit) {
	pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0);
	if (new_limit != NULL) {
		size_t limit = *new_limit;
		/* Grow no more than the new limit. */
		if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) {
			return true;
		}
	}

	malloc_mutex_lock(tsdn, &pac->grow_mtx);
	if (old_limit != NULL) {
		*old_limit = sz_pind2sz(pac->exp_grow.limit);
	}
	if (new_limit != NULL) {
		pac->exp_grow.limit = new_ind;
	}
	malloc_mutex_unlock(tsdn, &pac->grow_mtx);

	return false;
}

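/*
 * Evict extents from ecache until it holds no more than npages_limit pages,
 * stashing at most npages_decay_max pages onto the result list. Returns the
 * number of pages stashed.
 */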
static size_t
pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache,
    size_t npages_limit, size_t npages_decay_max,
    edata_list_inactive_t *result) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	ehooks_t *ehooks = pac_ehooks_get(pac);

	/* Stash extents according to npages_limit. */
	size_t nstashed = 0;
	while (nstashed < npages_decay_max) {
		edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache,
		    npages_limit);
		if (edata == NULL) {
			break;
		}
		edata_list_inactive_append(result, edata);
		nstashed += edata_size_get(edata) >> LG_PAGE;
	}
	return nstashed;
}

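/*
 * Purge the stashed extents: dirty extents are lazily purged and demoted to
 * the muzzy ecache when muzzy decay is enabled; otherwise (and for muzzy
 * extents) they are handed back via extent_dalloc_wrapper. Updates the decay
 * stats and returns the number of pages purged.
 */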
static size_t
pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
    pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay,
    edata_list_inactive_t *decay_extents) {
	bool err;

	size_t nmadvise = 0;
	size_t nunmapped = 0;
	size_t npurged = 0;

	ehooks_t *ehooks = pac_ehooks_get(pac);

	bool try_muzzy = !fully_decay
	    && pac_decay_ms_get(pac, extent_state_muzzy) != 0;

	for (edata_t *edata = edata_list_inactive_first(decay_extents); edata !=
	    NULL; edata = edata_list_inactive_first(decay_extents)) {
		edata_list_inactive_remove(decay_extents, edata);

		size_t size = edata_size_get(edata);
		size_t npages = size >> LG_PAGE;

		nmadvise++;
		npurged += npages;

		switch (ecache->state) {
		case extent_state_active:
			not_reached();
		case extent_state_dirty:
			if (try_muzzy) {
				err = extent_purge_lazy_wrapper(tsdn, ehooks,
				    edata, /* offset */ 0, size);
				if (!err) {
					ecache_dalloc(tsdn, pac, ehooks,
					    &pac->ecache_muzzy, edata);
					break;
				}
			}
			JEMALLOC_FALLTHROUGH;
		case extent_state_muzzy:
			extent_dalloc_wrapper(tsdn, pac, ehooks, edata);
			nunmapped += npages;
			break;
		case extent_state_retained:
		default:
			not_reached();
		}
	}

	if (config_stats) {
		LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx);
		locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx),
		    &decay_stats->npurge, 1);
		locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx),
		    &decay_stats->nmadvise, nmadvise);
		locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx),
		    &decay_stats->purged, npurged);
		LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx);
		atomic_fetch_sub_zu(&pac->stats->pac_mapped,
		    nunmapped << LG_PAGE, ATOMIC_RELAXED);
	}

	return npurged;
}

/*
 * npages_limit: Decay at most npages_decay_max pages without violating the
 * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper
 * bound on number of pages in order to prevent unbounded growth (namely in
 * stashed), otherwise unbounded new pages could be added to extents during the
 * current decay run, so that the purging thread never finishes.
 */
static void
pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
    pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay,
    size_t npages_limit, size_t npages_decay_max) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 1);

	if (decay->purging || npages_decay_max == 0) {
		return;
	}
	decay->purging = true;
	malloc_mutex_unlock(tsdn, &decay->mtx);

	edata_list_inactive_t decay_extents;
	edata_list_inactive_init(&decay_extents);
	size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit,
	    npages_decay_max, &decay_extents);
	if (npurge != 0) {
		size_t npurged = pac_decay_stashed(tsdn, pac, decay,
		    decay_stats, ecache, fully_decay, &decay_extents);
		assert(npurged == npurge);
	}

	malloc_mutex_lock(tsdn, &decay->mtx);
	decay->purging = false;
}

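/*
 * Purge everything currently in the ecache, independent of the decay curve.
 * The caller must hold decay->mtx.
 */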
void
pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
    pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) {
	malloc_mutex_assert_owner(tsdn, &decay->mtx);
	pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, fully_decay,
	    /* npages_limit */ 0, ecache_npages_get(ecache));
}

static void
pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
    pac_decay_stats_t *decay_stats, ecache_t *ecache,
    size_t current_npages, size_t npages_limit) {
	if (current_npages > npages_limit) {
		pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache,
		    /* fully_decay */ false, npages_limit,
		    current_npages - npages_limit);
	}
}

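/*
 * Advance the decay epoch if its deadline has passed and purge down to the
 * resulting page limit, subject to the purge eagerness setting. The caller
 * must hold decay->mtx. Returns whether the epoch advanced.
 */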
bool
pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
    pac_decay_stats_t *decay_stats, ecache_t *ecache,
    pac_purge_eagerness_t eagerness) {
	malloc_mutex_assert_owner(tsdn, &decay->mtx);

	/* Purge all or nothing if the option is disabled. */
	ssize_t decay_ms = decay_ms_read(decay);
	if (decay_ms <= 0) {
		if (decay_ms == 0) {
			pac_decay_to_limit(tsdn, pac, decay, decay_stats,
			    ecache, /* fully_decay */ false,
			    /* npages_limit */ 0, ecache_npages_get(ecache));
		}
		return false;
	}

	/*
	 * If the deadline has been reached, advance to the current epoch and
	 * purge to the new limit if necessary. Note that dirty pages created
	 * during the current epoch are not subject to purging until a future
	 * epoch, so purging only happens when the epoch advances or when a
	 * background thread triggers it as a scheduled event.
	 */
	nstime_t time;
	nstime_init_update(&time);
	size_t npages_current = ecache_npages_get(ecache);
	bool epoch_advanced = decay_maybe_advance_epoch(decay, &time,
	    npages_current);
	if (eagerness == PAC_PURGE_ALWAYS
	    || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) {
		size_t npages_limit = decay_npages_limit_get(decay);
		pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache,
		    npages_current, npages_limit);
	}

	return epoch_advanced;
}

bool
pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
    ssize_t decay_ms, pac_purge_eagerness_t eagerness) {
	decay_t *decay;
	pac_decay_stats_t *decay_stats;
	ecache_t *ecache;
	pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache);

	if (!decay_ms_valid(decay_ms)) {
		return true;
	}

	malloc_mutex_lock(tsdn, &decay->mtx);
	/*
	 * Restart decay backlog from scratch, which may cause many dirty pages
	 * to be immediately purged. It would conceptually be possible to map
	 * the old backlog onto the new backlog, but there is no justification
	 * for such complexity since decay_ms changes are intended to be
	 * infrequent, either between the {-1, 0, >0} states, or a one-time
	 * arbitrary change during initial arena configuration.
	 */
	nstime_t cur_time;
	nstime_init_update(&cur_time);
	decay_reinit(decay, &cur_time, decay_ms);
	pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness);
	malloc_mutex_unlock(tsdn, &decay->mtx);

	return false;
}

ssize_t
pac_decay_ms_get(pac_t *pac, extent_state_t state) {
	decay_t *decay;
	pac_decay_stats_t *decay_stats;
	ecache_t *ecache;
	pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache);
	return decay_ms_read(decay);
}

void
pac_reset(tsdn_t *tsdn, pac_t *pac) {
	/*
	 * No-op for now; purging is still done at the arena level. It should
	 * get moved in here, though.
	 */
	(void)tsdn;
	(void)pac;
}

void
pac_destroy(tsdn_t *tsdn, pac_t *pac) {
	assert(ecache_npages_get(&pac->ecache_dirty) == 0);
	assert(ecache_npages_get(&pac->ecache_muzzy) == 0);
	/*
	 * Iterate over the retained extents and destroy them. This gives the
	 * extent allocator underlying the extent hooks an opportunity to unmap
	 * all retained memory without having to keep its own metadata
	 * structures. In practice, virtual memory for dss-allocated extents is
	 * leaked here, so best practice is to avoid dss for arenas to be
	 * destroyed, or provide custom extent hooks that track retained
	 * dss-based extents for later reuse.
	 */
	ehooks_t *ehooks = pac_ehooks_get(pac);
	edata_t *edata;
	while ((edata = ecache_evict(tsdn, pac, ehooks,
	    &pac->ecache_retained, 0)) != NULL) {
		extent_destroy_wrapper(tsdn, pac, ehooks, edata);
	}
}