1#include "jemalloc/internal/jemalloc_preamble.h"
2#include "jemalloc/internal/jemalloc_internal_includes.h"
3
4#include "jemalloc/internal/psset.h"
5
6#include "jemalloc/internal/fb.h"
7
8void
9psset_init(psset_t *psset) {
10 for (unsigned i = 0; i < PSSET_NPSIZES; i++) {
11 hpdata_age_heap_new(&psset->pageslabs[i]);
12 }
13 fb_init(psset->pageslab_bitmap, PSSET_NPSIZES);
14 memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
15 memset(&psset->stats, 0, sizeof(psset->stats));
16 hpdata_empty_list_init(&psset->empty);
17 for (int i = 0; i < PSSET_NPURGE_LISTS; i++) {
18 hpdata_purge_list_init(&psset->to_purge[i]);
19 }
20 fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS);
21 hpdata_hugify_list_init(&psset->to_hugify);
22}
23
24static void
25psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) {
26 dst->npageslabs += src->npageslabs;
27 dst->nactive += src->nactive;
28 dst->ndirty += src->ndirty;
29}
30
31void
32psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
33 psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]);
34 psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]);
35 psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]);
36 psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]);
37 for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
38 psset_bin_stats_accum(&dst->nonfull_slabs[i][0],
39 &src->nonfull_slabs[i][0]);
40 psset_bin_stats_accum(&dst->nonfull_slabs[i][1],
41 &src->nonfull_slabs[i][1]);
42 }
43}
44
/*
 * The stats maintenance strategy is to remove a pageslab's contribution to the
 * stats when we call psset_update_begin, and re-add it (to a potentially new
 * bin) when we call psset_update_end.
 */
JEMALLOC_ALWAYS_INLINE void
psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps, bool insert) {
	/*
	 * One implementation for both directions: multiplying by (size_t)-1
	 * subtracts via well-defined unsigned wraparound.
	 */
	size_t mul = insert ? (size_t)1 : (size_t)-1;
	/* Hugeness selects which of the two bins in binstats[] to adjust. */
	size_t huge_idx = (size_t)hpdata_huge_get(ps);

	binstats[huge_idx].npageslabs += mul * 1;
	binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps);
	binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps);

	/* Keep the precomputed all-bins totals in sync with the per-bin ones. */
	psset->merged_stats.npageslabs += mul * 1;
	psset->merged_stats.nactive += mul * hpdata_nactive_get(ps);
	psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps);

	if (config_debug) {
		/*
		 * Re-derive the totals from every per-bin stat and verify the
		 * incrementally maintained merged copy agrees.
		 */
		psset_bin_stats_t check_stats = {0};
		for (size_t huge = 0; huge <= 1; huge++) {
			psset_bin_stats_accum(&check_stats,
			    &psset->stats.full_slabs[huge]);
			psset_bin_stats_accum(&check_stats,
			    &psset->stats.empty_slabs[huge]);
			for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) {
				psset_bin_stats_accum(&check_stats,
				    &psset->stats.nonfull_slabs[pind][huge]);
			}
		}
		assert(psset->merged_stats.npageslabs
		    == check_stats.npageslabs);
		assert(psset->merged_stats.nactive == check_stats.nactive);
		assert(psset->merged_stats.ndirty == check_stats.ndirty);
	}
}
82
/* Add ps's contribution to binstats (and the merged totals). */
static void
psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps) {
	psset_bin_stats_insert_remove(psset, binstats, ps, true);
}
88
/* Subtract ps's contribution from binstats (and the merged totals). */
static void
psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps) {
	psset_bin_stats_insert_remove(psset, binstats, ps, false);
}
94
95static void
96psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) {
97 hpdata_age_heap_remove(&psset->pageslabs[pind], ps);
98 if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
99 fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
100 }
101}
102
/* Add ps to the age heap for size class pind. */
static void
psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
	/*
	 * The emptiness check must precede the insert: a heap going from
	 * empty to nonempty gets its bitmap bit set.
	 */
	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
		fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
	}
	hpdata_age_heap_insert(&psset->pageslabs[pind], ps);
}
110
111static void
112psset_stats_insert(psset_t* psset, hpdata_t *ps) {
113 if (hpdata_empty(ps)) {
114 psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps);
115 } else if (hpdata_full(ps)) {
116 psset_bin_stats_insert(psset, psset->stats.full_slabs, ps);
117 } else {
118 size_t longest_free_range = hpdata_longest_free_range_get(ps);
119
120 pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
121 longest_free_range << LG_PAGE));
122 assert(pind < PSSET_NPSIZES);
123
124 psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind],
125 ps);
126 }
127}
128
129static void
130psset_stats_remove(psset_t *psset, hpdata_t *ps) {
131 if (hpdata_empty(ps)) {
132 psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps);
133 } else if (hpdata_full(ps)) {
134 psset_bin_stats_remove(psset, psset->stats.full_slabs, ps);
135 } else {
136 size_t longest_free_range = hpdata_longest_free_range_get(ps);
137
138 pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
139 longest_free_range << LG_PAGE));
140 assert(pind < PSSET_NPSIZES);
141
142 psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind],
143 ps);
144 }
145}
146
147/*
148 * Put ps into some container so that it can be found during future allocation
149 * requests.
150 */
151static void
152psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) {
153 assert(!hpdata_in_psset_alloc_container_get(ps));
154 hpdata_in_psset_alloc_container_set(ps, true);
155 if (hpdata_empty(ps)) {
156 /*
157 * This prepend, paired with popping the head in psset_fit,
158 * means we implement LIFO ordering for the empty slabs set,
159 * which seems reasonable.
160 */
161 hpdata_empty_list_prepend(&psset->empty, ps);
162 } else if (hpdata_full(ps)) {
163 /*
164 * We don't need to keep track of the full slabs; we're never
165 * going to return them from a psset_pick_alloc call.
166 */
167 } else {
168 size_t longest_free_range = hpdata_longest_free_range_get(ps);
169
170 pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
171 longest_free_range << LG_PAGE));
172 assert(pind < PSSET_NPSIZES);
173
174 psset_hpdata_heap_insert(psset, pind, ps);
175 }
176}
177
178/* Remove ps from those collections. */
179static void
180psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) {
181 assert(hpdata_in_psset_alloc_container_get(ps));
182 hpdata_in_psset_alloc_container_set(ps, false);
183
184 if (hpdata_empty(ps)) {
185 hpdata_empty_list_remove(&psset->empty, ps);
186 } else if (hpdata_full(ps)) {
187 /* Same as above -- do nothing in this case. */
188 } else {
189 size_t longest_free_range = hpdata_longest_free_range_get(ps);
190
191 pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
192 longest_free_range << LG_PAGE));
193 assert(pind < PSSET_NPSIZES);
194
195 psset_hpdata_heap_remove(psset, pind, ps);
196 }
197}
198
199static size_t
200psset_purge_list_ind(hpdata_t *ps) {
201 size_t ndirty = hpdata_ndirty_get(ps);
202 /* Shouldn't have something with no dirty pages purgeable. */
203 assert(ndirty > 0);
204 /*
205 * Higher indices correspond to lists we'd like to purge earlier; make
206 * the two highest indices correspond to empty lists, which we attempt
207 * to purge before purging any non-empty list. This has two advantages:
208 * - Empty page slabs are the least likely to get reused (we'll only
209 * pick them for an allocation if we have no other choice).
210 * - Empty page slabs can purge every dirty page they contain in a
211 * single call, which is not usually the case.
212 *
213 * We purge hugeified empty slabs before nonhugeified ones, on the basis
214 * that they are fully dirty, while nonhugified slabs might not be, so
215 * we free up more pages more easily.
216 */
217 if (hpdata_nactive_get(ps) == 0) {
218 if (hpdata_huge_get(ps)) {
219 return PSSET_NPURGE_LISTS - 1;
220 } else {
221 return PSSET_NPURGE_LISTS - 2;
222 }
223 }
224
225 pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE));
226 /*
227 * For non-empty slabs, we may reuse them again. Prefer purging
228 * non-hugeified slabs before hugeified ones then, among pages of
229 * similar dirtiness. We still get some benefit from the hugification.
230 */
231 return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1);
232}
233
234static void
235psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
236 /*
237 * Remove the hpdata from its purge list (if it's in one). Even if it's
238 * going to stay in the same one, by appending it during
239 * psset_update_end, we move it to the end of its queue, so that we
240 * purge LRU within a given dirtiness bucket.
241 */
242 if (hpdata_purge_allowed_get(ps)) {
243 size_t ind = psset_purge_list_ind(ps);
244 hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
245 hpdata_purge_list_remove(purge_list, ps);
246 if (hpdata_purge_list_empty(purge_list)) {
247 fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
248 }
249 }
250}
251
252static void
253psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
254 if (hpdata_purge_allowed_get(ps)) {
255 size_t ind = psset_purge_list_ind(ps);
256 hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
257 if (hpdata_purge_list_empty(purge_list)) {
258 fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
259 }
260 hpdata_purge_list_append(purge_list, ps);
261 }
262
263}
264
/*
 * Begin a metadata update on ps: pull its contribution out of the stats and
 * out of the alloc/purge containers, so it can be re-binned by
 * psset_update_end once the caller's changes are done.
 */
void
psset_update_begin(psset_t *psset, hpdata_t *ps) {
	hpdata_assert_consistent(ps);
	assert(hpdata_in_psset_get(ps));
	hpdata_updating_set(ps, true);
	/* Stats are removed now and re-added (possibly to a new bin) at end. */
	psset_stats_remove(psset, ps);
	if (hpdata_in_psset_alloc_container_get(ps)) {
		/*
		 * Some metadata updates can break alloc container invariants
		 * (e.g. the longest free range determines the hpdata_heap_t the
		 * pageslab lives in).
		 */
		assert(hpdata_alloc_allowed_get(ps));
		psset_alloc_container_remove(psset, ps);
	}
	psset_maybe_remove_purge_list(psset, ps);
	/*
	 * We don't update presence in the hugify list; we try to keep it FIFO,
	 * even in the presence of other metadata updates. We'll update
	 * presence at the end of the metadata update if necessary.
	 */
}
287
288void
289psset_update_end(psset_t *psset, hpdata_t *ps) {
290 assert(hpdata_in_psset_get(ps));
291 hpdata_updating_set(ps, false);
292 psset_stats_insert(psset, ps);
293
294 /*
295 * The update begin should have removed ps from whatever alloc container
296 * it was in.
297 */
298 assert(!hpdata_in_psset_alloc_container_get(ps));
299 if (hpdata_alloc_allowed_get(ps)) {
300 psset_alloc_container_insert(psset, ps);
301 }
302 psset_maybe_insert_purge_list(psset, ps);
303
304 if (hpdata_hugify_allowed_get(ps)
305 && !hpdata_in_psset_hugify_container_get(ps)) {
306 hpdata_in_psset_hugify_container_set(ps, true);
307 hpdata_hugify_list_append(&psset->to_hugify, ps);
308 } else if (!hpdata_hugify_allowed_get(ps)
309 && hpdata_in_psset_hugify_container_get(ps)) {
310 hpdata_in_psset_hugify_container_set(ps, false);
311 hpdata_hugify_list_remove(&psset->to_hugify, ps);
312 }
313 hpdata_assert_consistent(ps);
314}
315
316hpdata_t *
317psset_pick_alloc(psset_t *psset, size_t size) {
318 assert((size & PAGE_MASK) == 0);
319 assert(size <= HUGEPAGE);
320
321 pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size));
322 pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES,
323 (size_t)min_pind);
324 if (pind == PSSET_NPSIZES) {
325 return hpdata_empty_list_first(&psset->empty);
326 }
327 hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
328 if (ps == NULL) {
329 return NULL;
330 }
331
332 hpdata_assert_consistent(ps);
333
334 return ps;
335}
336
337hpdata_t *
338psset_pick_purge(psset_t *psset) {
339 ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS,
340 PSSET_NPURGE_LISTS - 1);
341 if (ind_ssz < 0) {
342 return NULL;
343 }
344 pszind_t ind = (pszind_t)ind_ssz;
345 assert(ind < PSSET_NPURGE_LISTS);
346 hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
347 assert(ps != NULL);
348 return ps;
349}
350
/*
 * Returns the next slab queued for hugification (the list is kept FIFO), or
 * NULL if none are queued.
 */
hpdata_t *
psset_pick_hugify(psset_t *psset) {
	return hpdata_hugify_list_first(&psset->to_hugify);
}
355
356void
357psset_insert(psset_t *psset, hpdata_t *ps) {
358 hpdata_in_psset_set(ps, true);
359
360 psset_stats_insert(psset, ps);
361 if (hpdata_alloc_allowed_get(ps)) {
362 psset_alloc_container_insert(psset, ps);
363 }
364 psset_maybe_insert_purge_list(psset, ps);
365
366 if (hpdata_hugify_allowed_get(ps)) {
367 hpdata_in_psset_hugify_container_set(ps, true);
368 hpdata_hugify_list_append(&psset->to_hugify, ps);
369 }
370}
371
372void
373psset_remove(psset_t *psset, hpdata_t *ps) {
374 hpdata_in_psset_set(ps, false);
375
376 psset_stats_remove(psset, ps);
377 if (hpdata_in_psset_alloc_container_get(ps)) {
378 psset_alloc_container_remove(psset, ps);
379 }
380 psset_maybe_remove_purge_list(psset, ps);
381 if (hpdata_in_psset_hugify_container_get(ps)) {
382 hpdata_in_psset_hugify_container_set(ps, false);
383 hpdata_hugify_list_remove(&psset->to_hugify, ps);
384 }
385}
386