1 | #include "jemalloc/internal/jemalloc_preamble.h" |
2 | #include "jemalloc/internal/jemalloc_internal_includes.h" |
3 | |
4 | #include "jemalloc/internal/psset.h" |
5 | |
6 | #include "jemalloc/internal/fb.h" |
7 | |
8 | void |
9 | psset_init(psset_t *psset) { |
10 | for (unsigned i = 0; i < PSSET_NPSIZES; i++) { |
11 | hpdata_age_heap_new(&psset->pageslabs[i]); |
12 | } |
13 | fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); |
14 | memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); |
15 | memset(&psset->stats, 0, sizeof(psset->stats)); |
16 | hpdata_empty_list_init(&psset->empty); |
17 | for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { |
18 | hpdata_purge_list_init(&psset->to_purge[i]); |
19 | } |
20 | fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS); |
21 | hpdata_hugify_list_init(&psset->to_hugify); |
22 | } |
23 | |
24 | static void |
25 | psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { |
26 | dst->npageslabs += src->npageslabs; |
27 | dst->nactive += src->nactive; |
28 | dst->ndirty += src->ndirty; |
29 | } |
30 | |
31 | void |
32 | psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { |
33 | psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); |
34 | psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); |
35 | psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]); |
36 | psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]); |
37 | for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { |
38 | psset_bin_stats_accum(&dst->nonfull_slabs[i][0], |
39 | &src->nonfull_slabs[i][0]); |
40 | psset_bin_stats_accum(&dst->nonfull_slabs[i][1], |
41 | &src->nonfull_slabs[i][1]); |
42 | } |
43 | } |
44 | |
45 | /* |
46 | * The stats maintenance strategy is to remove a pageslab's contribution to the |
47 | * stats when we call psset_update_begin, and re-add it (to a potentially new |
48 | * bin) when we call psset_update_end. |
49 | */ |
50 | JEMALLOC_ALWAYS_INLINE void |
51 | psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, |
52 | hpdata_t *ps, bool insert) { |
53 | size_t mul = insert ? (size_t)1 : (size_t)-1; |
54 | size_t huge_idx = (size_t)hpdata_huge_get(ps); |
55 | |
56 | binstats[huge_idx].npageslabs += mul * 1; |
57 | binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); |
58 | binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); |
59 | |
60 | psset->merged_stats.npageslabs += mul * 1; |
61 | psset->merged_stats.nactive += mul * hpdata_nactive_get(ps); |
62 | psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps); |
63 | |
64 | if (config_debug) { |
65 | psset_bin_stats_t check_stats = {0}; |
66 | for (size_t huge = 0; huge <= 1; huge++) { |
67 | psset_bin_stats_accum(&check_stats, |
68 | &psset->stats.full_slabs[huge]); |
69 | psset_bin_stats_accum(&check_stats, |
70 | &psset->stats.empty_slabs[huge]); |
71 | for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { |
72 | psset_bin_stats_accum(&check_stats, |
73 | &psset->stats.nonfull_slabs[pind][huge]); |
74 | } |
75 | } |
76 | assert(psset->merged_stats.npageslabs |
77 | == check_stats.npageslabs); |
78 | assert(psset->merged_stats.nactive == check_stats.nactive); |
79 | assert(psset->merged_stats.ndirty == check_stats.ndirty); |
80 | } |
81 | } |
82 | |
83 | static void |
84 | psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats, |
85 | hpdata_t *ps) { |
86 | psset_bin_stats_insert_remove(psset, binstats, ps, true); |
87 | } |
88 | |
89 | static void |
90 | psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, |
91 | hpdata_t *ps) { |
92 | psset_bin_stats_insert_remove(psset, binstats, ps, false); |
93 | } |
94 | |
95 | static void |
96 | psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { |
97 | hpdata_age_heap_remove(&psset->pageslabs[pind], ps); |
98 | if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { |
99 | fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); |
100 | } |
101 | } |
102 | |
103 | static void |
104 | psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { |
105 | if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { |
106 | fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); |
107 | } |
108 | hpdata_age_heap_insert(&psset->pageslabs[pind], ps); |
109 | } |
110 | |
111 | static void |
112 | psset_stats_insert(psset_t* psset, hpdata_t *ps) { |
113 | if (hpdata_empty(ps)) { |
114 | psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps); |
115 | } else if (hpdata_full(ps)) { |
116 | psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); |
117 | } else { |
118 | size_t longest_free_range = hpdata_longest_free_range_get(ps); |
119 | |
120 | pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( |
121 | longest_free_range << LG_PAGE)); |
122 | assert(pind < PSSET_NPSIZES); |
123 | |
124 | psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], |
125 | ps); |
126 | } |
127 | } |
128 | |
129 | static void |
130 | psset_stats_remove(psset_t *psset, hpdata_t *ps) { |
131 | if (hpdata_empty(ps)) { |
132 | psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps); |
133 | } else if (hpdata_full(ps)) { |
134 | psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); |
135 | } else { |
136 | size_t longest_free_range = hpdata_longest_free_range_get(ps); |
137 | |
138 | pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( |
139 | longest_free_range << LG_PAGE)); |
140 | assert(pind < PSSET_NPSIZES); |
141 | |
142 | psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], |
143 | ps); |
144 | } |
145 | } |
146 | |
147 | /* |
148 | * Put ps into some container so that it can be found during future allocation |
149 | * requests. |
150 | */ |
151 | static void |
152 | psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { |
153 | assert(!hpdata_in_psset_alloc_container_get(ps)); |
154 | hpdata_in_psset_alloc_container_set(ps, true); |
155 | if (hpdata_empty(ps)) { |
156 | /* |
157 | * This prepend, paired with popping the head in psset_fit, |
158 | * means we implement LIFO ordering for the empty slabs set, |
159 | * which seems reasonable. |
160 | */ |
161 | hpdata_empty_list_prepend(&psset->empty, ps); |
162 | } else if (hpdata_full(ps)) { |
163 | /* |
164 | * We don't need to keep track of the full slabs; we're never |
165 | * going to return them from a psset_pick_alloc call. |
166 | */ |
167 | } else { |
168 | size_t longest_free_range = hpdata_longest_free_range_get(ps); |
169 | |
170 | pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( |
171 | longest_free_range << LG_PAGE)); |
172 | assert(pind < PSSET_NPSIZES); |
173 | |
174 | psset_hpdata_heap_insert(psset, pind, ps); |
175 | } |
176 | } |
177 | |
178 | /* Remove ps from those collections. */ |
179 | static void |
180 | psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { |
181 | assert(hpdata_in_psset_alloc_container_get(ps)); |
182 | hpdata_in_psset_alloc_container_set(ps, false); |
183 | |
184 | if (hpdata_empty(ps)) { |
185 | hpdata_empty_list_remove(&psset->empty, ps); |
186 | } else if (hpdata_full(ps)) { |
187 | /* Same as above -- do nothing in this case. */ |
188 | } else { |
189 | size_t longest_free_range = hpdata_longest_free_range_get(ps); |
190 | |
191 | pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( |
192 | longest_free_range << LG_PAGE)); |
193 | assert(pind < PSSET_NPSIZES); |
194 | |
195 | psset_hpdata_heap_remove(psset, pind, ps); |
196 | } |
197 | } |
198 | |
199 | static size_t |
200 | psset_purge_list_ind(hpdata_t *ps) { |
201 | size_t ndirty = hpdata_ndirty_get(ps); |
202 | /* Shouldn't have something with no dirty pages purgeable. */ |
203 | assert(ndirty > 0); |
204 | /* |
205 | * Higher indices correspond to lists we'd like to purge earlier; make |
206 | * the two highest indices correspond to empty lists, which we attempt |
207 | * to purge before purging any non-empty list. This has two advantages: |
208 | * - Empty page slabs are the least likely to get reused (we'll only |
209 | * pick them for an allocation if we have no other choice). |
210 | * - Empty page slabs can purge every dirty page they contain in a |
211 | * single call, which is not usually the case. |
212 | * |
213 | * We purge hugeified empty slabs before nonhugeified ones, on the basis |
214 | * that they are fully dirty, while nonhugified slabs might not be, so |
215 | * we free up more pages more easily. |
216 | */ |
217 | if (hpdata_nactive_get(ps) == 0) { |
218 | if (hpdata_huge_get(ps)) { |
219 | return PSSET_NPURGE_LISTS - 1; |
220 | } else { |
221 | return PSSET_NPURGE_LISTS - 2; |
222 | } |
223 | } |
224 | |
225 | pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); |
226 | /* |
227 | * For non-empty slabs, we may reuse them again. Prefer purging |
228 | * non-hugeified slabs before hugeified ones then, among pages of |
229 | * similar dirtiness. We still get some benefit from the hugification. |
230 | */ |
231 | return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); |
232 | } |
233 | |
234 | static void |
235 | psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { |
236 | /* |
237 | * Remove the hpdata from its purge list (if it's in one). Even if it's |
238 | * going to stay in the same one, by appending it during |
239 | * psset_update_end, we move it to the end of its queue, so that we |
240 | * purge LRU within a given dirtiness bucket. |
241 | */ |
242 | if (hpdata_purge_allowed_get(ps)) { |
243 | size_t ind = psset_purge_list_ind(ps); |
244 | hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; |
245 | hpdata_purge_list_remove(purge_list, ps); |
246 | if (hpdata_purge_list_empty(purge_list)) { |
247 | fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); |
248 | } |
249 | } |
250 | } |
251 | |
252 | static void |
253 | psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { |
254 | if (hpdata_purge_allowed_get(ps)) { |
255 | size_t ind = psset_purge_list_ind(ps); |
256 | hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; |
257 | if (hpdata_purge_list_empty(purge_list)) { |
258 | fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); |
259 | } |
260 | hpdata_purge_list_append(purge_list, ps); |
261 | } |
262 | |
263 | } |
264 | |
/*
 * Begin a metadata update on ps: flag it as updating and remove its
 * contribution from the stats and from any containers whose invariants the
 * update could break.  Must be paired with a later psset_update_end.
 */
void
psset_update_begin(psset_t *psset, hpdata_t *ps) {
	hpdata_assert_consistent(ps);
	assert(hpdata_in_psset_get(ps));
	hpdata_updating_set(ps, true);
	/* Stats contribution is re-added (to the new bin) in update_end. */
	psset_stats_remove(psset, ps);
	if (hpdata_in_psset_alloc_container_get(ps)) {
		/*
		 * Some metadata updates can break alloc container invariants
		 * (e.g. the longest free range determines the hpdata_heap_t the
		 * pageslab lives in).
		 */
		assert(hpdata_alloc_allowed_get(ps));
		psset_alloc_container_remove(psset, ps);
	}
	psset_maybe_remove_purge_list(psset, ps);
	/*
	 * We don't update presence in the hugify list; we try to keep it FIFO,
	 * even in the presence of other metadata updates.  We'll update
	 * presence at the end of the metadata update if necessary.
	 */
}
287 | |
/*
 * Finish a metadata update started by psset_update_begin: clear the updating
 * flag, then re-add ps to the stats and whichever containers its (possibly
 * changed) state now calls for.
 */
void
psset_update_end(psset_t *psset, hpdata_t *ps) {
	assert(hpdata_in_psset_get(ps));
	hpdata_updating_set(ps, false);
	psset_stats_insert(psset, ps);

	/*
	 * The update begin should have removed ps from whatever alloc container
	 * it was in.
	 */
	assert(!hpdata_in_psset_alloc_container_get(ps));
	if (hpdata_alloc_allowed_get(ps)) {
		psset_alloc_container_insert(psset, ps);
	}
	psset_maybe_insert_purge_list(psset, ps);

	/* Reconcile hugify-list membership with the slab's current state. */
	if (hpdata_hugify_allowed_get(ps)
	    && !hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, true);
		hpdata_hugify_list_append(&psset->to_hugify, ps);
	} else if (!hpdata_hugify_allowed_get(ps)
	    && hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, false);
		hpdata_hugify_list_remove(&psset->to_hugify, ps);
	}
	hpdata_assert_consistent(ps);
}
315 | |
316 | hpdata_t * |
317 | psset_pick_alloc(psset_t *psset, size_t size) { |
318 | assert((size & PAGE_MASK) == 0); |
319 | assert(size <= HUGEPAGE); |
320 | |
321 | pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); |
322 | pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, |
323 | (size_t)min_pind); |
324 | if (pind == PSSET_NPSIZES) { |
325 | return hpdata_empty_list_first(&psset->empty); |
326 | } |
327 | hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); |
328 | if (ps == NULL) { |
329 | return NULL; |
330 | } |
331 | |
332 | hpdata_assert_consistent(ps); |
333 | |
334 | return ps; |
335 | } |
336 | |
337 | hpdata_t * |
338 | psset_pick_purge(psset_t *psset) { |
339 | ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, |
340 | PSSET_NPURGE_LISTS - 1); |
341 | if (ind_ssz < 0) { |
342 | return NULL; |
343 | } |
344 | pszind_t ind = (pszind_t)ind_ssz; |
345 | assert(ind < PSSET_NPURGE_LISTS); |
346 | hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); |
347 | assert(ps != NULL); |
348 | return ps; |
349 | } |
350 | |
351 | hpdata_t * |
352 | psset_pick_hugify(psset_t *psset) { |
353 | return hpdata_hugify_list_first(&psset->to_hugify); |
354 | } |
355 | |
/*
 * Add a new pageslab to the psset, wiring it into the stats, the alloc
 * container (if allocation is allowed), and the purge/hugify trackers.
 */
void
psset_insert(psset_t *psset, hpdata_t *ps) {
	hpdata_in_psset_set(ps, true);

	psset_stats_insert(psset, ps);
	if (hpdata_alloc_allowed_get(ps)) {
		psset_alloc_container_insert(psset, ps);
	}
	psset_maybe_insert_purge_list(psset, ps);

	if (hpdata_hugify_allowed_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, true);
		hpdata_hugify_list_append(&psset->to_hugify, ps);
	}
}
371 | |
/*
 * Remove a pageslab from the psset entirely, undoing everything psset_insert
 * (and subsequent updates) set up.
 */
void
psset_remove(psset_t *psset, hpdata_t *ps) {
	hpdata_in_psset_set(ps, false);

	psset_stats_remove(psset, ps);
	if (hpdata_in_psset_alloc_container_get(ps)) {
		psset_alloc_container_remove(psset, ps);
	}
	psset_maybe_remove_purge_list(psset, ps);
	if (hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, false);
		hpdata_hugify_list_remove(&psset->to_hugify, ps);
	}
}
386 | |