#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/hpdata.h"

static int
hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) {
	uint64_t a_age = hpdata_age_get(a);
	uint64_t b_age = hpdata_age_get(b);
	/*
	 * hpdata ages are operation counts in the psset; no two should be the
	 * same.
	 */
	assert(a_age != b_age);
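	/*
	 * Branchless three-way comparison: the subtraction yields -1 when
	 * a_age < b_age and 1 when a_age > b_age (0 can't happen, per the
	 * assert above), which is exactly the ordering the heap needs.
	 */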
	return (a_age > b_age) - (a_age < b_age);
}

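/*
 * Generate the pairing-heap implementation backing hpdata_age_heap_t; the
 * resulting hpdata_age_heap_* functions order hpdatas by age via the
 * comparator above.
 */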
ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp)

void
hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
	hpdata_addr_set(hpdata, addr);
	hpdata_age_set(hpdata, age);
	hpdata->h_huge = false;
	hpdata->h_alloc_allowed = true;
	hpdata->h_in_psset_alloc_container = false;
	hpdata->h_purge_allowed = false;
	hpdata->h_hugify_allowed = false;
	hpdata->h_in_psset_hugify_container = false;
	hpdata->h_mid_purge = false;
	hpdata->h_mid_hugify = false;
	hpdata->h_updating = false;
	hpdata->h_in_psset = false;
	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
	hpdata->h_nactive = 0;
	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
	hpdata->h_ntouched = 0;
	fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);

	hpdata_assert_consistent(hpdata);
}

void *
hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/*
	 * This is a metadata change; the hpdata should therefore either not be
	 * in the psset, or should have explicitly marked itself as being
	 * mid-update.
	 */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(hpdata->h_alloc_allowed);
	assert((sz & PAGE_MASK) == 0);
	size_t npages = sz >> LG_PAGE;
	assert(npages <= hpdata_longest_free_range_get(hpdata));

	size_t result;

	size_t start = 0;
	/*
	 * These initializers are dead stores; without them, though, the
	 * compiler would warn about possibly-uninitialized uses, since it
	 * can't tell statically that found is always true below.
	 */
	size_t begin = 0;
	size_t len = 0;

	size_t largest_unchosen_range = 0;
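	/*
	 * Scan the free (unset) runs of active_pages in address order,
	 * stopping at the first one big enough; fb_urange_iter reports the
	 * next run of unset bits beginning at or after start. Along the way,
	 * remember the longest run we skipped, for the metadata fixup below.
	 */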
	while (true) {
		bool found = fb_urange_iter(hpdata->active_pages,
		    HUGEPAGE_PAGES, start, &begin, &len);
		/*
		 * A precondition to this function is that hpdata must be able
		 * to serve the allocation.
		 */
		assert(found);
		assert(len <= hpdata_longest_free_range_get(hpdata));
		if (len >= npages) {
			/*
			 * We use first-fit within the page slabs; this gives
			 * bounded worst-case fragmentation within a slab. It's
			 * not necessarily right; we could experiment with
			 * various other options.
			 */
			break;
		}
		if (len > largest_unchosen_range) {
			largest_unchosen_range = len;
		}
		start = begin + len;
	}
	/* We found a range; remember it. */
	result = begin;
	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	hpdata->h_nactive += npages;

	/*
	 * We might be about to dirty some memory for the first time; update our
	 * count if so.
	 */
	size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES,
	    result, npages);
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
	hpdata->h_ntouched += new_dirty;

	/*
	 * If we allocated out of a range that was the longest in the hpdata, it
	 * might be the only one of that size and we'll have to adjust the
	 * metadata.
	 */
	if (len == hpdata_longest_free_range_get(hpdata)) {
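		/*
		 * Ranges before the chosen one were already folded into
		 * largest_unchosen_range by the search loop, so we only need
		 * to rescan from the end of this allocation. We can stop
		 * early if we find a range as long as the old longest; no
		 * later range can beat it.
		 */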
		start = begin + npages;
		while (start < HUGEPAGE_PAGES) {
			bool found = fb_urange_iter(hpdata->active_pages,
			    HUGEPAGE_PAGES, start, &begin, &len);
			if (!found) {
				break;
			}
			assert(len <= hpdata_longest_free_range_get(hpdata));
			if (len == hpdata_longest_free_range_get(hpdata)) {
				largest_unchosen_range = len;
				break;
			}
			if (len > largest_unchosen_range) {
				largest_unchosen_range = len;
			}
			start = begin + len;
		}
		hpdata_longest_free_range_set(hpdata, largest_unchosen_range);
	}

	hpdata_assert_consistent(hpdata);
	return (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE));
}

void
hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(((uintptr_t)addr & PAGE_MASK) == 0);
	assert((sz & PAGE_MASK) == 0);
	size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata))
	    >> LG_PAGE;
	assert(begin < HUGEPAGE_PAGES);
	size_t npages = sz >> LG_PAGE;
	size_t old_longest_range = hpdata_longest_free_range_get(hpdata);

	fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	/* We might have just created a new, larger range. */
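	/*
	 * Compute the bounds of the (possibly coalesced) free run containing
	 * the pages we just freed: fb_fls finds the last active page at or
	 * before begin (-1 if none, so +1 lands on the run's start), and
	 * fb_ffs finds the first active page at or after the freed range's
	 * last page (HUGEPAGE_PAGES if none), i.e. one past the run's end.
	 */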
	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin) + 1);
	size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin + npages - 1);
	size_t new_range_len = new_end - new_begin;

	if (new_range_len > old_longest_range) {
		hpdata_longest_free_range_set(hpdata, new_range_len);
	}

	hpdata->h_nactive -= npages;

	hpdata_assert_consistent(hpdata);
}

size_t
hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	hpdata_assert_consistent(hpdata);
	/*
	 * See the comment below; we might purge any inactive extent, so it's
	 * unsafe for any other thread to turn any inactive extent active while
	 * we're operating on it.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	purge_state->npurged = 0;
	purge_state->next_purge_search_begin = 0;

	/*
	 * Initialize to_purge.
	 *
	 * It's possible to end up in situations where two dirty extents are
	 * separated by a retained extent:
	 * - 1 page allocated.
	 * - 1 page allocated.
	 * - 1 page allocated.
	 *
	 * If the middle page is freed and purged, and then the first and third
	 * pages are freed, and then another purge pass happens, the hpdata
	 * looks like this:
	 * - 1 page dirty.
	 * - 1 page retained.
	 * - 1 page dirty.
	 *
	 * But it's safe to do a single 3-page purge.
	 *
	 * We do this by first computing the dirty pages, and then filling in
	 * any gaps by extending each range in the dirty bitmap until the next
	 * active page. This purges more pages, but the expensive part of
	 * purging is the TLB shootdowns, rather than the kernel state
	 * tracking; doing a little bit more of the latter is fine if it saves
	 * us from doing some of the former.
	 */

	/*
	 * The dirty pages are those that are touched but not active. Note that
	 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the
	 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8
	 * fb_group_ts.
	 */
	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
	fb_init(dirty_pages, HUGEPAGE_PAGES);
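	/* Compute dirty_pages = ~active_pages & touched_pages. */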
	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
	    HUGEPAGE_PAGES);

	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
	size_t next_bit = 0;
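	/*
	 * Each iteration records one purge range in to_purge: find the start
	 * of the next dirty run, then extend it through any retained gaps up
	 * to (but not past) the last dirty page before the next active page.
	 */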
	while (next_bit < HUGEPAGE_PAGES) {
		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
		    next_bit);
		/* Recall that fb_ffs returns nbits if no set bit is found. */
		if (next_dirty == HUGEPAGE_PAGES) {
			break;
		}
		size_t next_active = fb_ffs(hpdata->active_pages,
		    HUGEPAGE_PAGES, next_dirty);
		/*
		 * Don't purge past the end of the dirty extent, into retained
		 * pages. This helps the kernel a tiny bit, but honestly it's
		 * mostly helpful for testing (where we tend to write test
		 * cases that think in terms of the dirty ranges).
		 */
		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
		    next_active - 1);
		assert(last_dirty >= 0);
		assert((size_t)last_dirty >= next_dirty);
		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);

		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
		    last_dirty - next_dirty + 1);
		next_bit = next_active + 1;
	}

	/* We should purge, at least, everything dirty. */
	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
	purge_state->ndirty_to_purge = ndirty;
	assert(ndirty <= fb_scount(
	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
	    HUGEPAGE_PAGES));

	hpdata_assert_consistent(hpdata);

	return ndirty;
}

bool
hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size) {
	/*
	 * Note that we don't have a consistency check here; we're accessing
	 * hpdata without synchronization, and therefore have no right to
	 * expect a consistent state.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
		return false;
	}
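	/*
	 * fb_srange_iter reports the next run of set bits at or after the
	 * cursor, i.e. the next range scheduled for purging.
	 */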
	size_t purge_begin;
	size_t purge_len;
	bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES,
	    purge_state->next_purge_search_begin, &purge_begin, &purge_len);
	if (!found_range) {
		return false;
	}

	*r_purge_addr = (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE);
	*r_purge_size = purge_len * PAGE;

	purge_state->next_purge_search_begin = purge_begin + purge_len;
	purge_state->npurged += purge_len;
	assert(purge_state->npurged <= HUGEPAGE_PAGES);

	return true;
}

void
hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	assert(!hpdata_alloc_allowed_get(hpdata));
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);

	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(purge_state->npurged >= purge_state->ndirty_to_purge);

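	/*
	 * Mark the purged pages clean: touched_pages &= ~to_purge. Note that
	 * the fb_bit_not inverts to_purge in place, so the purge state no
	 * longer describes the purged ranges after this call.
	 */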
	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
	    HUGEPAGE_PAGES);
	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
	    purge_state->to_purge, HUGEPAGE_PAGES);
	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
	hpdata->h_ntouched -= purge_state->ndirty_to_purge;

	hpdata_assert_consistent(hpdata);
}

void
hpdata_hugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = true;
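	/*
	 * Once hugified, the region is backed as a single unit, so we
	 * conservatively treat every page in it as touched.
	 */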
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
	hpdata->h_ntouched = HUGEPAGE_PAGES;
	hpdata_assert_consistent(hpdata);
}

void
hpdata_dehugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
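	/*
	 * Only the metadata changes here; the active/touched maps are left
	 * alone, and any actual demotion of the underlying mapping is the
	 * caller's responsibility.
	 */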
	hpdata->h_huge = false;
	hpdata_assert_consistent(hpdata);
}