1#ifndef JEMALLOC_INTERNAL_HPDATA_H
2#define JEMALLOC_INTERNAL_HPDATA_H
3
4#include "jemalloc/internal/fb.h"
5#include "jemalloc/internal/ph.h"
6#include "jemalloc/internal/ql.h"
7#include "jemalloc/internal/typed_list.h"
8
9/*
10 * The metadata representation we use for extents in hugepages. While the PAC
11 * uses the edata_t to represent both active and inactive extents, the HP only
12 * uses the edata_t for active ones; instead, inactive extent state is tracked
13 * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned
14 * region of virtual address space.
15 *
16 * An hpdata need not be "truly" backed by a hugepage (which is not necessarily
17 * an observable property of any given region of address space). It's just
18 * hugepage-sized and hugepage-aligned; it's *potentially* huge.
19 */
20typedef struct hpdata_s hpdata_t;
21ph_structs(hpdata_age_heap, hpdata_t);
22struct hpdata_s {
23 /*
24 * We likewise follow the edata convention of mangling names and forcing
25 * the use of accessors -- this lets us add some consistency checks on
26 * access.
27 */
28
29 /*
30 * The address of the hugepage in question. This can't be named h_addr,
31 * since that conflicts with a macro defined in Windows headers.
32 */
33 void *h_address;
34 /* Its age (measured in psset operations). */
35 uint64_t h_age;
36 /* Whether or not we think the hugepage is mapped that way by the OS. */
37 bool h_huge;
38
39 /*
40 * For some properties, we keep parallel sets of bools; h_foo_allowed
41 * and h_in_psset_foo_container. This is a decoupling mechanism to
42 * avoid bothering the hpa (which manages policies) from the psset
43 * (which is the mechanism used to enforce those policies). This allows
44 * all the container management logic to live in one place, without the
45 * HPA needing to know or care how that happens.
46 */
47
48 /*
49 * Whether or not the hpdata is allowed to be used to serve allocations,
50 * and whether or not the psset is currently tracking it as such.
51 */
52 bool h_alloc_allowed;
53 bool h_in_psset_alloc_container;
54
55 /*
56 * The same, but with purging. There's no corresponding
57 * h_in_psset_purge_container, because the psset (currently) always
58 * removes hpdatas from their containers during updates (to implement
59 * LRU for purging).
60 */
61 bool h_purge_allowed;
62
63 /* And with hugifying. */
64 bool h_hugify_allowed;
65 /* When we became a hugification candidate. */
66 nstime_t h_time_hugify_allowed;
67 bool h_in_psset_hugify_container;
68
69 /* Whether or not a purge or hugify is currently happening. */
70 bool h_mid_purge;
71 bool h_mid_hugify;
72
73 /*
74 * Whether or not the hpdata is being updated in the psset (i.e. if
75 * there has been a psset_update_begin call issued without a matching
76 * psset_update_end call). Eventually this will expand to other types
77 * of updates.
78 */
79 bool h_updating;
80
81 /* Whether or not the hpdata is in a psset. */
82 bool h_in_psset;
83
84 union {
85 /* When nonempty (and also nonfull), used by the psset bins. */
86 hpdata_age_heap_link_t age_link;
87 /*
88 * When empty (or not corresponding to any hugepage), list
89 * linkage.
90 */
91 ql_elm(hpdata_t) ql_link_empty;
92 };
93
94 /*
95 * Linkage for the psset to track candidates for purging and hugifying.
96 */
97 ql_elm(hpdata_t) ql_link_purge;
98 ql_elm(hpdata_t) ql_link_hugify;
99
100 /* The length of the largest contiguous sequence of inactive pages. */
101 size_t h_longest_free_range;
102
103 /* Number of active pages. */
104 size_t h_nactive;
105
106 /* A bitmap with bits set in the active pages. */
107 fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
108
109 /*
110 * Number of dirty or active pages, and a bitmap tracking them. One
111 * way to think of this is as which pages are dirty from the OS's
112 * perspective.
113 */
114 size_t h_ntouched;
115
116 /* The touched pages (using the same definition as above). */
117 fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
118};
119
120TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
121TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge)
122TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify)
123
124ph_proto(, hpdata_age_heap, hpdata_t);
125
126static inline void *
127hpdata_addr_get(const hpdata_t *hpdata) {
128 return hpdata->h_address;
129}
130
131static inline void
132hpdata_addr_set(hpdata_t *hpdata, void *addr) {
133 assert(HUGEPAGE_ADDR2BASE(addr) == addr);
134 hpdata->h_address = addr;
135}
136
137static inline uint64_t
138hpdata_age_get(const hpdata_t *hpdata) {
139 return hpdata->h_age;
140}
141
142static inline void
143hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
144 hpdata->h_age = age;
145}
146
147static inline bool
148hpdata_huge_get(const hpdata_t *hpdata) {
149 return hpdata->h_huge;
150}
151
152static inline bool
153hpdata_alloc_allowed_get(const hpdata_t *hpdata) {
154 return hpdata->h_alloc_allowed;
155}
156
157static inline void
158hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) {
159 hpdata->h_alloc_allowed = alloc_allowed;
160}
161
162static inline bool
163hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) {
164 return hpdata->h_in_psset_alloc_container;
165}
166
167static inline void
168hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
169 assert(in_container != hpdata->h_in_psset_alloc_container);
170 hpdata->h_in_psset_alloc_container = in_container;
171}
172
173static inline bool
174hpdata_purge_allowed_get(const hpdata_t *hpdata) {
175 return hpdata->h_purge_allowed;
176}
177
178static inline void
179hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
180 assert(purge_allowed == false || !hpdata->h_mid_purge);
181 hpdata->h_purge_allowed = purge_allowed;
182}
183
184static inline bool
185hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
186 return hpdata->h_hugify_allowed;
187}
188
189static inline void
190hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
191 assert(!hpdata->h_mid_hugify);
192 hpdata->h_hugify_allowed = true;
193 hpdata->h_time_hugify_allowed = now;
194}
195
196static inline nstime_t
197hpdata_time_hugify_allowed(hpdata_t *hpdata) {
198 return hpdata->h_time_hugify_allowed;
199}
200
201static inline void
202hpdata_disallow_hugify(hpdata_t *hpdata) {
203 hpdata->h_hugify_allowed = false;
204}
205
206static inline bool
207hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) {
208 return hpdata->h_in_psset_hugify_container;
209}
210
211static inline void
212hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) {
213 assert(in_container != hpdata->h_in_psset_hugify_container);
214 hpdata->h_in_psset_hugify_container = in_container;
215}
216
217static inline bool
218hpdata_mid_purge_get(const hpdata_t *hpdata) {
219 return hpdata->h_mid_purge;
220}
221
222static inline void
223hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) {
224 assert(mid_purge != hpdata->h_mid_purge);
225 hpdata->h_mid_purge = mid_purge;
226}
227
228static inline bool
229hpdata_mid_hugify_get(const hpdata_t *hpdata) {
230 return hpdata->h_mid_hugify;
231}
232
233static inline void
234hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) {
235 assert(mid_hugify != hpdata->h_mid_hugify);
236 hpdata->h_mid_hugify = mid_hugify;
237}
238
239static inline bool
240hpdata_changing_state_get(const hpdata_t *hpdata) {
241 return hpdata->h_mid_purge || hpdata->h_mid_hugify;
242}
243
244
245static inline bool
246hpdata_updating_get(const hpdata_t *hpdata) {
247 return hpdata->h_updating;
248}
249
250static inline void
251hpdata_updating_set(hpdata_t *hpdata, bool updating) {
252 assert(updating != hpdata->h_updating);
253 hpdata->h_updating = updating;
254}
255
256static inline bool
257hpdata_in_psset_get(const hpdata_t *hpdata) {
258 return hpdata->h_in_psset;
259}
260
261static inline void
262hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
263 assert(in_psset != hpdata->h_in_psset);
264 hpdata->h_in_psset = in_psset;
265}
266
267static inline size_t
268hpdata_longest_free_range_get(const hpdata_t *hpdata) {
269 return hpdata->h_longest_free_range;
270}
271
272static inline void
273hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
274 assert(longest_free_range <= HUGEPAGE_PAGES);
275 hpdata->h_longest_free_range = longest_free_range;
276}
277
278static inline size_t
279hpdata_nactive_get(hpdata_t *hpdata) {
280 return hpdata->h_nactive;
281}
282
283static inline size_t
284hpdata_ntouched_get(hpdata_t *hpdata) {
285 return hpdata->h_ntouched;
286}
287
288static inline size_t
289hpdata_ndirty_get(hpdata_t *hpdata) {
290 return hpdata->h_ntouched - hpdata->h_nactive;
291}
292
293static inline size_t
294hpdata_nretained_get(hpdata_t *hpdata) {
295 return HUGEPAGE_PAGES - hpdata->h_ntouched;
296}
297
298static inline void
299hpdata_assert_empty(hpdata_t *hpdata) {
300 assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
301 assert(hpdata->h_nactive == 0);
302}
303
304/*
305 * Only used in tests, and in hpdata_assert_consistent, below. Verifies some
306 * consistency properties of the hpdata (e.g. that cached counts of page stats
307 * match computed ones).
308 */
309static inline bool
310hpdata_consistent(hpdata_t *hpdata) {
311 if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
312 != hpdata_longest_free_range_get(hpdata)) {
313 return false;
314 }
315 if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
316 != hpdata->h_nactive) {
317 return false;
318 }
319 if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
320 != hpdata->h_ntouched) {
321 return false;
322 }
323 if (hpdata->h_ntouched < hpdata->h_nactive) {
324 return false;
325 }
326 if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
327 return false;
328 }
329 if (hpdata_changing_state_get(hpdata)
330 && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
331 return false;
332 }
333 if (hpdata_hugify_allowed_get(hpdata)
334 != hpdata_in_psset_hugify_container_get(hpdata)) {
335 return false;
336 }
337 return true;
338}
339
340static inline void
341hpdata_assert_consistent(hpdata_t *hpdata) {
342 assert(hpdata_consistent(hpdata));
343}
344
345static inline bool
346hpdata_empty(hpdata_t *hpdata) {
347 return hpdata->h_nactive == 0;
348}
349
350static inline bool
351hpdata_full(hpdata_t *hpdata) {
352 return hpdata->h_nactive == HUGEPAGE_PAGES;
353}
354
355void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
356
357/*
358 * Given an hpdata which can serve an allocation request, pick and reserve an
359 * offset within that allocation.
360 */
361void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
362void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
363
364/*
365 * The hpdata_purge_prepare_t allows grabbing the metadata required to purge
366 * subranges of a hugepage while holding a lock, drop the lock during the actual
367 * purging of them, and reacquire it to update the metadata again.
368 */
369typedef struct hpdata_purge_state_s hpdata_purge_state_t;
370struct hpdata_purge_state_s {
371 size_t npurged;
372 size_t ndirty_to_purge;
373 fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
374 size_t next_purge_search_begin;
375};
376
377/*
378 * Initializes purge state. The access to hpdata must be externally
379 * synchronized with other hpdata_* calls.
380 *
381 * You can tell whether or not a thread is purging or hugifying a given hpdata
382 * via hpdata_changing_state_get(hpdata). Racing hugification or purging
383 * operations aren't allowed.
384 *
385 * Once you begin purging, you have to follow through and call hpdata_purge_next
386 * until you're done, and then end. Allocating out of an hpdata undergoing
387 * purging is not allowed.
388 *
389 * Returns the number of dirty pages that will be purged.
390 */
391size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
392
393/*
394 * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
395 * true, and returns true. Otherwise, returns false to indicate that we're
396 * done.
397 *
398 * This requires exclusive access to the purge state, but *not* to the hpdata.
399 * In particular, unreserve calls are allowed while purging (i.e. you can dalloc
400 * into one part of the hpdata while purging a different part).
401 */
402bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
403 void **r_purge_addr, size_t *r_purge_size);
404/*
405 * Updates the hpdata metadata after all purging is done. Needs external
406 * synchronization.
407 */
408void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
409
410void hpdata_hugify(hpdata_t *hpdata);
411void hpdata_dehugify(hpdata_t *hpdata);
412
413#endif /* JEMALLOC_INTERNAL_HPDATA_H */
414