1 | #ifndef JEMALLOC_INTERNAL_HPDATA_H |
2 | #define JEMALLOC_INTERNAL_HPDATA_H |
3 | |
4 | #include "jemalloc/internal/fb.h" |
5 | #include "jemalloc/internal/ph.h" |
6 | #include "jemalloc/internal/ql.h" |
7 | #include "jemalloc/internal/typed_list.h" |
8 | |
9 | /* |
 * The metadata representation we use for extents in hugepages.  While the PAC
 * uses the edata_t to represent both active and inactive extents, the HPA only
 * uses the edata_t for active ones; instead, inactive extent state is tracked
 * within the hpdata associated with the enclosing hugepage-sized,
 * hugepage-aligned region of virtual address space.
15 | * |
16 | * An hpdata need not be "truly" backed by a hugepage (which is not necessarily |
17 | * an observable property of any given region of address space). It's just |
18 | * hugepage-sized and hugepage-aligned; it's *potentially* huge. |
19 | */ |
typedef struct hpdata_s hpdata_t;
/*
 * Age-heap support types for hpdata_t (presumably including the
 * hpdata_age_heap_link_t used for age_link below -- see ph.h).
 */
ph_structs(hpdata_age_heap, hpdata_t);
struct hpdata_s {
	/*
	 * We likewise follow the edata convention of mangling names and forcing
	 * the use of accessors -- this lets us add some consistency checks on
	 * access.
	 */

	/*
	 * The address of the hugepage in question.  This can't be named h_addr,
	 * since that conflicts with a macro defined in Windows headers.
	 */
	void *h_address;
	/* Its age (measured in psset operations). */
	uint64_t h_age;
	/*
	 * Whether or not we think the hugepage is mapped that way by the OS.
	 * hpdata_consistent() requires every page to be touched while this is
	 * set.
	 */
	bool h_huge;

	/*
	 * For some properties, we keep parallel sets of bools; h_foo_allowed
	 * and h_in_psset_foo_container.  This is a decoupling mechanism that
	 * keeps the hpa (which manages policies) separate from the psset
	 * (which is the mechanism used to enforce those policies).  This allows
	 * all the container management logic to live in one place, without the
	 * HPA needing to know or care how that happens.
	 */

	/*
	 * Whether or not the hpdata is allowed to be used to serve allocations,
	 * and whether or not the psset is currently tracking it as such.
	 */
	bool h_alloc_allowed;
	bool h_in_psset_alloc_container;

	/*
	 * The same, but with purging.  There's no corresponding
	 * h_in_psset_purge_container, because the psset (currently) always
	 * removes hpdatas from their containers during updates (to implement
	 * LRU for purging).
	 */
	bool h_purge_allowed;

	/* And with hugifying. */
	bool h_hugify_allowed;
	/*
	 * When we became a hugification candidate (recorded by
	 * hpdata_allow_hugify).
	 */
	nstime_t h_time_hugify_allowed;
	/*
	 * Whether or not the psset is currently tracking this hpdata as a
	 * hugification candidate.  hpdata_consistent() requires this to equal
	 * h_hugify_allowed.
	 */
	bool h_in_psset_hugify_container;

	/*
	 * Whether or not a purge or hugify is currently happening.  While
	 * either is set, hpdata_consistent() requires both h_purge_allowed and
	 * h_hugify_allowed to be false.
	 */
	bool h_mid_purge;
	bool h_mid_hugify;

	/*
	 * Whether or not the hpdata is being updated in the psset (i.e. if
	 * there has been a psset_update_begin call issued without a matching
	 * psset_update_end call).  Eventually this will expand to other types
	 * of updates.
	 */
	bool h_updating;

	/* Whether or not the hpdata is in a psset. */
	bool h_in_psset;

	/*
	 * The two linkages below share storage: per the comments on each, an
	 * hpdata uses one or the other depending on whether it is empty.
	 */
	union {
		/* When nonempty (and also nonfull), used by the psset bins. */
		hpdata_age_heap_link_t age_link;
		/*
		 * When empty (or not corresponding to any hugepage), list
		 * linkage.
		 */
		ql_elm(hpdata_t) ql_link_empty;
	};

	/*
	 * Linkage for the psset to track candidates for purging and hugifying.
	 * Unlike the union above, these are independent fields, so an hpdata
	 * can be linked for purging and for hugifying at once.
	 */
	ql_elm(hpdata_t) ql_link_purge;
	ql_elm(hpdata_t) ql_link_hugify;

	/*
	 * The length of the largest contiguous sequence of inactive pages (at
	 * most HUGEPAGE_PAGES).
	 */
	size_t h_longest_free_range;

	/* Number of active pages; the count of bits set in active_pages. */
	size_t h_nactive;

	/* A bitmap with bits set in the active pages. */
	fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];

	/*
	 * Number of dirty or active pages, and a bitmap tracking them.  One
	 * way to think of this is as which pages are dirty from the OS's
	 * perspective.  Never less than h_nactive in a consistent hpdata.
	 */
	size_t h_ntouched;

	/* The touched pages (using the same definition as above). */
	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
};
119 | |
/*
 * One typed list per ql linkage field in hpdata_t: empty hpdatas, purge
 * candidates, and hugify candidates.
 * NOTE(review): the exact API generated is defined by TYPED_LIST in
 * typed_list.h -- confirm there.
 */
TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge)
TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify)

/*
 * Presumably the function prototypes for the hpdata_age_heap whose types are
 * declared via ph_structs() earlier in this header -- see ph.h.
 */
ph_proto(, hpdata_age_heap, hpdata_t);
125 | |
126 | static inline void * |
127 | hpdata_addr_get(const hpdata_t *hpdata) { |
128 | return hpdata->h_address; |
129 | } |
130 | |
/*
 * Records addr as the address of the hugepage this hpdata describes.
 *
 * addr must map to itself under HUGEPAGE_ADDR2BASE (i.e. it must already be a
 * hugepage base address); this is enforced by assertion only.
 */
static inline void
hpdata_addr_set(hpdata_t *hpdata, void *addr) {
	/* Reject addresses that are not their own hugepage base. */
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	hpdata->h_address = addr;
}
136 | |
137 | static inline uint64_t |
138 | hpdata_age_get(const hpdata_t *hpdata) { |
139 | return hpdata->h_age; |
140 | } |
141 | |
/* Overwrites the hpdata's age with the given value; no checks are applied. */
static inline void
hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
	hpdata->h_age = age;
}
146 | |
147 | static inline bool |
148 | hpdata_huge_get(const hpdata_t *hpdata) { |
149 | return hpdata->h_huge; |
150 | } |
151 | |
152 | static inline bool |
153 | hpdata_alloc_allowed_get(const hpdata_t *hpdata) { |
154 | return hpdata->h_alloc_allowed; |
155 | } |
156 | |
/*
 * Sets whether this hpdata is allowed to be used to serve allocations.  Unlike
 * most of the other flag setters in this header, setting the flag to its
 * current value is permitted (there is no assertion).
 */
static inline void
hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) {
	hpdata->h_alloc_allowed = alloc_allowed;
}
161 | |
162 | static inline bool |
163 | hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) { |
164 | return hpdata->h_in_psset_alloc_container; |
165 | } |
166 | |
/*
 * Records whether the psset is tracking this hpdata in its alloc container.
 * Every call must flip the flag; setting it to its current value trips the
 * assertion.
 */
static inline void
hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
	/* Redundant transitions indicate a bookkeeping bug in the caller. */
	assert(in_container != hpdata->h_in_psset_alloc_container);
	hpdata->h_in_psset_alloc_container = in_container;
}
172 | |
173 | static inline bool |
174 | hpdata_purge_allowed_get(const hpdata_t *hpdata) { |
175 | return hpdata->h_purge_allowed; |
176 | } |
177 | |
/*
 * Sets whether this hpdata is allowed to be purged.  Purging may not be
 * (re-)allowed while a purge is in progress; disallowing it is always
 * permitted.
 */
static inline void
hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
	/* Only the "allow" direction is restricted by h_mid_purge. */
	assert(purge_allowed == false || !hpdata->h_mid_purge);
	hpdata->h_purge_allowed = purge_allowed;
}
183 | |
184 | static inline bool |
185 | hpdata_hugify_allowed_get(const hpdata_t *hpdata) { |
186 | return hpdata->h_hugify_allowed; |
187 | } |
188 | |
189 | static inline void |
190 | hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) { |
191 | assert(!hpdata->h_mid_hugify); |
192 | hpdata->h_hugify_allowed = true; |
193 | hpdata->h_time_hugify_allowed = now; |
194 | } |
195 | |
196 | static inline nstime_t |
197 | hpdata_time_hugify_allowed(hpdata_t *hpdata) { |
198 | return hpdata->h_time_hugify_allowed; |
199 | } |
200 | |
/*
 * Clears the hugification-candidate flag.  The recorded candidacy time
 * (h_time_hugify_allowed) is left untouched.
 */
static inline void
hpdata_disallow_hugify(hpdata_t *hpdata) {
	hpdata->h_hugify_allowed = false;
}
205 | |
206 | static inline bool |
207 | hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) { |
208 | return hpdata->h_in_psset_hugify_container; |
209 | } |
210 | |
/*
 * Records whether the psset is tracking this hpdata in its hugify container.
 * Every call must flip the flag; setting it to its current value trips the
 * assertion.
 */
static inline void
hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) {
	/* Redundant transitions indicate a bookkeeping bug in the caller. */
	assert(in_container != hpdata->h_in_psset_hugify_container);
	hpdata->h_in_psset_hugify_container = in_container;
}
216 | |
217 | static inline bool |
218 | hpdata_mid_purge_get(const hpdata_t *hpdata) { |
219 | return hpdata->h_mid_purge; |
220 | } |
221 | |
/*
 * Marks the start (true) or end (false) of a purge.  Every call must flip the
 * flag, so beginning a purge twice or ending one that never began trips the
 * assertion.
 */
static inline void
hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) {
	assert(mid_purge != hpdata->h_mid_purge);
	hpdata->h_mid_purge = mid_purge;
}
227 | |
228 | static inline bool |
229 | hpdata_mid_hugify_get(const hpdata_t *hpdata) { |
230 | return hpdata->h_mid_hugify; |
231 | } |
232 | |
/*
 * Marks the start (true) or end (false) of a hugify.  Every call must flip the
 * flag, so beginning a hugify twice or ending one that never began trips the
 * assertion.
 */
static inline void
hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) {
	assert(mid_hugify != hpdata->h_mid_hugify);
	hpdata->h_mid_hugify = mid_hugify;
}
238 | |
239 | static inline bool |
240 | hpdata_changing_state_get(const hpdata_t *hpdata) { |
241 | return hpdata->h_mid_purge || hpdata->h_mid_hugify; |
242 | } |
243 | |
244 | |
245 | static inline bool |
246 | hpdata_updating_get(const hpdata_t *hpdata) { |
247 | return hpdata->h_updating; |
248 | } |
249 | |
/*
 * Marks the hpdata as being (or no longer being) updated in the psset.  Every
 * call must flip the flag, so nested or unbalanced begin/end pairs trip the
 * assertion.
 */
static inline void
hpdata_updating_set(hpdata_t *hpdata, bool updating) {
	assert(updating != hpdata->h_updating);
	hpdata->h_updating = updating;
}
255 | |
256 | static inline bool |
257 | hpdata_in_psset_get(const hpdata_t *hpdata) { |
258 | return hpdata->h_in_psset; |
259 | } |
260 | |
/*
 * Records whether the hpdata is in a psset.  Every call must flip the flag;
 * double insertion or double removal trips the assertion.
 */
static inline void
hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
	assert(in_psset != hpdata->h_in_psset);
	hpdata->h_in_psset = in_psset;
}
266 | |
267 | static inline size_t |
268 | hpdata_longest_free_range_get(const hpdata_t *hpdata) { |
269 | return hpdata->h_longest_free_range; |
270 | } |
271 | |
/*
 * Updates the cached length of the largest contiguous sequence of inactive
 * pages.  The value may not exceed HUGEPAGE_PAGES; this is enforced by
 * assertion only.  The bitmap itself is not consulted here.
 */
static inline void
hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
	assert(longest_free_range <= HUGEPAGE_PAGES);
	hpdata->h_longest_free_range = longest_free_range;
}
277 | |
278 | static inline size_t |
279 | hpdata_nactive_get(hpdata_t *hpdata) { |
280 | return hpdata->h_nactive; |
281 | } |
282 | |
283 | static inline size_t |
284 | hpdata_ntouched_get(hpdata_t *hpdata) { |
285 | return hpdata->h_ntouched; |
286 | } |
287 | |
288 | static inline size_t |
289 | hpdata_ndirty_get(hpdata_t *hpdata) { |
290 | return hpdata->h_ntouched - hpdata->h_nactive; |
291 | } |
292 | |
293 | static inline size_t |
294 | hpdata_nretained_get(hpdata_t *hpdata) { |
295 | return HUGEPAGE_PAGES - hpdata->h_ntouched; |
296 | } |
297 | |
/*
 * Asserts that the hpdata has no active pages, checking both the active-page
 * bitmap and the cached active count.  Has no effect when assertions are
 * compiled out.
 */
static inline void
hpdata_assert_empty(hpdata_t *hpdata) {
	assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
	assert(hpdata->h_nactive == 0);
}
303 | |
304 | /* |
305 | * Only used in tests, and in hpdata_assert_consistent, below. Verifies some |
306 | * consistency properties of the hpdata (e.g. that cached counts of page stats |
307 | * match computed ones). |
308 | */ |
309 | static inline bool |
310 | hpdata_consistent(hpdata_t *hpdata) { |
311 | if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) |
312 | != hpdata_longest_free_range_get(hpdata)) { |
313 | return false; |
314 | } |
315 | if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) |
316 | != hpdata->h_nactive) { |
317 | return false; |
318 | } |
319 | if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) |
320 | != hpdata->h_ntouched) { |
321 | return false; |
322 | } |
323 | if (hpdata->h_ntouched < hpdata->h_nactive) { |
324 | return false; |
325 | } |
326 | if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { |
327 | return false; |
328 | } |
329 | if (hpdata_changing_state_get(hpdata) |
330 | && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) { |
331 | return false; |
332 | } |
333 | if (hpdata_hugify_allowed_get(hpdata) |
334 | != hpdata_in_psset_hugify_container_get(hpdata)) { |
335 | return false; |
336 | } |
337 | return true; |
338 | } |
339 | |
/*
 * Asserts that hpdata_consistent(hpdata) holds.  Has no effect when assertions
 * are compiled out.
 */
static inline void
hpdata_assert_consistent(hpdata_t *hpdata) {
	assert(hpdata_consistent(hpdata));
}
344 | |
345 | static inline bool |
346 | hpdata_empty(hpdata_t *hpdata) { |
347 | return hpdata->h_nactive == 0; |
348 | } |
349 | |
350 | static inline bool |
351 | hpdata_full(hpdata_t *hpdata) { |
352 | return hpdata->h_nactive == HUGEPAGE_PAGES; |
353 | } |
354 | |
/*
 * Initializes hpdata to describe the region at addr, with the given age.
 * NOTE(review): defined out of line; presumably addr must satisfy the same
 * hugepage-base requirement that hpdata_addr_set asserts -- confirm against
 * the definition.
 */
void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
356 | |
/*
 * Given an hpdata which can serve an allocation request of size sz, pick and
 * reserve an offset within that hpdata for the allocation.
 * NOTE(review): the return value is presumably the address of the reserved
 * range -- confirm against the definition.
 */
void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
/*
 * NOTE(review): presumably the inverse of hpdata_reserve_alloc, releasing the
 * range of size sz that starts at begin -- confirm against the definition.
 */
void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
363 | |
/*
 * The hpdata_purge_state_t allows grabbing the metadata required to purge
 * subranges of a hugepage while holding a lock, dropping the lock during the
 * actual purging of them, and reacquiring it to update the metadata again.
 */
typedef struct hpdata_purge_state_s hpdata_purge_state_t;
struct hpdata_purge_state_s {
	/*
	 * NOTE(review): the field descriptions below are inferred from the
	 * field names and the purge API comments in this header; confirm
	 * against hpdata_purge_begin/next/end.
	 */
	/* Presumably the number of pages purged so far. */
	size_t npurged;
	/* Presumably the number of dirty pages selected for purging. */
	size_t ndirty_to_purge;
	/* Bitmap (one bit per page of the hugepage) of the pages to purge. */
	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
	/* Presumably where the next hpdata_purge_next call resumes scanning. */
	size_t next_purge_search_begin;
};
376 | |
/*
 * Initializes purge state.  The access to hpdata must be externally
 * synchronized with other hpdata_* calls.
 *
 * You can tell whether or not a thread is purging or hugifying a given hpdata
 * via hpdata_changing_state_get(hpdata).  Racing hugification or purging
 * operations aren't allowed.
 *
 * Once you begin purging, you have to follow through: call hpdata_purge_next
 * until it reports that you're done, and then call hpdata_purge_end.
 * Allocating out of an hpdata undergoing purging is not allowed.
 *
 * Returns the number of dirty pages that will be purged.
 */
size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
392 | |
/*
 * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
 * the address and size of the next one, and returns true.  Otherwise, returns
 * false to indicate that we're done.
 *
 * This requires exclusive access to the purge state, but *not* to the hpdata.
 * In particular, unreserve calls are allowed while purging (i.e. you can dalloc
 * into one part of the hpdata while purging a different part).
 */
bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size);
/*
 * Updates the hpdata metadata after all purging is done (i.e. once
 * hpdata_purge_next has returned false).  Needs external synchronization.
 * NOTE(review): purge_state is presumably the same state that was passed to
 * hpdata_purge_begin for this hpdata -- confirm against the definition.
 */
void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
409 | |
/*
 * NOTE(review): defined out of line.  This header exposes hpdata_huge_get but
 * no inline setter for the huge flag, so these two presumably are what mark
 * the hpdata as huge / no longer huge -- confirm against the definitions,
 * including any effect on the touched-page bookkeeping.
 */
void hpdata_hugify(hpdata_t *hpdata);
void hpdata_dehugify(hpdata_t *hpdata);
412 | |
413 | #endif /* JEMALLOC_INTERNAL_HPDATA_H */ |
414 | |