#ifndef JEMALLOC_INTERNAL_PA_H
#define JEMALLOC_INTERNAL_PA_H

#include "jemalloc/internal/base.h"
#include "jemalloc/internal/decay.h"
#include "jemalloc/internal/ecache.h"
#include "jemalloc/internal/edata_cache.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/sec.h"

/*
 * The page allocator; responsible for acquiring pages of memory for
 * allocations. It picks the implementation of the page allocator interface
 * (i.e. a pai_t) to handle a given page-level allocation request. The two
 * implementations are the PAC ("page allocator classic") and the HPA (the
 * hugepage allocator), the latter fronted by a small extent cache (the SEC).
 */
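
/*
 * A hedged sketch of the routing idea (assuming, as in the pac and sec
 * headers, that pac_t and sec_t each embed a pai_t member named pai; the
 * field names below mirror the pa_shard_t definition later in this file, and
 * the actual selection logic in pa.c is more involved):
 *
 *	pai_t *pai = atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)
 *	    ? &shard->hpa_sec.pai : &shard->pac.pai;
 */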

typedef struct pa_central_s pa_central_t;
struct pa_central_s {
	hpa_central_t hpa;
};

/*
 * The stats for a particular pa_shard. Because of the way the ctl module
 * handles stats epoch data collection (it has its own arena_stats, and merges
 * the stats from each arena into it), this needs to live in the arena_stats_t;
 * hence we define it here and let the pa_shard have a pointer (rather than the
 * more natural approach of just embedding it in the pa_shard itself).
 *
 * We follow the arena_stats_t approach of marking the derived fields. These
 * are the ones that are not maintained on their own; instead, their values are
 * derived during those stats merges.
 */
typedef struct pa_shard_stats_s pa_shard_stats_t;
struct pa_shard_stats_s {
	/* Number of edata_t structs allocated by base, but not being used. */
	size_t edata_avail; /* Derived. */
	/*
	 * Stats specific to the PAC. For now, these are the only stats that
	 * exist, but there will eventually be stats for other page allocators.
	 * Things like edata_avail are meaningful across page allocators, but
	 * things like npurges are PAC-specific.
	 */
	pac_stats_t pac_stats;
};

/*
 * The local allocator handle. Keeps the state necessary to satisfy page-sized
 * allocations.
 *
 * The contents are mostly internal to the PA module. The key exception is that
 * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches'
 * decay_ts, for a couple of queries, passing them back to a PA function, or
 * acquiring decay.mtx and looking at decay.purging. The reasoning is that,
 * while PA decides what and how to purge, the arena code decides when and
 * where (e.g. on what thread). It's allowed to use the presence of another
 * purger to decide.
 * (The background thread code also touches some other decay internals, but
 * that's not fundamental; it's just an artifact of a partial refactoring, and
 * its accesses could be straightforwardly moved inside the decay module.)
 */
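/*
 * E.g., a sketch of the sort of peek described above (decay.mtx and
 * decay.purging are per the comment; the decay_dirty field name is an
 * assumption about pac_t's layout):
 *
 *	decay_t *decay = &shard->pac.decay_dirty;
 *	malloc_mutex_lock(tsdn, &decay->mtx);
 *	bool another_purger_active = decay->purging;
 *	malloc_mutex_unlock(tsdn, &decay->mtx);
 */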
typedef struct pa_shard_s pa_shard_t;
struct pa_shard_s {
	/* The central PA this shard is associated with. */
	pa_central_t *central;

	/*
	 * Number of pages in active extents.
	 *
	 * Synchronization: atomic.
	 */
	atomic_zu_t nactive;

	/*
	 * Whether or not we should prefer the hugepage allocator. Atomic since
	 * it may be concurrently modified by a thread setting extent hooks.
	 * Note that we still may do HPA operations in this arena; if use_hpa is
	 * changed from true to false, we'll free back to the hugepage allocator
	 * for those allocations.
	 */
	atomic_b_t use_hpa;

	/*
	 * If we never used the HPA to begin with, it wasn't initialized, and so
	 * we shouldn't try to e.g. acquire its mutexes during fork. This
	 * tracks that knowledge.
	 */
	bool ever_used_hpa;

	/* Allocates from a PAC. */
	pac_t pac;

	/*
	 * We place a small extent cache in front of the HPA, since we intend
	 * these configurations to use many fewer arenas, and therefore have a
	 * higher risk of hot locks.
	 */
	sec_t hpa_sec;
	hpa_shard_t hpa_shard;

	/* The source of edata_t objects. */
	edata_cache_t edata_cache;

	unsigned ind;

	malloc_mutex_t *stats_mtx;
	pa_shard_stats_t *stats;

	/* The emap this shard is tied to. */
	emap_t *emap;

	/* The base from which we get the ehooks and allocate metadata. */
	base_t *base;
};

static inline bool
pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
	return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
	    pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
}

static inline ehooks_t *
pa_shard_ehooks_get(pa_shard_t *shard) {
	return base_ehooks_get(shard->base);
}

/* Returns true on error. */
bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
    hpa_hooks_t *hpa_hooks);

/* Returns true on error. */
bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
    emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
    malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
    ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);

/*
 * This isn't exposed to users; we allow late enablement of the HPA shard so
 * that we can boot without worrying about the HPA, then turn it on in a0.
 */
bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
    const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts);
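
/*
 * A hedged boot-time sketch (illustrative only; error handling is elided, and
 * the opts/hooks objects are assumed to have been filled in by the caller):
 *
 *	pa_central_t central;
 *	pa_shard_t shard;
 *	bool err = pa_central_init(&central, base, use_hpa, &hooks);
 *	err |= pa_shard_init(tsdn, &shard, &central, emap, base, 0, &stats,
 *	    &stats_mtx, &cur_time, oversize_threshold, dirty_decay_ms,
 *	    muzzy_decay_ms);
 *	if (!err && use_hpa) {
 *		err = pa_shard_enable_hpa(tsdn, &shard, &hpa_opts, &sec_opts);
 *	}
 */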

/*
 * We stop using the HPA when custom extent hooks are installed, but still
 * redirect deallocations to it.
 */
void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * This does the PA-specific parts of arena reset (i.e. freeing all active
 * allocations).
 */
void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * Destroy all the remaining retained extents. Should only be called after
 * decaying all active, dirty, and muzzy extents to the retained state, as the
 * last step in destroying the shard.
 */
void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);

/* Gets an edata for the given allocation. */
edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
    size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
    bool *deferred_work_generated);
/* Returns true on error, in which case nothing changed. */
bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
/*
 * The same (returns true on error, in which case nothing changed). Sets
 * *deferred_work_generated to true if the shrink generated deferred work (e.g.
 * dirty pages in need of purging), and false otherwise.
 */
bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool *deferred_work_generated);
/*
 * Frees the given edata back to the pa. Sets *deferred_work_generated if the
 * deallocation generated deferred work (well, we always set it for now; but
 * this need not be the case).
 * (We could make deferred_work_generated the return value of course, but this
 * is more consistent with the shrink pathway and our error codes here.)
 */
void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
    bool *deferred_work_generated);
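
/*
 * An illustrative allocation lifecycle (a sketch, not real arena code; size
 * and szind are placeholders the caller would have computed):
 *
 *	bool slab = false, zero = false, guarded = false;
 *	bool deferred_work_generated = false;
 *	edata_t *edata = pa_alloc(tsdn, shard, size, PAGE, slab, szind, zero,
 *	    guarded, &deferred_work_generated);
 *	if (edata == NULL) {
 *		return NULL;
 *	}
 *	...
 *	pa_dalloc(tsdn, shard, edata, &deferred_work_generated);
 */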
bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
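
/*
 * Illustrative (a sketch; following this header's convention, a true return
 * from pa_decay_ms_set is assumed to mean error, and eagerness comes from the
 * caller's policy):
 *
 *	if (!pa_decay_ms_set(tsdn, shard, extent_state_dirty, 10 * 1000,
 *	    eagerness)) {
 *		assert(pa_decay_ms_get(shard, extent_state_dirty) == 10 * 1000);
 *	}
 */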

/*
 * Do deferred work on this PA shard.
 *
 * Morally, this should do both PAC decay and the HPA deferred work. For now,
 * though, the arena, background thread, and PAC modules are tightly interwoven
 * in a way that's tricky to extricate, so we only do the HPA-specific parts.
 */
void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
    bool deferral_allowed);
void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
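
/*
 * A sketch of how a background worker might drive this (illustrative; the
 * real scheduling lives in the background thread module, and the
 * interpretation of a zero return as "work is due now" is an assumption):
 *
 *	pa_shard_set_deferral_allowed(tsdn, shard, true);
 *	...
 *	uint64_t ns = pa_shard_time_until_deferred_work(tsdn, shard);
 *	if (ns == 0) {
 *		pa_shard_do_deferred_work(tsdn, shard);
 *	}
 */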

/******************************************************************************/
/*
 * Various bits of "boring" functionality that are still part of this module,
 * but that we relegate to pa_extra.c, to keep the core logic in pa.c as
 * readable as possible.
 */

/*
 * These fork phases are numbered to match the arena fork phases, to make the
 * correspondence easy to keep straight. That's why there's no prefork1.
 */
void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);

void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
    size_t *ndirty, size_t *nmuzzy);

void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
    pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
    hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
    size_t *resident);

/*
 * Reads the PA-owned mutex stats into the output stats array, at the
 * appropriate positions. Morally, these stats should really live in
 * pa_shard_stats_t, but the indices are sort of baked into the various mutex
 * prof macros; moving them there would be a good thing to do at some point.
 */
void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
    mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]);

#endif /* JEMALLOC_INTERNAL_PA_H */