#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/emap.h"

enum emap_lock_result_e {
	emap_lock_result_success,
	emap_lock_result_failure,
	emap_lock_result_no_extent
};
typedef enum emap_lock_result_e emap_lock_result_t;

bool
emap_init(emap_t *emap, base_t *base, bool zeroed) {
	return rtree_new(&emap->rtree, base, zeroed);
}

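/*
 * Set the edata's state and mirror the new state into the rtree leaf elements
 * covering its first and last pages, keeping state lookups through the emap
 * consistent with the edata itself.
 */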
void
emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    extent_state_t state) {
	witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE);

	edata_state_set(edata, state);

	EMAP_DECLARE_RTREE_CTX;
	rtree_leaf_elm_t *elm1 = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
	    rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
	    /* init_missing */ false);
	assert(elm1 != NULL);
	rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL :
	    rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx,
	    (uintptr_t)edata_last_get(edata), /* dependent */ true,
	    /* init_missing */ false);

	rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state);

	emap_assert_mapped(tsdn, emap, edata);
}

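/*
 * Look up the extent adjacent to edata in the given direction and, if it is in
 * the expected (non-transitional) state and passes the coalescing / expanding
 * checks, move it into extent_state_merging and return it. Returns NULL if
 * there is no neighbor or it cannot be acquired.
 */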
static inline edata_t *
emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    extent_pai_t pai, extent_state_t expected_state, bool forward,
    bool expanding) {
	witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE);
	assert(!edata_guarded_get(edata));
	assert(!expanding || forward);
	assert(!edata_state_in_transition(expected_state));
	assert(expected_state == extent_state_dirty ||
	    expected_state == extent_state_muzzy ||
	    expected_state == extent_state_retained);

	void *neighbor_addr = forward ? edata_past_get(edata) :
	    edata_before_get(edata);
	/*
	 * This is subtle; the rtree code asserts that its input pointer is
	 * non-NULL, and this is a useful thing to check. But it's possible
	 * that edata corresponds to an address of (void *)PAGE (in practice,
	 * this has only been observed on FreeBSD when address-space
	 * randomization is on, but it could in principle happen anywhere). In
	 * this case, edata_before_get(edata) is NULL, triggering the assert.
	 */
	if (neighbor_addr == NULL) {
		return NULL;
	}

	EMAP_DECLARE_RTREE_CTX;
	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
	    rtree_ctx, (uintptr_t)neighbor_addr, /* dependent */ false,
	    /* init_missing */ false);
	if (elm == NULL) {
		return NULL;
	}

	rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn,
	    &emap->rtree, elm, /* dependent */ true);
	if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai,
	    expected_state, forward, expanding)) {
		return NULL;
	}

	/* From this point, the neighbor edata can be safely acquired. */
	edata_t *neighbor = neighbor_contents.edata;
	assert(edata_state_get(neighbor) == expected_state);
	emap_update_edata_state(tsdn, emap, neighbor, extent_state_merging);
	if (expanding) {
		extent_assert_can_expand(edata, neighbor);
	} else {
		extent_assert_can_coalesce(edata, neighbor);
	}

	return neighbor;
}

edata_t *
emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    extent_pai_t pai, extent_state_t expected_state, bool forward) {
	return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai,
	    expected_state, forward, /* expanding */ false);
}

edata_t *
emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap,
    edata_t *edata, extent_pai_t pai, extent_state_t expected_state) {
	/* Try expanding forward. */
	return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai,
	    expected_state, /* forward */ true, /* expanding */ true);
}

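/*
 * Release an edata that was previously acquired (i.e. put into a transitional
 * state) by moving it into new_state.
 */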
void
emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    extent_state_t new_state) {
	assert(emap_edata_in_transition(tsdn, emap, edata));
	assert(emap_edata_is_acquired(tsdn, emap, edata));

	emap_update_edata_state(tsdn, emap, edata, new_state);
}

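/*
 * Look up the rtree leaf elements covering the first and last pages of edata.
 * Returns true on lookup failure, which is only possible when !dependent.
 */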
static bool
emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx,
    const edata_t *edata, bool dependent, bool init_missing,
    rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) {
	*r_elm_a = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx,
	    (uintptr_t)edata_base_get(edata), dependent, init_missing);
	if (!dependent && *r_elm_a == NULL) {
		return true;
	}
	assert(*r_elm_a != NULL);

	*r_elm_b = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx,
	    (uintptr_t)edata_last_get(edata), dependent, init_missing);
	if (!dependent && *r_elm_b == NULL) {
		return true;
	}
	assert(*r_elm_b != NULL);

	return false;
}

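/*
 * Write the given edata (or NULL, to clear) and its metadata into the boundary
 * leaf element elm_a and, when non-NULL, elm_b.
 */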
static void
emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a,
    rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) {
	rtree_contents_t contents;
	contents.edata = edata;
	contents.metadata.szind = szind;
	contents.metadata.slab = slab;
	contents.metadata.is_head = (edata == NULL) ? false :
	    edata_is_head_get(edata);
	contents.metadata.state = (edata == NULL) ? 0 : edata_state_get(edata);
	rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents);
	if (elm_b != NULL) {
		rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents);
	}
}

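/*
 * Register the first and last pages of an active edata in the emap. Returns
 * true if the needed leaf elements could not be obtained.
 */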
bool
emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    szind_t szind, bool slab) {
	assert(edata_state_get(edata) == extent_state_active);
	EMAP_DECLARE_RTREE_CTX;

	rtree_leaf_elm_t *elm_a, *elm_b;
	bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata,
	    false, true, &elm_a, &elm_b);
	if (err) {
		return true;
	}
	assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a,
	    /* dependent */ false).edata == NULL);
	assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b,
	    /* dependent */ false).edata == NULL);
	emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab);
	return false;
}

/* Invoked *after* emap_register_boundary. */
void
emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
    szind_t szind) {
	EMAP_DECLARE_RTREE_CTX;

	assert(edata_slab_get(edata));
	assert(edata_state_get(edata) == extent_state_active);

	if (config_debug) {
		/* Making sure the boundary is registered already. */
		rtree_leaf_elm_t *elm_a, *elm_b;
		bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx,
		    edata, /* dependent */ true, /* init_missing */ false,
		    &elm_a, &elm_b);
		assert(!err);
		rtree_contents_t contents_a, contents_b;
		contents_a = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a,
		    /* dependent */ true);
		contents_b = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b,
		    /* dependent */ true);
		assert(contents_a.edata == edata && contents_b.edata == edata);
		assert(contents_a.metadata.slab && contents_b.metadata.slab);
	}

	rtree_contents_t contents;
	contents.edata = edata;
	contents.metadata.szind = szind;
	contents.metadata.slab = true;
	contents.metadata.state = extent_state_active;
	contents.metadata.is_head = false; /* Not allowed to access. */

	assert(edata_size_get(edata) > (2 << LG_PAGE));
	rtree_write_range(tsdn, &emap->rtree, rtree_ctx,
	    (uintptr_t)edata_base_get(edata) + PAGE,
	    (uintptr_t)edata_last_get(edata) - PAGE, contents);
}

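/* Clear the emap entries for the first and last pages of edata. */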
void
emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
	/*
	 * The edata must either be in an acquired state, or protected by
	 * state-based locks.
	 */
	if (!emap_edata_is_acquired(tsdn, emap, edata)) {
		witness_assert_positive_depth_to_rank(
		    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE);
	}

	EMAP_DECLARE_RTREE_CTX;
	rtree_leaf_elm_t *elm_a, *elm_b;

	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata,
	    true, false, &elm_a, &elm_b);
	emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES,
	    false);
}

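/*
 * Clear the emap entries for a slab's interior pages (everything except the
 * first and last pages); a no-op for slabs of two pages or fewer.
 */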
void
emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
	EMAP_DECLARE_RTREE_CTX;

	assert(edata_slab_get(edata));
	if (edata_size_get(edata) > (2 << LG_PAGE)) {
		rtree_clear_range(tsdn, &emap->rtree, rtree_ctx,
		    (uintptr_t)edata_base_get(edata) + PAGE,
		    (uintptr_t)edata_last_get(edata) - PAGE);
	}
}

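/*
 * Rewrite the szind/slab metadata recorded for edata (the head page, plus the
 * last page for multi-page slabs); calling with szind == SC_NSIZES is a no-op.
 */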
void
emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
    bool slab) {
	EMAP_DECLARE_RTREE_CTX;

	if (szind != SC_NSIZES) {
		rtree_contents_t contents;
		contents.edata = edata;
		contents.metadata.szind = szind;
		contents.metadata.slab = slab;
		contents.metadata.is_head = edata_is_head_get(edata);
		contents.metadata.state = edata_state_get(edata);

		rtree_write(tsdn, &emap->rtree, rtree_ctx,
		    (uintptr_t)edata_addr_get(edata), contents);
		/*
		 * Recall that this is called only for active->inactive and
		 * inactive->active transitions (since only active extents have
		 * meaningful values for szind and slab). Active, non-slab
		 * extents only need to handle lookups at their head (on
		 * deallocation), so we don't bother filling in the end
		 * boundary.
		 *
		 * For slab extents, we do the end-mapping change. This still
		 * leaves the interior unmodified; an emap_register_interior
		 * call is coming in those cases, though.
		 */
		if (slab && edata_size_get(edata) > PAGE) {
			uintptr_t key = (uintptr_t)edata_past_get(edata)
			    - (uintptr_t)PAGE;
			rtree_write(tsdn, &emap->rtree, rtree_ctx, key,
			    contents);
		}
	}
}

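/*
 * Look up (creating leaf elements as needed) the rtree entries that a split of
 * edata into lead and trail parts will have to write, without modifying any
 * mappings yet. Returns true if any of the elements could not be obtained.
 */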
bool
emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
    edata_t *edata, size_t size_a, edata_t *trail, size_t size_b) {
	EMAP_DECLARE_RTREE_CTX;

	/*
	 * We use incorrect constants for things like arena ind, zero, ranged,
	 * and commit state, and head status. This is a fake edata_t, used to
	 * facilitate a lookup.
	 */
	edata_t lead = {0};
	edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0,
	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);

	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true,
	    &prepare->lead_elm_a, &prepare->lead_elm_b);
	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, false, true,
	    &prepare->trail_elm_a, &prepare->trail_elm_b);

	if (prepare->lead_elm_a == NULL || prepare->lead_elm_b == NULL
	    || prepare->trail_elm_a == NULL || prepare->trail_elm_b == NULL) {
		return true;
	}
	return false;
}

void
emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
    edata_t *lead, size_t size_a, edata_t *trail, size_t size_b) {
	/*
	 * We should think about not writing to the lead leaf element. We can
	 * get into situations where a racing realloc-like call can disagree
	 * with a size lookup request. I think it's fine to declare that these
	 * situations are race bugs, but there's an argument to be made that
	 * for things like xallocx, a size lookup call should return either the
	 * old size or the new size, but not anything else.
	 */
	emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a,
	    prepare->lead_elm_b, lead, SC_NSIZES, /* slab */ false);
	emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a,
	    prepare->trail_elm_b, trail, SC_NSIZES, /* slab */ false);
}

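/*
 * Look up the boundary leaf elements of lead and trail ahead of a merge; the
 * actual rewrites happen in emap_merge_commit.
 */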
void
emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
    edata_t *lead, edata_t *trail) {
	EMAP_DECLARE_RTREE_CTX;
	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false,
	    &prepare->lead_elm_a, &prepare->lead_elm_b);
	emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false,
	    &prepare->trail_elm_a, &prepare->trail_elm_b);
}

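/*
 * Commit a merge: clear the leaf elements at the now-interior boundary (lead's
 * last page and trail's first page, when they have their own elements) and
 * write lead into the first and last pages of the merged extent.
 */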
void
emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
    edata_t *lead, edata_t *trail) {
	rtree_contents_t clear_contents;
	clear_contents.edata = NULL;
	clear_contents.metadata.szind = SC_NSIZES;
	clear_contents.metadata.slab = false;
	clear_contents.metadata.is_head = false;
	clear_contents.metadata.state = (extent_state_t)0;

	if (prepare->lead_elm_b != NULL) {
		rtree_leaf_elm_write(tsdn, &emap->rtree,
		    prepare->lead_elm_b, clear_contents);
	}

	rtree_leaf_elm_t *merged_b;
	if (prepare->trail_elm_b != NULL) {
		rtree_leaf_elm_write(tsdn, &emap->rtree,
		    prepare->trail_elm_a, clear_contents);
		merged_b = prepare->trail_elm_b;
	} else {
		merged_b = prepare->trail_elm_a;
	}

	emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b,
	    lead, SC_NSIZES, false);
}

void
emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
	EMAP_DECLARE_RTREE_CTX;

	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
	    (uintptr_t)edata_base_get(edata));
	assert(contents.edata == edata);
	assert(contents.metadata.is_head == edata_is_head_get(edata));
	assert(contents.metadata.state == edata_state_get(edata));
}

void
emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
	emap_full_alloc_ctx_t context1 = {0};
	emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata),
	    &context1);
	assert(context1.edata == NULL);

	emap_full_alloc_ctx_t context2 = {0};
	emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata),
	    &context2);
	assert(context2.edata == NULL);
}