1 | #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H |
2 | #define JEMALLOC_INTERNAL_CACHE_BIN_H |
3 | |
4 | #include "jemalloc/internal/ql.h" |
5 | #include "jemalloc/internal/safety_check.h" |
6 | #include "jemalloc/internal/sz.h" |
7 | |
8 | /* |
9 | * The cache_bins are the mechanism that the tcache and the arena use to |
10 | * communicate. The tcache fills from and flushes to the arena by passing a |
11 | * cache_bin_t to fill/flush. When the arena needs to pull stats from the |
12 | * tcaches associated with it, it does so by iterating over its |
13 | * cache_bin_array_descriptor_t objects and reading out per-bin stats it |
14 | * contains. This makes it so that the arena need not know about the existence |
15 | * of the tcache at all. |
16 | */ |
17 | |
18 | /* |
 * The type used to measure the size in bytes of each cache bin stack; we also
 * use it to represent *counts* of individual objects.
21 | */ |
22 | typedef uint16_t cache_bin_sz_t; |
23 | |
24 | /* |
25 | * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a |
26 | * bug starts leaking those. Make it look like the junk pattern but be distinct |
27 | * from it. |
28 | */ |
29 | static const uintptr_t cache_bin_preceding_junk = |
30 | (uintptr_t)0x7a7a7a7a7a7a7a7aULL; |
31 | /* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ |
32 | static const uintptr_t cache_bin_trailing_junk = |
33 | (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; |
34 | |
35 | /* |
 * The 16-bit cache_bin_sz_t above implies the following value for the maximum
 * number of items in any individual bin. The cache bins track their bounds
 * looking just at the low bits of a pointer, compared against a
 * cache_bin_sz_t. So that's
 *     1 << (sizeof(cache_bin_sz_t) * 8)
 * bytes spread across pointer-sized objects to get the maximum.
41 | */ |
42 | #define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \ |
43 | / sizeof(void *) - 1) |
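
/*
 * For example, with the 16-bit cache_bin_sz_t above and 8-byte pointers, that
 * works out to (1 << 16) / 8 - 1 = 8191 items per bin at most.
 */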
44 | |
45 | /* |
46 | * This lives inside the cache_bin (for locality reasons), and is initialized |
47 | * alongside it, but is otherwise not modified by any cache bin operations. |
48 | * It's logically public and maintained by its callers. |
49 | */ |
50 | typedef struct cache_bin_stats_s cache_bin_stats_t; |
51 | struct cache_bin_stats_s { |
52 | /* |
53 | * Number of allocation requests that corresponded to the size of this |
54 | * bin. |
55 | */ |
56 | uint64_t nrequests; |
57 | }; |
58 | |
59 | /* |
60 | * Read-only information associated with each element of tcache_t's tbins array |
61 | * is stored separately, mainly to reduce memory usage. |
62 | */ |
63 | typedef struct cache_bin_info_s cache_bin_info_t; |
64 | struct cache_bin_info_s { |
65 | cache_bin_sz_t ncached_max; |
66 | }; |
67 | |
68 | /* |
69 | * Responsible for caching allocations associated with a single size. |
70 | * |
71 | * Several pointers are used to track the stack. To save on metadata bytes, |
72 | * only the stack_head is a full sized pointer (which is dereferenced on the |
73 | * fastpath), while the others store only the low 16 bits -- this is correct |
74 | * because a single stack never takes more space than 2^16 bytes, and at the |
75 | * same time only equality checks are performed on the low bits. |
76 | * |
77 | * (low addr) (high addr) |
78 | * |------stashed------|------available------|------cached-----| |
79 | * ^ ^ ^ ^ |
80 | * low_bound(derived) low_bits_full stack_head low_bits_empty |
81 | */ |
82 | typedef struct cache_bin_s cache_bin_t; |
83 | struct cache_bin_s { |
84 | /* |
85 | * The stack grows down. Whenever the bin is nonempty, the head points |
86 | * to an array entry containing a valid allocation. When it is empty, |
87 | * the head points to one element past the owned array. |
88 | */ |
89 | void **stack_head; |
90 | /* |
	 * stack_head and tstats are both modified frequently. Keeping them
	 * close gives them a higher chance of landing on the same cacheline,
	 * and thus fewer write-backs.
94 | */ |
95 | cache_bin_stats_t tstats; |
96 | |
97 | /* |
98 | * The low bits of the address of the first item in the stack that |
99 | * hasn't been used since the last GC, to track the low water mark (min |
100 | * # of cached items). |
101 | * |
102 | * Since the stack grows down, this is a higher address than |
103 | * low_bits_full. |
104 | */ |
105 | uint16_t low_bits_low_water; |
106 | |
107 | /* |
	 * The low bits of the value that stack_head will take on when the array
	 * is full (of cached & stashed items). Remember that stack_head always
	 * points to a valid item when the array is nonempty -- so when the
	 * array is full, stack_head itself has these low bits and points at a
	 * position inside the array.
112 | * |
113 | * Recall that since the stack grows down, this is the lowest available |
114 | * address in the array for caching. Only adjusted when stashing items. |
115 | */ |
116 | uint16_t low_bits_full; |
117 | |
118 | /* |
119 | * The low bits of the value that stack_head will take on when the array |
120 | * is empty. |
121 | * |
122 | * The stack grows down -- this is one past the highest address in the |
123 | * array. Immutable after initialization. |
124 | */ |
125 | uint16_t low_bits_empty; |
126 | }; |
127 | |
128 | /* |
129 | * The cache_bins live inside the tcache, but the arena (by design) isn't |
130 | * supposed to know much about tcache internals. To let the arena iterate over |
131 | * associated bins, we keep (with the tcache) a linked list of |
132 | * cache_bin_array_descriptor_ts that tell the arena how to find the bins. |
133 | */ |
134 | typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; |
135 | struct cache_bin_array_descriptor_s { |
136 | /* |
137 | * The arena keeps a list of the cache bins associated with it, for |
138 | * stats collection. |
139 | */ |
140 | ql_elm(cache_bin_array_descriptor_t) link; |
141 | /* Pointers to the tcache bins. */ |
142 | cache_bin_t *bins; |
143 | }; |
144 | |
145 | static inline void |
146 | cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, |
147 | cache_bin_t *bins) { |
148 | ql_elm_new(descriptor, link); |
149 | descriptor->bins = bins; |
150 | } |
151 | |
152 | JEMALLOC_ALWAYS_INLINE bool |
153 | cache_bin_nonfast_aligned(const void *ptr) { |
154 | if (!config_uaf_detection) { |
155 | return false; |
156 | } |
157 | /* |
158 | * Currently we use alignment to decide which pointer to junk & stash on |
159 | * dealloc (for catching use-after-free). In some common cases a |
160 | * page-aligned check is needed already (sdalloc w/ config_prof), so we |
161 | * are getting it more or less for free -- no added instructions on |
162 | * free_fastpath. |
163 | * |
	 * Another way of deciding which pointer to sample is adding another
	 * thread_event to pick one every N bytes. That also adds no cost on
	 * the fastpath; however, it will tend to pick large allocations, which
	 * is not the desired behavior.
168 | */ |
169 | return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0; |
170 | } |
171 | |
172 | /* Returns ncached_max: Upper limit on ncached. */ |
173 | static inline cache_bin_sz_t |
174 | cache_bin_info_ncached_max(cache_bin_info_t *info) { |
175 | return info->ncached_max; |
176 | } |
177 | |
178 | /* |
179 | * Internal. |
180 | * |
181 | * Asserts that the pointer associated with earlier is <= the one associated |
182 | * with later. |
183 | */ |
184 | static inline void |
185 | cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { |
186 | if (earlier > later) { |
187 | assert(bin->low_bits_full > bin->low_bits_empty); |
188 | } |
189 | } |
190 | |
191 | /* |
192 | * Internal. |
193 | * |
 * Computes the byte difference between two low-bits positions, handling
 * uint16_t wraparound correctly. 'earlier' must be associated with the
 * position earlier in memory.
196 | */ |
197 | static inline uint16_t |
198 | cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) { |
199 | /* |
200 | * When it's racy, bin->low_bits_full can be modified concurrently. It |
201 | * can cross the uint16_t max value and become less than |
202 | * bin->low_bits_empty at the time of the check. |
203 | */ |
204 | if (!racy) { |
205 | cache_bin_assert_earlier(bin, earlier, later); |
206 | } |
207 | return later - earlier; |
208 | } |
209 | |
210 | /* |
211 | * Number of items currently cached in the bin, without checking ncached_max. |
 * The caller must specify whether the request is racy (i.e. whether concurrent
 * modifications are possible).
214 | */ |
215 | static inline cache_bin_sz_t |
216 | cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { |
217 | cache_bin_sz_t diff = cache_bin_diff(bin, |
218 | (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy); |
219 | cache_bin_sz_t n = diff / sizeof(void *); |
220 | /* |
221 | * We have undefined behavior here; if this function is called from the |
222 | * arena stats updating code, then stack_head could change from the |
223 | * first line to the next one. Morally, these loads should be atomic, |
224 | * but compilers won't currently generate comparisons with in-memory |
225 | * operands against atomics, and these variables get accessed on the |
226 | * fast paths. This should still be "safe" in the sense of generating |
227 | * the correct assembly for the foreseeable future, though. |
228 | */ |
229 | assert(n == 0 || *(bin->stack_head) != NULL || racy); |
230 | return n; |
231 | } |
232 | |
233 | /* |
 * Number of items currently cached in the bin, additionally asserting that the
 * count does not exceed ncached_max. The caller must know that no concurrent
 * modification of the cache_bin is possible.
237 | */ |
238 | static inline cache_bin_sz_t |
239 | cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { |
240 | cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, |
241 | /* racy */ false); |
242 | assert(n <= cache_bin_info_ncached_max(info)); |
243 | return n; |
244 | } |
245 | |
246 | /* |
247 | * Internal. |
248 | * |
249 | * A pointer to the position one past the end of the backing array. |
250 | * |
251 | * Do not call if racy, because both 'bin->stack_head' and 'bin->low_bits_full' |
252 | * are subject to concurrent modifications. |
253 | */ |
254 | static inline void ** |
255 | cache_bin_empty_position_get(cache_bin_t *bin) { |
256 | cache_bin_sz_t diff = cache_bin_diff(bin, |
257 | (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, |
258 | /* racy */ false); |
259 | uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; |
260 | void **ret = (void **)empty_bits; |
261 | |
262 | assert(ret >= bin->stack_head); |
263 | |
264 | return ret; |
265 | } |
266 | |
267 | /* |
268 | * Internal. |
269 | * |
270 | * Calculates low bits of the lower bound of the usable cache bin's range (see |
271 | * cache_bin_t visual representation above). |
272 | * |
 * None of the values read here are concurrently modified, so this should be
 * safe to call in a multithreaded environment. Currently concurrent access
 * happens only during arena statistics collection.
276 | */ |
277 | static inline uint16_t |
278 | cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { |
279 | return (uint16_t)bin->low_bits_empty - |
280 | info->ncached_max * sizeof(void *); |
281 | } |
282 | |
283 | /* |
284 | * Internal. |
285 | * |
286 | * A pointer to the position with the lowest address of the backing array. |
287 | */ |
288 | static inline void ** |
289 | cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { |
290 | cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); |
291 | void **ret = cache_bin_empty_position_get(bin) - ncached_max; |
292 | assert(ret <= bin->stack_head); |
293 | |
294 | return ret; |
295 | } |
296 | |
297 | /* |
298 | * As the name implies. This is important since it's not correct to try to |
299 | * batch fill a nonempty cache bin. |
300 | */ |
301 | static inline void |
302 | cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { |
303 | assert(cache_bin_ncached_get_local(bin, info) == 0); |
304 | assert(cache_bin_empty_position_get(bin) == bin->stack_head); |
305 | } |
306 | |
307 | /* |
 * Get low water, but without any of the correctness checking we do for the
 * caller-usable version, for use when we are temporarily breaking invariants
 * (like ncached >= low_water during flush).
311 | */ |
312 | static inline cache_bin_sz_t |
313 | cache_bin_low_water_get_internal(cache_bin_t *bin) { |
314 | return cache_bin_diff(bin, bin->low_bits_low_water, |
315 | bin->low_bits_empty, /* racy */ false) / sizeof(void *); |
316 | } |
317 | |
318 | /* Returns the numeric value of low water in [0, ncached]. */ |
319 | static inline cache_bin_sz_t |
320 | cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { |
321 | cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); |
322 | assert(low_water <= cache_bin_info_ncached_max(info)); |
323 | assert(low_water <= cache_bin_ncached_get_local(bin, info)); |
324 | |
325 | cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, |
326 | bin->low_bits_low_water); |
327 | |
328 | return low_water; |
329 | } |
330 | |
331 | /* |
332 | * Indicates that the current cache bin position should be the low water mark |
333 | * going forward. |
334 | */ |
335 | static inline void |
336 | cache_bin_low_water_set(cache_bin_t *bin) { |
337 | bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; |
338 | } |
339 | |
340 | static inline void |
341 | cache_bin_low_water_adjust(cache_bin_t *bin) { |
342 | if (cache_bin_ncached_get_internal(bin, /* racy */ false) |
343 | < cache_bin_low_water_get_internal(bin)) { |
344 | cache_bin_low_water_set(bin); |
345 | } |
346 | } |
347 | |
348 | JEMALLOC_ALWAYS_INLINE void * |
349 | cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { |
350 | /* |
351 | * success (instead of ret) should be checked upon the return of this |
352 | * function. We avoid checking (ret == NULL) because there is never a |
353 | * null stored on the avail stack (which is unknown to the compiler), |
354 | * and eagerly checking ret would cause pipeline stall (waiting for the |
355 | * cacheline). |
356 | */ |
357 | |
358 | /* |
359 | * This may read from the empty position; however the loaded value won't |
360 | * be used. It's safe because the stack has one more slot reserved. |
361 | */ |
362 | void *ret = *bin->stack_head; |
363 | uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; |
364 | void **new_head = bin->stack_head + 1; |
365 | |
366 | /* |
367 | * Note that the low water mark is at most empty; if we pass this check, |
368 | * we know we're non-empty. |
369 | */ |
370 | if (likely(low_bits != bin->low_bits_low_water)) { |
371 | bin->stack_head = new_head; |
372 | *success = true; |
373 | return ret; |
374 | } |
375 | if (!adjust_low_water) { |
376 | *success = false; |
377 | return NULL; |
378 | } |
379 | /* |
380 | * In the fast-path case where we call alloc_easy and then alloc, the |
381 | * previous checking and computation is optimized away -- we didn't |
382 | * actually commit any of our operations. |
383 | */ |
384 | if (likely(low_bits != bin->low_bits_empty)) { |
385 | bin->stack_head = new_head; |
386 | bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; |
387 | *success = true; |
388 | return ret; |
389 | } |
390 | *success = false; |
391 | return NULL; |
392 | } |
393 | |
394 | /* |
395 | * Allocate an item out of the bin, failing if we're at the low-water mark. |
396 | */ |
397 | JEMALLOC_ALWAYS_INLINE void * |
398 | cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { |
399 | /* We don't look at info if we're not adjusting low-water. */ |
400 | return cache_bin_alloc_impl(bin, success, false); |
401 | } |
402 | |
403 | /* |
404 | * Allocate an item out of the bin, even if we're currently at the low-water |
405 | * mark (and failing only if the bin is empty). |
406 | */ |
407 | JEMALLOC_ALWAYS_INLINE void * |
408 | cache_bin_alloc(cache_bin_t *bin, bool *success) { |
409 | return cache_bin_alloc_impl(bin, success, true); |
410 | } |
411 | |
412 | JEMALLOC_ALWAYS_INLINE cache_bin_sz_t |
413 | cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { |
414 | cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, |
415 | /* racy */ false); |
416 | if (n > num) { |
417 | n = (cache_bin_sz_t)num; |
418 | } |
419 | memcpy(out, bin->stack_head, n * sizeof(void *)); |
420 | bin->stack_head += n; |
421 | cache_bin_low_water_adjust(bin); |
422 | |
423 | return n; |
424 | } |
425 | |
426 | JEMALLOC_ALWAYS_INLINE bool |
427 | cache_bin_full(cache_bin_t *bin) { |
428 | return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); |
429 | } |
430 | |
431 | /* |
 * Scans the cached area of the bin for the given pointer, examining at most
 * opt_debug_double_free_max_scan entries. If ptr is found (a double free),
 * fires safety_check_fail and returns true.
434 | */ |
435 | JEMALLOC_ALWAYS_INLINE bool |
436 | cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { |
437 | if (!config_debug || opt_debug_double_free_max_scan == 0) { |
438 | return false; |
439 | } |
440 | |
	cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin,
	    /* racy */ false);
442 | unsigned max_scan = opt_debug_double_free_max_scan < ncached |
443 | ? opt_debug_double_free_max_scan |
444 | : ncached; |
445 | |
446 | void **cur = bin->stack_head; |
447 | void **limit = cur + max_scan; |
448 | for (; cur < limit; cur++) { |
449 | if (*cur == ptr) { |
			safety_check_fail(
			    "Invalid deallocation detected: double free of "
			    "pointer %p\n", ptr);
454 | return true; |
455 | } |
456 | } |
457 | return false; |
458 | } |
459 | |
460 | /* |
461 | * Free an object into the given bin. Fails only if the bin is full. |
462 | */ |
463 | JEMALLOC_ALWAYS_INLINE bool |
464 | cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { |
465 | if (unlikely(cache_bin_full(bin))) { |
466 | return false; |
467 | } |
468 | |
469 | if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { |
470 | return true; |
471 | } |
472 | |
473 | bin->stack_head--; |
474 | *bin->stack_head = ptr; |
475 | cache_bin_assert_earlier(bin, bin->low_bits_full, |
476 | (uint16_t)(uintptr_t)bin->stack_head); |
477 | |
478 | return true; |
479 | } |
480 | |
481 | /* Returns false if failed to stash (i.e. bin is full). */ |
482 | JEMALLOC_ALWAYS_INLINE bool |
483 | cache_bin_stash(cache_bin_t *bin, void *ptr) { |
484 | if (cache_bin_full(bin)) { |
485 | return false; |
486 | } |
487 | |
488 | /* Stash at the full position, in the [full, head) range. */ |
489 | uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; |
490 | /* Wraparound handled as well. */ |
491 | uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head, |
492 | /* racy */ false); |
493 | *(void **)((uintptr_t)bin->stack_head - diff) = ptr; |
494 | |
495 | assert(!cache_bin_full(bin)); |
496 | bin->low_bits_full += sizeof(void *); |
497 | cache_bin_assert_earlier(bin, bin->low_bits_full, low_bits_head); |
498 | |
499 | return true; |
500 | } |
501 | |
502 | /* |
503 | * Get the number of stashed pointers. |
504 | * |
505 | * When called from a thread not owning the TLS (i.e. racy = true), it's |
 * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full'
 * can be modified concurrently, and almost no assertions about their values
 * can be made.
509 | */ |
510 | JEMALLOC_ALWAYS_INLINE cache_bin_sz_t |
511 | cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, |
512 | bool racy) { |
513 | cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); |
514 | uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, |
515 | info); |
516 | |
517 | cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, |
518 | bin->low_bits_full, racy) / sizeof(void *); |
519 | assert(n <= ncached_max); |
520 | |
521 | if (!racy) { |
522 | /* Below are for assertions only. */ |
523 | void **low_bound = cache_bin_low_bound_get(bin, info); |
524 | |
525 | assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); |
526 | void *stashed = *(low_bound + n - 1); |
527 | bool aligned = cache_bin_nonfast_aligned(stashed); |
528 | #ifdef JEMALLOC_JET |
529 | /* Allow arbitrary pointers to be stashed in tests. */ |
530 | aligned = true; |
531 | #endif |
532 | assert(n == 0 || (stashed != NULL && aligned)); |
533 | } |
534 | |
535 | return n; |
536 | } |
537 | |
538 | JEMALLOC_ALWAYS_INLINE cache_bin_sz_t |
539 | cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { |
540 | cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, |
541 | /* racy */ false); |
542 | assert(n <= cache_bin_info_ncached_max(info)); |
543 | return n; |
544 | } |
545 | |
546 | /* |
547 | * Obtain a racy view of the number of items currently in the cache bin, in the |
548 | * presence of possible concurrent modifications. |
549 | */ |
550 | static inline void |
551 | cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, |
552 | cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { |
553 | cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true); |
554 | assert(n <= cache_bin_info_ncached_max(info)); |
555 | *ncached = n; |
556 | |
557 | n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true); |
558 | assert(n <= cache_bin_info_ncached_max(info)); |
559 | *nstashed = n; |
	/* We cannot assert ncached + nstashed <= ncached_max here (racy). */
561 | } |
562 | |
563 | /* |
564 | * Filling and flushing are done in batch, on arrays of void *s. For filling, |
565 | * the arrays go forward, and can be accessed with ordinary array arithmetic. |
566 | * For flushing, we work from the end backwards, and so need to use special |
567 | * accessors that invert the usual ordering. |
568 | * |
569 | * This is important for maintaining first-fit; the arena code fills with |
570 | * earliest objects first, and so those are the ones we should return first for |
571 | * cache_bin_alloc calls. When flushing, we should flush the objects that we |
572 | * wish to return later; those at the end of the array. This is better for the |
573 | * first-fit heuristic as well as for cache locality; the most recently freed |
574 | * objects are the ones most likely to still be in cache. |
575 | * |
 * This all sounds very hand-wavey and theoretical, but reversing the ordering
577 | * on one or the other pathway leads to measurable slowdowns. |
578 | */ |
579 | |
580 | typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; |
581 | struct cache_bin_ptr_array_s { |
582 | cache_bin_sz_t n; |
583 | void **ptr; |
584 | }; |
585 | |
586 | /* |
587 | * Declare a cache_bin_ptr_array_t sufficient for nval items. |
588 | * |
589 | * In the current implementation, this could be just part of a |
590 | * cache_bin_ptr_array_init_... call, since we reuse the cache bin stack memory. |
591 | * Indirecting behind a macro, though, means experimenting with linked-list |
592 | * representations is easy (since they'll require an alloca in the calling |
593 | * frame). |
594 | */ |
595 | #define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ |
596 | cache_bin_ptr_array_t name; \ |
597 | name.n = (nval) |
598 | |
599 | /* |
 * Start a fill. The bin must be empty, and this must be followed by a
601 | * finish_fill call before doing any alloc/dalloc operations on the bin. |
602 | */ |
603 | static inline void |
604 | cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, |
605 | cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { |
606 | cache_bin_assert_empty(bin, info); |
607 | arr->ptr = cache_bin_empty_position_get(bin) - nfill; |
608 | } |
609 | |
610 | /* |
 * While nfill in cache_bin_init_ptr_array_for_fill is the number we *intend*
 * to fill, nfilled here is the number we actually filled (which may be less,
 * in case of OOM).
614 | */ |
615 | static inline void |
616 | cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, |
617 | cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { |
618 | cache_bin_assert_empty(bin, info); |
619 | void **empty_position = cache_bin_empty_position_get(bin); |
620 | if (nfilled < arr->n) { |
621 | memmove(empty_position - nfilled, empty_position - arr->n, |
622 | nfilled * sizeof(void *)); |
623 | } |
624 | bin->stack_head = empty_position - nfilled; |
625 | } |
626 | |
627 | /* |
628 | * Same deal, but with flush. Unlike fill (which can fail), the user must flush |
629 | * everything we give them. |
630 | */ |
631 | static inline void |
632 | cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, |
633 | cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { |
634 | arr->ptr = cache_bin_empty_position_get(bin) - nflush; |
635 | assert(cache_bin_ncached_get_local(bin, info) == 0 |
636 | || *arr->ptr != NULL); |
637 | } |
638 | |
639 | static inline void |
640 | cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, |
641 | cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { |
642 | unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; |
643 | memmove(bin->stack_head + nflushed, bin->stack_head, |
644 | rem * sizeof(void *)); |
645 | bin->stack_head = bin->stack_head + nflushed; |
646 | cache_bin_low_water_adjust(bin); |
647 | } |
648 | |
649 | static inline void |
650 | cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, |
651 | cache_bin_info_t *info, cache_bin_ptr_array_t *arr, |
652 | cache_bin_sz_t nstashed) { |
653 | assert(nstashed > 0); |
654 | assert(cache_bin_nstashed_get_local(bin, info) == nstashed); |
655 | |
656 | void **low_bound = cache_bin_low_bound_get(bin, info); |
657 | arr->ptr = low_bound; |
658 | assert(*arr->ptr != NULL); |
659 | } |
660 | |
661 | static inline void |
662 | cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { |
663 | void **low_bound = cache_bin_low_bound_get(bin, info); |
664 | |
665 | /* Reset the bin local full position. */ |
666 | bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; |
667 | assert(cache_bin_nstashed_get_local(bin, info) == 0); |
668 | } |
669 | |
670 | /* |
671 | * Initialize a cache_bin_info to represent up to the given number of items in |
672 | * the cache_bins it is associated with. |
673 | */ |
674 | void cache_bin_info_init(cache_bin_info_t *bin_info, |
675 | cache_bin_sz_t ncached_max); |
676 | /* |
677 | * Given an array of initialized cache_bin_info_ts, determine how big an |
678 | * allocation is required to initialize a full set of cache_bin_ts. |
679 | */ |
680 | void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, |
681 | size_t *size, size_t *alignment); |
682 | |
683 | /* |
 * Actually initialize some cache bins. Callers should allocate the backing
 * memory indicated by a call to cache_bin_info_compute_alloc. They should then
 * preincrement, call init once for each bin and info, and then call
 * cache_bin_postincrement. *cur_offset will then be the offset immediately
 * past the end of the allocation.
689 | */ |
690 | void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, |
691 | void *alloc, size_t *cur_offset); |
692 | void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, |
693 | void *alloc, size_t *cur_offset); |
694 | void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, |
695 | size_t *cur_offset); |
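
/*
 * A hedged sketch of the full initialization sequence described above (the
 * backing allocation itself is elided):
 *
 *   size_t size, alignment;
 *   cache_bin_info_compute_alloc(infos, nbins, &size, &alignment);
 *   void *alloc = ...;   (allocate 'size' bytes aligned to 'alignment')
 *   size_t cur_offset = 0;
 *   cache_bin_preincrement(infos, nbins, alloc, &cur_offset);
 *   for (szind_t i = 0; i < nbins; i++) {
 *           cache_bin_init(&bins[i], &infos[i], alloc, &cur_offset);
 *   }
 *   cache_bin_postincrement(infos, nbins, alloc, &cur_offset);
 */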
696 | |
697 | /* |
698 | * If a cache bin was zero initialized (either because it lives in static or |
699 | * thread-local storage, or was memset to 0), this function indicates whether or |
700 | * not cache_bin_init was called on it. |
701 | */ |
702 | bool cache_bin_still_zero_initialized(cache_bin_t *bin); |
703 | |
704 | #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ |
705 | |