/* ----------------------------------------------------------------------------
Copyright (c) 2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
  Implements a cache of segments to avoid expensive OS calls and to reuse
  the commit_mask to optimize the commit/decommit calls.
  The full memory map of all segments is also implemented here.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include "bitmap.h"  // atomic bitmap

//#define MI_CACHE_DISABLE 1  // define to completely disable the segment cache

#define MI_CACHE_FIELDS (16)
#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)  // 1024 on 64-bit

#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX)
#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)  // note: update if MI_CACHE_FIELDS changes
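// Cache capacity arithmetic: MI_CACHE_FIELDS (16) bitmap fields of MI_BITMAP_FIELD_BITS
// bits each give MI_CACHE_MAX slots (16*64 = 1024 on 64-bit); every slot caches at most
// one MI_SEGMENT_SIZE block, so the cache is bounded to 1024 cached segments.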

typedef struct mi_cache_slot_s {
  void* p;
  size_t memid;
  bool is_pinned;
  mi_commit_mask_t commit_mask;
  mi_commit_mask_t decommit_mask;
  _Atomic(mi_msecs_t) expire;
} mi_cache_slot_t;

static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX];  // = 0

static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };  // zero bit = available!
static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];  // zero bit = free
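// Slot life-cycle (as implied by push/pop below): a push first claims a zero bit in
// `cache_inuse`, fills the slot, and then clears its bit in `cache_available` (or
// `cache_available_large`) to publish it; a pop claims the bit in the available map,
// reads out the slot, and finally clears the `cache_inuse` bit to free the slot again.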

static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) {
  mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg);
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  return _mi_arena_memid_is_suitable(slot->memid, req_arena_id);
}

mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
  return NULL;
#else

  // only segment blocks
  if (size != MI_SEGMENT_SIZE) return NULL;

  // numa node determines start field
  const int numa_node = _mi_os_numa_node(tld);
  size_t start_field = 0;
  if (numa_node > 0) {
    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
  }
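  // For example (illustrative only): with 16 cache fields and 4 NUMA nodes each node gets
  // 16/4 = 4 fields, so node 2 starts probing at field 8; with more than 16 nodes the
  // integer division yields 0 and all nodes start at field 0.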

  // find an available slot
  mi_bitmap_index_t bitidx = 0;
  bool claimed = false;
  mi_arena_id_t req_arena_id = _req_arena_id;
  mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable;  // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?

  if (*large) {  // large allowed?
    claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
    if (claimed) *large = true;
  }
  if (!claimed) {
    claimed = _mi_bitmap_try_find_from_claim_pred(cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
    if (claimed) *large = false;
  }

  if (!claimed) return NULL;

  // found a slot
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  void* p = slot->p;
  *memid = slot->memid;
  *is_pinned = slot->is_pinned;
  *is_zero = false;
  *commit_mask = slot->commit_mask;
  *decommit_mask = slot->decommit_mask;
  slot->p = NULL;
  mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);

  // mark the slot as free again
  mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
  _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
  return p;
#endif
}
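// Note on the pop contract: the returned block is never reported as zeroed (*is_zero is
// set to false) and may be only partially committed; the caller is expected to consult the
// returned commit_mask/decommit_mask before touching the memory.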

static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
{
  if (mi_commit_mask_is_empty(cmask)) {
    // nothing
  }
  else if (mi_commit_mask_is_full(cmask)) {
    _mi_os_decommit(p, total, stats);
  }
  else {
    // todo: one call to decommit the whole at once?
    mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
    size_t part = total/MI_COMMIT_MASK_BITS;
    size_t idx;
    size_t count;
    mi_commit_mask_foreach(cmask, idx, count) {
      void* start = (uint8_t*)p + (idx*part);
      size_t size = count*part;
      _mi_os_decommit(start, size, stats);
    }
    mi_commit_mask_foreach_end()
  }
  mi_commit_mask_create_empty(cmask);
}
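// Illustrative example (the bit count is an assumption, not fixed by this file): with
// total = MI_SEGMENT_SIZE and MI_COMMIT_MASK_BITS = 64, each mask bit covers
// part = MI_SEGMENT_SIZE/64 bytes; a run of set bits starting at idx 4 with count 3
// then decommits the range [p + 4*part, p + 7*part).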

#define MI_MAX_PURGE_PER_PUSH (4)
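// Purging: a slot is only decommitted after it is first re-claimed in `cache_available`
// (so no concurrent pop can hand it out), its expiration is re-checked under that claim,
// and readers of abandoned segments have drained; afterwards the slot is made available
// again for a pop.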

static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld)
{
  MI_UNUSED(tld);
  if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
  mi_msecs_t now = _mi_clock_now();
  size_t purged = 0;
  const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
  size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
  for (size_t visited = 0; visited < max_visits; visited++,idx++) {  // visit N slots
    if (idx >= MI_CACHE_MAX) idx = 0;  // wrap
    mi_cache_slot_t* slot = &cache[idx];
    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
    if (expire != 0 && (force || now >= expire)) {  // racy read
      // seems expired, first claim it from available
      purged++;
      mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
      if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
        // was available, we claimed it
        expire = mi_atomic_loadi64_acquire(&slot->expire);
        if (expire != 0 && (force || now >= expire)) {  // safe read
          // still expired, decommit it
          mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
          mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
          _mi_abandoned_await_readers();  // wait until safe to decommit
          // decommit committed parts
          // TODO: instead of decommit, we could also free to the OS?
          mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
          mi_commit_mask_create_empty(&slot->decommit_mask);
        }
        _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
      }
      if (!force && purged > MI_MAX_PURGE_PER_PUSH) break;  // bound to no more than N purge tries per push
    }
  }
}

void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
  mi_segment_cache_purge(force, tld);
}

mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
  return false;
#else

  // only for normal segment blocks
  if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;

  // numa node determines start field
  int numa_node = _mi_os_numa_node(NULL);
  size_t start_field = 0;
  if (numa_node > 0) {
    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
  }

  // purge expired entries
  mi_segment_cache_purge(false /* force? */, tld);

  // find an available slot
  mi_bitmap_index_t bitidx;
  bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
  if (!claimed) return false;

  mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx));
  mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
#if MI_DEBUG>1
  if (is_pinned || is_large) {
    mi_assert_internal(mi_commit_mask_is_full(commit_mask));
  }
#endif

  // set the slot
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  slot->p = start;
  slot->memid = memid;
  slot->is_pinned = is_pinned;
  mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
  slot->commit_mask = *commit_mask;
  slot->decommit_mask = *decommit_mask;
  if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
    long delay = mi_option_get(mi_option_segment_decommit_delay);
    if (delay == 0) {
      _mi_abandoned_await_readers();  // wait until safe to decommit
      mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
      mi_commit_mask_create_empty(&slot->decommit_mask);
    }
    else {
      mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
    }
  }

  // make it available
  _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx);
  return true;
#endif
}
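
/* -----------------------------------------------------------
  Usage sketch (illustrative only; the real call sites are in the segment layer,
  and `allow_large`, `req_arena_id`, and `tld` below are caller-supplied placeholders):

    mi_commit_mask_t commit_mask, decommit_mask;
    bool large = allow_large, is_pinned = false, is_zero = false;
    size_t memid;
    void* p = _mi_segment_cache_pop(MI_SEGMENT_SIZE, &commit_mask, &decommit_mask,
                                    &large, &is_pinned, &is_zero, req_arena_id, &memid, tld);
    if (p == NULL) {
      // fall back to a fresh OS/arena allocation
    }
    // on segment free, try to return the block to the cache; if that fails, free it to the OS:
    // if (!_mi_segment_cache_push(p, MI_SEGMENT_SIZE, memid, &commit_mask, &decommit_mask,
    //                             large, is_pinned, tld)) { ... }
----------------------------------------------------------- */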


/* -----------------------------------------------------------
  The following functions are to reliably find the segment or
  block that encompasses any pointer p (or NULL if it is not
  in any of our segments).
  We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
  set to 1 if it contains the segment meta data.
----------------------------------------------------------- */


#if (MI_INTPTR_SIZE==8)
#define MI_MAX_ADDRESS ((size_t)20 << 40)  // 20TB
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30)   // 2GiB
#endif

#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
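// Map size arithmetic (assuming the 64MiB segment size mentioned above, on 64-bit):
// 20TB / 64MiB = 327680 bits, i.e. MI_SEGMENT_MAP_SIZE = 40KiB and
// MI_SEGMENT_MAP_WSIZE = 5120 words, matching the "2KiB per TB" note below.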

static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1];  // 2KiB per TB with 64MiB segments

static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
  mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE?
  if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
    *bitidx = 0;
    return MI_SEGMENT_MAP_WSIZE;
  }
  else {
    const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
    *bitidx = segindex % MI_INTPTR_BITS;
    const size_t mapindex = segindex / MI_INTPTR_BITS;
    mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
    return mapindex;
  }
}
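// Example (illustrative, assuming 64MiB segments on 64-bit): a segment at address
// 0x100000000 (4GiB) has segindex 4GiB/64MiB = 64, so it maps to word index 1, bit 0;
// a segment at or above MI_MAX_ADDRESS falls outside the map and uses the sentinel
// index MI_SEGMENT_MAP_WSIZE.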

void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
  if (index==MI_SEGMENT_MAP_WSIZE) return;
  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  uintptr_t newmask;
  do {
    newmask = (mask | ((uintptr_t)1 << bitidx));
  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}

void _mi_segment_map_freed_at(const mi_segment_t* segment) {
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
  if (index == MI_SEGMENT_MAP_WSIZE) return;
  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  uintptr_t newmask;
  do {
    newmask = (mask & ~((uintptr_t)1 << bitidx));
  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}

// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
  mi_segment_t* segment = _mi_ptr_segment(p);
  if (segment == NULL) return NULL;
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
  const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
    return segment; // yes, allocated by us
  }
  if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
  // TODO: maintain a max/min allocated range for more efficient rejection of invalid pointers?

  // search downwards for the first segment in case it is an interior pointer
  // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
  // valid huge objects
  // note: we could maintain a lowest index to speed up the path for invalid pointers?
  size_t lobitidx;
  size_t loindex;
  uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
  if (lobits != 0) {
    loindex = index;
    lobitidx = mi_bsr(lobits);  // lobits != 0
  }
  else if (index == 0) {
    return NULL;
  }
  else {
    mi_assert_internal(index > 0);
    uintptr_t lomask = mask;
    loindex = index;
    do {
      loindex--;
      lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
    } while (lomask == 0 && loindex > 0);  // keep scanning down while the map word is empty
    if (lomask == 0) return NULL;
    lobitidx = mi_bsr(lomask);  // lomask != 0
  }
  mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
  // take difference as the addresses could be larger than the MAX_ADDRESS space.
  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
  segment = (mi_segment_t*)((uint8_t*)segment - diff);

  if (segment == NULL) return NULL;
  mi_assert_internal((void*)segment < p);
  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
  mi_assert_internal(cookie_ok);
  if mi_unlikely(!cookie_ok) return NULL;
  if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
  return segment;
}
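// Worked example for the interior-pointer search above (illustrative, 64-bit): if p maps
// to word 3 bit 5 and the nearest set bit below is word 2 bit 60, then
// diff = ((3-2)*64 + 5 - 60) * MI_SEGMENT_SIZE = 9 * MI_SEGMENT_SIZE, i.e. the candidate
// segment starts 9 blocks below p; the cookie and size checks then validate it.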

// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
  return (_mi_segment_of(p) != NULL);
}

mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  return mi_is_valid_pointer(p);
}

/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
  mi_segment_t* segment = _mi_segment_of(p);
  if (segment == NULL) {
    if (size != NULL) *size = 0;
    return NULL;
  }
  else {
    if (size != NULL) *size = segment->segment_size;
    return segment;
  }
}
*/