/* ----------------------------------------------------------------------------
Copyright (c) 2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
  Implements a cache of segments to avoid expensive OS calls, and reuses
  the commit_mask to optimize the commit/decommit calls.
  The full memory map of all segments is also implemented here.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include "bitmap.h"  // atomic bitmap

//#define MI_CACHE_DISABLE 1    // define to completely disable the segment cache

#define MI_CACHE_FIELDS      (16)
#define MI_CACHE_MAX         (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)    // 1024 on 64-bit

#define BITS_SET()           MI_ATOMIC_VAR_INIT(UINTPTR_MAX)
#define MI_CACHE_BITS_SET    MI_INIT16(BITS_SET)    // note: update if MI_CACHE_FIELDS changes

typedef struct mi_cache_slot_s {
  void*               p;              // address of the cached (segment-sized) memory block
  size_t              memid;          // memory id of the originating OS/arena allocation
  bool                is_pinned;      // the memory cannot be decommitted or reset
  mi_commit_mask_t    commit_mask;    // mask of the committed parts of the block
  mi_commit_mask_t    decommit_mask;  // mask of the parts that are scheduled to be decommitted
  _Atomic(mi_msecs_t) expire;         // expiration time in msecs (0 if none); set on push when decommit is delayed
} mi_cache_slot_t;

static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX];    // = 0

static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };        // zero bit = available!
static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];                                    // zero bit = free

static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) {
  mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg);
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  return _mi_arena_memid_is_suitable(slot->memid, req_arena_id);
}

mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
  return NULL;
#else

  // only segment blocks
  if (size != MI_SEGMENT_SIZE) return NULL;

  // numa node determines start field
  const int numa_node = _mi_os_numa_node(tld);
  size_t start_field = 0;
  if (numa_node > 0) {
    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
  }
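
  // a worked sketch of the mapping above: with MI_CACHE_FIELDS == 16 and, say, 4
  // NUMA nodes, nodes 0..3 start probing at fields 0, 4, 8, and 12 respectively,
  // so a pop tends to find slots that were pushed from the same NUMA node (the
  // same mapping is used in _mi_segment_cache_push below).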

  // find an available slot
  mi_bitmap_index_t bitidx = 0;
  bool claimed = false;
  mi_arena_id_t req_arena_id = _req_arena_id;
  mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable;  // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?

  if (*large) {  // large allowed?
    claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
    if (claimed) *large = true;
  }
  if (!claimed) {
    claimed = _mi_bitmap_try_find_from_claim_pred(cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
    if (claimed) *large = false;
  }

  if (!claimed) return NULL;

  // found a slot
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  void* p = slot->p;
  *memid = slot->memid;
  *is_pinned = slot->is_pinned;
  *is_zero = false;
  *commit_mask = slot->commit_mask;
  *decommit_mask = slot->decommit_mask;
  slot->p = NULL;
  mi_atomic_storei64_release(&slot->expire, (mi_msecs_t)0);

  // mark the slot as free again
  mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
  _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
  return p;
#endif
}
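
/* A hedged usage sketch (illustration only, not part of the build): the real
   caller lives in segment.c, and `_mi_arena_id_none()` is assumed from the arena
   API. A segment allocation first tries the cache before mapping fresh memory:

   static void* segment_alloc_example(bool* large, mi_os_tld_t* tld) {
     mi_commit_mask_t commit_mask, decommit_mask;
     bool is_pinned, is_zero;
     size_t memid;
     void* p = _mi_segment_cache_pop(MI_SEGMENT_SIZE, &commit_mask, &decommit_mask,
                                     large, &is_pinned, &is_zero,
                                     _mi_arena_id_none(), &memid, tld);
     if (p != NULL) return p;  // cache hit: the masks and memid are filled in
     return NULL;              // cache miss: allocate fresh from the arena/OS (elided)
   }
*/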

static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
{
  if (mi_commit_mask_is_empty(cmask)) {
    // nothing committed, nothing to do
  }
  else if (mi_commit_mask_is_full(cmask)) {
    _mi_os_decommit(p, total, stats);
  }
  else {
    // todo: decommit the whole range with a single OS call?
    mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
    size_t part = total/MI_COMMIT_MASK_BITS;
    size_t idx;
    size_t count;
    mi_commit_mask_foreach(cmask, idx, count) {
      void* start = (uint8_t*)p + (idx*part);
      size_t size = count*part;
      _mi_os_decommit(start, size, stats);
    }
    mi_commit_mask_foreach_end()
  }
  mi_commit_mask_create_empty(cmask);
}
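
// a worked sketch of the partial case above: with total == MI_SEGMENT_SIZE (64MiB)
// and, say, MI_COMMIT_MASK_BITS == 1024 (the actual value is configuration
// dependent), part is 64KiB; a run of count == 4 set bits starting at idx == 8
// then decommits 4*64KiB = 256KiB at offset 8*64KiB = 512KiB into the block.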

#define MI_MAX_PURGE_PER_PUSH  (4)

static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld)
{
  MI_UNUSED(tld);
  if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
  mi_msecs_t now = _mi_clock_now();
  size_t purged = 0;
  const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
  size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */);
  for (size_t visited = 0; visited < max_visits; visited++,idx++) {  // visit N slots
    if (idx >= MI_CACHE_MAX) idx = 0;  // wrap
    mi_cache_slot_t* slot = &cache[idx];
    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
    if (expire != 0 && (force || now >= expire)) {  // racy read
      // seems expired, first claim it from available
      purged++;
      mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
      if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
        // was available, we claimed it
        expire = mi_atomic_loadi64_acquire(&slot->expire);
        if (expire != 0 && (force || now >= expire)) {  // safe read
          // still expired, decommit it
          mi_atomic_storei64_relaxed(&slot->expire, (mi_msecs_t)0);
          mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
          _mi_abandoned_await_readers();  // wait until safe to decommit
          // decommit the committed parts
          // TODO: instead of decommit, we could also free to the OS?
          mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
          mi_commit_mask_create_empty(&slot->decommit_mask);
        }
        _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
      }
      if (!force && purged > MI_MAX_PURGE_PER_PUSH) break;  // bound the purge tries per push
    }
  }
}

void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
  mi_segment_cache_purge(force, tld);
}

mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
  return false;
#else

  // only for normal segment blocks
  if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;

  // numa node determines start field
  int numa_node = _mi_os_numa_node(NULL);
  size_t start_field = 0;
  if (numa_node > 0) {
    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
  }

  // purge expired entries
  mi_segment_cache_purge(false /* force? */, tld);

  // find an available slot
  mi_bitmap_index_t bitidx;
  bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
  if (!claimed) return false;

  mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx));
  mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
#if MI_DEBUG>1
  if (is_pinned || is_large) {
    mi_assert_internal(mi_commit_mask_is_full(commit_mask));
  }
#endif

  // set the slot
  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
  slot->p = start;
  slot->memid = memid;
  slot->is_pinned = is_pinned;
  mi_atomic_storei64_relaxed(&slot->expire, (mi_msecs_t)0);
  slot->commit_mask = *commit_mask;
  slot->decommit_mask = *decommit_mask;
  if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) {
    long delay = mi_option_get(mi_option_segment_decommit_delay);
    if (delay == 0) {
      _mi_abandoned_await_readers();  // wait until safe to decommit
      mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
      mi_commit_mask_create_empty(&slot->decommit_mask);
    }
    else {
      mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
    }
  }

  // make it available
  _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx);
  return true;
#endif
}
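
/* A hedged usage sketch (illustration only, not part of the build): the real call
   site lives in segment.c and the segment field names below are assumptions. On
   segment free, the memory is offered to the cache before going back to the OS:

   static void segment_free_example(mi_segment_t* segment, mi_os_tld_t* tld) {
     if (_mi_segment_cache_push(segment, MI_SEGMENT_SIZE, segment->memid,
                                &segment->commit_mask, &segment->decommit_mask,
                                segment->mem_is_large, segment->mem_is_pinned, tld)) {
       return;  // cached: a later pop can reuse it without an OS call
     }
     // cache full or slot unavailable: free directly to the arena/OS (elided)
   }
*/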


/* -----------------------------------------------------------
  The following functions are used to reliably find the segment or
  block that encompasses any pointer p (or NULL if it is not
  in any of our segments).
  We maintain a bitmap of all memory with one bit per MI_SEGMENT_SIZE
  (64MiB) area; a bit is set if that area contains the segment meta data.
----------------------------------------------------------- */


#if (MI_INTPTR_SIZE==8)
#define MI_MAX_ADDRESS    ((size_t)20 << 40)  // 20TiB
#else
#define MI_MAX_ADDRESS    ((size_t)2 << 30)   // 2GiB
#endif

#define MI_SEGMENT_MAP_BITS   (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE   (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE  (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
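
// a worked sketch of the arithmetic above on 64-bit (with MI_SEGMENT_SIZE == 64MiB):
// MI_SEGMENT_MAP_BITS = 20TiB/64MiB = 327680 bits, MI_SEGMENT_MAP_SIZE = 327680/8
// = 40KiB, and MI_SEGMENT_MAP_WSIZE = 40KiB/8 = 5120 words.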

static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1];  // 2KiB per TB with 64MiB segments

static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
  mi_assert_internal(_mi_ptr_segment(segment) == segment);  // is it aligned on MI_SEGMENT_SIZE?
  if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
    *bitidx = 0;
    return MI_SEGMENT_MAP_WSIZE;
  }
  else {
    const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
    *bitidx = segindex % MI_INTPTR_BITS;
    const size_t mapindex = segindex / MI_INTPTR_BITS;
    mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
    return mapindex;
  }
}
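
// a worked sketch (assuming MI_SEGMENT_SIZE == 64MiB and 64-bit words): a segment
// at address 0x190000000 (6400MiB) has segindex 6400MiB/64MiB = 100, giving
// *bitidx = 100 % 64 = 36 and mapindex = 100 / 64 = 1.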

void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
  if (index==MI_SEGMENT_MAP_WSIZE) return;
  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  uintptr_t newmask;
  do {
    newmask = (mask | ((uintptr_t)1 << bitidx));
  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}

void _mi_segment_map_freed_at(const mi_segment_t* segment) {
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
  if (index == MI_SEGMENT_MAP_WSIZE) return;
  uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  uintptr_t newmask;
  do {
    newmask = (mask & ~((uintptr_t)1 << bitidx));
  } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
}

// Determine the segment belonging to a pointer, or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
  mi_segment_t* segment = _mi_ptr_segment(p);
  if (segment == NULL) return NULL;
  size_t bitidx;
  size_t index = mi_segment_map_index_of(segment, &bitidx);
  // fast path: for any pointer to a valid small/medium/large object, or the first MI_SEGMENT_SIZE of a huge object
  const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
  if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
    return segment;  // yes, allocated by us
  }
  if (index==MI_SEGMENT_MAP_WSIZE) return NULL;

  // TODO: maintain a min/max allocated range for more efficient rejection of invalid pointers?

  // search downwards for the first segment in case it is an interior pointer;
  // this can be slow, but it searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
  // valid huge objects
  // note: we could maintain a lowest index to speed up the path for invalid pointers?
  size_t lobitidx;
  size_t loindex;
  uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
  if (lobits != 0) {
    loindex = index;
    lobitidx = mi_bsr(lobits);  // lobits != 0
  }
  else if (index == 0) {
    return NULL;
  }
  else {
    mi_assert_internal(index > 0);
    uintptr_t lomask = mask;
    loindex = index;
    do {
      loindex--;
      lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
    } while (lomask == 0 && loindex > 0);  // keep searching down while the map word is empty
    if (lomask == 0) return NULL;
    lobitidx = mi_bsr(lomask);  // lomask != 0
  }
  mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
  // take the difference as the addresses could be larger than the MI_MAX_ADDRESS space
  size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
  segment = (mi_segment_t*)((uint8_t*)segment - diff);

  if (segment == NULL) return NULL;
  mi_assert_internal((void*)segment < p);
  bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
  mi_assert_internal(cookie_ok);
  if mi_unlikely(!cookie_ok) return NULL;
  if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL;  // outside the range
  mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
  return segment;
}
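
/* A worked sketch of the interior-pointer search above (values for illustration):
   say a huge segment starts at segindex 100 (mapindex 1, bit 36, as in the sketch
   after mi_segment_map_index_of) and p points into it at segindex 105 (mapindex 1,
   bit 41). Bit 41 is not set, but lobits has bit 36 as its highest set bit, so
   lobitidx == 36 and
     diff = ((1-1)*64 + 41 - 36) * MI_SEGMENT_SIZE = 5 * 64MiB = 320MiB,
   stepping segment back to segindex 100, where the cookie check validates it. */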

// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
  return (_mi_segment_of(p) != NULL);
}

mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  return mi_is_valid_pointer(p);
}
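
/* A hedged usage sketch (illustrative, not part of the build): an application that
   mixes allocators could use mi_is_in_heap_region to route frees, assuming every
   non-mimalloc pointer came from the system allocator.

   static void mixed_free_example(void* p) {
     if (p == NULL) return;
     if (mi_is_in_heap_region(p)) mi_free(p);  // allocated by mimalloc
     else free(p);                             // assumed to be from the system allocator
   }
*/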

/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
  mi_segment_t* segment = _mi_segment_of(p);
  if (segment == NULL) {
    if (size != NULL) *size = 0;
    return NULL;
  }
  else {
    if (size != NULL) *size = segment->segment_size;
    return segment;
  }
}
*/