1 | #include "jemalloc/internal/jemalloc_preamble.h" |
2 | |
3 | #include "jemalloc/internal/pages.h" |
4 | |
5 | #include "jemalloc/internal/jemalloc_internal_includes.h" |
6 | |
7 | #include "jemalloc/internal/assert.h" |
8 | #include "jemalloc/internal/malloc_io.h" |
9 | |
10 | #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT |
11 | #include <sys/sysctl.h> |
12 | #ifdef __FreeBSD__ |
13 | #include <vm/vm_param.h> |
14 | #endif |
15 | #endif |
16 | #ifdef __NetBSD__ |
17 | #include <sys/bitops.h> /* ilog2 */ |
18 | #endif |
19 | #ifdef JEMALLOC_HAVE_VM_MAKE_TAG |
20 | #define PAGES_FD_TAG VM_MAKE_TAG(101U) |
21 | #else |
22 | #define PAGES_FD_TAG -1 |
23 | #endif |
24 | #ifdef JEMALLOC_HAVE_PRCTL |
25 | #include <sys/prctl.h> |
26 | #ifndef PR_SET_VMA |
27 | #define PR_SET_VMA 0x53564d41 |
28 | #define PR_SET_VMA_ANON_NAME 0 |
29 | #endif |
30 | #endif |
31 | |
32 | /******************************************************************************/ |
33 | /* Data. */ |
34 | |
/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
/* mmap(2) protections used to emulate Windows-style commit/decommit. */
# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
# define PAGES_PROT_DECOMMIT (PROT_NONE)
/* Base flags for every mmap(2) call in this file; set in pages_boot(). */
static int mmap_flags;
#endif
/* True if the OS overcommits memory; commit/decommit become no-ops then. */
static bool os_overcommits;

/* Human-readable names for thp_mode_t values — presumably indexed by the
 * enum's numeric values; confirm against the thp_mode_t definition. */
const char *thp_mode_names[] = {
	"default" ,
	"always" ,
	"never" ,
	"not supported"
};
/* User-requested transparent huge page mode. */
thp_mode_t opt_thp = THP_MODE_DEFAULT;
/* THP mode the system was in when jemalloc bootstrapped. */
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;
56 | |
57 | #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS |
58 | static int madvise_dont_need_zeros_is_faulty = -1; |
59 | /** |
60 | * Check that MADV_DONTNEED will actually zero pages on subsequent access. |
61 | * |
62 | * Since qemu does not support this, yet [1], and you can get very tricky |
63 | * assert if you will run program with jemalloc in use under qemu: |
64 | * |
65 | * <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" |
66 | * |
67 | * [1]: https://patchwork.kernel.org/patch/10576637/ |
68 | */ |
69 | static int madvise_MADV_DONTNEED_zeroes_pages() |
70 | { |
71 | int works = -1; |
72 | size_t size = PAGE; |
73 | |
74 | void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, |
75 | MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
76 | |
77 | if (addr == MAP_FAILED) { |
78 | malloc_write("<jemalloc>: Cannot allocate memory for " |
79 | "MADV_DONTNEED check\n" ); |
80 | if (opt_abort) { |
81 | abort(); |
82 | } |
83 | } |
84 | |
85 | memset(addr, 'A', size); |
86 | if (madvise(addr, size, MADV_DONTNEED) == 0) { |
87 | works = memchr(addr, 'A', size) == NULL; |
88 | } else { |
89 | /* |
90 | * If madvise() does not support MADV_DONTNEED, then we can |
91 | * call it anyway, and use it's return code. |
92 | */ |
93 | works = 1; |
94 | } |
95 | |
96 | if (munmap(addr, size) != 0) { |
97 | malloc_write("<jemalloc>: Cannot deallocate memory for " |
98 | "MADV_DONTNEED check\n" ); |
99 | if (opt_abort) { |
100 | abort(); |
101 | } |
102 | } |
103 | |
104 | return works; |
105 | } |
106 | #endif |
107 | |
108 | #ifdef JEMALLOC_PAGEID |
/*
 * Attach a human-readable name to the mapping [addr, addr+size) so it can be
 * identified when inspecting the process's address space.  Returns 0 on
 * success (or when no naming facility exists), -1 with errno set on failure.
 */
static int os_page_id(void *addr, size_t size, const char *name)
{
#ifdef JEMALLOC_HAVE_PRCTL
	/*
	 * While parsing `/proc/<pid>/maps` file, the block could appear as
	 * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]`
	 */
	return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size,
	    (uintptr_t)name);
#else
	/* No naming facility on this platform; report success. */
	return 0;
#endif
}
122 | #endif |
123 | |
124 | /******************************************************************************/ |
125 | /* |
126 | * Function prototypes for static functions that are referenced prior to |
127 | * definition. |
128 | */ |
129 | |
130 | static void os_pages_unmap(void *addr, size_t size); |
131 | |
132 | /******************************************************************************/ |
133 | |
/*
 * Map `size` bytes, preferably at `addr` (a hint, not a requirement).
 * Returns NULL on failure, or (non-Windows) when the OS placed the mapping
 * somewhere other than a non-NULL requested `addr`.  Under OS overcommit,
 * forces *commit to true since decommit is meaningless there.
 */
static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
#ifdef __NetBSD__
		/*
		 * On NetBSD PAGE for a platform is defined to the
		 * maximum page size of all machine architectures
		 * for that platform, so that we can use the same
		 * binaries across all machine architectures.
		 */
		/*
		 * NOTE(review): this ORs MAP_ALIGNED into the file-scope
		 * mmap_flags, so the alignment request persists for all
		 * later calls — confirm that is intended.
		 */
		if (alignment > os_page || PAGE > os_page) {
			unsigned int a = ilog2(MAX(alignment, PAGE));
			mmap_flags |= MAP_ALIGNED(a);
		}
#endif
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
#ifdef JEMALLOC_PAGEID
	/* Name the mapping for /proc/<pid>/maps; EINVAL is tolerated. */
	int n = os_page_id(ret, size,
	    os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg" );
	assert(n == 0 || (n == -1 && get_errno() == EINVAL));
#endif
	return ret;
}
195 | |
/*
 * Given a mapping [addr, addr+alloc_size), keep only the `size`-byte region
 * starting `leadsize` bytes in, unmapping the excess before and after it.
 * Returns the retained region, or NULL (Windows only) if the re-map race
 * below is lost.
 */
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	/*
	 * VirtualFree cannot release a sub-range, so release the whole
	 * mapping and re-map just the desired part; another thread may grab
	 * the address in between, in which case we give up (return NULL).
	 */
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}
224 | |
/*
 * Release a mapping created by os_pages_map().  On failure, prints a
 * diagnostic and aborts if opt_abort is set.
 */
static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n" , buf);
		if (opt_abort) {
			abort();
		}
	}
}
251 | |
252 | static void * |
253 | pages_map_slow(size_t size, size_t alignment, bool *commit) { |
254 | size_t alloc_size = size + alignment - os_page; |
255 | /* Beware size_t wrap-around. */ |
256 | if (alloc_size < size) { |
257 | return NULL; |
258 | } |
259 | |
260 | void *ret; |
261 | do { |
262 | void *pages = os_pages_map(NULL, alloc_size, alignment, commit); |
263 | if (pages == NULL) { |
264 | return NULL; |
265 | } |
266 | size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) |
267 | - (uintptr_t)pages; |
268 | ret = os_pages_trim(pages, alloc_size, leadsize, size, commit); |
269 | } while (ret == NULL); |
270 | |
271 | assert(ret != NULL); |
272 | assert(PAGE_ADDR2BASE(ret) == ret); |
273 | return ret; |
274 | } |
275 | |
/*
 * Map `size` bytes aligned to `alignment`, preferably at `addr` (a hint).
 * Returns NULL on failure.  Under OS overcommit, *commit is forced true.
 */
void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at specific address without
	 * touching existing mappings, and to mmap with specific alignment.
	 */
	{
		if (os_overcommits) {
			*commit = true;
		}

		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			/* MAP_EXCL makes MAP_FIXED fail instead of replacing. */
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 0);
			flags |= MAP_ALIGNED(alignment_bits);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings. The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess. However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		/* Unlucky placement; fall back to over-allocate-and-trim. */
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
337 | |
/* Public wrapper: unmap a PAGE-aligned region obtained from pages_map(). */
void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}
345 | |
/*
 * Change the commit state of [addr, addr+size).  Returns true on failure
 * (jemalloc's usual error convention).  On POSIX systems commit/decommit is
 * emulated by re-mapping the range over itself with MAP_FIXED and the
 * appropriate protection bits.
 */
static bool
os_pages_commit(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    PAGES_FD_TAG, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}
374 | |
375 | static bool |
376 | pages_commit_impl(void *addr, size_t size, bool commit) { |
377 | if (os_overcommits) { |
378 | return true; |
379 | } |
380 | |
381 | return os_pages_commit(addr, size, commit); |
382 | } |
383 | |
/* Commit [addr, addr+size).  Returns true on failure (always under overcommit). */
bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}
388 | |
/* Decommit [addr, addr+size).  Returns true on failure (always under overcommit). */
bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
393 | |
/*
 * Make the guard page(s) at `head` and/or `tail` (PAGE bytes each)
 * inaccessible so stray accesses fault.  Either pointer may be NULL, but not
 * both; when both are given, head must precede tail.
 */
void
pages_mark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	if (head != NULL) {
		mprotect(head, PAGE, PROT_NONE);
	}
	if (tail != NULL) {
		mprotect(tail, PAGE, PROT_NONE);
	}
#else
	/* Decommit sets to PROT_NONE / MEM_DECOMMIT. */
	if (head != NULL) {
		os_pages_commit(head, PAGE, false);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, false);
	}
#endif
}
416 | |
/*
 * Restore read/write access to the guard page(s) at `head` and/or `tail`.
 * When both guards are close together, a single mprotect over the whole
 * [head, tail+PAGE) range replaces two separate calls.
 */
void
pages_unmark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	bool head_and_tail = (head != NULL) && (tail != NULL);
	size_t range = head_and_tail ?
	    (uintptr_t)tail - (uintptr_t)head + PAGE :
	    SIZE_T_MAX;
	/*
	 * The amount of work that the kernel does in mprotect depends on the
	 * range argument. SC_LARGE_MINCLASS is an arbitrary threshold chosen
	 * to prevent kernel from doing too much work that would outweigh the
	 * savings of performing one less system call.
	 */
	bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS;
	if (ranged_mprotect) {
		/* Unprotects the pages between the guards as well (harmless:
		 * they are already read/write). */
		mprotect(head, range, PROT_READ | PROT_WRITE);
	} else {
		if (head != NULL) {
			mprotect(head, PAGE, PROT_READ | PROT_WRITE);
		}
		if (tail != NULL) {
			mprotect(tail, PAGE, PROT_READ | PROT_WRITE);
		}
	}
#else
	if (head != NULL) {
		os_pages_commit(head, PAGE, true);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, true);
	}
#endif
}
453 | |
/*
 * Lazily purge [addr, addr+size): tell the OS the contents are disposable so
 * it may reclaim the physical pages at its leisure.  Returns true on failure
 * or when lazy purge is unavailable.
 */
bool
pages_purge_lazy(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	/* MEM_RESET marks the pages reclaimable without decommitting them. */
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}
491 | |
/*
 * Forcibly purge [addr, addr+size): immediately discard contents so that
 * subsequent access observes zeroed pages.  Returns true on failure or when
 * forced purge is unavailable (including a faulty MADV_DONTNEED).
 */
bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}
516 | |
/*
 * Ask the OS to back [addr, addr+size) with (transparent) huge pages.
 * Returns true on failure or when no huge-page advice facility exists.
 */
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	/* Solaris-style: request a HUGEPAGE-sized virtual mapping size. */
	struct memcntl_mha m = {0};
	m.mha_cmd = MHA_MAPSIZE_VA;
	m.mha_pagesize = HUGEPAGE;
	return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0);
#else
	return true;
#endif
}
534 | |
/* Request huge pages for a HUGEPAGE-aligned region.  True on failure. */
bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}
539 | |
/* Request huge pages without requiring HUGEPAGE alignment.  True on failure. */
static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}
544 | |
/*
 * Ask the OS not to use transparent huge pages for [addr, addr+size).
 * Returns true on failure; a no-op success where MADV_NOHUGEPAGE is absent.
 */
static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}
558 | |
/* Disable huge pages for a HUGEPAGE-aligned region.  True on failure. */
bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}
563 | |
/* Disable huge pages without requiring HUGEPAGE alignment.  True on failure. */
static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}
568 | |
/*
 * Exclude [addr, addr+size) from core dumps.  Returns true on failure; a
 * no-op success where no such advice exists.
 */
bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_NOCORE) != 0;
#else
	return false;
#endif
}
581 | |
/*
 * Re-include [addr, addr+size) in core dumps (undo pages_dontdump()).
 * Returns true on failure; a no-op success where no such advice exists.
 */
bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DODUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_CORE) != 0;
#else
	return false;
#endif
}
594 | |
595 | |
596 | static size_t |
597 | os_page_detect(void) { |
598 | #ifdef _WIN32 |
599 | SYSTEM_INFO si; |
600 | GetSystemInfo(&si); |
601 | return si.dwPageSize; |
602 | #elif defined(__FreeBSD__) |
603 | /* |
604 | * This returns the value obtained from |
605 | * the auxv vector, avoiding a syscall. |
606 | */ |
607 | return getpagesize(); |
608 | #else |
609 | long result = sysconf(_SC_PAGESIZE); |
610 | if (result == -1) { |
611 | return LG_PAGE; |
612 | } |
613 | return (size_t)result; |
614 | #endif |
615 | } |
616 | |
617 | #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT |
/*
 * Query the vm.overcommit sysctl.  Returns true iff the kernel overcommits
 * (no strict swap accounting); returns false on any sysctl error.
 */
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	/* Use the static MIB to avoid a name lookup during bootstrap. */
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit" , &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	/*
	 * Either of the low two bits set indicates strict accounting —
	 * presumably the SWAP_RESERVE_* flags; confirm against vm_param.h.
	 */
	return ((vm_overcommit & 0x3) == 0);
}
640 | #endif |
641 | |
642 | #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY |
643 | /* |
644 | * Use syscall(2) rather than {open,read,close}(2) when possible to avoid |
645 | * reentry during bootstrapping if another library has interposed system call |
646 | * wrappers. |
647 | */ |
/*
 * Read /proc/sys/vm/overcommit_memory to decide whether Linux overcommits.
 * Returns true for heuristic (0) or always (1) overcommit, false for never
 * (2) or on any error.  Raw syscalls are preferred over libc wrappers to
 * avoid re-entering interposed wrappers during bootstrap (see note above).
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory" , O_RDONLY |
	    O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory" , O_RDONLY);
	if (fd != -1) {
		/* No O_CLOEXEC: set the flag after the fact. */
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory" , O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory" , O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory" , O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory" , O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	/* A single byte suffices: the policy is the first character. */
	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
707 | #endif |
708 | |
709 | void |
710 | pages_set_thp_state (void *ptr, size_t size) { |
711 | if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { |
712 | return; |
713 | } |
714 | assert(opt_thp != thp_mode_not_supported && |
715 | init_system_thp_mode != thp_mode_not_supported); |
716 | |
717 | if (opt_thp == thp_mode_always |
718 | && init_system_thp_mode != thp_mode_never) { |
719 | assert(init_system_thp_mode == thp_mode_default); |
720 | pages_huge_unaligned(ptr, size); |
721 | } else if (opt_thp == thp_mode_never) { |
722 | assert(init_system_thp_mode == thp_mode_default || |
723 | init_system_thp_mode == thp_mode_always); |
724 | pages_nohuge_unaligned(ptr, size); |
725 | } |
726 | } |
727 | |
/*
 * Detect the system's transparent huge page mode (Linux: by reading
 * /sys/kernel/mm/transparent_hugepage/enabled) and record it in
 * init_system_thp_mode.  On any failure, mark THP unsupported and force
 * opt_thp accordingly.
 */
static void
init_thp_state(void) {
	if (!have_madvise_huge && !have_memcntl) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n" );
			abort();
		}
		goto label_error;
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	/* The kernel brackets the active mode, e.g. "always [madvise] never". */
	static const char sys_state_madvise[] = "always [madvise] never\n" ;
	static const char sys_state_always[] = "[always] madvise never\n" ;
	static const char sys_state_never[] = "always madvise [never]\n" ;
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled" , O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	int fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled" , O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled" , O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/*
	 * NOTE(review): nread == 0 passes this check and then matches the
	 * first strncmp (length 0) — confirm that mapping an empty read to
	 * thp_mode_default is intended.
	 */
	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	init_system_thp_mode = thp_mode_default;
	return;
#endif
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}
784 | |
/*
 * One-time bootstrap for this module: detect the OS page size, probe
 * MADV_DONTNEED behavior, determine overcommit mode, initialize THP state,
 * and verify lazy-purge support.  Returns true on failure.
 */
bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		/* A larger-than-configured page size cannot be supported. */
		malloc_write("<jemalloc>: Unsupported system page size\n" );
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
	if (!opt_trust_madvise) {
		madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
		if (madvise_dont_need_zeros_is_faulty) {
			malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n" );
			malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n" );
		}
	} else {
		/* opt_trust_madvise is enabled: trust that MADV_DONTNEED
		 * zeroes pages and skip the runtime check. */
		madvise_dont_need_zeros_is_faulty = 0;
	}
#endif

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		/* Under overcommit, skip swap reservation for our mappings. */
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#elif defined(__NetBSD__)
	os_overcommits = true;
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}
853 | |