#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif
#ifdef __NetBSD__
#include <sys/bitops.h>	/* ilog2 */
#endif
#ifdef JEMALLOC_HAVE_VM_MAKE_TAG
#define PAGES_FD_TAG VM_MAKE_TAG(101U)
#else
#define PAGES_FD_TAG -1
#endif
#ifdef JEMALLOC_HAVE_PRCTL
#include <sys/prctl.h>
#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41
#define PR_SET_VMA_ANON_NAME 0
#endif
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = thp_mode_default;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
static int madvise_dont_need_zeros_is_faulty = -1;
/*
 * Check that MADV_DONTNEED will actually zero pages on subsequent access.
 *
 * qemu does not support this yet [1], so a program that uses jemalloc under
 * qemu can trip a confusing assertion:
 *
 * <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
 *
 * [1]: https://patchwork.kernel.org/patch/10576637/
 */
static int
madvise_MADV_DONTNEED_zeroes_pages(void) {
	int works = -1;
	size_t size = PAGE;

	void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

	if (addr == MAP_FAILED) {
		malloc_write("<jemalloc>: Cannot allocate memory for "
		    "MADV_DONTNEED check\n");
		if (opt_abort) {
			abort();
		}
		/* Cannot run the check; fall back to trusting MADV_DONTNEED. */
		return works;
	}

	memset(addr, 'A', size);
	if (madvise(addr, size, MADV_DONTNEED) == 0) {
		works = memchr(addr, 'A', size) == NULL;
	} else {
		/*
		 * If madvise() does not support MADV_DONTNEED, the call fails
		 * outright and the advice is never applied, so there is no
		 * faulty zeroing behavior to guard against.
		 */
		works = 1;
	}

	if (munmap(addr, size) != 0) {
		malloc_write("<jemalloc>: Cannot deallocate memory for "
		    "MADV_DONTNEED check\n");
		if (opt_abort) {
			abort();
		}
	}

	return works;
}
#endif

#ifdef JEMALLOC_PAGEID
static int
os_page_id(void *addr, size_t size, const char *name) {
#ifdef JEMALLOC_HAVE_PRCTL
	/*
	 * Name the mapping, so that when parsing /proc/<pid>/maps, the block
	 * appears as e.g.:
	 * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]
	 */
	return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size,
	    (uintptr_t)name);
#else
	return 0;
#endif
}
#endif

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

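/*
 * Reserve (and optionally commit) a size-byte mapping from the OS.  If addr
 * is non-NULL, it is only a hint: rather than accepting a mapping at the
 * wrong place, the function unmaps it and returns NULL.  On overcommitting
 * systems, *commit is forced to true, since mappings are always created
 * read/write there.
 */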
static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
#ifdef __NetBSD__
		/*
		 * On NetBSD, PAGE for a platform is defined to the maximum
		 * page size of all machine architectures for that platform,
		 * so that we can use the same binaries across all machine
		 * architectures.
		 */
		if (alignment > os_page || PAGE > os_page) {
			unsigned int a = ilog2(MAX(alignment, PAGE));
			mmap_flags |= MAP_ALIGNED(a);
		}
#endif
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
#ifdef JEMALLOC_PAGEID
	int n = os_page_id(ret, size,
	    os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg");
	assert(n == 0 || (n == -1 && get_errno() == EINVAL));
#endif
	return ret;
}

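/*
 * Trim an over-sized mapping down to the size-byte region that starts
 * leadsize bytes into it.  On Windows, mappings cannot be partially
 * released, so the whole mapping is unmapped and the target region is
 * remapped; that remap can race with another thread and fail, in which case
 * NULL is returned.
 */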
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

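/*
 * Reliable-but-slow fallback for aligned mapping: over-allocate by
 * (alignment - os_page) bytes so that some suitably aligned subrange is
 * guaranteed to exist, then trim the excess.  For example (illustrative
 * numbers only): with 4 KiB pages, size = 8 KiB and alignment = 16 KiB give
 * alloc_size = 8 + 16 - 4 = 20 KiB; wherever the OS places the mapping, a
 * 16 KiB-aligned 8 KiB subrange fits inside it.  The loop retries because
 * os_pages_trim() can fail transiently on Windows.
 */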
static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

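/*
 * Public entry point for creating page mappings.  Example (hypothetical
 * caller): reserve one huge page worth of address space, aligned to
 * HUGEPAGE, initially uncommitted where the OS distinguishes commit state:
 *
 *	bool commit = false;
 *	void *p = pages_map(NULL, HUGEPAGE, HUGEPAGE, &commit);
 *	if (p != NULL && !commit) {
 *		... call pages_commit(p, HUGEPAGE) before first use ...
 *	}
 */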
void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at a specific address without
	 * touching existing mappings, and to mmap with specific alignment.
	 */
	{
		if (os_overcommits) {
			*commit = true;
		}

		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 0);
			flags |= MAP_ALIGNED(alignment_bits);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim
	 * the excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before
	 * falling back to the slow method, with the expectation that the
	 * optimistic approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

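/*
 * Toggle commit state by overlaying the range with a fresh MAP_FIXED
 * mapping (PROT_NONE to decommit, read/write to commit) on POSIX systems,
 * or via VirtualAlloc/VirtualFree on Windows.  Returns true on failure,
 * matching the convention used throughout this file.
 */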
static bool
os_pages_commit(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    PAGES_FD_TAG, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	if (os_overcommits) {
		return true;
	}

	return os_pages_commit(addr, size, commit);
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}

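/*
 * Protect single guard pages at the head and/or tail of an allocation so
 * that stray accesses fault.  Either pointer may be NULL; when both are
 * given, head must precede tail.
 */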
void
pages_mark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	if (head != NULL) {
		mprotect(head, PAGE, PROT_NONE);
	}
	if (tail != NULL) {
		mprotect(tail, PAGE, PROT_NONE);
	}
#else
	/* Decommit sets to PROT_NONE / MEM_DECOMMIT. */
	if (head != NULL) {
		os_pages_commit(head, PAGE, false);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, false);
	}
#endif
}

void
pages_unmark_guards(void *head, void *tail) {
	assert(head != NULL || tail != NULL);
	assert(head == NULL || tail == NULL ||
	    (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
	bool head_and_tail = (head != NULL) && (tail != NULL);
	size_t range = head_and_tail ?
	    (uintptr_t)tail - (uintptr_t)head + PAGE :
	    SIZE_T_MAX;
	/*
	 * The amount of work that the kernel does in mprotect depends on the
	 * range argument.  SC_LARGE_MINCLASS is an arbitrary threshold chosen
	 * to prevent the kernel from doing so much work that it would
	 * outweigh the savings of performing one less system call.
	 */
	bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS;
	if (ranged_mprotect) {
		mprotect(head, range, PROT_READ | PROT_WRITE);
	} else {
		if (head != NULL) {
			mprotect(head, PAGE, PROT_READ | PROT_WRITE);
		}
		if (tail != NULL) {
			mprotect(tail, PAGE, PROT_READ | PROT_WRITE);
		}
	}
#else
	if (head != NULL) {
		os_pages_commit(head, PAGE, true);
	}
	if (tail != NULL) {
		os_pages_commit(tail, PAGE, true);
	}
#endif
}

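/*
 * Lazy purge (e.g. MADV_FREE) tells the OS it may reclaim the pages, but
 * their contents remain readable until it actually does; forced purge (see
 * pages_purge_forced()) is expected to leave pages that read back as zero.
 * Both return true if the pages were not purged.
 */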
bool
pages_purge_lazy(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
	    posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

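/*
 * Request transparent huge pages for the range, via MADV_HUGEPAGE on Linux
 * or memcntl(2) on Solaris.  The aligned variants additionally assert
 * HUGEPAGE alignment, since the advice only takes effect on whole huge
 * pages.
 */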
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	struct memcntl_mha m = {0};
	m.mha_cmd = MHA_MAPSIZE_VA;
	m.mha_pagesize = HUGEPAGE;
	return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_NOCORE) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
	return madvise(addr, size, MADV_DODUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
	return madvise(addr, size, MADV_CORE) != 0;
#else
	return false;
#endif
}

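/*
 * Determine the system page size at runtime.  jemalloc is compiled for a
 * fixed LG_PAGE; pages_boot() verifies that the detected size does not
 * exceed the compile-time PAGE.
 */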
static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	/*
	 * This returns the value obtained from
	 * the auxv vector, avoiding a syscall.
	 */
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Fall back to the compile-time page size. */
		return PAGE;
	}
	return (size_t)result;
#endif
}

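/*
 * Overcommit detection.  When the OS overcommits, jemalloc maps everything
 * read/write up front and skips explicit commit/decommit (see
 * pages_commit_impl()), since untouched pages carry no cost there.
 */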
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
	#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
	#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
	#endif
#else
	#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
	#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
	#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

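/*
 * Reconcile the opt.thp setting with the system-wide THP mode captured at
 * boot: madvise() calls are only issued when the two disagree.
 */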
void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

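/*
 * Capture the system-wide THP mode at bootstrap by reading
 * /sys/kernel/mm/transparent_hugepage/enabled, whose contents look like
 * "always [madvise] never" with the active mode bracketed.
 */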
static void
init_thp_state(void) {
	if (!have_madvise_huge && !have_memcntl) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	int fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
#elif defined(JEMALLOC_HAVE_MEMCNTL)
	init_system_thp_mode = thp_mode_default;
	return;
#endif
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

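/*
 * One-time bootstrap: detect the OS page size, probe MADV_DONTNEED
 * behavior, determine overcommit and THP state, and verify runtime support
 * for lazy purge.  Returns true on unrecoverable failure.
 */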
bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
	if (!opt_trust_madvise) {
		madvise_dont_need_zeros_is_faulty =
		    !madvise_MADV_DONTNEED_zeroes_pages();
		if (madvise_dont_need_zeros_is_faulty) {
			malloc_write("<jemalloc>: MADV_DONTNEED does not work "
			    "(memset will be used instead)\n");
			malloc_write("<jemalloc>: (This is the expected "
			    "behaviour if you are running under QEMU)\n");
		}
	} else {
		/*
		 * opt_trust_madvise is enabled, so skip the runtime check
		 * and assume MADV_DONTNEED zeroes pages.
		 */
		madvise_dont_need_zeros_is_faulty = 0;
	}
#endif

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#elif defined(__NetBSD__)
	os_overcommits = true;
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}