#ifndef JEMALLOC_INTERNAL_INLINES_C_H
#define JEMALLOC_INTERNAL_INLINES_C_H

#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/log.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/witness.h"

/*
 * Translating the names of the 'i' functions:
 *   Abbreviations used in the first part of the function name (before
 *   alloc/dalloc) describe what that function accomplishes:
 *     a: arena (query)
 *     s: size (query, or sized deallocation)
 *     e: extent (query)
 *     p: aligned (allocates)
 *     vs: size (query, without knowing that the pointer is into the heap)
 *     r: rallocx implementation
 *     x: xallocx implementation
 *   Abbreviations used in the second part of the function name (after
 *   alloc/dalloc) describe the arguments it takes:
 *     z: whether to return zeroed memory
 *     t: accepts a tcache_t * parameter
 *     m: accepts an arena_t * parameter
 */
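
/*
 * For example (illustrative only), reading iallocztm with the table above:
 * the leading "i" marks it as one of these internal helpers, "alloc" is the
 * operation, and the "z", "t", and "m" suffixes mean it takes the zero flag,
 * a tcache_t *, and an arena_t * respectively:
 *
 *	void *p = iallocztm(tsdn, size, sz_size2index(size), false, tcache,
 *	    false, NULL, true);
 *
 * Similarly, ipalloct is the aligned ("p") allocation variant that accepts a
 * tcache_t * ("t"), and isdalloct is the sized ("s") deallocation variant.
 */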

JEMALLOC_ALWAYS_INLINE arena_t *
iaalloc(tsdn_t *tsdn, const void *ptr) {
	assert(ptr != NULL);

	return arena_aalloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE size_t
isalloc(tsdn_t *tsdn, const void *ptr) {
	assert(ptr != NULL);

	return arena_salloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE void *
iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
    bool is_internal, arena_t *arena, bool slow_path) {
	void *ret;

	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena == NULL || arena_is_auto(arena));
	if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
		    WITNESS_RANK_CORE, 0);
	}

	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
	if (config_stats && is_internal && likely(ret != NULL)) {
		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
	}
	return ret;
}

JEMALLOC_ALWAYS_INLINE void *
ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
	return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
	    NULL, slow_path);
}

JEMALLOC_ALWAYS_INLINE void *
ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
    tcache_t *tcache, bool is_internal, arena_t *arena) {
	void *ret;

	assert(usize != 0);
	assert(usize == sz_sa2u(usize, alignment));
	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena == NULL || arena_is_auto(arena));
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
	if (config_stats && is_internal && likely(ret != NULL)) {
		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
	}
	return ret;
}

JEMALLOC_ALWAYS_INLINE void *
ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
    tcache_t *tcache, arena_t *arena) {
	return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
}

JEMALLOC_ALWAYS_INLINE void *
ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
	return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
	    tcache_get(tsd), false, NULL);
}
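
/*
 * An illustrative sketch of the usual caller protocol for the aligned
 * allocators (the same pattern iralloct_realign uses below): the caller maps
 * (size, alignment) to a usable size with sz_sa2u() and bails out on overflow
 * before calling ipalloct, which (via ipallocztm) asserts that
 * usize == sz_sa2u(usize, alignment):
 *
 *	size_t usize = sz_sa2u(size, alignment);
 *	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 *		return NULL;
 *	}
 *	void *p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
 */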

JEMALLOC_ALWAYS_INLINE size_t
ivsalloc(tsdn_t *tsdn, const void *ptr) {
	return arena_vsalloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE void
idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
    emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) {
	assert(ptr != NULL);
	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	if (config_stats && is_internal) {
		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
	}
	if (!is_internal && !tsdn_null(tsdn) &&
	    tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
		assert(tcache == NULL);
	}
	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
}

JEMALLOC_ALWAYS_INLINE void
idalloc(tsd_t *tsd, void *ptr) {
	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true);
}

JEMALLOC_ALWAYS_INLINE void
isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
    emap_alloc_ctx_t *alloc_ctx, bool slow_path) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
}
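
/*
 * Sketch of the distinction between the two deallocation paths above, in the
 * terms of the naming table at the top of this file: idalloctm recovers the
 * size from the extent metadata, while isdalloct (the sized "s" variant)
 * trusts the size supplied by the caller, as in this illustrative call taken
 * from iralloct_realign below:
 *
 *	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
 */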

JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
    size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
    hook_ralloc_args_t *hook_args) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	void *p;
	size_t usize, copysize;

	usize = sz_sa2u(size, alignment);
	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
		return NULL;
	}
	p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
	if (p == NULL) {
		return NULL;
	}
	/*
	 * Copy at most size bytes (not size+extra), since the caller has no
	 * expectation that the extra bytes will be reliably preserved.
	 */
	copysize = (size < oldsize) ? size : oldsize;
	memcpy(p, ptr, copysize);
	hook_invoke_alloc(hook_args->is_realloc
	    ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p,
	    hook_args->args);
	hook_invoke_dalloc(hook_args->is_realloc
	    ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
	return p;
}

/*
 * is_realloc threads through the knowledge of whether or not this call comes
 * from je_realloc (as opposed to je_rallocx); this ensures that we pass the
 * correct entry point into any hooks.
 * Note that these functions are all force-inlined, so no actual bool gets
 * passed around anywhere.
 */
JEMALLOC_ALWAYS_INLINE void *
iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
    bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
{
	assert(ptr != NULL);
	assert(size != 0);
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
	    != 0) {
		/*
		 * Existing object alignment is inadequate; allocate new space
		 * and copy.
		 */
		return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
		    zero, tcache, arena, hook_args);
	}

	return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
	    tcache, hook_args);
}

JEMALLOC_ALWAYS_INLINE void *
iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
    bool zero, hook_ralloc_args_t *hook_args) {
	return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
	    tcache_get(tsd), NULL, hook_args);
}
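
/*
 * Illustrative sketch (loosely modeled on a rallocx-style caller, not copied
 * from the public entry points) of how hook information is threaded through
 * iralloct: is_realloc selects the realloc vs. rallocx hook entry points in
 * iralloct_realign above, and args carries the raw arguments that get
 * forwarded to the hooks.
 *
 *	hook_ralloc_args_t hook_args;
 *	hook_args.is_realloc = false;
 *	hook_args.args[0] = (uintptr_t)ptr;
 *	hook_args.args[1] = size;
 *	hook_args.args[2] = flags;
 *	void *p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment,
 *	    zero, tcache, arena, &hook_args);
 */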

JEMALLOC_ALWAYS_INLINE bool
ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
    size_t alignment, bool zero, size_t *newsize) {
	assert(ptr != NULL);
	assert(size != 0);
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
	    != 0) {
		/* Existing object alignment is inadequate. */
		*newsize = oldsize;
		return true;
	}

	return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
	    newsize);
}
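
/*
 * Per the naming table at the top of this file, iralloct backs rallocx and may
 * move the allocation, while ixalloc backs xallocx and only ever resizes in
 * place (a true return means the in-place resize failed). A hedged sketch of a
 * try-in-place-then-move caller built from these two helpers:
 *
 *	size_t newsize;
 *	if (!ixalloc(tsdn, ptr, oldsize, size, extra, alignment, zero,
 *	    &newsize)) {
 *		return ptr;
 *	}
 *	return iralloct(tsdn, ptr, oldsize, size, alignment, zero, tcache,
 *	    arena, hook_args);
 */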

JEMALLOC_ALWAYS_INLINE void
fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
    cache_bin_t *bin, void *ret) {
	thread_allocated_set(tsd, allocated_after);
	if (config_stats) {
		bin->tstats.nrequests++;
	}

	LOG("core.malloc.exit", "result: %p", ret);
}

JEMALLOC_ALWAYS_INLINE bool
malloc_initialized(void) {
	return (malloc_init_state == malloc_init_initialized);
}

/*
 * malloc() fastpath. Included here so that we can inline it into operator new;
 * function call overhead there is non-negligible as a fraction of total CPU in
 * allocation-heavy C++ programs. We take the fallback alloc to allow malloc
 * (which can return NULL) to differ in its behavior from operator new (which
 * can't). It matches the signature of malloc / operator new so that we can
 * tail-call the fallback allocator, allowing us to avoid setting up the call
 * frame in the common case.
 *
 * The fastpath assumes size <= SC_LOOKUP_MAXCLASS and that the tcache is hit.
 * If either of these is false, we tail-call to the slowpath,
 * malloc_default(). Tail-calling is used to avoid having to save any
 * caller-saved registers.
 *
 * The fastpath supports the ticker and profiling, both of which will also
 * tail-call to the slowpath if they fire.
 */
JEMALLOC_ALWAYS_INLINE void *
imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
	LOG("core.malloc.entry", "size: %zu", size);
	if (tsd_get_allocates() && unlikely(!malloc_initialized())) {
		return fallback_alloc(size);
	}

	tsd_t *tsd = tsd_get(false);
	if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) {
		return fallback_alloc(size);
	}
	/*
	 * The code below, up to the branch checking the next_event threshold,
	 * may execute before malloc_init(), in which case the threshold is 0
	 * so that the slow path (and initialization) is triggered.
	 *
	 * Note that when uninitialized, only the fast-path variants of the sz /
	 * tsd facilities may be called.
	 */
	szind_t ind;
	/*
	 * The thread_allocated counter in tsd serves as a general purpose
	 * accumulator for bytes of allocation to trigger different types of
	 * events. usize is always needed to advance thread_allocated, though
	 * it's not always needed in the core allocation logic.
	 */
	size_t usize;
	sz_size2index_usize_fastpath(size, &ind, &usize);
	/* Fast path relies on size being a bin. */
	assert(ind < SC_NBINS);
	assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
	    (size <= SC_SMALL_MAXCLASS));

	uint64_t allocated, threshold;
	te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
	uint64_t allocated_after = allocated + usize;
	/*
	 * ind and usize might be (partially) uninitialized before
	 * malloc_init(). The assertions below check for: 1) full correctness
	 * of usize and ind when initialized; and 2) a guaranteed slow path
	 * (threshold == 0) when not yet initialized.
	 */
	if (!malloc_initialized()) {
		assert(threshold == 0);
	} else {
		assert(ind == sz_size2index(size));
		assert(usize > 0 && usize == sz_index2size(ind));
	}
	/*
	 * Check for events and tsd non-nominal (fast_threshold will be set to
	 * 0) in a single branch.
	 */
	if (unlikely(allocated_after >= threshold)) {
		return fallback_alloc(size);
	}
	assert(tsd_fast(tsd));

	tcache_t *tcache = tsd_tcachep_get(tsd);
	assert(tcache == tcache_get(tsd));
	cache_bin_t *bin = &tcache->bins[ind];
	bool tcache_success;
	void *ret;

	/*
	 * We split up the code this way so that the redundant low-water
	 * computation doesn't happen in the (more common) case in which we
	 * don't touch the low water mark. The compiler won't do this
	 * duplication on its own.
	 */
	ret = cache_bin_alloc_easy(bin, &tcache_success);
	if (tcache_success) {
		fastpath_success_finish(tsd, allocated_after, bin, ret);
		return ret;
	}
	ret = cache_bin_alloc(bin, &tcache_success);
	if (tcache_success) {
		fastpath_success_finish(tsd, allocated_after, bin, ret);
		return ret;
	}

	return fallback_alloc(size);
}
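
/*
 * A hedged sketch of how an entry point is expected to wrap imalloc_fastpath,
 * following the description above (the real wrappers live outside this header,
 * e.g. in jemalloc.c and jemalloc_cpp.cpp); the names below are placeholders,
 * and the fallback shares malloc's signature so the calls above can be tail
 * calls:
 *
 *	static void *
 *	example_fallback(size_t size) {
 *		return malloc_default(size);
 *	}
 *
 *	void *
 *	example_malloc(size_t size) {
 *		return imalloc_fastpath(size, &example_fallback);
 *	}
 */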

#endif /* JEMALLOC_INTERNAL_INLINES_C_H */