#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H
#define JEMALLOC_INTERNAL_THREAD_EVENT_H

#include "jemalloc/internal/tsd.h"

/* "te" is short for "thread_event" */

/*
 * TE_MIN_START_WAIT should not exceed the minimal allocation usize.
 */
#define TE_MIN_START_WAIT ((uint64_t)1U)
#define TE_MAX_START_WAIT UINT64_MAX

/*
 * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is
 * no need to check for overflow in the malloc fast path. (The allocation size
 * in the malloc fast path never exceeds SC_LOOKUP_MAXCLASS.)
 */
#define TE_NEXT_EVENT_FAST_MAX (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U)
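
/*
 * An informal sketch of why this bound suffices (reasoning derived from the
 * definitions here, not a statement lifted from the implementation): the fast
 * threshold never exceeds TE_NEXT_EVENT_FAST_MAX, so in the fast-path common
 * case thread_allocated < TE_NEXT_EVENT_FAST_MAX, i.e.
 * thread_allocated <= UINT64_MAX - SC_LOOKUP_MAXCLASS; adding a fast-path
 * usize (at most SC_LOOKUP_MAXCLASS) therefore yields at most UINT64_MAX and
 * cannot wrap around.
 */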

/*
 * The max interval helps make sure that malloc stays on the fast path in the
 * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When
 * thread_allocated gets within an event's distance of TE_NEXT_EVENT_FAST_MAX,
 * thread_allocated_next_event_fast is wrapped around and we fall back to the
 * medium-fast path. The max interval makes sure that we don't stay in the
 * fallback case for too long, even if there's no active event or if all
 * active events have long wait times.
 */
#define TE_MAX_INTERVAL ((uint64_t)(4U << 20))

/*
 * Invalid elapsed time, for situations where elapsed time is not needed. See
 * comments in thread_event.c for more info.
 */
#define TE_INVALID_ELAPSED UINT64_MAX

typedef struct te_ctx_s {
	bool is_alloc;
	uint64_t *current;
	uint64_t *last_event;
	uint64_t *next_event;
	uint64_t *next_event_fast;
} te_ctx_t;

void te_assert_invariants_debug(tsd_t *tsd);
void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx);
void te_recompute_fast_threshold(tsd_t *tsd);
void tsd_te_init(tsd_t *tsd);

/*
 * List of all events, in the following format:
 * E(event, (condition), is_alloc_event)
 */
#define ITERATE_OVER_ALL_EVENTS \
	E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \
	E(prof_sample, (config_prof && opt_prof), true) \
	E(stats_interval, (opt_stats_interval >= 0), true) \
	E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \
	E(peak_alloc, config_stats, true) \
	E(peak_dalloc, config_stats, false)
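
/*
 * Users of ITERATE_OVER_ALL_EVENTS define E(...) to the expansion they need
 * before invoking it and #undef it afterwards, as done below for the
 * per-event _event_wait counters.
 */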

#define E(event, condition_unused, is_alloc_event_unused) \
	C(event##_event_wait)

/* List of all thread event counters. */
#define ITERATE_OVER_ALL_COUNTERS \
	C(thread_allocated) \
	C(thread_allocated_last_event) \
	ITERATE_OVER_ALL_EVENTS \
	C(prof_sample_last_event) \
	C(stats_interval_last_event)

/* Getters directly wrap TSD getters. */
#define C(counter) \
JEMALLOC_ALWAYS_INLINE uint64_t \
counter##_get(tsd_t *tsd) { \
	return tsd_##counter##_get(tsd); \
}

ITERATE_OVER_ALL_COUNTERS
#undef C
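
/*
 * For illustration (an expansion traced by hand, not additional definitions):
 * E(prof_sample, ...) in ITERATE_OVER_ALL_EVENTS expands to
 * C(prof_sample_event_wait), so the getter pass above generates
 *
 *	JEMALLOC_ALWAYS_INLINE uint64_t
 *	prof_sample_event_wait_get(tsd_t *tsd) {
 *		return tsd_prof_sample_event_wait_get(tsd);
 *	}
 *
 * and the setter pass below generates the matching
 * prof_sample_event_wait_set().
 */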

/*
 * Setters call the TSD pointer getters rather than the TSD setters, so that
 * the counters can be modified even when TSD state is reincarnated or
 * minimal_initialized: if an event is triggered in such cases, we will
 * temporarily delay the event and let it be immediately triggered at the next
 * allocation call.
 */
#define C(counter) \
JEMALLOC_ALWAYS_INLINE void \
counter##_set(tsd_t *tsd, uint64_t v) { \
	*tsd_##counter##p_get(tsd) = v; \
}

ITERATE_OVER_ALL_COUNTERS
#undef C

/*
 * The E() definition above expands each event into C(event##_event_wait);
 * that is how the per-event _event_wait getter / setter functions were
 * generated by the two ITERATE_OVER_ALL_COUNTERS passes above. E() is not
 * needed beyond this point.
 */
#undef E

/*
 * The malloc and free fastpath getters -- use the unsafe getters since tsd may
 * be non-nominal, in which case the fast_threshold will be set to 0. This
 * allows checking for events and tsd non-nominal in a single branch.
 *
 * Note that these can only be used on the fastpath.
 */
JEMALLOC_ALWAYS_INLINE void
te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) {
	*allocated = *tsd_thread_allocatedp_get_unsafe(tsd);
	*threshold = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd);
	assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
}
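
/*
 * A minimal sketch of the intended fast-path usage (fallback_alloc and the
 * surrounding control flow are illustrative only, not identifiers defined in
 * this header):
 *
 *	uint64_t allocated, threshold;
 *	te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
 *	uint64_t allocated_after = allocated + usize;
 *	if (unlikely(allocated_after >= threshold)) {
 *		return fallback_alloc(size);
 *	}
 *	// ... proceed on the fast path; store allocated_after back into tsd.
 *
 * Since a non-nominal tsd forces the fast threshold to 0, the single
 * comparison above covers both "an event is due" and "tsd is non-nominal".
 */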

JEMALLOC_ALWAYS_INLINE void
te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold) {
	/* Unsafe getters since this may happen before tsd_init. */
	*deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd);
	*threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd);
	assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
}

JEMALLOC_ALWAYS_INLINE bool
te_ctx_is_alloc(te_ctx_t *ctx) {
	return ctx->is_alloc;
}

JEMALLOC_ALWAYS_INLINE uint64_t
te_ctx_current_bytes_get(te_ctx_t *ctx) {
	return *ctx->current;
}

JEMALLOC_ALWAYS_INLINE void
te_ctx_current_bytes_set(te_ctx_t *ctx, uint64_t v) {
	*ctx->current = v;
}

JEMALLOC_ALWAYS_INLINE uint64_t
te_ctx_last_event_get(te_ctx_t *ctx) {
	return *ctx->last_event;
}

JEMALLOC_ALWAYS_INLINE void
te_ctx_last_event_set(te_ctx_t *ctx, uint64_t v) {
	*ctx->last_event = v;
}

/* The 3 functions below are for next_event_fast. */
JEMALLOC_ALWAYS_INLINE uint64_t
te_ctx_next_event_fast_get(te_ctx_t *ctx) {
	uint64_t v = *ctx->next_event_fast;
	assert(v <= TE_NEXT_EVENT_FAST_MAX);
	return v;
}

JEMALLOC_ALWAYS_INLINE void
te_ctx_next_event_fast_set(te_ctx_t *ctx, uint64_t v) {
	assert(v <= TE_NEXT_EVENT_FAST_MAX);
	*ctx->next_event_fast = v;
}

JEMALLOC_ALWAYS_INLINE void
te_next_event_fast_set_non_nominal(tsd_t *tsd) {
	/*
	 * Set the fast thresholds to zero when tsd is non-nominal. Use the
	 * unsafe getter as this may get called during tsd init and clean up.
	 */
	*tsd_thread_allocated_next_event_fastp_get_unsafe(tsd) = 0;
	*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) = 0;
}

/* For next_event. Setter also updates the fast threshold. */
JEMALLOC_ALWAYS_INLINE uint64_t
te_ctx_next_event_get(te_ctx_t *ctx) {
	return *ctx->next_event;
}

JEMALLOC_ALWAYS_INLINE void
te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) {
	*ctx->next_event = v;
	te_recompute_fast_threshold(tsd);
}

/*
 * In debug mode, this function checks that the thread event counters are in a
 * consistent state; that consistency forms the invariants that hold before
 * and after each round of thread event handling, which we can rely on and
 * must preserve. The invariants are only temporarily violated in the middle
 * of te_event_advance() if an event is triggered (the te_event_trigger() call
 * at the end restores them).
 */
JEMALLOC_ALWAYS_INLINE void
te_assert_invariants(tsd_t *tsd) {
	if (config_debug) {
		te_assert_invariants_debug(tsd);
	}
}

JEMALLOC_ALWAYS_INLINE void
te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) {
	ctx->is_alloc = is_alloc;
	if (is_alloc) {
		ctx->current = tsd_thread_allocatedp_get(tsd);
		ctx->last_event = tsd_thread_allocated_last_eventp_get(tsd);
		ctx->next_event = tsd_thread_allocated_next_eventp_get(tsd);
		ctx->next_event_fast =
		    tsd_thread_allocated_next_event_fastp_get(tsd);
	} else {
		ctx->current = tsd_thread_deallocatedp_get(tsd);
		ctx->last_event = tsd_thread_deallocated_last_eventp_get(tsd);
		ctx->next_event = tsd_thread_deallocated_next_eventp_get(tsd);
		ctx->next_event_fast =
		    tsd_thread_deallocated_next_event_fastp_get(tsd);
	}
}

/*
 * The lookahead functionality allows a caller to determine, without touching
 * the event counters, whether an event would be triggered. The event counters
 * are not advanced until the end of the allocation / deallocation calls, so
 * the lookahead is useful when some preparation work for an event must be
 * done early in the allocation / deallocation call.
 *
 * Currently only the profiling sampling event needs the lookahead
 * functionality, so we don't yet define general purpose lookahead functions.
 *
 * "Surplus" refers to the number of bytes beyond what is needed to trigger an
 * event, which is generally a useful quantity to have when the lookahead is
 * performed.
 */

JEMALLOC_ALWAYS_INLINE bool
te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize,
    size_t *surplus) {
	if (surplus != NULL) {
		/*
		 * This is a dead store: the surplus will be overwritten before
		 * any read. The initialization suppresses compiler warnings.
		 * Meanwhile, using SIZE_MAX to initialize is good for
		 * debugging purpose, because a valid surplus value is strictly
		 * less than usize, which is at most SIZE_MAX.
		 */
		*surplus = SIZE_MAX;
	}
	if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) {
		return false;
	}
	/* The subtraction is intentionally susceptible to underflow. */
	uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize -
	    tsd_thread_allocated_last_event_get(tsd);
	uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd);
	if (accumbytes < sample_wait) {
		return false;
	}
	assert(accumbytes - sample_wait < (uint64_t)usize);
	if (surplus != NULL) {
		*surplus = (size_t)(accumbytes - sample_wait);
	}
	return true;
}
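
/*
 * A worked example of the surplus computation above (the numbers are made up
 * purely for illustration): with
 * thread_allocated - thread_allocated_last_event == 100, sample_wait == 120
 * and usize == 50, accumbytes == 150 >= sample_wait, so the lookahead reports
 * that the sampling event would trigger, with surplus == 150 - 120 == 30,
 * which is indeed strictly less than usize.
 */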

JEMALLOC_ALWAYS_INLINE bool
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
	return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL);
}

JEMALLOC_ALWAYS_INLINE void
te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
	te_assert_invariants(tsd);

	te_ctx_t ctx;
	te_ctx_get(tsd, &ctx, is_alloc);

	uint64_t bytes_before = te_ctx_current_bytes_get(&ctx);
	te_ctx_current_bytes_set(&ctx, bytes_before + usize);

	/* The subtraction is intentionally susceptible to underflow. */
	if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) {
		te_assert_invariants(tsd);
	} else {
		te_event_trigger(tsd, &ctx);
	}
}
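
/*
 * An informal illustration of the wrap-tolerant check above: the byte
 * counters are allowed to wrap modulo 2^64, so even if next_event has wrapped
 * past zero while bytes_before has not (say bytes_before == UINT64_MAX - 10
 * and next_event == 20), next_event - bytes_before still equals the true
 * distance (31 here), and the comparison against usize stays correct as long
 * as the actual distance fits in a uint64_t.
 */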

JEMALLOC_ALWAYS_INLINE void
thread_dalloc_event(tsd_t *tsd, size_t usize) {
	te_event_advance(tsd, usize, false);
}

JEMALLOC_ALWAYS_INLINE void
thread_alloc_event(tsd_t *tsd, size_t usize) {
	te_event_advance(tsd, usize, true);
}

#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_H */