#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/decay.h"

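/*
 * Fixed-point smoothstep decay curve, one entry per step.  Each value is
 * expanded from the SMOOTHSTEP macro and carries SMOOTHSTEP_BFP binary
 * fraction bits.
 */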
static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y) \
		h,
		SMOOTHSTEP
#undef STEP
};

/*
 * Generate a new deadline that is uniformly random within the next epoch after
 * the current one.
 */
void
decay_deadline_init(decay_t *decay) {
	nstime_copy(&decay->deadline, &decay->epoch);
	nstime_add(&decay->deadline, &decay->interval);
	if (decay_ms_read(decay) > 0) {
		nstime_t jitter;

		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
		    nstime_ns(&decay->interval)));
		nstime_add(&decay->deadline, &jitter);
	}
}

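/*
 * Reset the decay state to use 'decay_ms' as the decay time, splitting it into
 * SMOOTHSTEP_NSTEPS equal epochs, and restart the epoch/deadline/backlog
 * bookkeeping from 'cur_time'.
 */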
void
decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
	if (decay_ms > 0) {
		nstime_init(&decay->interval, (uint64_t)decay_ms *
		    KQU(1000000));
		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
	}

	nstime_copy(&decay->epoch, cur_time);
	decay->jitter_state = (uint64_t)(uintptr_t)decay;
	decay_deadline_init(decay);
	decay->nunpurged = 0;
	memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
}

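/*
 * One-time initialization.  'decay' is expected to be zeroed by the caller
 * (verified in debug builds).  Returns true if mutex initialization fails.
 */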
bool
decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	if (config_debug) {
		for (size_t i = 0; i < sizeof(decay_t); i++) {
			assert(((char *)decay)[i] == 0);
		}
		decay->ceil_npages = 0;
	}
	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}
	decay->purging = false;
	decay_reinit(decay, cur_time, decay_ms);
	return false;
}

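/*
 * A decay time of -1 (decay disabled) is valid; any other value must be
 * non-negative and small enough that its nanosecond equivalent is
 * representable by nstime_t.
 */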
bool
decay_ms_valid(ssize_t decay_ms) {
	if (decay_ms < -1) {
		return false;
	}
	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
	    KQU(1000)) {
		return true;
	}
	return false;
}

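/*
 * Tolerate a non-monotonic clock: if 'new_time' is earlier than the current
 * epoch, rewind the epoch to it rather than letting the decay machinery
 * observe time running backwards.
 */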
static void
decay_maybe_update_time(decay_t *decay, nstime_t *new_time) {
	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch,
	    new_time) > 0)) {
		/*
		 * Time went backwards. Move the epoch back in time and
		 * generate a new deadline, with the expectation that time
		 * typically flows forward for long enough periods of time that
		 * epochs complete. Unfortunately, this strategy is susceptible
		 * to clock jitter triggering premature epoch advances, but
		 * clock jitter estimation and compensation isn't feasible here
		 * because calls into this code are event-driven.
		 */
		nstime_copy(&decay->epoch, new_time);
		decay_deadline_init(decay);
	} else {
		/* Verify that time does not go backwards. */
		assert(nstime_compare(&decay->epoch, new_time) <= 0);
	}
}

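/*
 * Compute the number of unpurged pages that the decay curve says may still be
 * retained, given the current backlog.
 */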
static size_t
decay_backlog_npages_limit(const decay_t *decay) {
	/*
	 * For each element of decay_backlog, multiply by the corresponding
	 * fixed-point smoothstep decay factor. Sum the products, then divide
	 * to round down to the nearest whole number of pages.
	 */
	uint64_t sum = 0;
	for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);

	return npages_limit_backlog;
}

/*
 * Update the backlog, assuming that 'nadvance_u64' time intervals have passed.
 * The oldest 'nadvance_u64' records are dropped and 'current_npages' is placed
 * as the newest record.
 */
static void
decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
    size_t current_npages) {
	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
		    sizeof(size_t));
	} else {
		size_t nadvance_z = (size_t)nadvance_u64;

		assert((uint64_t)nadvance_z == nadvance_u64);

		memmove(decay->backlog, &decay->backlog[nadvance_z],
		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
		if (nadvance_z > 1) {
			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
		}
	}

	size_t npages_delta = (current_npages > decay->nunpurged) ?
	    current_npages - decay->nunpurged : 0;
	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;

	if (config_debug) {
		if (current_npages > decay->ceil_npages) {
			decay->ceil_npages = current_npages;
		}
		size_t npages_limit = decay_backlog_npages_limit(decay);
		assert(decay->ceil_npages >= npages_limit);
		if (decay->ceil_npages > npages_limit) {
			decay->ceil_npages = npages_limit;
		}
	}
}

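/* Whether the (jittered) deadline of the current epoch has passed. */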
static inline bool
decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
	return (nstime_compare(&decay->deadline, time) <= 0);
}

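/*
 * Of 'npages_new' pages just added as the newest backlog record, how many does
 * the decay curve schedule for purging within the duration 'time'?
 */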
uint64_t
decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns);

	uint64_t npages_purge;
	if (n_epoch >= SMOOTHSTEP_NSTEPS) {
		npages_purge = npages_new;
	} else {
		uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
		assert(h_steps_max >=
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge = npages_new * (h_steps_max -
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge >>= SMOOTHSTEP_BFP;
	}
	return npages_purge;
}

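/*
 * Advance the epoch (possibly by several intervals) if 'new_time' has passed
 * the current deadline, updating the backlog and the derived npages_limit and
 * nunpurged values.  Returns true if one or more epochs were advanced.
 */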
bool
decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
    size_t npages_current) {
	/* Handle possible non-monotonicity of time. */
	decay_maybe_update_time(decay, new_time);

	if (!decay_deadline_reached(decay, new_time)) {
		return false;
	}
	nstime_t delta;
	nstime_copy(&delta, new_time);
	nstime_subtract(&delta, &decay->epoch);

	uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
	assert(nadvance_u64 > 0);

	/* Add nadvance_u64 decay intervals to epoch. */
	nstime_copy(&delta, &decay->interval);
	nstime_imultiply(&delta, nadvance_u64);
	nstime_add(&decay->epoch, &delta);

	/* Set a new deadline. */
	decay_deadline_init(decay);

	/* Update the backlog. */
	decay_backlog_update(decay, nadvance_u64, npages_current);

	decay->npages_limit = decay_backlog_npages_limit(decay);
	decay->nunpurged = (decay->npages_limit > npages_current) ?
	    decay->npages_limit : npages_current;

	return true;
}

/*
 * Calculate how many pages should be purged after 'interval'.
 *
 * First, calculate how many pages should remain at the moment, then subtract
 * the number of pages that should remain after 'interval'. The difference is
 * how many pages should be purged until then.
 *
 * The number of pages that should remain at a specific moment is calculated
 * like this: pages(now) = sum(backlog[i] * h_steps[i]). After 'interval'
 * passes, backlog would shift 'interval' positions to the left and sigmoid
 * curve would be applied starting with backlog[interval].
 *
 * The implementation doesn't directly map to the description, but it's
 * essentially the same calculation, optimized to avoid iterating over
 * [interval..SMOOTHSTEP_NSTEPS) twice.
 */
static inline size_t
decay_npurge_after_interval(decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] *
		    (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}

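/*
 * Estimate how many nanoseconds the caller can wait before the decay curve
 * calls for more than 'npages_threshold' pages to be purged, given
 * 'npages_current' unpurged pages.  Returns DECAY_UNBOUNDED_TIME_TO_PURGE when
 * no timed wakeup is needed (decay is not gradual, or the backlog records no
 * dirty pages).
 */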
uint64_t
decay_ns_until_purge(decay_t *decay, size_t npages_current,
    uint64_t npages_threshold) {
	if (!decay_gradually(decay)) {
		return DECAY_UNBOUNDED_TIME_TO_PURGE;
	}
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	assert(decay_interval_ns > 0);
	if (npages_current == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded. Sleep indefinitely. */
			return DECAY_UNBOUNDED_TIME_TO_PURGE;
		}
	}
	if (npages_current <= npages_threshold) {
		/* Use max interval. */
		return decay_interval_ns * SMOOTHSTEP_NSTEPS;
	}

	/* Minimum of 2 intervals to ensure reaching the next epoch deadline. */
	size_t lb = 2;
	size_t ub = SMOOTHSTEP_NSTEPS;

	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > npages_threshold) {
		return decay_interval_ns * lb;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < npages_threshold) {
		return decay_interval_ns * ub;
	}

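	/*
	 * Binary search for the earliest interval at which the number of pages
	 * to purge crosses 'npages_threshold'.  The search space is [lb, ub]
	 * epochs, so it terminates within ~lg(SMOOTHSTEP_NSTEPS) iterations.
	 */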
	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > npages_threshold) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
		++n_search;
	}
	return decay_interval_ns * (ub + lb) / 2;
}