1 | #ifndef JEMALLOC_INTERNAL_DECAY_H |
2 | #define JEMALLOC_INTERNAL_DECAY_H |
3 | |
4 | #include "jemalloc/internal/smoothstep.h" |
5 | |
/*
 * Sentinel returned by decay_ns_until_purge when the wait is indefinite (i.e.
 * no bounded purge deadline can be computed).
 */
#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1)
7 | |
8 | /* |
9 | * The decay_t computes the number of pages we should purge at any given time. |
10 | * Page allocators inform a decay object when pages enter a decay-able state |
11 | * (i.e. dirty or muzzy), and query it to determine how many pages should be |
12 | * purged at any given time. |
13 | * |
14 | * This is mostly a single-threaded data structure and doesn't care about |
15 | * synchronization at all; it's the caller's responsibility to manage their |
16 | * synchronization on their own. There are two exceptions: |
17 | * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query). |
18 | * 2) The mtx and purging fields live (and are initialized) here, but are |
19 | * logically owned by the page allocator. This is just a convenience (since |
20 | * those fields would be duplicated for both the dirty and muzzy states |
21 | * otherwise). |
22 | */ |
typedef struct decay_s decay_t;
struct decay_s {
	/* Synchronizes all non-atomic fields. */
	malloc_mutex_t mtx;
	/*
	 * True if a thread is currently purging the extents associated with
	 * this decay structure.  Logically owned by the page allocator (see
	 * the struct-level comment above).
	 */
	bool purging;
	/*
	 * Approximate time in milliseconds from the creation of a set of unused
	 * dirty pages until an equivalent set of unused dirty pages is purged
	 * and/or reused.  Atomic so that decay_ms_read may be called racily;
	 * all other fields require mtx.
	 */
	atomic_zd_t time_ms;
	/* time / SMOOTHSTEP_NSTEPS. */
	nstime_t interval;
	/*
	 * Time at which the current decay interval logically started.  We do
	 * not actually advance to a new epoch until sometime after it starts
	 * because of scheduling and computation delays, and it is even possible
	 * to completely skip epochs.  In all cases, during epoch advancement we
	 * merge all relevant activity into the most recently recorded epoch.
	 */
	nstime_t epoch;
	/* Deadline randomness generator. */
	uint64_t jitter_state;
	/*
	 * Deadline for current epoch.  This is the sum of interval and per
	 * epoch jitter which is a uniform random variable in [0..interval).
	 * Epochs always advance by precise multiples of interval, but we
	 * randomize the deadline to reduce the likelihood of arenas purging in
	 * lockstep.
	 */
	nstime_t deadline;
	/*
	 * The number of pages we cap ourselves at in the current epoch, per
	 * decay policies.  Updated on an epoch change.  After an epoch change,
	 * the caller should take steps to try to purge down to this amount.
	 */
	size_t npages_limit;
	/*
	 * Number of unpurged pages at beginning of current epoch.  During epoch
	 * advancement we use the delta between arena->decay_*.nunpurged and
	 * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages,
	 * if any, were generated.
	 */
	size_t nunpurged;
	/*
	 * Trailing log of how many unused dirty pages were generated during
	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
	 * element is the most recent epoch.  Corresponding epoch times are
	 * relative to epoch.
	 *
	 * Updated only on epoch advance, triggered by
	 * decay_maybe_advance_epoch, below.
	 */
	size_t backlog[SMOOTHSTEP_NSTEPS];

	/* Peak number of pages in associated extents.  Used for debug only. */
	uint64_t ceil_npages;
};
85 | |
86 | /* |
87 | * The current decay time setting. This is the only public access to a decay_t |
88 | * that's allowed without holding mtx. |
89 | */ |
90 | static inline ssize_t |
91 | decay_ms_read(const decay_t *decay) { |
92 | return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); |
93 | } |
94 | |
95 | /* |
96 | * See the comment on the struct field -- the limit on pages we should allow in |
97 | * this decay state this epoch. |
98 | */ |
99 | static inline size_t |
100 | decay_npages_limit_get(const decay_t *decay) { |
101 | return decay->npages_limit; |
102 | } |
103 | |
104 | /* How many unused dirty pages were generated during the last epoch. */ |
105 | static inline size_t |
106 | decay_epoch_npages_delta(const decay_t *decay) { |
107 | return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; |
108 | } |
109 | |
110 | /* |
111 | * Current epoch duration, in nanoseconds. Given that new epochs are started |
112 | * somewhat haphazardly, this is not necessarily exactly the time between any |
113 | * two calls to decay_maybe_advance_epoch; see the comments on fields in the |
114 | * decay_t. |
115 | */ |
116 | static inline uint64_t |
117 | decay_epoch_duration_ns(const decay_t *decay) { |
118 | return nstime_ns(&decay->interval); |
119 | } |
120 | |
121 | static inline bool |
122 | decay_immediately(const decay_t *decay) { |
123 | ssize_t decay_ms = decay_ms_read(decay); |
124 | return decay_ms == 0; |
125 | } |
126 | |
127 | static inline bool |
128 | decay_disabled(const decay_t *decay) { |
129 | ssize_t decay_ms = decay_ms_read(decay); |
130 | return decay_ms < 0; |
131 | } |
132 | |
133 | /* Returns true if decay is enabled and done gradually. */ |
134 | static inline bool |
135 | decay_gradually(const decay_t *decay) { |
136 | ssize_t decay_ms = decay_ms_read(decay); |
137 | return decay_ms > 0; |
138 | } |
139 | |
/*
 * Returns true if the passed in decay time setting is valid.
 * < -1 : invalid
 * -1 : never decay
 * 0 : decay immediately
 * > 0 : some positive decay time, up to a maximum allowed value of
 * NSTIME_SEC_MAX * 1000, which corresponds to decaying somewhere in the early
 * 27th century. By that time, we expect to have implemented alternate purging
 * strategies.
 */
bool decay_ms_valid(ssize_t decay_ms);

/*
 * Initialize a decay_t with the given setting (in milliseconds) and current
 * time.  As a precondition, the decay_t must be zeroed out (as if with
 * memset).
 *
 * Returns true on error.
 */
bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);

/*
 * Given an already-initialized decay_t, reinitialize it with the given decay
 * time.  The decay_t must have previously been initialized (and should not
 * then be zeroed).
 */
void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);

/*
 * Compute how many of 'npages_new' pages we would need to purge in 'time'
 * (a duration; NOTE(review): presumably nanosecond-based like the other
 * nstime_t uses here -- confirm against the implementation).
 */
uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time,
    size_t npages_new);

/* Returns true if the epoch advanced and there are pages to purge. */
bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
    size_t current_npages);

/*
 * Calculates wait time until a number of pages in the interval
 * [0.5 * npages_threshold .. 1.5 * npages_threshold] should be purged.
 *
 * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of
 * indefinite wait.
 */
uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
    uint64_t npages_threshold);
185 | |
186 | #endif /* JEMALLOC_INTERNAL_DECAY_H */ |
187 | |