1 | #include "jemalloc/internal/jemalloc_preamble.h" |
2 | |
3 | #include "jemalloc/internal/assert.h" |
4 | #include "jemalloc/internal/bit_util.h" |
5 | #include "jemalloc/internal/bitmap.h" |
6 | #include "jemalloc/internal/pages.h" |
7 | #include "jemalloc/internal/sc.h" |
8 | |
9 | /* |
10 | * This module computes the size classes used to satisfy allocations. The logic |
11 | * here was ported more or less line-by-line from a shell script, and because of |
12 | * that is not the most idiomatic C. Eventually we should fix this, but for now |
13 | * at least the damage is compartmentalized to this file. |
14 | */ |
15 | |
/*
 * Compute the region (allocation) size encoded by a size-class triple:
 * 2^lg_base + ndelta * 2^lg_delta.
 */
size_t
reg_size_compute(int lg_base, int lg_delta, int ndelta) {
	size_t base = ZU(1) << lg_base;
	size_t delta_total = ZU(ndelta) << lg_delta;
	return base + delta_total;
}
20 | |
21 | /* Returns the number of pages in the slab. */ |
/* Returns the number of pages in the slab. */
static int
slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) {
	size_t page = (ZU(1) << lg_page);
	size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta);

	/*
	 * Grow the candidate slab one page at a time until it is an exact
	 * multiple of the region size, i.e. until we reach the least common
	 * multiple of the page size and size class size.  Size classes are
	 * all of the form base + ndelta * delta == (ndelta + base/delta) *
	 * delta, which is (ndelta + ngroup) * delta.  The way we choose
	 * slabbing strategies means that delta is at most the page size and
	 * ndelta < ngroup.  So the loop executes for at most 2 * ngroup - 1
	 * iterations, which is also the bound on the number of pages in a
	 * slab chosen by default.  With the current default settings, this
	 * is at most 7.
	 */
	size_t candidate_size = page;
	size_t candidate_nregs = candidate_size / reg_size;
	for (;;) {
		size_t slab_bytes = candidate_size;
		size_t slab_nregs = candidate_nregs;
		candidate_size += page;
		candidate_nregs = candidate_size / reg_size;
		/* Perfect fit: the regions tile the slab with no waste. */
		if (slab_bytes == slab_nregs * reg_size) {
			return (int)(slab_bytes / page);
		}
	}
}
52 | |
53 | static void |
54 | size_class( |
55 | /* Output. */ |
56 | sc_t *sc, |
57 | /* Configuration decisions. */ |
58 | int lg_max_lookup, int lg_page, int lg_ngroup, |
59 | /* Inputs specific to the size class. */ |
60 | int index, int lg_base, int lg_delta, int ndelta) { |
61 | sc->index = index; |
62 | sc->lg_base = lg_base; |
63 | sc->lg_delta = lg_delta; |
64 | sc->ndelta = ndelta; |
65 | size_t size = reg_size_compute(lg_base, lg_delta, ndelta); |
66 | sc->psz = (size % (ZU(1) << lg_page) == 0); |
67 | if (index == 0) { |
68 | assert(!sc->psz); |
69 | } |
70 | if (size < (ZU(1) << (lg_page + lg_ngroup))) { |
71 | sc->bin = true; |
72 | sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta); |
73 | } else { |
74 | sc->bin = false; |
75 | sc->pgs = 0; |
76 | } |
77 | if (size <= (ZU(1) << lg_max_lookup)) { |
78 | sc->lg_delta_lookup = lg_delta; |
79 | } else { |
80 | sc->lg_delta_lookup = 0; |
81 | } |
82 | } |
83 | |
84 | static void |
85 | size_classes( |
86 | /* Output. */ |
87 | sc_data_t *sc_data, |
88 | /* Determined by the system. */ |
89 | size_t lg_ptr_size, int lg_quantum, |
90 | /* Configuration decisions. */ |
91 | int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) { |
92 | int ptr_bits = (1 << lg_ptr_size) * 8; |
93 | int ngroup = (1 << lg_ngroup); |
94 | int ntiny = 0; |
95 | int nlbins = 0; |
96 | int lg_tiny_maxclass = (unsigned)-1; |
97 | int nbins = 0; |
98 | int npsizes = 0; |
99 | |
100 | int index = 0; |
101 | |
102 | int ndelta = 0; |
103 | int lg_base = lg_tiny_min; |
104 | int lg_delta = lg_base; |
105 | |
106 | /* Outputs that we update as we go. */ |
107 | size_t lookup_maxclass = 0; |
108 | size_t small_maxclass = 0; |
109 | int lg_large_minclass = 0; |
110 | size_t large_maxclass = 0; |
111 | |
112 | /* Tiny size classes. */ |
113 | while (lg_base < lg_quantum) { |
114 | sc_t *sc = &sc_data->sc[index]; |
115 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
116 | lg_base, lg_delta, ndelta); |
117 | if (sc->lg_delta_lookup != 0) { |
118 | nlbins = index + 1; |
119 | } |
120 | if (sc->psz) { |
121 | npsizes++; |
122 | } |
123 | if (sc->bin) { |
124 | nbins++; |
125 | } |
126 | ntiny++; |
127 | /* Final written value is correct. */ |
128 | lg_tiny_maxclass = lg_base; |
129 | index++; |
130 | lg_delta = lg_base; |
131 | lg_base++; |
132 | } |
133 | |
134 | /* First non-tiny (pseudo) group. */ |
135 | if (ntiny != 0) { |
136 | sc_t *sc = &sc_data->sc[index]; |
137 | /* |
138 | * See the note in sc.h; the first non-tiny size class has an |
139 | * unusual encoding. |
140 | */ |
141 | lg_base--; |
142 | ndelta = 1; |
143 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
144 | lg_base, lg_delta, ndelta); |
145 | index++; |
146 | lg_base++; |
147 | lg_delta++; |
148 | if (sc->psz) { |
149 | npsizes++; |
150 | } |
151 | if (sc->bin) { |
152 | nbins++; |
153 | } |
154 | } |
155 | while (ndelta < ngroup) { |
156 | sc_t *sc = &sc_data->sc[index]; |
157 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
158 | lg_base, lg_delta, ndelta); |
159 | index++; |
160 | ndelta++; |
161 | if (sc->psz) { |
162 | npsizes++; |
163 | } |
164 | if (sc->bin) { |
165 | nbins++; |
166 | } |
167 | } |
168 | |
169 | /* All remaining groups. */ |
170 | lg_base = lg_base + lg_ngroup; |
171 | while (lg_base < ptr_bits - 1) { |
172 | ndelta = 1; |
173 | int ndelta_limit; |
174 | if (lg_base == ptr_bits - 2) { |
175 | ndelta_limit = ngroup - 1; |
176 | } else { |
177 | ndelta_limit = ngroup; |
178 | } |
179 | while (ndelta <= ndelta_limit) { |
180 | sc_t *sc = &sc_data->sc[index]; |
181 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
182 | lg_base, lg_delta, ndelta); |
183 | if (sc->lg_delta_lookup != 0) { |
184 | nlbins = index + 1; |
185 | /* Final written value is correct. */ |
186 | lookup_maxclass = (ZU(1) << lg_base) |
187 | + (ZU(ndelta) << lg_delta); |
188 | } |
189 | if (sc->psz) { |
190 | npsizes++; |
191 | } |
192 | if (sc->bin) { |
193 | nbins++; |
194 | /* Final written value is correct. */ |
195 | small_maxclass = (ZU(1) << lg_base) |
196 | + (ZU(ndelta) << lg_delta); |
197 | if (lg_ngroup > 0) { |
198 | lg_large_minclass = lg_base + 1; |
199 | } else { |
200 | lg_large_minclass = lg_base + 2; |
201 | } |
202 | } |
203 | large_maxclass = (ZU(1) << lg_base) |
204 | + (ZU(ndelta) << lg_delta); |
205 | index++; |
206 | ndelta++; |
207 | } |
208 | lg_base++; |
209 | lg_delta++; |
210 | } |
211 | /* Additional outputs. */ |
212 | int nsizes = index; |
213 | unsigned lg_ceil_nsizes = lg_ceil(nsizes); |
214 | |
215 | /* Fill in the output data. */ |
216 | sc_data->ntiny = ntiny; |
217 | sc_data->nlbins = nlbins; |
218 | sc_data->nbins = nbins; |
219 | sc_data->nsizes = nsizes; |
220 | sc_data->lg_ceil_nsizes = lg_ceil_nsizes; |
221 | sc_data->npsizes = npsizes; |
222 | sc_data->lg_tiny_maxclass = lg_tiny_maxclass; |
223 | sc_data->lookup_maxclass = lookup_maxclass; |
224 | sc_data->small_maxclass = small_maxclass; |
225 | sc_data->lg_large_minclass = lg_large_minclass; |
226 | sc_data->large_minclass = (ZU(1) << lg_large_minclass); |
227 | sc_data->large_maxclass = large_maxclass; |
228 | |
229 | /* |
230 | * We compute these values in two ways: |
231 | * - Incrementally, as above. |
232 | * - In macros, in sc.h. |
233 | * The computation is easier when done incrementally, but putting it in |
234 | * a constant makes it available to the fast paths without having to |
235 | * touch the extra global cacheline. We assert, however, that the two |
236 | * computations are equivalent. |
237 | */ |
238 | assert(sc_data->npsizes == SC_NPSIZES); |
239 | assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS); |
240 | assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS); |
241 | assert(sc_data->large_minclass == SC_LARGE_MINCLASS); |
242 | assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); |
243 | assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); |
244 | |
245 | /* |
246 | * In the allocation fastpath, we want to assume that we can |
247 | * unconditionally subtract the requested allocation size from |
248 | * a ssize_t, and detect passing through 0 correctly. This |
249 | * results in optimal generated code. For this to work, the |
250 | * maximum allocation size must be less than SSIZE_MAX. |
251 | */ |
252 | assert(SC_LARGE_MAXCLASS < SSIZE_MAX); |
253 | } |
254 | |
/*
 * Populate *sc_data with the size classes implied by the compile-time
 * configuration (pointer size, quantum, tiny-class minimum, lookup-table
 * limit, page size, and group size), then mark the data initialized.
 */
void
sc_data_init(sc_data_t *sc_data) {
	size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN,
	    SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP);

	/* Set last, so consumers can use this flag as a readiness check. */
	sc_data->initialized = true;
}
262 | |
263 | static void |
264 | sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) { |
265 | size_t min_pgs = reg_size / PAGE; |
266 | if (reg_size % PAGE != 0) { |
267 | min_pgs++; |
268 | } |
269 | /* |
270 | * BITMAP_MAXBITS is actually determined by putting the smallest |
271 | * possible size-class on one page, so this can never be 0. |
272 | */ |
273 | size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE; |
274 | |
275 | assert(min_pgs <= max_pgs); |
276 | assert(min_pgs > 0); |
277 | assert(max_pgs >= 1); |
278 | if (pgs_guess < min_pgs) { |
279 | sc->pgs = (int)min_pgs; |
280 | } else if (pgs_guess > max_pgs) { |
281 | sc->pgs = (int)max_pgs; |
282 | } else { |
283 | sc->pgs = (int)pgs_guess; |
284 | } |
285 | } |
286 | |
287 | void |
288 | sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { |
289 | assert(data->initialized); |
290 | for (int i = 0; i < data->nsizes; i++) { |
291 | sc_t *sc = &data->sc[i]; |
292 | if (!sc->bin) { |
293 | break; |
294 | } |
295 | size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, |
296 | sc->ndelta); |
297 | if (begin <= reg_size && reg_size <= end) { |
298 | sc_data_update_sc_slab_size(sc, reg_size, pgs); |
299 | } |
300 | } |
301 | } |
302 | |
/*
 * Boot-time entry point for this module; currently just delegates to
 * sc_data_init.
 */
void
sc_boot(sc_data_t *data) {
	sc_data_init(data);
}
307 | |