1 | #ifndef JEMALLOC_INTERNAL_PH_H |
2 | #define JEMALLOC_INTERNAL_PH_H |
3 | |
4 | /* |
5 | * A Pairing Heap implementation. |
6 | * |
7 | * "The Pairing Heap: A New Form of Self-Adjusting Heap" |
8 | * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf |
9 | * |
 * With auxiliary twopass list, described in a follow-on paper.
11 | * |
12 | * "Pairing Heaps: Experiments and Analysis" |
13 | * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf |
14 | * |
15 | ******************************************************************************* |
16 | * |
17 | * We include a non-obvious optimization: |
18 | * - First, we introduce a new pop-and-link operation; pop the two most |
19 | * recently-inserted items off the aux-list, link them, and push the resulting |
20 | * heap. |
21 | * - We maintain a count of the number of insertions since the last time we |
22 | * merged the aux-list (i.e. via first() or remove_first()). After N inserts, |
23 | * we do ffs(N) pop-and-link operations. |
24 | * |
25 | * One way to think of this is that we're progressively building up a tree in |
26 | * the aux-list, rather than a linked-list (think of the series of merges that |
27 | * will be performed as the aux-count grows). |
28 | * |
 * There are a couple of reasons we benefit from this:
30 | * - Ordinarily, after N insertions, the aux-list is of size N. With our |
31 | * strategy, it's of size O(log(N)). So we decrease the worst-case time of |
32 | * first() calls, and reduce the average cost of remove_min calls. Since |
33 | * these almost always occur while holding a lock, we practically reduce the |
34 | * frequency of unusually long hold times. |
35 | * - This moves the bulk of the work of merging the aux-list onto the threads |
36 | * that are inserting into the heap. In some common scenarios, insertions |
37 | * happen in bulk, from a single thread (think tcache flushing; we potentially |
38 | * move many slabs from slabs_full to slabs_nonfull). All the nodes in this |
39 | * case are in the inserting threads cache, and linking them is very cheap |
40 | * (cache misses dominate linking cost). Without this optimization, linking |
41 | * happens on the next call to remove_first. Since that remove_first call |
42 | * likely happens on a different thread (or at least, after the cache has |
43 | * gotten cold if done on the same thread), deferring linking trades cheap |
44 | * link operations now for expensive ones later. |
45 | * |
46 | * The ffs trick keeps amortized insert cost at constant time. Similar |
47 | * strategies based on periodically sorting the list after a batch of operations |
48 | * perform worse than this in practice, even with various fancy tricks; they |
49 | * all took amortized complexity of an insert from O(1) to O(log(n)). |
50 | */ |
51 | |
/*
 * strcmp-like comparator used to order heap nodes: returns a negative value
 * when the first argument sorts before the second, and non-negative otherwise.
 */
typedef int (*ph_cmp_t)(void *, void *);
53 | |
/* Node structure. */
typedef struct phn_link_s phn_link_t;
struct phn_link_s {
	/* Parent if this node is its parent's leftmost child; otherwise the
	 * previous sibling (see the lchild check in ph_remove()). */
	void *prev;
	/* Next sibling; for the root, the head of the aux list instead. */
	void *next;
	/* Leftmost child; children form a doubly linked sibling list. */
	void *lchild;
};
61 | |
typedef struct ph_s ph_t;
struct ph_s {
	/* Minimum-so-far; its next pointer doubles as the aux-list head. */
	void *root;
	/*
	 * Inserts done since the last aux-list merge. This is not necessarily
	 * the size of the aux-list, since it's possible that removals have
	 * happened since, and we don't track whether or not those removals are
	 * from the aux list.
	 */
	size_t auxcount;
};
73 | |
74 | JEMALLOC_ALWAYS_INLINE phn_link_t * |
75 | phn_link_get(void *phn, size_t offset) { |
76 | return (phn_link_t *)(((uintptr_t)phn) + offset); |
77 | } |
78 | |
79 | JEMALLOC_ALWAYS_INLINE void |
80 | phn_link_init(void *phn, size_t offset) { |
81 | phn_link_get(phn, offset)->prev = NULL; |
82 | phn_link_get(phn, offset)->next = NULL; |
83 | phn_link_get(phn, offset)->lchild = NULL; |
84 | } |
85 | |
86 | /* Internal utility helpers. */ |
87 | JEMALLOC_ALWAYS_INLINE void * |
88 | phn_lchild_get(void *phn, size_t offset) { |
89 | return phn_link_get(phn, offset)->lchild; |
90 | } |
91 | |
92 | JEMALLOC_ALWAYS_INLINE void |
93 | phn_lchild_set(void *phn, void *lchild, size_t offset) { |
94 | phn_link_get(phn, offset)->lchild = lchild; |
95 | } |
96 | |
97 | JEMALLOC_ALWAYS_INLINE void * |
98 | phn_next_get(void *phn, size_t offset) { |
99 | return phn_link_get(phn, offset)->next; |
100 | } |
101 | |
102 | JEMALLOC_ALWAYS_INLINE void |
103 | phn_next_set(void *phn, void *next, size_t offset) { |
104 | phn_link_get(phn, offset)->next = next; |
105 | } |
106 | |
107 | JEMALLOC_ALWAYS_INLINE void * |
108 | phn_prev_get(void *phn, size_t offset) { |
109 | return phn_link_get(phn, offset)->prev; |
110 | } |
111 | |
112 | JEMALLOC_ALWAYS_INLINE void |
113 | phn_prev_set(void *phn, void *prev, size_t offset) { |
114 | phn_link_get(phn, offset)->prev = prev; |
115 | } |
116 | |
117 | JEMALLOC_ALWAYS_INLINE void |
118 | phn_merge_ordered(void *phn0, void *phn1, size_t offset, |
119 | ph_cmp_t cmp) { |
120 | void *phn0child; |
121 | |
122 | assert(phn0 != NULL); |
123 | assert(phn1 != NULL); |
124 | assert(cmp(phn0, phn1) <= 0); |
125 | |
126 | phn_prev_set(phn1, phn0, offset); |
127 | phn0child = phn_lchild_get(phn0, offset); |
128 | phn_next_set(phn1, phn0child, offset); |
129 | if (phn0child != NULL) { |
130 | phn_prev_set(phn0child, phn1, offset); |
131 | } |
132 | phn_lchild_set(phn0, phn1, offset); |
133 | } |
134 | |
135 | JEMALLOC_ALWAYS_INLINE void * |
136 | phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { |
137 | void *result; |
138 | if (phn0 == NULL) { |
139 | result = phn1; |
140 | } else if (phn1 == NULL) { |
141 | result = phn0; |
142 | } else if (cmp(phn0, phn1) < 0) { |
143 | phn_merge_ordered(phn0, phn1, offset, cmp); |
144 | result = phn0; |
145 | } else { |
146 | phn_merge_ordered(phn1, phn0, offset, cmp); |
147 | result = phn1; |
148 | } |
149 | return result; |
150 | } |
151 | |
/*
 * Collapse a sibling list (linked via next/prev) into a single pairing heap
 * and return its root.  phn must be non-NULL; the returned root has both
 * next and prev cleared.
 */
JEMALLOC_ALWAYS_INLINE void *
phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
	void *head = NULL;
	void *tail = NULL;
	void *phn0 = phn;
	void *phn1 = phn_next_get(phn0, offset);

	/*
	 * Multipass merge, wherein the first two elements of a FIFO
	 * are repeatedly merged, and each result is appended to the
	 * singly linked FIFO, until the FIFO contains only a single
	 * element.  We start with a sibling list but no reference to
	 * its tail, so we do a single pass over the sibling list to
	 * populate the FIFO.
	 */
	if (phn1 != NULL) {
		void *phnrest = phn_next_get(phn1, offset);
		if (phnrest != NULL) {
			phn_prev_set(phnrest, NULL, offset);
		}
		/* Detach and merge the first pair to seed the FIFO. */
		phn_prev_set(phn0, NULL, offset);
		phn_next_set(phn0, NULL, offset);
		phn_prev_set(phn1, NULL, offset);
		phn_next_set(phn1, NULL, offset);
		phn0 = phn_merge(phn0, phn1, offset, cmp);
		head = tail = phn0;
		phn0 = phnrest;
		/*
		 * First pass: walk the rest of the sibling list, merging
		 * pairwise and appending each merged heap to the FIFO.
		 */
		while (phn0 != NULL) {
			phn1 = phn_next_get(phn0, offset);
			if (phn1 != NULL) {
				phnrest = phn_next_get(phn1, offset);
				if (phnrest != NULL) {
					phn_prev_set(phnrest, NULL, offset);
				}
				phn_prev_set(phn0, NULL, offset);
				phn_next_set(phn0, NULL, offset);
				phn_prev_set(phn1, NULL, offset);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = phnrest;
			} else {
				/* Odd element out: append it unmerged. */
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = NULL;
			}
		}
		phn0 = head;
		phn1 = phn_next_get(phn0, offset);
		if (phn1 != NULL) {
			/*
			 * Subsequent passes: keep merging the two heaps at
			 * the FIFO head and appending the result at the tail,
			 * until only one heap remains.
			 */
			while (true) {
				head = phn_next_get(phn1, offset);
				assert(phn_prev_get(phn0, offset) == NULL);
				phn_next_set(phn0, NULL, offset);
				assert(phn_prev_get(phn1, offset) == NULL);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				if (head == NULL) {
					break;
				}
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = head;
				phn1 = phn_next_get(phn0, offset);
			}
		}
	}
	return phn0;
}
222 | |
/*
 * Fold the aux list (hanging off the root's next pointer) into the heap
 * proper.  Afterward the root is the true minimum and the aux list is empty.
 * Callers must ensure ph->root != NULL before calling.
 */
JEMALLOC_ALWAYS_INLINE void
ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	ph->auxcount = 0;
	void *phn = phn_next_get(ph->root, offset);
	if (phn != NULL) {
		/* Detach the aux list from the root before merging it. */
		phn_prev_set(ph->root, NULL, offset);
		phn_next_set(ph->root, NULL, offset);
		phn_prev_set(phn, NULL, offset);
		/* Collapse the aux list into one heap, then merge with root. */
		phn = phn_merge_siblings(phn, offset, cmp);
		assert(phn_next_get(phn, offset) == NULL);
		ph->root = phn_merge(ph->root, phn, offset, cmp);
	}
}
236 | |
237 | JEMALLOC_ALWAYS_INLINE void * |
238 | ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) { |
239 | void *result; |
240 | void *lchild = phn_lchild_get(phn, offset); |
241 | if (lchild == NULL) { |
242 | result = NULL; |
243 | } else { |
244 | result = phn_merge_siblings(lchild, offset, cmp); |
245 | } |
246 | return result; |
247 | } |
248 | |
249 | JEMALLOC_ALWAYS_INLINE void |
250 | ph_new(ph_t *ph) { |
251 | ph->root = NULL; |
252 | ph->auxcount = 0; |
253 | } |
254 | |
255 | JEMALLOC_ALWAYS_INLINE bool |
256 | ph_empty(ph_t *ph) { |
257 | return ph->root == NULL; |
258 | } |
259 | |
260 | JEMALLOC_ALWAYS_INLINE void * |
261 | ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { |
262 | if (ph->root == NULL) { |
263 | return NULL; |
264 | } |
265 | ph_merge_aux(ph, offset, cmp); |
266 | return ph->root; |
267 | } |
268 | |
269 | JEMALLOC_ALWAYS_INLINE void * |
270 | ph_any(ph_t *ph, size_t offset) { |
271 | if (ph->root == NULL) { |
272 | return NULL; |
273 | } |
274 | void *aux = phn_next_get(ph->root, offset); |
275 | if (aux != NULL) { |
276 | return aux; |
277 | } |
278 | return ph->root; |
279 | } |
280 | |
/* Returns true if we should stop trying to merge. */
JEMALLOC_ALWAYS_INLINE bool
ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	assert(ph->root != NULL);
	/* The aux list hangs off the root's next pointer. */
	void *phn0 = phn_next_get(ph->root, offset);
	if (phn0 == NULL) {
		/* Aux list is empty; nothing to merge. */
		return true;
	}
	void *phn1 = phn_next_get(phn0, offset);
	if (phn1 == NULL) {
		/* Only one aux entry; a pairwise merge needs two. */
		return true;
	}
	void *next_phn1 = phn_next_get(phn1, offset);
	/* Detach the first two aux entries and link them into one heap. */
	phn_next_set(phn0, NULL, offset);
	phn_prev_set(phn0, NULL, offset);
	phn_next_set(phn1, NULL, offset);
	phn_prev_set(phn1, NULL, offset);
	phn0 = phn_merge(phn0, phn1, offset, cmp);
	/* Push the merged heap back onto the front of the aux list. */
	phn_next_set(phn0, next_phn1, offset);
	if (next_phn1 != NULL) {
		phn_prev_set(next_phn1, phn0, offset);
	}
	phn_next_set(ph->root, phn0, offset);
	phn_prev_set(phn0, ph->root, offset);
	/* Stop once fewer than two aux entries remain. */
	return next_phn1 == NULL;
}
307 | |
/* Insert phn into the heap.  phn must not already be in any heap. */
JEMALLOC_ALWAYS_INLINE void
ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	phn_link_init(phn, offset);

	/*
	 * Treat the root as an aux list during insertion, and lazily merge
	 * during a_prefix##remove_first().  For elements that are inserted,
	 * then removed via a_prefix##remove() before the aux list is ever
	 * processed, this makes insert/remove constant-time, whereas eager
	 * merging would make insert O(log n).
	 */
	if (ph->root == NULL) {
		ph->root = phn;
	} else {
		/*
		 * As a special case, check to see if we can replace the root.
		 * This is practically common in some important cases, and lets
		 * us defer some insertions (hopefully, until the point where
		 * some of the items in the aux list have been removed, saving
		 * us from linking them at all).
		 */
		if (cmp(phn, ph->root) < 0) {
			phn_lchild_set(phn, ph->root, offset);
			phn_prev_set(ph->root, phn, offset);
			ph->root = phn;
			ph->auxcount = 0;
			return;
		}
		/* Otherwise push phn onto the front of the aux list. */
		ph->auxcount++;
		phn_next_set(phn, phn_next_get(ph->root, offset), offset);
		if (phn_next_get(ph->root, offset) != NULL) {
			phn_prev_set(phn_next_get(ph->root, offset), phn,
			    offset);
		}
		phn_prev_set(phn, ph->root, offset);
		phn_next_set(ph->root, phn, offset);
	}
	/*
	 * The ffs trick from the file header: after the N-th insertion since
	 * the last aux merge, do up to ffs(N) pop-and-link operations, keeping
	 * the aux list O(log(N)) long while insert stays amortized O(1).
	 */
	if (ph->auxcount > 1) {
		unsigned nmerges = ffs_zu(ph->auxcount - 1);
		bool done = false;
		for (unsigned i = 0; i < nmerges && !done; i++) {
			done = ph_try_aux_merge_pair(ph, offset, cmp);
		}
	}
}
353 | |
354 | JEMALLOC_ALWAYS_INLINE void * |
355 | ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { |
356 | void *ret; |
357 | |
358 | if (ph->root == NULL) { |
359 | return NULL; |
360 | } |
361 | ph_merge_aux(ph, offset, cmp); |
362 | ret = ph->root; |
363 | ph->root = ph_merge_children(ph->root, offset, cmp); |
364 | |
365 | return ret; |
366 | |
367 | } |
368 | |
/* Remove an arbitrary element phn, which must currently be in the heap. */
JEMALLOC_ALWAYS_INLINE void
ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	void *replace;
	void *parent;

	if (ph->root == phn) {
		/*
		 * We can delete from aux list without merging it, but we need
		 * to merge if we are dealing with the root node and it has
		 * children.
		 */
		if (phn_lchild_get(phn, offset) == NULL) {
			/* Childless root: first aux entry becomes the root. */
			ph->root = phn_next_get(phn, offset);
			if (ph->root != NULL) {
				phn_prev_set(ph->root, NULL, offset);
			}
			return;
		}
		ph_merge_aux(ph, offset, cmp);
		/* The aux merge may have displaced phn from the root. */
		if (ph->root == phn) {
			ph->root = ph_merge_children(ph->root, offset, cmp);
			return;
		}
	}

	/* Get parent (if phn is leftmost child) before mutating. */
	if ((parent = phn_prev_get(phn, offset)) != NULL) {
		/* prev was a sibling, not a parent; forget it. */
		if (phn_lchild_get(parent, offset) != phn) {
			parent = NULL;
		}
	}
	/* Find a possible replacement node, and link to parent. */
	replace = ph_merge_children(phn, offset, cmp);
	/* Set next/prev for sibling linked list. */
	if (replace != NULL) {
		/* Splice replace into phn's slot in the sibling list. */
		if (parent != NULL) {
			phn_prev_set(replace, parent, offset);
			phn_lchild_set(parent, replace, offset);
		} else {
			phn_prev_set(replace, phn_prev_get(phn, offset),
			    offset);
			if (phn_prev_get(phn, offset) != NULL) {
				phn_next_set(phn_prev_get(phn, offset), replace,
				    offset);
			}
		}
		phn_next_set(replace, phn_next_get(phn, offset), offset);
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(phn_next_get(phn, offset), replace,
			    offset);
		}
	} else {
		/* No replacement: simply unlink phn from its neighbors. */
		if (parent != NULL) {
			void *next = phn_next_get(phn, offset);
			phn_lchild_set(parent, next, offset);
			if (next != NULL) {
				phn_prev_set(next, parent, offset);
			}
		} else {
			assert(phn_prev_get(phn, offset) != NULL);
			phn_next_set(
			    phn_prev_get(phn, offset),
			    phn_next_get(phn, offset), offset);
		}
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(
			    phn_next_get(phn, offset),
			    phn_prev_get(phn, offset), offset);
		}
	}
}
440 | |
/*
 * ph_structs() declares the type-specific wrappers: a_prefix##_link_t, the
 * intrusive link member to embed in a_type, and a_prefix##_t, the heap type
 * that the ph_gen()-generated functions operate on.
 */
#define ph_structs(a_prefix, a_type)					\
typedef struct {							\
	phn_link_t link;						\
} a_prefix##_link_t;							\
									\
typedef struct {							\
	ph_t ph;							\
} a_prefix##_t;
449 | |
/*
 * The ph_proto() macro generates function prototypes that correspond to the
 * functions generated by an equivalently parameterized call to ph_gen().
 * a_attr is the storage/attribute specifier applied to each function.
 */
#define ph_proto(a_attr, a_prefix, a_type)				\
									\
a_attr void a_prefix##_new(a_prefix##_t *ph);				\
a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);
464 | |
/*
 * The ph_gen() macro generates a type-specific pairing heap implementation,
 * wrapping the void *-based core above.  a_field names the a_prefix##_link_t
 * member embedded in a_type; offsetof(a_type, a_field) is how the core finds
 * each node's links.  a_cmp is a comparator over a_type pointers.
 */
#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
JEMALLOC_ALWAYS_INLINE int						\
a_prefix##_ph_cmp(void *a, void *b) {					\
	return a_cmp((a_type *)a, (a_type *)b);				\
}									\
									\
a_attr void								\
a_prefix##_new(a_prefix##_t *ph) {					\
	ph_new(&ph->ph);						\
}									\
									\
a_attr bool								\
a_prefix##_empty(a_prefix##_t *ph) {					\
	return ph_empty(&ph->ph);					\
}									\
									\
a_attr a_type *								\
a_prefix##_first(a_prefix##_t *ph) {					\
	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
	    &a_prefix##_ph_cmp);					\
}									\
									\
a_attr a_type *								\
a_prefix##_any(a_prefix##_t *ph) {					\
	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
}									\
									\
a_attr void								\
a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_first(a_prefix##_t *ph) {				\
	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr void								\
a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_any(a_prefix##_t *ph) {				\
	a_type *ret = a_prefix##_any(ph);				\
	if (ret != NULL) {						\
		a_prefix##_remove(ph, ret);				\
	}								\
	return ret;							\
}
519 | |
520 | #endif /* JEMALLOC_INTERNAL_PH_H */ |
521 | |