#ifndef JEMALLOC_INTERNAL_PH_H
#define JEMALLOC_INTERNAL_PH_H

/*
 * A Pairing Heap implementation.
 *
 * "The Pairing Heap: A New Form of Self-Adjusting Heap"
 * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
 *
 * With an auxiliary two-pass list, described in a follow-on paper.
 *
 * "Pairing Heaps: Experiments and Analysis"
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
 *
 *******************************************************************************
 *
 * We include a non-obvious optimization:
 * - First, we introduce a new pop-and-link operation; pop the two most
 *   recently-inserted items off the aux-list, link them, and push the
 *   resulting heap.
 * - We maintain a count of the number of insertions since the last time we
 *   merged the aux-list (i.e. via first() or remove_first()). After N inserts,
 *   we do ffs(N) pop-and-link operations.
 *
 * One way to think of this is that we're progressively building up a tree in
 * the aux-list, rather than a linked-list (think of the series of merges that
 * will be performed as the aux-count grows).
 *
 * There are a couple of reasons we benefit from this:
 * - Ordinarily, after N insertions, the aux-list is of size N. With our
 *   strategy, it's of size O(log(N)). So we decrease the worst-case time of
 *   first() calls, and reduce the average cost of remove_first calls. Since
 *   these almost always occur while holding a lock, in practice we reduce the
 *   frequency of unusually long hold times.
 * - This moves the bulk of the work of merging the aux-list onto the threads
 *   that are inserting into the heap. In some common scenarios, insertions
 *   happen in bulk, from a single thread (think tcache flushing; we
 *   potentially move many slabs from slabs_full to slabs_nonfull). All the
 *   nodes in this case are in the inserting thread's cache, and linking them
 *   is very cheap (cache misses dominate linking cost). Without this
 *   optimization, linking happens on the next call to remove_first. Since
 *   that remove_first call likely happens on a different thread (or at least,
 *   after the cache has gotten cold if done on the same thread), deferring
 *   linking trades cheap link operations now for expensive ones later.
 *
 * The ffs trick keeps the amortized insert cost at constant time. Similar
 * strategies based on periodically sorting the list after a batch of
 * operations perform worse than this in practice, even with various fancy
 * tricks; they all take the amortized complexity of an insert from O(1) to
 * O(log(n)).
 */
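
/*
 * A sketch of the accounting behind the ffs trick (assuming ffs here means
 * the 0-based index of the lowest set bit, as with ffs_zu below): half of
 * all inserts trigger no pop-and-link, a quarter trigger one, an eighth
 * trigger two, and so on, so the expected number of pop-and-link operations
 * per insert is sum(k / 2^(k+1)) = 1. Amortized insert therefore stays O(1),
 * while after N inserts the aux-list holds only O(log(N)) heaps.
 */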

typedef int (*ph_cmp_t)(void *, void *);

/* Node structure. */
typedef struct phn_link_s phn_link_t;
struct phn_link_s {
	void *prev;
	void *next;
	void *lchild;
};

typedef struct ph_s ph_t;
struct ph_s {
	void *root;
	/*
	 * Inserts done since the last aux-list merge. This is not necessarily
	 * the size of the aux-list, since it's possible that removals have
	 * happened since, and we don't track whether or not those removals
	 * are from the aux list.
	 */
	size_t auxcount;
};

JEMALLOC_ALWAYS_INLINE phn_link_t *
phn_link_get(void *phn, size_t offset) {
	return (phn_link_t *)(((uintptr_t)phn) + offset);
}

JEMALLOC_ALWAYS_INLINE void
phn_link_init(void *phn, size_t offset) {
	phn_link_get(phn, offset)->prev = NULL;
	phn_link_get(phn, offset)->next = NULL;
	phn_link_get(phn, offset)->lchild = NULL;
}

/* Internal utility helpers. */
JEMALLOC_ALWAYS_INLINE void *
phn_lchild_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->lchild;
}

JEMALLOC_ALWAYS_INLINE void
phn_lchild_set(void *phn, void *lchild, size_t offset) {
	phn_link_get(phn, offset)->lchild = lchild;
}

JEMALLOC_ALWAYS_INLINE void *
phn_next_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->next;
}

JEMALLOC_ALWAYS_INLINE void
phn_next_set(void *phn, void *next, size_t offset) {
	phn_link_get(phn, offset)->next = next;
}

JEMALLOC_ALWAYS_INLINE void *
phn_prev_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_prev_set(void *phn, void *prev, size_t offset) {
	phn_link_get(phn, offset)->prev = prev;
}

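/*
 * Make phn1 the new leftmost child of phn0. Requires cmp(phn0, phn1) <= 0;
 * phn0's previous leftmost child becomes phn1's next sibling.
 */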
JEMALLOC_ALWAYS_INLINE void
phn_merge_ordered(void *phn0, void *phn1, size_t offset,
    ph_cmp_t cmp) {
	void *phn0child;

	assert(phn0 != NULL);
	assert(phn1 != NULL);
	assert(cmp(phn0, phn1) <= 0);

	phn_prev_set(phn1, phn0, offset);
	phn0child = phn_lchild_get(phn0, offset);
	phn_next_set(phn1, phn0child, offset);
	if (phn0child != NULL) {
		phn_prev_set(phn0child, phn1, offset);
	}
	phn_lchild_set(phn0, phn1, offset);
}

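/*
 * Merge the heaps rooted at phn0 and phn1 (either of which may be NULL) and
 * return the root of the resulting heap: the smaller of the two inputs.
 */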
JEMALLOC_ALWAYS_INLINE void *
phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
	void *result;
	if (phn0 == NULL) {
		result = phn1;
	} else if (phn1 == NULL) {
		result = phn0;
	} else if (cmp(phn0, phn1) < 0) {
		phn_merge_ordered(phn0, phn1, offset, cmp);
		result = phn0;
	} else {
		phn_merge_ordered(phn1, phn0, offset, cmp);
		result = phn1;
	}
	return result;
}

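/*
 * Collapse a NULL-terminated sibling list into a single heap and return its
 * root; phn must be the head of the list.
 */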
JEMALLOC_ALWAYS_INLINE void *
phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
	void *head = NULL;
	void *tail = NULL;
	void *phn0 = phn;
	void *phn1 = phn_next_get(phn0, offset);

	/*
	 * Multipass merge, wherein the first two elements of a FIFO
	 * are repeatedly merged, and each result is appended to the
	 * singly linked FIFO, until the FIFO contains only a single
	 * element. We start with a sibling list but no reference to
	 * its tail, so we do a single pass over the sibling list to
	 * populate the FIFO.
	 */
	if (phn1 != NULL) {
		void *phnrest = phn_next_get(phn1, offset);
		if (phnrest != NULL) {
			phn_prev_set(phnrest, NULL, offset);
		}
		phn_prev_set(phn0, NULL, offset);
		phn_next_set(phn0, NULL, offset);
		phn_prev_set(phn1, NULL, offset);
		phn_next_set(phn1, NULL, offset);
		phn0 = phn_merge(phn0, phn1, offset, cmp);
		head = tail = phn0;
		phn0 = phnrest;
		while (phn0 != NULL) {
			phn1 = phn_next_get(phn0, offset);
			if (phn1 != NULL) {
				phnrest = phn_next_get(phn1, offset);
				if (phnrest != NULL) {
					phn_prev_set(phnrest, NULL, offset);
				}
				phn_prev_set(phn0, NULL, offset);
				phn_next_set(phn0, NULL, offset);
				phn_prev_set(phn1, NULL, offset);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = phnrest;
			} else {
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = NULL;
			}
		}
		phn0 = head;
		phn1 = phn_next_get(phn0, offset);
		if (phn1 != NULL) {
			while (true) {
				head = phn_next_get(phn1, offset);
				assert(phn_prev_get(phn0, offset) == NULL);
				phn_next_set(phn0, NULL, offset);
				assert(phn_prev_get(phn1, offset) == NULL);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				if (head == NULL) {
					break;
				}
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = head;
				phn1 = phn_next_get(phn0, offset);
			}
		}
	}
	return phn0;
}

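/*
 * Merge the aux list (the sibling list hanging off the root's next pointer)
 * back into the root, leaving a single-rooted heap and an empty aux list.
 */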
JEMALLOC_ALWAYS_INLINE void
ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	ph->auxcount = 0;
	void *phn = phn_next_get(ph->root, offset);
	if (phn != NULL) {
		phn_prev_set(ph->root, NULL, offset);
		phn_next_set(ph->root, NULL, offset);
		phn_prev_set(phn, NULL, offset);
		phn = phn_merge_siblings(phn, offset, cmp);
		assert(phn_next_get(phn, offset) == NULL);
		ph->root = phn_merge(ph->root, phn, offset, cmp);
	}
}

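/*
 * Merge all of phn's children into a single heap, as when deleting a root;
 * returns NULL if phn has no children.
 */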
JEMALLOC_ALWAYS_INLINE void *
ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) {
	void *result;
	void *lchild = phn_lchild_get(phn, offset);
	if (lchild == NULL) {
		result = NULL;
	} else {
		result = phn_merge_siblings(lchild, offset, cmp);
	}
	return result;
}

JEMALLOC_ALWAYS_INLINE void
ph_new(ph_t *ph) {
	ph->root = NULL;
	ph->auxcount = 0;
}

JEMALLOC_ALWAYS_INLINE bool
ph_empty(ph_t *ph) {
	return ph->root == NULL;
}

JEMALLOC_ALWAYS_INLINE void *
ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	return ph->root;
}

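/*
 * Return an arbitrary element, preferring the head of the aux list; removing
 * that element later is cheap, since it requires no aux merge.
 */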
JEMALLOC_ALWAYS_INLINE void *
ph_any(ph_t *ph, size_t offset) {
	if (ph->root == NULL) {
		return NULL;
	}
	void *aux = phn_next_get(ph->root, offset);
	if (aux != NULL) {
		return aux;
	}
	return ph->root;
}

/*
 * Pop-and-link (see the file-header comment): merge the two most recently
 * inserted heaps on the aux list and splice the result back onto the list.
 * Returns true if we should stop trying to merge, i.e. if fewer than two
 * mergeable heaps remain on the aux list.
 */
JEMALLOC_ALWAYS_INLINE bool
ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	assert(ph->root != NULL);
	void *phn0 = phn_next_get(ph->root, offset);
	if (phn0 == NULL) {
		return true;
	}
	void *phn1 = phn_next_get(phn0, offset);
	if (phn1 == NULL) {
		return true;
	}
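	/*
	 * Detach phn0 and phn1 from the aux list, merge them, and splice the
	 * winner back in where the pair used to be, directly after the root.
	 */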
	void *next_phn1 = phn_next_get(phn1, offset);
	phn_next_set(phn0, NULL, offset);
	phn_prev_set(phn0, NULL, offset);
	phn_next_set(phn1, NULL, offset);
	phn_prev_set(phn1, NULL, offset);
	phn0 = phn_merge(phn0, phn1, offset, cmp);
	phn_next_set(phn0, next_phn1, offset);
	if (next_phn1 != NULL) {
		phn_prev_set(next_phn1, phn0, offset);
	}
	phn_next_set(ph->root, phn0, offset);
	phn_prev_set(phn0, ph->root, offset);
	return next_phn1 == NULL;
}

JEMALLOC_ALWAYS_INLINE void
ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	phn_link_init(phn, offset);

	/*
	 * Treat the root as an aux list during insertion, and lazily merge
	 * during a_prefix##_remove_first(). For elements that are inserted,
	 * then removed via a_prefix##_remove() before the aux list is ever
	 * processed, this makes insert/remove constant-time, whereas eager
	 * merging would make insert O(log n).
	 */
	if (ph->root == NULL) {
		ph->root = phn;
	} else {
		/*
		 * As a special case, check to see if we can replace the root.
		 * This is common in practice in some important cases, and lets
		 * us defer some insertions (hopefully until the point where
		 * some of the items in the aux list have been removed, saving
		 * us from linking them at all).
		 */
		if (cmp(phn, ph->root) < 0) {
			phn_lchild_set(phn, ph->root, offset);
			phn_prev_set(ph->root, phn, offset);
			ph->root = phn;
			ph->auxcount = 0;
			return;
		}
		ph->auxcount++;
		phn_next_set(phn, phn_next_get(ph->root, offset), offset);
		if (phn_next_get(ph->root, offset) != NULL) {
			phn_prev_set(phn_next_get(ph->root, offset), phn,
			    offset);
		}
		phn_prev_set(phn, ph->root, offset);
		phn_next_set(ph->root, phn, offset);
	}
	if (ph->auxcount > 1) {
		unsigned nmerges = ffs_zu(ph->auxcount - 1);
		bool done = false;
		for (unsigned i = 0; i < nmerges && !done; i++) {
			done = ph_try_aux_merge_pair(ph, offset, cmp);
		}
	}
}

JEMALLOC_ALWAYS_INLINE void *
ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	void *ret;

	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	ret = ph->root;
	ph->root = ph_merge_children(ph->root, offset, cmp);

	return ret;
}

JEMALLOC_ALWAYS_INLINE void
ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	void *replace;
	void *parent;

	if (ph->root == phn) {
		/*
		 * We can delete from the aux list without merging it, but we
		 * need to merge if we are removing the root node and it has
		 * children.
		 */
		if (phn_lchild_get(phn, offset) == NULL) {
			ph->root = phn_next_get(phn, offset);
			if (ph->root != NULL) {
				phn_prev_set(ph->root, NULL, offset);
			}
			return;
		}
		ph_merge_aux(ph, offset, cmp);
		if (ph->root == phn) {
			ph->root = ph_merge_children(ph->root, offset, cmp);
			return;
		}
	}

	/* Get parent (if phn is leftmost child) before mutating. */
	if ((parent = phn_prev_get(phn, offset)) != NULL) {
		if (phn_lchild_get(parent, offset) != phn) {
			parent = NULL;
		}
	}
	/* Find a possible replacement node, and link to parent. */
	replace = ph_merge_children(phn, offset, cmp);
	/* Set next/prev for sibling linked list. */
	if (replace != NULL) {
		if (parent != NULL) {
			phn_prev_set(replace, parent, offset);
			phn_lchild_set(parent, replace, offset);
		} else {
			phn_prev_set(replace, phn_prev_get(phn, offset),
			    offset);
			if (phn_prev_get(phn, offset) != NULL) {
				phn_next_set(phn_prev_get(phn, offset), replace,
				    offset);
			}
		}
		phn_next_set(replace, phn_next_get(phn, offset), offset);
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(phn_next_get(phn, offset), replace,
			    offset);
		}
	} else {
		if (parent != NULL) {
			void *next = phn_next_get(phn, offset);
			phn_lchild_set(parent, next, offset);
			if (next != NULL) {
				phn_prev_set(next, parent, offset);
			}
		} else {
			assert(phn_prev_get(phn, offset) != NULL);
			phn_next_set(
			    phn_prev_get(phn, offset),
			    phn_next_get(phn, offset), offset);
		}
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(
			    phn_next_get(phn, offset),
			    phn_prev_get(phn, offset), offset);
		}
	}
}

#define ph_structs(a_prefix, a_type)					\
typedef struct {							\
	phn_link_t link;						\
} a_prefix##_link_t;							\
									\
typedef struct {							\
	ph_t ph;							\
} a_prefix##_t;
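
/*
 * In ph_gen() below, a_field must name the a_prefix##_link_t member of
 * a_type; the phn_link_t lives at the start of that member, so
 * offsetof(a_type, a_field) locates the intrusive links within each node.
 */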

/*
 * The ph_proto() macro generates function prototypes that correspond to the
 * functions generated by an equivalently parameterized call to ph_gen().
 */
#define ph_proto(a_attr, a_prefix, a_type)				\
									\
a_attr void a_prefix##_new(a_prefix##_t *ph);				\
a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);

/* The ph_gen() macro generates a type-specific pairing heap implementation. */
#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
JEMALLOC_ALWAYS_INLINE int						\
a_prefix##_ph_cmp(void *a, void *b) {					\
	return a_cmp((a_type *)a, (a_type *)b);				\
}									\
									\
a_attr void								\
a_prefix##_new(a_prefix##_t *ph) {					\
	ph_new(&ph->ph);						\
}									\
									\
a_attr bool								\
a_prefix##_empty(a_prefix##_t *ph) {					\
	return ph_empty(&ph->ph);					\
}									\
									\
a_attr a_type *								\
a_prefix##_first(a_prefix##_t *ph) {					\
	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
	    &a_prefix##_ph_cmp);					\
}									\
									\
a_attr a_type *								\
a_prefix##_any(a_prefix##_t *ph) {					\
	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
}									\
									\
a_attr void								\
a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_first(a_prefix##_t *ph) {				\
	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr void								\
a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_any(a_prefix##_t *ph) {				\
	a_type *ret = a_prefix##_any(ph);				\
	if (ret != NULL) {						\
		a_prefix##_remove(ph, ret);				\
	}								\
	return ret;							\
}
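
/*
 * A minimal usage sketch (node_t, node_cmp, and the node_heap prefix are
 * hypothetical names, not part of this header):
 *
 *	typedef struct node_s node_t;
 *	ph_structs(node_heap, node_t);
 *	struct node_s {
 *		uint64_t key;
 *		node_heap_link_t link;
 *	};
 *
 *	static int
 *	node_cmp(node_t *a, node_t *b) {
 *		return (a->key > b->key) - (a->key < b->key);
 *	}
 *
 *	ph_proto(static, node_heap, node_t)
 *	ph_gen(static, node_heap, node_t, link, node_cmp)
 *
 *	node_heap_t heap;
 *	node_heap_new(&heap);
 *	node_heap_insert(&heap, node);
 *	node_t *min = node_heap_remove_first(&heap);
 */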

#endif /* JEMALLOC_INTERNAL_PH_H */