1 | /* Drop in replacement for heapq.py |
2 | |
3 | C implementation derived directly from heapq.py in Py2.3 |
4 | which was written by Kevin O'Connor, augmented by Tim Peters, |
5 | annotated by François Pinard, and converted to C by Raymond Hettinger. |
6 | |
7 | */ |
8 | |
9 | #include "Python.h" |
10 | #include "pycore_list.h" // _PyList_ITEMS() |
11 | |
12 | #include "clinic/_heapqmodule.c.h" |
13 | |
14 | |
15 | /*[clinic input] |
16 | module _heapq |
17 | [clinic start generated code]*/ |
18 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d7cca0a2e4c0ceb3]*/ |
19 | |
20 | static int |
21 | siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) |
22 | { |
23 | PyObject *newitem, *parent, **arr; |
24 | Py_ssize_t parentpos, size; |
25 | int cmp; |
26 | |
27 | assert(PyList_Check(heap)); |
28 | size = PyList_GET_SIZE(heap); |
29 | if (pos >= size) { |
30 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
31 | return -1; |
32 | } |
33 | |
34 | /* Follow the path to the root, moving parents down until finding |
35 | a place newitem fits. */ |
36 | arr = _PyList_ITEMS(heap); |
37 | newitem = arr[pos]; |
38 | while (pos > startpos) { |
39 | parentpos = (pos - 1) >> 1; |
40 | parent = arr[parentpos]; |
41 | Py_INCREF(newitem); |
42 | Py_INCREF(parent); |
43 | cmp = PyObject_RichCompareBool(newitem, parent, Py_LT); |
44 | Py_DECREF(parent); |
45 | Py_DECREF(newitem); |
46 | if (cmp < 0) |
47 | return -1; |
48 | if (size != PyList_GET_SIZE(heap)) { |
49 | PyErr_SetString(PyExc_RuntimeError, |
50 | "list changed size during iteration" ); |
51 | return -1; |
52 | } |
53 | if (cmp == 0) |
54 | break; |
55 | arr = _PyList_ITEMS(heap); |
56 | parent = arr[parentpos]; |
57 | newitem = arr[pos]; |
58 | arr[parentpos] = newitem; |
59 | arr[pos] = parent; |
60 | pos = parentpos; |
61 | } |
62 | return 0; |
63 | } |
64 | |
65 | static int |
66 | siftup(PyListObject *heap, Py_ssize_t pos) |
67 | { |
68 | Py_ssize_t startpos, endpos, childpos, limit; |
69 | PyObject *tmp1, *tmp2, **arr; |
70 | int cmp; |
71 | |
72 | assert(PyList_Check(heap)); |
73 | endpos = PyList_GET_SIZE(heap); |
74 | startpos = pos; |
75 | if (pos >= endpos) { |
76 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
77 | return -1; |
78 | } |
79 | |
80 | /* Bubble up the smaller child until hitting a leaf. */ |
81 | arr = _PyList_ITEMS(heap); |
82 | limit = endpos >> 1; /* smallest pos that has no child */ |
83 | while (pos < limit) { |
84 | /* Set childpos to index of smaller child. */ |
85 | childpos = 2*pos + 1; /* leftmost child position */ |
86 | if (childpos + 1 < endpos) { |
87 | PyObject* a = arr[childpos]; |
88 | PyObject* b = arr[childpos + 1]; |
89 | Py_INCREF(a); |
90 | Py_INCREF(b); |
91 | cmp = PyObject_RichCompareBool(a, b, Py_LT); |
92 | Py_DECREF(a); |
93 | Py_DECREF(b); |
94 | if (cmp < 0) |
95 | return -1; |
96 | childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */ |
97 | arr = _PyList_ITEMS(heap); /* arr may have changed */ |
98 | if (endpos != PyList_GET_SIZE(heap)) { |
99 | PyErr_SetString(PyExc_RuntimeError, |
100 | "list changed size during iteration" ); |
101 | return -1; |
102 | } |
103 | } |
104 | /* Move the smaller child up. */ |
105 | tmp1 = arr[childpos]; |
106 | tmp2 = arr[pos]; |
107 | arr[childpos] = tmp2; |
108 | arr[pos] = tmp1; |
109 | pos = childpos; |
110 | } |
111 | /* Bubble it up to its final resting place (by sifting its parents down). */ |
112 | return siftdown(heap, startpos, pos); |
113 | } |
114 | |
115 | /*[clinic input] |
116 | _heapq.heappush |
117 | |
118 | heap: object(subclass_of='&PyList_Type') |
119 | item: object |
120 | / |
121 | |
122 | Push item onto heap, maintaining the heap invariant. |
123 | [clinic start generated code]*/ |
124 | |
125 | static PyObject * |
126 | _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) |
127 | /*[clinic end generated code: output=912c094f47663935 input=7c69611f3698aceb]*/ |
128 | { |
129 | if (PyList_Append(heap, item)) |
130 | return NULL; |
131 | |
132 | if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) |
133 | return NULL; |
134 | Py_RETURN_NONE; |
135 | } |
136 | |
137 | static PyObject * |
138 | heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) |
139 | { |
140 | PyObject *lastelt, *returnitem; |
141 | Py_ssize_t n; |
142 | |
143 | /* raises IndexError if the heap is empty */ |
144 | n = PyList_GET_SIZE(heap); |
145 | if (n == 0) { |
146 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
147 | return NULL; |
148 | } |
149 | |
150 | lastelt = PyList_GET_ITEM(heap, n-1) ; |
151 | Py_INCREF(lastelt); |
152 | if (PyList_SetSlice(heap, n-1, n, NULL)) { |
153 | Py_DECREF(lastelt); |
154 | return NULL; |
155 | } |
156 | n--; |
157 | |
158 | if (!n) |
159 | return lastelt; |
160 | returnitem = PyList_GET_ITEM(heap, 0); |
161 | PyList_SET_ITEM(heap, 0, lastelt); |
162 | if (siftup_func((PyListObject *)heap, 0)) { |
163 | Py_DECREF(returnitem); |
164 | return NULL; |
165 | } |
166 | return returnitem; |
167 | } |
168 | |
169 | /*[clinic input] |
170 | _heapq.heappop |
171 | |
172 | heap: object(subclass_of='&PyList_Type') |
173 | / |
174 | |
175 | Pop the smallest item off the heap, maintaining the heap invariant. |
176 | [clinic start generated code]*/ |
177 | |
178 | static PyObject * |
179 | _heapq_heappop_impl(PyObject *module, PyObject *heap) |
180 | /*[clinic end generated code: output=96dfe82d37d9af76 input=91487987a583c856]*/ |
181 | { |
182 | return heappop_internal(heap, siftup); |
183 | } |
184 | |
185 | static PyObject * |
186 | heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObject *, Py_ssize_t)) |
187 | { |
188 | PyObject *returnitem; |
189 | |
190 | if (PyList_GET_SIZE(heap) == 0) { |
191 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
192 | return NULL; |
193 | } |
194 | |
195 | returnitem = PyList_GET_ITEM(heap, 0); |
196 | Py_INCREF(item); |
197 | PyList_SET_ITEM(heap, 0, item); |
198 | if (siftup_func((PyListObject *)heap, 0)) { |
199 | Py_DECREF(returnitem); |
200 | return NULL; |
201 | } |
202 | return returnitem; |
203 | } |
204 | |
205 | |
206 | /*[clinic input] |
207 | _heapq.heapreplace |
208 | |
209 | heap: object(subclass_of='&PyList_Type') |
210 | item: object |
211 | / |
212 | |
213 | Pop and return the current smallest value, and add the new item. |
214 | |
215 | This is more efficient than heappop() followed by heappush(), and can be |
216 | more appropriate when using a fixed-size heap. Note that the value |
217 | returned may be larger than item! That constrains reasonable uses of |
218 | this routine unless written as part of a conditional replacement: |
219 | |
220 | if item > heap[0]: |
221 | item = heapreplace(heap, item) |
222 | [clinic start generated code]*/ |
223 | |
224 | static PyObject * |
225 | _heapq_heapreplace_impl(PyObject *module, PyObject *heap, PyObject *item) |
226 | /*[clinic end generated code: output=82ea55be8fbe24b4 input=719202ac02ba10c8]*/ |
227 | { |
228 | return heapreplace_internal(heap, item, siftup); |
229 | } |
230 | |
231 | /*[clinic input] |
232 | _heapq.heappushpop |
233 | |
234 | heap: object(subclass_of='&PyList_Type') |
235 | item: object |
236 | / |
237 | |
238 | Push item on the heap, then pop and return the smallest item from the heap. |
239 | |
240 | The combined action runs more efficiently than heappush() followed by |
241 | a separate call to heappop(). |
242 | [clinic start generated code]*/ |
243 | |
244 | static PyObject * |
245 | _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) |
246 | /*[clinic end generated code: output=67231dc98ed5774f input=5dc701f1eb4a4aa7]*/ |
247 | { |
248 | PyObject *returnitem; |
249 | int cmp; |
250 | |
251 | if (PyList_GET_SIZE(heap) == 0) { |
252 | Py_INCREF(item); |
253 | return item; |
254 | } |
255 | |
256 | PyObject* top = PyList_GET_ITEM(heap, 0); |
257 | Py_INCREF(top); |
258 | cmp = PyObject_RichCompareBool(top, item, Py_LT); |
259 | Py_DECREF(top); |
260 | if (cmp < 0) |
261 | return NULL; |
262 | if (cmp == 0) { |
263 | Py_INCREF(item); |
264 | return item; |
265 | } |
266 | |
267 | if (PyList_GET_SIZE(heap) == 0) { |
268 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
269 | return NULL; |
270 | } |
271 | |
272 | returnitem = PyList_GET_ITEM(heap, 0); |
273 | Py_INCREF(item); |
274 | PyList_SET_ITEM(heap, 0, item); |
275 | if (siftup((PyListObject *)heap, 0)) { |
276 | Py_DECREF(returnitem); |
277 | return NULL; |
278 | } |
279 | return returnitem; |
280 | } |
281 | |
282 | static Py_ssize_t |
283 | keep_top_bit(Py_ssize_t n) |
284 | { |
285 | int i = 0; |
286 | |
287 | while (n > 1) { |
288 | n >>= 1; |
289 | i++; |
290 | } |
291 | return n << i; |
292 | } |
293 | |
294 | /* Cache friendly version of heapify() |
295 | ----------------------------------- |
296 | |
297 | Build-up a heap in O(n) time by performing siftup() operations |
298 | on nodes whose children are already heaps. |
299 | |
300 | The simplest way is to sift the nodes in reverse order from |
301 | n//2-1 to 0 inclusive. The downside is that children may be |
302 | out of cache by the time their parent is reached. |
303 | |
304 | A better way is to not wait for the children to go out of cache. |
305 | Once a sibling pair of child nodes have been sifted, immediately |
306 | sift their parent node (while the children are still in cache). |
307 | |
308 | Both ways build child heaps before their parents, so both ways |
309 | do the exact same number of comparisons and produce exactly |
310 | the same heap. The only difference is that the traversal |
311 | order is optimized for cache efficiency. |
312 | */ |
313 | |
314 | static PyObject * |
315 | cache_friendly_heapify(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) |
316 | { |
317 | Py_ssize_t i, j, m, mhalf, leftmost; |
318 | |
319 | m = PyList_GET_SIZE(heap) >> 1; /* index of first childless node */ |
320 | leftmost = keep_top_bit(m + 1) - 1; /* leftmost node in row of m */ |
321 | mhalf = m >> 1; /* parent of first childless node */ |
322 | |
323 | for (i = leftmost - 1 ; i >= mhalf ; i--) { |
324 | j = i; |
325 | while (1) { |
326 | if (siftup_func((PyListObject *)heap, j)) |
327 | return NULL; |
328 | if (!(j & 1)) |
329 | break; |
330 | j >>= 1; |
331 | } |
332 | } |
333 | |
334 | for (i = m - 1 ; i >= leftmost ; i--) { |
335 | j = i; |
336 | while (1) { |
337 | if (siftup_func((PyListObject *)heap, j)) |
338 | return NULL; |
339 | if (!(j & 1)) |
340 | break; |
341 | j >>= 1; |
342 | } |
343 | } |
344 | Py_RETURN_NONE; |
345 | } |
346 | |
347 | static PyObject * |
348 | heapify_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) |
349 | { |
350 | Py_ssize_t i, n; |
351 | |
352 | /* For heaps likely to be bigger than L1 cache, we use the cache |
353 | friendly heapify function. For smaller heaps that fit entirely |
354 | in cache, we prefer the simpler algorithm with less branching. |
355 | */ |
356 | n = PyList_GET_SIZE(heap); |
357 | if (n > 2500) |
358 | return cache_friendly_heapify(heap, siftup_func); |
359 | |
360 | /* Transform bottom-up. The largest index there's any point to |
361 | looking at is the largest with a child index in-range, so must |
362 | have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is |
363 | (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If |
364 | n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest, |
365 | and that's again n//2-1. |
366 | */ |
367 | for (i = (n >> 1) - 1 ; i >= 0 ; i--) |
368 | if (siftup_func((PyListObject *)heap, i)) |
369 | return NULL; |
370 | Py_RETURN_NONE; |
371 | } |
372 | |
373 | /*[clinic input] |
374 | _heapq.heapify |
375 | |
376 | heap: object(subclass_of='&PyList_Type') |
377 | / |
378 | |
379 | Transform list into a heap, in-place, in O(len(heap)) time. |
380 | [clinic start generated code]*/ |
381 | |
382 | static PyObject * |
383 | _heapq_heapify_impl(PyObject *module, PyObject *heap) |
384 | /*[clinic end generated code: output=e63a636fcf83d6d0 input=53bb7a2166febb73]*/ |
385 | { |
386 | return heapify_internal(heap, siftup); |
387 | } |
388 | |
389 | static int |
390 | siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) |
391 | { |
392 | PyObject *newitem, *parent, **arr; |
393 | Py_ssize_t parentpos, size; |
394 | int cmp; |
395 | |
396 | assert(PyList_Check(heap)); |
397 | size = PyList_GET_SIZE(heap); |
398 | if (pos >= size) { |
399 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
400 | return -1; |
401 | } |
402 | |
403 | /* Follow the path to the root, moving parents down until finding |
404 | a place newitem fits. */ |
405 | arr = _PyList_ITEMS(heap); |
406 | newitem = arr[pos]; |
407 | while (pos > startpos) { |
408 | parentpos = (pos - 1) >> 1; |
409 | parent = arr[parentpos]; |
410 | Py_INCREF(parent); |
411 | Py_INCREF(newitem); |
412 | cmp = PyObject_RichCompareBool(parent, newitem, Py_LT); |
413 | Py_DECREF(parent); |
414 | Py_DECREF(newitem); |
415 | if (cmp < 0) |
416 | return -1; |
417 | if (size != PyList_GET_SIZE(heap)) { |
418 | PyErr_SetString(PyExc_RuntimeError, |
419 | "list changed size during iteration" ); |
420 | return -1; |
421 | } |
422 | if (cmp == 0) |
423 | break; |
424 | arr = _PyList_ITEMS(heap); |
425 | parent = arr[parentpos]; |
426 | newitem = arr[pos]; |
427 | arr[parentpos] = newitem; |
428 | arr[pos] = parent; |
429 | pos = parentpos; |
430 | } |
431 | return 0; |
432 | } |
433 | |
434 | static int |
435 | siftup_max(PyListObject *heap, Py_ssize_t pos) |
436 | { |
437 | Py_ssize_t startpos, endpos, childpos, limit; |
438 | PyObject *tmp1, *tmp2, **arr; |
439 | int cmp; |
440 | |
441 | assert(PyList_Check(heap)); |
442 | endpos = PyList_GET_SIZE(heap); |
443 | startpos = pos; |
444 | if (pos >= endpos) { |
445 | PyErr_SetString(PyExc_IndexError, "index out of range" ); |
446 | return -1; |
447 | } |
448 | |
449 | /* Bubble up the smaller child until hitting a leaf. */ |
450 | arr = _PyList_ITEMS(heap); |
451 | limit = endpos >> 1; /* smallest pos that has no child */ |
452 | while (pos < limit) { |
453 | /* Set childpos to index of smaller child. */ |
454 | childpos = 2*pos + 1; /* leftmost child position */ |
455 | if (childpos + 1 < endpos) { |
456 | PyObject* a = arr[childpos + 1]; |
457 | PyObject* b = arr[childpos]; |
458 | Py_INCREF(a); |
459 | Py_INCREF(b); |
460 | cmp = PyObject_RichCompareBool(a, b, Py_LT); |
461 | Py_DECREF(a); |
462 | Py_DECREF(b); |
463 | if (cmp < 0) |
464 | return -1; |
465 | childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */ |
466 | arr = _PyList_ITEMS(heap); /* arr may have changed */ |
467 | if (endpos != PyList_GET_SIZE(heap)) { |
468 | PyErr_SetString(PyExc_RuntimeError, |
469 | "list changed size during iteration" ); |
470 | return -1; |
471 | } |
472 | } |
473 | /* Move the smaller child up. */ |
474 | tmp1 = arr[childpos]; |
475 | tmp2 = arr[pos]; |
476 | arr[childpos] = tmp2; |
477 | arr[pos] = tmp1; |
478 | pos = childpos; |
479 | } |
480 | /* Bubble it up to its final resting place (by sifting its parents down). */ |
481 | return siftdown_max(heap, startpos, pos); |
482 | } |
483 | |
484 | |
485 | /*[clinic input] |
486 | _heapq._heappop_max |
487 | |
488 | heap: object(subclass_of='&PyList_Type') |
489 | / |
490 | |
491 | Maxheap variant of heappop. |
492 | [clinic start generated code]*/ |
493 | |
494 | static PyObject * |
495 | _heapq__heappop_max_impl(PyObject *module, PyObject *heap) |
496 | /*[clinic end generated code: output=9e77aadd4e6a8760 input=362c06e1c7484793]*/ |
497 | { |
498 | return heappop_internal(heap, siftup_max); |
499 | } |
500 | |
501 | /*[clinic input] |
502 | _heapq._heapreplace_max |
503 | |
504 | heap: object(subclass_of='&PyList_Type') |
505 | item: object |
506 | / |
507 | |
508 | Maxheap variant of heapreplace. |
509 | [clinic start generated code]*/ |
510 | |
511 | static PyObject * |
512 | _heapq__heapreplace_max_impl(PyObject *module, PyObject *heap, |
513 | PyObject *item) |
514 | /*[clinic end generated code: output=8ad7545e4a5e8adb input=f2dd27cbadb948d7]*/ |
515 | { |
516 | return heapreplace_internal(heap, item, siftup_max); |
517 | } |
518 | |
519 | /*[clinic input] |
520 | _heapq._heapify_max |
521 | |
522 | heap: object(subclass_of='&PyList_Type') |
523 | / |
524 | |
525 | Maxheap variant of heapify. |
526 | [clinic start generated code]*/ |
527 | |
528 | static PyObject * |
529 | _heapq__heapify_max_impl(PyObject *module, PyObject *heap) |
530 | /*[clinic end generated code: output=2cb028beb4a8b65e input=c1f765ee69f124b8]*/ |
531 | { |
532 | return heapify_internal(heap, siftup_max); |
533 | } |
534 | |
535 | static PyMethodDef heapq_methods[] = { |
536 | _HEAPQ_HEAPPUSH_METHODDEF |
537 | _HEAPQ_HEAPPUSHPOP_METHODDEF |
538 | _HEAPQ_HEAPPOP_METHODDEF |
539 | _HEAPQ_HEAPREPLACE_METHODDEF |
540 | _HEAPQ_HEAPIFY_METHODDEF |
541 | _HEAPQ__HEAPPOP_MAX_METHODDEF |
542 | _HEAPQ__HEAPIFY_MAX_METHODDEF |
543 | _HEAPQ__HEAPREPLACE_MAX_METHODDEF |
544 | {NULL, NULL} /* sentinel */ |
545 | }; |
546 | |
547 | PyDoc_STRVAR(module_doc, |
548 | "Heap queue algorithm (a.k.a. priority queue).\n\ |
549 | \n\ |
550 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\ |
551 | all k, counting elements from 0. For the sake of comparison,\n\ |
552 | non-existing elements are considered to be infinite. The interesting\n\ |
553 | property of a heap is that a[0] is always its smallest element.\n\ |
554 | \n\ |
555 | Usage:\n\ |
556 | \n\ |
557 | heap = [] # creates an empty heap\n\ |
558 | heappush(heap, item) # pushes a new item on the heap\n\ |
559 | item = heappop(heap) # pops the smallest item from the heap\n\ |
560 | item = heap[0] # smallest item on the heap without popping it\n\ |
561 | heapify(x) # transforms list into a heap, in-place, in linear time\n\ |
562 | item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\ |
563 | # new item; the heap size is unchanged\n\ |
564 | \n\ |
565 | Our API differs from textbook heap algorithms as follows:\n\ |
566 | \n\ |
567 | - We use 0-based indexing. This makes the relationship between the\n\ |
568 | index for a node and the indexes for its children slightly less\n\ |
569 | obvious, but is more suitable since Python uses 0-based indexing.\n\ |
570 | \n\ |
571 | - Our heappop() method returns the smallest item, not the largest.\n\ |
572 | \n\ |
573 | These two make it possible to view the heap as a regular Python list\n\ |
574 | without surprises: heap[0] is the smallest item, and heap.sort()\n\ |
575 | maintains the heap invariant!\n" ); |
576 | |
577 | |
578 | PyDoc_STRVAR(__about__, |
579 | "Heap queues\n\ |
580 | \n\ |
581 | [explanation by Fran\xc3\xa7ois Pinard]\n\ |
582 | \n\ |
583 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\ |
584 | all k, counting elements from 0. For the sake of comparison,\n\ |
585 | non-existing elements are considered to be infinite. The interesting\n\ |
586 | property of a heap is that a[0] is always its smallest element.\n" |
587 | "\n\ |
588 | The strange invariant above is meant to be an efficient memory\n\ |
589 | representation for a tournament. The numbers below are `k', not a[k]:\n\ |
590 | \n\ |
591 | 0\n\ |
592 | \n\ |
593 | 1 2\n\ |
594 | \n\ |
595 | 3 4 5 6\n\ |
596 | \n\ |
597 | 7 8 9 10 11 12 13 14\n\ |
598 | \n\ |
599 | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\ |
600 | \n\ |
601 | \n\ |
602 | In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\ |
603 | a usual binary tournament we see in sports, each cell is the winner\n\ |
604 | over the two cells it tops, and we can trace the winner down the tree\n\ |
605 | to see all opponents s/he had. However, in many computer applications\n\ |
606 | of such tournaments, we do not need to trace the history of a winner.\n\ |
607 | To be more memory efficient, when a winner is promoted, we try to\n\ |
608 | replace it by something else at a lower level, and the rule becomes\n\ |
609 | that a cell and the two cells it tops contain three different items,\n\ |
610 | but the top cell \"wins\" over the two topped cells.\n" |
611 | "\n\ |
612 | If this heap invariant is protected at all time, index 0 is clearly\n\ |
613 | the overall winner. The simplest algorithmic way to remove it and\n\ |
614 | find the \"next\" winner is to move some loser (let's say cell 30 in the\n\ |
615 | diagram above) into the 0 position, and then percolate this new 0 down\n\ |
616 | the tree, exchanging values, until the invariant is re-established.\n\ |
617 | This is clearly logarithmic on the total number of items in the tree.\n\ |
618 | By iterating over all items, you get an O(n ln n) sort.\n" |
619 | "\n\ |
620 | A nice feature of this sort is that you can efficiently insert new\n\ |
621 | items while the sort is going on, provided that the inserted items are\n\ |
622 | not \"better\" than the last 0'th element you extracted. This is\n\ |
623 | especially useful in simulation contexts, where the tree holds all\n\ |
624 | incoming events, and the \"win\" condition means the smallest scheduled\n\ |
625 | time. When an event schedule other events for execution, they are\n\ |
626 | scheduled into the future, so they can easily go into the heap. So, a\n\ |
627 | heap is a good structure for implementing schedulers (this is what I\n\ |
628 | used for my MIDI sequencer :-).\n" |
629 | "\n\ |
630 | Various structures for implementing schedulers have been extensively\n\ |
631 | studied, and heaps are good for this, as they are reasonably speedy,\n\ |
632 | the speed is almost constant, and the worst case is not much different\n\ |
633 | than the average case. However, there are other representations which\n\ |
634 | are more efficient overall, yet the worst cases might be terrible.\n" |
635 | "\n\ |
636 | Heaps are also very useful in big disk sorts. You most probably all\n\ |
637 | know that a big sort implies producing \"runs\" (which are pre-sorted\n\ |
638 | sequences, which size is usually related to the amount of CPU memory),\n\ |
639 | followed by a merging passes for these runs, which merging is often\n\ |
640 | very cleverly organised[1]. It is very important that the initial\n\ |
641 | sort produces the longest runs possible. Tournaments are a good way\n\ |
642 | to that. If, using all the memory available to hold a tournament, you\n\ |
643 | replace and percolate items that happen to fit the current run, you'll\n\ |
644 | produce runs which are twice the size of the memory for random input,\n\ |
645 | and much better for input fuzzily ordered.\n" |
646 | "\n\ |
647 | Moreover, if you output the 0'th item on disk and get an input which\n\ |
648 | may not fit in the current tournament (because the value \"wins\" over\n\ |
649 | the last output value), it cannot fit in the heap, so the size of the\n\ |
650 | heap decreases. The freed memory could be cleverly reused immediately\n\ |
651 | for progressively building a second heap, which grows at exactly the\n\ |
652 | same rate the first heap is melting. When the first heap completely\n\ |
653 | vanishes, you switch heaps and start a new run. Clever and quite\n\ |
654 | effective!\n\ |
655 | \n\ |
656 | In a word, heaps are useful memory structures to know. I use them in\n\ |
657 | a few applications, and I think it is good to keep a `heap' module\n\ |
658 | around. :-)\n" |
659 | "\n\ |
660 | --------------------\n\ |
661 | [1] The disk balancing algorithms which are current, nowadays, are\n\ |
662 | more annoying than clever, and this is a consequence of the seeking\n\ |
663 | capabilities of the disks. On devices which cannot seek, like big\n\ |
664 | tape drives, the story was quite different, and one had to be very\n\ |
665 | clever to ensure (far in advance) that each tape movement will be the\n\ |
666 | most effective possible (that is, will best participate at\n\ |
667 | \"progressing\" the merge). Some tapes were even able to read\n\ |
668 | backwards, and this was also used to avoid the rewinding time.\n\ |
669 | Believe me, real good tape sorts were quite spectacular to watch!\n\ |
670 | From all times, sorting has always been a Great Art! :-)\n" ); |
671 | |
672 | |
673 | static int |
674 | heapq_exec(PyObject *m) |
675 | { |
676 | PyObject *about = PyUnicode_FromString(__about__); |
677 | if (PyModule_AddObject(m, "__about__" , about) < 0) { |
678 | Py_DECREF(about); |
679 | return -1; |
680 | } |
681 | return 0; |
682 | } |
683 | |
684 | static struct PyModuleDef_Slot heapq_slots[] = { |
685 | {Py_mod_exec, heapq_exec}, |
686 | {0, NULL} |
687 | }; |
688 | |
689 | static struct PyModuleDef _heapqmodule = { |
690 | PyModuleDef_HEAD_INIT, |
691 | "_heapq" , |
692 | module_doc, |
693 | 0, |
694 | heapq_methods, |
695 | heapq_slots, |
696 | NULL, |
697 | NULL, |
698 | NULL |
699 | }; |
700 | |
701 | PyMODINIT_FUNC |
702 | PyInit__heapq(void) |
703 | { |
704 | return PyModuleDef_Init(&_heapqmodule); |
705 | } |
706 | |