Mercurial > octave-nkf
annotate liboctave/oct-sort.cc @ 8206:0168d22e6bba
fix sorting of non-POD objects
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Fri, 10 Oct 2008 08:02:24 +0200 |
parents | 30b952e90c29 |
children | e2b4c19c455c |
rev | line source |
---|---|
4851 | 1 /* |
7017 | 2 Copyright (C) 2003, 2004, 2005, 2006, 2007 David Bateman |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
3 Copyright (C) 2008 Jaroslav Hajek |
4851 | 4 |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
7016 | 9 Free Software Foundation; either version 3 of the License, or (at your |
10 option) any later version. | |
4851 | 11 |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
7016 | 18 along with Octave; see the file COPYING. If not, see |
19 <http://www.gnu.org/licenses/>. | |
4851 | 20 |
21 Code stolen in large part from Python's, listobject.c, which itself had | |
22 no license header. However, thanks to Tim Peters for the parts of the | |
23 code I ripped-off. | |
24 | |
25 As required in the Python license the short description of the changes | |
26 made are | |
27 | |
28 * convert the sorting code in listobject.cc into a generic class, | |
29 replacing PyObject* with the type of the class T. | |
30 | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
31 * replaced usages of malloc, free, memcpy and memmove by standard C++ |
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
32 new [], delete [] and std::copy. Note that replacing memmove by std::copy |
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
33 is possible if the destination starts before the source. |
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
34 |
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
35 |
4851 | 36 The Python license is |
37 | |
38 PSF LICENSE AGREEMENT FOR PYTHON 2.3 | |
39 -------------------------------------- | |
40 | |
41 1. This LICENSE AGREEMENT is between the Python Software Foundation | |
42 ("PSF"), and the Individual or Organization ("Licensee") accessing and | |
43 otherwise using Python 2.3 software in source or binary form and its | |
44 associated documentation. | |
45 | |
46 2. Subject to the terms and conditions of this License Agreement, PSF | |
47 hereby grants Licensee a nonexclusive, royalty-free, world-wide | |
48 license to reproduce, analyze, test, perform and/or display publicly, | |
49 prepare derivative works, distribute, and otherwise use Python 2.3 | |
50 alone or in any derivative version, provided, however, that PSF's | |
51 License Agreement and PSF's notice of copyright, i.e., "Copyright (c) | |
52 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are | |
53 retained in Python 2.3 alone or in any derivative version prepared by | |
54 Licensee. | |
55 | |
56 3. In the event Licensee prepares a derivative work that is based on | |
57 or incorporates Python 2.3 or any part thereof, and wants to make | |
58 the derivative work available to others as provided herein, then | |
59 Licensee hereby agrees to include in any such work a brief summary of | |
60 the changes made to Python 2.3. | |
61 | |
62 4. PSF is making Python 2.3 available to Licensee on an "AS IS" | |
63 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR | |
64 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND | |
65 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS | |
66 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT | |
67 INFRINGE ANY THIRD PARTY RIGHTS. | |
68 | |
69 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON | |
70 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS | |
71 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, | |
72 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. | |
73 | |
74 6. This License Agreement will automatically terminate upon a material | |
75 breach of its terms and conditions. | |
76 | |
77 7. Nothing in this License Agreement shall be deemed to create any | |
78 relationship of agency, partnership, or joint venture between PSF and | |
79 Licensee. This License Agreement does not grant permission to use PSF | |
80 trademarks or trade name in a trademark sense to endorse or promote | |
81 products or services of Licensee, or any third party. | |
82 | |
83 8. By copying, installing or otherwise using Python 2.3, Licensee | |
84 agrees to be bound by the terms and conditions of this License | |
85 Agreement. | |
86 */ | |
87 | |
88 #ifdef HAVE_CONFIG_H | |
89 #include <config.h> | |
90 #endif | |
91 | |
5883 | 92 #include <cassert> |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
93 #include <algorithm> |
7480 | 94 #include <cstring> |
5883 | 95 |
4851 | 96 #include "lo-mappers.h" |
97 #include "quit.h" | |
98 #include "oct-sort.h" | |
99 | |
7433 | 100 #ifndef IFLT |
7234 | 101 #define IFLT(a,b) if (compare ? compare ((a), (b)) : ((a) < (b))) |
7433 | 102 #endif |
4851 | 103 |
104 template <class T> | |
7234 | 105 octave_sort<T>::octave_sort (void) : compare (0) |
4851 | 106 { |
7234 | 107 merge_init (); |
4851 | 108 merge_getmem (1024); |
109 } | |
110 | |
111 template <class T> | |
112 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) | |
113 { | |
4998 | 114 merge_init (); |
4851 | 115 merge_getmem (1024); |
116 } | |
117 | |
118 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ | |
119 template <class T> | |
120 void | |
7234 | 121 octave_sort<T>::reverse_slice (T *lo, T *hi) |
4851 | 122 { |
123 --hi; | |
124 while (lo < hi) | |
125 { | |
126 T t = *lo; | |
127 *lo = *hi; | |
128 *hi = t; | |
129 ++lo; | |
130 --hi; | |
131 } | |
132 } | |
133 | |
134 template <class T> | |
135 void | |
136 octave_sort<T>::binarysort (T *lo, T *hi, T *start) | |
137 { | |
6959 | 138 T *l, *p, *r; |
139 T pivot; | |
4851 | 140 |
141 if (lo == start) | |
142 ++start; | |
143 | |
144 for (; start < hi; ++start) | |
145 { | |
146 /* set l to where *start belongs */ | |
147 l = lo; | |
148 r = start; | |
149 pivot = *r; | |
150 /* Invariants: | |
151 * pivot >= all in [lo, l). | |
152 * pivot < all in [r, start). | |
153 * The second is vacuously true at the start. | |
154 */ | |
155 do | |
156 { | |
157 p = l + ((r - l) >> 1); | |
158 IFLT (pivot, *p) | |
159 r = p; | |
160 else | |
161 l = p+1; | |
162 } | |
163 while (l < r); | |
164 /* The invariants still hold, so pivot >= all in [lo, l) and | |
165 pivot < all in [l, start), so pivot belongs at l. Note | |
166 that if there are elements equal to pivot, l points to the | |
167 first slot after them -- that's why this sort is stable. | |
168 Slide over to make room. | |
169 Caution: using memmove is much slower under MSVC 5; | |
170 we're not usually moving many slots. */ | |
171 for (p = start; p > l; --p) | |
172 *p = *(p-1); | |
173 *l = pivot; | |
174 } | |
175 | |
176 return; | |
177 } | |
178 | |
179 /* | |
180 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi | |
181 is required on entry. "A run" is the longest ascending sequence, with | |
182 | |
183 lo[0] <= lo[1] <= lo[2] <= ... | |
184 | |
185 or the longest descending sequence, with | |
186 | |
187 lo[0] > lo[1] > lo[2] > ... | |
188 | |
7929 | 189 DESCENDING is set to false in the former case, or to true in the latter. |
4851 | 190 For its intended use in a stable mergesort, the strictness of the defn of |
191 "descending" is needed so that the caller can safely reverse a descending | |
192 sequence without violating stability (strict > ensures there are no equal | |
193 elements to get out of order). | |
194 | |
195 Returns -1 in case of error. | |
196 */ | |
197 template <class T> | |
7433 | 198 octave_idx_type |
7929 | 199 octave_sort<T>::count_run (T *lo, T *hi, bool& descending) |
4851 | 200 { |
7433 | 201 octave_idx_type n; |
4851 | 202 |
7929 | 203 descending = false; |
4851 | 204 ++lo; |
205 if (lo == hi) | |
206 return 1; | |
207 | |
208 n = 2; | |
209 | |
210 IFLT (*lo, *(lo-1)) | |
211 { | |
7929 | 212 descending = true; |
4851 | 213 for (lo = lo+1; lo < hi; ++lo, ++n) |
214 { | |
215 IFLT (*lo, *(lo-1)) | |
216 ; | |
217 else | |
218 break; | |
219 } | |
220 } | |
221 else | |
222 { | |
223 for (lo = lo+1; lo < hi; ++lo, ++n) | |
224 { | |
225 IFLT (*lo, *(lo-1)) | |
226 break; | |
227 } | |
228 } | |
229 | |
230 return n; | |
231 } | |
232 | |
233 /* | |
234 Locate the proper position of key in a sorted vector; if the vector contains | |
235 an element equal to key, return the position immediately to the left of | |
236 the leftmost equal element. [gallop_right() does the same except returns | |
237 the position to the right of the rightmost equal element (if any).] | |
238 | |
239 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. | |
240 | |
241 "hint" is an index at which to begin the search, 0 <= hint < n. The closer | |
242 hint is to the final result, the faster this runs. | |
243 | |
244 The return value is the int k in 0..n such that | |
245 | |
246 a[k-1] < key <= a[k] | |
247 | |
248 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, | |
249 key belongs at index k; or, IOW, the first k elements of a should precede | |
250 key, and the last n-k should follow key. | |
251 | |
252 Returns -1 on error. See listsort.txt for info on the method. | |
253 */ | |
254 template <class T> | |
7433 | 255 octave_idx_type |
256 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint) | |
4851 | 257 { |
7433 | 258 octave_idx_type ofs; |
259 octave_idx_type lastofs; | |
260 octave_idx_type k; | |
4851 | 261 |
262 a += hint; | |
263 lastofs = 0; | |
264 ofs = 1; | |
265 IFLT (*a, key) | |
266 { | |
267 /* a[hint] < key -- gallop right, until | |
268 * a[hint + lastofs] < key <= a[hint + ofs] | |
269 */ | |
7433 | 270 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 271 while (ofs < maxofs) |
272 { | |
273 IFLT (a[ofs], key) | |
274 { | |
275 lastofs = ofs; | |
276 ofs = (ofs << 1) + 1; | |
277 if (ofs <= 0) /* int overflow */ | |
278 ofs = maxofs; | |
279 } | |
280 else /* key <= a[hint + ofs] */ | |
281 break; | |
282 } | |
283 if (ofs > maxofs) | |
284 ofs = maxofs; | |
285 /* Translate back to offsets relative to &a[0]. */ | |
286 lastofs += hint; | |
287 ofs += hint; | |
288 } | |
289 else | |
290 { | |
291 /* key <= a[hint] -- gallop left, until | |
292 * a[hint - ofs] < key <= a[hint - lastofs] | |
293 */ | |
7433 | 294 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 295 while (ofs < maxofs) |
296 { | |
297 IFLT (*(a-ofs), key) | |
298 break; | |
299 /* key <= a[hint - ofs] */ | |
300 lastofs = ofs; | |
301 ofs = (ofs << 1) + 1; | |
302 if (ofs <= 0) /* int overflow */ | |
303 ofs = maxofs; | |
304 } | |
305 if (ofs > maxofs) | |
306 ofs = maxofs; | |
307 /* Translate back to positive offsets relative to &a[0]. */ | |
308 k = lastofs; | |
309 lastofs = hint - ofs; | |
310 ofs = hint - k; | |
311 } | |
312 a -= hint; | |
313 | |
314 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the | |
315 * right of lastofs but no farther right than ofs. Do a binary | |
316 * search, with invariant a[lastofs-1] < key <= a[ofs]. | |
317 */ | |
318 ++lastofs; | |
319 while (lastofs < ofs) | |
320 { | |
7433 | 321 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 322 |
323 IFLT (a[m], key) | |
324 lastofs = m+1; /* a[m] < key */ | |
325 else | |
326 ofs = m; /* key <= a[m] */ | |
327 } | |
7234 | 328 |
4851 | 329 return ofs; |
330 } | |
331 | |
332 /* | |
333 Exactly like gallop_left(), except that if key already exists in a[0:n], | |
334 finds the position immediately to the right of the rightmost equal value. | |
335 | |
336 The return value is the int k in 0..n such that | |
337 | |
338 a[k-1] <= key < a[k] | |
339 | |
340 or -1 if error. | |
341 | |
342 The code duplication is massive, but this is enough different given that | |
343 we're sticking to "<" comparisons that it's much harder to follow if | |
344 written as one routine with yet another "left or right?" flag. | |
345 */ | |
346 template <class T> | |
7433 | 347 octave_idx_type |
348 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint) | |
4851 | 349 { |
7433 | 350 octave_idx_type ofs; |
351 octave_idx_type lastofs; | |
352 octave_idx_type k; | |
4851 | 353 |
354 a += hint; | |
355 lastofs = 0; | |
356 ofs = 1; | |
357 IFLT (key, *a) | |
358 { | |
359 /* key < a[hint] -- gallop left, until | |
360 * a[hint - ofs] <= key < a[hint - lastofs] | |
361 */ | |
7433 | 362 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 363 while (ofs < maxofs) |
364 { | |
365 IFLT (key, *(a-ofs)) | |
366 { | |
367 lastofs = ofs; | |
368 ofs = (ofs << 1) + 1; | |
369 if (ofs <= 0) /* int overflow */ | |
370 ofs = maxofs; | |
371 } | |
372 else /* a[hint - ofs] <= key */ | |
373 break; | |
374 } | |
375 if (ofs > maxofs) | |
376 ofs = maxofs; | |
377 /* Translate back to positive offsets relative to &a[0]. */ | |
378 k = lastofs; | |
379 lastofs = hint - ofs; | |
380 ofs = hint - k; | |
381 } | |
382 else | |
383 { | |
384 /* a[hint] <= key -- gallop right, until | |
385 * a[hint + lastofs] <= key < a[hint + ofs] | |
386 */ | |
7433 | 387 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 388 while (ofs < maxofs) |
389 { | |
390 IFLT (key, a[ofs]) | |
391 break; | |
392 /* a[hint + ofs] <= key */ | |
393 lastofs = ofs; | |
394 ofs = (ofs << 1) + 1; | |
395 if (ofs <= 0) /* int overflow */ | |
396 ofs = maxofs; | |
397 } | |
398 if (ofs > maxofs) | |
399 ofs = maxofs; | |
400 /* Translate back to offsets relative to &a[0]. */ | |
401 lastofs += hint; | |
402 ofs += hint; | |
403 } | |
404 a -= hint; | |
405 | |
406 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the | |
407 * right of lastofs but no farther right than ofs. Do a binary | |
408 * search, with invariant a[lastofs-1] <= key < a[ofs]. | |
409 */ | |
410 ++lastofs; | |
411 while (lastofs < ofs) | |
412 { | |
7433 | 413 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 414 |
415 IFLT (key, a[m]) | |
416 ofs = m; /* key < a[m] */ | |
417 else | |
418 lastofs = m+1; /* a[m] <= key */ | |
419 } | |
7234 | 420 |
4851 | 421 return ofs; |
422 } | |
423 | |
424 /* Conceptually a MergeState's constructor. */ | |
425 template <class T> | |
426 void | |
7234 | 427 octave_sort<T>::merge_init (void) |
4851 | 428 { |
7234 | 429 ms.a = 0; |
4851 | 430 ms.alloced = 0; |
431 ms.n = 0; | |
432 ms.min_gallop = MIN_GALLOP; | |
433 } | |
434 | |
435 /* Free all the temp memory owned by the MergeState. This must be called | |
436 * when you're done with a MergeState, and may be called before then if | |
437 * you want to free the temp memory early. | |
438 */ | |
439 template <class T> | |
440 void | |
7234 | 441 octave_sort<T>::merge_freemem (void) |
4851 | 442 { |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
443 delete [] ms.a; |
4851 | 444 ms.alloced = 0; |
7234 | 445 ms.a = 0; |
4851 | 446 } |
447 | |
7433 | 448 static inline octave_idx_type |
449 roundupsize (octave_idx_type n) | |
4851 | 450 { |
451 unsigned int nbits = 3; | |
7433 | 452 octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8; |
4851 | 453 |
454 /* Round up: | |
455 * If n < 256, to a multiple of 8. | |
456 * If n < 2048, to a multiple of 64. | |
457 * If n < 16384, to a multiple of 512. | |
458 * If n < 131072, to a multiple of 4096. | |
459 * If n < 1048576, to a multiple of 32768. | |
460 * If n < 8388608, to a multiple of 262144. | |
461 * If n < 67108864, to a multiple of 2097152. | |
462 * If n < 536870912, to a multiple of 16777216. | |
463 * ... | |
464 * If n < 2**(5+3*i), to a multiple of 2**(3*i). | |
465 * | |
466 * This over-allocates proportional to the list size, making room | |
467 * for additional growth. The over-allocation is mild, but is | |
468 * enough to give linear-time amortized behavior over a long | |
469 * sequence of appends() in the presence of a poorly-performing | |
470 * system realloc() (which is a reality, e.g., across all flavors | |
471 * of Windows, with Win9x behavior being particularly bad -- and | |
472 * we've still got address space fragmentation problems on Win9x | |
473 * even with this scheme, although it requires much longer lists to | |
474 * provoke them than it used to). | |
475 */ | |
7234 | 476 while (n2) |
477 { | |
478 n2 >>= 3; | |
479 nbits += 3; | |
480 } | |
481 | |
4851 | 482 return ((n >> nbits) + 1) << nbits; |
483 } | |
484 | |
485 /* Ensure enough temp memory for 'need' array slots is available. | |
486 * Returns 0 on success and -1 if the memory can't be gotten. | |
487 */ | |
488 template <class T> | |
489 int | |
7433 | 490 octave_sort<T>::merge_getmem (octave_idx_type need) |
4851 | 491 { |
492 if (need <= ms.alloced) | |
493 return 0; | |
494 | |
7234 | 495 need = roundupsize (need); |
4851 | 496 /* Don't realloc! That can cost cycles to copy the old data, but |
497 * we don't care what's in the block. | |
498 */ | |
7234 | 499 merge_freemem (); |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
500 ms.a = new T[need]; |
7234 | 501 if (ms.a) |
502 { | |
503 ms.alloced = need; | |
504 return 0; | |
505 } | |
506 merge_freemem (); /* reset to sane state */ | |
507 | |
4851 | 508 return -1; |
509 } | |
510 | |
7234 | 511 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : merge_getmem (NEED)) |
4851 | 512 |
513 /* Merge the na elements starting at pa with the nb elements starting at pb | |
514 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
515 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
516 * merge, and should have na <= nb. See listsort.txt for more info. | |
517 * Return 0 if successful, -1 if error. | |
518 */ | |
519 template <class T> | |
520 int | |
7433 | 521 octave_sort<T>::merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851 | 522 { |
7433 | 523 octave_idx_type k; |
4851 | 524 T *dest; |
525 int result = -1; /* guilty until proved innocent */ | |
7433 | 526 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 527 |
7234 | 528 if (MERGE_GETMEM (na) < 0) |
4851 | 529 return -1; |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
530 std::copy (pa, pa + na, ms.a); |
4851 | 531 dest = pa; |
532 pa = ms.a; | |
533 | |
534 *dest++ = *pb++; | |
535 --nb; | |
536 if (nb == 0) | |
537 goto Succeed; | |
538 if (na == 1) | |
539 goto CopyB; | |
540 | |
7234 | 541 for (;;) |
542 { | |
7433 | 543 octave_idx_type acount = 0; /* # of times A won in a row */ |
544 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
7234 | 545 |
546 /* Do the straightforward thing until (if ever) one run | |
547 * appears to win consistently. | |
548 */ | |
549 for (;;) | |
550 { | |
4851 | 551 |
7234 | 552 IFLT (*pb, *pa) |
553 { | |
554 *dest++ = *pb++; | |
555 ++bcount; | |
556 acount = 0; | |
557 --nb; | |
558 if (nb == 0) | |
559 goto Succeed; | |
560 if (bcount >= min_gallop) | |
561 break; | |
562 } | |
563 else | |
564 { | |
565 *dest++ = *pa++; | |
566 ++acount; | |
567 bcount = 0; | |
568 --na; | |
569 if (na == 1) | |
570 goto CopyB; | |
571 if (acount >= min_gallop) | |
572 break; | |
573 } | |
574 } | |
4851 | 575 |
7234 | 576 /* One run is winning so consistently that galloping may |
577 * be a huge win. So try that, and continue galloping until | |
578 * (if ever) neither run appears to be winning consistently | |
579 * anymore. | |
580 */ | |
581 ++min_gallop; | |
582 do | |
4851 | 583 { |
7234 | 584 min_gallop -= min_gallop > 1; |
585 ms.min_gallop = min_gallop; | |
586 k = gallop_right (*pb, pa, na, 0); | |
587 acount = k; | |
588 if (k) | |
589 { | |
590 if (k < 0) | |
591 goto Fail; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
592 std::copy (pa, pa + k, dest); |
7234 | 593 dest += k; |
594 pa += k; | |
595 na -= k; | |
596 if (na == 1) | |
597 goto CopyB; | |
598 /* na==0 is impossible now if the comparison | |
599 * function is consistent, but we can't assume | |
600 * that it is. | |
601 */ | |
602 if (na == 0) | |
603 goto Succeed; | |
604 } | |
4851 | 605 *dest++ = *pb++; |
606 --nb; | |
607 if (nb == 0) | |
608 goto Succeed; | |
7234 | 609 |
610 k = gallop_left (*pa, pb, nb, 0); | |
611 bcount = k; | |
612 if (k) | |
613 { | |
614 if (k < 0) | |
615 goto Fail; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
616 std::copy (pb, pb + k, dest); |
7234 | 617 dest += k; |
618 pb += k; | |
619 nb -= k; | |
620 if (nb == 0) | |
621 goto Succeed; | |
622 } | |
4851 | 623 *dest++ = *pa++; |
624 --na; | |
625 if (na == 1) | |
626 goto CopyB; | |
627 } | |
7234 | 628 while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
629 | |
630 ++min_gallop; /* penalize it for leaving galloping mode */ | |
631 ms.min_gallop = min_gallop; | |
4851 | 632 } |
633 | |
634 Succeed: | |
635 result = 0; | |
7234 | 636 |
4851 | 637 Fail: |
638 if (na) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
639 std::copy (pa, pa + na, dest); |
4851 | 640 return result; |
7234 | 641 |
4851 | 642 CopyB: |
643 /* The last element of pa belongs at the end of the merge. */ | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
644 std::copy (pb, pb + nb, dest); |
4851 | 645 dest[nb] = *pa; |
7234 | 646 |
4851 | 647 return 0; |
648 } | |
649 | |
650 /* Merge the na elements starting at pa with the nb elements starting at pb | |
651 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
652 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
653 * merge, and should have na >= nb. See listsort.txt for more info. | |
654 * Return 0 if successful, -1 if error. | |
655 */ | |
656 template <class T> | |
657 int | |
7433 | 658 octave_sort<T>::merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851 | 659 { |
7433 | 660 octave_idx_type k; |
4851 | 661 T *dest; |
662 int result = -1; /* guilty until proved innocent */ | |
663 T *basea; | |
664 T *baseb; | |
7433 | 665 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 666 |
7234 | 667 if (MERGE_GETMEM (nb) < 0) |
4851 | 668 return -1; |
669 dest = pb + nb - 1; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
670 std::copy (pb, pb + nb, ms.a); |
4851 | 671 basea = pa; |
672 baseb = ms.a; | |
673 pb = ms.a + nb - 1; | |
674 pa += na - 1; | |
675 | |
676 *dest-- = *pa--; | |
677 --na; | |
678 if (na == 0) | |
679 goto Succeed; | |
680 if (nb == 1) | |
681 goto CopyA; | |
682 | |
683 for (;;) | |
684 { | |
7433 | 685 octave_idx_type acount = 0; /* # of times A won in a row */ |
686 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
4851 | 687 |
688 /* Do the straightforward thing until (if ever) one run | |
689 * appears to win consistently. | |
690 */ | |
691 for (;;) | |
692 { | |
693 IFLT (*pb, *pa) | |
694 { | |
695 *dest-- = *pa--; | |
696 ++acount; | |
697 bcount = 0; | |
698 --na; | |
699 if (na == 0) | |
700 goto Succeed; | |
701 if (acount >= min_gallop) | |
702 break; | |
703 } | |
704 else | |
705 { | |
706 *dest-- = *pb--; | |
707 ++bcount; | |
708 acount = 0; | |
709 --nb; | |
710 if (nb == 1) | |
711 goto CopyA; | |
712 if (bcount >= min_gallop) | |
713 break; | |
714 } | |
715 } | |
716 | |
717 /* One run is winning so consistently that galloping may | |
718 * be a huge win. So try that, and continue galloping until | |
719 * (if ever) neither run appears to be winning consistently | |
720 * anymore. | |
721 */ | |
722 ++min_gallop; | |
723 do | |
724 { | |
725 min_gallop -= min_gallop > 1; | |
726 ms.min_gallop = min_gallop; | |
7234 | 727 k = gallop_right (*pb, basea, na, na-1); |
4851 | 728 if (k < 0) |
729 goto Fail; | |
730 k = na - k; | |
731 acount = k; | |
732 if (k) | |
733 { | |
734 dest -= k; | |
735 pa -= k; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
736 std::copy (pa+1, pa+1 + k, dest+1); |
4851 | 737 na -= k; |
738 if (na == 0) | |
739 goto Succeed; | |
740 } | |
741 *dest-- = *pb--; | |
742 --nb; | |
743 if (nb == 1) | |
744 goto CopyA; | |
745 | |
7234 | 746 k = gallop_left (*pa, baseb, nb, nb-1); |
4851 | 747 if (k < 0) |
748 goto Fail; | |
749 k = nb - k; | |
750 bcount = k; | |
751 if (k) | |
752 { | |
753 dest -= k; | |
754 pb -= k; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
755 std::copy (pb+1, pb+1 + k, dest+1); |
4851 | 756 nb -= k; |
757 if (nb == 1) | |
758 goto CopyA; | |
759 /* nb==0 is impossible now if the comparison | |
760 * function is consistent, but we can't assume | |
761 * that it is. | |
762 */ | |
763 if (nb == 0) | |
764 goto Succeed; | |
765 } | |
766 *dest-- = *pa--; | |
767 --na; | |
768 if (na == 0) | |
769 goto Succeed; | |
770 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
771 ++min_gallop; /* penalize it for leaving galloping mode */ | |
772 ms.min_gallop = min_gallop; | |
773 } | |
7234 | 774 |
4851 | 775 Succeed: |
776 result = 0; | |
7234 | 777 |
4851 | 778 Fail: |
779 if (nb) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
780 std::copy (baseb, baseb + nb, dest-(nb-1)); |
4851 | 781 return result; |
7234 | 782 |
4851 | 783 CopyA: |
784 /* The first element of pb belongs at the front of the merge. */ | |
785 dest -= na; | |
786 pa -= na; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
787 std::copy (pa+1, pa+1 + na, dest+1); |
4851 | 788 *dest = *pb; |
7234 | 789 |
4851 | 790 return 0; |
791 } | |
792 | |
793 /* Merge the two runs at stack indices i and i+1. | |
794 * Returns 0 on success, -1 on error. | |
795 */ | |
796 template <class T> | |
797 int | |
7433 | 798 octave_sort<T>::merge_at (octave_idx_type i) |
4851 | 799 { |
800 T *pa, *pb; | |
7433 | 801 octave_idx_type na, nb; |
802 octave_idx_type k; | |
4851 | 803 |
804 pa = ms.pending[i].base; | |
805 na = ms.pending[i].len; | |
806 pb = ms.pending[i+1].base; | |
807 nb = ms.pending[i+1].len; | |
808 | |
809 /* Record the length of the combined runs; if i is the 3rd-last | |
810 * run now, also slide over the last run (which isn't involved | |
811 * in this merge). The current run i+1 goes away in any case. | |
812 */ | |
813 ms.pending[i].len = na + nb; | |
814 if (i == ms.n - 3) | |
815 ms.pending[i+1] = ms.pending[i+2]; | |
816 --ms.n; | |
817 | |
818 /* Where does b start in a? Elements in a before that can be | |
819 * ignored (already in place). | |
820 */ | |
7234 | 821 k = gallop_right (*pb, pa, na, 0); |
4851 | 822 if (k < 0) |
823 return -1; | |
824 pa += k; | |
825 na -= k; | |
826 if (na == 0) | |
827 return 0; | |
828 | |
829 /* Where does a end in b? Elements in b after that can be | |
830 * ignored (already in place). | |
831 */ | |
7234 | 832 nb = gallop_left (pa[na-1], pb, nb, nb-1); |
4851 | 833 if (nb <= 0) |
834 return nb; | |
835 | |
836 /* Merge what remains of the runs, using a temp array with | |
837 * min(na, nb) elements. | |
838 */ | |
839 if (na <= nb) | |
7234 | 840 return merge_lo (pa, na, pb, nb); |
4851 | 841 else |
7234 | 842 return merge_hi (pa, na, pb, nb); |
4851 | 843 } |
844 | |
845 /* Examine the stack of runs waiting to be merged, merging adjacent runs | |
846 * until the stack invariants are re-established: | |
847 * | |
848 * 1. len[-3] > len[-2] + len[-1] | |
849 * 2. len[-2] > len[-1] | |
850 * | |
851 * See listsort.txt for more info. | |
852 * | |
853 * Returns 0 on success, -1 on error. | |
854 */ | |
855 template <class T> | |
856 int | |
7234 | 857 octave_sort<T>::merge_collapse (void) |
4851 | 858 { |
859 struct s_slice *p = ms.pending; | |
860 | |
861 while (ms.n > 1) | |
862 { | |
7433 | 863 octave_idx_type n = ms.n - 2; |
4851 | 864 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) |
865 { | |
866 if (p[n-1].len < p[n+1].len) | |
867 --n; | |
7234 | 868 if (merge_at (n) < 0) |
4851 | 869 return -1; |
870 } | |
871 else if (p[n].len <= p[n+1].len) | |
872 { | |
7234 | 873 if (merge_at (n) < 0) |
4851 | 874 return -1; |
875 } | |
876 else | |
877 break; | |
878 } | |
7234 | 879 |
4851 | 880 return 0; |
881 } | |
882 | |
883 /* Regardless of invariants, merge all runs on the stack until only one | |
884 * remains. This is used at the end of the mergesort. | |
885 * | |
886 * Returns 0 on success, -1 on error. | |
887 */ | |
888 template <class T> | |
889 int | |
7234 | 890 octave_sort<T>::merge_force_collapse (void) |
4851 | 891 { |
892 struct s_slice *p = ms.pending; | |
893 | |
894 while (ms.n > 1) | |
895 { | |
7433 | 896 octave_idx_type n = ms.n - 2; |
4851 | 897 if (n > 0 && p[n-1].len < p[n+1].len) |
898 --n; | |
7234 | 899 if (merge_at (n) < 0) |
4851 | 900 return -1; |
901 } | |
7234 | 902 |
4851 | 903 return 0; |
904 } | |
905 | |
906 /* Compute a good value for the minimum run length; natural runs shorter | |
907 * than this are boosted artificially via binary insertion. | |
908 * | |
909 * If n < 64, return n (it's too small to bother with fancy stuff). | |
910 * Else if n is an exact power of 2, return 32. | |
911 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but | |
912 * strictly less than, an exact power of 2. | |
913 * | |
914 * See listsort.txt for more info. | |
915 */ | |
916 template <class T> | |
7433 | 917 octave_idx_type |
918 octave_sort<T>::merge_compute_minrun (octave_idx_type n) | |
4851 | 919 { |
7433 | 920 octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */ |
4851 | 921 |
7234 | 922 while (n >= 64) |
923 { | |
924 r |= n & 1; | |
925 n >>= 1; | |
926 } | |
927 | |
4851 | 928 return n + r; |
929 } | |
930 | |
931 template <class T> | |
932 void | |
7433 | 933 octave_sort<T>::sort (T *v, octave_idx_type elements) |
4851 | 934 { |
935 /* Re-initialize the Mergestate as this might be the second time called */ | |
936 ms.n = 0; | |
937 ms.min_gallop = MIN_GALLOP; | |
938 | |
939 if (elements > 1) | |
940 { | |
7433 | 941 octave_idx_type nremaining = elements; |
4851 | 942 T *lo = v; |
943 T *hi = v + elements; | |
944 | |
945 /* March over the array once, left to right, finding natural runs, | |
946 * and extending short natural runs to minrun elements. | |
947 */ | |
7433 | 948 octave_idx_type minrun = merge_compute_minrun (nremaining); |
4851 | 949 do |
950 { | |
7929 | 951 bool descending; |
7433 | 952 octave_idx_type n; |
4851 | 953 |
954 /* Identify next run. */ | |
7929 | 955 n = count_run (lo, hi, descending); |
4851 | 956 if (n < 0) |
957 goto fail; | |
958 if (descending) | |
7234 | 959 reverse_slice (lo, lo + n); |
4851 | 960 /* If short, extend to min(minrun, nremaining). */ |
961 if (n < minrun) | |
962 { | |
7433 | 963 const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; |
7234 | 964 binarysort (lo, lo + force, lo + n); |
4851 | 965 n = force; |
966 } | |
967 /* Push run onto pending-runs stack, and maybe merge. */ | |
7234 | 968 assert (ms.n < MAX_MERGE_PENDING); |
4851 | 969 ms.pending[ms.n].base = lo; |
970 ms.pending[ms.n].len = n; | |
971 ++ms.n; | |
7234 | 972 if (merge_collapse () < 0) |
4851 | 973 goto fail; |
974 /* Advance to find next run. */ | |
975 lo += n; | |
976 nremaining -= n; | |
7234 | 977 } |
978 while (nremaining); | |
4851 | 979 |
7234 | 980 merge_force_collapse (); |
4851 | 981 } |
982 | |
983 fail: | |
984 return; | |
985 } | |
986 | |
987 /* | |
988 ;;; Local Variables: *** | |
989 ;;; mode: C++ *** | |
990 ;;; End: *** | |
991 */ |