Mercurial > octave-nkf
annotate liboctave/oct-sort.cc @ 8710:739141cde75a ss-3-1-52
fix typo in Array-f.cc
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 09 Feb 2009 21:51:31 +0100 |
parents | 314be237cd5b |
children | e9cb742df9eb |
rev | line source |
---|---|
4851 | 1 /* |
7017 | 2 Copyright (C) 2003, 2004, 2005, 2006, 2007 David Bateman |
8700 | 3 Copyright (C) 2008, 2009 Jaroslav Hajek |
4851 | 4 |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
7016 | 9 Free Software Foundation; either version 3 of the License, or (at your |
10 option) any later version. | |
4851 | 11 |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
7016 | 18 along with Octave; see the file COPYING. If not, see |
19 <http://www.gnu.org/licenses/>. | |
4851 | 20 |
21 Code stolen in large part from Python's, listobject.c, which itself had | |
22 no license header. However, thanks to Tim Peters for the parts of the | |
23 code I ripped-off. | |
24 | |
25 As required in the Python license the short description of the changes | |
26 made are | |
27 | |
28 * convert the sorting code in listobject.cc into a generic class, | |
29 replacing PyObject* with the type of the class T. | |
30 | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
31 * replaced usages of malloc, free, memcpy and memmove by standard C++ |
8678
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
32 new [], delete [] and std::copy and std::copy_backward. Note that replacing |
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
33 memmove by std::copy is possible if the destination starts before the source. |
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
34 If not, std::copy_backward needs to be used. |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
35 |
8700 | 36 * templatize comparison operator in most methods, provide possible dispatch |
37 | |
38 * duplicate methods to avoid by-the-way indexed sorting | |
39 | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
40 |
4851 | 41 The Python license is |
42 | |
43 PSF LICENSE AGREEMENT FOR PYTHON 2.3 | |
44 -------------------------------------- | |
45 | |
46 1. This LICENSE AGREEMENT is between the Python Software Foundation | |
47 ("PSF"), and the Individual or Organization ("Licensee") accessing and | |
48 otherwise using Python 2.3 software in source or binary form and its | |
49 associated documentation. | |
50 | |
51 2. Subject to the terms and conditions of this License Agreement, PSF | |
52 hereby grants Licensee a nonexclusive, royalty-free, world-wide | |
53 license to reproduce, analyze, test, perform and/or display publicly, | |
54 prepare derivative works, distribute, and otherwise use Python 2.3 | |
55 alone or in any derivative version, provided, however, that PSF's | |
56 License Agreement and PSF's notice of copyright, i.e., "Copyright (c) | |
57 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are | |
58 retained in Python 2.3 alone or in any derivative version prepared by | |
59 Licensee. | |
60 | |
61 3. In the event Licensee prepares a derivative work that is based on | |
62 or incorporates Python 2.3 or any part thereof, and wants to make | |
63 the derivative work available to others as provided herein, then | |
64 Licensee hereby agrees to include in any such work a brief summary of | |
65 the changes made to Python 2.3. | |
66 | |
67 4. PSF is making Python 2.3 available to Licensee on an "AS IS" | |
68 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR | |
69 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND | |
70 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS | |
71 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT | |
72 INFRINGE ANY THIRD PARTY RIGHTS. | |
73 | |
74 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON | |
75 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS | |
76 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, | |
77 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. | |
78 | |
79 6. This License Agreement will automatically terminate upon a material | |
80 breach of its terms and conditions. | |
81 | |
82 7. Nothing in this License Agreement shall be deemed to create any | |
83 relationship of agency, partnership, or joint venture between PSF and | |
84 Licensee. This License Agreement does not grant permission to use PSF | |
85 trademarks or trade name in a trademark sense to endorse or promote | |
86 products or services of Licensee, or any third party. | |
87 | |
88 8. By copying, installing or otherwise using Python 2.3, Licensee | |
89 agrees to be bound by the terms and conditions of this License | |
90 Agreement. | |
91 */ | |
92 | |
93 #ifdef HAVE_CONFIG_H | |
94 #include <config.h> | |
95 #endif | |
96 | |
5883 | 97 #include <cassert> |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
98 #include <algorithm> |
7480 | 99 #include <cstring> |
5883 | 100 |
4851 | 101 #include "lo-mappers.h" |
102 #include "quit.h" | |
103 #include "oct-sort.h" | |
104 | |
105 template <class T> | |
8700 | 106 octave_sort<T>::octave_sort (void) : compare (ascending_compare) |
4851 | 107 { |
7234 | 108 merge_init (); |
4851 | 109 merge_getmem (1024); |
110 } | |
111 | |
112 template <class T> | |
113 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) | |
114 { | |
4998 | 115 merge_init (); |
4851 | 116 merge_getmem (1024); |
117 } | |
118 | |
119 template <class T> | |
8700 | 120 template <class Comp> |
4851 | 121 void |
8700 | 122 octave_sort<T>::binarysort (T *data, octave_idx_type nel, |
123 octave_idx_type start, Comp comp) | |
4851 | 124 { |
8700 | 125 if (start == 0) |
4851 | 126 ++start; |
127 | |
8700 | 128 for (; start < nel; ++start) |
4851 | 129 { |
130 /* set l to where *start belongs */ | |
8700 | 131 octave_idx_type l = 0, r = start; |
132 T pivot = data[start]; | |
4851 | 133 /* Invariants: |
134 * pivot >= all in [lo, l). | |
135 * pivot < all in [r, start). | |
136 * The second is vacuously true at the start. | |
137 */ | |
138 do | |
139 { | |
8700 | 140 octave_idx_type p = l + ((r - l) >> 1); |
141 if (comp (pivot, data[p])) | |
4851 | 142 r = p; |
143 else | |
144 l = p+1; | |
145 } | |
146 while (l < r); | |
147 /* The invariants still hold, so pivot >= all in [lo, l) and | |
148 pivot < all in [l, start), so pivot belongs at l. Note | |
149 that if there are elements equal to pivot, l points to the | |
150 first slot after them -- that's why this sort is stable. | |
151 Slide over to make room. | |
152 Caution: using memmove is much slower under MSVC 5; | |
153 we're not usually moving many slots. */ | |
8700 | 154 // NOTE: using swap and going upwards appears to be faster. |
155 for (octave_idx_type p = l; p < start; p++) | |
156 std::swap (pivot, data[p]); | |
157 data[start] = pivot; | |
158 } | |
159 | |
160 return; | |
161 } | |
162 | |
163 template <class T> | |
164 template <class Comp> | |
165 void | |
166 octave_sort<T>::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel, | |
167 octave_idx_type start, Comp comp) | |
168 { | |
169 if (start == 0) | |
170 ++start; | |
171 | |
172 for (; start < nel; ++start) | |
173 { | |
174 /* set l to where *start belongs */ | |
175 octave_idx_type l = 0, r = start; | |
176 T pivot = data[start]; | |
177 /* Invariants: | |
178 * pivot >= all in [lo, l). | |
179 * pivot < all in [r, start). | |
180 * The second is vacuously true at the start. | |
181 */ | |
182 do | |
183 { | |
184 octave_idx_type p = l + ((r - l) >> 1); | |
185 if (comp (pivot, data[p])) | |
186 r = p; | |
187 else | |
188 l = p+1; | |
189 } | |
190 while (l < r); | |
191 /* The invariants still hold, so pivot >= all in [lo, l) and | |
192 pivot < all in [l, start), so pivot belongs at l. Note | |
193 that if there are elements equal to pivot, l points to the | |
194 first slot after them -- that's why this sort is stable. | |
195 Slide over to make room. | |
196 Caution: using memmove is much slower under MSVC 5; | |
197 we're not usually moving many slots. */ | |
198 // NOTE: using swap and going upwards appears to be faster. | |
199 for (octave_idx_type p = l; p < start; p++) | |
200 std::swap (pivot, data[p]); | |
201 data[start] = pivot; | |
202 octave_idx_type ipivot = idx[start]; | |
203 for (octave_idx_type p = l; p < start; p++) | |
204 std::swap (ipivot, idx[p]); | |
205 idx[start] = ipivot; | |
4851 | 206 } |
207 | |
208 return; | |
209 } | |
210 | |
211 /* | |
212 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi | |
213 is required on entry. "A run" is the longest ascending sequence, with | |
214 | |
215 lo[0] <= lo[1] <= lo[2] <= ... | |
216 | |
217 or the longest descending sequence, with | |
218 | |
219 lo[0] > lo[1] > lo[2] > ... | |
220 | |
7929 | 221 DESCENDING is set to false in the former case, or to true in the latter. |
4851 | 222 For its intended use in a stable mergesort, the strictness of the defn of |
223 "descending" is needed so that the caller can safely reverse a descending | |
224 sequence without violating stability (strict > ensures there are no equal | |
225 elements to get out of order). | |
226 | |
227 Returns -1 in case of error. | |
228 */ | |
229 template <class T> | |
8700 | 230 template <class Comp> |
7433 | 231 octave_idx_type |
8700 | 232 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending, Comp comp) |
4851 | 233 { |
7433 | 234 octave_idx_type n; |
8700 | 235 T *hi = lo + nel; |
4851 | 236 |
7929 | 237 descending = false; |
4851 | 238 ++lo; |
239 if (lo == hi) | |
240 return 1; | |
241 | |
242 n = 2; | |
243 | |
8700 | 244 if (comp (*lo, *(lo-1))) |
4851 | 245 { |
7929 | 246 descending = true; |
4851 | 247 for (lo = lo+1; lo < hi; ++lo, ++n) |
248 { | |
8700 | 249 if (comp (*lo, *(lo-1))) |
4851 | 250 ; |
251 else | |
252 break; | |
253 } | |
254 } | |
255 else | |
256 { | |
257 for (lo = lo+1; lo < hi; ++lo, ++n) | |
258 { | |
8700 | 259 if (comp (*lo, *(lo-1))) |
4851 | 260 break; |
261 } | |
262 } | |
263 | |
264 return n; | |
265 } | |
266 | |
267 /* | |
268 Locate the proper position of key in a sorted vector; if the vector contains | |
269 an element equal to key, return the position immediately to the left of | |
270 the leftmost equal element. [gallop_right() does the same except returns | |
271 the position to the right of the rightmost equal element (if any).] | |
272 | |
273 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. | |
274 | |
275 "hint" is an index at which to begin the search, 0 <= hint < n. The closer | |
276 hint is to the final result, the faster this runs. | |
277 | |
278 The return value is the int k in 0..n such that | |
279 | |
280 a[k-1] < key <= a[k] | |
281 | |
282 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, | |
283 key belongs at index k; or, IOW, the first k elements of a should precede | |
284 key, and the last n-k should follow key. | |
285 | |
286 Returns -1 on error. See listsort.txt for info on the method. | |
287 */ | |
288 template <class T> | |
8700 | 289 template <class Comp> |
7433 | 290 octave_idx_type |
8700 | 291 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint, |
292 Comp comp) | |
4851 | 293 { |
7433 | 294 octave_idx_type ofs; |
295 octave_idx_type lastofs; | |
296 octave_idx_type k; | |
4851 | 297 |
298 a += hint; | |
299 lastofs = 0; | |
300 ofs = 1; | |
8700 | 301 if (comp (*a, key)) |
4851 | 302 { |
303 /* a[hint] < key -- gallop right, until | |
304 * a[hint + lastofs] < key <= a[hint + ofs] | |
305 */ | |
7433 | 306 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 307 while (ofs < maxofs) |
308 { | |
8700 | 309 if (comp (a[ofs], key)) |
4851 | 310 { |
311 lastofs = ofs; | |
312 ofs = (ofs << 1) + 1; | |
313 if (ofs <= 0) /* int overflow */ | |
314 ofs = maxofs; | |
315 } | |
316 else /* key <= a[hint + ofs] */ | |
317 break; | |
318 } | |
319 if (ofs > maxofs) | |
320 ofs = maxofs; | |
321 /* Translate back to offsets relative to &a[0]. */ | |
322 lastofs += hint; | |
323 ofs += hint; | |
324 } | |
325 else | |
326 { | |
327 /* key <= a[hint] -- gallop left, until | |
328 * a[hint - ofs] < key <= a[hint - lastofs] | |
329 */ | |
7433 | 330 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 331 while (ofs < maxofs) |
332 { | |
8700 | 333 if (comp (*(a-ofs), key)) |
4851 | 334 break; |
335 /* key <= a[hint - ofs] */ | |
336 lastofs = ofs; | |
337 ofs = (ofs << 1) + 1; | |
338 if (ofs <= 0) /* int overflow */ | |
339 ofs = maxofs; | |
340 } | |
341 if (ofs > maxofs) | |
342 ofs = maxofs; | |
343 /* Translate back to positive offsets relative to &a[0]. */ | |
344 k = lastofs; | |
345 lastofs = hint - ofs; | |
346 ofs = hint - k; | |
347 } | |
348 a -= hint; | |
349 | |
350 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the | |
351 * right of lastofs but no farther right than ofs. Do a binary | |
352 * search, with invariant a[lastofs-1] < key <= a[ofs]. | |
353 */ | |
354 ++lastofs; | |
355 while (lastofs < ofs) | |
356 { | |
7433 | 357 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 358 |
8700 | 359 if (comp (a[m], key)) |
4851 | 360 lastofs = m+1; /* a[m] < key */ |
361 else | |
362 ofs = m; /* key <= a[m] */ | |
363 } | |
7234 | 364 |
4851 | 365 return ofs; |
366 } | |
367 | |
368 /* | |
369 Exactly like gallop_left(), except that if key already exists in a[0:n], | |
370 finds the position immediately to the right of the rightmost equal value. | |
371 | |
372 The return value is the int k in 0..n such that | |
373 | |
374 a[k-1] <= key < a[k] | |
375 | |
376 or -1 if error. | |
377 | |
378 The code duplication is massive, but this is enough different given that | |
379 we're sticking to "<" comparisons that it's much harder to follow if | |
380 written as one routine with yet another "left or right?" flag. | |
381 */ | |
382 template <class T> | |
8700 | 383 template <class Comp> |
7433 | 384 octave_idx_type |
8700 | 385 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint, |
386 Comp comp) | |
4851 | 387 { |
7433 | 388 octave_idx_type ofs; |
389 octave_idx_type lastofs; | |
390 octave_idx_type k; | |
4851 | 391 |
392 a += hint; | |
393 lastofs = 0; | |
394 ofs = 1; | |
8700 | 395 if (comp (key, *a)) |
4851 | 396 { |
397 /* key < a[hint] -- gallop left, until | |
398 * a[hint - ofs] <= key < a[hint - lastofs] | |
399 */ | |
7433 | 400 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 401 while (ofs < maxofs) |
402 { | |
8700 | 403 if (comp (key, *(a-ofs))) |
4851 | 404 { |
405 lastofs = ofs; | |
406 ofs = (ofs << 1) + 1; | |
407 if (ofs <= 0) /* int overflow */ | |
408 ofs = maxofs; | |
409 } | |
410 else /* a[hint - ofs] <= key */ | |
411 break; | |
412 } | |
413 if (ofs > maxofs) | |
414 ofs = maxofs; | |
415 /* Translate back to positive offsets relative to &a[0]. */ | |
416 k = lastofs; | |
417 lastofs = hint - ofs; | |
418 ofs = hint - k; | |
419 } | |
420 else | |
421 { | |
422 /* a[hint] <= key -- gallop right, until | |
423 * a[hint + lastofs] <= key < a[hint + ofs] | |
424 */ | |
7433 | 425 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 426 while (ofs < maxofs) |
427 { | |
8700 | 428 if (comp (key, a[ofs])) |
4851 | 429 break; |
430 /* a[hint + ofs] <= key */ | |
431 lastofs = ofs; | |
432 ofs = (ofs << 1) + 1; | |
433 if (ofs <= 0) /* int overflow */ | |
434 ofs = maxofs; | |
435 } | |
436 if (ofs > maxofs) | |
437 ofs = maxofs; | |
438 /* Translate back to offsets relative to &a[0]. */ | |
439 lastofs += hint; | |
440 ofs += hint; | |
441 } | |
442 a -= hint; | |
443 | |
444 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the | |
445 * right of lastofs but no farther right than ofs. Do a binary | |
446 * search, with invariant a[lastofs-1] <= key < a[ofs]. | |
447 */ | |
448 ++lastofs; | |
449 while (lastofs < ofs) | |
450 { | |
7433 | 451 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 452 |
8700 | 453 if (comp (key, a[m])) |
4851 | 454 ofs = m; /* key < a[m] */ |
455 else | |
456 lastofs = m+1; /* a[m] <= key */ | |
457 } | |
7234 | 458 |
4851 | 459 return ofs; |
460 } | |
461 | |
462 /* Conceptually a MergeState's constructor. */ | |
463 template <class T> | |
464 void | |
7234 | 465 octave_sort<T>::merge_init (void) |
4851 | 466 { |
7234 | 467 ms.a = 0; |
8700 | 468 ms.ia = 0; |
4851 | 469 ms.alloced = 0; |
470 ms.n = 0; | |
471 ms.min_gallop = MIN_GALLOP; | |
472 } | |
473 | |
474 /* Free all the temp memory owned by the MergeState. This must be called | |
475 * when you're done with a MergeState, and may be called before then if | |
476 * you want to free the temp memory early. | |
477 */ | |
478 template <class T> | |
479 void | |
7234 | 480 octave_sort<T>::merge_freemem (void) |
4851 | 481 { |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
482 delete [] ms.a; |
8700 | 483 delete [] ms.ia; |
4851 | 484 ms.alloced = 0; |
7234 | 485 ms.a = 0; |
4851 | 486 } |
487 | |
7433 | 488 static inline octave_idx_type |
489 roundupsize (octave_idx_type n) | |
4851 | 490 { |
491 unsigned int nbits = 3; | |
7433 | 492 octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8; |
4851 | 493 |
494 /* Round up: | |
495 * If n < 256, to a multiple of 8. | |
496 * If n < 2048, to a multiple of 64. | |
497 * If n < 16384, to a multiple of 512. | |
498 * If n < 131072, to a multiple of 4096. | |
499 * If n < 1048576, to a multiple of 32768. | |
500 * If n < 8388608, to a multiple of 262144. | |
501 * If n < 67108864, to a multiple of 2097152. | |
502 * If n < 536870912, to a multiple of 16777216. | |
503 * ... | |
504 * If n < 2**(5+3*i), to a multiple of 2**(3*i). | |
505 * | |
506 * This over-allocates proportional to the list size, making room | |
507 * for additional growth. The over-allocation is mild, but is | |
508 * enough to give linear-time amortized behavior over a long | |
509 * sequence of appends() in the presence of a poorly-performing | |
510 * system realloc() (which is a reality, e.g., across all flavors | |
511 * of Windows, with Win9x behavior being particularly bad -- and | |
512 * we've still got address space fragmentation problems on Win9x | |
513 * even with this scheme, although it requires much longer lists to | |
514 * provoke them than it used to). | |
515 */ | |
7234 | 516 while (n2) |
517 { | |
518 n2 >>= 3; | |
519 nbits += 3; | |
520 } | |
521 | |
4851 | 522 return ((n >> nbits) + 1) << nbits; |
523 } | |
524 | |
525 /* Ensure enough temp memory for 'need' array slots is available. | |
526 * Returns 0 on success and -1 if the memory can't be gotten. | |
527 */ | |
528 template <class T> | |
529 int | |
7433 | 530 octave_sort<T>::merge_getmem (octave_idx_type need) |
4851 | 531 { |
532 if (need <= ms.alloced) | |
533 return 0; | |
534 | |
7234 | 535 need = roundupsize (need); |
4851 | 536 /* Don't realloc! That can cost cycles to copy the old data, but |
537 * we don't care what's in the block. | |
538 */ | |
7234 | 539 merge_freemem (); |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
540 ms.a = new T[need]; |
7234 | 541 if (ms.a) |
542 { | |
543 ms.alloced = need; | |
544 return 0; | |
545 } | |
546 merge_freemem (); /* reset to sane state */ | |
547 | |
4851 | 548 return -1; |
549 } | |
550 | |
8700 | 551 template <class T> |
552 int | |
553 octave_sort<T>::merge_getmemi (octave_idx_type need) | |
554 { | |
555 if (need <= ms.alloced && ms.a && ms.ia) | |
556 return 0; | |
557 | |
558 need = roundupsize (need); | |
559 /* Don't realloc! That can cost cycles to copy the old data, but | |
560 * we don't care what's in the block. | |
561 */ | |
562 merge_freemem (); | |
563 ms.a = new T[need]; | |
564 ms.ia = new octave_idx_type[need]; | |
565 if (ms.a && ms.ia) | |
566 { | |
567 ms.alloced = need; | |
568 return 0; | |
569 } | |
570 merge_freemem (); /* reset to sane state */ | |
571 | |
572 return -1; | |
573 } | |
4851 | 574 |
575 /* Merge the na elements starting at pa with the nb elements starting at pb | |
576 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
577 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
578 * merge, and should have na <= nb. See listsort.txt for more info. | |
579 * Return 0 if successful, -1 if error. | |
580 */ | |
581 template <class T> | |
8700 | 582 template <class Comp> |
4851 | 583 int |
8700 | 584 octave_sort<T>::merge_lo (T *pa, octave_idx_type na, |
585 T *pb, octave_idx_type nb, | |
586 Comp comp) | |
4851 | 587 { |
7433 | 588 octave_idx_type k; |
4851 | 589 T *dest; |
590 int result = -1; /* guilty until proved innocent */ | |
7433 | 591 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 592 |
8700 | 593 if (merge_getmem (na) < 0) |
4851 | 594 return -1; |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
595 std::copy (pa, pa + na, ms.a); |
4851 | 596 dest = pa; |
597 pa = ms.a; | |
598 | |
599 *dest++ = *pb++; | |
600 --nb; | |
601 if (nb == 0) | |
602 goto Succeed; | |
603 if (na == 1) | |
604 goto CopyB; | |
605 | |
7234 | 606 for (;;) |
607 { | |
7433 | 608 octave_idx_type acount = 0; /* # of times A won in a row */ |
609 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
7234 | 610 |
611 /* Do the straightforward thing until (if ever) one run | |
612 * appears to win consistently. | |
613 */ | |
614 for (;;) | |
615 { | |
4851 | 616 |
8700 | 617 // FIXME: these loops are candidates for further optimizations. |
618 // Rather than testing everything in each cycle, it may be more | |
619 // efficient to do it in hunks. | |
620 if (comp (*pb, *pa)) | |
7234 | 621 { |
622 *dest++ = *pb++; | |
623 ++bcount; | |
624 acount = 0; | |
625 --nb; | |
626 if (nb == 0) | |
627 goto Succeed; | |
628 if (bcount >= min_gallop) | |
629 break; | |
630 } | |
631 else | |
632 { | |
633 *dest++ = *pa++; | |
634 ++acount; | |
635 bcount = 0; | |
636 --na; | |
637 if (na == 1) | |
638 goto CopyB; | |
639 if (acount >= min_gallop) | |
640 break; | |
641 } | |
642 } | |
4851 | 643 |
7234 | 644 /* One run is winning so consistently that galloping may |
645 * be a huge win. So try that, and continue galloping until | |
646 * (if ever) neither run appears to be winning consistently | |
647 * anymore. | |
648 */ | |
649 ++min_gallop; | |
650 do | |
4851 | 651 { |
7234 | 652 min_gallop -= min_gallop > 1; |
653 ms.min_gallop = min_gallop; | |
8700 | 654 k = gallop_right (*pb, pa, na, 0, comp); |
7234 | 655 acount = k; |
656 if (k) | |
657 { | |
658 if (k < 0) | |
659 goto Fail; | |
8700 | 660 dest = std::copy (pa, pa + k, dest); |
7234 | 661 pa += k; |
662 na -= k; | |
663 if (na == 1) | |
664 goto CopyB; | |
665 /* na==0 is impossible now if the comparison | |
666 * function is consistent, but we can't assume | |
667 * that it is. | |
668 */ | |
669 if (na == 0) | |
670 goto Succeed; | |
671 } | |
4851 | 672 *dest++ = *pb++; |
673 --nb; | |
674 if (nb == 0) | |
675 goto Succeed; | |
7234 | 676 |
8700 | 677 k = gallop_left (*pa, pb, nb, 0, comp); |
7234 | 678 bcount = k; |
679 if (k) | |
680 { | |
681 if (k < 0) | |
682 goto Fail; | |
8700 | 683 dest = std::copy (pb, pb + k, dest); |
7234 | 684 pb += k; |
685 nb -= k; | |
686 if (nb == 0) | |
687 goto Succeed; | |
688 } | |
4851 | 689 *dest++ = *pa++; |
690 --na; | |
691 if (na == 1) | |
692 goto CopyB; | |
693 } | |
7234 | 694 while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
695 | |
696 ++min_gallop; /* penalize it for leaving galloping mode */ | |
697 ms.min_gallop = min_gallop; | |
4851 | 698 } |
699 | |
700 Succeed: | |
701 result = 0; | |
7234 | 702 |
4851 | 703 Fail: |
704 if (na) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
705 std::copy (pa, pa + na, dest); |
4851 | 706 return result; |
7234 | 707 |
4851 | 708 CopyB: |
709 /* The last element of pa belongs at the end of the merge. */ | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
710 std::copy (pb, pb + nb, dest); |
4851 | 711 dest[nb] = *pa; |
7234 | 712 |
4851 | 713 return 0; |
714 } | |
715 | |
8700 | 716 template <class T> |
717 template <class Comp> | |
718 int | |
719 octave_sort<T>::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na, | |
720 T *pb, octave_idx_type *ipb, octave_idx_type nb, | |
721 Comp comp) | |
722 { | |
723 octave_idx_type k; | |
724 T *dest; | |
725 octave_idx_type *idest; | |
726 int result = -1; /* guilty until proved innocent */ | |
727 octave_idx_type min_gallop = ms.min_gallop; | |
728 | |
729 if (merge_getmemi (na) < 0) | |
730 return -1; | |
731 std::copy (pa, pa + na, ms.a); | |
732 std::copy (ipa, ipa + na, ms.ia); | |
733 dest = pa; idest = ipa; | |
734 pa = ms.a; ipa = ms.ia; | |
735 | |
736 *dest++ = *pb++; *idest++ = *ipb++; | |
737 --nb; | |
738 if (nb == 0) | |
739 goto Succeed; | |
740 if (na == 1) | |
741 goto CopyB; | |
742 | |
743 for (;;) | |
744 { | |
745 octave_idx_type acount = 0; /* # of times A won in a row */ | |
746 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
747 | |
748 /* Do the straightforward thing until (if ever) one run | |
749 * appears to win consistently. | |
750 */ | |
751 for (;;) | |
752 { | |
753 | |
754 if (comp (*pb, *pa)) | |
755 { | |
756 *dest++ = *pb++; *idest++ = *ipb++; | |
757 ++bcount; | |
758 acount = 0; | |
759 --nb; | |
760 if (nb == 0) | |
761 goto Succeed; | |
762 if (bcount >= min_gallop) | |
763 break; | |
764 } | |
765 else | |
766 { | |
767 *dest++ = *pa++; *idest++ = *ipa++; | |
768 ++acount; | |
769 bcount = 0; | |
770 --na; | |
771 if (na == 1) | |
772 goto CopyB; | |
773 if (acount >= min_gallop) | |
774 break; | |
775 } | |
776 } | |
777 | |
778 /* One run is winning so consistently that galloping may | |
779 * be a huge win. So try that, and continue galloping until | |
780 * (if ever) neither run appears to be winning consistently | |
781 * anymore. | |
782 */ | |
783 ++min_gallop; | |
784 do | |
785 { | |
786 min_gallop -= min_gallop > 1; | |
787 ms.min_gallop = min_gallop; | |
788 k = gallop_right (*pb, pa, na, 0, comp); | |
789 acount = k; | |
790 if (k) | |
791 { | |
792 if (k < 0) | |
793 goto Fail; | |
794 dest = std::copy (pa, pa + k, dest); | |
795 idest = std::copy (ipa, ipa + k, idest); | |
796 pa += k; ipa += k; | |
797 na -= k; | |
798 if (na == 1) | |
799 goto CopyB; | |
800 /* na==0 is impossible now if the comparison | |
801 * function is consistent, but we can't assume | |
802 * that it is. | |
803 */ | |
804 if (na == 0) | |
805 goto Succeed; | |
806 } | |
807 *dest++ = *pb++; *idest++ = *ipb++; | |
808 --nb; | |
809 if (nb == 0) | |
810 goto Succeed; | |
811 | |
812 k = gallop_left (*pa, pb, nb, 0, comp); | |
813 bcount = k; | |
814 if (k) | |
815 { | |
816 if (k < 0) | |
817 goto Fail; | |
818 dest = std::copy (pb, pb + k, dest); | |
819 idest = std::copy (ipb, ipb + k, idest); | |
820 pb += k; ipb += k; | |
821 nb -= k; | |
822 if (nb == 0) | |
823 goto Succeed; | |
824 } | |
825 *dest++ = *pa++; *idest++ = *ipa++; | |
826 --na; | |
827 if (na == 1) | |
828 goto CopyB; | |
829 } | |
830 while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
831 | |
832 ++min_gallop; /* penalize it for leaving galloping mode */ | |
833 ms.min_gallop = min_gallop; | |
834 } | |
835 | |
836 Succeed: | |
837 result = 0; | |
838 | |
839 Fail: | |
840 if (na) | |
841 { | |
842 std::copy (pa, pa + na, dest); | |
843 std::copy (ipa, ipa + na, idest); | |
844 } | |
845 return result; | |
846 | |
847 CopyB: | |
848 /* The last element of pa belongs at the end of the merge. */ | |
849 std::copy (pb, pb + nb, dest); | |
850 std::copy (ipb, ipb + nb, idest); | |
851 dest[nb] = *pa; | |
852 idest[nb] = *ipa; | |
853 | |
854 return 0; | |
855 } | |
856 | |
4851 | 857 /* Merge the na elements starting at pa with the nb elements starting at pb |
858 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
859 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
860 * merge, and should have na >= nb. See listsort.txt for more info. | |
861 * Return 0 if successful, -1 if error. | |
862 */ | |
863 template <class T> | |
8700 | 864 template <class Comp> |
4851 | 865 int |
8700 | 866 octave_sort<T>::merge_hi (T *pa, octave_idx_type na, |
867 T *pb, octave_idx_type nb, | |
868 Comp comp) | |
4851 | 869 { |
7433 | 870 octave_idx_type k; |
4851 | 871 T *dest; |
872 int result = -1; /* guilty until proved innocent */ | |
8700 | 873 T *basea, *baseb; |
7433 | 874 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 875 |
8700 | 876 if (merge_getmem (nb) < 0) |
4851 | 877 return -1; |
878 dest = pb + nb - 1; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
879 std::copy (pb, pb + nb, ms.a); |
4851 | 880 basea = pa; |
881 baseb = ms.a; | |
882 pb = ms.a + nb - 1; | |
883 pa += na - 1; | |
884 | |
885 *dest-- = *pa--; | |
886 --na; | |
887 if (na == 0) | |
888 goto Succeed; | |
889 if (nb == 1) | |
890 goto CopyA; | |
891 | |
892 for (;;) | |
893 { | |
7433 | 894 octave_idx_type acount = 0; /* # of times A won in a row */ |
895 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
4851 | 896 |
897 /* Do the straightforward thing until (if ever) one run | |
898 * appears to win consistently. | |
899 */ | |
900 for (;;) | |
901 { | |
8700 | 902 if (comp (*pb, *pa)) |
4851 | 903 { |
904 *dest-- = *pa--; | |
905 ++acount; | |
906 bcount = 0; | |
907 --na; | |
908 if (na == 0) | |
909 goto Succeed; | |
910 if (acount >= min_gallop) | |
911 break; | |
912 } | |
913 else | |
914 { | |
915 *dest-- = *pb--; | |
916 ++bcount; | |
917 acount = 0; | |
918 --nb; | |
919 if (nb == 1) | |
920 goto CopyA; | |
921 if (bcount >= min_gallop) | |
922 break; | |
923 } | |
924 } | |
925 | |
926 /* One run is winning so consistently that galloping may | |
927 * be a huge win. So try that, and continue galloping until | |
928 * (if ever) neither run appears to be winning consistently | |
929 * anymore. | |
930 */ | |
931 ++min_gallop; | |
932 do | |
933 { | |
934 min_gallop -= min_gallop > 1; | |
935 ms.min_gallop = min_gallop; | |
8700 | 936 k = gallop_right (*pb, basea, na, na-1, comp); |
4851 | 937 if (k < 0) |
938 goto Fail; | |
939 k = na - k; | |
940 acount = k; | |
941 if (k) | |
942 { | |
8700 | 943 dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; |
4851 | 944 pa -= k; |
945 na -= k; | |
946 if (na == 0) | |
947 goto Succeed; | |
948 } | |
949 *dest-- = *pb--; | |
950 --nb; | |
951 if (nb == 1) | |
952 goto CopyA; | |
953 | |
8700 | 954 k = gallop_left (*pa, baseb, nb, nb-1, comp); |
4851 | 955 if (k < 0) |
956 goto Fail; | |
957 k = nb - k; | |
958 bcount = k; | |
959 if (k) | |
960 { | |
961 dest -= k; | |
962 pb -= k; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
963 std::copy (pb+1, pb+1 + k, dest+1); |
4851 | 964 nb -= k; |
965 if (nb == 1) | |
966 goto CopyA; | |
967 /* nb==0 is impossible now if the comparison | |
968 * function is consistent, but we can't assume | |
969 * that it is. | |
970 */ | |
971 if (nb == 0) | |
972 goto Succeed; | |
973 } | |
974 *dest-- = *pa--; | |
975 --na; | |
976 if (na == 0) | |
977 goto Succeed; | |
978 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
979 ++min_gallop; /* penalize it for leaving galloping mode */ | |
980 ms.min_gallop = min_gallop; | |
981 } | |
7234 | 982 |
4851 | 983 Succeed: |
984 result = 0; | |
7234 | 985 |
4851 | 986 Fail: |
987 if (nb) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
988 std::copy (baseb, baseb + nb, dest-(nb-1)); |
4851 | 989 return result; |
7234 | 990 |
4851 | 991 CopyA: |
992 /* The first element of pb belongs at the front of the merge. */ | |
8700 | 993 dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; |
4851 | 994 pa -= na; |
995 *dest = *pb; | |
7234 | 996 |
4851 | 997 return 0; |
998 } | |
999 | |
8700 | 1000 template <class T> |
1001 template <class Comp> | |
1002 int | |
1003 octave_sort<T>::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na, | |
1004 T *pb, octave_idx_type *ipb, octave_idx_type nb, | |
1005 Comp comp) | |
1006 { | |
1007 octave_idx_type k; | |
1008 T *dest; | |
1009 octave_idx_type *idest; | |
1010 int result = -1; /* guilty until proved innocent */ | |
1011 T *basea, *baseb; | |
1012 octave_idx_type *ibasea, *ibaseb; | |
1013 octave_idx_type min_gallop = ms.min_gallop; | |
1014 | |
1015 if (merge_getmemi (nb) < 0) | |
1016 return -1; | |
1017 dest = pb + nb - 1; | |
1018 idest = ipb + nb - 1; | |
1019 std::copy (pb, pb + nb, ms.a); | |
1020 std::copy (ipb, ipb + nb, ms.ia); | |
1021 basea = pa; ibasea = ipa; | |
1022 baseb = ms.a; ibaseb = ms.ia; | |
1023 pb = ms.a + nb - 1; ipb = ms.ia + nb - 1; | |
1024 pa += na - 1; ipa += na - 1; | |
1025 | |
1026 *dest-- = *pa--; *idest-- = *ipa--; | |
1027 --na; | |
1028 if (na == 0) | |
1029 goto Succeed; | |
1030 if (nb == 1) | |
1031 goto CopyA; | |
1032 | |
1033 for (;;) | |
1034 { | |
1035 octave_idx_type acount = 0; /* # of times A won in a row */ | |
1036 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
1037 | |
1038 /* Do the straightforward thing until (if ever) one run | |
1039 * appears to win consistently. | |
1040 */ | |
1041 for (;;) | |
1042 { | |
1043 if (comp (*pb, *pa)) | |
1044 { | |
1045 *dest-- = *pa--; *idest-- = *ipa--; | |
1046 ++acount; | |
1047 bcount = 0; | |
1048 --na; | |
1049 if (na == 0) | |
1050 goto Succeed; | |
1051 if (acount >= min_gallop) | |
1052 break; | |
1053 } | |
1054 else | |
1055 { | |
1056 *dest-- = *pb--; *idest-- = *ipb--; | |
1057 ++bcount; | |
1058 acount = 0; | |
1059 --nb; | |
1060 if (nb == 1) | |
1061 goto CopyA; | |
1062 if (bcount >= min_gallop) | |
1063 break; | |
1064 } | |
1065 } | |
1066 | |
1067 /* One run is winning so consistently that galloping may | |
1068 * be a huge win. So try that, and continue galloping until | |
1069 * (if ever) neither run appears to be winning consistently | |
1070 * anymore. | |
1071 */ | |
1072 ++min_gallop; | |
1073 do | |
1074 { | |
1075 min_gallop -= min_gallop > 1; | |
1076 ms.min_gallop = min_gallop; | |
1077 k = gallop_right (*pb, basea, na, na-1, comp); | |
1078 if (k < 0) | |
1079 goto Fail; | |
1080 k = na - k; | |
1081 acount = k; | |
1082 if (k) | |
1083 { | |
1084 dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; | |
1085 idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1; | |
1086 pa -= k; ipa -= k; | |
1087 na -= k; | |
1088 if (na == 0) | |
1089 goto Succeed; | |
1090 } | |
1091 *dest-- = *pb--; *idest-- = *ipb--; | |
1092 --nb; | |
1093 if (nb == 1) | |
1094 goto CopyA; | |
1095 | |
1096 k = gallop_left (*pa, baseb, nb, nb-1, comp); | |
1097 if (k < 0) | |
1098 goto Fail; | |
1099 k = nb - k; | |
1100 bcount = k; | |
1101 if (k) | |
1102 { | |
1103 dest -= k; idest -= k; | |
1104 pb -= k; ipb -= k; | |
1105 std::copy (pb+1, pb+1 + k, dest+1); | |
1106 std::copy (ipb+1, ipb+1 + k, idest+1); | |
1107 nb -= k; | |
1108 if (nb == 1) | |
1109 goto CopyA; | |
1110 /* nb==0 is impossible now if the comparison | |
1111 * function is consistent, but we can't assume | |
1112 * that it is. | |
1113 */ | |
1114 if (nb == 0) | |
1115 goto Succeed; | |
1116 } | |
1117 *dest-- = *pa--; *idest-- = *ipa--; | |
1118 --na; | |
1119 if (na == 0) | |
1120 goto Succeed; | |
1121 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
1122 ++min_gallop; /* penalize it for leaving galloping mode */ | |
1123 ms.min_gallop = min_gallop; | |
1124 } | |
1125 | |
1126 Succeed: | |
1127 result = 0; | |
1128 | |
1129 Fail: | |
1130 if (nb) | |
1131 { | |
1132 std::copy (baseb, baseb + nb, dest-(nb-1)); | |
1133 std::copy (ibaseb, ibaseb + nb, idest-(nb-1)); | |
1134 } | |
1135 return result; | |
1136 | |
1137 CopyA: | |
1138 /* The first element of pb belongs at the front of the merge. */ | |
1139 dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; | |
1140 idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1; | |
1141 pa -= na; ipa -= na; | |
1142 *dest = *pb; *idest = *ipb; | |
1143 | |
1144 return 0; | |
1145 } | |
1146 | |
4851 | 1147 /* Merge the two runs at stack indices i and i+1. |
1148 * Returns 0 on success, -1 on error. | |
1149 */ | |
1150 template <class T> | |
8700 | 1151 template <class Comp> |
4851 | 1152 int |
8700 | 1153 octave_sort<T>::merge_at (octave_idx_type i, T *data, |
1154 Comp comp) | |
4851 | 1155 { |
1156 T *pa, *pb; | |
7433 | 1157 octave_idx_type na, nb; |
1158 octave_idx_type k; | |
4851 | 1159 |
8700 | 1160 pa = data + ms.pending[i].base; |
4851 | 1161 na = ms.pending[i].len; |
8700 | 1162 pb = data + ms.pending[i+1].base; |
4851 | 1163 nb = ms.pending[i+1].len; |
1164 | |
1165 /* Record the length of the combined runs; if i is the 3rd-last | |
1166 * run now, also slide over the last run (which isn't involved | |
1167 * in this merge). The current run i+1 goes away in any case. | |
1168 */ | |
1169 ms.pending[i].len = na + nb; | |
1170 if (i == ms.n - 3) | |
1171 ms.pending[i+1] = ms.pending[i+2]; | |
1172 --ms.n; | |
1173 | |
1174 /* Where does b start in a? Elements in a before that can be | |
1175 * ignored (already in place). | |
1176 */ | |
8700 | 1177 k = gallop_right (*pb, pa, na, 0, comp); |
4851 | 1178 if (k < 0) |
1179 return -1; | |
1180 pa += k; | |
1181 na -= k; | |
1182 if (na == 0) | |
1183 return 0; | |
1184 | |
1185 /* Where does a end in b? Elements in b after that can be | |
1186 * ignored (already in place). | |
1187 */ | |
8700 | 1188 nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); |
4851 | 1189 if (nb <= 0) |
1190 return nb; | |
1191 | |
1192 /* Merge what remains of the runs, using a temp array with | |
1193 * min(na, nb) elements. | |
1194 */ | |
1195 if (na <= nb) | |
8700 | 1196 return merge_lo (pa, na, pb, nb, comp); |
4851 | 1197 else |
8700 | 1198 return merge_hi (pa, na, pb, nb, comp); |
1199 } | |
1200 | |
1201 template <class T> | |
1202 template <class Comp> | |
1203 int | |
1204 octave_sort<T>::merge_at (octave_idx_type i, T *data, octave_idx_type *idx, | |
1205 Comp comp) | |
1206 { | |
1207 T *pa, *pb; | |
1208 octave_idx_type *ipa, *ipb; | |
1209 octave_idx_type na, nb; | |
1210 octave_idx_type k; | |
1211 | |
1212 pa = data + ms.pending[i].base; | |
1213 ipa = idx + ms.pending[i].base; | |
1214 na = ms.pending[i].len; | |
1215 pb = data + ms.pending[i+1].base; | |
1216 ipb = idx + ms.pending[i+1].base; | |
1217 nb = ms.pending[i+1].len; | |
1218 | |
1219 /* Record the length of the combined runs; if i is the 3rd-last | |
1220 * run now, also slide over the last run (which isn't involved | |
1221 * in this merge). The current run i+1 goes away in any case. | |
1222 */ | |
1223 ms.pending[i].len = na + nb; | |
1224 if (i == ms.n - 3) | |
1225 ms.pending[i+1] = ms.pending[i+2]; | |
1226 --ms.n; | |
1227 | |
1228 /* Where does b start in a? Elements in a before that can be | |
1229 * ignored (already in place). | |
1230 */ | |
1231 k = gallop_right (*pb, pa, na, 0, comp); | |
1232 if (k < 0) | |
1233 return -1; | |
1234 pa += k; ipa += k; | |
1235 na -= k; | |
1236 if (na == 0) | |
1237 return 0; | |
1238 | |
1239 /* Where does a end in b? Elements in b after that can be | |
1240 * ignored (already in place). | |
1241 */ | |
1242 nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); | |
1243 if (nb <= 0) | |
1244 return nb; | |
1245 | |
1246 /* Merge what remains of the runs, using a temp array with | |
1247 * min(na, nb) elements. | |
1248 */ | |
1249 if (na <= nb) | |
1250 return merge_lo (pa, ipa, na, pb, ipb, nb, comp); | |
1251 else | |
1252 return merge_hi (pa, ipa, na, pb, ipb, nb, comp); | |
4851 | 1253 } |
1254 | |
1255 /* Examine the stack of runs waiting to be merged, merging adjacent runs | |
1256 * until the stack invariants are re-established: | |
1257 * | |
1258 * 1. len[-3] > len[-2] + len[-1] | |
1259 * 2. len[-2] > len[-1] | |
1260 * | |
1261 * See listsort.txt for more info. | |
1262 * | |
1263 * Returns 0 on success, -1 on error. | |
1264 */ | |
1265 template <class T> | |
8700 | 1266 template <class Comp> |
4851 | 1267 int |
8700 | 1268 octave_sort<T>::merge_collapse (T *data, Comp comp) |
4851 | 1269 { |
1270 struct s_slice *p = ms.pending; | |
1271 | |
1272 while (ms.n > 1) | |
1273 { | |
7433 | 1274 octave_idx_type n = ms.n - 2; |
4851 | 1275 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) |
1276 { | |
1277 if (p[n-1].len < p[n+1].len) | |
1278 --n; | |
8700 | 1279 if (merge_at (n, data, comp) < 0) |
4851 | 1280 return -1; |
1281 } | |
1282 else if (p[n].len <= p[n+1].len) | |
1283 { | |
8700 | 1284 if (merge_at (n, data, comp) < 0) |
1285 return -1; | |
1286 } | |
1287 else | |
1288 break; | |
1289 } | |
1290 | |
1291 return 0; | |
1292 } | |
1293 | |
1294 template <class T> | |
1295 template <class Comp> | |
1296 int | |
1297 octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp) | |
1298 { | |
1299 struct s_slice *p = ms.pending; | |
1300 | |
1301 while (ms.n > 1) | |
1302 { | |
1303 octave_idx_type n = ms.n - 2; | |
1304 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) | |
1305 { | |
1306 if (p[n-1].len < p[n+1].len) | |
1307 --n; | |
1308 if (merge_at (n, data, idx, comp) < 0) | |
1309 return -1; | |
1310 } | |
1311 else if (p[n].len <= p[n+1].len) | |
1312 { | |
1313 if (merge_at (n, data, idx, comp) < 0) | |
4851 | 1314 return -1; |
1315 } | |
1316 else | |
1317 break; | |
1318 } | |
7234 | 1319 |
4851 | 1320 return 0; |
1321 } | |
1322 | |
1323 /* Regardless of invariants, merge all runs on the stack until only one | |
1324 * remains. This is used at the end of the mergesort. | |
1325 * | |
1326 * Returns 0 on success, -1 on error. | |
1327 */ | |
1328 template <class T> | |
8700 | 1329 template <class Comp> |
4851 | 1330 int |
8700 | 1331 octave_sort<T>::merge_force_collapse (T *data, Comp comp) |
4851 | 1332 { |
1333 struct s_slice *p = ms.pending; | |
1334 | |
1335 while (ms.n > 1) | |
1336 { | |
7433 | 1337 octave_idx_type n = ms.n - 2; |
4851 | 1338 if (n > 0 && p[n-1].len < p[n+1].len) |
1339 --n; | |
8700 | 1340 if (merge_at (n, data, comp) < 0) |
1341 return -1; | |
1342 } | |
1343 | |
1344 return 0; | |
1345 } | |
1346 | |
1347 template <class T> | |
1348 template <class Comp> | |
1349 int | |
1350 octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp) | |
1351 { | |
1352 struct s_slice *p = ms.pending; | |
1353 | |
1354 while (ms.n > 1) | |
1355 { | |
1356 octave_idx_type n = ms.n - 2; | |
1357 if (n > 0 && p[n-1].len < p[n+1].len) | |
1358 --n; | |
1359 if (merge_at (n, data, idx, comp) < 0) | |
4851 | 1360 return -1; |
1361 } | |
7234 | 1362 |
4851 | 1363 return 0; |
1364 } | |
1365 | |
1366 /* Compute a good value for the minimum run length; natural runs shorter | |
1367 * than this are boosted artificially via binary insertion. | |
1368 * | |
1369 * If n < 64, return n (it's too small to bother with fancy stuff). | |
1370 * Else if n is an exact power of 2, return 32. | |
1371 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but | |
1372 * strictly less than, an exact power of 2. | |
1373 * | |
1374 * See listsort.txt for more info. | |
1375 */ | |
1376 template <class T> | |
7433 | 1377 octave_idx_type |
1378 octave_sort<T>::merge_compute_minrun (octave_idx_type n) | |
4851 | 1379 { |
7433 | 1380 octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */ |
4851 | 1381 |
7234 | 1382 while (n >= 64) |
1383 { | |
1384 r |= n & 1; | |
1385 n >>= 1; | |
1386 } | |
1387 | |
4851 | 1388 return n + r; |
1389 } | |
1390 | |
1391 template <class T> | |
8700 | 1392 template <class Comp> |
4851 | 1393 void |
8700 | 1394 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp) |
4851 | 1395 { |
1396 /* Re-initialize the Mergestate as this might be the second time called */ | |
1397 ms.n = 0; | |
1398 ms.min_gallop = MIN_GALLOP; | |
1399 | |
8700 | 1400 if (nel > 1) |
4851 | 1401 { |
8700 | 1402 octave_idx_type nremaining = nel; |
1403 octave_idx_type lo = 0; | |
1404 | |
1405 /* March over the array once, left to right, finding natural runs, | |
1406 * and extending short natural runs to minrun elements. | |
1407 */ | |
1408 octave_idx_type minrun = merge_compute_minrun (nremaining); | |
1409 do | |
1410 { | |
1411 bool descending; | |
1412 octave_idx_type n; | |
1413 | |
1414 /* Identify next run. */ | |
1415 n = count_run (data + lo, nremaining, descending, comp); | |
1416 if (n < 0) | |
1417 goto fail; | |
1418 if (descending) | |
1419 std::reverse (data + lo, data + lo + n); | |
1420 /* If short, extend to min(minrun, nremaining). */ | |
1421 if (n < minrun) | |
1422 { | |
1423 const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; | |
1424 binarysort (data + lo, force, n, comp); | |
1425 n = force; | |
1426 } | |
1427 /* Push run onto pending-runs stack, and maybe merge. */ | |
1428 assert (ms.n < MAX_MERGE_PENDING); | |
1429 ms.pending[ms.n].base = lo; | |
1430 ms.pending[ms.n].len = n; | |
1431 ++ms.n; | |
1432 if (merge_collapse (data, comp) < 0) | |
1433 goto fail; | |
1434 /* Advance to find next run. */ | |
1435 lo += n; | |
1436 nremaining -= n; | |
1437 } | |
1438 while (nremaining); | |
1439 | |
1440 merge_force_collapse (data, comp); | |
1441 } | |
1442 | |
1443 fail: | |
1444 return; | |
1445 } | |
1446 | |
1447 template <class T> | |
1448 template <class Comp> | |
1449 void | |
1450 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel, | |
1451 Comp comp) | |
1452 { | |
1453 /* Re-initialize the Mergestate as this might be the second time called */ | |
1454 ms.n = 0; | |
1455 ms.min_gallop = MIN_GALLOP; | |
1456 | |
1457 if (nel > 1) | |
1458 { | |
1459 octave_idx_type nremaining = nel; | |
1460 octave_idx_type lo = 0; | |
4851 | 1461 |
1462 /* March over the array once, left to right, finding natural runs, | |
1463 * and extending short natural runs to minrun elements. | |
1464 */ | |
7433 | 1465 octave_idx_type minrun = merge_compute_minrun (nremaining); |
4851 | 1466 do |
1467 { | |
7929 | 1468 bool descending; |
7433 | 1469 octave_idx_type n; |
4851 | 1470 |
1471 /* Identify next run. */ | |
8700 | 1472 n = count_run (data + lo, nremaining, descending, comp); |
4851 | 1473 if (n < 0) |
1474 goto fail; | |
1475 if (descending) | |
8700 | 1476 { |
1477 std::reverse (data + lo, data + lo + n); | |
1478 std::reverse (idx + lo, idx + lo + n); | |
1479 } | |
4851 | 1480 /* If short, extend to min(minrun, nremaining). */ |
1481 if (n < minrun) | |
1482 { | |
7433 | 1483 const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; |
8700 | 1484 binarysort (data + lo, idx + lo, force, n, comp); |
4851 | 1485 n = force; |
1486 } | |
1487 /* Push run onto pending-runs stack, and maybe merge. */ | |
7234 | 1488 assert (ms.n < MAX_MERGE_PENDING); |
4851 | 1489 ms.pending[ms.n].base = lo; |
1490 ms.pending[ms.n].len = n; | |
1491 ++ms.n; | |
8700 | 1492 if (merge_collapse (data, idx, comp) < 0) |
4851 | 1493 goto fail; |
1494 /* Advance to find next run. */ | |
1495 lo += n; | |
1496 nremaining -= n; | |
7234 | 1497 } |
1498 while (nremaining); | |
4851 | 1499 |
8700 | 1500 merge_force_collapse (data, idx, comp); |
4851 | 1501 } |
1502 | |
1503 fail: | |
1504 return; | |
1505 } | |
1506 | |
8700 | 1507 template <class T> |
1508 void | |
1509 octave_sort<T>::sort (T *data, octave_idx_type nel) | |
1510 { | |
1511 #ifdef INLINE_ASCENDING_SORT | |
1512 if (compare == ascending_compare) | |
1513 sort (data, nel, std::less<T> ()); | |
1514 else | |
1515 #endif | |
1516 #ifdef INLINE_DESCENDING_SORT | |
1517 if (compare == descending_compare) | |
1518 sort (data, nel, std::greater<T> ()); | |
1519 else | |
1520 #endif | |
1521 if (compare) | |
1522 sort (data, nel, compare); | |
1523 } | |
1524 | |
1525 template <class T> | |
1526 void | |
1527 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel) | |
1528 { | |
1529 #ifdef INLINE_ASCENDING_SORT | |
1530 if (compare == ascending_compare) | |
1531 sort (data, idx, nel, std::less<T> ()); | |
1532 else | |
1533 #endif | |
1534 #ifdef INLINE_DESCENDING_SORT | |
1535 if (compare == descending_compare) | |
1536 sort (data, idx, nel, std::greater<T> ()); | |
1537 else | |
1538 #endif | |
1539 if (compare) | |
1540 sort (data, idx, nel, compare); | |
1541 } | |
1542 | |
1543 template <class T> | |
1544 bool | |
1545 octave_sort<T>::ascending_compare (T x, T y) | |
1546 { | |
1547 return x < y; | |
1548 } | |
1549 | |
1550 template <class T> | |
1551 bool | |
1552 octave_sort<T>::descending_compare (T x, T y) | |
1553 { | |
1554 return x > y; | |
1555 } | |
1556 | |
4851 | 1557 /* |
1558 ;;; Local Variables: *** | |
1559 ;;; mode: C++ *** | |
1560 ;;; End: *** | |
1561 */ |