Mercurial > octave
view liboctave/util/oct-sort.cc @ 33248:7f73e4805a1f
replace calls to assert with liboctave_panic functions liboctave
* lo-error.h (liboctave_panic_impossible, liboctave_panic_if,
liboctave_panic_unless): New macros.
Replace all calls to assert with panic_if, panic_impossible, or
panic_unless as appropriate. Include "lo-error.h" as needed. Don't
include <cassert>. Affected files: Array-base.cc, Array-util.cc,
Array.h, CSparse.cc, DiagArray2.cc, 1 DiagArray2.h, Sparse.cc,
Sparse.h, dim-vector.h, idx-vector.cc, idx-vector.h, CollocWt.cc,
Quad.cc, oct-rand.cc, qrp.cc, svd.cc, Sparse-perm-op-defs.h, and
oct-sort.cc.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sun, 24 Mar 2024 18:12:34 -0400 |
parents | 2e484f9f1f18 |
children |
line wrap: on
line source
//////////////////////////////////////////////////////////////////////// // // Copyright (C) 2003-2024 The Octave Project Developers // // See the file COPYRIGHT.md in the top-level directory of this // distribution or <https://octave.org/copyright/>. // // This file is part of Octave. // // Octave is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Octave is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Octave; see the file COPYING. If not, see // <https://www.gnu.org/licenses/>. // // Code stolen in large part from Python's, listobject.c, which itself had // no license header. However, thanks to Tim Peters for the parts of the // code I ripped-off. // // As required in the Python license the short description of the changes // made are // // * convert the sorting code in listobject.cc into a generic class, // replacing PyObject* with the type of the class T. // // * replaced usages of malloc, free, memcpy and memmove by standard C++ // new [], delete [] and std::copy and std::copy_backward. Note that replacing // memmove by std::copy is possible if the destination starts before the source. // If not, std::copy_backward needs to be used. // // * templatize comparison operator in most methods, provide possible dispatch // // * duplicate methods to avoid by-the-way indexed sorting // // * add methods for verifying sortedness of array // // * row sorting via breadth-first tree subsorting // // * binary lookup and sequential binary lookup optimized for dense downsampling. // // * NOTE: the memory management routines rely on the fact that delete [] silently // ignores null pointers. Don't gripe about the missing checks - they're there. // // // The Python license is // // PSF LICENSE AGREEMENT FOR PYTHON 2.3 // -------------------------------------- // // 1. This LICENSE AGREEMENT is between the Python Software Foundation // ("PSF"), and the Individual or Organization ("Licensee") accessing and // otherwise using Python 2.3 software in source or binary form and its // associated documentation. // // 2. Subject to the terms and conditions of this License Agreement, PSF // hereby grants Licensee a nonexclusive, royalty-free, world-wide // license to reproduce, analyze, test, perform and/or display publicly, // prepare derivative works, distribute, and otherwise use Python 2.3 // alone or in any derivative version, provided, however, that PSF's // License Agreement and PSF's notice of copyright, i.e., "Copyright (c) // 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are // retained in Python 2.3 alone or in any derivative version prepared by // Licensee. // // 3. In the event Licensee prepares a derivative work that is based on // or incorporates Python 2.3 or any part thereof, and wants to make // the derivative work available to others as provided herein, then // Licensee hereby agrees to include in any such work a brief summary of // the changes made to Python 2.3. // // 4. PSF is making Python 2.3 available to Licensee on an "AS IS" // basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR // IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND // DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS // FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT // INFRINGE ANY THIRD PARTY RIGHTS. // // 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON // 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS // A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, // OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. // // 6. This License Agreement will automatically terminate upon a material // breach of its terms and conditions. // // 7. Nothing in this License Agreement shall be deemed to create any // relationship of agency, partnership, or joint venture between PSF and // Licensee. This License Agreement does not grant permission to use PSF // trademarks or trade name in a trademark sense to endorse or promote // products or services of Licensee, or any third party. // // 8. By copying, installing or otherwise using Python 2.3, Licensee // agrees to be bound by the terms and conditions of this License // Agreement. // //////////////////////////////////////////////////////////////////////// // This file should not include config.h. It is only included in other // C++ source files that should have included config.h before including // this file. #include <algorithm> #include <cstring> #include <stack> #include "lo-error.h" #include "lo-mappers.h" #include "quit.h" #include "oct-sort.h" #include "oct-locbuf.h" template <typename T> octave_sort<T>::octave_sort () : m_compare (ascending_compare), m_ms (nullptr) { } template <typename T> octave_sort<T>::octave_sort (const compare_fcn_type& comp) : m_compare (comp), m_ms (nullptr) { } template <typename T> octave_sort<T>::~octave_sort () { delete m_ms; } template <typename T> void octave_sort<T>::set_compare (sortmode mode) { if (mode == ASCENDING) m_compare = ascending_compare; else if (mode == DESCENDING) m_compare = descending_compare; else m_compare = compare_fcn_type (); } template <typename T> template <typename Comp> void octave_sort<T>::binarysort (T *data, octave_idx_type nel, octave_idx_type start, Comp comp) { if (start == 0) ++start; for (; start < nel; ++start) { /* set l to where *start belongs */ octave_idx_type l = 0; octave_idx_type r = start; T pivot = data[start]; /* Invariants: * pivot >= all in [lo, l). * pivot < all in [r, start). * The second is vacuously true at the start. */ do { octave_idx_type p = l + ((r - l) >> 1); if (comp (pivot, data[p])) r = p; else l = p+1; } while (l < r); /* The invariants still hold, so pivot >= all in [lo, l) and pivot < all in [l, start), so pivot belongs at l. Note that if there are elements equal to pivot, l points to the first slot after them -- that's why this sort is stable. Slide over to make room. Caution: using memmove is much slower under MSVC 5; we're not usually moving many slots. */ // NOTE: using swap and going upwards appears to be faster. for (octave_idx_type p = l; p < start; p++) std::swap (pivot, data[p]); data[start] = pivot; } return; } template <typename T> template <typename Comp> void octave_sort<T>::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel, octave_idx_type start, Comp comp) { if (start == 0) ++start; for (; start < nel; ++start) { /* set l to where *start belongs */ octave_idx_type l = 0; octave_idx_type r = start; T pivot = data[start]; /* Invariants: * pivot >= all in [lo, l). * pivot < all in [r, start). * The second is vacuously true at the start. */ do { octave_idx_type p = l + ((r - l) >> 1); if (comp (pivot, data[p])) r = p; else l = p+1; } while (l < r); /* The invariants still hold, so pivot >= all in [lo, l) and pivot < all in [l, start), so pivot belongs at l. Note that if there are elements equal to pivot, l points to the first slot after them -- that's why this sort is stable. Slide over to make room. Caution: using memmove is much slower under MSVC 5; we're not usually moving many slots. */ // NOTE: using swap and going upwards appears to be faster. for (octave_idx_type p = l; p < start; p++) std::swap (pivot, data[p]); data[start] = pivot; octave_idx_type ipivot = idx[start]; for (octave_idx_type p = l; p < start; p++) std::swap (ipivot, idx[p]); idx[start] = ipivot; } return; } /* Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi is required on entry. "A run" is the longest ascending sequence, with lo[0] <= lo[1] <= lo[2] <= ... or the longest descending sequence, with lo[0] > lo[1] > lo[2] > ... DESCENDING is set to false in the former case, or to true in the latter. For its intended use in a stable mergesort, the strictness of the defn of "descending" is needed so that the caller can safely reverse a descending sequence without violating stability (strict > ensures there are no equal elements to get out of order). Returns -1 in case of error. */ template <typename T> template <typename Comp> octave_idx_type octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending, Comp comp) { octave_idx_type n; T *hi = lo + nel; descending = false; ++lo; if (lo == hi) return 1; n = 2; if (comp (*lo, *(lo-1))) { descending = true; for (lo = lo+1; lo < hi; ++lo, ++n) { if (comp (*lo, *(lo-1))) ; else break; } } else { for (lo = lo+1; lo < hi; ++lo, ++n) { if (comp (*lo, *(lo-1))) break; } } return n; } /* Locate the proper position of key in a sorted vector; if the vector contains an element equal to key, return the position immediately to the left of the leftmost equal element. [gallop_right() does the same except returns the position to the right of the rightmost equal element (if any).] "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. "hint" is an index at which to begin the search, 0 <= hint < n. The closer hint is to the final result, the faster this runs. The return value is the int k in 0..n such that a[k-1] < key <= a[k] pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, key belongs at index k; or, IOW, the first k elements of a should precede key, and the last n-k should follow key. Returns -1 on error. See listsort.txt for info on the method. */ template <typename T> template <typename Comp> octave_idx_type octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp) { octave_idx_type ofs; octave_idx_type lastofs; octave_idx_type k; a += hint; lastofs = 0; ofs = 1; if (comp (*a, key)) { /* a[hint] < key -- gallop right, until * a[hint + lastofs] < key <= a[hint + ofs] */ const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ while (ofs < maxofs) { if (comp (a[ofs], key)) { lastofs = ofs; ofs = (ofs << 1) + 1; if (ofs <= 0) /* int overflow */ ofs = maxofs; } else /* key <= a[hint + ofs] */ break; } if (ofs > maxofs) ofs = maxofs; /* Translate back to offsets relative to &a[0]. */ lastofs += hint; ofs += hint; } else { /* key <= a[hint] -- gallop left, until * a[hint - ofs] < key <= a[hint - lastofs] */ const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ while (ofs < maxofs) { if (comp (*(a-ofs), key)) break; /* key <= a[hint - ofs] */ lastofs = ofs; ofs = (ofs << 1) + 1; if (ofs <= 0) /* int overflow */ ofs = maxofs; } if (ofs > maxofs) ofs = maxofs; /* Translate back to positive offsets relative to &a[0]. */ k = lastofs; lastofs = hint - ofs; ofs = hint - k; } a -= hint; /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the * right of lastofs but no farther right than ofs. Do a binary * search, with invariant a[lastofs-1] < key <= a[ofs]. */ ++lastofs; while (lastofs < ofs) { octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); if (comp (a[m], key)) lastofs = m+1; /* a[m] < key */ else ofs = m; /* key <= a[m] */ } return ofs; } /* Exactly like gallop_left(), except that if key already exists in a[0:n], finds the position immediately to the right of the rightmost equal value. The return value is the int k in 0..n such that a[k-1] <= key < a[k] or -1 if error. The code duplication is massive, but this is enough different given that we're sticking to "<" comparisons that it's much harder to follow if written as one routine with yet another "left or right?" flag. */ template <typename T> template <typename Comp> octave_idx_type octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp) { octave_idx_type ofs; octave_idx_type lastofs; octave_idx_type k; a += hint; lastofs = 0; ofs = 1; if (comp (key, *a)) { /* key < a[hint] -- gallop left, until * a[hint - ofs] <= key < a[hint - lastofs] */ const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ while (ofs < maxofs) { if (comp (key, *(a-ofs))) { lastofs = ofs; ofs = (ofs << 1) + 1; if (ofs <= 0) /* int overflow */ ofs = maxofs; } else /* a[hint - ofs] <= key */ break; } if (ofs > maxofs) ofs = maxofs; /* Translate back to positive offsets relative to &a[0]. */ k = lastofs; lastofs = hint - ofs; ofs = hint - k; } else { /* a[hint] <= key -- gallop right, until * a[hint + lastofs] <= key < a[hint + ofs] */ const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ while (ofs < maxofs) { if (comp (key, a[ofs])) break; /* a[hint + ofs] <= key */ lastofs = ofs; ofs = (ofs << 1) + 1; if (ofs <= 0) /* int overflow */ ofs = maxofs; } if (ofs > maxofs) ofs = maxofs; /* Translate back to offsets relative to &a[0]. */ lastofs += hint; ofs += hint; } a -= hint; /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the * right of lastofs but no farther right than ofs. Do a binary * search, with invariant a[lastofs-1] <= key < a[ofs]. */ ++lastofs; while (lastofs < ofs) { octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); if (comp (key, a[m])) ofs = m; /* key < a[m] */ else lastofs = m+1; /* a[m] <= key */ } return ofs; } static inline octave_idx_type roundupsize (std::size_t n) { unsigned int nbits = 3; std::size_t n2 = n >> 8; /* Round up: * If n < 256, to a multiple of 8. * If n < 2048, to a multiple of 64. * If n < 16384, to a multiple of 512. * If n < 131072, to a multiple of 4096. * If n < 1048576, to a multiple of 32768. * If n < 8388608, to a multiple of 262144. * If n < 67108864, to a multiple of 2097152. * If n < 536870912, to a multiple of 16777216. * ... * If n < 2**(5+3*i), to a multiple of 2**(3*i). * * This over-allocates proportional to the list size, making room * for additional growth. The over-allocation is mild, but is * enough to give linear-time amortized behavior over a long * sequence of appends() in the presence of a poorly-performing * system realloc() (which is a reality, e.g., across all flavors * of Windows, with Win9x behavior being particularly bad -- and * we've still got address space fragmentation problems on Win9x * even with this scheme, although it requires much longer lists to * provoke them than it used to). */ while (n2) { n2 >>= 3; nbits += 3; } std::size_t new_size = ((n >> nbits) + 1) << nbits; if (new_size == 0 || new_size > static_cast<std::size_t> (std::numeric_limits<octave_idx_type>::max ())) (*current_liboctave_error_handler) ("unable to allocate sufficient memory for sort"); return static_cast<octave_idx_type> (new_size); } /* Ensure enough temp memory for 'need' array slots is available. * Returns 0 on success and -1 if the memory can't be gotten. */ template <typename T> void octave_sort<T>::MergeState::getmem (octave_idx_type need) { if (need <= m_alloced) return; need = roundupsize (need); /* Don't realloc! That can cost cycles to copy the old data, but * we don't care what's in the block. */ delete [] m_a; delete [] m_ia; // Must do this or fool possible next getmemi. m_a = new T [need]; m_alloced = need; } template <typename T> void octave_sort<T>::MergeState::getmemi (octave_idx_type need) { if (m_ia && need <= m_alloced) return; need = roundupsize (need); /* Don't realloc! That can cost cycles to copy the old data, but * we don't care what's in the block. */ delete [] m_a; delete [] m_ia; m_a = new T [need]; m_ia = new octave_idx_type [need]; m_alloced = need; } /* Merge the na elements starting at pa with the nb elements starting at pb * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the * merge, and should have na <= nb. See listsort.txt for more info. * Return 0 if successful, -1 if error. */ template <typename T> template <typename Comp> int octave_sort<T>::merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp) { octave_idx_type k; T *dest; int result = -1; /* guilty until proved innocent */ octave_idx_type min_gallop = m_ms->m_min_gallop; m_ms->getmem (na); std::copy (pa, pa + na, m_ms->m_a); dest = pa; pa = m_ms->m_a; *dest++ = *pb++; --nb; if (nb == 0) goto Succeed; if (na == 1) goto CopyB; for (;;) { octave_idx_type acount = 0; /* # of times A won in a row */ octave_idx_type bcount = 0; /* # of times B won in a row */ /* Do the straightforward thing until (if ever) one run * appears to win consistently. */ for (;;) { // FIXME: these loops are candidates for further optimizations. // Rather than testing everything in each cycle, it may be more // efficient to do it in hunks. if (comp (*pb, *pa)) { *dest++ = *pb++; ++bcount; acount = 0; --nb; if (nb == 0) goto Succeed; if (bcount >= min_gallop) break; } else { *dest++ = *pa++; ++acount; bcount = 0; --na; if (na == 1) goto CopyB; if (acount >= min_gallop) break; } } /* One run is winning so consistently that galloping may * be a huge win. So try that, and continue galloping until * (if ever) neither run appears to be winning consistently * anymore. */ ++min_gallop; do { min_gallop -= (min_gallop > 1); m_ms->m_min_gallop = min_gallop; k = gallop_right (*pb, pa, na, 0, comp); acount = k; if (k) { if (k < 0) goto Fail; dest = std::copy (pa, pa + k, dest); pa += k; na -= k; if (na == 1) goto CopyB; /* na==0 is impossible now if the comparison * function is consistent, but we can't assume * that it is. */ if (na == 0) goto Succeed; } *dest++ = *pb++; --nb; if (nb == 0) goto Succeed; k = gallop_left (*pa, pb, nb, 0, comp); bcount = k; if (k) { if (k < 0) goto Fail; dest = std::copy (pb, pb + k, dest); pb += k; nb -= k; if (nb == 0) goto Succeed; } *dest++ = *pa++; --na; if (na == 1) goto CopyB; } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); ++min_gallop; /* penalize it for leaving galloping mode */ m_ms->m_min_gallop = min_gallop; } Succeed: result = 0; Fail: if (na) std::copy (pa, pa + na, dest); return result; CopyB: /* The last element of pa belongs at the end of the merge. */ std::copy (pb, pb + nb, dest); dest[nb] = *pa; return 0; } template <typename T> template <typename Comp> int octave_sort<T>::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na, T *pb, octave_idx_type *ipb, octave_idx_type nb, Comp comp) { octave_idx_type k; T *dest; octave_idx_type *idest; int result = -1; /* guilty until proved innocent */ octave_idx_type min_gallop = m_ms->m_min_gallop; m_ms->getmemi (na); std::copy (pa, pa + na, m_ms->m_a); std::copy (ipa, ipa + na, m_ms->m_ia); dest = pa; idest = ipa; pa = m_ms->m_a; ipa = m_ms->m_ia; *dest++ = *pb++; *idest++ = *ipb++; --nb; if (nb == 0) goto Succeed; if (na == 1) goto CopyB; for (;;) { octave_idx_type acount = 0; /* # of times A won in a row */ octave_idx_type bcount = 0; /* # of times B won in a row */ /* Do the straightforward thing until (if ever) one run * appears to win consistently. */ for (;;) { if (comp (*pb, *pa)) { *dest++ = *pb++; *idest++ = *ipb++; ++bcount; acount = 0; --nb; if (nb == 0) goto Succeed; if (bcount >= min_gallop) break; } else { *dest++ = *pa++; *idest++ = *ipa++; ++acount; bcount = 0; --na; if (na == 1) goto CopyB; if (acount >= min_gallop) break; } } /* One run is winning so consistently that galloping may * be a huge win. So try that, and continue galloping until * (if ever) neither run appears to be winning consistently * anymore. */ ++min_gallop; do { min_gallop -= (min_gallop > 1); m_ms->m_min_gallop = min_gallop; k = gallop_right (*pb, pa, na, 0, comp); acount = k; if (k) { if (k < 0) goto Fail; dest = std::copy (pa, pa + k, dest); idest = std::copy (ipa, ipa + k, idest); pa += k; ipa += k; na -= k; if (na == 1) goto CopyB; /* na==0 is impossible now if the comparison * function is consistent, but we can't assume * that it is. */ if (na == 0) goto Succeed; } *dest++ = *pb++; *idest++ = *ipb++; --nb; if (nb == 0) goto Succeed; k = gallop_left (*pa, pb, nb, 0, comp); bcount = k; if (k) { if (k < 0) goto Fail; dest = std::copy (pb, pb + k, dest); idest = std::copy (ipb, ipb + k, idest); pb += k; ipb += k; nb -= k; if (nb == 0) goto Succeed; } *dest++ = *pa++; *idest++ = *ipa++; --na; if (na == 1) goto CopyB; } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); ++min_gallop; /* penalize it for leaving galloping mode */ m_ms->m_min_gallop = min_gallop; } Succeed: result = 0; Fail: if (na) { std::copy (pa, pa + na, dest); std::copy (ipa, ipa + na, idest); } return result; CopyB: /* The last element of pa belongs at the end of the merge. */ std::copy (pb, pb + nb, dest); std::copy (ipb, ipb + nb, idest); dest[nb] = *pa; idest[nb] = *ipa; return 0; } /* Merge the na elements starting at pa with the nb elements starting at pb * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the * merge, and should have na >= nb. See listsort.txt for more info. * Return 0 if successful, -1 if error. */ template <typename T> template <typename Comp> int octave_sort<T>::merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp) { octave_idx_type k; T *dest; int result = -1; /* guilty until proved innocent */ T *basea, *baseb; octave_idx_type min_gallop = m_ms->m_min_gallop; m_ms->getmem (nb); dest = pb + nb - 1; std::copy (pb, pb + nb, m_ms->m_a); basea = pa; baseb = m_ms->m_a; pb = m_ms->m_a + nb - 1; pa += na - 1; *dest-- = *pa--; --na; if (na == 0) goto Succeed; if (nb == 1) goto CopyA; for (;;) { octave_idx_type acount = 0; /* # of times A won in a row */ octave_idx_type bcount = 0; /* # of times B won in a row */ /* Do the straightforward thing until (if ever) one run * appears to win consistently. */ for (;;) { if (comp (*pb, *pa)) { *dest-- = *pa--; ++acount; bcount = 0; --na; if (na == 0) goto Succeed; if (acount >= min_gallop) break; } else { *dest-- = *pb--; ++bcount; acount = 0; --nb; if (nb == 1) goto CopyA; if (bcount >= min_gallop) break; } } /* One run is winning so consistently that galloping may * be a huge win. So try that, and continue galloping until * (if ever) neither run appears to be winning consistently * anymore. */ ++min_gallop; do { min_gallop -= (min_gallop > 1); m_ms->m_min_gallop = min_gallop; k = gallop_right (*pb, basea, na, na-1, comp); if (k < 0) goto Fail; k = na - k; acount = k; if (k) { dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; pa -= k; na -= k; if (na == 0) goto Succeed; } *dest-- = *pb--; --nb; if (nb == 1) goto CopyA; k = gallop_left (*pa, baseb, nb, nb-1, comp); if (k < 0) goto Fail; k = nb - k; bcount = k; if (k) { dest -= k; pb -= k; std::copy (pb+1, pb+1 + k, dest+1); nb -= k; if (nb == 1) goto CopyA; /* nb==0 is impossible now if the comparison * function is consistent, but we can't assume * that it is. */ if (nb == 0) goto Succeed; } *dest-- = *pa--; --na; if (na == 0) goto Succeed; } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); ++min_gallop; /* penalize it for leaving galloping mode */ m_ms->m_min_gallop = min_gallop; } Succeed: result = 0; Fail: if (nb) std::copy (baseb, baseb + nb, dest-(nb-1)); return result; CopyA: /* The first element of pb belongs at the front of the merge. */ dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; pa -= na; *dest = *pb; return 0; } template <typename T> template <typename Comp> int octave_sort<T>::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na, T *pb, octave_idx_type *ipb, octave_idx_type nb, Comp comp) { octave_idx_type k; T *dest; octave_idx_type *idest; int result = -1; /* guilty until proved innocent */ T *basea, *baseb; octave_idx_type *ibaseb; octave_idx_type min_gallop = m_ms->m_min_gallop; m_ms->getmemi (nb); dest = pb + nb - 1; idest = ipb + nb - 1; std::copy (pb, pb + nb, m_ms->m_a); std::copy (ipb, ipb + nb, m_ms->m_ia); basea = pa; baseb = m_ms->m_a; ibaseb = m_ms->m_ia; pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1; pa += na - 1; ipa += na - 1; *dest-- = *pa--; *idest-- = *ipa--; --na; if (na == 0) goto Succeed; if (nb == 1) goto CopyA; for (;;) { octave_idx_type acount = 0; /* # of times A won in a row */ octave_idx_type bcount = 0; /* # of times B won in a row */ /* Do the straightforward thing until (if ever) one run * appears to win consistently. */ for (;;) { if (comp (*pb, *pa)) { *dest-- = *pa--; *idest-- = *ipa--; ++acount; bcount = 0; --na; if (na == 0) goto Succeed; if (acount >= min_gallop) break; } else { *dest-- = *pb--; *idest-- = *ipb--; ++bcount; acount = 0; --nb; if (nb == 1) goto CopyA; if (bcount >= min_gallop) break; } } /* One run is winning so consistently that galloping may * be a huge win. So try that, and continue galloping until * (if ever) neither run appears to be winning consistently * anymore. */ ++min_gallop; do { min_gallop -= (min_gallop > 1); m_ms->m_min_gallop = min_gallop; k = gallop_right (*pb, basea, na, na-1, comp); if (k < 0) goto Fail; k = na - k; acount = k; if (k) { dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1; pa -= k; ipa -= k; na -= k; if (na == 0) goto Succeed; } *dest-- = *pb--; *idest-- = *ipb--; --nb; if (nb == 1) goto CopyA; k = gallop_left (*pa, baseb, nb, nb-1, comp); if (k < 0) goto Fail; k = nb - k; bcount = k; if (k) { dest -= k; idest -= k; pb -= k; ipb -= k; std::copy (pb+1, pb+1 + k, dest+1); std::copy (ipb+1, ipb+1 + k, idest+1); nb -= k; if (nb == 1) goto CopyA; /* nb==0 is impossible now if the comparison * function is consistent, but we can't assume * that it is. */ if (nb == 0) goto Succeed; } *dest-- = *pa--; *idest-- = *ipa--; --na; if (na == 0) goto Succeed; } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); ++min_gallop; /* penalize it for leaving galloping mode */ m_ms->m_min_gallop = min_gallop; } Succeed: result = 0; Fail: if (nb) { std::copy (baseb, baseb + nb, dest-(nb-1)); std::copy (ibaseb, ibaseb + nb, idest-(nb-1)); } return result; CopyA: /* The first element of pb belongs at the front of the merge. */ dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1; pa -= na; ipa -= na; *dest = *pb; *idest = *ipb; return 0; } /* Merge the two runs at stack indices i and i+1. * Returns 0 on success, -1 on error. */ template <typename T> template <typename Comp> int octave_sort<T>::merge_at (octave_idx_type i, T *data, Comp comp) { T *pa, *pb; octave_idx_type na, nb; octave_idx_type k; pa = data + m_ms->m_pending[i].m_base; na = m_ms->m_pending[i].m_len; pb = data + m_ms->m_pending[i+1].m_base; nb = m_ms->m_pending[i+1].m_len; /* Record the length of the combined runs; if i is the 3rd-last * run now, also slide over the last run (which isn't involved * in this merge). The current run i+1 goes away in any case. */ m_ms->m_pending[i].m_len = na + nb; if (i == m_ms->m_n - 3) m_ms->m_pending[i+1] = m_ms->m_pending[i+2]; m_ms->m_n--; /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ k = gallop_right (*pb, pa, na, 0, comp); if (k < 0) return -1; pa += k; na -= k; if (na == 0) return 0; /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); if (nb <= 0) return nb; /* Merge what remains of the runs, using a temp array with * min (na, nb) elements. */ if (na <= nb) return merge_lo (pa, na, pb, nb, comp); else return merge_hi (pa, na, pb, nb, comp); } template <typename T> template <typename Comp> int octave_sort<T>::merge_at (octave_idx_type i, T *data, octave_idx_type *idx, Comp comp) { T *pa, *pb; octave_idx_type *ipa, *ipb; octave_idx_type na, nb; octave_idx_type k; pa = data + m_ms->m_pending[i].m_base; ipa = idx + m_ms->m_pending[i].m_base; na = m_ms->m_pending[i].m_len; pb = data + m_ms->m_pending[i+1].m_base; ipb = idx + m_ms->m_pending[i+1].m_base; nb = m_ms->m_pending[i+1].m_len; /* Record the length of the combined runs; if i is the 3rd-last * run now, also slide over the last run (which isn't involved * in this merge). The current run i+1 goes away in any case. */ m_ms->m_pending[i].m_len = na + nb; if (i == m_ms->m_n - 3) m_ms->m_pending[i+1] = m_ms->m_pending[i+2]; m_ms->m_n--; /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ k = gallop_right (*pb, pa, na, 0, comp); if (k < 0) return -1; pa += k; ipa += k; na -= k; if (na == 0) return 0; /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); if (nb <= 0) return nb; /* Merge what remains of the runs, using a temp array with * min (na, nb) elements. */ if (na <= nb) return merge_lo (pa, ipa, na, pb, ipb, nb, comp); else return merge_hi (pa, ipa, na, pb, ipb, nb, comp); } /* Examine the stack of runs waiting to be merged, merging adjacent runs * until the stack invariants are re-established: * * 1. len[-3] > len[-2] + len[-1] * 2. len[-2] > len[-1] * * See listsort.txt for more info. * * Returns 0 on success, -1 on error. */ template <typename T> template <typename Comp> int octave_sort<T>::merge_collapse (T *data, Comp comp) { struct s_slice *p = m_ms->m_pending; while (m_ms->m_n > 1) { octave_idx_type n = m_ms->m_n - 2; if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len) { if (p[n-1].m_len < p[n+1].m_len) --n; if (merge_at (n, data, comp) < 0) return -1; } else if (p[n].m_len <= p[n+1].m_len) { if (merge_at (n, data, comp) < 0) return -1; } else break; } return 0; } template <typename T> template <typename Comp> int octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp) { struct s_slice *p = m_ms->m_pending; while (m_ms->m_n > 1) { octave_idx_type n = m_ms->m_n - 2; if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len) { if (p[n-1].m_len < p[n+1].m_len) --n; if (merge_at (n, data, idx, comp) < 0) return -1; } else if (p[n].m_len <= p[n+1].m_len) { if (merge_at (n, data, idx, comp) < 0) return -1; } else break; } return 0; } /* Regardless of invariants, merge all runs on the stack until only one * remains. This is used at the end of the mergesort. * * Returns 0 on success, -1 on error. */ template <typename T> template <typename Comp> int octave_sort<T>::merge_force_collapse (T *data, Comp comp) { struct s_slice *p = m_ms->m_pending; while (m_ms->m_n > 1) { octave_idx_type n = m_ms->m_n - 2; if (n > 0 && p[n-1].m_len < p[n+1].m_len) --n; if (merge_at (n, data, comp) < 0) return -1; } return 0; } template <typename T> template <typename Comp> int octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp) { struct s_slice *p = m_ms->m_pending; while (m_ms->m_n > 1) { octave_idx_type n = m_ms->m_n - 2; if (n > 0 && p[n-1].m_len < p[n+1].m_len) --n; if (merge_at (n, data, idx, comp) < 0) return -1; } return 0; } /* Compute a good value for the minimum run length; natural runs shorter * than this are boosted artificially via binary insertion. * * If n < 64, return n (it's too small to bother with fancy stuff). * Else if n is an exact power of 2, return 32. * Else return an int k, 32 <= k <= 64, such that n/k is close to, but * strictly less than, an exact power of 2. * * See listsort.txt for more info. */ template <typename T> octave_idx_type octave_sort<T>::merge_compute_minrun (octave_idx_type n) { octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */ while (n >= 64) { r |= n & 1; n >>= 1; } return n + r; } template <typename T> template <typename Comp> void octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp) { /* Re-initialize the Mergestate as this might be the second time called */ if (! m_ms) m_ms = new MergeState; m_ms->reset (); m_ms->getmem (MERGESTATE_TEMP_SIZE); if (nel > 1) { octave_idx_type nremaining = nel; octave_idx_type lo = 0; /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ octave_idx_type minrun = merge_compute_minrun (nremaining); do { bool descending; octave_idx_type n; /* Identify next run. */ n = count_run (data + lo, nremaining, descending, comp); if (n < 0) return; if (descending) std::reverse (data + lo, data + lo + n); /* If short, extend to min (minrun, nremaining). */ if (n < minrun) { const octave_idx_type force = (nremaining <= minrun ? nremaining : minrun); binarysort (data + lo, force, n, comp); n = force; } /* Push run onto m_pending-runs stack, and maybe merge. */ liboctave_panic_unless (m_ms->m_n < MAX_MERGE_PENDING); m_ms->m_pending[m_ms->m_n].m_base = lo; m_ms->m_pending[m_ms->m_n].m_len = n; m_ms->m_n++; if (merge_collapse (data, comp) < 0) return; /* Advance to find next run. */ lo += n; nremaining -= n; } while (nremaining); merge_force_collapse (data, comp); } } template <typename T> template <typename Comp> void octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel, Comp comp) { /* Re-initialize the Mergestate as this might be the second time called */ if (! m_ms) m_ms = new MergeState; m_ms->reset (); m_ms->getmemi (MERGESTATE_TEMP_SIZE); if (nel > 1) { octave_idx_type nremaining = nel; octave_idx_type lo = 0; /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ octave_idx_type minrun = merge_compute_minrun (nremaining); do { bool descending; octave_idx_type n; /* Identify next run. */ n = count_run (data + lo, nremaining, descending, comp); if (n < 0) return; if (descending) { std::reverse (data + lo, data + lo + n); std::reverse (idx + lo, idx + lo + n); } /* If short, extend to min (minrun, nremaining). */ if (n < minrun) { const octave_idx_type force = (nremaining <= minrun ? nremaining : minrun); binarysort (data + lo, idx + lo, force, n, comp); n = force; } /* Push run onto m_pending-runs stack, and maybe merge. */ liboctave_panic_unless (m_ms->m_n < MAX_MERGE_PENDING); m_ms->m_pending[m_ms->m_n].m_base = lo; m_ms->m_pending[m_ms->m_n].m_len = n; m_ms->m_n++; if (merge_collapse (data, idx, comp) < 0) return; /* Advance to find next run. */ lo += n; nremaining -= n; } while (nremaining); merge_force_collapse (data, idx, comp); } } template <typename T> using compare_fcn_ptr = bool (*) (typename ref_param<T>::type, typename ref_param<T>::type); template <typename T> void octave_sort<T>::sort (T *data, octave_idx_type nel) { #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) sort (data, nel, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) sort (data, nel, std::greater<T> ()); else #endif if (m_compare) sort (data, nel, m_compare); } template <typename T> void octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel) { #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) sort (data, idx, nel, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) sort (data, idx, nel, std::greater<T> ()); else #endif if (m_compare) sort (data, idx, nel, m_compare); } template <typename T> template <typename Comp> bool octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp) { const T *end = data + nel; if (data != end) { const T *next = data; while (++next != end) { if (comp (*next, *data)) break; data = next; } data = next; } return data == end; } template <typename T> bool octave_sort<T>::issorted (const T *data, octave_idx_type nel) { bool retval = false; #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) retval = issorted (data, nel, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) retval = issorted (data, nel, std::greater<T> ()); else #endif if (m_compare) retval = issorted (data, nel, m_compare); return retval; } struct sortrows_run_t { public: sortrows_run_t (octave_idx_type c, octave_idx_type o, octave_idx_type n) : col (c), ofs (o), nel (n) { } //-------- octave_idx_type col, ofs, nel; }; template <typename T> template <typename Comp> void octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols, Comp comp) { OCTAVE_LOCAL_BUFFER (T, buf, rows); for (octave_idx_type i = 0; i < rows; i++) idx[i] = i; if (cols == 0 || rows <= 1) return; // This is a breadth-first traversal. typedef sortrows_run_t run_t; std::stack<run_t> runs; runs.push (run_t (0, 0, rows)); while (! runs.empty ()) { octave_idx_type col = runs.top ().col; octave_idx_type ofs = runs.top ().ofs; octave_idx_type nel = runs.top ().nel; runs.pop (); liboctave_panic_unless (nel > 1); T *lbuf = buf + ofs; const T *ldata = data + rows*col; octave_idx_type *lidx = idx + ofs; // Gather. for (octave_idx_type i = 0; i < nel; i++) lbuf[i] = ldata[lidx[i]]; // Sort. sort (lbuf, lidx, nel, comp); // Identify constant runs and schedule subsorts. if (col < cols-1) { octave_idx_type lst = 0; for (octave_idx_type i = 0; i < nel; i++) { if (comp (lbuf[lst], lbuf[i])) { if (i > lst + 1) runs.push (run_t (col+1, ofs + lst, i - lst)); lst = i; } } if (nel > lst + 1) runs.push (run_t (col+1, ofs + lst, nel - lst)); } } } template <typename T> void octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols) { #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) sort_rows (data, idx, rows, cols, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) sort_rows (data, idx, rows, cols, std::greater<T> ()); else #endif if (m_compare) sort_rows (data, idx, rows, cols, m_compare); } template <typename T> template <typename Comp> bool octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows, octave_idx_type cols, Comp comp) { if (rows <= 1 || cols == 0) return true; // This is a breadth-first traversal. const T *lastrow = data + rows*(cols - 1); typedef std::pair<const T *, octave_idx_type> run_t; std::stack<run_t> runs; bool sorted = true; runs.push (run_t (data, rows)); while (sorted && ! runs.empty ()) { const T *lo = runs.top ().first; octave_idx_type n = runs.top ().second; runs.pop (); if (lo < lastrow) { // Not the final column. liboctave_panic_unless (n > 1); const T *hi = lo + n; const T *lst = lo; for (lo++; lo < hi; lo++) { if (comp (*lst, *lo)) { if (lo > lst + 1) runs.push (run_t (lst + rows, lo - lst)); lst = lo; } else if (comp (*lo, *lst)) break; } if (lo == hi) { if (lo > lst + 1) runs.push (run_t (lst + rows, lo - lst)); } else { sorted = false; break; } } else // The final column - use fast code. sorted = issorted (lo, n, comp); } return sorted; } template <typename T> bool octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows, octave_idx_type cols) { bool retval = false; #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) retval = is_sorted_rows (data, rows, cols, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) retval = is_sorted_rows (data, rows, cols, std::greater<T> ()); else #endif if (m_compare) retval = is_sorted_rows (data, rows, cols, m_compare); return retval; } // The simple binary lookup. template <typename T> template <typename Comp> octave_idx_type octave_sort<T>::lookup (const T *data, octave_idx_type nel, const T& value, Comp comp) { octave_idx_type lo = 0; octave_idx_type hi = nel; while (lo < hi) { octave_idx_type mid = lo + ((hi-lo) >> 1); if (comp (value, data[mid])) hi = mid; else lo = mid + 1; } return lo; } template <typename T> octave_idx_type octave_sort<T>::lookup (const T *data, octave_idx_type nel, const T& value) { octave_idx_type retval = 0; #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) retval = lookup (data, nel, value, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) retval = lookup (data, nel, value, std::greater<T> ()); else #endif if (m_compare) retval = lookup (data, nel, value, m_compare); return retval; } template <typename T> template <typename Comp> void octave_sort<T>::lookup (const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, Comp comp) { // Use a sequence of binary lookups. // FIXME: Can this be sped up generally? The sorted merge case is dealt with // elsewhere. for (octave_idx_type j = 0; j < nvalues; j++) idx[j] = lookup (data, nel, values[j], comp); } template <typename T> void octave_sort<T>::lookup (const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx) { #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) lookup (data, nel, values, nvalues, idx, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) lookup (data, nel, values, nvalues, idx, std::greater<T> ()); else #endif if (m_compare) lookup (data, nel, values, nvalues, idx, m_compare); } template <typename T> template <typename Comp> void octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev, Comp comp) { if (rev) { octave_idx_type i = 0; octave_idx_type j = nvalues - 1; if (nvalues > 0 && nel > 0) { while (true) { if (comp (values[j], data[i])) { idx[j] = i; if (--j < 0) break; } else if (++i == nel) break; } } for (; j >= 0; j--) idx[j] = i; } else { octave_idx_type i = 0; octave_idx_type j = 0; if (nvalues > 0 && nel > 0) { while (true) { if (comp (values[j], data[i])) { idx[j] = i; if (++j == nvalues) break; } else if (++i == nel) break; } } for (; j != nvalues; j++) idx[j] = i; } } template <typename T> void octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev) { #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ()); else #endif if (m_compare) lookup_sorted (data, nel, values, nvalues, idx, rev, m_compare); } template <typename T> template <typename Comp> void octave_sort<T>::nth_element (T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up, Comp comp) { // Simply wrap the STL algorithms. // FIXME: this will fail if we attempt to inline <,> for Complex. if (up == lo+1) std::nth_element (data, data + lo, data + nel, comp); else if (lo == 0) std::partial_sort (data, data + up, data + nel, comp); else { std::nth_element (data, data + lo, data + nel, comp); if (up == lo + 2) { // Finding two subsequent elements. std::swap (data[lo+1], *std::min_element (data + lo + 1, data + nel, comp)); } else std::partial_sort (data + lo + 1, data + up, data + nel, comp); } } template <typename T> void octave_sort<T>::nth_element (T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up) { if (up < 0) up = lo + 1; #if defined (INLINE_ASCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare) nth_element (data, nel, lo, up, std::less<T> ()); else #endif #if defined (INLINE_DESCENDING_SORT) if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare) nth_element (data, nel, lo, up, std::greater<T> ()); else #endif if (m_compare) nth_element (data, nel, lo, up, m_compare); } template <typename T> bool octave_sort<T>::ascending_compare (typename ref_param<T>::type x, typename ref_param<T>::type y) { return x < y; } template <typename T> bool octave_sort<T>::descending_compare (typename ref_param<T>::type x, typename ref_param<T>::type y) { return x > y; }