view liboctave/util/oct-sort.cc @ 31771:21f9b34eb893

maint: Eliminate "(void)" in C++ function prototypes/declarations. * mk-opts.pl, external.txi, embedded.cc, make_int.cc, standalone.cc, standalonebuiltin.cc, BaseControl.cc, BaseControl.h, ButtonControl.cc, ButtonControl.h, ButtonGroup.cc, ButtonGroup.h, Canvas.cc, Canvas.h, CheckBoxControl.cc, CheckBoxControl.h, Container.cc, Container.h, ContextMenu.cc, ContextMenu.h, EditControl.cc, EditControl.h, Figure.cc, Figure.h, FigureWindow.cc, FigureWindow.h, GLCanvas.cc, GLCanvas.h, GenericEventNotify.h, KeyMap.cc, ListBoxControl.cc, ListBoxControl.h, Logger.cc, Logger.h, Menu.cc, Menu.h, MenuContainer.h, Object.cc, Object.h, ObjectProxy.cc, ObjectProxy.h, Panel.cc, Panel.h, PopupMenuControl.cc, PopupMenuControl.h, PushButtonControl.cc, PushButtonControl.h, PushTool.cc, PushTool.h, RadioButtonControl.cc, RadioButtonControl.h, SliderControl.cc, SliderControl.h, Table.cc, Table.h, TextControl.cc, TextControl.h, TextEdit.h, ToggleButtonControl.cc, ToggleButtonControl.h, ToggleTool.cc, ToggleTool.h, ToolBar.cc, ToolBar.h, ToolBarButton.cc, ToolBarButton.h, gl-select.cc, gl-select.h, qopengl-functions.h, qt-graphics-toolkit.h, qdialog.cpp, qfontdialog.cpp, qprintdialog_win.cpp, liboctgui-build-info.h, liboctgui-build-info.in.cc, color-picker.cc, color-picker.h, command-widget.cc, command-widget.h, community-news.cc, community-news.h, dialog.cc, dialog.h, documentation-bookmarks.cc, documentation-bookmarks.h, documentation-dock-widget.cc, documentation-dock-widget.h, documentation.cc, documentation.h, dw-main-window.cc, dw-main-window.h, external-editor-interface.cc, external-editor-interface.h, files-dock-widget.cc, files-dock-widget.h, find-files-dialog.cc, find-files-dialog.h, find-files-model.cc, find-files-model.h, gui-preferences.cc, gui-preferences.h, gui-settings.cc, gui-settings.h, history-dock-widget.cc, history-dock-widget.h, interpreter-qobject.cc, interpreter-qobject.h, file-editor-interface.h, file-editor-tab.cc, file-editor-tab.h, file-editor.cc, file-editor.h, find-dialog.cc, find-dialog.h, marker.cc, marker.h, octave-qscintilla.cc, octave-qscintilla.h, octave-txt-lexer.cc, octave-txt-lexer.h, main-window.cc, main-window.h, news-reader.cc, news-reader.h, octave-dock-widget.cc, octave-dock-widget.h, octave-qobject.cc, octave-qobject.h, qt-application.cc, qt-application.h, qt-interpreter-events.cc, qt-interpreter-events.h, release-notes.cc, release-notes.h, set-path-dialog.cc, set-path-dialog.h, set-path-model.cc, set-path-model.h, settings-dialog.cc, settings-dialog.h, shortcuts-tree-widget.cc, shortcuts-tree-widget.h, tab-bar.cc, tab-bar.h, terminal-dock-widget.cc, terminal-dock-widget.h, variable-editor-model.cc, variable-editor-model.h, variable-editor.cc, variable-editor.h, welcome-wizard.cc, welcome-wizard.h, workspace-model.cc, workspace-model.h, workspace-view.cc, workspace-view.h, build-env.h, Cell.cc, Cell.h, __contourc__.cc, __magick_read__.cc, auto-shlib.cc, auto-shlib.h, base-text-renderer.h, bsxfun.cc, c-file-ptr-stream.cc, c-file-ptr-stream.h, call-stack.cc, call-stack.h, debug.cc, defaults.cc, defaults.h, defun.cc, display.cc, display.h, dynamic-ld.cc, dynamic-ld.h, environment.cc, environment.h, error.cc, error.h, errwarn.cc, errwarn.h, event-manager.cc, event-manager.h, event-queue.cc, event-queue.h, fcn-info.cc, fcn-info.h, ft-text-renderer.cc, ft-text-renderer.h, genprops.awk, gh-manager.cc, gh-manager.h, gl-render.cc, gl-render.h, gl2ps-print.cc, graphics-toolkit.h, graphics.cc, graphics.in.h, gtk-manager.cc, gtk-manager.h, help.cc, help.h, hook-fcn.h, input.cc, input.h, interpreter-private.cc, interpreter-private.h, interpreter.cc, interpreter.h, jsondecode.cc, latex-text-renderer.cc, latex-text-renderer.h, load-path.cc, load-path.h, load-save.cc, load-save.h, ls-hdf5.cc, ls-hdf5.h, mxarray.h, oct-errno.h, oct-errno.in.cc, oct-fstrm.cc, oct-fstrm.h, oct-handle.h, oct-hist.cc, oct-hist.h, oct-iostrm.cc, oct-iostrm.h, oct-map.cc, oct-map.h, oct-opengl.h, oct-prcstrm.cc, oct-prcstrm.h, oct-procbuf.cc, oct-procbuf.h, oct-process.h, oct-stdstrm.h, oct-stream.cc, oct-stream.h, oct-strstrm.cc, oct-strstrm.h, oct-tex-lexer.in.ll, pager.cc, pager.h, pr-flt-fmt.cc, pr-flt-fmt.h, pr-output.cc, pr-output.h, procstream.cc, procstream.h, settings.cc, settings.h, sighandlers.cc, sighandlers.h, stack-frame.cc, stack-frame.h, svd.cc, syminfo.cc, syminfo.h, symrec.cc, symrec.h, symscope.cc, symscope.h, symtab.cc, symtab.h, sysdep.cc, sysdep.h, text-engine.cc, text-engine.h, text-renderer.cc, text-renderer.h, toplev.cc, url-handle-manager.cc, url-handle-manager.h, variables.cc, xpow.cc, __init_fltk__.cc, __init_gnuplot__.cc, __ode15__.cc, audiodevinfo.cc, gzip.cc, liboctinterp-build-info.h, liboctinterp-build-info.in.cc, mk-build-env-features.sh, mk-builtins.pl, cdef-class.cc, cdef-class.h, cdef-manager.h, cdef-method.cc, cdef-method.h, cdef-object.cc, cdef-object.h, cdef-package.cc, cdef-package.h, cdef-property.cc, cdef-property.h, cdef-utils.cc, cdef-utils.h, ov-base-diag.cc, ov-base-diag.h, ov-base-int.cc, ov-base-int.h, ov-base-mat.cc, ov-base-mat.h, ov-base-scalar.cc, ov-base-scalar.h, ov-base-sparse.cc, ov-base-sparse.h, ov-base.cc, ov-base.h, ov-bool-mat.cc, ov-bool-mat.h, ov-bool-sparse.cc, ov-bool-sparse.h, ov-bool.cc, ov-bool.h, ov-builtin.cc, ov-builtin.h, ov-cell.cc, ov-cell.h, ov-ch-mat.cc, ov-ch-mat.h, ov-class.cc, ov-class.h, ov-classdef.cc, ov-classdef.h, ov-colon.h, ov-complex.cc, ov-complex.h, ov-cs-list.h, ov-cx-diag.cc, ov-cx-diag.h, ov-cx-mat.cc, ov-cx-mat.h, ov-cx-sparse.cc, ov-cx-sparse.h, ov-dld-fcn.cc, ov-dld-fcn.h, ov-fcn-handle.cc, ov-fcn-handle.h, ov-fcn.cc, ov-fcn.h, ov-float.cc, ov-float.h, ov-flt-complex.cc, ov-flt-complex.h, ov-flt-cx-diag.cc, ov-flt-cx-diag.h, ov-flt-cx-mat.cc, ov-flt-cx-mat.h, ov-flt-re-diag.cc, ov-flt-re-diag.h, ov-flt-re-mat.cc, ov-flt-re-mat.h, ov-intx.h, ov-java.cc, ov-java.h, ov-lazy-idx.cc, ov-lazy-idx.h, ov-legacy-range.cc, ov-legacy-range.h, ov-magic-int.cc, ov-magic-int.h, ov-mex-fcn.cc, ov-mex-fcn.h, ov-null-mat.cc, ov-null-mat.h, ov-oncleanup.cc, ov-oncleanup.h, ov-perm.cc, ov-perm.h, ov-range.cc, ov-range.h, ov-re-diag.cc, ov-re-diag.h, ov-re-mat.cc, ov-re-mat.h, ov-re-sparse.cc, ov-re-sparse.h, ov-scalar.cc, ov-scalar.h, ov-str-mat.cc, ov-str-mat.h, ov-struct.cc, ov-struct.h, ov-typeinfo.cc, ov-typeinfo.h, ov-usr-fcn.cc, ov-usr-fcn.h, ov.cc, ov.h, ovl.cc, ovl.h, octave.cc, octave.h, anon-fcn-validator.h, bp-table.cc, bp-table.h, comment-list.cc, comment-list.h, filepos.h, lex.h, lex.ll, oct-lvalue.cc, oct-lvalue.h, oct-parse.yy, parse.h, profiler.cc, profiler.h, pt-anon-scopes.h, pt-arg-list.cc, pt-arg-list.h, pt-args-block.cc, pt-args-block.h, pt-array-list.cc, pt-array-list.h, pt-assign.cc, pt-assign.h, pt-binop.cc, pt-binop.h, pt-bp.h, pt-cbinop.h, pt-cell.h, pt-check.h, pt-classdef.cc, pt-classdef.h, pt-cmd.h, pt-colon.h, pt-const.h, pt-decl.cc, pt-decl.h, pt-eval.cc, pt-eval.h, pt-except.cc, pt-except.h, pt-exp.cc, pt-exp.h, pt-fcn-handle.cc, pt-fcn-handle.h, pt-id.cc, pt-id.h, pt-idx.cc, pt-idx.h, pt-jump.h, pt-loop.cc, pt-loop.h, pt-mat.h, pt-misc.cc, pt-misc.h, pt-pr-code.cc, pt-pr-code.h, pt-select.cc, pt-select.h, pt-spmd.cc, pt-spmd.h, pt-stmt.cc, pt-stmt.h, pt-tm-const.cc, pt-tm-const.h, pt-unop.cc, pt-unop.h, pt-walk.h, pt.cc, pt.h, token.cc, token.h, usage.h, Array-base.cc, Array.h, CColVector.cc, CColVector.h, CDiagMatrix.cc, CDiagMatrix.h, CMatrix.cc, CMatrix.h, CNDArray.cc, CNDArray.h, CRowVector.cc, CRowVector.h, CSparse.cc, CSparse.h, DiagArray2.cc, DiagArray2.h, MArray.cc, MArray.h, MDiagArray2.h, MSparse.h, MatrixType.cc, MatrixType.h, PermMatrix.cc, PermMatrix.h, Range.cc, Range.h, Sparse-b.cc, Sparse.cc, Sparse.h, boolMatrix.cc, boolMatrix.h, boolNDArray.cc, boolNDArray.h, boolSparse.cc, boolSparse.h, chMatrix.h, chNDArray.h, dColVector.cc, dColVector.h, dDiagMatrix.cc, dDiagMatrix.h, dMatrix.cc, dMatrix.h, dNDArray.cc, dNDArray.h, dRowVector.cc, dRowVector.h, dSparse.cc, dSparse.h, dim-vector.cc, dim-vector.h, fCColVector.cc, fCColVector.h, fCDiagMatrix.cc, fCDiagMatrix.h, fCMatrix.cc, fCMatrix.h, fCNDArray.cc, fCNDArray.h, fCRowVector.cc, fCRowVector.h, fColVector.cc, fColVector.h, fDiagMatrix.cc, fDiagMatrix.h, fMatrix.cc, fMatrix.h, fNDArray.cc, fNDArray.h, fRowVector.cc, fRowVector.h, idx-vector.cc, idx-vector.h, intNDArray.cc, intNDArray.h, liboctave-build-info.h, liboctave-build-info.in.cc, CollocWt.cc, CollocWt.h, DAE.h, DAEFunc.h, DAERT.h, DAERTFunc.h, DASPK.cc, DASPK.h, DASRT.cc, DASRT.h, DASSL.cc, DASSL.h, DET.h, EIG.h, LSODE.cc, LSODE.h, ODE.h, ODEFunc.h, ODES.h, ODESFunc.h, Quad.h, aepbalance.cc, aepbalance.h, base-dae.h, base-de.h, chol.cc, chol.h, eigs-base.cc, fEIG.h, gepbalance.h, gsvd.cc, gsvd.h, hess.h, lu.cc, lu.h, oct-fftw.cc, oct-fftw.h, oct-rand.cc, oct-rand.h, oct-spparms.cc, oct-spparms.h, qr.cc, qr.h, qrp.cc, qrp.h, randmtzig.cc, randmtzig.h, schur.h, sparse-chol.cc, sparse-chol.h, sparse-lu.cc, sparse-lu.h, sparse-qr.cc, sparse-qr.h, svd.cc, svd.h, child-list.cc, child-list.h, dir-ops.cc, dir-ops.h, file-ops.cc, file-ops.h, file-stat.cc, file-stat.h, lo-sysdep.cc, lo-sysdep.h, lo-sysinfo.cc, lo-sysinfo.h, mach-info.cc, mach-info.h, oct-env.cc, oct-env.h, oct-group.cc, oct-group.h, oct-password.cc, oct-password.h, oct-syscalls.cc, oct-syscalls.h, oct-time.cc, oct-time.h, oct-uname.cc, oct-uname.h, action-container.h, base-list.h, caseless-str.h, cmd-edit.cc, cmd-edit.h, cmd-hist.cc, cmd-hist.h, data-conv.cc, file-info.h, glob-match.cc, glob-match.h, kpse.cc, kpse.h, lo-array-errwarn.cc, lo-array-errwarn.h, lo-hash.cc, lo-hash.h, lo-ieee.cc, lo-ieee.h, lo-regexp.cc, lo-regexp.h, oct-inttypes.cc, oct-inttypes.h, oct-mutex.cc, oct-mutex.h, oct-refcount.h, oct-shlib.cc, oct-shlib.h, oct-sort.cc, oct-sort.h, oct-sparse.cc, octave-preserve-stream-state.h, pathsearch.cc, pathsearch.h, quit.cc, singleton-cleanup.cc, singleton-cleanup.h, str-vec.cc, str-vec.h, unwind-prot.cc, unwind-prot.h, url-transfer.cc, url-transfer.h, version.cc, version.in.h, cxx-signal-helpers.cc, acinclude.m4, main-cli.cc, main-gui.cc, main.in.cc, mkoctfile.in.cc, octave-build-info.h, octave-build-info.in.cc, octave-config.in.cc, octave-svgconvert.cc, shared-fcns.h: maint: Eliminate "(void)" in C++ function prototypes/declarations.
author Rik <rik@octave.org>
date Tue, 24 Jan 2023 17:19:44 -0800
parents 597f3ee61a48
children 3c2c585965cc
line wrap: on
line source

////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2003-2023 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
// Code stolen in large part from Python's, listobject.c, which itself had
// no license header.  However, thanks to Tim Peters for the parts of the
// code I ripped-off.
//
// As required in the Python license the short description of the changes
// made are
//
// * convert the sorting code in listobject.cc into a generic class,
//   replacing PyObject* with the type of the class T.
//
// * replaced usages of malloc, free, memcpy and memmove by standard C++
//   new [], delete [] and std::copy and std::copy_backward.  Note that replacing
//   memmove by std::copy is possible if the destination starts before the source.
//   If not, std::copy_backward needs to be used.
//
// * templatize comparison operator in most methods, provide possible dispatch
//
// * duplicate methods to avoid by-the-way indexed sorting
//
// * add methods for verifying sortedness of array
//
// * row sorting via breadth-first tree subsorting
//
// * binary lookup and sequential binary lookup optimized for dense downsampling.
//
// * NOTE: the memory management routines rely on the fact that delete [] silently
//   ignores null pointers.  Don't gripe about the missing checks - they're there.
//
//
// The Python license is
//
//   PSF LICENSE AGREEMENT FOR PYTHON 2.3
//   --------------------------------------
//
//   1. This LICENSE AGREEMENT is between the Python Software Foundation
//   ("PSF"), and the Individual or Organization ("Licensee") accessing and
//   otherwise using Python 2.3 software in source or binary form and its
//   associated documentation.
//
//   2. Subject to the terms and conditions of this License Agreement, PSF
//   hereby grants Licensee a nonexclusive, royalty-free, world-wide
//   license to reproduce, analyze, test, perform and/or display publicly,
//   prepare derivative works, distribute, and otherwise use Python 2.3
//   alone or in any derivative version, provided, however, that PSF's
//   License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
//   2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
//   retained in Python 2.3 alone or in any derivative version prepared by
//   Licensee.
//
//   3. In the event Licensee prepares a derivative work that is based on
//   or incorporates Python 2.3 or any part thereof, and wants to make
//   the derivative work available to others as provided herein, then
//   Licensee hereby agrees to include in any such work a brief summary of
//   the changes made to Python 2.3.
//
//   4. PSF is making Python 2.3 available to Licensee on an "AS IS"
//   basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
//   IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
//   DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
//   FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
//   INFRINGE ANY THIRD PARTY RIGHTS.
//
//   5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
//   2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
//   A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
//   OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
//
//   6. This License Agreement will automatically terminate upon a material
//   breach of its terms and conditions.
//
//   7. Nothing in this License Agreement shall be deemed to create any
//   relationship of agency, partnership, or joint venture between PSF and
//   Licensee.  This License Agreement does not grant permission to use PSF
//   trademarks or trade name in a trademark sense to endorse or promote
//   products or services of Licensee, or any third party.
//
//   8. By copying, installing or otherwise using Python 2.3, Licensee
//   agrees to be bound by the terms and conditions of this License
//   Agreement.
//
////////////////////////////////////////////////////////////////////////

// This file should not include config.h.  It is only included in other
// C++ source files that should have included config.h before including
// this file.

#include <cassert>
#include <algorithm>
#include <cstring>
#include <stack>

#include "lo-error.h"
#include "lo-mappers.h"
#include "quit.h"
#include "oct-sort.h"
#include "oct-locbuf.h"

template <typename T>
octave_sort<T>::octave_sort () :
  m_compare (ascending_compare), m_ms (nullptr)
{ }

template <typename T>
octave_sort<T>::octave_sort (const compare_fcn_type& comp)
  : m_compare (comp), m_ms (nullptr)
{ }

template <typename T>
octave_sort<T>::~octave_sort ()
{
  delete m_ms;
}

template <typename T>
void
octave_sort<T>::set_compare (sortmode mode)
{
  if (mode == ASCENDING)
    m_compare = ascending_compare;
  else if (mode == DESCENDING)
    m_compare = descending_compare;
  else
    m_compare = compare_fcn_type ();
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::binarysort (T *data, octave_idx_type nel,
                            octave_idx_type start, Comp comp)
{
  if (start == 0)
    ++start;

  for (; start < nel; ++start)
    {
      /* set l to where *start belongs */
      octave_idx_type l = 0;
      octave_idx_type r = start;
      T pivot = data[start];
      /* Invariants:
       * pivot >= all in [lo, l).
       * pivot  < all in [r, start).
       * The second is vacuously true at the start.
       */
      do
        {
          octave_idx_type p = l + ((r - l) >> 1);
          if (comp (pivot, data[p]))
            r = p;
          else
            l = p+1;
        }
      while (l < r);
      /* The invariants still hold, so pivot >= all in [lo, l) and
         pivot < all in [l, start), so pivot belongs at l.  Note
         that if there are elements equal to pivot, l points to the
         first slot after them -- that's why this sort is stable.
         Slide over to make room.
         Caution: using memmove is much slower under MSVC 5;
         we're not usually moving many slots. */
      // NOTE: using swap and going upwards appears to be faster.
      for (octave_idx_type p = l; p < start; p++)
        std::swap (pivot, data[p]);
      data[start] = pivot;
    }

  return;
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel,
                            octave_idx_type start, Comp comp)
{
  if (start == 0)
    ++start;

  for (; start < nel; ++start)
    {
      /* set l to where *start belongs */
      octave_idx_type l = 0;
      octave_idx_type r = start;
      T pivot = data[start];
      /* Invariants:
       * pivot >= all in [lo, l).
       * pivot  < all in [r, start).
       * The second is vacuously true at the start.
       */
      do
        {
          octave_idx_type p = l + ((r - l) >> 1);
          if (comp (pivot, data[p]))
            r = p;
          else
            l = p+1;
        }
      while (l < r);
      /* The invariants still hold, so pivot >= all in [lo, l) and
         pivot < all in [l, start), so pivot belongs at l.  Note
         that if there are elements equal to pivot, l points to the
         first slot after them -- that's why this sort is stable.
         Slide over to make room.
         Caution: using memmove is much slower under MSVC 5;
         we're not usually moving many slots. */
      // NOTE: using swap and going upwards appears to be faster.
      for (octave_idx_type p = l; p < start; p++)
        std::swap (pivot, data[p]);
      data[start] = pivot;
      octave_idx_type ipivot = idx[start];
      for (octave_idx_type p = l; p < start; p++)
        std::swap (ipivot, idx[p]);
      idx[start] = ipivot;
    }

  return;
}

/*
Return the length of the run beginning at lo, in the slice [lo, hi).  lo < hi
is required on entry.  "A run" is the longest ascending sequence, with

    lo[0] <= lo[1] <= lo[2] <= ...

or the longest descending sequence, with

    lo[0] > lo[1] > lo[2] > ...

DESCENDING is set to false in the former case, or to true in the latter.
For its intended use in a stable mergesort, the strictness of the defn of
"descending" is needed so that the caller can safely reverse a descending
sequence without violating stability (strict > ensures there are no equal
elements to get out of order).

Returns -1 in case of error.
*/
template <typename T>
template <typename Comp>
octave_idx_type
octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
                           Comp comp)
{
  octave_idx_type n;
  T *hi = lo + nel;

  descending = false;
  ++lo;
  if (lo == hi)
    return 1;

  n = 2;

  if (comp (*lo, *(lo-1)))
    {
      descending = true;
      for (lo = lo+1; lo < hi; ++lo, ++n)
        {
          if (comp (*lo, *(lo-1)))
            ;
          else
            break;
        }
    }
  else
    {
      for (lo = lo+1; lo < hi; ++lo, ++n)
        {
          if (comp (*lo, *(lo-1)))
            break;
        }
    }

  return n;
}

/*
Locate the proper position of key in a sorted vector; if the vector contains
an element equal to key, return the position immediately to the left of
the leftmost equal element.  [gallop_right() does the same except returns
the position to the right of the rightmost equal element (if any).]

"a" is a sorted vector with n elements, starting at a[0].  n must be > 0.

"hint" is an index at which to begin the search, 0 <= hint < n.  The closer
hint is to the final result, the faster this runs.

The return value is the int k in 0..n such that

    a[k-1] < key <= a[k]

pretending that *(a-1) is minus infinity and a[n] is plus infinity.  IOW,
key belongs at index k; or, IOW, the first k elements of a should precede
key, and the last n-k should follow key.

Returns -1 on error.  See listsort.txt for info on the method.
*/
template <typename T>
template <typename Comp>
octave_idx_type
octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n,
                             octave_idx_type hint,
                             Comp comp)
{
  octave_idx_type ofs;
  octave_idx_type lastofs;
  octave_idx_type k;

  a += hint;
  lastofs = 0;
  ofs = 1;
  if (comp (*a, key))
    {
      /* a[hint] < key -- gallop right, until
       * a[hint + lastofs] < key <= a[hint + ofs]
       */
      const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */
      while (ofs < maxofs)
        {
          if (comp (a[ofs], key))
            {
              lastofs = ofs;
              ofs = (ofs << 1) + 1;
              if (ofs <= 0)     /* int overflow */
                ofs = maxofs;
            }
          else  /* key <= a[hint + ofs] */
            break;
        }
      if (ofs > maxofs)
        ofs = maxofs;
      /* Translate back to offsets relative to &a[0]. */
      lastofs += hint;
      ofs += hint;
    }
  else
    {
      /* key <= a[hint] -- gallop left, until
       * a[hint - ofs] < key <= a[hint - lastofs]
       */
      const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */
      while (ofs < maxofs)
        {
          if (comp (*(a-ofs), key))
            break;
          /* key <= a[hint - ofs] */
          lastofs = ofs;
          ofs = (ofs << 1) + 1;
          if (ofs <= 0) /* int overflow */
            ofs = maxofs;
        }
      if (ofs > maxofs)
        ofs = maxofs;
      /* Translate back to positive offsets relative to &a[0]. */
      k = lastofs;
      lastofs = hint - ofs;
      ofs = hint - k;
    }
  a -= hint;

  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
   * right of lastofs but no farther right than ofs.  Do a binary
   * search, with invariant a[lastofs-1] < key <= a[ofs].
   */
  ++lastofs;
  while (lastofs < ofs)
    {
      octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);

      if (comp (a[m], key))
        lastofs = m+1;  /* a[m] < key */
      else
        ofs = m;        /* key <= a[m] */
    }

  return ofs;
}

/*
Exactly like gallop_left(), except that if key already exists in a[0:n],
finds the position immediately to the right of the rightmost equal value.

The return value is the int k in 0..n such that

    a[k-1] <= key < a[k]

or -1 if error.

The code duplication is massive, but this is enough different given that
we're sticking to "<" comparisons that it's much harder to follow if
written as one routine with yet another "left or right?" flag.
*/
template <typename T>
template <typename Comp>
octave_idx_type
octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n,
                              octave_idx_type hint,
                              Comp comp)
{
  octave_idx_type ofs;
  octave_idx_type lastofs;
  octave_idx_type k;

  a += hint;
  lastofs = 0;
  ofs = 1;
  if (comp (key, *a))
    {
      /* key < a[hint] -- gallop left, until
       * a[hint - ofs] <= key < a[hint - lastofs]
       */
      const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */
      while (ofs < maxofs)
        {
          if (comp (key, *(a-ofs)))
            {
              lastofs = ofs;
              ofs = (ofs << 1) + 1;
              if (ofs <= 0)     /* int overflow */
                ofs = maxofs;
            }
          else  /* a[hint - ofs] <= key */
            break;
        }
      if (ofs > maxofs)
        ofs = maxofs;
      /* Translate back to positive offsets relative to &a[0]. */
      k = lastofs;
      lastofs = hint - ofs;
      ofs = hint - k;
    }
  else
    {
      /* a[hint] <= key -- gallop right, until
       * a[hint + lastofs] <= key < a[hint + ofs]
       */
      const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */
      while (ofs < maxofs)
        {
          if (comp (key, a[ofs]))
            break;
          /* a[hint + ofs] <= key */
          lastofs = ofs;
          ofs = (ofs << 1) + 1;
          if (ofs <= 0) /* int overflow */
            ofs = maxofs;
        }
      if (ofs > maxofs)
        ofs = maxofs;
      /* Translate back to offsets relative to &a[0]. */
      lastofs += hint;
      ofs += hint;
    }
  a -= hint;

  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
   * right of lastofs but no farther right than ofs.  Do a binary
   * search, with invariant a[lastofs-1] <= key < a[ofs].
   */
  ++lastofs;
  while (lastofs < ofs)
    {
      octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);

      if (comp (key, a[m]))
        ofs = m;        /* key < a[m] */
      else
        lastofs = m+1;  /* a[m] <= key */
    }

  return ofs;
}

static inline octave_idx_type
roundupsize (std::size_t n)
{
  unsigned int nbits = 3;
  std::size_t n2 = n >> 8;

  /* Round up:
   * If n <       256, to a multiple of        8.
   * If n <      2048, to a multiple of       64.
   * If n <     16384, to a multiple of      512.
   * If n <    131072, to a multiple of     4096.
   * If n <   1048576, to a multiple of    32768.
   * If n <   8388608, to a multiple of   262144.
   * If n <  67108864, to a multiple of  2097152.
   * If n < 536870912, to a multiple of 16777216.
   * ...
   * If n < 2**(5+3*i), to a multiple of 2**(3*i).
   *
   * This over-allocates proportional to the list size, making room
   * for additional growth.  The over-allocation is mild, but is
   * enough to give linear-time amortized behavior over a long
   * sequence of appends() in the presence of a poorly-performing
   * system realloc() (which is a reality, e.g., across all flavors
   * of Windows, with Win9x behavior being particularly bad -- and
   * we've still got address space fragmentation problems on Win9x
   * even with this scheme, although it requires much longer lists to
   * provoke them than it used to).
   */
  while (n2)
    {
      n2 >>= 3;
      nbits += 3;
    }

  std::size_t new_size = ((n >> nbits) + 1) << nbits;

  if (new_size == 0
      || new_size
         > static_cast<std::size_t> (std::numeric_limits<octave_idx_type>::max ()))
    (*current_liboctave_error_handler)
      ("unable to allocate sufficient memory for sort");

  return static_cast<octave_idx_type> (new_size);
}

/* Ensure enough temp memory for 'need' array slots is available.
 * Returns 0 on success and -1 if the memory can't be gotten.
 */
template <typename T>
void
octave_sort<T>::MergeState::getmem (octave_idx_type need)
{
  if (need <= m_alloced)
    return;

  need = roundupsize (need);
  /* Don't realloc!  That can cost cycles to copy the old data, but
   * we don't care what's in the block.
   */
  delete [] m_a;
  delete [] m_ia; // Must do this or fool possible next getmemi.
  m_a = new T [need];
  m_alloced = need;

}

template <typename T>
void
octave_sort<T>::MergeState::getmemi (octave_idx_type need)
{
  if (m_ia && need <= m_alloced)
    return;

  need = roundupsize (need);
  /* Don't realloc!  That can cost cycles to copy the old data, but
   * we don't care what's in the block.
   */
  delete [] m_a;
  delete [] m_ia;

  m_a = new T [need];
  m_ia = new octave_idx_type [need];
  m_alloced = need;
}

/* Merge the na elements starting at pa with the nb elements starting at pb
 * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
 * merge, and should have na <= nb.  See listsort.txt for more info.
 * Return 0 if successful, -1 if error.
 */
template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_lo (T *pa, octave_idx_type na,
                          T *pb, octave_idx_type nb,
                          Comp comp)
{
  octave_idx_type k;
  T *dest;
  int result = -1;      /* guilty until proved innocent */
  octave_idx_type min_gallop = m_ms->m_min_gallop;

  m_ms->getmem (na);

  std::copy (pa, pa + na, m_ms->m_a);
  dest = pa;
  pa = m_ms->m_a;

  *dest++ = *pb++;
  --nb;
  if (nb == 0)
    goto Succeed;
  if (na == 1)
    goto CopyB;

  for (;;)
    {
      octave_idx_type acount = 0;       /* # of times A won in a row */
      octave_idx_type bcount = 0;       /* # of times B won in a row */

      /* Do the straightforward thing until (if ever) one run
       * appears to win consistently.
       */
      for (;;)
        {

          // FIXME: these loops are candidates for further optimizations.
          // Rather than testing everything in each cycle, it may be more
          // efficient to do it in hunks.
          if (comp (*pb, *pa))
            {
              *dest++ = *pb++;
              ++bcount;
              acount = 0;
              --nb;
              if (nb == 0)
                goto Succeed;
              if (bcount >= min_gallop)
                break;
            }
          else
            {
              *dest++ = *pa++;
              ++acount;
              bcount = 0;
              --na;
              if (na == 1)
                goto CopyB;
              if (acount >= min_gallop)
                break;
            }
        }

      /* One run is winning so consistently that galloping may
       * be a huge win.  So try that, and continue galloping until
       * (if ever) neither run appears to be winning consistently
       * anymore.
       */
      ++min_gallop;
      do
        {
          min_gallop -= (min_gallop > 1);
          m_ms->m_min_gallop = min_gallop;
          k = gallop_right (*pb, pa, na, 0, comp);
          acount = k;
          if (k)
            {
              if (k < 0)
                goto Fail;
              dest = std::copy (pa, pa + k, dest);
              pa += k;
              na -= k;
              if (na == 1)
                goto CopyB;
              /* na==0 is impossible now if the comparison
               * function is consistent, but we can't assume
               * that it is.
               */
              if (na == 0)
                goto Succeed;
            }
          *dest++ = *pb++;
          --nb;
          if (nb == 0)
            goto Succeed;

          k = gallop_left (*pa, pb, nb, 0, comp);
          bcount = k;
          if (k)
            {
              if (k < 0)
                goto Fail;
              dest = std::copy (pb, pb + k, dest);
              pb += k;
              nb -= k;
              if (nb == 0)
                goto Succeed;
            }
          *dest++ = *pa++;
          --na;
          if (na == 1)
            goto CopyB;
        }
      while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);

      ++min_gallop;     /* penalize it for leaving galloping mode */
      m_ms->m_min_gallop = min_gallop;
    }

Succeed:
  result = 0;

Fail:
  if (na)
    std::copy (pa, pa + na, dest);
  return result;

CopyB:
  /* The last element of pa belongs at the end of the merge. */
  std::copy (pb, pb + nb, dest);
  dest[nb] = *pa;

  return 0;
}

template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na,
                          T *pb, octave_idx_type *ipb, octave_idx_type nb,
                          Comp comp)
{
  octave_idx_type k;
  T *dest;
  octave_idx_type *idest;
  int result = -1;      /* guilty until proved innocent */
  octave_idx_type min_gallop = m_ms->m_min_gallop;

  m_ms->getmemi (na);

  std::copy (pa, pa + na, m_ms->m_a);
  std::copy (ipa, ipa + na, m_ms->m_ia);
  dest = pa; idest = ipa;
  pa = m_ms->m_a; ipa = m_ms->m_ia;

  *dest++ = *pb++; *idest++ = *ipb++;
  --nb;
  if (nb == 0)
    goto Succeed;
  if (na == 1)
    goto CopyB;

  for (;;)
    {
      octave_idx_type acount = 0;       /* # of times A won in a row */
      octave_idx_type bcount = 0;       /* # of times B won in a row */

      /* Do the straightforward thing until (if ever) one run
       * appears to win consistently.
       */
      for (;;)
        {

          if (comp (*pb, *pa))
            {
              *dest++ = *pb++; *idest++ = *ipb++;
              ++bcount;
              acount = 0;
              --nb;
              if (nb == 0)
                goto Succeed;
              if (bcount >= min_gallop)
                break;
            }
          else
            {
              *dest++ = *pa++; *idest++ = *ipa++;
              ++acount;
              bcount = 0;
              --na;
              if (na == 1)
                goto CopyB;
              if (acount >= min_gallop)
                break;
            }
        }

      /* One run is winning so consistently that galloping may
       * be a huge win.  So try that, and continue galloping until
       * (if ever) neither run appears to be winning consistently
       * anymore.
       */
      ++min_gallop;
      do
        {
          min_gallop -= (min_gallop > 1);
          m_ms->m_min_gallop = min_gallop;
          k = gallop_right (*pb, pa, na, 0, comp);
          acount = k;
          if (k)
            {
              if (k < 0)
                goto Fail;
              dest = std::copy (pa, pa + k, dest);
              idest = std::copy (ipa, ipa + k, idest);
              pa += k; ipa += k;
              na -= k;
              if (na == 1)
                goto CopyB;
              /* na==0 is impossible now if the comparison
               * function is consistent, but we can't assume
               * that it is.
               */
              if (na == 0)
                goto Succeed;
            }
          *dest++ = *pb++; *idest++ = *ipb++;
          --nb;
          if (nb == 0)
            goto Succeed;

          k = gallop_left (*pa, pb, nb, 0, comp);
          bcount = k;
          if (k)
            {
              if (k < 0)
                goto Fail;
              dest = std::copy (pb, pb + k, dest);
              idest = std::copy (ipb, ipb + k, idest);
              pb += k; ipb += k;
              nb -= k;
              if (nb == 0)
                goto Succeed;
            }
          *dest++ = *pa++; *idest++ = *ipa++;
          --na;
          if (na == 1)
            goto CopyB;
        }
      while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);

      ++min_gallop;     /* penalize it for leaving galloping mode */
      m_ms->m_min_gallop = min_gallop;
    }

Succeed:
  result = 0;

Fail:
  if (na)
    {
      std::copy (pa, pa + na, dest);
      std::copy (ipa, ipa + na, idest);
    }
  return result;

CopyB:
  /* The last element of pa belongs at the end of the merge. */
  std::copy (pb, pb + nb, dest);
  std::copy (ipb, ipb + nb, idest);
  dest[nb] = *pa;
  idest[nb] = *ipa;

  return 0;
}

/* Merge the na elements starting at pa with the nb elements starting at pb
 * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
 * merge, and should have na >= nb.  See listsort.txt for more info.
 * Return 0 if successful, -1 if error.
 */
template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_hi (T *pa, octave_idx_type na,
                          T *pb, octave_idx_type nb,
                          Comp comp)
{
  octave_idx_type k;
  T *dest;
  int result = -1;      /* guilty until proved innocent */
  T *basea, *baseb;
  octave_idx_type min_gallop = m_ms->m_min_gallop;

  m_ms->getmem (nb);

  dest = pb + nb - 1;
  std::copy (pb, pb + nb, m_ms->m_a);
  basea = pa;
  baseb = m_ms->m_a;
  pb = m_ms->m_a + nb - 1;
  pa += na - 1;

  *dest-- = *pa--;
  --na;
  if (na == 0)
    goto Succeed;
  if (nb == 1)
    goto CopyA;

  for (;;)
    {
      octave_idx_type acount = 0;       /* # of times A won in a row */
      octave_idx_type bcount = 0;       /* # of times B won in a row */

      /* Do the straightforward thing until (if ever) one run
       * appears to win consistently.
       */
      for (;;)
        {
          if (comp (*pb, *pa))
            {
              *dest-- = *pa--;
              ++acount;
              bcount = 0;
              --na;
              if (na == 0)
                goto Succeed;
              if (acount >= min_gallop)
                break;
            }
          else
            {
              *dest-- = *pb--;
              ++bcount;
              acount = 0;
              --nb;
              if (nb == 1)
                goto CopyA;
              if (bcount >= min_gallop)
                break;
            }
        }

      /* One run is winning so consistently that galloping may
       * be a huge win.  So try that, and continue galloping until
       * (if ever) neither run appears to be winning consistently
       * anymore.
       */
      ++min_gallop;
      do
        {
          min_gallop -= (min_gallop > 1);
          m_ms->m_min_gallop = min_gallop;
          k = gallop_right (*pb, basea, na, na-1, comp);
          if (k < 0)
            goto Fail;
          k = na - k;
          acount = k;
          if (k)
            {
              dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
              pa -= k;
              na -= k;
              if (na == 0)
                goto Succeed;
            }
          *dest-- = *pb--;
          --nb;
          if (nb == 1)
            goto CopyA;

          k = gallop_left (*pa, baseb, nb, nb-1, comp);
          if (k < 0)
            goto Fail;
          k = nb - k;
          bcount = k;
          if (k)
            {
              dest -= k;
              pb -= k;
              std::copy (pb+1, pb+1 + k, dest+1);
              nb -= k;
              if (nb == 1)
                goto CopyA;
              /* nb==0 is impossible now if the comparison
               * function is consistent, but we can't assume
               * that it is.
               */
              if (nb == 0)
                goto Succeed;
            }
          *dest-- = *pa--;
          --na;
          if (na == 0)
            goto Succeed;
        } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
      ++min_gallop;     /* penalize it for leaving galloping mode */
      m_ms->m_min_gallop = min_gallop;
    }

Succeed:
  result = 0;

Fail:
  if (nb)
    std::copy (baseb, baseb + nb, dest-(nb-1));
  return result;

CopyA:
  /* The first element of pb belongs at the front of the merge. */
  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
  pa -= na;
  *dest = *pb;

  return 0;
}

template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na,
                          T *pb, octave_idx_type *ipb, octave_idx_type nb,
                          Comp comp)
{
  octave_idx_type k;
  T *dest;
  octave_idx_type *idest;
  int result = -1;      /* guilty until proved innocent */
  T *basea, *baseb;
  octave_idx_type *ibaseb;
  octave_idx_type min_gallop = m_ms->m_min_gallop;

  m_ms->getmemi (nb);

  dest = pb + nb - 1;
  idest = ipb + nb - 1;
  std::copy (pb, pb + nb, m_ms->m_a);
  std::copy (ipb, ipb + nb, m_ms->m_ia);
  basea = pa;
  baseb = m_ms->m_a; ibaseb = m_ms->m_ia;
  pb = m_ms->m_a + nb - 1; ipb = m_ms->m_ia + nb - 1;
  pa += na - 1; ipa += na - 1;

  *dest-- = *pa--; *idest-- = *ipa--;
  --na;
  if (na == 0)
    goto Succeed;
  if (nb == 1)
    goto CopyA;

  for (;;)
    {
      octave_idx_type acount = 0;       /* # of times A won in a row */
      octave_idx_type bcount = 0;       /* # of times B won in a row */

      /* Do the straightforward thing until (if ever) one run
       * appears to win consistently.
       */
      for (;;)
        {
          if (comp (*pb, *pa))
            {
              *dest-- = *pa--; *idest-- = *ipa--;
              ++acount;
              bcount = 0;
              --na;
              if (na == 0)
                goto Succeed;
              if (acount >= min_gallop)
                break;
            }
          else
            {
              *dest-- = *pb--; *idest-- = *ipb--;
              ++bcount;
              acount = 0;
              --nb;
              if (nb == 1)
                goto CopyA;
              if (bcount >= min_gallop)
                break;
            }
        }

      /* One run is winning so consistently that galloping may
       * be a huge win.  So try that, and continue galloping until
       * (if ever) neither run appears to be winning consistently
       * anymore.
       */
      ++min_gallop;
      do
        {
          min_gallop -= (min_gallop > 1);
          m_ms->m_min_gallop = min_gallop;
          k = gallop_right (*pb, basea, na, na-1, comp);
          if (k < 0)
            goto Fail;
          k = na - k;
          acount = k;
          if (k)
            {
              dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
              idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
              pa -= k; ipa -= k;
              na -= k;
              if (na == 0)
                goto Succeed;
            }
          *dest-- = *pb--; *idest-- = *ipb--;
          --nb;
          if (nb == 1)
            goto CopyA;

          k = gallop_left (*pa, baseb, nb, nb-1, comp);
          if (k < 0)
            goto Fail;
          k = nb - k;
          bcount = k;
          if (k)
            {
              dest -= k; idest -= k;
              pb -= k; ipb -= k;
              std::copy (pb+1, pb+1 + k, dest+1);
              std::copy (ipb+1, ipb+1 + k, idest+1);
              nb -= k;
              if (nb == 1)
                goto CopyA;
              /* nb==0 is impossible now if the comparison
               * function is consistent, but we can't assume
               * that it is.
               */
              if (nb == 0)
                goto Succeed;
            }
          *dest-- = *pa--; *idest-- = *ipa--;
          --na;
          if (na == 0)
            goto Succeed;
        } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
      ++min_gallop;     /* penalize it for leaving galloping mode */
      m_ms->m_min_gallop = min_gallop;
    }

Succeed:
  result = 0;

Fail:
  if (nb)
    {
      std::copy (baseb, baseb + nb, dest-(nb-1));
      std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
    }
  return result;

CopyA:
  /* The first element of pb belongs at the front of the merge. */
  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
  pa -= na; ipa -= na;
  *dest = *pb; *idest = *ipb;

  return 0;
}

/* Merge the two runs at stack indices i and i+1.
 * Returns 0 on success, -1 on error.
 */
template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_at (octave_idx_type i, T *data,
                          Comp comp)
{
  T *pa, *pb;
  octave_idx_type na, nb;
  octave_idx_type k;

  pa = data + m_ms->m_pending[i].m_base;
  na = m_ms->m_pending[i].m_len;
  pb = data + m_ms->m_pending[i+1].m_base;
  nb = m_ms->m_pending[i+1].m_len;

  /* Record the length of the combined runs; if i is the 3rd-last
   * run now, also slide over the last run (which isn't involved
   * in this merge).  The current run i+1 goes away in any case.
   */
  m_ms->m_pending[i].m_len = na + nb;
  if (i == m_ms->m_n - 3)
    m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
  m_ms->m_n--;

  /* Where does b start in a?  Elements in a before that can be
   * ignored (already in place).
   */
  k = gallop_right (*pb, pa, na, 0, comp);
  if (k < 0)
    return -1;
  pa += k;
  na -= k;
  if (na == 0)
    return 0;

  /* Where does a end in b?  Elements in b after that can be
   * ignored (already in place).
   */
  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
  if (nb <= 0)
    return nb;

  /* Merge what remains of the runs, using a temp array with
   * min (na, nb) elements.
   */
  if (na <= nb)
    return merge_lo (pa, na, pb, nb, comp);
  else
    return merge_hi (pa, na, pb, nb, comp);
}

template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_at (octave_idx_type i, T *data, octave_idx_type *idx,
                          Comp comp)
{
  T *pa, *pb;
  octave_idx_type *ipa, *ipb;
  octave_idx_type na, nb;
  octave_idx_type k;

  pa = data + m_ms->m_pending[i].m_base;
  ipa = idx + m_ms->m_pending[i].m_base;
  na = m_ms->m_pending[i].m_len;
  pb = data + m_ms->m_pending[i+1].m_base;
  ipb = idx + m_ms->m_pending[i+1].m_base;
  nb = m_ms->m_pending[i+1].m_len;

  /* Record the length of the combined runs; if i is the 3rd-last
   * run now, also slide over the last run (which isn't involved
   * in this merge).  The current run i+1 goes away in any case.
   */
  m_ms->m_pending[i].m_len = na + nb;
  if (i == m_ms->m_n - 3)
    m_ms->m_pending[i+1] = m_ms->m_pending[i+2];
  m_ms->m_n--;

  /* Where does b start in a?  Elements in a before that can be
   * ignored (already in place).
   */
  k = gallop_right (*pb, pa, na, 0, comp);
  if (k < 0)
    return -1;
  pa += k; ipa += k;
  na -= k;
  if (na == 0)
    return 0;

  /* Where does a end in b?  Elements in b after that can be
   * ignored (already in place).
   */
  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
  if (nb <= 0)
    return nb;

  /* Merge what remains of the runs, using a temp array with
   * min (na, nb) elements.
   */
  if (na <= nb)
    return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
  else
    return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
}

/* Examine the stack of runs waiting to be merged, merging adjacent runs
 * until the stack invariants are re-established:
 *
 * 1. len[-3] > len[-2] + len[-1]
 * 2. len[-2] > len[-1]
 *
 * See listsort.txt for more info.
 *
 * Returns 0 on success, -1 on error.
 */
template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_collapse (T *data, Comp comp)
{
  struct s_slice *p = m_ms->m_pending;

  while (m_ms->m_n > 1)
    {
      octave_idx_type n = m_ms->m_n - 2;
      if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
        {
          if (p[n-1].m_len < p[n+1].m_len)
            --n;
          if (merge_at (n, data, comp) < 0)
            return -1;
        }
      else if (p[n].m_len <= p[n+1].m_len)
        {
          if (merge_at (n, data, comp) < 0)
            return -1;
        }
      else
        break;
    }

  return 0;
}

template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp)
{
  struct s_slice *p = m_ms->m_pending;

  while (m_ms->m_n > 1)
    {
      octave_idx_type n = m_ms->m_n - 2;
      if (n > 0 && p[n-1].m_len <= p[n].m_len + p[n+1].m_len)
        {
          if (p[n-1].m_len < p[n+1].m_len)
            --n;
          if (merge_at (n, data, idx, comp) < 0)
            return -1;
        }
      else if (p[n].m_len <= p[n+1].m_len)
        {
          if (merge_at (n, data, idx, comp) < 0)
            return -1;
        }
      else
        break;
    }

  return 0;
}

/* Regardless of invariants, merge all runs on the stack until only one
 * remains.  This is used at the end of the mergesort.
 *
 * Returns 0 on success, -1 on error.
 */
template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_force_collapse (T *data, Comp comp)
{
  struct s_slice *p = m_ms->m_pending;

  while (m_ms->m_n > 1)
    {
      octave_idx_type n = m_ms->m_n - 2;
      if (n > 0 && p[n-1].m_len < p[n+1].m_len)
        --n;
      if (merge_at (n, data, comp) < 0)
        return -1;
    }

  return 0;
}

template <typename T>
template <typename Comp>
int
octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp)
{
  struct s_slice *p = m_ms->m_pending;

  while (m_ms->m_n > 1)
    {
      octave_idx_type n = m_ms->m_n - 2;
      if (n > 0 && p[n-1].m_len < p[n+1].m_len)
        --n;
      if (merge_at (n, data, idx, comp) < 0)
        return -1;
    }

  return 0;
}

/* Compute a good value for the minimum run length; natural runs shorter
 * than this are boosted artificially via binary insertion.
 *
 * If n < 64, return n (it's too small to bother with fancy stuff).
 * Else if n is an exact power of 2, return 32.
 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
 * strictly less than, an exact power of 2.
 *
 * See listsort.txt for more info.
 */
template <typename T>
octave_idx_type
octave_sort<T>::merge_compute_minrun (octave_idx_type n)
{
  octave_idx_type r = 0;        /* becomes 1 if any 1 bits are shifted off */

  while (n >= 64)
    {
      r |= n & 1;
      n >>= 1;
    }

  return n + r;
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
{
  /* Re-initialize the Mergestate as this might be the second time called */
  if (! m_ms) m_ms = new MergeState;

  m_ms->reset ();
  m_ms->getmem (MERGESTATE_TEMP_SIZE);

  if (nel > 1)
    {
      octave_idx_type nremaining = nel;
      octave_idx_type lo = 0;

      /* March over the array once, left to right, finding natural runs,
       * and extending short natural runs to minrun elements.
       */
      octave_idx_type minrun = merge_compute_minrun (nremaining);
      do
        {
          bool descending;
          octave_idx_type n;

          /* Identify next run. */
          n = count_run (data + lo, nremaining, descending, comp);
          if (n < 0)
            return;
          if (descending)
            std::reverse (data + lo, data + lo + n);
          /* If short, extend to min (minrun, nremaining). */
          if (n < minrun)
            {
              const octave_idx_type force = (nremaining <= minrun ? nremaining
                                                                  : minrun);
              binarysort (data + lo, force, n, comp);
              n = force;
            }
          /* Push run onto m_pending-runs stack, and maybe merge. */
          assert (m_ms->m_n < MAX_MERGE_PENDING);
          m_ms->m_pending[m_ms->m_n].m_base = lo;
          m_ms->m_pending[m_ms->m_n].m_len = n;
          m_ms->m_n++;
          if (merge_collapse (data, comp) < 0)
            return;
          /* Advance to find next run. */
          lo += n;
          nremaining -= n;
        }
      while (nremaining);

      merge_force_collapse (data, comp);
    }
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel,
                      Comp comp)
{
  /* Re-initialize the Mergestate as this might be the second time called */
  if (! m_ms) m_ms = new MergeState;

  m_ms->reset ();
  m_ms->getmemi (MERGESTATE_TEMP_SIZE);

  if (nel > 1)
    {
      octave_idx_type nremaining = nel;
      octave_idx_type lo = 0;

      /* March over the array once, left to right, finding natural runs,
       * and extending short natural runs to minrun elements.
       */
      octave_idx_type minrun = merge_compute_minrun (nremaining);
      do
        {
          bool descending;
          octave_idx_type n;

          /* Identify next run. */
          n = count_run (data + lo, nremaining, descending, comp);
          if (n < 0)
            return;
          if (descending)
            {
              std::reverse (data + lo, data + lo + n);
              std::reverse (idx + lo, idx + lo + n);
            }
          /* If short, extend to min (minrun, nremaining). */
          if (n < minrun)
            {
              const octave_idx_type force = (nremaining <= minrun ? nremaining
                                                                  : minrun);
              binarysort (data + lo, idx + lo, force, n, comp);
              n = force;
            }
          /* Push run onto m_pending-runs stack, and maybe merge. */
          assert (m_ms->m_n < MAX_MERGE_PENDING);
          m_ms->m_pending[m_ms->m_n].m_base = lo;
          m_ms->m_pending[m_ms->m_n].m_len = n;
          m_ms->m_n++;
          if (merge_collapse (data, idx, comp) < 0)
            return;
          /* Advance to find next run. */
          lo += n;
          nremaining -= n;
        }
      while (nremaining);

      merge_force_collapse (data, idx, comp);
    }
}

template <typename T>
using compare_fcn_ptr = bool (*) (typename ref_param<T>::type,
                                  typename ref_param<T>::type);

template <typename T>
void
octave_sort<T>::sort (T *data, octave_idx_type nel)
{
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    sort (data, nel, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      sort (data, nel, std::greater<T> ());
    else
#endif
      if (m_compare)
        sort (data, nel, m_compare);
}

template <typename T>
void
octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel)
{
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    sort (data, idx, nel, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      sort (data, idx, nel, std::greater<T> ());
    else
#endif
      if (m_compare)
        sort (data, idx, nel, m_compare);
}

template <typename T>
template <typename Comp>
bool
octave_sort<T>::issorted (const T *data, octave_idx_type nel, Comp comp)
{
  const T *end = data + nel;
  if (data != end)
    {
      const T *next = data;
      while (++next != end)
        {
          if (comp (*next, *data))
            break;
          data = next;
        }
      data = next;
    }

  return data == end;
}

template <typename T>
bool
octave_sort<T>::issorted (const T *data, octave_idx_type nel)
{
  bool retval = false;
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    retval = issorted (data, nel, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      retval = issorted (data, nel, std::greater<T> ());
    else
#endif
      if (m_compare)
        retval = issorted (data, nel, m_compare);

  return retval;
}

struct sortrows_run_t
{
public:
  sortrows_run_t (octave_idx_type c, octave_idx_type o, octave_idx_type n)
    : col (c), ofs (o), nel (n) { }
  //--------
  octave_idx_type col, ofs, nel;
};

template <typename T>
template <typename Comp>
void
octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,
                           octave_idx_type rows, octave_idx_type cols,
                           Comp comp)
{
  OCTAVE_LOCAL_BUFFER (T, buf, rows);
  for (octave_idx_type i = 0; i < rows; i++)
    idx[i] = i;

  if (cols == 0 || rows <= 1)
    return;

  // This is a breadth-first traversal.
  typedef sortrows_run_t run_t;
  std::stack<run_t> runs;

  runs.push (run_t (0, 0, rows));

  while (! runs.empty ())
    {
      octave_idx_type col = runs.top ().col;
      octave_idx_type ofs = runs.top ().ofs;
      octave_idx_type nel = runs.top ().nel;
      runs.pop ();
      assert (nel > 1);

      T *lbuf = buf + ofs;
      const T *ldata = data + rows*col;
      octave_idx_type *lidx = idx + ofs;

      // Gather.
      for (octave_idx_type i = 0; i < nel; i++)
        lbuf[i] = ldata[lidx[i]];

      // Sort.
      sort (lbuf, lidx, nel, comp);

      // Identify constant runs and schedule subsorts.
      if (col < cols-1)
        {
          octave_idx_type lst = 0;
          for (octave_idx_type i = 0; i < nel; i++)
            {
              if (comp (lbuf[lst], lbuf[i]))
                {
                  if (i > lst + 1)
                    runs.push (run_t (col+1, ofs + lst, i - lst));
                  lst = i;
                }
            }
          if (nel > lst + 1)
            runs.push (run_t (col+1, ofs + lst, nel - lst));
        }
    }
}

template <typename T>
void
octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,
                           octave_idx_type rows, octave_idx_type cols)
{
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    sort_rows (data, idx, rows, cols, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      sort_rows (data, idx, rows, cols, std::greater<T> ());
    else
#endif
      if (m_compare)
        sort_rows (data, idx, rows, cols, m_compare);
}

template <typename T>
template <typename Comp>
bool
octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,
                                octave_idx_type cols, Comp comp)
{
  if (rows <= 1 || cols == 0)
    return true;

  // This is a breadth-first traversal.
  const T *lastrow = data + rows*(cols - 1);
  typedef std::pair<const T *, octave_idx_type> run_t;
  std::stack<run_t> runs;

  bool sorted = true;
  runs.push (run_t (data, rows));
  while (sorted && ! runs.empty ())
    {
      const T *lo = runs.top ().first;
      octave_idx_type n = runs.top ().second;
      runs.pop ();
      if (lo < lastrow)
        {
          // Not the final column.
          assert (n > 1);
          const T *hi = lo + n;
          const T *lst = lo;
          for (lo++; lo < hi; lo++)
            {
              if (comp (*lst, *lo))
                {
                  if (lo > lst + 1)
                    runs.push (run_t (lst + rows, lo - lst));
                  lst = lo;
                }
              else if (comp (*lo, *lst))
                break;

            }
          if (lo == hi)
            {
              if (lo > lst + 1)
                runs.push (run_t (lst + rows, lo - lst));
            }
          else
            {
              sorted = false;
              break;
            }
        }
      else
        // The final column - use fast code.
        sorted = issorted (lo, n, comp);
    }

  return sorted;
}

template <typename T>
bool
octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,
                                octave_idx_type cols)
{
  bool retval = false;
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    retval = is_sorted_rows (data, rows, cols, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
    else
#endif
      if (m_compare)
        retval = is_sorted_rows (data, rows, cols, m_compare);

  return retval;
}

// The simple binary lookup.

template <typename T>
template <typename Comp>
octave_idx_type
octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                        const T& value, Comp comp)
{
  octave_idx_type lo = 0;
  octave_idx_type hi = nel;

  while (lo < hi)
    {
      octave_idx_type mid = lo + ((hi-lo) >> 1);
      if (comp (value, data[mid]))
        hi = mid;
      else
        lo = mid + 1;
    }

  return lo;
}

template <typename T>
octave_idx_type
octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                        const T& value)
{
  octave_idx_type retval = 0;
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    retval = lookup (data, nel, value, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      retval = lookup (data, nel, value, std::greater<T> ());
    else
#endif
      if (m_compare)
        retval = lookup (data, nel, value, m_compare);

  return retval;
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                        const T *values, octave_idx_type nvalues,
                        octave_idx_type *idx, Comp comp)
{
  // Use a sequence of binary lookups.
  // FIXME: Can this be sped up generally?  The sorted merge case is dealt with
  // elsewhere.
  for (octave_idx_type j = 0; j < nvalues; j++)
    idx[j] = lookup (data, nel, values[j], comp);
}

template <typename T>
void
octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                        const T *values, octave_idx_type nvalues,
                        octave_idx_type *idx)
{
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    lookup (data, nel, values, nvalues, idx, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      lookup (data, nel, values, nvalues, idx, std::greater<T> ());
    else
#endif
      if (m_compare)
        lookup (data, nel, values, nvalues, idx, m_compare);
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
                               const T *values, octave_idx_type nvalues,
                               octave_idx_type *idx, bool rev, Comp comp)
{
  if (rev)
    {
      octave_idx_type i = 0;
      octave_idx_type j = nvalues - 1;

      if (nvalues > 0 && nel > 0)
        {
          while (true)
            {
              if (comp (values[j], data[i]))
                {
                  idx[j] = i;
                  if (--j < 0)
                    break;
                }
              else if (++i == nel)
                break;
            }
        }

      for (; j >= 0; j--)
        idx[j] = i;
    }
  else
    {
      octave_idx_type i = 0;
      octave_idx_type j = 0;

      if (nvalues > 0 && nel > 0)
        {
          while (true)
            {
              if (comp (values[j], data[i]))
                {
                  idx[j] = i;
                  if (++j == nvalues)
                    break;
                }
              else if (++i == nel)
                break;
            }
        }

      for (; j != nvalues; j++)
        idx[j] = i;
    }
}

template <typename T>
void
octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
                               const T *values, octave_idx_type nvalues,
                               octave_idx_type *idx, bool rev)
{
#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
    else
#endif
      if (m_compare)
        lookup_sorted (data, nel, values, nvalues, idx, rev, m_compare);
}

template <typename T>
template <typename Comp>
void
octave_sort<T>::nth_element (T *data, octave_idx_type nel,
                             octave_idx_type lo, octave_idx_type up,
                             Comp comp)
{
  // Simply wrap the STL algorithms.
  // FIXME: this will fail if we attempt to inline <,> for Complex.
  if (up == lo+1)
    std::nth_element (data, data + lo, data + nel, comp);
  else if (lo == 0)
    std::partial_sort (data, data + up, data + nel, comp);
  else
    {
      std::nth_element (data, data + lo, data + nel, comp);
      if (up == lo + 2)
        {
          // Finding two subsequent elements.
          std::swap (data[lo+1],
                     *std::min_element (data + lo + 1, data + nel, comp));
        }
      else
        std::partial_sort (data + lo + 1, data + up, data + nel, comp);
    }
}

template <typename T>
void
octave_sort<T>::nth_element (T *data, octave_idx_type nel,
                             octave_idx_type lo, octave_idx_type up)
{
  if (up < 0)
    up = lo + 1;

#if defined (INLINE_ASCENDING_SORT)
  if (*m_compare.template target<compare_fcn_ptr<T>> () == ascending_compare)
    nth_element (data, nel, lo, up, std::less<T> ());
  else
#endif
#if defined (INLINE_DESCENDING_SORT)
    if (*m_compare.template target<compare_fcn_ptr<T>> () == descending_compare)
      nth_element (data, nel, lo, up, std::greater<T> ());
    else
#endif
      if (m_compare)
        nth_element (data, nel, lo, up, m_compare);
}

template <typename T>
bool
octave_sort<T>::ascending_compare (typename ref_param<T>::type x,
                                   typename ref_param<T>::type y)
{
  return x < y;
}

template <typename T>
bool
octave_sort<T>::descending_compare (typename ref_param<T>::type x,
                                    typename ref_param<T>::type y)
{
  return x > y;
}