view libinterp/corefcn/symrcm.cc @ 31605:e88a07dec498 stable

maint: Use macros to begin/end C++ namespaces. * oct-conf-post-public.in.h: Define two macros (OCTAVE_BEGIN_NAMESPACE, OCTAVE_END_NAMESPACE) that can be used to start/end a namespace. * mk-opts.pl, build-env.h, build-env.in.cc, __betainc__.cc, __contourc__.cc, __dsearchn__.cc, __eigs__.cc, __expint__.cc, __ftp__.cc, __gammainc__.cc, __ichol__.cc, __ilu__.cc, __isprimelarge__.cc, __lin_interpn__.cc, __magick_read__.cc, __pchip_deriv__.cc, __qp__.cc, amd.cc, auto-shlib.cc, auto-shlib.h, balance.cc, base-text-renderer.cc, base-text-renderer.h, besselj.cc, bitfcns.cc, bsxfun.cc, c-file-ptr-stream.cc, c-file-ptr-stream.h, call-stack.cc, call-stack.h, ccolamd.cc, cellfun.cc, chol.cc, colamd.cc, colloc.cc, conv2.cc, daspk.cc, dasrt.cc, dassl.cc, data.cc, data.h, debug.cc, defaults.cc, defaults.h, defun-int.h, defun.cc, det.cc, dirfns.cc, display.cc, display.h, dlmread.cc, dmperm.cc, dot.cc, dynamic-ld.cc, dynamic-ld.h, eig.cc, ellipj.cc, environment.cc, environment.h, error.cc, error.h, errwarn.h, event-manager.cc, event-manager.h, event-queue.cc, event-queue.h, fcn-info.cc, fcn-info.h, fft.cc, fft2.cc, fftn.cc, file-io.cc, filter.cc, find.cc, ft-text-renderer.cc, ft-text-renderer.h, gcd.cc, getgrent.cc, getpwent.cc, getrusage.cc, givens.cc, gl-render.cc, gl-render.h, gl2ps-print.cc, gl2ps-print.h, graphics-toolkit.cc, graphics-toolkit.h, graphics.cc, graphics.in.h, gsvd.cc, gtk-manager.cc, gtk-manager.h, hash.cc, help.cc, help.h, hess.cc, hex2num.cc, hook-fcn.cc, hook-fcn.h, input.cc, input.h, interpreter-private.cc, interpreter-private.h, interpreter.cc, interpreter.h, inv.cc, jsondecode.cc, jsonencode.cc, kron.cc, latex-text-renderer.cc, latex-text-renderer.h, load-path.cc, load-path.h, load-save.cc, load-save.h, lookup.cc, ls-ascii-helper.cc, ls-ascii-helper.h, ls-oct-text.cc, ls-utils.cc, ls-utils.h, lsode.cc, lu.cc, mappers.cc, matrix_type.cc, max.cc, mex-private.h, mex.cc, mgorth.cc, nproc.cc, oct-fstrm.cc, oct-fstrm.h, oct-hdf5-types.cc, oct-hdf5-types.h, oct-hist.cc, oct-hist.h, oct-iostrm.cc, oct-iostrm.h, oct-opengl.h, oct-prcstrm.cc, oct-prcstrm.h, oct-procbuf.cc, oct-procbuf.h, oct-process.cc, oct-process.h, oct-stdstrm.h, oct-stream.cc, oct-stream.h, oct-strstrm.cc, oct-strstrm.h, oct-tex-lexer.in.ll, oct-tex-parser.yy, ordqz.cc, ordschur.cc, pager.cc, pager.h, pinv.cc, pow2.cc, pr-flt-fmt.cc, pr-output.cc, procstream.cc, procstream.h, psi.cc, qr.cc, quad.cc, quadcc.cc, qz.cc, rand.cc, rcond.cc, regexp.cc, schur.cc, settings.cc, settings.h, sighandlers.cc, sighandlers.h, sparse-xdiv.cc, sparse-xdiv.h, sparse-xpow.cc, sparse-xpow.h, sparse.cc, spparms.cc, sqrtm.cc, stack-frame.cc, stack-frame.h, stream-euler.cc, strfind.cc, strfns.cc, sub2ind.cc, svd.cc, sylvester.cc, symbfact.cc, syminfo.cc, syminfo.h, symrcm.cc, symrec.cc, symrec.h, symscope.cc, symscope.h, symtab.cc, symtab.h, syscalls.cc, sysdep.cc, sysdep.h, text-engine.cc, text-engine.h, text-renderer.cc, text-renderer.h, time.cc, toplev.cc, tril.cc, tsearch.cc, typecast.cc, url-handle-manager.cc, url-handle-manager.h, urlwrite.cc, utils.cc, utils.h, variables.cc, variables.h, xdiv.cc, xdiv.h, xnorm.cc, xnorm.h, xpow.cc, xpow.h, __delaunayn__.cc, __fltk_uigetfile__.cc, __glpk__.cc, __init_fltk__.cc, __init_gnuplot__.cc, __ode15__.cc, __voronoi__.cc, audiodevinfo.cc, audioread.cc, convhulln.cc, fftw.cc, gzip.cc, mk-build-env-features.sh, mk-builtins.pl, cdef-class.cc, cdef-class.h, cdef-fwd.h, cdef-manager.cc, cdef-manager.h, cdef-method.cc, cdef-method.h, cdef-object.cc, cdef-object.h, cdef-package.cc, cdef-package.h, cdef-property.cc, cdef-property.h, cdef-utils.cc, cdef-utils.h, ov-base.cc, ov-base.h, ov-bool-mat.cc, ov-builtin.h, ov-cell.cc, ov-class.cc, ov-class.h, ov-classdef.cc, ov-classdef.h, ov-complex.cc, ov-fcn-handle.cc, ov-fcn-handle.h, ov-fcn.h, ov-java.cc, ov-java.h, ov-mex-fcn.h, ov-null-mat.cc, ov-oncleanup.cc, ov-struct.cc, ov-typeinfo.cc, ov-typeinfo.h, ov-usr-fcn.cc, ov-usr-fcn.h, ov.cc, ov.h, octave.cc, octave.h, mk-ops.sh, op-b-b.cc, op-b-bm.cc, op-b-sbm.cc, op-bm-b.cc, op-bm-bm.cc, op-bm-sbm.cc, op-cdm-cdm.cc, op-cell.cc, op-chm.cc, op-class.cc, op-cm-cm.cc, op-cm-cs.cc, op-cm-m.cc, op-cm-s.cc, op-cm-scm.cc, op-cm-sm.cc, op-cs-cm.cc, op-cs-cs.cc, op-cs-m.cc, op-cs-s.cc, op-cs-scm.cc, op-cs-sm.cc, op-dm-dm.cc, op-dm-scm.cc, op-dm-sm.cc, op-dm-template.cc, op-dms-template.cc, op-fcdm-fcdm.cc, op-fcm-fcm.cc, op-fcm-fcs.cc, op-fcm-fm.cc, op-fcm-fs.cc, op-fcn.cc, op-fcs-fcm.cc, op-fcs-fcs.cc, op-fcs-fm.cc, op-fcs-fs.cc, op-fdm-fdm.cc, op-fm-fcm.cc, op-fm-fcs.cc, op-fm-fm.cc, op-fm-fs.cc, op-fs-fcm.cc, op-fs-fcs.cc, op-fs-fm.cc, op-fs-fs.cc, op-i16-i16.cc, op-i32-i32.cc, op-i64-i64.cc, op-i8-i8.cc, op-int-concat.cc, op-m-cm.cc, op-m-cs.cc, op-m-m.cc, op-m-s.cc, op-m-scm.cc, op-m-sm.cc, op-mi.cc, op-pm-pm.cc, op-pm-scm.cc, op-pm-sm.cc, op-pm-template.cc, op-range.cc, op-s-cm.cc, op-s-cs.cc, op-s-m.cc, op-s-s.cc, op-s-scm.cc, op-s-sm.cc, op-sbm-b.cc, op-sbm-bm.cc, op-sbm-sbm.cc, op-scm-cm.cc, op-scm-cs.cc, op-scm-m.cc, op-scm-s.cc, op-scm-scm.cc, op-scm-sm.cc, op-sm-cm.cc, op-sm-cs.cc, op-sm-m.cc, op-sm-s.cc, op-sm-scm.cc, op-sm-sm.cc, op-str-m.cc, op-str-s.cc, op-str-str.cc, op-struct.cc, op-ui16-ui16.cc, op-ui32-ui32.cc, op-ui64-ui64.cc, op-ui8-ui8.cc, ops.h, anon-fcn-validator.cc, anon-fcn-validator.h, bp-table.cc, bp-table.h, comment-list.cc, comment-list.h, filepos.h, lex.h, lex.ll, oct-lvalue.cc, oct-lvalue.h, oct-parse.yy, parse.h, profiler.cc, profiler.h, pt-anon-scopes.cc, pt-anon-scopes.h, pt-arg-list.cc, pt-arg-list.h, pt-args-block.cc, pt-args-block.h, pt-array-list.cc, pt-array-list.h, pt-assign.cc, pt-assign.h, pt-binop.cc, pt-binop.h, pt-bp.cc, pt-bp.h, pt-cbinop.cc, pt-cbinop.h, pt-cell.cc, pt-cell.h, pt-check.cc, pt-check.h, pt-classdef.cc, pt-classdef.h, pt-cmd.h, pt-colon.cc, pt-colon.h, pt-const.cc, pt-const.h, pt-decl.cc, pt-decl.h, pt-eval.cc, pt-eval.h, pt-except.cc, pt-except.h, pt-exp.cc, pt-exp.h, pt-fcn-handle.cc, pt-fcn-handle.h, pt-id.cc, pt-id.h, pt-idx.cc, pt-idx.h, pt-jump.h, pt-loop.cc, pt-loop.h, pt-mat.cc, pt-mat.h, pt-misc.cc, pt-misc.h, pt-pr-code.cc, pt-pr-code.h, pt-select.cc, pt-select.h, pt-spmd.cc, pt-spmd.h, pt-stmt.cc, pt-stmt.h, pt-tm-const.cc, pt-tm-const.h, pt-unop.cc, pt-unop.h, pt-vm-eval.cc, pt-walk.cc, pt-walk.h, pt.cc, pt.h, token.cc, token.h, Range.cc, Range.h, idx-vector.cc, idx-vector.h, range-fwd.h, CollocWt.cc, CollocWt.h, aepbalance.cc, aepbalance.h, chol.cc, chol.h, gepbalance.cc, gepbalance.h, gsvd.cc, gsvd.h, hess.cc, hess.h, lo-mappers.cc, lo-mappers.h, lo-specfun.cc, lo-specfun.h, lu.cc, lu.h, oct-convn.cc, oct-convn.h, oct-fftw.cc, oct-fftw.h, oct-norm.cc, oct-norm.h, oct-rand.cc, oct-rand.h, oct-spparms.cc, oct-spparms.h, qr.cc, qr.h, qrp.cc, qrp.h, randgamma.cc, randgamma.h, randmtzig.cc, randmtzig.h, randpoisson.cc, randpoisson.h, schur.cc, schur.h, sparse-chol.cc, sparse-chol.h, sparse-lu.cc, sparse-lu.h, sparse-qr.cc, sparse-qr.h, svd.cc, svd.h, child-list.cc, child-list.h, dir-ops.cc, dir-ops.h, file-ops.cc, file-ops.h, file-stat.cc, file-stat.h, lo-sysdep.cc, lo-sysdep.h, lo-sysinfo.cc, lo-sysinfo.h, mach-info.cc, mach-info.h, oct-env.cc, oct-env.h, oct-group.cc, oct-group.h, oct-password.cc, oct-password.h, oct-syscalls.cc, oct-syscalls.h, oct-time.cc, oct-time.h, oct-uname.cc, oct-uname.h, action-container.cc, action-container.h, base-list.h, cmd-edit.cc, cmd-edit.h, cmd-hist.cc, cmd-hist.h, f77-fcn.h, file-info.cc, file-info.h, lo-array-errwarn.cc, lo-array-errwarn.h, lo-hash.cc, lo-hash.h, lo-ieee.h, lo-regexp.cc, lo-regexp.h, lo-utils.cc, lo-utils.h, oct-base64.cc, oct-base64.h, oct-glob.cc, oct-glob.h, oct-inttypes.h, oct-mutex.cc, oct-mutex.h, oct-refcount.h, oct-shlib.cc, oct-shlib.h, oct-sparse.cc, oct-sparse.h, oct-string.h, octave-preserve-stream-state.h, pathsearch.cc, pathsearch.h, quit.cc, quit.h, unwind-prot.cc, unwind-prot.h, url-transfer.cc, url-transfer.h : Use new macros to begin/end C++ namespaces.
author Rik <rik@octave.org>
date Thu, 01 Dec 2022 14:23:45 -0800
parents 796f54d4ddbf
children aac27ad79be6
line wrap: on
line source

////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2007-2022 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////

/*
An implementation of the Reverse Cuthill-McKee algorithm (symrcm)

The implementation of this algorithm is based in the descriptions found in

@INPROCEEDINGS{,
        author = {E. Cuthill and J. McKee},
        title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
        booktitle = {Proceedings of the 24th ACM National Conference},
        publisher = {Brandon Press},
        pages = {157 -- 172},
        location = {New Jersey},
        year = {1969}
}

@BOOK{,
        author = {Alan George and Joseph W. H. Liu},
        title = {Computer Solution of Large Sparse Positive Definite Systems},
        publisher = {Prentice Hall Series in Computational Mathematics},
        ISBN = {0-13-165274-5},
        year = {1981}
}

The algorithm represents a heuristic approach to the NP-complete minimum
bandwidth problem.

Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
                           <weitzel@@ldknet.org>
*/

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#include <algorithm>

#include "CSparse.h"
#include "boolNDArray.h"
#include "dNDArray.h"
#include "dSparse.h"
#include "oct-locbuf.h"
#include "oct-sparse.h"
#include "quit.h"

#include "defun.h"
#include "errwarn.h"
#include "ov.h"
#include "ovl.h"

OCTAVE_BEGIN_NAMESPACE(octave)

// A node struct for the Cuthill-McKee algorithm
struct CMK_Node
{
  // the node's id (matrix row index)
  octave_idx_type id;
  // the node's degree
  octave_idx_type deg;
  // minimal distance to the root of the spanning tree
  octave_idx_type dist;
};

// A simple queue.
// Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
// stored in an array.  qh and qt point to queue head and tail.

// Enqueue operation (adds a node "o" at the tail)

inline static void
Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qt, const CMK_Node& o)
{
  Q[qt] = o;
  qt = (qt + 1) % (N + 1);
}

// Dequeue operation (removes a node from the head)

inline static CMK_Node
Q_deq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qh)
{
  CMK_Node r = Q[qh];
  qh = (qh + 1) % (N + 1);
  return r;
}

// Predicate (queue empty)
#define Q_empty(Q, N, qh, qt)   ((qh) == (qt))

// A simple, array-based binary heap (used as a priority queue for nodes)

// the left descendant of entry i
#define LEFT(i)         (((i) << 1) + 1)        // = (2*(i)+1)
// the right descendant of entry i
#define RIGHT(i)        (((i) << 1) + 2)        // = (2*(i)+2)
// the parent of entry i
#define PARENT(i)       (((i) - 1) >> 1)        // = floor(((i)-1)/2)

// Builds a min-heap (the root contains the smallest element).  A is an array
// with the graph's nodes, i is a starting position, size is the length of A.

static void
H_heapify_min (CMK_Node *A, octave_idx_type i, octave_idx_type size)
{
  octave_idx_type j = i;
  for (;;)
    {
      octave_idx_type l = LEFT(j);
      octave_idx_type r = RIGHT(j);

      octave_idx_type smallest;
      if (l < size && A[l].deg < A[j].deg)
        smallest = l;
      else
        smallest = j;

      if (r < size && A[r].deg < A[smallest].deg)
        smallest = r;

      if (smallest != j)
        {
          std::swap (A[j], A[smallest]);
          j = smallest;
        }
      else
        break;
    }
}

// Heap operation insert.  Running time is O(log(n))

static void
H_insert (CMK_Node *H, octave_idx_type& h, const CMK_Node& o)
{
  octave_idx_type i = h++;

  H[i] = o;

  if (i == 0)
    return;
  do
    {
      octave_idx_type p = PARENT(i);
      if (H[i].deg < H[p].deg)
        {
          std::swap (H[i], H[p]);

          i = p;
        }
      else
        break;
    }
  while (i > 0);
}

// Heap operation remove-min.  Removes the smallest element in O(1) and
// reorganizes the heap optionally in O(log(n))

inline static CMK_Node
H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
{
  CMK_Node r = H[0];
  H[0] = H[--h];
  if (reorg)
    H_heapify_min (H, 0, h);
  return r;
}

// Predicate (heap empty)
#define H_empty(H, h)   ((h) == 0)

// Helper function for the Cuthill-McKee algorithm.  Tries to determine a
// pseudo-peripheral node of the graph as starting node.

static octave_idx_type
find_starting_node (octave_idx_type N, const octave_idx_type *ridx,
                    const octave_idx_type *cidx, const octave_idx_type *ridx2,
                    const octave_idx_type *cidx2, octave_idx_type *D,
                    octave_idx_type start)
{
  CMK_Node w;

  OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);
  boolNDArray btmp (dim_vector (1, N), false);
  bool *visit = btmp.fortran_vec ();

  octave_idx_type qh = 0;
  octave_idx_type qt = 0;
  CMK_Node x;
  x.id = start;
  x.deg = D[start];
  x.dist = 0;
  Q_enq (Q, N, qt, x);
  visit[start] = true;

  // distance level
  octave_idx_type level = 0;
  // current largest "eccentricity"
  octave_idx_type max_dist = 0;

  for (;;)
    {
      while (! Q_empty (Q, N, qh, qt))
        {
          CMK_Node v = Q_deq (Q, N, qh);

          if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
            x = v;

          octave_idx_type i = v.id;

          // add all unvisited neighbors to the queue
          octave_idx_type j1 = cidx[i];
          octave_idx_type j2 = cidx2[i];
          while (j1 < cidx[i+1] || j2 < cidx2[i+1])
            {
              octave_quit ();

              if (j1 == cidx[i+1])
                {
                  octave_idx_type r2 = ridx2[j2++];
                  if (! visit[r2])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r2;
                      w.deg = D[r2];
                      w.dist = v.dist+1;
                      Q_enq (Q, N, qt, w);
                      visit[r2] = true;

                      if (w.dist > level)
                        level = w.dist;
                    }
                }
              else if (j2 == cidx2[i+1])
                {
                  octave_idx_type r1 = ridx[j1++];
                  if (! visit[r1])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r1;
                      w.deg = D[r1];
                      w.dist = v.dist+1;
                      Q_enq (Q, N, qt, w);
                      visit[r1] = true;

                      if (w.dist > level)
                        level = w.dist;
                    }
                }
              else
                {
                  octave_idx_type r1 = ridx[j1];
                  octave_idx_type r2 = ridx2[j2];
                  if (r1 <= r2)
                    {
                      if (! visit[r1])
                        {
                          w.id = r1;
                          w.deg = D[r1];
                          w.dist = v.dist+1;
                          Q_enq (Q, N, qt, w);
                          visit[r1] = true;

                          if (w.dist > level)
                            level = w.dist;
                        }
                      j1++;
                      if (r1 == r2)
                        j2++;
                    }
                  else
                    {
                      if (! visit[r2])
                        {
                          w.id = r2;
                          w.deg = D[r2];
                          w.dist = v.dist+1;
                          Q_enq (Q, N, qt, w);
                          visit[r2] = true;

                          if (w.dist > level)
                            level = w.dist;
                        }
                      j2++;
                    }
                }
            }
        } // finish of BFS

      if (max_dist < x.dist)
        {
          max_dist = x.dist;

          for (octave_idx_type i = 0; i < N; i++)
            visit[i] = false;

          visit[x.id] = true;
          x.dist = 0;
          qt = qh = 0;
          Q_enq (Q, N, qt, x);
        }
      else
        break;
    }
  return x.id;
}

// Calculates the node's degrees.  This means counting the nonzero elements
// in the symmetric matrix' rows.  This works for non-symmetric matrices
// as well.

static octave_idx_type
calc_degrees (octave_idx_type N, const octave_idx_type *ridx,
              const octave_idx_type *cidx, octave_idx_type *D)
{
  octave_idx_type max_deg = 0;

  for (octave_idx_type i = 0; i < N; i++)
    D[i] = 0;

  for (octave_idx_type j = 0; j < N; j++)
    {
      for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
        {
          octave_quit ();

          octave_idx_type k = ridx[i];
          // there is a nonzero element (k,j)
          D[k]++;
          if (D[k] > max_deg)
            max_deg = D[k];
          // if there is no element (j,k) there is one in
          // the symmetric matrix:
          if (k != j)
            {
              bool found = false;
              for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
                {
                  octave_quit ();

                  if (ridx[l] == j)
                    {
                      found = true;
                      break;
                    }
                  else if (ridx[l] > j)
                    break;
                }

              if (! found)
                {
                  // A(j,k) == 0
                  D[j]++;
                  if (D[j] > max_deg)
                    max_deg = D[j];
                }
            }
        }
    }
  return max_deg;
}

// Transpose of the structure of a square sparse matrix

static void
transpose (octave_idx_type N, const octave_idx_type *ridx,
           const octave_idx_type *cidx, octave_idx_type *ridx2,
           octave_idx_type *cidx2)
{
  octave_idx_type nz = cidx[N];

  OCTAVE_LOCAL_BUFFER (octave_idx_type, w, N + 1);
  for (octave_idx_type i = 0; i < N; i++)
    w[i] = 0;
  for (octave_idx_type i = 0; i < nz; i++)
    w[ridx[i]]++;
  nz = 0;
  for (octave_idx_type i = 0; i < N; i++)
    {
      octave_quit ();

      cidx2[i] = nz;
      nz += w[i];
      w[i] = cidx2[i];
    }
  cidx2[N] = nz;
  w[N] = nz;

  for (octave_idx_type j = 0; j < N; j++)
    for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
      {
        octave_quit ();

        octave_idx_type q = w[ridx[k]]++;
        ridx2[q] = j;
      }
}

// An implementation of the Cuthill-McKee algorithm.
DEFUN (symrcm, args, ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{p} =} symrcm (@var{S})
Return the symmetric reverse @nospell{Cuthill-McKee} permutation of @var{S}.

@var{p} is a permutation vector such that
@code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements closer
to the diagonal than @var{S}.  This is a good preordering for LU or
Cholesky@tie{}factorization of matrices that come from ``long, skinny''
problems.  It works for both symmetric and asymmetric @var{S}.

The algorithm represents a heuristic approach to the NP-complete bandwidth
minimization problem.  The implementation is based in the descriptions found
in

@nospell{E. Cuthill, J. McKee}.
@cite{Reducing the Bandwidth of Sparse Symmetric Matrices}.
Proceedings of the 24th @nospell{ACM} National Conference,
157--172 1969, Brandon Press, New Jersey.

@nospell{A. George, J.W.H. Liu}.  @cite{Computer Solution of Large Sparse
Positive Definite Systems}, Prentice Hall Series in Computational
Mathematics, ISBN 0-13-165274-5, 1981.

@seealso{colperm, colamd, symamd}
@end deftypefn */)
{
  if (args.length () != 1)
    print_usage ();

  octave_value arg = args(0);

  // the parameter of the matrix is converted into a sparse matrix
  //(if necessary)
  octave_idx_type *cidx;
  octave_idx_type *ridx;
  SparseMatrix Ar;
  SparseComplexMatrix Ac;

  if (arg.isreal ())
    {
      Ar = arg.sparse_matrix_value ();
      // Note cidx/ridx are const, so use xridx and xcidx...
      cidx = Ar.xcidx ();
      ridx = Ar.xridx ();
    }
  else
    {
      Ac = arg.sparse_complex_matrix_value ();
      cidx = Ac.xcidx ();
      ridx = Ac.xridx ();
    }

  octave_idx_type nr = arg.rows ();
  octave_idx_type nc = arg.columns ();

  if (nr != nc)
    err_square_matrix_required ("symrcm", "S");

  if (nr == 0 && nc == 0)
    return ovl (NDArray (dim_vector (1, 0)));

  // sizes of the heaps
  octave_idx_type s = 0;

  // head- and tail-indices for the queue
  octave_idx_type qt = 0;
  octave_idx_type qh = 0;
  CMK_Node v, w;
  // dimension of the matrix
  octave_idx_type N = nr;

  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
  transpose (N, ridx, cidx, ridx2, cidx2);

  // the permutation vector
  NDArray P (dim_vector (1, N));

  // compute the node degrees
  OCTAVE_LOCAL_BUFFER (octave_idx_type, D, N);
  octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);

  // if none of the nodes has a degree > 0 (a matrix of zeros)
  // the return value corresponds to the identity permutation
  if (max_deg == 0)
    {
      for (octave_idx_type i = 0; i < N; i++)
        P(i) = i;

      return ovl (P);
    }

  // a heap for the a node's neighbors.  The number of neighbors is
  // limited by the maximum degree max_deg:
  OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);

  // a queue for the BFS.  The array is always one element larger than
  // the number of entries that are stored.
  OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);

  // a counter (for building the permutation)
  octave_idx_type c = -1;

  // upper bound for the bandwidth (=quality of solution)
  // initialize the bandwidth of the graph with 0.  B contains the
  // the maximum of the theoretical lower limits of the subgraphs
  // bandwidths.
  octave_idx_type B = 0;

  // mark all nodes as unvisited; with the exception of the nodes
  // that have degree==0 and build a CC of the graph.

  boolNDArray btmp (dim_vector (1, N), false);
  bool *visit = btmp.fortran_vec ();

  do
    {
      // locate an unvisited starting node of the graph
      octave_idx_type i;
      for (i = 0; i < N; i++)
        if (! visit[i])
          break;

      // locate a probably better starting node
      v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);

      // mark the node as visited and enqueue it (a starting node
      // for the BFS).  Since the node will be a root of a spanning
      // tree, its dist is 0.
      v.deg = D[v.id];
      v.dist = 0;
      visit[v.id] = true;
      Q_enq (Q, N, qt, v);

      // lower bound for the bandwidth of a subgraph
      // keep a "level" in the spanning tree (= min. distance to the
      // root) for determining the bandwidth of the computed
      // permutation P
      octave_idx_type Bsub = 0;
      // min. dist. to the root is 0
      octave_idx_type level = 0;
      // the root is the first/only node on level 0
      octave_idx_type level_N = 1;

      while (! Q_empty (Q, N, qh, qt))
        {
          v = Q_deq (Q, N, qh);
          i = v.id;

          c++;

          // for computing the inverse permutation P where
          // A(inv(P),inv(P)) or P'*A*P is banded
          //         P(i) = c;

          // for computing permutation P where
          // A(P(i),P(j)) or P*A*P' is banded
          P(c) = i;

          // put all unvisited neighbors j of node i on the heap
          s = 0;
          octave_idx_type j1 = cidx[i];
          octave_idx_type j2 = cidx2[i];

          octave_quit ();

          while (j1 < cidx[i+1] || j2 < cidx2[i+1])
            {
              octave_quit ();

              if (j1 == cidx[i+1])
                {
                  octave_idx_type r2 = ridx2[j2++];
                  if (! visit[r2])
                    {
                      // the distance of node j is dist(i)+1
                      w.id = r2;
                      w.deg = D[r2];
                      w.dist = v.dist+1;
                      H_insert (S, s, w);
                      visit[r2] = true;
                    }
                }
              else if (j2 == cidx2[i+1])
                {
                  octave_idx_type r1 = ridx[j1++];
                  if (! visit[r1])
                    {
                      w.id = r1;
                      w.deg = D[r1];
                      w.dist = v.dist+1;
                      H_insert (S, s, w);
                      visit[r1] = true;
                    }
                }
              else
                {
                  octave_idx_type r1 = ridx[j1];
                  octave_idx_type r2 = ridx2[j2];
                  if (r1 <= r2)
                    {
                      if (! visit[r1])
                        {
                          w.id = r1;
                          w.deg = D[r1];
                          w.dist = v.dist+1;
                          H_insert (S, s, w);
                          visit[r1] = true;
                        }
                      j1++;
                      if (r1 == r2)
                        j2++;
                    }
                  else
                    {
                      if (! visit[r2])
                        {
                          w.id = r2;
                          w.deg = D[r2];
                          w.dist = v.dist+1;
                          H_insert (S, s, w);
                          visit[r2] = true;
                        }
                      j2++;
                    }
                }
            }

          // add the neighbors to the queue (sorted by node degree)
          while (! H_empty (S, s))
            {
              octave_quit ();

              // locate a neighbor of i with minimal degree in O(log(N))
              v = H_remove_min (S, s, 1);

              // entered the BFS a new level?
              if (v.dist > level)
                {
                  // adjustment of bandwidth:
                  // "[...] the minimum bandwidth that
                  // can be obtained [...] is the
                  //  maximum number of nodes per level"
                  if (Bsub < level_N)
                    Bsub = level_N;

                  level = v.dist;
                  // v is the first node on the new level
                  level_N = 1;
                }
              else
                {
                  // there is no new level but another node on
                  // this level:
                  level_N++;
                }

              // enqueue v in O(1)
              Q_enq (Q, N, qt, v);
            }

          // synchronize the bandwidth with level_N once again:
          if (Bsub < level_N)
            Bsub = level_N;
        }
      // finish of BFS.  If there are still unvisited nodes in the graph
      // then it is split into CCs.  The computed bandwidth is the maximum
      // of all subgraphs.  Update:
      if (Bsub > B)
        B = Bsub;
    }
  // are there any nodes left?
  while (c+1 < N);

  // compute the reverse-ordering
  s = N / 2 - 1;
  for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
    std::swap (P.elem (i), P.elem (j));

  // increment all indices, since Octave is not C
  return ovl (P+1);
}

OCTAVE_END_NAMESPACE(octave)