Mercurial > octave
diff libinterp/corefcn/symrcm.cc @ 28024:c28b8ba841fb
move sparse functions from dldfcn to corefcn directory (bug #57459)
Octave is always linked with SuiteSparse libraries to support
functions in liboctave so there is no advantage to dynamically load
the interpreter DEFUN functions that depend on SuiteSparse.
* __eigs__.cc, amd.cc, ccolamd.cc, chol.cc, colamd.cc, dmperm.cc,
qr.cc, symbfact.cc, symrcm.cc: Move from dldfcn to corefcn directory.
Define functions with DEFUN or DEFMETHOD instead of DEFUN_DLD or
DEFMETHOD_DLD.
* libinterp/dldfcn/module-files, libinterp/corefcn/module.mk: Update.
* amd.cc (Famd): Don't mlock function.
* symbfact.cc (Fsymbfact): Don't mlock function.
* variables.cc, type.m, which.m: Update tests.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Wed, 29 Jan 2020 06:30:40 -0500 |
parents | libinterp/dldfcn/symrcm.cc@c20b7290c778 |
children | 0a5b15007766 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/corefcn/symrcm.cc Wed Jan 29 06:30:40 2020 -0500 @@ -0,0 +1,705 @@ +//////////////////////////////////////////////////////////////////////// +// +// Copyright (C) 2007-2020 The Octave Project Developers +// +// See the file COPYRIGHT.md in the top-level directory of this +// distribution or <https://octave.org/copyright/>. +// +// This file is part of Octave. +// +// Octave is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Octave is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Octave; see the file COPYING. If not, see +// <https://www.gnu.org/licenses/>. +// +//////////////////////////////////////////////////////////////////////// + +/* +An implementation of the Reverse Cuthill-McKee algorithm (symrcm) + +The implementation of this algorithm is based in the descriptions found in + +@INPROCEEDINGS{, + author = {E. Cuthill and J. McKee}, + title = {Reducing the Bandwidth of Sparse Symmetric Matrices}, + booktitle = {Proceedings of the 24th ACM National Conference}, + publisher = {Brandon Press}, + pages = {157 -- 172}, + location = {New Jersey}, + year = {1969} +} + +@BOOK{, + author = {Alan George and Joseph W. H. Liu}, + title = {Computer Solution of Large Sparse Positive Definite Systems}, + publisher = {Prentice Hall Series in Computational Mathematics}, + ISBN = {0-13-165274-5}, + year = {1981} +} + +The algorithm represents a heuristic approach to the NP-complete minimum +bandwidth problem. + +Written by Michael Weitzel <michael.weitzel@@uni-siegen.de> + <weitzel@@ldknet.org> +*/ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include <algorithm> + +#include "CSparse.h" +#include "boolNDArray.h" +#include "dNDArray.h" +#include "dSparse.h" +#include "oct-locbuf.h" +#include "oct-sparse.h" +#include "quit.h" + +#include "defun.h" +#include "errwarn.h" +#include "ov.h" +#include "ovl.h" + +// A node struct for the Cuthill-McKee algorithm +struct CMK_Node +{ + // the node's id (matrix row index) + octave_idx_type id; + // the node's degree + octave_idx_type deg; + // minimal distance to the root of the spanning tree + octave_idx_type dist; +}; + +// A simple queue. +// Queues Q have a fixed maximum size N (rows,cols of the matrix) and are +// stored in an array. qh and qt point to queue head and tail. + +// Enqueue operation (adds a node "o" at the tail) + +inline static void +Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qt, const CMK_Node& o) +{ + Q[qt] = o; + qt = (qt + 1) % (N + 1); +} + +// Dequeue operation (removes a node from the head) + +inline static CMK_Node +Q_deq (CMK_Node * Q, octave_idx_type N, octave_idx_type& qh) +{ + CMK_Node r = Q[qh]; + qh = (qh + 1) % (N + 1); + return r; +} + +// Predicate (queue empty) +#define Q_empty(Q, N, qh, qt) ((qh) == (qt)) + +// A simple, array-based binary heap (used as a priority queue for nodes) + +// the left descendant of entry i +#define LEFT(i) (((i) << 1) + 1) // = (2*(i)+1) +// the right descendant of entry i +#define RIGHT(i) (((i) << 1) + 2) // = (2*(i)+2) +// the parent of entry i +#define PARENT(i) (((i) - 1) >> 1) // = floor(((i)-1)/2) + +// Builds a min-heap (the root contains the smallest element). A is an array +// with the graph's nodes, i is a starting position, size is the length of A. + +static void +H_heapify_min (CMK_Node *A, octave_idx_type i, octave_idx_type size) +{ + octave_idx_type j = i; + for (;;) + { + octave_idx_type l = LEFT(j); + octave_idx_type r = RIGHT(j); + + octave_idx_type smallest; + if (l < size && A[l].deg < A[j].deg) + smallest = l; + else + smallest = j; + + if (r < size && A[r].deg < A[smallest].deg) + smallest = r; + + if (smallest != j) + { + std::swap (A[j], A[smallest]); + j = smallest; + } + else + break; + } +} + +// Heap operation insert. Running time is O(log(n)) + +static void +H_insert (CMK_Node *H, octave_idx_type& h, const CMK_Node& o) +{ + octave_idx_type i = h++; + + H[i] = o; + + if (i == 0) + return; + do + { + octave_idx_type p = PARENT(i); + if (H[i].deg < H[p].deg) + { + std::swap (H[i], H[p]); + + i = p; + } + else + break; + } + while (i > 0); +} + +// Heap operation remove-min. Removes the smallest element in O(1) and +// reorganizes the heap optionally in O(log(n)) + +inline static CMK_Node +H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/) +{ + CMK_Node r = H[0]; + H[0] = H[--h]; + if (reorg) + H_heapify_min (H, 0, h); + return r; +} + +// Predicate (heap empty) +#define H_empty(H, h) ((h) == 0) + +// Helper function for the Cuthill-McKee algorithm. Tries to determine a +// pseudo-peripheral node of the graph as starting node. + +static octave_idx_type +find_starting_node (octave_idx_type N, const octave_idx_type *ridx, + const octave_idx_type *cidx, const octave_idx_type *ridx2, + const octave_idx_type *cidx2, octave_idx_type *D, + octave_idx_type start) +{ + CMK_Node w; + + OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1); + boolNDArray btmp (dim_vector (1, N), false); + bool *visit = btmp.fortran_vec (); + + octave_idx_type qh = 0; + octave_idx_type qt = 0; + CMK_Node x; + x.id = start; + x.deg = D[start]; + x.dist = 0; + Q_enq (Q, N, qt, x); + visit[start] = true; + + // distance level + octave_idx_type level = 0; + // current largest "eccentricity" + octave_idx_type max_dist = 0; + + for (;;) + { + while (! Q_empty (Q, N, qh, qt)) + { + CMK_Node v = Q_deq (Q, N, qh); + + if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg)) + x = v; + + octave_idx_type i = v.id; + + // add all unvisited neighbors to the queue + octave_idx_type j1 = cidx[i]; + octave_idx_type j2 = cidx2[i]; + while (j1 < cidx[i+1] || j2 < cidx2[i+1]) + { + octave_quit (); + + if (j1 == cidx[i+1]) + { + octave_idx_type r2 = ridx2[j2++]; + if (! visit[r2]) + { + // the distance of node j is dist(i)+1 + w.id = r2; + w.deg = D[r2]; + w.dist = v.dist+1; + Q_enq (Q, N, qt, w); + visit[r2] = true; + + if (w.dist > level) + level = w.dist; + } + } + else if (j2 == cidx2[i+1]) + { + octave_idx_type r1 = ridx[j1++]; + if (! visit[r1]) + { + // the distance of node j is dist(i)+1 + w.id = r1; + w.deg = D[r1]; + w.dist = v.dist+1; + Q_enq (Q, N, qt, w); + visit[r1] = true; + + if (w.dist > level) + level = w.dist; + } + } + else + { + octave_idx_type r1 = ridx[j1]; + octave_idx_type r2 = ridx2[j2]; + if (r1 <= r2) + { + if (! visit[r1]) + { + w.id = r1; + w.deg = D[r1]; + w.dist = v.dist+1; + Q_enq (Q, N, qt, w); + visit[r1] = true; + + if (w.dist > level) + level = w.dist; + } + j1++; + if (r1 == r2) + j2++; + } + else + { + if (! visit[r2]) + { + w.id = r2; + w.deg = D[r2]; + w.dist = v.dist+1; + Q_enq (Q, N, qt, w); + visit[r2] = true; + + if (w.dist > level) + level = w.dist; + } + j2++; + } + } + } + } // finish of BFS + + if (max_dist < x.dist) + { + max_dist = x.dist; + + for (octave_idx_type i = 0; i < N; i++) + visit[i] = false; + + visit[x.id] = true; + x.dist = 0; + qt = qh = 0; + Q_enq (Q, N, qt, x); + } + else + break; + } + return x.id; +} + +// Calculates the node's degrees. This means counting the nonzero elements +// in the symmetric matrix' rows. This works for non-symmetric matrices +// as well. + +static octave_idx_type +calc_degrees (octave_idx_type N, const octave_idx_type *ridx, + const octave_idx_type *cidx, octave_idx_type *D) +{ + octave_idx_type max_deg = 0; + + for (octave_idx_type i = 0; i < N; i++) + D[i] = 0; + + for (octave_idx_type j = 0; j < N; j++) + { + for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++) + { + octave_quit (); + + octave_idx_type k = ridx[i]; + // there is a nonzero element (k,j) + D[k]++; + if (D[k] > max_deg) + max_deg = D[k]; + // if there is no element (j,k) there is one in + // the symmetric matrix: + if (k != j) + { + bool found = false; + for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++) + { + octave_quit (); + + if (ridx[l] == j) + { + found = true; + break; + } + else if (ridx[l] > j) + break; + } + + if (! found) + { + // A(j,k) == 0 + D[j]++; + if (D[j] > max_deg) + max_deg = D[j]; + } + } + } + } + return max_deg; +} + +// Transpose of the structure of a square sparse matrix + +static void +transpose (octave_idx_type N, const octave_idx_type *ridx, + const octave_idx_type *cidx, octave_idx_type *ridx2, + octave_idx_type *cidx2) +{ + octave_idx_type nz = cidx[N]; + + OCTAVE_LOCAL_BUFFER (octave_idx_type, w, N + 1); + for (octave_idx_type i = 0; i < N; i++) + w[i] = 0; + for (octave_idx_type i = 0; i < nz; i++) + w[ridx[i]]++; + nz = 0; + for (octave_idx_type i = 0; i < N; i++) + { + octave_quit (); + + cidx2[i] = nz; + nz += w[i]; + w[i] = cidx2[i]; + } + cidx2[N] = nz; + w[N] = nz; + + for (octave_idx_type j = 0; j < N; j++) + for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++) + { + octave_quit (); + + octave_idx_type q = w[ridx[k]]++; + ridx2[q] = j; + } +} + +// An implementation of the Cuthill-McKee algorithm. +DEFUN (symrcm, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{p} =} symrcm (@var{S}) +Return the symmetric reverse @nospell{Cuthill-McKee} permutation of @var{S}. + +@var{p} is a permutation vector such that +@code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements closer +to the diagonal than @var{S}. This is a good preordering for LU or +Cholesky@tie{}factorization of matrices that come from ``long, skinny'' +problems. It works for both symmetric and asymmetric @var{S}. + +The algorithm represents a heuristic approach to the NP-complete bandwidth +minimization problem. The implementation is based in the descriptions found +in + +@nospell{E. Cuthill, J. McKee}. +@cite{Reducing the Bandwidth of Sparse Symmetric Matrices}. +Proceedings of the 24th @nospell{ACM} National Conference, +157--172 1969, Brandon Press, New Jersey. + +@nospell{A. George, J.W.H. Liu}. @cite{Computer Solution of Large Sparse +Positive Definite Systems}, Prentice Hall Series in Computational +Mathematics, ISBN 0-13-165274-5, 1981. + +@seealso{colperm, colamd, symamd} +@end deftypefn */) +{ + if (args.length () != 1) + print_usage (); + + octave_value arg = args(0); + + // the parameter of the matrix is converted into a sparse matrix + //(if necessary) + octave_idx_type *cidx; + octave_idx_type *ridx; + SparseMatrix Ar; + SparseComplexMatrix Ac; + + if (arg.isreal ()) + { + Ar = arg.sparse_matrix_value (); + // Note cidx/ridx are const, so use xridx and xcidx... + cidx = Ar.xcidx (); + ridx = Ar.xridx (); + } + else + { + Ac = arg.sparse_complex_matrix_value (); + cidx = Ac.xcidx (); + ridx = Ac.xridx (); + } + + octave_idx_type nr = arg.rows (); + octave_idx_type nc = arg.columns (); + + if (nr != nc) + err_square_matrix_required ("symrcm", "S"); + + if (nr == 0 && nc == 0) + return ovl (NDArray (dim_vector (1, 0))); + + // sizes of the heaps + octave_idx_type s = 0; + + // head- and tail-indices for the queue + octave_idx_type qt = 0; + octave_idx_type qh = 0; + CMK_Node v, w; + // dimension of the matrix + octave_idx_type N = nr; + + OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1); + OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]); + transpose (N, ridx, cidx, ridx2, cidx2); + + // the permutation vector + NDArray P (dim_vector (1, N)); + + // compute the node degrees + OCTAVE_LOCAL_BUFFER (octave_idx_type, D, N); + octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D); + + // if none of the nodes has a degree > 0 (a matrix of zeros) + // the return value corresponds to the identity permutation + if (max_deg == 0) + { + for (octave_idx_type i = 0; i < N; i++) + P(i) = i; + + return ovl (P); + } + + // a heap for the a node's neighbors. The number of neighbors is + // limited by the maximum degree max_deg: + OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg); + + // a queue for the BFS. The array is always one element larger than + // the number of entries that are stored. + OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1); + + // a counter (for building the permutation) + octave_idx_type c = -1; + + // upper bound for the bandwidth (=quality of solution) + // initialize the bandwidth of the graph with 0. B contains the + // the maximum of the theoretical lower limits of the subgraphs + // bandwidths. + octave_idx_type B = 0; + + // mark all nodes as unvisited; with the exception of the nodes + // that have degree==0 and build a CC of the graph. + + boolNDArray btmp (dim_vector (1, N), false); + bool *visit = btmp.fortran_vec (); + + do + { + // locate an unvisited starting node of the graph + octave_idx_type i; + for (i = 0; i < N; i++) + if (! visit[i]) + break; + + // locate a probably better starting node + v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i); + + // mark the node as visited and enqueue it (a starting node + // for the BFS). Since the node will be a root of a spanning + // tree, its dist is 0. + v.deg = D[v.id]; + v.dist = 0; + visit[v.id] = true; + Q_enq (Q, N, qt, v); + + // lower bound for the bandwidth of a subgraph + // keep a "level" in the spanning tree (= min. distance to the + // root) for determining the bandwidth of the computed + // permutation P + octave_idx_type Bsub = 0; + // min. dist. to the root is 0 + octave_idx_type level = 0; + // the root is the first/only node on level 0 + octave_idx_type level_N = 1; + + while (! Q_empty (Q, N, qh, qt)) + { + v = Q_deq (Q, N, qh); + i = v.id; + + c++; + + // for computing the inverse permutation P where + // A(inv(P),inv(P)) or P'*A*P is banded + // P(i) = c; + + // for computing permutation P where + // A(P(i),P(j)) or P*A*P' is banded + P(c) = i; + + // put all unvisited neighbors j of node i on the heap + s = 0; + octave_idx_type j1 = cidx[i]; + octave_idx_type j2 = cidx2[i]; + + octave_quit (); + + while (j1 < cidx[i+1] || j2 < cidx2[i+1]) + { + octave_quit (); + + if (j1 == cidx[i+1]) + { + octave_idx_type r2 = ridx2[j2++]; + if (! visit[r2]) + { + // the distance of node j is dist(i)+1 + w.id = r2; + w.deg = D[r2]; + w.dist = v.dist+1; + H_insert (S, s, w); + visit[r2] = true; + } + } + else if (j2 == cidx2[i+1]) + { + octave_idx_type r1 = ridx[j1++]; + if (! visit[r1]) + { + w.id = r1; + w.deg = D[r1]; + w.dist = v.dist+1; + H_insert (S, s, w); + visit[r1] = true; + } + } + else + { + octave_idx_type r1 = ridx[j1]; + octave_idx_type r2 = ridx2[j2]; + if (r1 <= r2) + { + if (! visit[r1]) + { + w.id = r1; + w.deg = D[r1]; + w.dist = v.dist+1; + H_insert (S, s, w); + visit[r1] = true; + } + j1++; + if (r1 == r2) + j2++; + } + else + { + if (! visit[r2]) + { + w.id = r2; + w.deg = D[r2]; + w.dist = v.dist+1; + H_insert (S, s, w); + visit[r2] = true; + } + j2++; + } + } + } + + // add the neighbors to the queue (sorted by node degree) + while (! H_empty (S, s)) + { + octave_quit (); + + // locate a neighbor of i with minimal degree in O(log(N)) + v = H_remove_min (S, s, 1); + + // entered the BFS a new level? + if (v.dist > level) + { + // adjustment of bandwidth: + // "[...] the minimum bandwidth that + // can be obtained [...] is the + // maximum number of nodes per level" + if (Bsub < level_N) + Bsub = level_N; + + level = v.dist; + // v is the first node on the new level + level_N = 1; + } + else + { + // there is no new level but another node on + // this level: + level_N++; + } + + // enqueue v in O(1) + Q_enq (Q, N, qt, v); + } + + // synchronize the bandwidth with level_N once again: + if (Bsub < level_N) + Bsub = level_N; + } + // finish of BFS. If there are still unvisited nodes in the graph + // then it is split into CCs. The computed bandwidth is the maximum + // of all subgraphs. Update: + if (Bsub > B) + B = Bsub; + } + // are there any nodes left? + while (c+1 < N); + + // compute the reverse-ordering + s = N / 2 - 1; + for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--) + std::swap (P.elem (i), P.elem (j)); + + // increment all indices, since Octave is not C + return ovl (P+1); +}