view liboctave/numeric/sparse-dmsolve.cc @ 30564:796f54d4ddbf stable

update Octave Project Developers copyright for the new year In files that have the "Octave Project Developers" copyright notice, update for 2021. In all .txi and .texi files except gpl.txi and gpl.texi in the doc/liboctave and doc/interpreter directories, change the copyright to "Octave Project Developers", the same as used for other source files. Update copyright notices for 2022 (not done since 2019). For gpl.txi and gpl.texi, change the copyright notice to be "Free Software Foundation, Inc." and leave the date at 2007 only because this file only contains the text of the GPL, not anything created by the Octave Project Developers. Add Paul Thomas to contributors.in.
author John W. Eaton <jwe@octave.org>
date Tue, 28 Dec 2021 18:22:40 -0500
parents 1f0a2689cab2
children
line wrap: on
line source

////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006-2022 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#include <algorithm>

#include "CMatrix.h"
#include "CSparse.h"
#include "MArray.h"
#include "MSparse.h"
#include "MatrixType.h"
#include "dSparse.h"
#include "lo-error.h"
#include "oct-inttypes-fwd.h"
#include "oct-locbuf.h"
#include "oct-sort.h"
#include "oct-sparse.h"
#include "quit.h"
#include "sparse-dmsolve.h"
#include "sparse-qr.h"

template <typename T>
static MSparse<T>
dmsolve_extract (const MSparse<T>& A, const octave_idx_type *Pinv,
                 const octave_idx_type *Q, octave_idx_type rst,
                 octave_idx_type rend, octave_idx_type cst,
                 octave_idx_type cend, octave_idx_type maxnz = -1,
                 bool lazy = false)
{
  octave_idx_type nr = rend - rst;
  octave_idx_type nc = cend - cst;

  maxnz = (maxnz < 0 ? A.nnz () : maxnz);

  octave_idx_type nz;

  // Cast to uint64 to handle overflow in this multiplication
  if (octave_uint64 (nr)*octave_uint64 (nc) < octave_uint64 (maxnz))
    nz = nr*nc;
  else
    nz = maxnz;

  MSparse<T> B (nr, nc, (nz < maxnz ? nz : maxnz));

  // Some sparse functions can support lazy indexing (where elements
  // in the row are in no particular order), even though octave in
  // general can't.  For those functions that can using it is a big
  // win here in terms of speed.

  if (lazy)
    {
      nz = 0;

      for (octave_idx_type j = cst ; j < cend ; j++)
        {
          octave_idx_type qq = (Q ? Q[j] : j);

          B.xcidx (j - cst) = nz;

          for (octave_idx_type p = A.cidx (qq) ; p < A.cidx (qq+1) ; p++)
            {
              octave_quit ();

              octave_idx_type r = (Pinv ? Pinv[A.ridx (p)] : A.ridx (p));

              if (r >= rst && r < rend)
                {
                  B.xdata (nz) = A.data (p);
                  B.xridx (nz++) = r - rst;
                }
            }
        }

      B.xcidx (cend - cst) = nz;
    }
  else
    {
      OCTAVE_LOCAL_BUFFER (T, X, rend - rst);

      octave_sort<octave_idx_type> sort;
      octave_idx_type *ri = B.xridx ();

      nz = 0;

      for (octave_idx_type j = cst ; j < cend ; j++)
        {
          octave_idx_type qq = (Q ? Q[j] : j);

          B.xcidx (j - cst) = nz;

          for (octave_idx_type p = A.cidx (qq) ; p < A.cidx (qq+1) ; p++)
            {
              octave_quit ();

              octave_idx_type r = (Pinv ? Pinv[A.ridx (p)] : A.ridx (p));

              if (r >= rst && r < rend)
                {
                  X[r-rst] = A.data (p);
                  B.xridx (nz++) = r - rst;
                }
            }

          sort.sort (ri + B.xcidx (j - cst), nz - B.xcidx (j - cst));

          for (octave_idx_type p = B.cidx (j - cst); p < nz; p++)
            B.xdata (p) = X[B.xridx (p)];
        }

      B.xcidx (cend - cst) = nz;
    }

  return B;
}

template <typename T>
static MArray<T>
dmsolve_extract (const MArray<T>& m, const octave_idx_type *,
                 const octave_idx_type *, octave_idx_type r1,
                 octave_idx_type r2, octave_idx_type c1,
                 octave_idx_type c2)
{
  r2 -= 1;
  c2 -= 1;

  if (r1 > r2)
    std::swap (r1, r2);

  if (c1 > c2)
    std::swap (c1, c2);

  octave_idx_type new_r = r2 - r1 + 1;
  octave_idx_type new_c = c2 - c1 + 1;

  MArray<T> result (dim_vector (new_r, new_c));

  for (octave_idx_type j = 0; j < new_c; j++)
    {
      for (octave_idx_type i = 0; i < new_r; i++)
        result.xelem (i, j) = m.elem (r1+i, c1+j);
    }

  return result;
}

template <typename T>
static void
dmsolve_insert (MArray<T>& a, const MArray<T>& b, const octave_idx_type *Q,
                octave_idx_type r, octave_idx_type c)
{
  T *ax = a.fortran_vec ();

  const T *bx = b.data ();

  octave_idx_type anr = a.rows ();

  octave_idx_type nr = b.rows ();
  octave_idx_type nc = b.cols ();

  for (octave_idx_type j = 0; j < nc; j++)
    {
      octave_idx_type aoff = (c + j) * anr;
      octave_idx_type boff = j * nr;

      for (octave_idx_type i = 0; i < nr; i++)
        {
          octave_quit ();
          ax[Q[r + i] + aoff] = bx[i + boff];
        }
    }
}

template <typename T>
static void
dmsolve_insert (MSparse<T>& a, const MSparse<T>& b, const octave_idx_type *Q,
                octave_idx_type r, octave_idx_type c)
{
  octave_idx_type b_rows = b.rows ();
  octave_idx_type b_cols = b.cols ();

  octave_idx_type nr = a.rows ();
  octave_idx_type nc = a.cols ();

  OCTAVE_LOCAL_BUFFER (octave_idx_type, Qinv, nr);

  for (octave_idx_type i = 0; i < nr; i++)
    Qinv[Q[i]] = i;

  // First count the number of elements in the final array
  octave_idx_type nel = a.xcidx (c) + b.nnz ();

  if (c + b_cols < nc)
    nel += a.xcidx (nc) - a.xcidx (c + b_cols);

  for (octave_idx_type i = c; i < c + b_cols; i++)
    {
      for (octave_idx_type j = a.xcidx (i); j < a.xcidx (i+1); j++)
        {
          if (Qinv[a.xridx (j)] < r || Qinv[a.xridx (j)] >= r + b_rows)
            nel++;
        }
    }

  OCTAVE_LOCAL_BUFFER (T, X, nr);

  octave_sort<octave_idx_type> sort;

  MSparse<T> tmp (a);

  a = MSparse<T> (nr, nc, nel);

  octave_idx_type *ri = a.xridx ();

  for (octave_idx_type i = 0; i < tmp.cidx (c); i++)
    {
      a.xdata (i) = tmp.xdata (i);
      a.xridx (i) = tmp.xridx (i);
    }

  for (octave_idx_type i = 0; i < c + 1; i++)
    a.xcidx (i) = tmp.xcidx (i);

  octave_idx_type ii = a.xcidx (c);

  for (octave_idx_type i = c; i < c + b_cols; i++)
    {
      octave_quit ();

      for (octave_idx_type j = tmp.xcidx (i); j < tmp.xcidx (i+1); j++)
        {
          if (Qinv[tmp.xridx (j)] < r ||  Qinv[tmp.xridx (j)] >= r + b_rows)
            {
              X[tmp.xridx (j)] = tmp.xdata (j);
              a.xridx (ii++) = tmp.xridx (j);
            }
        }

      octave_quit ();

      for (octave_idx_type j = b.cidx (i-c); j < b.cidx (i-c+1); j++)
        {
          X[Q[r + b.ridx (j)]] = b.data (j);
          a.xridx (ii++) = Q[r + b.ridx (j)];
        }

      sort.sort (ri + a.xcidx (i), ii - a.xcidx (i));

      for (octave_idx_type p = a.xcidx (i); p < ii; p++)
        a.xdata (p) = X[a.xridx (p)];

      a.xcidx (i+1) = ii;
    }

  for (octave_idx_type i = c + b_cols; i < nc; i++)
    {
      for (octave_idx_type j = tmp.xcidx (i); j < tmp.cidx (i+1); j++)
        {
          a.xdata (ii) = tmp.xdata (j);
          a.xridx (ii++) = tmp.xridx (j);
        }

      a.xcidx (i+1) = ii;
    }
}

template <typename T, typename RT>
static void
dmsolve_permute (MArray<RT>& a, const MArray<T>& b, const octave_idx_type *p)
{
  octave_idx_type b_nr = b.rows ();
  octave_idx_type b_nc = b.cols ();

  const T *Bx = b.data ();

  a.resize (dim_vector (b_nr, b_nc));

  RT *Btx = a.fortran_vec ();

  for (octave_idx_type j = 0; j < b_nc; j++)
    {
      octave_idx_type off = j * b_nr;
      for (octave_idx_type i = 0; i < b_nr; i++)
        {
          octave_quit ();
          Btx[p[i] + off] = Bx[ i + off];
        }
    }
}

template <typename T, typename RT>
static void
dmsolve_permute (MSparse<RT>& a, const MSparse<T>& b, const octave_idx_type *p)
{
  octave_idx_type b_nr = b.rows ();
  octave_idx_type b_nc = b.cols ();
  octave_idx_type b_nz = b.nnz ();

  octave_idx_type nz = 0;

  a = MSparse<RT> (b_nr, b_nc, b_nz);
  octave_sort<octave_idx_type> sort;
  octave_idx_type *ri = a.xridx ();

  OCTAVE_LOCAL_BUFFER (RT, X, b_nr);

  a.xcidx (0) = 0;

  for (octave_idx_type j = 0; j < b_nc; j++)
    {
      for (octave_idx_type i = b.cidx (j); i < b.cidx (j+1); i++)
        {
          octave_quit ();
          octave_idx_type r = p[b.ridx (i)];
          X[r] = b.data (i);
          a.xridx (nz++) = p[b.ridx (i)];
        }

      sort.sort (ri + a.xcidx (j), nz - a.xcidx (j));

      for (octave_idx_type i = a.cidx (j); i < nz; i++)
        {
          octave_quit ();
          a.xdata (i) = X[a.xridx (i)];
        }

      a.xcidx (j+1) = nz;
    }
}

#if defined (HAVE_CXSPARSE)

static void
solve_singularity_warning (double)
{
  // Dummy singularity handler so that LU solver doesn't flag
  // an error for numerically rank defficient matrices
}

#endif

template <typename RT, typename ST, typename T>
RT
dmsolve (const ST& a, const T& b, octave_idx_type& info)
{
  RT retval;

#if defined (HAVE_CXSPARSE)

  octave_idx_type nr = a.rows ();
  octave_idx_type nc = a.cols ();

  octave_idx_type b_nr = b.rows ();
  octave_idx_type b_nc = b.cols ();

  if (nr < 0 || nc < 0 || nr != b_nr)
    (*current_liboctave_error_handler)
      ("matrix dimension mismatch in solution of minimum norm problem");

  if (nr == 0 || nc == 0 || b_nc == 0)
    retval = RT (nc, b_nc, 0.0);
  else
    {
      octave_idx_type nnz_remaining = a.nnz ();

      CXSPARSE_DNAME () csm;

      csm.m = nr;
      csm.n = nc;
      csm.x = nullptr;
      csm.nz = -1;
      csm.nzmax = a.nnz ();

      // Cast away const on A, with full knowledge that CSparse won't touch it.
      // Prevents the methods below from making a copy of the data.
      csm.p = const_cast<octave::suitesparse_integer *>
                (octave::to_suitesparse_intptr (a.cidx ()));
      csm.i = const_cast<octave::suitesparse_integer *>
                (octave::to_suitesparse_intptr (a.ridx ()));

      CXSPARSE_DNAME (d) *dm = CXSPARSE_DNAME(_dmperm) (&csm, 0);
      octave_idx_type *p = octave::to_octave_idx_type_ptr (dm->p);
      octave_idx_type *q = octave::to_octave_idx_type_ptr (dm->q);

      OCTAVE_LOCAL_BUFFER (octave_idx_type, pinv, nr);

      for (octave_idx_type i = 0; i < nr; i++)
        pinv[p[i]] = i;

      RT btmp;
      dmsolve_permute (btmp, b, pinv);
      info = 0;

      retval.resize (nc, b_nc);

      // Leading over-determined block
      if (dm->rr[2] < nr && dm->cc[3] < nc)
        {
          ST m = dmsolve_extract (a, pinv, q, dm->rr[2], nr, dm->cc[3], nc,
                                  nnz_remaining, true);
          nnz_remaining -= m.nnz ();
          RT mtmp = octave::math::qrsolve (m, dmsolve_extract (btmp,
                                                               nullptr, nullptr,
                                                               dm->rr[2], b_nr,
                                                               0, b_nc),
                                           info);
          dmsolve_insert (retval, mtmp, q, dm->cc[3], 0);

          if (dm->rr[2] > 0 && ! info)
            {
              m = dmsolve_extract (a, pinv, q, 0, dm->rr[2],
                                   dm->cc[3], nc, nnz_remaining, true);
              nnz_remaining -= m.nnz ();
              RT ctmp = dmsolve_extract (btmp, nullptr, nullptr,
                                         0, dm->rr[2], 0, b_nc);
              btmp.insert (ctmp - m * mtmp, 0, 0);
            }
        }

      // Structurally non-singular blocks
      // FIXME: Should use fine Dulmange-Mendelsohn decomposition here.
      if (dm->rr[1] < dm->rr[2] && dm->cc[2] < dm->cc[3] && ! info)
        {
          ST m = dmsolve_extract (a, pinv, q, dm->rr[1], dm->rr[2],
                                  dm->cc[2], dm->cc[3], nnz_remaining, false);
          nnz_remaining -= m.nnz ();
          RT btmp2 = dmsolve_extract (btmp, nullptr, nullptr,
                                      dm->rr[1], dm->rr[2],
                                      0, b_nc);
          double rcond = 0.0;
          MatrixType mtyp (MatrixType::Full);
          RT mtmp = m.solve (mtyp, btmp2, info, rcond,
                             solve_singularity_warning, false);
          if (info != 0)
            {
              info = 0;
              mtmp = octave::math::qrsolve (m, btmp2, info);
            }

          dmsolve_insert (retval, mtmp, q, dm->cc[2], 0);
          if (dm->rr[1] > 0 && ! info)
            {
              m = dmsolve_extract (a, pinv, q, 0, dm->rr[1], dm->cc[2],
                                   dm->cc[3], nnz_remaining, true);
              nnz_remaining -= m.nnz ();
              RT ctmp = dmsolve_extract (btmp, nullptr, nullptr,
                                         0, dm->rr[1], 0, b_nc);
              btmp.insert (ctmp - m * mtmp, 0, 0);
            }
        }

      // Trailing under-determined block
      if (dm->rr[1] > 0 && dm->cc[2] > 0 && ! info)
        {
          ST m = dmsolve_extract (a, pinv, q, 0, dm->rr[1], 0,
                                  dm->cc[2], nnz_remaining, true);
          RT mtmp = octave::math::qrsolve (m, dmsolve_extract (btmp, nullptr,
                                                               nullptr, 0,
                                                               dm->rr[1], 0,
                                                               b_nc),
                                           info);
          dmsolve_insert (retval, mtmp, q, 0, 0);
        }

      CXSPARSE_DNAME (_dfree) (dm);
    }

#else

  octave_unused_parameter (a);
  octave_unused_parameter (b);
  octave_unused_parameter (info);

  (*current_liboctave_error_handler)
    ("support for CXSparse was unavailable or disabled when liboctave was built");

#endif

  return retval;
}

// Instantiations we need.

template OCTAVE_API ComplexMatrix
dmsolve<ComplexMatrix, SparseComplexMatrix, Matrix>
  (const SparseComplexMatrix&, const Matrix&, octave_idx_type&);

template OCTAVE_API SparseComplexMatrix
dmsolve<SparseComplexMatrix, SparseComplexMatrix, SparseMatrix>
  (const SparseComplexMatrix&, const SparseMatrix&, octave_idx_type&);

template OCTAVE_API ComplexMatrix
dmsolve<ComplexMatrix, SparseComplexMatrix, ComplexMatrix>
  (const SparseComplexMatrix&, const ComplexMatrix&, octave_idx_type&);

template OCTAVE_API SparseComplexMatrix
dmsolve<SparseComplexMatrix, SparseComplexMatrix, SparseComplexMatrix>
  (const SparseComplexMatrix&, const SparseComplexMatrix&, octave_idx_type&);

template OCTAVE_API Matrix
dmsolve<Matrix, SparseMatrix, Matrix>
  (const SparseMatrix&, const Matrix&, octave_idx_type&);

template OCTAVE_API SparseMatrix
dmsolve<SparseMatrix, SparseMatrix, SparseMatrix>
  (const SparseMatrix&, const SparseMatrix&, octave_idx_type&);

template OCTAVE_API ComplexMatrix
dmsolve<ComplexMatrix, SparseMatrix, ComplexMatrix>
  (const SparseMatrix&, const ComplexMatrix&, octave_idx_type&);

template OCTAVE_API SparseComplexMatrix
dmsolve<SparseComplexMatrix, SparseMatrix, SparseComplexMatrix>
  (const SparseMatrix&, const SparseComplexMatrix&, octave_idx_type&);