changeset 10367:173e10268080

avoid indexing nonexistent elements in sparse diag
author John W. Eaton <jwe@octave.org>
date Sat, 27 Feb 2010 16:05:16 -0500
parents e5ae13b8b2c2
children d2849dbcc858
files liboctave/ChangeLog liboctave/Sparse.cc src/DLD-FUNCTIONS/conv2.cc
diffstat 3 files changed, 89 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/ChangeLog	Sat Feb 27 08:37:34 2010 +0100
+++ b/liboctave/ChangeLog	Sat Feb 27 16:05:16 2010 -0500
@@ -1,3 +1,8 @@
+2010-02-27  John W. Eaton  <jwe@octave.org>
+
+	* Sparse.cc (Sparse<T>::diag): Handle case of diag (szv) when szv
+	is a sparse vector with nnz = 0.
+
 2010-02-27  Jaroslav Hajek  <highegg@gmail.com>
 
 	* Array-util.cc (gripe_index_out_of_range): New function.
--- a/liboctave/Sparse.cc	Sat Feb 27 08:37:34 2010 +0100
+++ b/liboctave/Sparse.cc	Sat Feb 27 16:05:16 2010 -0500
@@ -2344,43 +2344,59 @@
         {
           octave_idx_type n = nnc + std::abs (k);
           octave_idx_type nz = nzmax ();
+
           d = Sparse<T> (n, n, nz);
-          for (octave_idx_type i = 0; i < coff+1; i++)
-            d.xcidx (i) = 0;
-          for (octave_idx_type j = 0; j < nnc; j++)
+
+          if (nnz () > 0)
             {
-              for (octave_idx_type i = cidx(j); i < cidx(j+1); i++)
+              for (octave_idx_type i = 0; i < coff+1; i++)
+                d.xcidx (i) = 0;
+
+              for (octave_idx_type j = 0; j < nnc; j++)
                 {
-                  d.xdata (i) = data (i);
-                  d.xridx (i) = j + roff;
+                  for (octave_idx_type i = cidx(j); i < cidx(j+1); i++)
+                    {
+                      d.xdata (i) = data (i);
+                      d.xridx (i) = j + roff;
+                    }
+                  d.xcidx (j + coff + 1) = cidx(j+1);
                 }
-              d.xcidx (j + coff + 1) = cidx(j+1);
+
+              for (octave_idx_type i = nnc + coff + 1; i < n + 1; i++)
+                d.xcidx (i) = nz;
             }
-          for (octave_idx_type i = nnc + coff + 1; i < n + 1; i++)
-            d.xcidx (i) = nz;
         } 
       else 
         {
           octave_idx_type n = nnr + std::abs (k);
           octave_idx_type nz = nzmax ();
-          octave_idx_type ii = 0;
-          octave_idx_type ir = ridx(0);
+
           d = Sparse<T> (n, n, nz);
-          for (octave_idx_type i = 0; i < coff+1; i++)
-            d.xcidx (i) = 0;
-          for (octave_idx_type i = 0; i < nnr; i++)
+
+          if (nnz () > 0)
             {
-              if (ir == i)
+              octave_idx_type ii = 0;
+              octave_idx_type ir = ridx(0);
+
+              for (octave_idx_type i = 0; i < coff+1; i++)
+                d.xcidx (i) = 0;
+
+              for (octave_idx_type i = 0; i < nnr; i++)
                 {
-                  d.xdata (ii) = data (ii);
-                  d.xridx (ii++) = ir + roff;
-                  if (ii != nz)
-                    ir = ridx (ii);
+                  if (ir == i)
+                    {
+                      d.xdata (ii) = data (ii);
+                      d.xridx (ii++) = ir + roff;
+
+                      if (ii != nz)
+                        ir = ridx (ii);
+                    }
+                  d.xcidx (i + coff + 1) = ii;
                 }
-              d.xcidx (i + coff + 1) = ii;
+
+              for (octave_idx_type i = nnr + coff + 1; i < n+1; i++)
+                d.xcidx (i) = nz;
             }
-          for (octave_idx_type i = nnr + coff + 1; i < n+1; i++)
-            d.xcidx (i) = nz;
         }
     }
 
--- a/src/DLD-FUNCTIONS/conv2.cc	Sat Feb 27 08:37:34 2010 +0100
+++ b/src/DLD-FUNCTIONS/conv2.cc	Sat Feb 27 16:05:16 2010 -0500
@@ -30,30 +30,32 @@
 #include "oct-obj.h"
 #include "utils.h"
 
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-
 enum Shape { SHAPE_FULL, SHAPE_SAME, SHAPE_VALID };
 
 #if !defined (CXX_NEW_FRIEND_TEMPLATE_DECL)
 extern MArray<double>
-conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape);
+conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&,
+       Shape);
 
 extern MArray<Complex>
-conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape);
+conv2 (const MArray<Complex>&, const MArray<Complex>&,
+       const MArray<Complex>&, Shape);
 
 extern MArray<float>
-conv2 (MArray<float>&, MArray<float>&, MArray<float>&, Shape);
+conv2 (const MArray<float>&, const MArray<float>&, const MArray<float>&,
+       Shape);
 
 extern MArray<FloatComplex>
-conv2 (MArray<FloatComplex>&, MArray<FloatComplex>&, MArray<FloatComplex>&, Shape);
+conv2 (const MArray<FloatComplex>&, const MArray<FloatComplex>&,
+       const MArray<FloatComplex>&, Shape);
 #endif
 
 template <class T>
 MArray<T>
-conv2 (MArray<T>& R, MArray<T>& C, MArray<T>& A, Shape ishape)
+conv2 (const MArray<T>& R, const MArray<T>& C, const MArray<T>& A, Shape ishape)
 {
-  octave_idx_type  Rn =  R.length ();
-  octave_idx_type  Cm =  C.length ();
+  octave_idx_type  Rn = R.length ();
+  octave_idx_type  Cm = C.length ();
   octave_idx_type  Am = A.rows ();
   octave_idx_type  An = A.columns ();
 
@@ -113,8 +115,8 @@
         {
           T sum = 0;
 
-          octave_idx_type ci = Cm - 1 - MAX(0, edgM-oi);
-          octave_idx_type ai = MAX(0, oi-edgM);
+          octave_idx_type ci = Cm - 1 - std::max (0, edgM-oi);
+          octave_idx_type ai = std::max (0, oi-edgM);
           const T* Ad = A.data() + ai + Am*oj;
           const T* Cd = C.data() + ci;
           for ( ; ci >= 0 && ai < Am; ci--, Cd--, ai++, Ad++)
@@ -127,15 +129,15 @@
         {
           T sum = 0;
 
-          octave_idx_type rj = Rn - 1 - MAX(0, edgN-oj);
-          octave_idx_type aj = MAX(0, oj-edgN) ;
+          octave_idx_type rj = Rn - 1 - std::max (0, edgN-oj);
+          octave_idx_type aj = std::max (0, oj-edgN);
           const T* Xd = X.data() + aj;
           const T* Rd = R.data() + rj;
 
           for ( ; rj >= 0 && aj < An; rj--, Rd--, aj++, Xd++)
             sum += (*Xd) * (*Rd);
 
-          O(oi,oj)=  sum;
+          O(oi,oj) = sum;
         }
     }
 
@@ -158,7 +160,7 @@
 
 template <class T>
 MArray<T>
-conv2 (MArray<T>&A, MArray<T>&B, Shape ishape)
+conv2 (const MArray<T>& A, const MArray<T>& B, Shape ishape)
 {
   // Convolution works fastest if we choose the A matrix to be
   // the largest.
@@ -210,29 +212,36 @@
 
   MArray<T> O (outM, outN);
 
-  for (octave_idx_type oi = 0; oi < outM; oi++)
+  T *Od = O.fortran_vec ();
+
+  for (octave_idx_type oj = 0; oj < outN; oj++)
     {
-      for (octave_idx_type oj = 0; oj < outN; oj++)
+      octave_idx_type aj0 = std::max (0, oj-edgN);
+      octave_idx_type bj0 = Bn - 1 - std::max (0, edgN-oj);
+
+      for (octave_idx_type oi = 0; oi < outM; oi++)
         {
           T sum = 0;
 
-          for (octave_idx_type bj = Bn - 1 - MAX (0, edgN-oj), aj= MAX (0, oj-edgN);
-               bj >= 0 && aj < An; bj--, aj++)
+          octave_idx_type bi0 = Bm - 1 - std::max (0, edgM-oi);
+          octave_idx_type ai0 = std::max (0, oi-edgM);
+
+          for (octave_idx_type aj = aj0, bj = bj0; bj >= 0 && aj < An;
+               bj--, aj++)
             {
-              octave_idx_type bi = Bm - 1 - MAX (0, edgM-oi);
-              octave_idx_type ai = MAX (0, oi-edgM);
-              const T* Ad = A.data () + ai + Am*aj;
-              const T* Bd = B.data () + bi + Bm*bj;
+              const T* Ad = A.data () + ai0 + Am*aj;
+              const T* Bd = B.data () + bi0 + Bm*bj;
 
-              for ( ; bi >= 0 && ai < Am; bi--, Bd--, ai++, Ad++)
+              for (octave_idx_type ai = ai0, bi = bi0; bi >= 0 && ai < Am;
+                   bi--, ai++)
                 {
-                  sum += (*Ad) * (*Bd);
+                  sum += (*Ad++) * (*Bd--);
                   // Comment: it seems to be 2.5 x faster than this:
                   //        sum+= A(ai,aj) * B(bi,bj);
                 }
             }
 
-          O(oi,oj) = sum;
+          *Od++ = sum;
         }
     }
 
@@ -258,11 +267,11 @@
 of @var{c} is given by\n\
 \n\
 @table @asis\n\
-@item @var{shape}= 'full'\n\
+@item @var{shape} = 'full'\n\
 returns full 2-D convolution\n\
-@item @var{shape}= 'same'\n\
+@item @var{shape} = 'same'\n\
 same size as a. 'central' part of convolution\n\
-@item @var{shape}= 'valid'\n\
+@item @var{shape} = 'valid'\n\
 only parts which do not include zero-padded edges\n\
 @end table\n\
 \n\
@@ -274,8 +283,8 @@
   octave_value retval;
   octave_value tmp;
   int nargin = args.length ();
-  std::string shape= "full"; //default
-  bool separable= false;
+  std::string shape = "full"; //default
+  bool separable = false;
   Shape ishape;
 
   if (nargin < 2)
@@ -419,13 +428,15 @@
 }
 
 template MArray<double>
-conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape);
+conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&,
+       Shape);
 
 template MArray<double>
-conv2 (MArray<double>&, MArray<double>&, Shape);
+conv2 (const MArray<double>&, const MArray<double>&, Shape);
 
 template MArray<Complex>
-conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape);
+conv2 (const MArray<Complex>&, const MArray<Complex>&,
+       const MArray<Complex>&, Shape);
 
 template MArray<Complex>
-conv2 (MArray<Complex>&, MArray<Complex>&, Shape);
+conv2 (const MArray<Complex>&, const MArray<Complex>&, Shape);