# HG changeset patch # User John W. Eaton # Date 1267305338 18000 # Node ID d2849dbcc8588e199f54a5946415447ba25bca71 # Parent 173e10268080aaf634dbbf72512b3ed5af217624 undo unintended changes to conv2 in previous commit diff -r 173e10268080 -r d2849dbcc858 src/DLD-FUNCTIONS/conv2.cc --- a/src/DLD-FUNCTIONS/conv2.cc Sat Feb 27 16:05:16 2010 -0500 +++ b/src/DLD-FUNCTIONS/conv2.cc Sat Feb 27 16:15:38 2010 -0500 @@ -30,32 +30,30 @@ #include "oct-obj.h" #include "utils.h" +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + enum Shape { SHAPE_FULL, SHAPE_SAME, SHAPE_VALID }; #if !defined (CXX_NEW_FRIEND_TEMPLATE_DECL) extern MArray -conv2 (const MArray&, const MArray&, const MArray&, - Shape); +conv2 (MArray&, MArray&, MArray&, Shape); extern MArray -conv2 (const MArray&, const MArray&, - const MArray&, Shape); +conv2 (MArray&, MArray&, MArray&, Shape); extern MArray -conv2 (const MArray&, const MArray&, const MArray&, - Shape); +conv2 (MArray&, MArray&, MArray&, Shape); extern MArray -conv2 (const MArray&, const MArray&, - const MArray&, Shape); +conv2 (MArray&, MArray&, MArray&, Shape); #endif template MArray -conv2 (const MArray& R, const MArray& C, const MArray& A, Shape ishape) +conv2 (MArray& R, MArray& C, MArray& A, Shape ishape) { - octave_idx_type Rn = R.length (); - octave_idx_type Cm = C.length (); + octave_idx_type Rn = R.length (); + octave_idx_type Cm = C.length (); octave_idx_type Am = A.rows (); octave_idx_type An = A.columns (); @@ -115,8 +113,8 @@ { T sum = 0; - octave_idx_type ci = Cm - 1 - std::max (0, edgM-oi); - octave_idx_type ai = std::max (0, oi-edgM); + octave_idx_type ci = Cm - 1 - MAX(0, edgM-oi); + octave_idx_type ai = MAX(0, oi-edgM); const T* Ad = A.data() + ai + Am*oj; const T* Cd = C.data() + ci; for ( ; ci >= 0 && ai < Am; ci--, Cd--, ai++, Ad++) @@ -129,15 +127,15 @@ { T sum = 0; - octave_idx_type rj = Rn - 1 - std::max (0, edgN-oj); - octave_idx_type aj = std::max (0, oj-edgN); + octave_idx_type rj = Rn - 1 - MAX(0, edgN-oj); + octave_idx_type aj = MAX(0, oj-edgN) ; const T* Xd = X.data() + aj; const T* Rd = R.data() + rj; for ( ; rj >= 0 && aj < An; rj--, Rd--, aj++, Xd++) sum += (*Xd) * (*Rd); - O(oi,oj) = sum; + O(oi,oj)= sum; } } @@ -160,7 +158,7 @@ template MArray -conv2 (const MArray& A, const MArray& B, Shape ishape) +conv2 (MArray&A, MArray&B, Shape ishape) { // Convolution works fastest if we choose the A matrix to be // the largest. @@ -212,36 +210,29 @@ MArray O (outM, outN); - T *Od = O.fortran_vec (); - - for (octave_idx_type oj = 0; oj < outN; oj++) + for (octave_idx_type oi = 0; oi < outM; oi++) { - octave_idx_type aj0 = std::max (0, oj-edgN); - octave_idx_type bj0 = Bn - 1 - std::max (0, edgN-oj); - - for (octave_idx_type oi = 0; oi < outM; oi++) + for (octave_idx_type oj = 0; oj < outN; oj++) { T sum = 0; - octave_idx_type bi0 = Bm - 1 - std::max (0, edgM-oi); - octave_idx_type ai0 = std::max (0, oi-edgM); - - for (octave_idx_type aj = aj0, bj = bj0; bj >= 0 && aj < An; - bj--, aj++) + for (octave_idx_type bj = Bn - 1 - MAX (0, edgN-oj), aj= MAX (0, oj-edgN); + bj >= 0 && aj < An; bj--, aj++) { - const T* Ad = A.data () + ai0 + Am*aj; - const T* Bd = B.data () + bi0 + Bm*bj; + octave_idx_type bi = Bm - 1 - MAX (0, edgM-oi); + octave_idx_type ai = MAX (0, oi-edgM); + const T* Ad = A.data () + ai + Am*aj; + const T* Bd = B.data () + bi + Bm*bj; - for (octave_idx_type ai = ai0, bi = bi0; bi >= 0 && ai < Am; - bi--, ai++) + for ( ; bi >= 0 && ai < Am; bi--, Bd--, ai++, Ad++) { - sum += (*Ad++) * (*Bd--); + sum += (*Ad) * (*Bd); // Comment: it seems to be 2.5 x faster than this: // sum+= A(ai,aj) * B(bi,bj); } } - *Od++ = sum; + O(oi,oj) = sum; } } @@ -267,11 +258,11 @@ of @var{c} is given by\n\ \n\ @table @asis\n\ -@item @var{shape} = 'full'\n\ +@item @var{shape}= 'full'\n\ returns full 2-D convolution\n\ -@item @var{shape} = 'same'\n\ +@item @var{shape}= 'same'\n\ same size as a. 'central' part of convolution\n\ -@item @var{shape} = 'valid'\n\ +@item @var{shape}= 'valid'\n\ only parts which do not include zero-padded edges\n\ @end table\n\ \n\ @@ -283,8 +274,8 @@ octave_value retval; octave_value tmp; int nargin = args.length (); - std::string shape = "full"; //default - bool separable = false; + std::string shape= "full"; //default + bool separable= false; Shape ishape; if (nargin < 2) @@ -428,15 +419,13 @@ } template MArray -conv2 (const MArray&, const MArray&, const MArray&, - Shape); +conv2 (MArray&, MArray&, MArray&, Shape); template MArray -conv2 (const MArray&, const MArray&, Shape); +conv2 (MArray&, MArray&, Shape); template MArray -conv2 (const MArray&, const MArray&, - const MArray&, Shape); +conv2 (MArray&, MArray&, MArray&, Shape); template MArray -conv2 (const MArray&, const MArray&, Shape); +conv2 (MArray&, MArray&, Shape);