Mercurial > forge
changeset 6092:bb65ad321074 octave-forge
increase accuracy by using Kahan's summation formula - this reduces the error by a factor of N; first tests show it slows down by about 25%
author | schloegl |
---|---|
date | Tue, 29 Sep 2009 15:29:06 +0000 |
parents | 96ec53af7c21 |
children | bbd86b2a9f9c |
files | extra/NaN/src/covm_mex.cpp |
diffstat | 1 files changed, 164 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/src/covm_mex.cpp Tue Sep 29 13:22:38 2009 +0000 +++ b/extra/NaN/src/covm_mex.cpp Tue Sep 29 15:29:06 2009 +0000 @@ -53,14 +53,14 @@ size_t rX,cX,rY,cY,nW = 0; size_t i,j,k; // running indices - char flag_isNaN = 0; + char flag_isNaN = 0, flag_speed=0; /*********** check input arguments *****************/ // check for proper number of input and output arguments - if ((PInputCount <= 0) || (PInputCount > 4)) { - mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W]]])\n\n"); + if ((PInputCount <= 0) || (PInputCount > 5)) { + mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n"); /* mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); @@ -111,6 +111,16 @@ mexErrMsgTxt("number of elements in W must match numbers of rows in X"); } + + if ((PInputCount > 4) && mxGetChars(PInputs[4])) { + switch (*mxGetChars(PInputs[4])) { + case 'S': + case 's': + flag_speed = 1; + break; + } + } + //mexPrintf("Flag Speed=%i\n",flag_speed); if (Y0==NULL) { Y0 = X0; @@ -191,9 +201,11 @@ } } -#else +#else + if (flag_speed) { /*------ version 2 --------------------- this version seems to be faster than the one above. + it is also faster but less accurate than the version below */ if ( (X0 != Y0) || (cX != cY) ) /******** X!=Y, output is not symetric *******/ @@ -294,6 +306,154 @@ } } + } + else { + /*------ version 3 --------------------- + using Kahan's summation formula [1] + this gives more accurate results while the computational effort within the loop is about 4x as high + However, first test show an increase in computational time of only about 25 %. + + [1] David Goldberg, + What Every Computer Scientist Should Know About Floating-Point Arithmetic + ACM Computing Surveys, Vol 23, No 1, March 1991 + */ + if ( (X0 != Y0) || (cX != cY) ) + /******** X!=Y, output is not symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + long double cc=0.0; + long double nn=0.0; + long double rc=0.0; + long double rn=0.0; + for (k=0; k<rX; k++) { + long double t,y; + long double z = ((long double)X[k])*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z*W[k]; [1] + y = z*W[k]-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + // nn += W[k]; [1] + y = z*W[k]-rn; + t = nn+y; + rn= (t-nn)-y; + nn= t; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = nn; + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + long double cc=0.0; + long double rc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + long double t,y; + long double z = ((long double)X[k])*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z; [1] + y = z-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + nn++; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = (double)nn; + } + else // if (X0==Y0) && (cX==cY) + /******** X==Y, output is symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + long double cc=0.0; + long double nn=0.0; + long double rc=0.0; + long double rn=0.0; + for (k=0; k<rX; k++) { + long double t,y; + long double z = ((long double)X[k])*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z*W[k]; [1] + y = z*W[k]-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + // nn += W[k]; [1] + y = z*W[k]-rn; + t = nn+y; + rn= (t-nn)-y; + nn= t; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = nn; + NN[j+i*cX] = nn; + } + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + long double cc=0.0; + long double rc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + long double t,y; + long double z = ((long double)X[k])*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z; [1] + y = z-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + nn++; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = (double)nn; + NN[j+i*cX] = (double)nn; + } + } + } #ifndef NO_FLAG //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");