Mercurial > forge
changeset 6096:c016a897bc75 octave-forge
increase accuracy by using Kahan's summation formula - this reduces the error by a factor of N; first tests show a penalty of about 40% in terms of speed; currently, this is only implemented for stride=1 (summation along 1st dimension)
author | schloegl |
---|---|
date | Wed, 30 Sep 2009 08:18:02 +0000 |
parents | 5a6a0347cca1 |
children | 7393a66559c6 |
files | extra/NaN/src/sumskipnan_mex.cpp |
diffstat | 1 files changed, 189 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/src/sumskipnan_mex.cpp Tue Sep 29 21:59:09 2009 +0000 +++ b/extra/NaN/src/sumskipnan_mex.cpp Wed Sep 30 08:18:02 2009 +0000 @@ -22,6 +22,11 @@ // usage: // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); // +// SUMSKIPNAN uses two techniques to reduce errors: +// 1) long double (80bit) instead of 64-bit double is used internally +// 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps +// The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). +// // Input: // - x data array // - DIM (optional) dimension to sum @@ -50,10 +55,11 @@ inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); //#define NO_FLAG - #ifdef tmwtypes_h #if (MX_API_VER<0x07020000) typedef int mwSize; @@ -174,7 +180,7 @@ if (D1==1) { double count; - __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W); + __sumskipnan2we__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W); } else { for (j=0; j<D2; j++) { @@ -207,6 +213,7 @@ } while (ix2 != (l+1)*D1); } // end for (j= + /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = LongOutputSum[ix0+j]; } @@ -221,7 +228,7 @@ ix1 = ix0*D2; // index for input if (D1==1) { - __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W); + __sumskipnan2we__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W); } else { for (j=0; j<D2; j++) { @@ -256,6 +263,7 @@ } while (ix2 != (l+1)*D1); } // end for (j= + /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = LongOutputSum[ix0+j]; LOutputCount[ix0+j] = LongOutputCount[ix0+j]; @@ -271,7 +279,7 @@ ix1 = ix0*D2; // index for input if (D1==1) { - __sumskipnan3w__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W); + __sumskipnan3we__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W); } else { for (j=0; j<D2; j++) { @@ -309,6 +317,7 @@ } while (ix2 != (l+1)*D1); } // end for (j= + /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = LongOutputSum[ix0+j]; LOutputCount[ix0+j] = LongOutputCount[ix0+j]; @@ -374,13 +383,13 @@ void *end = data + stride*Ni; if (W) { // with weight vector - register long double count = 0.0; + long double count = 0.0; do { register double x = *data; if (!isnan(x)) { count += *W; - sum += *W*x; + sum += *W*x; } #ifndef NO_FLAG else @@ -394,9 +403,9 @@ *No = count; } else { // w/o weight vector - register size_t countI = 0; + size_t countI = 0; do { - register double x = *data; + double x = *data; if (!isnan(x)) { countI++; @@ -430,9 +439,9 @@ void *end = data + stride*Ni; if (W) { // with weight vector - register long double count = 0.0; + long double count = 0.0; do { - register double x = *data; + double x = *data; if (!isnan(x)) { count += *W; double t = *W*x; @@ -450,9 +459,9 @@ *No = count; } else { // w/o weight vector - register size_t countI = 0; + size_t countI = 0; do { - register double x = *data; + double x = *data; if (!isnan(x)) { countI++; sum += x; @@ -475,3 +484,171 @@ *s2 = msq; } + + + +inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + long double rc=0.0, rn=0.0; + do { + register double x = *data; + long double t,y; + if (!isnan(x)) + { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + long double rc=0.0; + do { + double x = *data; + long double t,y; + if (!isnan(x)) + { + countI++; + // sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + +} + + + +/*************************************** + using Kahan's summation formula [1] + this gives more accurate results while the computational effort within the loop is about 4x as high + First tests show a penalty of about 40% in terms of computational time. + + [1] David Goldberg, + What Every Computer Scientist Should Know About Floating-Point Arithmetic + ACM Computing Surveys, Vol 23, No 1, March 1991. + ****************************************/ + +inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + long double msq=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + long double rc=0.0, rn=0.0, rq=0.0; + do { + double x = *data; + long double t,y; + if (!isnan(x)) { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + long double w = *W*x; + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*w; + y = w*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + long double rc=0.0, rq=0.0; + do { + double x = *data; + long double t,y; + if (!isnan(x)) { + countI++; + //sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*x; + y = x*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + *s2 = msq; +} +