Mercurial > forge
changeset 6100:329b9cde3604 octave-forge
any combination of naive and Kahan summation with double and extended can be controlled through flag_accuracy_level
author | schloegl |
---|---|
date | Wed, 30 Sep 2009 22:18:27 +0000 |
parents | 7e8f7a4e9b45 |
children | 34f64c54a7d0 |
files | extra/NaN/inst/flag_accuracy_level.m extra/NaN/src/covm_mex.cpp extra/NaN/src/sumskipnan_mex.cpp |
diffstat | 3 files changed, 745 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/NaN/inst/flag_accuracy_level.m Wed Sep 30 22:18:27 2009 +0000 @@ -0,0 +1,67 @@ +function FLAG = flag_accuracy_level(i) +% FLAG_ACCURACY_LEVEL sets and gets accuracy level +% used in SUMSKIPNAN_MEX and COVM_MEX +% The error margin of the naive summation is N*eps (N is the number of samples), +% the error margin is only 2*eps if Kahan's summation is used [1]. +% +% 0: maximum speed and minimum accuracy (error = N*2^-52) +% of double (64bit float) without Kahan summation +% 1: {default] accuracy of extend double (80bit float) +% without Kahan summation (error = N*2^-64) +% 2: minimum speed and minimum accuracy (error = 2^-64) +% of double with Kahan summation +% 3: accuracy (error = 2^-52) +% of double (64bit float) with Kahan summation +% +% First tests suggest that 1 is a good solution +% +% FLAG = flag_accuracy_level() +% gets current level +% +% flag_accuracy_level(FLAG) % sets mode +% +% This function is experimental and might disappear without further notice, +% so donot rely on it. +% +% Reference: +% [1] David Goldberg, +% What Every Computer Scientist Should Know About Floating-Point Arithmetic +% ACM Computing Surveys, Vol 23, No 1, March 1991. + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +% $Id$ +% Copyright (C) 2009 by Alois Schloegl <a.schloegl@ieee.org> +% This function is part of the NaN-toolbox +% http://hci.tu-graz.ac.at/~schloegl/matlab/NaN/ + + +persistent FLAG_ACCURACY_LEVEL; + +%% if strcmp(version,'3.6'), FLAG_ACCURACY_LEVEL=1; end; %% hack for the use with Freemat3.6 + +%%% set DEFAULT value of FLAG +if isempty(FLAG_ACCURACY_LEVEL), + FLAG_ACCURACY_LEVEL = 1; +end; + +if nargin>0, + if (i>3) i=3; end; + if (i<0) i=0; end; + FLAG_ACCURACY_LEVEL = double(i); +end; +FLAG = FLAG_ACCURACY_LEVEL; +
--- a/extra/NaN/src/covm_mex.cpp Wed Sep 30 19:09:46 2009 +0000 +++ b/extra/NaN/src/covm_mex.cpp Wed Sep 30 22:18:27 2009 +0000 @@ -109,18 +109,18 @@ W = mxGetPr(PInputs[3]); else mexErrMsgTxt("number of elements in W must match numbers of rows in X"); - } - if ((PInputCount > 4) && mxGetChars(PInputs[4])) { - switch (*mxGetChars(PInputs[4])) { - case 'S': - case 's': - flag_speed = 1; - break; - } - } - //mexPrintf("Flag Speed=%i\n",flag_speed); + int ACC_LEVEL = 1; + { + mxArray *LEVEL = NULL; + int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); + if (!s) { + ACC_LEVEL = (int) mxGetScalar(LEVEL); + } + mxDestroyArray(LEVEL); + } + mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); if (Y0==NULL) { Y0 = X0; @@ -202,7 +202,112 @@ } #else - if (flag_speed) { + if (ACC_LEVEL == 0) { + /*------ version 2 --------------------- + this version seems to be faster than the one above. + it is also faster but less accurate than the version below + */ + if ( (X0 != Y0) || (cX != cY) ) + /******** X!=Y, output is not symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double nn=0.0; + for (k=0; k<rX; k++) { + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + cc += z*W[k]; + nn += W[k]; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = nn; + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + cc += z; + nn++; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = (double)nn; + } + else // if (X0==Y0) && (cX==cY) + /******** X==Y, output is symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double nn=0.0; + for (k=0; k<rX; k++) { + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + cc += z*W[k]; + nn += W[k]; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = nn; + NN[j+i*cX] = nn; + } + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + cc += z; + nn++; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = (double)nn; + NN[j+i*cX] = (double)nn; + } + } + + } + else if (ACC_LEVEL == 1) { /*------ version 2 --------------------- this version seems to be faster than the one above. it is also faster but less accurate than the version below @@ -307,7 +412,7 @@ } } - else { + else if (ACC_LEVEL == 2) { /*------ version 3 --------------------- using Kahan's summation formula [1] this gives more accurate results while the computational effort within the loop is about 4x as high @@ -454,6 +559,154 @@ } } } + else if (ACC_LEVEL == 3) { + /*------ version 3 --------------------- + using Kahan's summation formula [1] + this gives more accurate results while the computational effort within the loop is about 4x as high + However, first test show an increase in computational time of only about 25 %. + + [1] David Goldberg, + What Every Computer Scientist Should Know About Floating-Point Arithmetic + ACM Computing Surveys, Vol 23, No 1, March 1991 + */ + if ( (X0 != Y0) || (cX != cY) ) + /******** X!=Y, output is not symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double nn=0.0; + double rc=0.0; + double rn=0.0; + for (k=0; k<rX; k++) { + double t,y; + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z*W[k]; [1] + y = z*W[k]-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + // nn += W[k]; [1] + y = z*W[k]-rn; + t = nn+y; + rn= (t-nn)-y; + nn= t; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = nn; + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=0; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double rc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + double t,y; + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z; [1] + y = z-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + nn++; + } + CC[i+j*cX] = cc; + if (NN != NULL) + NN[i+j*cX] = (double)nn; + } + else // if (X0==Y0) && (cX==cY) + /******** X==Y, output is symetric *******/ + if (W) /* weighted version */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double nn=0.0; + double rc=0.0; + double rn=0.0; + for (k=0; k<rX; k++) { + double t,y; + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z*W[k]; [1] + y = z*W[k]-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + // nn += W[k]; [1] + y = z*W[k]-rn; + t = nn+y; + rn= (t-nn)-y; + nn= t; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = nn; + NN[j+i*cX] = nn; + } + } + else /* no weights, all weights are 1 */ + for (i=0; i<cX; i++) + for (j=i; j<cY; j++) { + X = X0+i*rX; + Y = Y0+j*rY; + double cc=0.0; + double rc=0.0; + size_t nn=0; + for (k=0; k<rX; k++) { + double t,y; + double z = X[k]*Y[k]; + if (isnan(z)) { +#ifndef NO_FLAG + flag_isNaN = 1; +#endif + continue; + } + // cc += z; [1] + y = z-rc; + t = cc+y; + rc= (t-cc)-y; + cc= t; + + nn++; + } + CC[i+j*cX] = cc; + CC[j+i*cX] = cc; + if (NN != NULL) { + NN[i+j*cX] = (double)nn; + NN[j+i*cX] = (double)nn; + } + } + } + #ifndef NO_FLAG //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");
--- a/extra/NaN/src/sumskipnan_mex.cpp Wed Sep 30 19:09:46 2009 +0000 +++ b/extra/NaN/src/sumskipnan_mex.cpp Wed Sep 30 22:18:27 2009 +0000 @@ -53,10 +53,14 @@ #include <math.h> #include "mex.h" +inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); //#define NO_FLAG @@ -149,7 +153,20 @@ W = mxGetPr(PInputs[3]); else mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); - } + } + + int ACC_LEVEL = 1; + { + mxArray *LEVEL = NULL; + int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); + if (!s) { + ACC_LEVEL = (int) mxGetScalar(LEVEL); + if ((D1>1) && (ACC_LEVEL>2)) + mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); + } + mxDestroyArray(LEVEL); + } + mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); // create outputs #define TYP mxDOUBLE_CLASS @@ -172,23 +189,101 @@ if (D1*D2*D3<1) // zero size array ; // do nothing - else if ((POutputCount <= 1) && !mxIsComplex(PInputs[0])) { + else if ((D1==1) && (ACC_LEVEL<1)) { + // extended accuray, naive summation, error = N*2^-64 + switch (POutputCount) { + case 1: + for (l = 0; l<D3; l++) { + double count; + __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); + } + break; + case 2: + for (l = 0; l<D3; l++) { + __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); + } + break; + case 3: + for (l = 0; l<D3; l++) { + __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); + } + break; + } + } + else if ((D1==1) && (ACC_LEVEL==1)) { + // extended accuray, naive summation, error = N*2^-64 + switch (POutputCount) { + case 1: + for (l = 0; l<D3; l++) { + double count; + __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); + } + break; + case 2: + for (l = 0; l<D3; l++) { + __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); + } + break; + case 3: + for (l = 0; l<D3; l++) { + __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); + } + break; + } + } + else if ((D1==1) && (ACC_LEVEL==2)) { + // ACC_LEVEL==2: extended accuracy and Kahan Summation, error = 2^-64 + switch (POutputCount) { + case 1: + for (l = 0; l<D3; l++) { + double count; + __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); + } + break; + case 2: + for (l = 0; l<D3; l++) { + __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); + } + break; + case 3: + for (l = 0; l<D3; l++) { + __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); + } + break; + } + } + else if ((D1==1) && (ACC_LEVEL==3)) { + // ACC_LEVEL==2: extended accuracy and Kahan Summation, error = 2^-64 + switch (POutputCount) { + case 1: + for (l = 0; l<D3; l++) { + double count; + __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); + } + break; + case 2: + for (l = 0; l<D3; l++) { + __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); + } + break; + case 3: + for (l = 0; l<D3; l++) { + __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); + } + break; + } + } + else if (POutputCount <= 1) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; // index for output ix1 = ix0*D2; // index for input - if (D1==1) - { - double count; - __sumskipnan2we__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W); - } - else { - for (j=0; j<D2; j++) { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputSum[ix2] += W[j]*x; } @@ -200,7 +295,7 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputSum[ix2] += x; } @@ -211,32 +306,26 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } // end for (j= + } // end for (j= - /* copy to output */ - for (j=0; j<D1; j++) { - LOutputSum[ix0+j] = LongOutputSum[ix0+j]; - } - } // end else + /* copy to output */ + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + } } } - else if ((POutputCount == 2) && !mxIsComplex(PInputs[0])) { + else if (POutputCount == 2) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; ix1 = ix0*D2; // index for input - if (D1==1) - { - __sumskipnan2we__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W); - } - else { - for (j=0; j<D2; j++) { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputCount[ix2] += W[j]; LongOutputSum[ix2] += W[j]*x; @@ -249,7 +338,7 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputCount[ix2] += 1.0; LongOutputSum[ix2] += x; @@ -261,33 +350,27 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } // end for (j= + } // end for (j= /* copy to output */ - for (j=0; j<D1; j++) { - LOutputSum[ix0+j] = LongOutputSum[ix0+j]; - LOutputCount[ix0+j] = LongOutputCount[ix0+j]; - } - } // end else + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + LOutputCount[ix0+j] = LongOutputCount[ix0+j]; + } // end else } } - else if ((POutputCount == 3) && !mxIsComplex(PInputs[0])) { + else if (POutputCount == 3) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; ix1 = ix0*D2; // index for input - if (D1==1) - { - __sumskipnan3we__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W); - } - else { - for (j=0; j<D2; j++) { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputCount[ix2] += W[j]; double t = W[j]*x; @@ -302,7 +385,7 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register long double x = *LInput; + long double x = *LInput; if (!isnan(x)) { LongOutputCount[ix2] += 1.0; LongOutputSum[ix2] += x; @@ -315,15 +398,14 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } // end for (j= + } // end for (j= - /* copy to output */ - for (j=0; j<D1; j++) { - LOutputSum[ix0+j] = LongOutputSum[ix0+j]; - LOutputCount[ix0+j] = LongOutputCount[ix0+j]; - LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; - } - } // end else + /* copy to output */ + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + LOutputCount[ix0+j] = LongOutputCount[ix0+j]; + LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; + } } } if (LongOutputSum) mxFree(LongOutputSum); @@ -385,7 +467,117 @@ // with weight vector long double count = 0.0; do { - register double x = *data; + long double x = *data; + if (!isnan(x)) + { + count += *W; + sum += *W*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + do { + long double x = *data; + if (!isnan(x)) + { + countI++; + sum += x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + +} + + +inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + long double msq=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + do { + long double x = *data; + if (!isnan(x)) { + count += *W; + long double t = *W*x; + sum += t; + msq += x*t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + do { + long double x = *data; + if (!isnan(x)) { + countI++; + sum += x; + msq += x*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + *s2 = msq; +} + +inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + do { + double x = *data; if (!isnan(x)) { count += *W; @@ -429,17 +621,17 @@ } -inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { - long double sum=0; - long double msq=0; + double sum=0; + double msq=0; char flag=0; // LOOP along dimension DIM void *end = data + stride*Ni; if (W) { // with weight vector - long double count = 0.0; + double count = 0.0; do { double x = *data; if (!isnan(x)) { @@ -486,6 +678,15 @@ +/*************************************** + using Kahan's summation formula [1] + this gives more accurate results while the computational effort within the loop is about 4x as high + First tests show a penalty of about 40% in terms of computational time. + + [1] David Goldberg, + What Every Computer Scientist Should Know About Floating-Point Arithmetic + ACM Computing Surveys, Vol 23, No 1, March 1991. + ****************************************/ inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { @@ -499,7 +700,7 @@ long double count = 0.0; long double rc=0.0, rn=0.0; do { - register double x = *data; + long double x = *data; long double t,y; if (!isnan(x)) { @@ -530,7 +731,7 @@ size_t countI = 0; long double rc=0.0; do { - double x = *data; + long double x = *data; long double t,y; if (!isnan(x)) { @@ -559,17 +760,6 @@ } - -/*************************************** - using Kahan's summation formula [1] - this gives more accurate results while the computational effort within the loop is about 4x as high - First tests show a penalty of about 40% in terms of computational time. - - [1] David Goldberg, - What Every Computer Scientist Should Know About Floating-Point Arithmetic - ACM Computing Surveys, Vol 23, No 1, March 1991. - ****************************************/ - inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { long double sum=0; @@ -583,7 +773,7 @@ long double count = 0.0; long double rc=0.0, rn=0.0, rq=0.0; do { - double x = *data; + long double x = *data; long double t,y; if (!isnan(x)) { //count += *W; [1] @@ -619,7 +809,7 @@ size_t countI = 0; long double rc=0.0, rq=0.0; do { - double x = *data; + long double x = *data; long double t,y; if (!isnan(x)) { countI++; @@ -652,3 +842,157 @@ *s2 = msq; } +inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + double rc=0.0, rn=0.0; + do { + double x = *data; + double t,y; + if (!isnan(x)) + { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + double rc=0.0; + do { + double x = *data; + double t,y; + if (!isnan(x)) + { + countI++; + // sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + +} + + +inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + double msq=0; + char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + double rc=0.0, rn=0.0, rq=0.0; + do { + double x = *data; + double t,y; + if (!isnan(x)) { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + double w = *W*x; + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*w; + y = w*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + size_t countI = 0; + double rc=0.0, rq=0.0; + do { + double x = *data; + double t,y; + if (!isnan(x)) { + countI++; + //sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*x; + y = x*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (double)countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + *s2 = msq; +} +