Mercurial > forge
view extra/NaN/src/sumskipnan_mex.cpp @ 12691:6d6285a2a633 octave-forge
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
author | schloegl |
---|---|
date | Sat, 12 Sep 2015 14:16:39 +0000 |
parents | f26b1170ea90 |
children | 13815b367946 |
line wrap: on
line source
//------------------------------------------------------------------- // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. // // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see <http://www.gnu.org/licenses/>. // // // sumskipnan: sums all non-NaN values // usage: // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); // // SUMSKIPNAN uses two techniques to reduce errors: // 1) long double (80bit) instead of 64-bit double is used internally // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). // // Input: // - x data array // - DIM (optional) dimension to sum // - flag (optional) is actually an output argument telling whether some NaN was observed // - W (optional) weight vector to compute weighted sum (default 1) // // Output: // - o (weighted) sum along dimension DIM // - count of valid elements // - sums of squares // // // $Id$ // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- #include <math.h> #include <stdint.h> #include "mex.h" /* math.h has isnan() defined for all sizes of floating point numbers, but c++ assumes isnan(double), causing possible conversions for float and long double */ #define ISNAN(a) (a!=a) inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); //#define NO_FLAG #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { const mwSize *SZ; double* LInput; double* LOutputSum; double* LOutputCount; double* LOutputSum2; long double* LongOutputSum = NULL; long double* LongOutputCount = NULL; long double* LongOutputSum2 = NULL; double x; double* W = NULL; // weight vector mwSize DIM = 0; mwSize D1, D2, D3; // NN; // mwSize ND, ND2; // number of dimensions: input, output mwSize ix0, ix1, ix2; // index to input and output mwSize j, l; // running indices mwSize *SZ2; // size of output char flag_isNaN = 0; // check for proper number of input and output arguments if ((PInputCount <= 0) || (PInputCount > 4)) mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); if (POutputCount > 4) mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); // get 1st argument if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) LInput = mxGetPr(PInputs[0]); else mexErrMsgTxt("First argument must be and not sparse REAL/DOUBLE."); // get 2nd argument if (PInputCount > 1) { switch (mxGetNumberOfElements(PInputs[1])) { case 0: x = 0.0; // accept empty element break; case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); break; default:x = -1.0; // invalid } if ((x < 0) || (x > 65535) || (x != floor(x))) mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); DIM = (unsigned)floor(x); } // get size ND = mxGetNumberOfDimensions(PInputs[0]); // NN = mxGetNumberOfElements(PInputs[0]); SZ = mxGetDimensions(PInputs[0]); // if DIM==0 (undefined), look for first dimension with more than 1 element. for (j = 0; (DIM < 1) && (j < ND); j++) if (SZ[j]>1) DIM = j+1; if (DIM < 1) DIM=1; // in case DIM is still undefined ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size for (j=0; j<ND; j++) // copy size of input; SZ2[j] = SZ[j]; for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 SZ2[j] = 1; for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; // get weight vector for weighted sumskipnan if (PInputCount > 3) { if (!mxGetNumberOfElements(PInputs[3])) ; // empty weight vector - no weighting else if (mxGetNumberOfElements(PInputs[3])==D2) W = mxGetPr(PInputs[3]); else mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); } int ACC_LEVEL = 0; { mxArray *LEVEL = NULL; int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); if (!s) { ACC_LEVEL = (int) mxGetScalar(LEVEL); if ((D1>1) && (ACC_LEVEL>2)) mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); } mxDestroyArray(LEVEL); } // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); // create outputs #define TYP mxDOUBLE_CLASS POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum = mxGetPr(POutput[0]); if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); if (POutputCount >= 2) { POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputCount = mxGetPr(POutput[1]); if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); } if (POutputCount >= 3) { POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum2 = mxGetPr(POutput[2]); if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); } mxFree(SZ2); if (!D1 || !D2 || !D3) // zero size array ; // do nothing else if (D1==1) { if (ACC_LEVEL<1) { // double accuray, naive summation, error = N*2^-52 switch (POutputCount) { case 0: case 1: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { double count; __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); } break; case 2: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); } break; case 3: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); } break; } } else if (ACC_LEVEL==1) { // extended accuray, naive summation, error = N*2^-64 switch (POutputCount) { case 0: case 1: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { double count; __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); } break; case 2: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); } break; case 3: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); } break; } } else if (ACC_LEVEL==3) { // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 switch (POutputCount) { case 0: case 1: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { double count; __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); } break; case 2: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); } break; case 3: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); } break; } } else if (ACC_LEVEL==2) { // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 switch (POutputCount) { case 0: case 1: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { double count; __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); } break; case 2: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); } break; case 3: #pragma omp parallel for schedule(dynamic) for (l = 0; l<D3; l++) { __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); } break; } } } else if (POutputCount <= 1) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; // index for output ix1 = ix0*D2; // index for input for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { long double x = *LInput; if (!ISNAN(x)) { LongOutputSum[ix2] += W[j]*x; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); else do { long double x = *LInput; if (!ISNAN(x)) { LongOutputSum[ix2] += x; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); } // end for (j= /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; } } } else if (POutputCount == 2) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; ix1 = ix0*D2; // index for input for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { long double x = *LInput; if (!ISNAN(x)) { LongOutputCount[ix2] += W[j]; LongOutputSum[ix2] += W[j]*x; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); else do { long double x = *LInput; if (!ISNAN(x)) { LongOutputCount[ix2] += 1.0; LongOutputSum[ix2] += x; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); } // end for (j= /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; } // end else } } else if (POutputCount == 3) { // OUTER LOOP: along dimensions > DIM for (l = 0; l<D3; l++) { ix0 = l*D1; ix1 = ix0*D2; // index for input for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { long double x = *LInput; if (!ISNAN(x)) { LongOutputCount[ix2] += W[j]; long double t = W[j]*x; LongOutputSum[ix2] += t; LongOutputSum2[ix2] += x*t; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); else do { long double x = *LInput; if (!ISNAN(x)) { LongOutputCount[ix2] += 1.0; LongOutputSum[ix2] += x; LongOutputSum2[ix2] += x*x; } #ifndef NO_FLAG else flag_isNaN = 1; #endif LInput++; ix2++; } while (ix2 != (l+1)*D1); } // end for (j= /* copy to output */ for (j=0; j<D1; j++) { LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; LOutputSum2[ix0+j] = (typeof(*LOutputSum2))LongOutputSum2[ix0+j]; } } } if (LongOutputSum) mxFree(LongOutputSum); if (LongOutputCount) mxFree(LongOutputCount); if (LongOutputSum2) mxFree(LongOutputSum2); #ifndef NO_FLAG //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); /* this is a hack, the third input argument is used to return whether a NaN occured or not. this requires that the input argument is a non-empty variable */ if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { // set FLAG_NANS_OCCURED switch (mxGetClassID(PInputs[2])) { case mxLOGICAL_CLASS: case mxCHAR_CLASS: case mxINT8_CLASS: case mxUINT8_CLASS: *(uint8_t*)mxGetData(PInputs[2]) = 1; break; case mxDOUBLE_CLASS: *(double*)mxGetData(PInputs[2]) = 1.0; break; case mxSINGLE_CLASS: *(float*)mxGetData(PInputs[2]) = 1.0; break; case mxINT16_CLASS: case mxUINT16_CLASS: *(uint16_t*)mxGetData(PInputs[2]) = 1; break; case mxINT32_CLASS: case mxUINT32_CLASS: *(uint32_t*)mxGetData(PInputs[2])= 1; break; case mxINT64_CLASS: case mxUINT64_CLASS: *(uint64_t*)mxGetData(PInputs[2]) = 1; break; case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxCELL_CLASS: case mxSTRUCT_CLASS: default: mexPrintf("Type of 3rd input argument not supported."); } } #endif } #define stride 1 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { long double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; do { long double x = *data; if (!ISNAN(x)) { count += *W; sum += *W*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; do { long double x = *data; if (!ISNAN(x)) { countI++; sum += x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; } inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { long double sum=0; long double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; do { long double x = *data; if (!ISNAN(x)) { count += *W; long double t = *W*x; sum += t; msq += x*t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; do { long double x = *data; if (!ISNAN(x)) { countI++; sum += x; msq += x*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; *s2 = (typeof(*s2))msq; } inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; do { double x = *data; if (!ISNAN(x)) { count += *W; sum += *W*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; do { double x = *data; if (!ISNAN(x)) { countI++; sum += x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; } inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { double sum=0; double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; do { double x = *data; if (!ISNAN(x)) { count += *W; double t = *W*x; sum += t; msq += x*t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; do { double x = *data; if (!ISNAN(x)) { countI++; sum += x; msq += x*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; *s2 = (typeof(*s2))msq; } /*************************************** using Kahan's summation formula [1] this gives more accurate results while the computational effort within the loop is about 4x as high First tests show a penalty of about 40% in terms of computational time. [1] David Goldberg, What Every Computer Scientist Should Know About Floating-Point Arithmetic ACM Computing Surveys, Vol 23, No 1, March 1991. ****************************************/ inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { long double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; long double rc=0.0, rn=0.0; do { long double x = *data; long double t,y; if (!ISNAN(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; long double rc=0.0; do { long double x = *data; long double t,y; if (!ISNAN(x)) { countI++; // sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; } inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { long double sum=0; long double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; long double rc=0.0, rn=0.0, rq=0.0; do { long double x = *data; long double t,y; if (!ISNAN(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; long double w = *W*x; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*w; y = w*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; long double rc=0.0, rq=0.0; do { long double x = *data; long double t,y; if (!ISNAN(x)) { countI++; //sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*x; y = x*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; *s2 = (typeof(*s))msq; } inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; double rc=0.0, rn=0.0; do { double x = *data; double t,y; if (!ISNAN(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; double rc=0.0; do { double x = *data; double t,y; if (!ISNAN(x)) { countI++; // sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; } inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { double sum=0; double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; double rc=0.0, rn=0.0, rq=0.0; do { double x = *data; double t,y; if (!ISNAN(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; double w = *W*x; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*w; y = w*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = (typeof(*No))count; } else { // w/o weight vector size_t countI = 0; double rc=0.0, rq=0.0; do { double x = *data; double t,y; if (!ISNAN(x)) { countI++; //sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*x; y = x*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (typeof(*No))countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = (typeof(*s))sum; *s2 = (typeof(*s))msq; }