Mercurial > forge
changeset 6079:21b5caea63bf octave-forge
extended accuracy for stride >1
author | schloegl |
---|---|
date | Thu, 24 Sep 2009 16:09:34 +0000 |
parents | 217cced3976c |
children | ec451e449573 |
files | extra/NaN/src/sumskipnan_mex.cpp |
diffstat | 1 files changed, 58 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/src/sumskipnan_mex.cpp Wed Sep 23 12:05:18 2009 +0000 +++ b/extra/NaN/src/sumskipnan_mex.cpp Thu Sep 24 16:09:34 2009 +0000 @@ -41,6 +41,9 @@ // //------------------------------------------------------------------- + + + #include <inttypes.h> #include <math.h> #include "mex.h" @@ -65,6 +68,9 @@ double* LOutputSum; double* LOutputCount; double* LOutputSum2; + long double* LongOutputSum = NULL; + long double* LongOutputCount = NULL; + long double* LongOutputSum2 = NULL; double x; double* W = NULL; // weight vector @@ -89,7 +95,7 @@ mexErrMsgTxt("First argument must be REAL/DOUBLE."); // get 2nd argument - if (PInputCount > 1){ + if (PInputCount > 1) { switch (mxGetNumberOfElements(PInputs[1])) { case 0: x = 0.0; // accept empty element break; @@ -144,14 +150,17 @@ POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum = mxGetPr(POutput[0]); + if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); if (POutputCount >= 2) { POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputCount = mxGetPr(POutput[1]); + if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); } if (POutputCount >= 3) { POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum2 = mxGetPr(POutput[2]); + if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); } mxFree(SZ2); @@ -167,14 +176,15 @@ double count; __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W); } - else for (j=0; j<D2; j++) { + else { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputSum[ix2] += W[j]*x; + LongOutputSum[ix2] += W[j]*x; } #ifndef NO_FLAG else @@ -184,9 +194,9 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputSum[ix2] += x; + LongOutputSum[ix2] += x; } #ifndef NO_FLAG else @@ -195,7 +205,12 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } + } // end for (j= + + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + } + } // end else } } @@ -208,15 +223,16 @@ { __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W); } - else for (j=0; j<D2; j++) { + else { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputCount[ix2] += W[j]; - LOutputSum[ix2] += W[j]*x; + LongOutputCount[ix2] += W[j]; + LongOutputSum[ix2] += W[j]*x; } #ifndef NO_FLAG else @@ -226,10 +242,10 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputCount[ix2] += 1.0; - LOutputSum[ix2] += x; + LongOutputCount[ix2] += 1.0; + LongOutputSum[ix2] += x; } #ifndef NO_FLAG else @@ -238,7 +254,13 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } + } // end for (j= + + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + LOutputCount[ix0+j] = LongOutputCount[ix0+j]; + } + } // end else } } @@ -251,17 +273,18 @@ { __sumskipnan3w__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W); } - else for (j=0; j<D2; j++) { + else { + for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM if (W) do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputCount[ix2] += W[j]; + LongOutputCount[ix2] += W[j]; double t = W[j]*x; - LOutputSum[ix2] += t; - LOutputSum2[ix2] += x*t; + LongOutputSum[ix2] += t; + LongOutputSum2[ix2] += x*t; } #ifndef NO_FLAG else @@ -271,11 +294,11 @@ ix2++; } while (ix2 != (l+1)*D1); else do { - register double x = *LInput; + register long double x = *LInput; if (!isnan(x)) { - LOutputCount[ix2] += 1.0; - LOutputSum[ix2] += x; - LOutputSum2[ix2] += x*x; + LongOutputCount[ix2] += 1.0; + LongOutputSum[ix2] += x; + LongOutputSum2[ix2] += x*x; } #ifndef NO_FLAG else @@ -284,9 +307,19 @@ LInput++; ix2++; } while (ix2 != (l+1)*D1); - } + } // end for (j= + + for (j=0; j<D1; j++) { + LOutputSum[ix0+j] = LongOutputSum[ix0+j]; + LOutputCount[ix0+j] = LongOutputCount[ix0+j]; + LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; + } + } // end else } } + if (LongOutputSum) mxFree(LongOutputSum); + if (LongOutputCount) mxFree(LongOutputCount); + if (LongOutputSum2) mxFree(LongOutputSum2); #ifndef NO_FLAG //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");