Mercurial > forge
annotate extra/NaN/src/sumskipnan_mex.cpp @ 8232:49c1c23128ea octave-forge
use only single test for (D1==1)
author | schloegl |
---|---|
date | Wed, 27 Jul 2011 22:20:09 +0000 |
parents | 6a419bec96bb |
children | 903fbabbd5fe |
rev | line source |
---|---|
7992 | 1 |
6549 | 2 //------------------------------------------------------------------- |
3 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // sumskipnan: sums all non-NaN values | |
21 // usage: | |
22 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); | |
23 // | |
24 // SUMSKIPNAN uses two techniques to reduce errors: | |
25 // 1) long double (80bit) instead of 64-bit double is used internally | |
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps | |
27 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). | |
28 // | |
29 // Input: | |
30 // - x data array | |
31 // - DIM (optional) dimension to sum | |
32 // - flag (optional) is actually an output argument telling whether some NaN was observed | |
33 // - W (optional) weight vector to compute weighted sum (default 1) | |
34 // | |
35 // Output: | |
36 // - o (weighted) sum along dimension DIM | |
37 // - count of valid elements | |
38 // - sums of squares | |
39 // | |
40 // | |
41 // $Id$ | |
8037 | 42 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 43 // This function is part of the NaN-toolbox |
7889 | 44 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 45 // |
46 //------------------------------------------------------------------- | |
47 | |
48 | |
49 | |
50 | |
51 #include <math.h> | |
7888 | 52 #include <stdint.h> |
6549 | 53 #include "mex.h" |
54 | |
55 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
56 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
57 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
58 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
59 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
60 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
61 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
62 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
63 | |
64 //#define NO_FLAG | |
65 | |
66 #ifdef tmwtypes_h | |
67 #if (MX_API_VER<=0x07020000) | |
68 typedef int mwSize; | |
69 #endif | |
70 #endif | |
71 | |
72 | |
73 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
74 { | |
75 const mwSize *SZ; | |
76 double* LInput; | |
77 double* LOutputSum; | |
78 double* LOutputCount; | |
79 double* LOutputSum2; | |
80 long double* LongOutputSum = NULL; | |
81 long double* LongOutputCount = NULL; | |
82 long double* LongOutputSum2 = NULL; | |
83 double x; | |
84 double* W = NULL; // weight vector | |
85 | |
86 mwSize DIM = 0; | |
87 mwSize D1, D2, D3; // NN; // | |
88 mwSize ND, ND2; // number of dimensions: input, output | |
89 mwSize ix0, ix1, ix2; // index to input and output | |
90 mwSize j, l; // running indices | |
91 mwSize *SZ2; // size of output | |
92 char flag_isNaN = 0; | |
93 | |
94 // check for proper number of input and output arguments | |
95 if ((PInputCount <= 0) || (PInputCount > 4)) | |
96 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); | |
97 if (POutputCount > 4) | |
98 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); | |
99 | |
100 // get 1st argument | |
101 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0])) | |
102 LInput = mxGetPr(PInputs[0]); | |
103 else | |
104 mexErrMsgTxt("First argument must be REAL/DOUBLE."); | |
105 | |
106 // get 2nd argument | |
107 if (PInputCount > 1) { | |
108 switch (mxGetNumberOfElements(PInputs[1])) { | |
109 case 0: x = 0.0; // accept empty element | |
110 break; | |
111 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); | |
112 break; | |
113 default:x = -1.0; // invalid | |
114 } | |
115 if ((x < 0) || (x > 65535) || (x != floor(x))) | |
116 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); | |
117 | |
118 DIM = (unsigned)floor(x); | |
119 } | |
120 | |
121 // get size | |
122 ND = mxGetNumberOfDimensions(PInputs[0]); | |
123 // NN = mxGetNumberOfElements(PInputs[0]); | |
124 SZ = mxGetDimensions(PInputs[0]); | |
125 | |
126 // if DIM==0 (undefined), look for first dimension with more than 1 element. | |
127 for (j = 0; (DIM < 1) && (j < ND); j++) | |
128 if (SZ[j]>1) DIM = j+1; | |
129 | |
130 if (DIM < 1) DIM=1; // in case DIM is still undefined | |
131 | |
132 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output | |
133 | |
134 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size | |
135 | |
136 for (j=0; j<ND; j++) // copy size of input; | |
137 SZ2[j] = SZ[j]; | |
138 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 | |
139 SZ2[j] = 1; | |
140 | |
141 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM | |
142 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM | |
143 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements | |
144 | |
145 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; | |
146 | |
147 // get weight vector for weighted sumskipnan | |
148 if (PInputCount > 3) { | |
149 if (!mxGetNumberOfElements(PInputs[3])) | |
150 ; // empty weight vector - no weighting | |
151 else if (mxGetNumberOfElements(PInputs[3])==D2) | |
152 W = mxGetPr(PInputs[3]); | |
153 else | |
154 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); | |
155 } | |
156 | |
157 int ACC_LEVEL = 0; | |
158 { | |
159 mxArray *LEVEL = NULL; | |
160 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
161 if (!s) { | |
162 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
163 if ((D1>1) && (ACC_LEVEL>2)) | |
164 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); | |
165 } | |
166 mxDestroyArray(LEVEL); | |
167 } | |
168 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
169 | |
170 // create outputs | |
171 #define TYP mxDOUBLE_CLASS | |
172 | |
173 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
174 LOutputSum = mxGetPr(POutput[0]); | |
175 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
176 if (POutputCount >= 2) { | |
177 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
178 LOutputCount = mxGetPr(POutput[1]); | |
179 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
180 } | |
181 if (POutputCount >= 3) { | |
182 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
183 LOutputSum2 = mxGetPr(POutput[2]); | |
184 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
185 } | |
186 mxFree(SZ2); | |
187 | |
7992 | 188 |
8232 | 189 if (!D1 || !D2 || !D3) // zero size array |
6549 | 190 ; // do nothing |
8232 | 191 else if (D1==1) { |
192 if (ACC_LEVEL<1) { | |
6549 | 193 // double accuray, naive summation, error = N*2^-52 |
194 switch (POutputCount) { | |
195 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
196 #pragma omp parallel for schedule(dynamic) |
6549 | 197 for (l = 0; l<D3; l++) { |
198 double count; | |
199 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 200 } |
6549 | 201 break; |
202 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
203 #pragma omp parallel for schedule(dynamic) |
6549 | 204 for (l = 0; l<D3; l++) { |
205 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 206 } |
6549 | 207 break; |
208 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
209 #pragma omp parallel for schedule(dynamic) |
6549 | 210 for (l = 0; l<D3; l++) { |
211 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 212 } |
6549 | 213 break; |
214 } | |
8232 | 215 } |
216 else if (ACC_LEVEL==1) { | |
6549 | 217 // extended accuray, naive summation, error = N*2^-64 |
218 switch (POutputCount) { | |
219 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
220 #pragma omp parallel for schedule(dynamic) |
6549 | 221 for (l = 0; l<D3; l++) { |
222 double count; | |
223 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 224 } |
6549 | 225 break; |
226 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
227 #pragma omp parallel for schedule(dynamic) |
6549 | 228 for (l = 0; l<D3; l++) { |
229 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 230 } |
6549 | 231 break; |
232 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
233 #pragma omp parallel for schedule(dynamic) |
6549 | 234 for (l = 0; l<D3; l++) { |
235 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 236 } |
6549 | 237 break; |
238 } | |
8232 | 239 } |
240 else if (ACC_LEVEL==3) { | |
6549 | 241 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 |
242 switch (POutputCount) { | |
243 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
244 #pragma omp parallel for schedule(dynamic) |
6549 | 245 for (l = 0; l<D3; l++) { |
246 double count; | |
247 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 248 } |
6549 | 249 break; |
250 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
251 #pragma omp parallel for schedule(dynamic) |
6549 | 252 for (l = 0; l<D3; l++) { |
253 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 254 } |
6549 | 255 break; |
256 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
257 #pragma omp parallel for schedule(dynamic) |
6549 | 258 for (l = 0; l<D3; l++) { |
259 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 260 } |
6549 | 261 break; |
262 } | |
8232 | 263 } |
264 else if (ACC_LEVEL==2) { | |
6549 | 265 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 |
266 switch (POutputCount) { | |
267 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
268 #pragma omp parallel for schedule(dynamic) |
6549 | 269 for (l = 0; l<D3; l++) { |
270 double count; | |
271 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 272 } |
6549 | 273 break; |
274 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
275 #pragma omp parallel for schedule(dynamic) |
6549 | 276 for (l = 0; l<D3; l++) { |
277 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 278 } |
6549 | 279 break; |
280 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
281 #pragma omp parallel for schedule(dynamic) |
6549 | 282 for (l = 0; l<D3; l++) { |
283 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 284 } |
6549 | 285 break; |
286 } | |
8232 | 287 } |
6549 | 288 } |
289 else if (POutputCount <= 1) { | |
290 // OUTER LOOP: along dimensions > DIM | |
7992 | 291 for (l = 0; l<D3; l++) { |
6549 | 292 ix0 = l*D1; // index for output |
293 ix1 = ix0*D2; // index for input | |
294 for (j=0; j<D2; j++) { | |
295 // minimize cache misses | |
296 ix2 = ix0; // index for output | |
297 // Inner LOOP: along dimensions < DIM | |
298 if (W) do { | |
299 long double x = *LInput; | |
300 if (!isnan(x)) { | |
301 LongOutputSum[ix2] += W[j]*x; | |
302 } | |
303 #ifndef NO_FLAG | |
304 else | |
305 flag_isNaN = 1; | |
306 #endif | |
307 LInput++; | |
308 ix2++; | |
309 } while (ix2 != (l+1)*D1); | |
310 else do { | |
311 long double x = *LInput; | |
312 if (!isnan(x)) { | |
313 LongOutputSum[ix2] += x; | |
314 } | |
315 #ifndef NO_FLAG | |
316 else | |
317 flag_isNaN = 1; | |
318 #endif | |
319 LInput++; | |
320 ix2++; | |
321 } while (ix2 != (l+1)*D1); | |
322 } // end for (j= | |
323 | |
324 /* copy to output */ | |
325 for (j=0; j<D1; j++) { | |
326 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
327 } | |
328 } | |
329 } | |
330 | |
331 else if (POutputCount == 2) { | |
332 // OUTER LOOP: along dimensions > DIM | |
333 for (l = 0; l<D3; l++) { | |
334 ix0 = l*D1; | |
335 ix1 = ix0*D2; // index for input | |
336 for (j=0; j<D2; j++) { | |
337 // minimize cache misses | |
338 ix2 = ix0; // index for output | |
339 // Inner LOOP: along dimensions < DIM | |
340 if (W) do { | |
341 long double x = *LInput; | |
342 if (!isnan(x)) { | |
343 LongOutputCount[ix2] += W[j]; | |
344 LongOutputSum[ix2] += W[j]*x; | |
345 } | |
346 #ifndef NO_FLAG | |
347 else | |
348 flag_isNaN = 1; | |
349 #endif | |
350 LInput++; | |
351 ix2++; | |
352 } while (ix2 != (l+1)*D1); | |
353 else do { | |
354 long double x = *LInput; | |
355 if (!isnan(x)) { | |
356 LongOutputCount[ix2] += 1.0; | |
357 LongOutputSum[ix2] += x; | |
358 } | |
359 #ifndef NO_FLAG | |
360 else | |
361 flag_isNaN = 1; | |
362 #endif | |
363 LInput++; | |
364 ix2++; | |
365 } while (ix2 != (l+1)*D1); | |
366 } // end for (j= | |
367 | |
368 /* copy to output */ | |
369 for (j=0; j<D1; j++) { | |
370 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
371 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
372 } // end else | |
373 } | |
374 } | |
375 | |
376 else if (POutputCount == 3) { | |
377 // OUTER LOOP: along dimensions > DIM | |
378 for (l = 0; l<D3; l++) { | |
379 ix0 = l*D1; | |
380 ix1 = ix0*D2; // index for input | |
381 for (j=0; j<D2; j++) { | |
382 // minimize cache misses | |
383 ix2 = ix0; // index for output | |
384 // Inner LOOP: along dimensions < DIM | |
385 if (W) do { | |
386 long double x = *LInput; | |
387 if (!isnan(x)) { | |
388 LongOutputCount[ix2] += W[j]; | |
389 double t = W[j]*x; | |
390 LongOutputSum[ix2] += t; | |
391 LongOutputSum2[ix2] += x*t; | |
392 } | |
393 #ifndef NO_FLAG | |
394 else | |
395 flag_isNaN = 1; | |
396 #endif | |
397 LInput++; | |
398 ix2++; | |
399 } while (ix2 != (l+1)*D1); | |
400 else do { | |
401 long double x = *LInput; | |
402 if (!isnan(x)) { | |
403 LongOutputCount[ix2] += 1.0; | |
404 LongOutputSum[ix2] += x; | |
405 LongOutputSum2[ix2] += x*x; | |
406 } | |
407 #ifndef NO_FLAG | |
408 else | |
409 flag_isNaN = 1; | |
410 #endif | |
411 LInput++; | |
412 ix2++; | |
413 } while (ix2 != (l+1)*D1); | |
414 } // end for (j= | |
415 | |
416 /* copy to output */ | |
417 for (j=0; j<D1; j++) { | |
418 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
419 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
420 LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; | |
421 } | |
422 } | |
423 } | |
7992 | 424 |
6549 | 425 if (LongOutputSum) mxFree(LongOutputSum); |
426 if (LongOutputCount) mxFree(LongOutputCount); | |
427 if (LongOutputSum2) mxFree(LongOutputSum2); | |
428 | |
429 #ifndef NO_FLAG | |
430 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
431 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
432 this requires that the input argument is a non-empty variable | |
433 */ | |
434 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
435 // set FLAG_NANS_OCCURED | |
436 switch (mxGetClassID(PInputs[2])) { | |
437 case mxLOGICAL_CLASS: | |
438 case mxCHAR_CLASS: | |
439 case mxINT8_CLASS: | |
440 case mxUINT8_CLASS: | |
441 *(uint8_t*)mxGetData(PInputs[2]) = 1; | |
442 break; | |
443 case mxDOUBLE_CLASS: | |
444 *(double*)mxGetData(PInputs[2]) = 1.0; | |
445 break; | |
446 case mxSINGLE_CLASS: | |
447 *(float*)mxGetData(PInputs[2]) = 1.0; | |
448 break; | |
449 case mxINT16_CLASS: | |
450 case mxUINT16_CLASS: | |
451 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
452 break; | |
453 case mxINT32_CLASS: | |
454 case mxUINT32_CLASS: | |
455 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
456 break; | |
457 case mxINT64_CLASS: | |
458 case mxUINT64_CLASS: | |
459 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
460 break; | |
461 case mxFUNCTION_CLASS: | |
462 case mxUNKNOWN_CLASS: | |
463 case mxCELL_CLASS: | |
464 case mxSTRUCT_CLASS: | |
7301
485d1594d155
addresses undesired side-effect off in-place sorting of data
schloegl
parents:
6549
diff
changeset
|
465 default: |
6549 | 466 mexPrintf("Type of 3rd input argument not supported."); |
467 } | |
468 } | |
469 #endif | |
470 } | |
471 | |
472 #define stride 1 | |
473 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
474 { | |
475 long double sum=0; | |
476 char flag=0; | |
477 // LOOP along dimension DIM | |
478 | |
479 double *end = data + stride*Ni; | |
480 if (W) { | |
481 // with weight vector | |
482 long double count = 0.0; | |
483 do { | |
484 long double x = *data; | |
485 if (!isnan(x)) | |
486 { | |
487 count += *W; | |
488 sum += *W*x; | |
489 } | |
490 #ifndef NO_FLAG | |
491 else | |
492 flag = 1; | |
493 #endif | |
494 | |
495 data++; // stride=1 | |
496 W++; | |
497 } | |
498 while (data < end); | |
499 *No = count; | |
500 } else { | |
501 // w/o weight vector | |
502 size_t countI = 0; | |
503 do { | |
504 long double x = *data; | |
505 if (!isnan(x)) | |
506 { | |
507 countI++; | |
508 sum += x; | |
509 } | |
510 #ifndef NO_FLAG | |
511 else | |
512 flag = 1; | |
513 #endif | |
514 data++; // stride=1 | |
515 } | |
516 while (data < end); | |
517 *No = (double)countI; | |
518 } | |
519 | |
520 #ifndef NO_FLAG | |
521 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
522 #endif | |
523 *s = sum; | |
524 | |
525 } | |
526 | |
527 | |
528 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
529 { | |
530 long double sum=0; | |
531 long double msq=0; | |
532 char flag=0; | |
533 // LOOP along dimension DIM | |
534 | |
535 double *end = data + stride*Ni; | |
536 if (W) { | |
537 // with weight vector | |
538 long double count = 0.0; | |
539 do { | |
540 long double x = *data; | |
541 if (!isnan(x)) { | |
542 count += *W; | |
543 long double t = *W*x; | |
544 sum += t; | |
545 msq += x*t; | |
546 } | |
547 #ifndef NO_FLAG | |
548 else | |
549 flag = 1; | |
550 #endif | |
551 data++; // stride=1 | |
552 W++; | |
553 } | |
554 while (data < end); | |
555 *No = count; | |
556 } else { | |
557 // w/o weight vector | |
558 size_t countI = 0; | |
559 do { | |
560 long double x = *data; | |
561 if (!isnan(x)) { | |
562 countI++; | |
563 sum += x; | |
564 msq += x*x; | |
565 } | |
566 #ifndef NO_FLAG | |
567 else | |
568 flag = 1; | |
569 #endif | |
570 data++; // stride=1 | |
571 } | |
572 while (data < end); | |
573 *No = (double)countI; | |
574 } | |
575 | |
576 #ifndef NO_FLAG | |
577 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
578 #endif | |
579 *s = sum; | |
580 *s2 = msq; | |
581 } | |
582 | |
583 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
584 { | |
585 double sum=0; | |
586 char flag=0; | |
587 // LOOP along dimension DIM | |
588 | |
589 double *end = data + stride*Ni; | |
590 if (W) { | |
591 // with weight vector | |
592 double count = 0.0; | |
593 do { | |
594 double x = *data; | |
595 if (!isnan(x)) | |
596 { | |
597 count += *W; | |
598 sum += *W*x; | |
599 } | |
600 #ifndef NO_FLAG | |
601 else | |
602 flag = 1; | |
603 #endif | |
604 | |
605 data++; // stride=1 | |
606 W++; | |
607 } | |
608 while (data < end); | |
609 *No = count; | |
610 } else { | |
611 // w/o weight vector | |
612 size_t countI = 0; | |
613 do { | |
614 double x = *data; | |
615 if (!isnan(x)) | |
616 { | |
617 countI++; | |
618 sum += x; | |
619 } | |
620 #ifndef NO_FLAG | |
621 else | |
622 flag = 1; | |
623 #endif | |
624 data++; // stride=1 | |
625 } | |
626 while (data < end); | |
627 *No = (double)countI; | |
628 } | |
629 | |
630 #ifndef NO_FLAG | |
631 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
632 #endif | |
633 *s = sum; | |
634 | |
635 } | |
636 | |
637 | |
638 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
639 { | |
640 double sum=0; | |
641 double msq=0; | |
642 char flag=0; | |
643 // LOOP along dimension DIM | |
644 | |
645 double *end = data + stride*Ni; | |
646 if (W) { | |
647 // with weight vector | |
648 double count = 0.0; | |
649 do { | |
650 double x = *data; | |
651 if (!isnan(x)) { | |
652 count += *W; | |
653 double t = *W*x; | |
654 sum += t; | |
655 msq += x*t; | |
656 } | |
657 #ifndef NO_FLAG | |
658 else | |
659 flag = 1; | |
660 #endif | |
661 data++; // stride=1 | |
662 W++; | |
663 } | |
664 while (data < end); | |
665 *No = count; | |
666 } else { | |
667 // w/o weight vector | |
668 size_t countI = 0; | |
669 do { | |
670 double x = *data; | |
671 if (!isnan(x)) { | |
672 countI++; | |
673 sum += x; | |
674 msq += x*x; | |
675 } | |
676 #ifndef NO_FLAG | |
677 else | |
678 flag = 1; | |
679 #endif | |
680 data++; // stride=1 | |
681 } | |
682 while (data < end); | |
683 *No = (double)countI; | |
684 } | |
685 | |
686 #ifndef NO_FLAG | |
687 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
688 #endif | |
689 *s = sum; | |
690 *s2 = msq; | |
691 } | |
692 | |
693 | |
694 | |
695 /*************************************** | |
696 using Kahan's summation formula [1] | |
697 this gives more accurate results while the computational effort within the loop is about 4x as high | |
698 First tests show a penalty of about 40% in terms of computational time. | |
699 | |
700 [1] David Goldberg, | |
701 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
702 ACM Computing Surveys, Vol 23, No 1, March 1991. | |
703 ****************************************/ | |
704 | |
705 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
706 { | |
707 long double sum=0; | |
708 char flag=0; | |
709 // LOOP along dimension DIM | |
710 | |
711 double *end = data + stride*Ni; | |
712 if (W) { | |
713 // with weight vector | |
714 long double count = 0.0; | |
715 long double rc=0.0, rn=0.0; | |
716 do { | |
717 long double x = *data; | |
718 long double t,y; | |
719 if (!isnan(x)) | |
720 { | |
721 //count += *W; [1] | |
722 y = *W-rn; | |
723 t = count+y; | |
724 rn= (t-count)-y; | |
725 count= t; | |
726 | |
727 //sum += *W*x; [1] | |
728 y = *W*x-rc; | |
729 t = sum+y; | |
730 rc= (t-sum)-y; | |
731 sum= t; | |
732 } | |
733 #ifndef NO_FLAG | |
734 else | |
735 flag = 1; | |
736 #endif | |
737 | |
738 data++; // stride=1 | |
739 W++; | |
740 } | |
741 while (data < end); | |
742 *No = count; | |
743 } else { | |
744 // w/o weight vector | |
745 size_t countI = 0; | |
746 long double rc=0.0; | |
747 do { | |
748 long double x = *data; | |
749 long double t,y; | |
750 if (!isnan(x)) | |
751 { | |
752 countI++; | |
753 // sum += x; [1] | |
754 y = x-rc; | |
755 t = sum+y; | |
756 rc= (t-sum)-y; | |
757 sum= t; | |
758 } | |
759 #ifndef NO_FLAG | |
760 else | |
761 flag = 1; | |
762 #endif | |
763 data++; // stride=1 | |
764 } | |
765 while (data < end); | |
766 *No = (double)countI; | |
767 } | |
768 | |
769 #ifndef NO_FLAG | |
770 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
771 #endif | |
772 *s = sum; | |
773 | |
774 } | |
775 | |
776 | |
777 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
778 { | |
779 long double sum=0; | |
780 long double msq=0; | |
781 char flag=0; | |
782 // LOOP along dimension DIM | |
783 | |
784 double *end = data + stride*Ni; | |
785 if (W) { | |
786 // with weight vector | |
787 long double count = 0.0; | |
788 long double rc=0.0, rn=0.0, rq=0.0; | |
789 do { | |
790 long double x = *data; | |
791 long double t,y; | |
792 if (!isnan(x)) { | |
793 //count += *W; [1] | |
794 y = *W-rn; | |
795 t = count+y; | |
796 rn= (t-count)-y; | |
797 count= t; | |
798 | |
799 long double w = *W*x; | |
800 //sum += *W*x; [1] | |
801 y = *W*x-rc; | |
802 t = sum+y; | |
803 rc= (t-sum)-y; | |
804 sum= t; | |
805 | |
806 // msq += x*w; | |
807 y = w*x-rq; | |
808 t = msq+y; | |
809 rq= (t-msq)-y; | |
810 msq= t; | |
811 } | |
812 #ifndef NO_FLAG | |
813 else | |
814 flag = 1; | |
815 #endif | |
816 data++; // stride=1 | |
817 W++; | |
818 } | |
819 while (data < end); | |
820 *No = count; | |
821 } else { | |
822 // w/o weight vector | |
823 size_t countI = 0; | |
824 long double rc=0.0, rq=0.0; | |
825 do { | |
826 long double x = *data; | |
827 long double t,y; | |
828 if (!isnan(x)) { | |
829 countI++; | |
830 //sum += x; [1] | |
831 y = x-rc; | |
832 t = sum+y; | |
833 rc= (t-sum)-y; | |
834 sum= t; | |
835 | |
836 // msq += x*x; | |
837 y = x*x-rq; | |
838 t = msq+y; | |
839 rq= (t-msq)-y; | |
840 msq= t; | |
841 } | |
842 #ifndef NO_FLAG | |
843 else | |
844 flag = 1; | |
845 #endif | |
846 data++; // stride=1 | |
847 } | |
848 while (data < end); | |
849 *No = (double)countI; | |
850 } | |
851 | |
852 #ifndef NO_FLAG | |
853 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
854 #endif | |
855 *s = sum; | |
856 *s2 = msq; | |
857 } | |
858 | |
859 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
860 { | |
861 double sum=0; | |
862 char flag=0; | |
863 // LOOP along dimension DIM | |
864 | |
865 double *end = data + stride*Ni; | |
866 if (W) { | |
867 // with weight vector | |
868 double count = 0.0; | |
869 double rc=0.0, rn=0.0; | |
870 do { | |
871 double x = *data; | |
872 double t,y; | |
873 if (!isnan(x)) | |
874 { | |
875 //count += *W; [1] | |
876 y = *W-rn; | |
877 t = count+y; | |
878 rn= (t-count)-y; | |
879 count= t; | |
880 | |
881 //sum += *W*x; [1] | |
882 y = *W*x-rc; | |
883 t = sum+y; | |
884 rc= (t-sum)-y; | |
885 sum= t; | |
886 } | |
887 #ifndef NO_FLAG | |
888 else | |
889 flag = 1; | |
890 #endif | |
891 | |
892 data++; // stride=1 | |
893 W++; | |
894 } | |
895 while (data < end); | |
896 *No = count; | |
897 } else { | |
898 // w/o weight vector | |
899 size_t countI = 0; | |
900 double rc=0.0; | |
901 do { | |
902 double x = *data; | |
903 double t,y; | |
904 if (!isnan(x)) | |
905 { | |
906 countI++; | |
907 // sum += x; [1] | |
908 y = x-rc; | |
909 t = sum+y; | |
910 rc= (t-sum)-y; | |
911 sum= t; | |
912 } | |
913 #ifndef NO_FLAG | |
914 else | |
915 flag = 1; | |
916 #endif | |
917 data++; // stride=1 | |
918 } | |
919 while (data < end); | |
920 *No = (double)countI; | |
921 } | |
922 | |
923 #ifndef NO_FLAG | |
924 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
925 #endif | |
926 *s = sum; | |
927 | |
928 } | |
929 | |
930 | |
931 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
932 { | |
933 double sum=0; | |
934 double msq=0; | |
935 char flag=0; | |
936 // LOOP along dimension DIM | |
937 | |
938 double *end = data + stride*Ni; | |
939 if (W) { | |
940 // with weight vector | |
941 double count = 0.0; | |
942 double rc=0.0, rn=0.0, rq=0.0; | |
943 do { | |
944 double x = *data; | |
945 double t,y; | |
946 if (!isnan(x)) { | |
947 //count += *W; [1] | |
948 y = *W-rn; | |
949 t = count+y; | |
950 rn= (t-count)-y; | |
951 count= t; | |
952 | |
953 double w = *W*x; | |
954 //sum += *W*x; [1] | |
955 y = *W*x-rc; | |
956 t = sum+y; | |
957 rc= (t-sum)-y; | |
958 sum= t; | |
959 | |
960 // msq += x*w; | |
961 y = w*x-rq; | |
962 t = msq+y; | |
963 rq= (t-msq)-y; | |
964 msq= t; | |
965 } | |
966 #ifndef NO_FLAG | |
967 else | |
968 flag = 1; | |
969 #endif | |
970 data++; // stride=1 | |
971 W++; | |
972 } | |
973 while (data < end); | |
974 *No = count; | |
975 } else { | |
976 // w/o weight vector | |
977 size_t countI = 0; | |
978 double rc=0.0, rq=0.0; | |
979 do { | |
980 double x = *data; | |
981 double t,y; | |
982 if (!isnan(x)) { | |
983 countI++; | |
984 //sum += x; [1] | |
985 y = x-rc; | |
986 t = sum+y; | |
987 rc= (t-sum)-y; | |
988 sum= t; | |
989 | |
990 // msq += x*x; | |
991 y = x*x-rq; | |
992 t = msq+y; | |
993 rq= (t-msq)-y; | |
994 msq= t; | |
995 } | |
996 #ifndef NO_FLAG | |
997 else | |
998 flag = 1; | |
999 #endif | |
1000 data++; // stride=1 | |
1001 } | |
1002 while (data < end); | |
1003 *No = (double)countI; | |
1004 } | |
1005 | |
1006 #ifndef NO_FLAG | |
1007 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
1008 #endif | |
1009 *s = sum; | |
1010 *s2 = msq; | |
1011 } | |
1012 |