6549
|
1 //------------------------------------------------------------------- |
|
2 #pragma hdrstop |
|
3 //------------------------------------------------------------------- |
|
4 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. |
|
5 // |
|
6 // |
|
7 // This program is free software; you can redistribute it and/or modify |
|
8 // it under the terms of the GNU General Public License as published by |
|
9 // the Free Software Foundation; either version 3 of the License, or |
|
10 // (at your option) any later version. |
|
11 // |
|
12 // This program is distributed in the hope that it will be useful, |
|
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 // GNU General Public License for more details. |
|
16 // |
|
17 // You should have received a copy of the GNU General Public License |
|
18 // along with this program; if not, see <http://www.gnu.org/licenses/>. |
|
19 // |
|
20 // |
|
21 // sumskipnan: sums all non-NaN values |
|
22 // usage: |
|
23 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); |
|
24 // |
|
25 // SUMSKIPNAN uses two techniques to reduce errors: |
|
26 // 1) long double (80bit) instead of 64-bit double is used internally |
|
27 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps |
|
28 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). |
|
29 // |
|
30 // Input: |
|
31 // - x data array |
|
32 // - DIM (optional) dimension to sum |
|
33 // - flag (optional) is actually an output argument telling whether some NaN was observed |
|
34 // - W (optional) weight vector to compute weighted sum (default 1) |
|
35 // |
|
36 // Output: |
|
37 // - o (weighted) sum along dimension DIM |
|
38 // - count of valid elements |
|
39 // - sums of squares |
|
40 // |
|
41 // |
|
42 // $Id$ |
|
43 // Copyright (C) 2009 Alois Schloegl <a.schloegl@ieee.org> |
|
44 // This function is part of the NaN-toolbox |
|
45 // http://hci.tugraz.at/~schloegl/matlab/NaN/ |
|
46 // |
|
47 //------------------------------------------------------------------- |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 #include <inttypes.h> |
|
53 #include <math.h> |
|
54 #include "mex.h" |
|
55 |
|
56 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
|
57 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
|
58 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
|
59 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
|
60 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
|
61 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
|
62 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
|
63 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
|
64 |
|
65 //#define NO_FLAG |
|
66 |
|
67 #ifdef tmwtypes_h |
|
68 #if (MX_API_VER<=0x07020000) |
|
69 typedef int mwSize; |
|
70 #endif |
|
71 #endif |
|
72 |
|
73 |
|
74 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) |
|
75 { |
|
76 const mwSize *SZ; |
|
77 double* LInput; |
|
78 double* LOutputSum; |
|
79 double* LOutputCount; |
|
80 double* LOutputSum2; |
|
81 long double* LongOutputSum = NULL; |
|
82 long double* LongOutputCount = NULL; |
|
83 long double* LongOutputSum2 = NULL; |
|
84 double x; |
|
85 double* W = NULL; // weight vector |
|
86 |
|
87 mwSize DIM = 0; |
|
88 mwSize D1, D2, D3; // NN; // |
|
89 mwSize ND, ND2; // number of dimensions: input, output |
|
90 mwSize ix0, ix1, ix2; // index to input and output |
|
91 mwSize j, l; // running indices |
|
92 mwSize *SZ2; // size of output |
|
93 char flag_isNaN = 0; |
|
94 |
|
95 // check for proper number of input and output arguments |
|
96 if ((PInputCount <= 0) || (PInputCount > 4)) |
|
97 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); |
|
98 if (POutputCount > 4) |
|
99 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); |
|
100 |
|
101 // get 1st argument |
|
102 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0])) |
|
103 LInput = mxGetPr(PInputs[0]); |
|
104 else |
|
105 mexErrMsgTxt("First argument must be REAL/DOUBLE."); |
|
106 |
|
107 // get 2nd argument |
|
108 if (PInputCount > 1) { |
|
109 switch (mxGetNumberOfElements(PInputs[1])) { |
|
110 case 0: x = 0.0; // accept empty element |
|
111 break; |
|
112 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); |
|
113 break; |
|
114 default:x = -1.0; // invalid |
|
115 } |
|
116 if ((x < 0) || (x > 65535) || (x != floor(x))) |
|
117 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); |
|
118 |
|
119 DIM = (unsigned)floor(x); |
|
120 } |
|
121 |
|
122 // get size |
|
123 ND = mxGetNumberOfDimensions(PInputs[0]); |
|
124 // NN = mxGetNumberOfElements(PInputs[0]); |
|
125 SZ = mxGetDimensions(PInputs[0]); |
|
126 |
|
127 // if DIM==0 (undefined), look for first dimension with more than 1 element. |
|
128 for (j = 0; (DIM < 1) && (j < ND); j++) |
|
129 if (SZ[j]>1) DIM = j+1; |
|
130 |
|
131 if (DIM < 1) DIM=1; // in case DIM is still undefined |
|
132 |
|
133 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output |
|
134 |
|
135 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size |
|
136 |
|
137 for (j=0; j<ND; j++) // copy size of input; |
|
138 SZ2[j] = SZ[j]; |
|
139 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 |
|
140 SZ2[j] = 1; |
|
141 |
|
142 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM |
|
143 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM |
|
144 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements |
|
145 |
|
146 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; |
|
147 |
|
148 // get weight vector for weighted sumskipnan |
|
149 if (PInputCount > 3) { |
|
150 if (!mxGetNumberOfElements(PInputs[3])) |
|
151 ; // empty weight vector - no weighting |
|
152 else if (mxGetNumberOfElements(PInputs[3])==D2) |
|
153 W = mxGetPr(PInputs[3]); |
|
154 else |
|
155 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); |
|
156 } |
|
157 |
|
158 int ACC_LEVEL = 0; |
|
159 { |
|
160 mxArray *LEVEL = NULL; |
|
161 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); |
|
162 if (!s) { |
|
163 ACC_LEVEL = (int) mxGetScalar(LEVEL); |
|
164 if ((D1>1) && (ACC_LEVEL>2)) |
|
165 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); |
|
166 } |
|
167 mxDestroyArray(LEVEL); |
|
168 } |
|
169 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); |
|
170 |
|
171 // create outputs |
|
172 #define TYP mxDOUBLE_CLASS |
|
173 |
|
174 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); |
|
175 LOutputSum = mxGetPr(POutput[0]); |
|
176 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); |
|
177 |
|
178 if (POutputCount >= 2) { |
|
179 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); |
|
180 LOutputCount = mxGetPr(POutput[1]); |
|
181 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); |
|
182 } |
|
183 if (POutputCount >= 3) { |
|
184 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); |
|
185 LOutputSum2 = mxGetPr(POutput[2]); |
|
186 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); |
|
187 } |
|
188 mxFree(SZ2); |
|
189 |
|
190 if (D1*D2*D3<1) // zero size array |
|
191 ; // do nothing |
|
192 else if ((D1==1) && (ACC_LEVEL<1)) { |
|
193 // double accuray, naive summation, error = N*2^-52 |
|
194 switch (POutputCount) { |
|
195 case 1: |
|
196 for (l = 0; l<D3; l++) { |
|
197 double count; |
|
198 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); |
|
199 } |
|
200 break; |
|
201 case 2: |
|
202 for (l = 0; l<D3; l++) { |
|
203 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); |
|
204 } |
|
205 break; |
|
206 case 3: |
|
207 for (l = 0; l<D3; l++) { |
|
208 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); |
|
209 } |
|
210 break; |
|
211 } |
|
212 } |
|
213 else if ((D1==1) && (ACC_LEVEL==1)) { |
|
214 // extended accuray, naive summation, error = N*2^-64 |
|
215 switch (POutputCount) { |
|
216 case 1: |
|
217 for (l = 0; l<D3; l++) { |
|
218 double count; |
|
219 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); |
|
220 } |
|
221 break; |
|
222 case 2: |
|
223 for (l = 0; l<D3; l++) { |
|
224 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); |
|
225 } |
|
226 break; |
|
227 case 3: |
|
228 for (l = 0; l<D3; l++) { |
|
229 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); |
|
230 } |
|
231 break; |
|
232 } |
|
233 } |
|
234 else if ((D1==1) && (ACC_LEVEL==3)) { |
|
235 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 |
|
236 switch (POutputCount) { |
|
237 case 1: |
|
238 for (l = 0; l<D3; l++) { |
|
239 double count; |
|
240 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); |
|
241 } |
|
242 break; |
|
243 case 2: |
|
244 for (l = 0; l<D3; l++) { |
|
245 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); |
|
246 } |
|
247 break; |
|
248 case 3: |
|
249 for (l = 0; l<D3; l++) { |
|
250 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); |
|
251 } |
|
252 break; |
|
253 } |
|
254 } |
|
255 else if ((D1==1) && (ACC_LEVEL==2)) { |
|
256 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 |
|
257 switch (POutputCount) { |
|
258 case 1: |
|
259 for (l = 0; l<D3; l++) { |
|
260 double count; |
|
261 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); |
|
262 } |
|
263 break; |
|
264 case 2: |
|
265 for (l = 0; l<D3; l++) { |
|
266 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); |
|
267 } |
|
268 break; |
|
269 case 3: |
|
270 for (l = 0; l<D3; l++) { |
|
271 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); |
|
272 } |
|
273 break; |
|
274 } |
|
275 } |
|
276 else if (POutputCount <= 1) { |
|
277 // OUTER LOOP: along dimensions > DIM |
|
278 for (l = 0; l<D3; l++) { |
|
279 ix0 = l*D1; // index for output |
|
280 ix1 = ix0*D2; // index for input |
|
281 for (j=0; j<D2; j++) { |
|
282 // minimize cache misses |
|
283 ix2 = ix0; // index for output |
|
284 // Inner LOOP: along dimensions < DIM |
|
285 if (W) do { |
|
286 long double x = *LInput; |
|
287 if (!isnan(x)) { |
|
288 LongOutputSum[ix2] += W[j]*x; |
|
289 } |
|
290 #ifndef NO_FLAG |
|
291 else |
|
292 flag_isNaN = 1; |
|
293 #endif |
|
294 LInput++; |
|
295 ix2++; |
|
296 } while (ix2 != (l+1)*D1); |
|
297 else do { |
|
298 long double x = *LInput; |
|
299 if (!isnan(x)) { |
|
300 LongOutputSum[ix2] += x; |
|
301 } |
|
302 #ifndef NO_FLAG |
|
303 else |
|
304 flag_isNaN = 1; |
|
305 #endif |
|
306 LInput++; |
|
307 ix2++; |
|
308 } while (ix2 != (l+1)*D1); |
|
309 } // end for (j= |
|
310 |
|
311 /* copy to output */ |
|
312 for (j=0; j<D1; j++) { |
|
313 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; |
|
314 } |
|
315 } |
|
316 } |
|
317 |
|
318 else if (POutputCount == 2) { |
|
319 // OUTER LOOP: along dimensions > DIM |
|
320 for (l = 0; l<D3; l++) { |
|
321 ix0 = l*D1; |
|
322 ix1 = ix0*D2; // index for input |
|
323 for (j=0; j<D2; j++) { |
|
324 // minimize cache misses |
|
325 ix2 = ix0; // index for output |
|
326 // Inner LOOP: along dimensions < DIM |
|
327 if (W) do { |
|
328 long double x = *LInput; |
|
329 if (!isnan(x)) { |
|
330 LongOutputCount[ix2] += W[j]; |
|
331 LongOutputSum[ix2] += W[j]*x; |
|
332 } |
|
333 #ifndef NO_FLAG |
|
334 else |
|
335 flag_isNaN = 1; |
|
336 #endif |
|
337 LInput++; |
|
338 ix2++; |
|
339 } while (ix2 != (l+1)*D1); |
|
340 else do { |
|
341 long double x = *LInput; |
|
342 if (!isnan(x)) { |
|
343 LongOutputCount[ix2] += 1.0; |
|
344 LongOutputSum[ix2] += x; |
|
345 } |
|
346 #ifndef NO_FLAG |
|
347 else |
|
348 flag_isNaN = 1; |
|
349 #endif |
|
350 LInput++; |
|
351 ix2++; |
|
352 } while (ix2 != (l+1)*D1); |
|
353 } // end for (j= |
|
354 |
|
355 /* copy to output */ |
|
356 for (j=0; j<D1; j++) { |
|
357 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; |
|
358 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; |
|
359 } // end else |
|
360 } |
|
361 } |
|
362 |
|
363 else if (POutputCount == 3) { |
|
364 // OUTER LOOP: along dimensions > DIM |
|
365 for (l = 0; l<D3; l++) { |
|
366 ix0 = l*D1; |
|
367 ix1 = ix0*D2; // index for input |
|
368 for (j=0; j<D2; j++) { |
|
369 // minimize cache misses |
|
370 ix2 = ix0; // index for output |
|
371 // Inner LOOP: along dimensions < DIM |
|
372 if (W) do { |
|
373 long double x = *LInput; |
|
374 if (!isnan(x)) { |
|
375 LongOutputCount[ix2] += W[j]; |
|
376 double t = W[j]*x; |
|
377 LongOutputSum[ix2] += t; |
|
378 LongOutputSum2[ix2] += x*t; |
|
379 } |
|
380 #ifndef NO_FLAG |
|
381 else |
|
382 flag_isNaN = 1; |
|
383 #endif |
|
384 LInput++; |
|
385 ix2++; |
|
386 } while (ix2 != (l+1)*D1); |
|
387 else do { |
|
388 long double x = *LInput; |
|
389 if (!isnan(x)) { |
|
390 LongOutputCount[ix2] += 1.0; |
|
391 LongOutputSum[ix2] += x; |
|
392 LongOutputSum2[ix2] += x*x; |
|
393 } |
|
394 #ifndef NO_FLAG |
|
395 else |
|
396 flag_isNaN = 1; |
|
397 #endif |
|
398 LInput++; |
|
399 ix2++; |
|
400 } while (ix2 != (l+1)*D1); |
|
401 } // end for (j= |
|
402 |
|
403 /* copy to output */ |
|
404 for (j=0; j<D1; j++) { |
|
405 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; |
|
406 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; |
|
407 LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; |
|
408 } |
|
409 } |
|
410 } |
|
411 if (LongOutputSum) mxFree(LongOutputSum); |
|
412 if (LongOutputCount) mxFree(LongOutputCount); |
|
413 if (LongOutputSum2) mxFree(LongOutputSum2); |
|
414 |
|
415 #ifndef NO_FLAG |
|
416 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); |
|
417 /* this is a hack, the third input argument is used to return whether a NaN occured or not. |
|
418 this requires that the input argument is a non-empty variable |
|
419 */ |
|
420 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { |
|
421 // set FLAG_NANS_OCCURED |
|
422 switch (mxGetClassID(PInputs[2])) { |
|
423 case mxLOGICAL_CLASS: |
|
424 case mxCHAR_CLASS: |
|
425 case mxINT8_CLASS: |
|
426 case mxUINT8_CLASS: |
|
427 *(uint8_t*)mxGetData(PInputs[2]) = 1; |
|
428 break; |
|
429 case mxDOUBLE_CLASS: |
|
430 *(double*)mxGetData(PInputs[2]) = 1.0; |
|
431 break; |
|
432 case mxSINGLE_CLASS: |
|
433 *(float*)mxGetData(PInputs[2]) = 1.0; |
|
434 break; |
|
435 case mxINT16_CLASS: |
|
436 case mxUINT16_CLASS: |
|
437 *(uint16_t*)mxGetData(PInputs[2]) = 1; |
|
438 break; |
|
439 case mxINT32_CLASS: |
|
440 case mxUINT32_CLASS: |
|
441 *(uint32_t*)mxGetData(PInputs[2])= 1; |
|
442 break; |
|
443 case mxINT64_CLASS: |
|
444 case mxUINT64_CLASS: |
|
445 *(uint64_t*)mxGetData(PInputs[2]) = 1; |
|
446 break; |
|
447 case mxFUNCTION_CLASS: |
|
448 case mxUNKNOWN_CLASS: |
|
449 case mxCELL_CLASS: |
|
450 case mxSTRUCT_CLASS: |
|
451 mexPrintf("Type of 3rd input argument not supported."); |
|
452 } |
|
453 } |
|
454 #endif |
|
455 } |
|
456 |
|
457 |
|
458 #define stride 1 |
|
459 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
|
460 { |
|
461 long double sum=0; |
|
462 char flag=0; |
|
463 // LOOP along dimension DIM |
|
464 |
|
465 double *end = data + stride*Ni; |
|
466 if (W) { |
|
467 // with weight vector |
|
468 long double count = 0.0; |
|
469 do { |
|
470 long double x = *data; |
|
471 if (!isnan(x)) |
|
472 { |
|
473 count += *W; |
|
474 sum += *W*x; |
|
475 } |
|
476 #ifndef NO_FLAG |
|
477 else |
|
478 flag = 1; |
|
479 #endif |
|
480 |
|
481 data++; // stride=1 |
|
482 W++; |
|
483 } |
|
484 while (data < end); |
|
485 *No = count; |
|
486 } else { |
|
487 // w/o weight vector |
|
488 size_t countI = 0; |
|
489 do { |
|
490 long double x = *data; |
|
491 if (!isnan(x)) |
|
492 { |
|
493 countI++; |
|
494 sum += x; |
|
495 } |
|
496 #ifndef NO_FLAG |
|
497 else |
|
498 flag = 1; |
|
499 #endif |
|
500 data++; // stride=1 |
|
501 } |
|
502 while (data < end); |
|
503 *No = (double)countI; |
|
504 } |
|
505 |
|
506 #ifndef NO_FLAG |
|
507 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
508 #endif |
|
509 *s = sum; |
|
510 |
|
511 } |
|
512 |
|
513 |
|
514 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
|
515 { |
|
516 long double sum=0; |
|
517 long double msq=0; |
|
518 char flag=0; |
|
519 // LOOP along dimension DIM |
|
520 |
|
521 double *end = data + stride*Ni; |
|
522 if (W) { |
|
523 // with weight vector |
|
524 long double count = 0.0; |
|
525 do { |
|
526 long double x = *data; |
|
527 if (!isnan(x)) { |
|
528 count += *W; |
|
529 long double t = *W*x; |
|
530 sum += t; |
|
531 msq += x*t; |
|
532 } |
|
533 #ifndef NO_FLAG |
|
534 else |
|
535 flag = 1; |
|
536 #endif |
|
537 data++; // stride=1 |
|
538 W++; |
|
539 } |
|
540 while (data < end); |
|
541 *No = count; |
|
542 } else { |
|
543 // w/o weight vector |
|
544 size_t countI = 0; |
|
545 do { |
|
546 long double x = *data; |
|
547 if (!isnan(x)) { |
|
548 countI++; |
|
549 sum += x; |
|
550 msq += x*x; |
|
551 } |
|
552 #ifndef NO_FLAG |
|
553 else |
|
554 flag = 1; |
|
555 #endif |
|
556 data++; // stride=1 |
|
557 } |
|
558 while (data < end); |
|
559 *No = (double)countI; |
|
560 } |
|
561 |
|
562 #ifndef NO_FLAG |
|
563 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
564 #endif |
|
565 *s = sum; |
|
566 *s2 = msq; |
|
567 } |
|
568 |
|
569 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
|
570 { |
|
571 double sum=0; |
|
572 char flag=0; |
|
573 // LOOP along dimension DIM |
|
574 |
|
575 double *end = data + stride*Ni; |
|
576 if (W) { |
|
577 // with weight vector |
|
578 double count = 0.0; |
|
579 do { |
|
580 double x = *data; |
|
581 if (!isnan(x)) |
|
582 { |
|
583 count += *W; |
|
584 sum += *W*x; |
|
585 } |
|
586 #ifndef NO_FLAG |
|
587 else |
|
588 flag = 1; |
|
589 #endif |
|
590 |
|
591 data++; // stride=1 |
|
592 W++; |
|
593 } |
|
594 while (data < end); |
|
595 *No = count; |
|
596 } else { |
|
597 // w/o weight vector |
|
598 size_t countI = 0; |
|
599 do { |
|
600 double x = *data; |
|
601 if (!isnan(x)) |
|
602 { |
|
603 countI++; |
|
604 sum += x; |
|
605 } |
|
606 #ifndef NO_FLAG |
|
607 else |
|
608 flag = 1; |
|
609 #endif |
|
610 data++; // stride=1 |
|
611 } |
|
612 while (data < end); |
|
613 *No = (double)countI; |
|
614 } |
|
615 |
|
616 #ifndef NO_FLAG |
|
617 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
618 #endif |
|
619 *s = sum; |
|
620 |
|
621 } |
|
622 |
|
623 |
|
624 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
|
625 { |
|
626 double sum=0; |
|
627 double msq=0; |
|
628 char flag=0; |
|
629 // LOOP along dimension DIM |
|
630 |
|
631 double *end = data + stride*Ni; |
|
632 if (W) { |
|
633 // with weight vector |
|
634 double count = 0.0; |
|
635 do { |
|
636 double x = *data; |
|
637 if (!isnan(x)) { |
|
638 count += *W; |
|
639 double t = *W*x; |
|
640 sum += t; |
|
641 msq += x*t; |
|
642 } |
|
643 #ifndef NO_FLAG |
|
644 else |
|
645 flag = 1; |
|
646 #endif |
|
647 data++; // stride=1 |
|
648 W++; |
|
649 } |
|
650 while (data < end); |
|
651 *No = count; |
|
652 } else { |
|
653 // w/o weight vector |
|
654 size_t countI = 0; |
|
655 do { |
|
656 double x = *data; |
|
657 if (!isnan(x)) { |
|
658 countI++; |
|
659 sum += x; |
|
660 msq += x*x; |
|
661 } |
|
662 #ifndef NO_FLAG |
|
663 else |
|
664 flag = 1; |
|
665 #endif |
|
666 data++; // stride=1 |
|
667 } |
|
668 while (data < end); |
|
669 *No = (double)countI; |
|
670 } |
|
671 |
|
672 #ifndef NO_FLAG |
|
673 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
674 #endif |
|
675 *s = sum; |
|
676 *s2 = msq; |
|
677 } |
|
678 |
|
679 |
|
680 |
|
681 /*************************************** |
|
682 using Kahan's summation formula [1] |
|
683 this gives more accurate results while the computational effort within the loop is about 4x as high |
|
684 First tests show a penalty of about 40% in terms of computational time. |
|
685 |
|
686 [1] David Goldberg, |
|
687 What Every Computer Scientist Should Know About Floating-Point Arithmetic |
|
688 ACM Computing Surveys, Vol 23, No 1, March 1991. |
|
689 ****************************************/ |
|
690 |
|
691 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
|
692 { |
|
693 long double sum=0; |
|
694 char flag=0; |
|
695 // LOOP along dimension DIM |
|
696 |
|
697 double *end = data + stride*Ni; |
|
698 if (W) { |
|
699 // with weight vector |
|
700 long double count = 0.0; |
|
701 long double rc=0.0, rn=0.0; |
|
702 do { |
|
703 long double x = *data; |
|
704 long double t,y; |
|
705 if (!isnan(x)) |
|
706 { |
|
707 //count += *W; [1] |
|
708 y = *W-rn; |
|
709 t = count+y; |
|
710 rn= (t-count)-y; |
|
711 count= t; |
|
712 |
|
713 //sum += *W*x; [1] |
|
714 y = *W*x-rc; |
|
715 t = sum+y; |
|
716 rc= (t-sum)-y; |
|
717 sum= t; |
|
718 } |
|
719 #ifndef NO_FLAG |
|
720 else |
|
721 flag = 1; |
|
722 #endif |
|
723 |
|
724 data++; // stride=1 |
|
725 W++; |
|
726 } |
|
727 while (data < end); |
|
728 *No = count; |
|
729 } else { |
|
730 // w/o weight vector |
|
731 size_t countI = 0; |
|
732 long double rc=0.0; |
|
733 do { |
|
734 long double x = *data; |
|
735 long double t,y; |
|
736 if (!isnan(x)) |
|
737 { |
|
738 countI++; |
|
739 // sum += x; [1] |
|
740 y = x-rc; |
|
741 t = sum+y; |
|
742 rc= (t-sum)-y; |
|
743 sum= t; |
|
744 } |
|
745 #ifndef NO_FLAG |
|
746 else |
|
747 flag = 1; |
|
748 #endif |
|
749 data++; // stride=1 |
|
750 } |
|
751 while (data < end); |
|
752 *No = (double)countI; |
|
753 } |
|
754 |
|
755 #ifndef NO_FLAG |
|
756 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
757 #endif |
|
758 *s = sum; |
|
759 |
|
760 } |
|
761 |
|
762 |
|
763 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
|
764 { |
|
765 long double sum=0; |
|
766 long double msq=0; |
|
767 char flag=0; |
|
768 // LOOP along dimension DIM |
|
769 |
|
770 double *end = data + stride*Ni; |
|
771 if (W) { |
|
772 // with weight vector |
|
773 long double count = 0.0; |
|
774 long double rc=0.0, rn=0.0, rq=0.0; |
|
775 do { |
|
776 long double x = *data; |
|
777 long double t,y; |
|
778 if (!isnan(x)) { |
|
779 //count += *W; [1] |
|
780 y = *W-rn; |
|
781 t = count+y; |
|
782 rn= (t-count)-y; |
|
783 count= t; |
|
784 |
|
785 long double w = *W*x; |
|
786 //sum += *W*x; [1] |
|
787 y = *W*x-rc; |
|
788 t = sum+y; |
|
789 rc= (t-sum)-y; |
|
790 sum= t; |
|
791 |
|
792 // msq += x*w; |
|
793 y = w*x-rq; |
|
794 t = msq+y; |
|
795 rq= (t-msq)-y; |
|
796 msq= t; |
|
797 } |
|
798 #ifndef NO_FLAG |
|
799 else |
|
800 flag = 1; |
|
801 #endif |
|
802 data++; // stride=1 |
|
803 W++; |
|
804 } |
|
805 while (data < end); |
|
806 *No = count; |
|
807 } else { |
|
808 // w/o weight vector |
|
809 size_t countI = 0; |
|
810 long double rc=0.0, rq=0.0; |
|
811 do { |
|
812 long double x = *data; |
|
813 long double t,y; |
|
814 if (!isnan(x)) { |
|
815 countI++; |
|
816 //sum += x; [1] |
|
817 y = x-rc; |
|
818 t = sum+y; |
|
819 rc= (t-sum)-y; |
|
820 sum= t; |
|
821 |
|
822 // msq += x*x; |
|
823 y = x*x-rq; |
|
824 t = msq+y; |
|
825 rq= (t-msq)-y; |
|
826 msq= t; |
|
827 } |
|
828 #ifndef NO_FLAG |
|
829 else |
|
830 flag = 1; |
|
831 #endif |
|
832 data++; // stride=1 |
|
833 } |
|
834 while (data < end); |
|
835 *No = (double)countI; |
|
836 } |
|
837 |
|
838 #ifndef NO_FLAG |
|
839 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
840 #endif |
|
841 *s = sum; |
|
842 *s2 = msq; |
|
843 } |
|
844 |
|
845 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
|
846 { |
|
847 double sum=0; |
|
848 char flag=0; |
|
849 // LOOP along dimension DIM |
|
850 |
|
851 double *end = data + stride*Ni; |
|
852 if (W) { |
|
853 // with weight vector |
|
854 double count = 0.0; |
|
855 double rc=0.0, rn=0.0; |
|
856 do { |
|
857 double x = *data; |
|
858 double t,y; |
|
859 if (!isnan(x)) |
|
860 { |
|
861 //count += *W; [1] |
|
862 y = *W-rn; |
|
863 t = count+y; |
|
864 rn= (t-count)-y; |
|
865 count= t; |
|
866 |
|
867 //sum += *W*x; [1] |
|
868 y = *W*x-rc; |
|
869 t = sum+y; |
|
870 rc= (t-sum)-y; |
|
871 sum= t; |
|
872 } |
|
873 #ifndef NO_FLAG |
|
874 else |
|
875 flag = 1; |
|
876 #endif |
|
877 |
|
878 data++; // stride=1 |
|
879 W++; |
|
880 } |
|
881 while (data < end); |
|
882 *No = count; |
|
883 } else { |
|
884 // w/o weight vector |
|
885 size_t countI = 0; |
|
886 double rc=0.0; |
|
887 do { |
|
888 double x = *data; |
|
889 double t,y; |
|
890 if (!isnan(x)) |
|
891 { |
|
892 countI++; |
|
893 // sum += x; [1] |
|
894 y = x-rc; |
|
895 t = sum+y; |
|
896 rc= (t-sum)-y; |
|
897 sum= t; |
|
898 } |
|
899 #ifndef NO_FLAG |
|
900 else |
|
901 flag = 1; |
|
902 #endif |
|
903 data++; // stride=1 |
|
904 } |
|
905 while (data < end); |
|
906 *No = (double)countI; |
|
907 } |
|
908 |
|
909 #ifndef NO_FLAG |
|
910 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
911 #endif |
|
912 *s = sum; |
|
913 |
|
914 } |
|
915 |
|
916 |
|
917 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
|
918 { |
|
919 double sum=0; |
|
920 double msq=0; |
|
921 char flag=0; |
|
922 // LOOP along dimension DIM |
|
923 |
|
924 double *end = data + stride*Ni; |
|
925 if (W) { |
|
926 // with weight vector |
|
927 double count = 0.0; |
|
928 double rc=0.0, rn=0.0, rq=0.0; |
|
929 do { |
|
930 double x = *data; |
|
931 double t,y; |
|
932 if (!isnan(x)) { |
|
933 //count += *W; [1] |
|
934 y = *W-rn; |
|
935 t = count+y; |
|
936 rn= (t-count)-y; |
|
937 count= t; |
|
938 |
|
939 double w = *W*x; |
|
940 //sum += *W*x; [1] |
|
941 y = *W*x-rc; |
|
942 t = sum+y; |
|
943 rc= (t-sum)-y; |
|
944 sum= t; |
|
945 |
|
946 // msq += x*w; |
|
947 y = w*x-rq; |
|
948 t = msq+y; |
|
949 rq= (t-msq)-y; |
|
950 msq= t; |
|
951 } |
|
952 #ifndef NO_FLAG |
|
953 else |
|
954 flag = 1; |
|
955 #endif |
|
956 data++; // stride=1 |
|
957 W++; |
|
958 } |
|
959 while (data < end); |
|
960 *No = count; |
|
961 } else { |
|
962 // w/o weight vector |
|
963 size_t countI = 0; |
|
964 double rc=0.0, rq=0.0; |
|
965 do { |
|
966 double x = *data; |
|
967 double t,y; |
|
968 if (!isnan(x)) { |
|
969 countI++; |
|
970 //sum += x; [1] |
|
971 y = x-rc; |
|
972 t = sum+y; |
|
973 rc= (t-sum)-y; |
|
974 sum= t; |
|
975 |
|
976 // msq += x*x; |
|
977 y = x*x-rq; |
|
978 t = msq+y; |
|
979 rq= (t-msq)-y; |
|
980 msq= t; |
|
981 } |
|
982 #ifndef NO_FLAG |
|
983 else |
|
984 flag = 1; |
|
985 #endif |
|
986 data++; // stride=1 |
|
987 } |
|
988 while (data < end); |
|
989 *No = (double)countI; |
|
990 } |
|
991 |
|
992 #ifndef NO_FLAG |
|
993 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; |
|
994 #endif |
|
995 *s = sum; |
|
996 *s2 = msq; |
|
997 } |
|
998 |