Mercurial > forge
annotate extra/NaN/src/sumskipnan_mex.cpp @ 7889:c101c486d80a octave-forge
fix web address
author | schloegl |
---|---|
date | Thu, 27 Jan 2011 17:10:36 +0000 |
parents | b9f35668b55e |
children | db5092052107 |
rev | line source |
---|---|
6549 | 1 //------------------------------------------------------------------- |
2 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. | |
3 // | |
4 // | |
5 // This program is free software; you can redistribute it and/or modify | |
6 // it under the terms of the GNU General Public License as published by | |
7 // the Free Software Foundation; either version 3 of the License, or | |
8 // (at your option) any later version. | |
9 // | |
10 // This program is distributed in the hope that it will be useful, | |
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 // GNU General Public License for more details. | |
14 // | |
15 // You should have received a copy of the GNU General Public License | |
16 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
17 // | |
18 // | |
19 // sumskipnan: sums all non-NaN values | |
20 // usage: | |
21 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); | |
22 // | |
23 // SUMSKIPNAN uses two techniques to reduce errors: | |
24 // 1) long double (80bit) instead of 64-bit double is used internally | |
25 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps | |
26 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). | |
27 // | |
28 // Input: | |
29 // - x data array | |
30 // - DIM (optional) dimension to sum | |
31 // - flag (optional) is actually an output argument telling whether some NaN was observed | |
32 // - W (optional) weight vector to compute weighted sum (default 1) | |
33 // | |
34 // Output: | |
35 // - o (weighted) sum along dimension DIM | |
36 // - count of valid elements | |
37 // - sums of squares | |
38 // | |
39 // | |
40 // $Id$ | |
7889 | 41 // Copyright (C) 2009,2010,2011 Alois Schloegl <a.schloegl@ieee.org> |
6549 | 42 // This function is part of the NaN-toolbox |
7889 | 43 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 44 // |
45 //------------------------------------------------------------------- | |
46 | |
47 | |
48 | |
49 | |
50 #include <math.h> | |
7888 | 51 #include <stdint.h> |
6549 | 52 #include "mex.h" |
53 | |
54 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
55 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
56 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
57 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
58 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
59 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
60 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
61 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
62 | |
63 //#define NO_FLAG | |
64 | |
65 #ifdef tmwtypes_h | |
66 #if (MX_API_VER<=0x07020000) | |
67 typedef int mwSize; | |
68 #endif | |
69 #endif | |
70 | |
71 | |
72 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
73 { | |
74 const mwSize *SZ; | |
75 double* LInput; | |
76 double* LOutputSum; | |
77 double* LOutputCount; | |
78 double* LOutputSum2; | |
79 long double* LongOutputSum = NULL; | |
80 long double* LongOutputCount = NULL; | |
81 long double* LongOutputSum2 = NULL; | |
82 double x; | |
83 double* W = NULL; // weight vector | |
84 | |
85 mwSize DIM = 0; | |
86 mwSize D1, D2, D3; // NN; // | |
87 mwSize ND, ND2; // number of dimensions: input, output | |
88 mwSize ix0, ix1, ix2; // index to input and output | |
89 mwSize j, l; // running indices | |
90 mwSize *SZ2; // size of output | |
91 char flag_isNaN = 0; | |
92 | |
93 // check for proper number of input and output arguments | |
94 if ((PInputCount <= 0) || (PInputCount > 4)) | |
95 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); | |
96 if (POutputCount > 4) | |
97 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); | |
98 | |
99 // get 1st argument | |
100 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0])) | |
101 LInput = mxGetPr(PInputs[0]); | |
102 else | |
103 mexErrMsgTxt("First argument must be REAL/DOUBLE."); | |
104 | |
105 // get 2nd argument | |
106 if (PInputCount > 1) { | |
107 switch (mxGetNumberOfElements(PInputs[1])) { | |
108 case 0: x = 0.0; // accept empty element | |
109 break; | |
110 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); | |
111 break; | |
112 default:x = -1.0; // invalid | |
113 } | |
114 if ((x < 0) || (x > 65535) || (x != floor(x))) | |
115 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); | |
116 | |
117 DIM = (unsigned)floor(x); | |
118 } | |
119 | |
120 // get size | |
121 ND = mxGetNumberOfDimensions(PInputs[0]); | |
122 // NN = mxGetNumberOfElements(PInputs[0]); | |
123 SZ = mxGetDimensions(PInputs[0]); | |
124 | |
125 // if DIM==0 (undefined), look for first dimension with more than 1 element. | |
126 for (j = 0; (DIM < 1) && (j < ND); j++) | |
127 if (SZ[j]>1) DIM = j+1; | |
128 | |
129 if (DIM < 1) DIM=1; // in case DIM is still undefined | |
130 | |
131 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output | |
132 | |
133 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size | |
134 | |
135 for (j=0; j<ND; j++) // copy size of input; | |
136 SZ2[j] = SZ[j]; | |
137 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 | |
138 SZ2[j] = 1; | |
139 | |
140 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM | |
141 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM | |
142 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements | |
143 | |
144 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; | |
145 | |
146 // get weight vector for weighted sumskipnan | |
147 if (PInputCount > 3) { | |
148 if (!mxGetNumberOfElements(PInputs[3])) | |
149 ; // empty weight vector - no weighting | |
150 else if (mxGetNumberOfElements(PInputs[3])==D2) | |
151 W = mxGetPr(PInputs[3]); | |
152 else | |
153 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); | |
154 } | |
155 | |
156 int ACC_LEVEL = 0; | |
157 { | |
158 mxArray *LEVEL = NULL; | |
159 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
160 if (!s) { | |
161 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
162 if ((D1>1) && (ACC_LEVEL>2)) | |
163 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); | |
164 } | |
165 mxDestroyArray(LEVEL); | |
166 } | |
167 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
168 | |
169 // create outputs | |
170 #define TYP mxDOUBLE_CLASS | |
171 | |
172 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
173 LOutputSum = mxGetPr(POutput[0]); | |
174 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
175 | |
176 if (POutputCount >= 2) { | |
177 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
178 LOutputCount = mxGetPr(POutput[1]); | |
179 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
180 } | |
181 if (POutputCount >= 3) { | |
182 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
183 LOutputSum2 = mxGetPr(POutput[2]); | |
184 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
185 } | |
186 mxFree(SZ2); | |
187 | |
188 if (D1*D2*D3<1) // zero size array | |
189 ; // do nothing | |
190 else if ((D1==1) && (ACC_LEVEL<1)) { | |
191 // double accuray, naive summation, error = N*2^-52 | |
192 switch (POutputCount) { | |
193 case 1: | |
194 for (l = 0; l<D3; l++) { | |
195 double count; | |
196 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
197 } | |
198 break; | |
199 case 2: | |
200 for (l = 0; l<D3; l++) { | |
201 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
202 } | |
203 break; | |
204 case 3: | |
205 for (l = 0; l<D3; l++) { | |
206 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
207 } | |
208 break; | |
209 } | |
210 } | |
211 else if ((D1==1) && (ACC_LEVEL==1)) { | |
212 // extended accuray, naive summation, error = N*2^-64 | |
213 switch (POutputCount) { | |
214 case 1: | |
215 for (l = 0; l<D3; l++) { | |
216 double count; | |
217 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
218 } | |
219 break; | |
220 case 2: | |
221 for (l = 0; l<D3; l++) { | |
222 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
223 } | |
224 break; | |
225 case 3: | |
226 for (l = 0; l<D3; l++) { | |
227 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
228 } | |
229 break; | |
230 } | |
231 } | |
232 else if ((D1==1) && (ACC_LEVEL==3)) { | |
233 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 | |
234 switch (POutputCount) { | |
235 case 1: | |
236 for (l = 0; l<D3; l++) { | |
237 double count; | |
238 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
239 } | |
240 break; | |
241 case 2: | |
242 for (l = 0; l<D3; l++) { | |
243 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
244 } | |
245 break; | |
246 case 3: | |
247 for (l = 0; l<D3; l++) { | |
248 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
249 } | |
250 break; | |
251 } | |
252 } | |
253 else if ((D1==1) && (ACC_LEVEL==2)) { | |
254 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 | |
255 switch (POutputCount) { | |
256 case 1: | |
257 for (l = 0; l<D3; l++) { | |
258 double count; | |
259 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
260 } | |
261 break; | |
262 case 2: | |
263 for (l = 0; l<D3; l++) { | |
264 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
265 } | |
266 break; | |
267 case 3: | |
268 for (l = 0; l<D3; l++) { | |
269 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
270 } | |
271 break; | |
272 } | |
273 } | |
274 else if (POutputCount <= 1) { | |
275 // OUTER LOOP: along dimensions > DIM | |
276 for (l = 0; l<D3; l++) { | |
277 ix0 = l*D1; // index for output | |
278 ix1 = ix0*D2; // index for input | |
279 for (j=0; j<D2; j++) { | |
280 // minimize cache misses | |
281 ix2 = ix0; // index for output | |
282 // Inner LOOP: along dimensions < DIM | |
283 if (W) do { | |
284 long double x = *LInput; | |
285 if (!isnan(x)) { | |
286 LongOutputSum[ix2] += W[j]*x; | |
287 } | |
288 #ifndef NO_FLAG | |
289 else | |
290 flag_isNaN = 1; | |
291 #endif | |
292 LInput++; | |
293 ix2++; | |
294 } while (ix2 != (l+1)*D1); | |
295 else do { | |
296 long double x = *LInput; | |
297 if (!isnan(x)) { | |
298 LongOutputSum[ix2] += x; | |
299 } | |
300 #ifndef NO_FLAG | |
301 else | |
302 flag_isNaN = 1; | |
303 #endif | |
304 LInput++; | |
305 ix2++; | |
306 } while (ix2 != (l+1)*D1); | |
307 } // end for (j= | |
308 | |
309 /* copy to output */ | |
310 for (j=0; j<D1; j++) { | |
311 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
312 } | |
313 } | |
314 } | |
315 | |
316 else if (POutputCount == 2) { | |
317 // OUTER LOOP: along dimensions > DIM | |
318 for (l = 0; l<D3; l++) { | |
319 ix0 = l*D1; | |
320 ix1 = ix0*D2; // index for input | |
321 for (j=0; j<D2; j++) { | |
322 // minimize cache misses | |
323 ix2 = ix0; // index for output | |
324 // Inner LOOP: along dimensions < DIM | |
325 if (W) do { | |
326 long double x = *LInput; | |
327 if (!isnan(x)) { | |
328 LongOutputCount[ix2] += W[j]; | |
329 LongOutputSum[ix2] += W[j]*x; | |
330 } | |
331 #ifndef NO_FLAG | |
332 else | |
333 flag_isNaN = 1; | |
334 #endif | |
335 LInput++; | |
336 ix2++; | |
337 } while (ix2 != (l+1)*D1); | |
338 else do { | |
339 long double x = *LInput; | |
340 if (!isnan(x)) { | |
341 LongOutputCount[ix2] += 1.0; | |
342 LongOutputSum[ix2] += x; | |
343 } | |
344 #ifndef NO_FLAG | |
345 else | |
346 flag_isNaN = 1; | |
347 #endif | |
348 LInput++; | |
349 ix2++; | |
350 } while (ix2 != (l+1)*D1); | |
351 } // end for (j= | |
352 | |
353 /* copy to output */ | |
354 for (j=0; j<D1; j++) { | |
355 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
356 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
357 } // end else | |
358 } | |
359 } | |
360 | |
361 else if (POutputCount == 3) { | |
362 // OUTER LOOP: along dimensions > DIM | |
363 for (l = 0; l<D3; l++) { | |
364 ix0 = l*D1; | |
365 ix1 = ix0*D2; // index for input | |
366 for (j=0; j<D2; j++) { | |
367 // minimize cache misses | |
368 ix2 = ix0; // index for output | |
369 // Inner LOOP: along dimensions < DIM | |
370 if (W) do { | |
371 long double x = *LInput; | |
372 if (!isnan(x)) { | |
373 LongOutputCount[ix2] += W[j]; | |
374 double t = W[j]*x; | |
375 LongOutputSum[ix2] += t; | |
376 LongOutputSum2[ix2] += x*t; | |
377 } | |
378 #ifndef NO_FLAG | |
379 else | |
380 flag_isNaN = 1; | |
381 #endif | |
382 LInput++; | |
383 ix2++; | |
384 } while (ix2 != (l+1)*D1); | |
385 else do { | |
386 long double x = *LInput; | |
387 if (!isnan(x)) { | |
388 LongOutputCount[ix2] += 1.0; | |
389 LongOutputSum[ix2] += x; | |
390 LongOutputSum2[ix2] += x*x; | |
391 } | |
392 #ifndef NO_FLAG | |
393 else | |
394 flag_isNaN = 1; | |
395 #endif | |
396 LInput++; | |
397 ix2++; | |
398 } while (ix2 != (l+1)*D1); | |
399 } // end for (j= | |
400 | |
401 /* copy to output */ | |
402 for (j=0; j<D1; j++) { | |
403 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
404 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
405 LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; | |
406 } | |
407 } | |
408 } | |
409 if (LongOutputSum) mxFree(LongOutputSum); | |
410 if (LongOutputCount) mxFree(LongOutputCount); | |
411 if (LongOutputSum2) mxFree(LongOutputSum2); | |
412 | |
413 #ifndef NO_FLAG | |
414 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
415 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
416 this requires that the input argument is a non-empty variable | |
417 */ | |
418 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
419 // set FLAG_NANS_OCCURED | |
420 switch (mxGetClassID(PInputs[2])) { | |
421 case mxLOGICAL_CLASS: | |
422 case mxCHAR_CLASS: | |
423 case mxINT8_CLASS: | |
424 case mxUINT8_CLASS: | |
425 *(uint8_t*)mxGetData(PInputs[2]) = 1; | |
426 break; | |
427 case mxDOUBLE_CLASS: | |
428 *(double*)mxGetData(PInputs[2]) = 1.0; | |
429 break; | |
430 case mxSINGLE_CLASS: | |
431 *(float*)mxGetData(PInputs[2]) = 1.0; | |
432 break; | |
433 case mxINT16_CLASS: | |
434 case mxUINT16_CLASS: | |
435 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
436 break; | |
437 case mxINT32_CLASS: | |
438 case mxUINT32_CLASS: | |
439 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
440 break; | |
441 case mxINT64_CLASS: | |
442 case mxUINT64_CLASS: | |
443 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
444 break; | |
445 case mxFUNCTION_CLASS: | |
446 case mxUNKNOWN_CLASS: | |
447 case mxCELL_CLASS: | |
448 case mxSTRUCT_CLASS: | |
7301
485d1594d155
addresses undesired side-effect off in-place sorting of data
schloegl
parents:
6549
diff
changeset
|
449 default: |
6549 | 450 mexPrintf("Type of 3rd input argument not supported."); |
451 } | |
452 } | |
453 #endif | |
454 } | |
455 | |
456 | |
457 #define stride 1 | |
458 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
459 { | |
460 long double sum=0; | |
461 char flag=0; | |
462 // LOOP along dimension DIM | |
463 | |
464 double *end = data + stride*Ni; | |
465 if (W) { | |
466 // with weight vector | |
467 long double count = 0.0; | |
468 do { | |
469 long double x = *data; | |
470 if (!isnan(x)) | |
471 { | |
472 count += *W; | |
473 sum += *W*x; | |
474 } | |
475 #ifndef NO_FLAG | |
476 else | |
477 flag = 1; | |
478 #endif | |
479 | |
480 data++; // stride=1 | |
481 W++; | |
482 } | |
483 while (data < end); | |
484 *No = count; | |
485 } else { | |
486 // w/o weight vector | |
487 size_t countI = 0; | |
488 do { | |
489 long double x = *data; | |
490 if (!isnan(x)) | |
491 { | |
492 countI++; | |
493 sum += x; | |
494 } | |
495 #ifndef NO_FLAG | |
496 else | |
497 flag = 1; | |
498 #endif | |
499 data++; // stride=1 | |
500 } | |
501 while (data < end); | |
502 *No = (double)countI; | |
503 } | |
504 | |
505 #ifndef NO_FLAG | |
506 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
507 #endif | |
508 *s = sum; | |
509 | |
510 } | |
511 | |
512 | |
513 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
514 { | |
515 long double sum=0; | |
516 long double msq=0; | |
517 char flag=0; | |
518 // LOOP along dimension DIM | |
519 | |
520 double *end = data + stride*Ni; | |
521 if (W) { | |
522 // with weight vector | |
523 long double count = 0.0; | |
524 do { | |
525 long double x = *data; | |
526 if (!isnan(x)) { | |
527 count += *W; | |
528 long double t = *W*x; | |
529 sum += t; | |
530 msq += x*t; | |
531 } | |
532 #ifndef NO_FLAG | |
533 else | |
534 flag = 1; | |
535 #endif | |
536 data++; // stride=1 | |
537 W++; | |
538 } | |
539 while (data < end); | |
540 *No = count; | |
541 } else { | |
542 // w/o weight vector | |
543 size_t countI = 0; | |
544 do { | |
545 long double x = *data; | |
546 if (!isnan(x)) { | |
547 countI++; | |
548 sum += x; | |
549 msq += x*x; | |
550 } | |
551 #ifndef NO_FLAG | |
552 else | |
553 flag = 1; | |
554 #endif | |
555 data++; // stride=1 | |
556 } | |
557 while (data < end); | |
558 *No = (double)countI; | |
559 } | |
560 | |
561 #ifndef NO_FLAG | |
562 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
563 #endif | |
564 *s = sum; | |
565 *s2 = msq; | |
566 } | |
567 | |
568 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
569 { | |
570 double sum=0; | |
571 char flag=0; | |
572 // LOOP along dimension DIM | |
573 | |
574 double *end = data + stride*Ni; | |
575 if (W) { | |
576 // with weight vector | |
577 double count = 0.0; | |
578 do { | |
579 double x = *data; | |
580 if (!isnan(x)) | |
581 { | |
582 count += *W; | |
583 sum += *W*x; | |
584 } | |
585 #ifndef NO_FLAG | |
586 else | |
587 flag = 1; | |
588 #endif | |
589 | |
590 data++; // stride=1 | |
591 W++; | |
592 } | |
593 while (data < end); | |
594 *No = count; | |
595 } else { | |
596 // w/o weight vector | |
597 size_t countI = 0; | |
598 do { | |
599 double x = *data; | |
600 if (!isnan(x)) | |
601 { | |
602 countI++; | |
603 sum += x; | |
604 } | |
605 #ifndef NO_FLAG | |
606 else | |
607 flag = 1; | |
608 #endif | |
609 data++; // stride=1 | |
610 } | |
611 while (data < end); | |
612 *No = (double)countI; | |
613 } | |
614 | |
615 #ifndef NO_FLAG | |
616 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
617 #endif | |
618 *s = sum; | |
619 | |
620 } | |
621 | |
622 | |
623 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
624 { | |
625 double sum=0; | |
626 double msq=0; | |
627 char flag=0; | |
628 // LOOP along dimension DIM | |
629 | |
630 double *end = data + stride*Ni; | |
631 if (W) { | |
632 // with weight vector | |
633 double count = 0.0; | |
634 do { | |
635 double x = *data; | |
636 if (!isnan(x)) { | |
637 count += *W; | |
638 double t = *W*x; | |
639 sum += t; | |
640 msq += x*t; | |
641 } | |
642 #ifndef NO_FLAG | |
643 else | |
644 flag = 1; | |
645 #endif | |
646 data++; // stride=1 | |
647 W++; | |
648 } | |
649 while (data < end); | |
650 *No = count; | |
651 } else { | |
652 // w/o weight vector | |
653 size_t countI = 0; | |
654 do { | |
655 double x = *data; | |
656 if (!isnan(x)) { | |
657 countI++; | |
658 sum += x; | |
659 msq += x*x; | |
660 } | |
661 #ifndef NO_FLAG | |
662 else | |
663 flag = 1; | |
664 #endif | |
665 data++; // stride=1 | |
666 } | |
667 while (data < end); | |
668 *No = (double)countI; | |
669 } | |
670 | |
671 #ifndef NO_FLAG | |
672 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
673 #endif | |
674 *s = sum; | |
675 *s2 = msq; | |
676 } | |
677 | |
678 | |
679 | |
680 /*************************************** | |
681 using Kahan's summation formula [1] | |
682 this gives more accurate results while the computational effort within the loop is about 4x as high | |
683 First tests show a penalty of about 40% in terms of computational time. | |
684 | |
685 [1] David Goldberg, | |
686 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
687 ACM Computing Surveys, Vol 23, No 1, March 1991. | |
688 ****************************************/ | |
689 | |
690 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
691 { | |
692 long double sum=0; | |
693 char flag=0; | |
694 // LOOP along dimension DIM | |
695 | |
696 double *end = data + stride*Ni; | |
697 if (W) { | |
698 // with weight vector | |
699 long double count = 0.0; | |
700 long double rc=0.0, rn=0.0; | |
701 do { | |
702 long double x = *data; | |
703 long double t,y; | |
704 if (!isnan(x)) | |
705 { | |
706 //count += *W; [1] | |
707 y = *W-rn; | |
708 t = count+y; | |
709 rn= (t-count)-y; | |
710 count= t; | |
711 | |
712 //sum += *W*x; [1] | |
713 y = *W*x-rc; | |
714 t = sum+y; | |
715 rc= (t-sum)-y; | |
716 sum= t; | |
717 } | |
718 #ifndef NO_FLAG | |
719 else | |
720 flag = 1; | |
721 #endif | |
722 | |
723 data++; // stride=1 | |
724 W++; | |
725 } | |
726 while (data < end); | |
727 *No = count; | |
728 } else { | |
729 // w/o weight vector | |
730 size_t countI = 0; | |
731 long double rc=0.0; | |
732 do { | |
733 long double x = *data; | |
734 long double t,y; | |
735 if (!isnan(x)) | |
736 { | |
737 countI++; | |
738 // sum += x; [1] | |
739 y = x-rc; | |
740 t = sum+y; | |
741 rc= (t-sum)-y; | |
742 sum= t; | |
743 } | |
744 #ifndef NO_FLAG | |
745 else | |
746 flag = 1; | |
747 #endif | |
748 data++; // stride=1 | |
749 } | |
750 while (data < end); | |
751 *No = (double)countI; | |
752 } | |
753 | |
754 #ifndef NO_FLAG | |
755 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
756 #endif | |
757 *s = sum; | |
758 | |
759 } | |
760 | |
761 | |
762 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
763 { | |
764 long double sum=0; | |
765 long double msq=0; | |
766 char flag=0; | |
767 // LOOP along dimension DIM | |
768 | |
769 double *end = data + stride*Ni; | |
770 if (W) { | |
771 // with weight vector | |
772 long double count = 0.0; | |
773 long double rc=0.0, rn=0.0, rq=0.0; | |
774 do { | |
775 long double x = *data; | |
776 long double t,y; | |
777 if (!isnan(x)) { | |
778 //count += *W; [1] | |
779 y = *W-rn; | |
780 t = count+y; | |
781 rn= (t-count)-y; | |
782 count= t; | |
783 | |
784 long double w = *W*x; | |
785 //sum += *W*x; [1] | |
786 y = *W*x-rc; | |
787 t = sum+y; | |
788 rc= (t-sum)-y; | |
789 sum= t; | |
790 | |
791 // msq += x*w; | |
792 y = w*x-rq; | |
793 t = msq+y; | |
794 rq= (t-msq)-y; | |
795 msq= t; | |
796 } | |
797 #ifndef NO_FLAG | |
798 else | |
799 flag = 1; | |
800 #endif | |
801 data++; // stride=1 | |
802 W++; | |
803 } | |
804 while (data < end); | |
805 *No = count; | |
806 } else { | |
807 // w/o weight vector | |
808 size_t countI = 0; | |
809 long double rc=0.0, rq=0.0; | |
810 do { | |
811 long double x = *data; | |
812 long double t,y; | |
813 if (!isnan(x)) { | |
814 countI++; | |
815 //sum += x; [1] | |
816 y = x-rc; | |
817 t = sum+y; | |
818 rc= (t-sum)-y; | |
819 sum= t; | |
820 | |
821 // msq += x*x; | |
822 y = x*x-rq; | |
823 t = msq+y; | |
824 rq= (t-msq)-y; | |
825 msq= t; | |
826 } | |
827 #ifndef NO_FLAG | |
828 else | |
829 flag = 1; | |
830 #endif | |
831 data++; // stride=1 | |
832 } | |
833 while (data < end); | |
834 *No = (double)countI; | |
835 } | |
836 | |
837 #ifndef NO_FLAG | |
838 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
839 #endif | |
840 *s = sum; | |
841 *s2 = msq; | |
842 } | |
843 | |
844 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
845 { | |
846 double sum=0; | |
847 char flag=0; | |
848 // LOOP along dimension DIM | |
849 | |
850 double *end = data + stride*Ni; | |
851 if (W) { | |
852 // with weight vector | |
853 double count = 0.0; | |
854 double rc=0.0, rn=0.0; | |
855 do { | |
856 double x = *data; | |
857 double t,y; | |
858 if (!isnan(x)) | |
859 { | |
860 //count += *W; [1] | |
861 y = *W-rn; | |
862 t = count+y; | |
863 rn= (t-count)-y; | |
864 count= t; | |
865 | |
866 //sum += *W*x; [1] | |
867 y = *W*x-rc; | |
868 t = sum+y; | |
869 rc= (t-sum)-y; | |
870 sum= t; | |
871 } | |
872 #ifndef NO_FLAG | |
873 else | |
874 flag = 1; | |
875 #endif | |
876 | |
877 data++; // stride=1 | |
878 W++; | |
879 } | |
880 while (data < end); | |
881 *No = count; | |
882 } else { | |
883 // w/o weight vector | |
884 size_t countI = 0; | |
885 double rc=0.0; | |
886 do { | |
887 double x = *data; | |
888 double t,y; | |
889 if (!isnan(x)) | |
890 { | |
891 countI++; | |
892 // sum += x; [1] | |
893 y = x-rc; | |
894 t = sum+y; | |
895 rc= (t-sum)-y; | |
896 sum= t; | |
897 } | |
898 #ifndef NO_FLAG | |
899 else | |
900 flag = 1; | |
901 #endif | |
902 data++; // stride=1 | |
903 } | |
904 while (data < end); | |
905 *No = (double)countI; | |
906 } | |
907 | |
908 #ifndef NO_FLAG | |
909 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
910 #endif | |
911 *s = sum; | |
912 | |
913 } | |
914 | |
915 | |
916 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
917 { | |
918 double sum=0; | |
919 double msq=0; | |
920 char flag=0; | |
921 // LOOP along dimension DIM | |
922 | |
923 double *end = data + stride*Ni; | |
924 if (W) { | |
925 // with weight vector | |
926 double count = 0.0; | |
927 double rc=0.0, rn=0.0, rq=0.0; | |
928 do { | |
929 double x = *data; | |
930 double t,y; | |
931 if (!isnan(x)) { | |
932 //count += *W; [1] | |
933 y = *W-rn; | |
934 t = count+y; | |
935 rn= (t-count)-y; | |
936 count= t; | |
937 | |
938 double w = *W*x; | |
939 //sum += *W*x; [1] | |
940 y = *W*x-rc; | |
941 t = sum+y; | |
942 rc= (t-sum)-y; | |
943 sum= t; | |
944 | |
945 // msq += x*w; | |
946 y = w*x-rq; | |
947 t = msq+y; | |
948 rq= (t-msq)-y; | |
949 msq= t; | |
950 } | |
951 #ifndef NO_FLAG | |
952 else | |
953 flag = 1; | |
954 #endif | |
955 data++; // stride=1 | |
956 W++; | |
957 } | |
958 while (data < end); | |
959 *No = count; | |
960 } else { | |
961 // w/o weight vector | |
962 size_t countI = 0; | |
963 double rc=0.0, rq=0.0; | |
964 do { | |
965 double x = *data; | |
966 double t,y; | |
967 if (!isnan(x)) { | |
968 countI++; | |
969 //sum += x; [1] | |
970 y = x-rc; | |
971 t = sum+y; | |
972 rc= (t-sum)-y; | |
973 sum= t; | |
974 | |
975 // msq += x*x; | |
976 y = x*x-rq; | |
977 t = msq+y; | |
978 rq= (t-msq)-y; | |
979 msq= t; | |
980 } | |
981 #ifndef NO_FLAG | |
982 else | |
983 flag = 1; | |
984 #endif | |
985 data++; // stride=1 | |
986 } | |
987 while (data < end); | |
988 *No = (double)countI; | |
989 } | |
990 | |
991 #ifndef NO_FLAG | |
992 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
993 #endif | |
994 *s = sum; | |
995 *s2 = msq; | |
996 } | |
997 |