Mercurial > forge
annotate extra/NaN/src/sumskipnan_mex.cpp @ 8037:6a419bec96bb octave-forge
update contact e-mail and www address
author | schloegl |
---|---|
date | Wed, 20 Apr 2011 09:16:06 +0000 |
parents | f5040c012714 |
children | 49c1c23128ea |
rev | line source |
---|---|
7992 | 1 |
6549 | 2 //------------------------------------------------------------------- |
3 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // sumskipnan: sums all non-NaN values | |
21 // usage: | |
22 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); | |
23 // | |
24 // SUMSKIPNAN uses two techniques to reduce errors: | |
25 // 1) long double (80bit) instead of 64-bit double is used internally | |
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps | |
27 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). | |
28 // | |
29 // Input: | |
30 // - x data array | |
31 // - DIM (optional) dimension to sum | |
32 // - flag (optional) is actually an output argument telling whether some NaN was observed | |
33 // - W (optional) weight vector to compute weighted sum (default 1) | |
34 // | |
35 // Output: | |
36 // - o (weighted) sum along dimension DIM | |
37 // - count of valid elements | |
38 // - sums of squares | |
39 // | |
40 // | |
41 // $Id$ | |
8037 | 42 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 43 // This function is part of the NaN-toolbox |
7889 | 44 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 45 // |
46 //------------------------------------------------------------------- | |
47 | |
48 | |
49 | |
50 | |
51 #include <math.h> | |
7888 | 52 #include <stdint.h> |
6549 | 53 #include "mex.h" |
54 | |
55 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
56 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
57 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
58 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
59 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
60 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
61 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); | |
62 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); | |
63 | |
64 //#define NO_FLAG | |
65 | |
66 #ifdef tmwtypes_h | |
67 #if (MX_API_VER<=0x07020000) | |
68 typedef int mwSize; | |
69 #endif | |
70 #endif | |
71 | |
72 | |
73 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
74 { | |
75 const mwSize *SZ; | |
76 double* LInput; | |
77 double* LOutputSum; | |
78 double* LOutputCount; | |
79 double* LOutputSum2; | |
80 long double* LongOutputSum = NULL; | |
81 long double* LongOutputCount = NULL; | |
82 long double* LongOutputSum2 = NULL; | |
83 double x; | |
84 double* W = NULL; // weight vector | |
85 | |
86 mwSize DIM = 0; | |
87 mwSize D1, D2, D3; // NN; // | |
88 mwSize ND, ND2; // number of dimensions: input, output | |
89 mwSize ix0, ix1, ix2; // index to input and output | |
90 mwSize j, l; // running indices | |
91 mwSize *SZ2; // size of output | |
92 char flag_isNaN = 0; | |
93 | |
94 // check for proper number of input and output arguments | |
95 if ((PInputCount <= 0) || (PInputCount > 4)) | |
96 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); | |
97 if (POutputCount > 4) | |
98 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); | |
99 | |
100 // get 1st argument | |
101 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0])) | |
102 LInput = mxGetPr(PInputs[0]); | |
103 else | |
104 mexErrMsgTxt("First argument must be REAL/DOUBLE."); | |
105 | |
106 // get 2nd argument | |
107 if (PInputCount > 1) { | |
108 switch (mxGetNumberOfElements(PInputs[1])) { | |
109 case 0: x = 0.0; // accept empty element | |
110 break; | |
111 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); | |
112 break; | |
113 default:x = -1.0; // invalid | |
114 } | |
115 if ((x < 0) || (x > 65535) || (x != floor(x))) | |
116 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); | |
117 | |
118 DIM = (unsigned)floor(x); | |
119 } | |
120 | |
121 // get size | |
122 ND = mxGetNumberOfDimensions(PInputs[0]); | |
123 // NN = mxGetNumberOfElements(PInputs[0]); | |
124 SZ = mxGetDimensions(PInputs[0]); | |
125 | |
126 // if DIM==0 (undefined), look for first dimension with more than 1 element. | |
127 for (j = 0; (DIM < 1) && (j < ND); j++) | |
128 if (SZ[j]>1) DIM = j+1; | |
129 | |
130 if (DIM < 1) DIM=1; // in case DIM is still undefined | |
131 | |
132 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output | |
133 | |
134 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size | |
135 | |
136 for (j=0; j<ND; j++) // copy size of input; | |
137 SZ2[j] = SZ[j]; | |
138 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 | |
139 SZ2[j] = 1; | |
140 | |
141 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM | |
142 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM | |
143 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements | |
144 | |
145 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; | |
146 | |
147 // get weight vector for weighted sumskipnan | |
148 if (PInputCount > 3) { | |
149 if (!mxGetNumberOfElements(PInputs[3])) | |
150 ; // empty weight vector - no weighting | |
151 else if (mxGetNumberOfElements(PInputs[3])==D2) | |
152 W = mxGetPr(PInputs[3]); | |
153 else | |
154 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); | |
155 } | |
156 | |
157 int ACC_LEVEL = 0; | |
158 { | |
159 mxArray *LEVEL = NULL; | |
160 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
161 if (!s) { | |
162 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
163 if ((D1>1) && (ACC_LEVEL>2)) | |
164 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); | |
165 } | |
166 mxDestroyArray(LEVEL); | |
167 } | |
168 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
169 | |
170 // create outputs | |
171 #define TYP mxDOUBLE_CLASS | |
172 | |
173 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
174 LOutputSum = mxGetPr(POutput[0]); | |
175 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
176 if (POutputCount >= 2) { | |
177 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
178 LOutputCount = mxGetPr(POutput[1]); | |
179 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
180 } | |
181 if (POutputCount >= 3) { | |
182 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
183 LOutputSum2 = mxGetPr(POutput[2]); | |
184 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
185 } | |
186 mxFree(SZ2); | |
187 | |
7992 | 188 |
6549 | 189 if (D1*D2*D3<1) // zero size array |
190 ; // do nothing | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
191 else if ((D1==1) && (ACC_LEVEL<1)) { |
6549 | 192 // double accuray, naive summation, error = N*2^-52 |
193 switch (POutputCount) { | |
194 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
195 #pragma omp parallel for schedule(dynamic) |
6549 | 196 for (l = 0; l<D3; l++) { |
197 double count; | |
198 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 199 } |
6549 | 200 break; |
201 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
202 #pragma omp parallel for schedule(dynamic) |
6549 | 203 for (l = 0; l<D3; l++) { |
204 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 205 } |
6549 | 206 break; |
207 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
208 #pragma omp parallel for schedule(dynamic) |
6549 | 209 for (l = 0; l<D3; l++) { |
210 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 211 } |
6549 | 212 break; |
213 } | |
214 } | |
215 else if ((D1==1) && (ACC_LEVEL==1)) { | |
216 // extended accuray, naive summation, error = N*2^-64 | |
217 switch (POutputCount) { | |
218 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
219 #pragma omp parallel for schedule(dynamic) |
6549 | 220 for (l = 0; l<D3; l++) { |
221 double count; | |
222 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 223 } |
6549 | 224 break; |
225 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
226 #pragma omp parallel for schedule(dynamic) |
6549 | 227 for (l = 0; l<D3; l++) { |
228 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 229 } |
6549 | 230 break; |
231 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
232 #pragma omp parallel for schedule(dynamic) |
6549 | 233 for (l = 0; l<D3; l++) { |
234 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 235 } |
6549 | 236 break; |
237 } | |
238 } | |
239 else if ((D1==1) && (ACC_LEVEL==3)) { | |
240 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 | |
241 switch (POutputCount) { | |
242 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
243 #pragma omp parallel for schedule(dynamic) |
6549 | 244 for (l = 0; l<D3; l++) { |
245 double count; | |
246 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 247 } |
6549 | 248 break; |
249 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
250 #pragma omp parallel for schedule(dynamic) |
6549 | 251 for (l = 0; l<D3; l++) { |
252 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 253 } |
6549 | 254 break; |
255 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
256 #pragma omp parallel for schedule(dynamic) |
6549 | 257 for (l = 0; l<D3; l++) { |
258 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 259 } |
6549 | 260 break; |
261 } | |
262 } | |
263 else if ((D1==1) && (ACC_LEVEL==2)) { | |
264 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 | |
265 switch (POutputCount) { | |
266 case 1: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
267 #pragma omp parallel for schedule(dynamic) |
6549 | 268 for (l = 0; l<D3; l++) { |
269 double count; | |
270 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 271 } |
6549 | 272 break; |
273 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
274 #pragma omp parallel for schedule(dynamic) |
6549 | 275 for (l = 0; l<D3; l++) { |
276 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 277 } |
6549 | 278 break; |
279 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
280 #pragma omp parallel for schedule(dynamic) |
6549 | 281 for (l = 0; l<D3; l++) { |
282 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 283 } |
6549 | 284 break; |
285 } | |
286 } | |
287 else if (POutputCount <= 1) { | |
288 // OUTER LOOP: along dimensions > DIM | |
7992 | 289 for (l = 0; l<D3; l++) { |
6549 | 290 ix0 = l*D1; // index for output |
291 ix1 = ix0*D2; // index for input | |
292 for (j=0; j<D2; j++) { | |
293 // minimize cache misses | |
294 ix2 = ix0; // index for output | |
295 // Inner LOOP: along dimensions < DIM | |
296 if (W) do { | |
297 long double x = *LInput; | |
298 if (!isnan(x)) { | |
299 LongOutputSum[ix2] += W[j]*x; | |
300 } | |
301 #ifndef NO_FLAG | |
302 else | |
303 flag_isNaN = 1; | |
304 #endif | |
305 LInput++; | |
306 ix2++; | |
307 } while (ix2 != (l+1)*D1); | |
308 else do { | |
309 long double x = *LInput; | |
310 if (!isnan(x)) { | |
311 LongOutputSum[ix2] += x; | |
312 } | |
313 #ifndef NO_FLAG | |
314 else | |
315 flag_isNaN = 1; | |
316 #endif | |
317 LInput++; | |
318 ix2++; | |
319 } while (ix2 != (l+1)*D1); | |
320 } // end for (j= | |
321 | |
322 /* copy to output */ | |
323 for (j=0; j<D1; j++) { | |
324 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
325 } | |
326 } | |
327 } | |
328 | |
329 else if (POutputCount == 2) { | |
330 // OUTER LOOP: along dimensions > DIM | |
331 for (l = 0; l<D3; l++) { | |
332 ix0 = l*D1; | |
333 ix1 = ix0*D2; // index for input | |
334 for (j=0; j<D2; j++) { | |
335 // minimize cache misses | |
336 ix2 = ix0; // index for output | |
337 // Inner LOOP: along dimensions < DIM | |
338 if (W) do { | |
339 long double x = *LInput; | |
340 if (!isnan(x)) { | |
341 LongOutputCount[ix2] += W[j]; | |
342 LongOutputSum[ix2] += W[j]*x; | |
343 } | |
344 #ifndef NO_FLAG | |
345 else | |
346 flag_isNaN = 1; | |
347 #endif | |
348 LInput++; | |
349 ix2++; | |
350 } while (ix2 != (l+1)*D1); | |
351 else do { | |
352 long double x = *LInput; | |
353 if (!isnan(x)) { | |
354 LongOutputCount[ix2] += 1.0; | |
355 LongOutputSum[ix2] += x; | |
356 } | |
357 #ifndef NO_FLAG | |
358 else | |
359 flag_isNaN = 1; | |
360 #endif | |
361 LInput++; | |
362 ix2++; | |
363 } while (ix2 != (l+1)*D1); | |
364 } // end for (j= | |
365 | |
366 /* copy to output */ | |
367 for (j=0; j<D1; j++) { | |
368 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
369 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
370 } // end else | |
371 } | |
372 } | |
373 | |
374 else if (POutputCount == 3) { | |
375 // OUTER LOOP: along dimensions > DIM | |
376 for (l = 0; l<D3; l++) { | |
377 ix0 = l*D1; | |
378 ix1 = ix0*D2; // index for input | |
379 for (j=0; j<D2; j++) { | |
380 // minimize cache misses | |
381 ix2 = ix0; // index for output | |
382 // Inner LOOP: along dimensions < DIM | |
383 if (W) do { | |
384 long double x = *LInput; | |
385 if (!isnan(x)) { | |
386 LongOutputCount[ix2] += W[j]; | |
387 double t = W[j]*x; | |
388 LongOutputSum[ix2] += t; | |
389 LongOutputSum2[ix2] += x*t; | |
390 } | |
391 #ifndef NO_FLAG | |
392 else | |
393 flag_isNaN = 1; | |
394 #endif | |
395 LInput++; | |
396 ix2++; | |
397 } while (ix2 != (l+1)*D1); | |
398 else do { | |
399 long double x = *LInput; | |
400 if (!isnan(x)) { | |
401 LongOutputCount[ix2] += 1.0; | |
402 LongOutputSum[ix2] += x; | |
403 LongOutputSum2[ix2] += x*x; | |
404 } | |
405 #ifndef NO_FLAG | |
406 else | |
407 flag_isNaN = 1; | |
408 #endif | |
409 LInput++; | |
410 ix2++; | |
411 } while (ix2 != (l+1)*D1); | |
412 } // end for (j= | |
413 | |
414 /* copy to output */ | |
415 for (j=0; j<D1; j++) { | |
416 LOutputSum[ix0+j] = LongOutputSum[ix0+j]; | |
417 LOutputCount[ix0+j] = LongOutputCount[ix0+j]; | |
418 LOutputSum2[ix0+j] = LongOutputSum2[ix0+j]; | |
419 } | |
420 } | |
421 } | |
7992 | 422 |
6549 | 423 if (LongOutputSum) mxFree(LongOutputSum); |
424 if (LongOutputCount) mxFree(LongOutputCount); | |
425 if (LongOutputSum2) mxFree(LongOutputSum2); | |
426 | |
427 #ifndef NO_FLAG | |
428 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
429 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
430 this requires that the input argument is a non-empty variable | |
431 */ | |
432 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
433 // set FLAG_NANS_OCCURED | |
434 switch (mxGetClassID(PInputs[2])) { | |
435 case mxLOGICAL_CLASS: | |
436 case mxCHAR_CLASS: | |
437 case mxINT8_CLASS: | |
438 case mxUINT8_CLASS: | |
439 *(uint8_t*)mxGetData(PInputs[2]) = 1; | |
440 break; | |
441 case mxDOUBLE_CLASS: | |
442 *(double*)mxGetData(PInputs[2]) = 1.0; | |
443 break; | |
444 case mxSINGLE_CLASS: | |
445 *(float*)mxGetData(PInputs[2]) = 1.0; | |
446 break; | |
447 case mxINT16_CLASS: | |
448 case mxUINT16_CLASS: | |
449 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
450 break; | |
451 case mxINT32_CLASS: | |
452 case mxUINT32_CLASS: | |
453 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
454 break; | |
455 case mxINT64_CLASS: | |
456 case mxUINT64_CLASS: | |
457 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
458 break; | |
459 case mxFUNCTION_CLASS: | |
460 case mxUNKNOWN_CLASS: | |
461 case mxCELL_CLASS: | |
462 case mxSTRUCT_CLASS: | |
7301
485d1594d155
addresses undesired side-effect off in-place sorting of data
schloegl
parents:
6549
diff
changeset
|
463 default: |
6549 | 464 mexPrintf("Type of 3rd input argument not supported."); |
465 } | |
466 } | |
467 #endif | |
468 } | |
469 | |
470 #define stride 1 | |
471 inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
472 { | |
473 long double sum=0; | |
474 char flag=0; | |
475 // LOOP along dimension DIM | |
476 | |
477 double *end = data + stride*Ni; | |
478 if (W) { | |
479 // with weight vector | |
480 long double count = 0.0; | |
481 do { | |
482 long double x = *data; | |
483 if (!isnan(x)) | |
484 { | |
485 count += *W; | |
486 sum += *W*x; | |
487 } | |
488 #ifndef NO_FLAG | |
489 else | |
490 flag = 1; | |
491 #endif | |
492 | |
493 data++; // stride=1 | |
494 W++; | |
495 } | |
496 while (data < end); | |
497 *No = count; | |
498 } else { | |
499 // w/o weight vector | |
500 size_t countI = 0; | |
501 do { | |
502 long double x = *data; | |
503 if (!isnan(x)) | |
504 { | |
505 countI++; | |
506 sum += x; | |
507 } | |
508 #ifndef NO_FLAG | |
509 else | |
510 flag = 1; | |
511 #endif | |
512 data++; // stride=1 | |
513 } | |
514 while (data < end); | |
515 *No = (double)countI; | |
516 } | |
517 | |
518 #ifndef NO_FLAG | |
519 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
520 #endif | |
521 *s = sum; | |
522 | |
523 } | |
524 | |
525 | |
526 inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
527 { | |
528 long double sum=0; | |
529 long double msq=0; | |
530 char flag=0; | |
531 // LOOP along dimension DIM | |
532 | |
533 double *end = data + stride*Ni; | |
534 if (W) { | |
535 // with weight vector | |
536 long double count = 0.0; | |
537 do { | |
538 long double x = *data; | |
539 if (!isnan(x)) { | |
540 count += *W; | |
541 long double t = *W*x; | |
542 sum += t; | |
543 msq += x*t; | |
544 } | |
545 #ifndef NO_FLAG | |
546 else | |
547 flag = 1; | |
548 #endif | |
549 data++; // stride=1 | |
550 W++; | |
551 } | |
552 while (data < end); | |
553 *No = count; | |
554 } else { | |
555 // w/o weight vector | |
556 size_t countI = 0; | |
557 do { | |
558 long double x = *data; | |
559 if (!isnan(x)) { | |
560 countI++; | |
561 sum += x; | |
562 msq += x*x; | |
563 } | |
564 #ifndef NO_FLAG | |
565 else | |
566 flag = 1; | |
567 #endif | |
568 data++; // stride=1 | |
569 } | |
570 while (data < end); | |
571 *No = (double)countI; | |
572 } | |
573 | |
574 #ifndef NO_FLAG | |
575 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
576 #endif | |
577 *s = sum; | |
578 *s2 = msq; | |
579 } | |
580 | |
581 inline int __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
582 { | |
583 double sum=0; | |
584 char flag=0; | |
585 // LOOP along dimension DIM | |
586 | |
587 double *end = data + stride*Ni; | |
588 if (W) { | |
589 // with weight vector | |
590 double count = 0.0; | |
591 do { | |
592 double x = *data; | |
593 if (!isnan(x)) | |
594 { | |
595 count += *W; | |
596 sum += *W*x; | |
597 } | |
598 #ifndef NO_FLAG | |
599 else | |
600 flag = 1; | |
601 #endif | |
602 | |
603 data++; // stride=1 | |
604 W++; | |
605 } | |
606 while (data < end); | |
607 *No = count; | |
608 } else { | |
609 // w/o weight vector | |
610 size_t countI = 0; | |
611 do { | |
612 double x = *data; | |
613 if (!isnan(x)) | |
614 { | |
615 countI++; | |
616 sum += x; | |
617 } | |
618 #ifndef NO_FLAG | |
619 else | |
620 flag = 1; | |
621 #endif | |
622 data++; // stride=1 | |
623 } | |
624 while (data < end); | |
625 *No = (double)countI; | |
626 } | |
627 | |
628 #ifndef NO_FLAG | |
629 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
630 #endif | |
631 *s = sum; | |
632 | |
633 } | |
634 | |
635 | |
636 inline int __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
637 { | |
638 double sum=0; | |
639 double msq=0; | |
640 char flag=0; | |
641 // LOOP along dimension DIM | |
642 | |
643 double *end = data + stride*Ni; | |
644 if (W) { | |
645 // with weight vector | |
646 double count = 0.0; | |
647 do { | |
648 double x = *data; | |
649 if (!isnan(x)) { | |
650 count += *W; | |
651 double t = *W*x; | |
652 sum += t; | |
653 msq += x*t; | |
654 } | |
655 #ifndef NO_FLAG | |
656 else | |
657 flag = 1; | |
658 #endif | |
659 data++; // stride=1 | |
660 W++; | |
661 } | |
662 while (data < end); | |
663 *No = count; | |
664 } else { | |
665 // w/o weight vector | |
666 size_t countI = 0; | |
667 do { | |
668 double x = *data; | |
669 if (!isnan(x)) { | |
670 countI++; | |
671 sum += x; | |
672 msq += x*x; | |
673 } | |
674 #ifndef NO_FLAG | |
675 else | |
676 flag = 1; | |
677 #endif | |
678 data++; // stride=1 | |
679 } | |
680 while (data < end); | |
681 *No = (double)countI; | |
682 } | |
683 | |
684 #ifndef NO_FLAG | |
685 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
686 #endif | |
687 *s = sum; | |
688 *s2 = msq; | |
689 } | |
690 | |
691 | |
692 | |
693 /*************************************** | |
694 using Kahan's summation formula [1] | |
695 this gives more accurate results while the computational effort within the loop is about 4x as high | |
696 First tests show a penalty of about 40% in terms of computational time. | |
697 | |
698 [1] David Goldberg, | |
699 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
700 ACM Computing Surveys, Vol 23, No 1, March 1991. | |
701 ****************************************/ | |
702 | |
703 inline int __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
704 { | |
705 long double sum=0; | |
706 char flag=0; | |
707 // LOOP along dimension DIM | |
708 | |
709 double *end = data + stride*Ni; | |
710 if (W) { | |
711 // with weight vector | |
712 long double count = 0.0; | |
713 long double rc=0.0, rn=0.0; | |
714 do { | |
715 long double x = *data; | |
716 long double t,y; | |
717 if (!isnan(x)) | |
718 { | |
719 //count += *W; [1] | |
720 y = *W-rn; | |
721 t = count+y; | |
722 rn= (t-count)-y; | |
723 count= t; | |
724 | |
725 //sum += *W*x; [1] | |
726 y = *W*x-rc; | |
727 t = sum+y; | |
728 rc= (t-sum)-y; | |
729 sum= t; | |
730 } | |
731 #ifndef NO_FLAG | |
732 else | |
733 flag = 1; | |
734 #endif | |
735 | |
736 data++; // stride=1 | |
737 W++; | |
738 } | |
739 while (data < end); | |
740 *No = count; | |
741 } else { | |
742 // w/o weight vector | |
743 size_t countI = 0; | |
744 long double rc=0.0; | |
745 do { | |
746 long double x = *data; | |
747 long double t,y; | |
748 if (!isnan(x)) | |
749 { | |
750 countI++; | |
751 // sum += x; [1] | |
752 y = x-rc; | |
753 t = sum+y; | |
754 rc= (t-sum)-y; | |
755 sum= t; | |
756 } | |
757 #ifndef NO_FLAG | |
758 else | |
759 flag = 1; | |
760 #endif | |
761 data++; // stride=1 | |
762 } | |
763 while (data < end); | |
764 *No = (double)countI; | |
765 } | |
766 | |
767 #ifndef NO_FLAG | |
768 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
769 #endif | |
770 *s = sum; | |
771 | |
772 } | |
773 | |
774 | |
775 inline int __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
776 { | |
777 long double sum=0; | |
778 long double msq=0; | |
779 char flag=0; | |
780 // LOOP along dimension DIM | |
781 | |
782 double *end = data + stride*Ni; | |
783 if (W) { | |
784 // with weight vector | |
785 long double count = 0.0; | |
786 long double rc=0.0, rn=0.0, rq=0.0; | |
787 do { | |
788 long double x = *data; | |
789 long double t,y; | |
790 if (!isnan(x)) { | |
791 //count += *W; [1] | |
792 y = *W-rn; | |
793 t = count+y; | |
794 rn= (t-count)-y; | |
795 count= t; | |
796 | |
797 long double w = *W*x; | |
798 //sum += *W*x; [1] | |
799 y = *W*x-rc; | |
800 t = sum+y; | |
801 rc= (t-sum)-y; | |
802 sum= t; | |
803 | |
804 // msq += x*w; | |
805 y = w*x-rq; | |
806 t = msq+y; | |
807 rq= (t-msq)-y; | |
808 msq= t; | |
809 } | |
810 #ifndef NO_FLAG | |
811 else | |
812 flag = 1; | |
813 #endif | |
814 data++; // stride=1 | |
815 W++; | |
816 } | |
817 while (data < end); | |
818 *No = count; | |
819 } else { | |
820 // w/o weight vector | |
821 size_t countI = 0; | |
822 long double rc=0.0, rq=0.0; | |
823 do { | |
824 long double x = *data; | |
825 long double t,y; | |
826 if (!isnan(x)) { | |
827 countI++; | |
828 //sum += x; [1] | |
829 y = x-rc; | |
830 t = sum+y; | |
831 rc= (t-sum)-y; | |
832 sum= t; | |
833 | |
834 // msq += x*x; | |
835 y = x*x-rq; | |
836 t = msq+y; | |
837 rq= (t-msq)-y; | |
838 msq= t; | |
839 } | |
840 #ifndef NO_FLAG | |
841 else | |
842 flag = 1; | |
843 #endif | |
844 data++; // stride=1 | |
845 } | |
846 while (data < end); | |
847 *No = (double)countI; | |
848 } | |
849 | |
850 #ifndef NO_FLAG | |
851 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
852 #endif | |
853 *s = sum; | |
854 *s2 = msq; | |
855 } | |
856 | |
857 inline int __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) | |
858 { | |
859 double sum=0; | |
860 char flag=0; | |
861 // LOOP along dimension DIM | |
862 | |
863 double *end = data + stride*Ni; | |
864 if (W) { | |
865 // with weight vector | |
866 double count = 0.0; | |
867 double rc=0.0, rn=0.0; | |
868 do { | |
869 double x = *data; | |
870 double t,y; | |
871 if (!isnan(x)) | |
872 { | |
873 //count += *W; [1] | |
874 y = *W-rn; | |
875 t = count+y; | |
876 rn= (t-count)-y; | |
877 count= t; | |
878 | |
879 //sum += *W*x; [1] | |
880 y = *W*x-rc; | |
881 t = sum+y; | |
882 rc= (t-sum)-y; | |
883 sum= t; | |
884 } | |
885 #ifndef NO_FLAG | |
886 else | |
887 flag = 1; | |
888 #endif | |
889 | |
890 data++; // stride=1 | |
891 W++; | |
892 } | |
893 while (data < end); | |
894 *No = count; | |
895 } else { | |
896 // w/o weight vector | |
897 size_t countI = 0; | |
898 double rc=0.0; | |
899 do { | |
900 double x = *data; | |
901 double t,y; | |
902 if (!isnan(x)) | |
903 { | |
904 countI++; | |
905 // sum += x; [1] | |
906 y = x-rc; | |
907 t = sum+y; | |
908 rc= (t-sum)-y; | |
909 sum= t; | |
910 } | |
911 #ifndef NO_FLAG | |
912 else | |
913 flag = 1; | |
914 #endif | |
915 data++; // stride=1 | |
916 } | |
917 while (data < end); | |
918 *No = (double)countI; | |
919 } | |
920 | |
921 #ifndef NO_FLAG | |
922 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
923 #endif | |
924 *s = sum; | |
925 | |
926 } | |
927 | |
928 | |
929 inline int __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) | |
930 { | |
931 double sum=0; | |
932 double msq=0; | |
933 char flag=0; | |
934 // LOOP along dimension DIM | |
935 | |
936 double *end = data + stride*Ni; | |
937 if (W) { | |
938 // with weight vector | |
939 double count = 0.0; | |
940 double rc=0.0, rn=0.0, rq=0.0; | |
941 do { | |
942 double x = *data; | |
943 double t,y; | |
944 if (!isnan(x)) { | |
945 //count += *W; [1] | |
946 y = *W-rn; | |
947 t = count+y; | |
948 rn= (t-count)-y; | |
949 count= t; | |
950 | |
951 double w = *W*x; | |
952 //sum += *W*x; [1] | |
953 y = *W*x-rc; | |
954 t = sum+y; | |
955 rc= (t-sum)-y; | |
956 sum= t; | |
957 | |
958 // msq += x*w; | |
959 y = w*x-rq; | |
960 t = msq+y; | |
961 rq= (t-msq)-y; | |
962 msq= t; | |
963 } | |
964 #ifndef NO_FLAG | |
965 else | |
966 flag = 1; | |
967 #endif | |
968 data++; // stride=1 | |
969 W++; | |
970 } | |
971 while (data < end); | |
972 *No = count; | |
973 } else { | |
974 // w/o weight vector | |
975 size_t countI = 0; | |
976 double rc=0.0, rq=0.0; | |
977 do { | |
978 double x = *data; | |
979 double t,y; | |
980 if (!isnan(x)) { | |
981 countI++; | |
982 //sum += x; [1] | |
983 y = x-rc; | |
984 t = sum+y; | |
985 rc= (t-sum)-y; | |
986 sum= t; | |
987 | |
988 // msq += x*x; | |
989 y = x*x-rq; | |
990 t = msq+y; | |
991 rq= (t-msq)-y; | |
992 msq= t; | |
993 } | |
994 #ifndef NO_FLAG | |
995 else | |
996 flag = 1; | |
997 #endif | |
998 data++; // stride=1 | |
999 } | |
1000 while (data < end); | |
1001 *No = (double)countI; | |
1002 } | |
1003 | |
1004 #ifndef NO_FLAG | |
1005 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
1006 #endif | |
1007 *s = sum; | |
1008 *s2 = msq; | |
1009 } | |
1010 |