Mercurial > forge
annotate extra/NaN/src/sumskipnan_mex.cpp @ 12691:6d6285a2a633 octave-forge
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
author | schloegl |
---|---|
date | Sat, 12 Sep 2015 14:16:39 +0000 |
parents | f26b1170ea90 |
children | 13815b367946 |
rev | line source |
---|---|
7992 | 1 |
6549 | 2 //------------------------------------------------------------------- |
3 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // sumskipnan: sums all non-NaN values | |
21 // usage: | |
22 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); | |
23 // | |
24 // SUMSKIPNAN uses two techniques to reduce errors: | |
25 // 1) long double (80bit) instead of 64-bit double is used internally | |
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps | |
27 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). | |
28 // | |
29 // Input: | |
30 // - x data array | |
31 // - DIM (optional) dimension to sum | |
32 // - flag (optional) is actually an output argument telling whether some NaN was observed | |
33 // - W (optional) weight vector to compute weighted sum (default 1) | |
34 // | |
35 // Output: | |
36 // - o (weighted) sum along dimension DIM | |
37 // - count of valid elements | |
38 // - sums of squares | |
39 // | |
40 // | |
41 // $Id$ | |
8037 | 42 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 43 // This function is part of the NaN-toolbox |
7889 | 44 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 45 // |
46 //------------------------------------------------------------------- | |
47 | |
48 | |
49 | |
50 | |
51 #include <math.h> | |
7888 | 52 #include <stdint.h> |
6549 | 53 #include "mex.h" |
54 | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
55 /* |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
56 math.h has isnan() defined for all sizes of floating point numbers, |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
57 but c++ assumes isnan(double), causing possible conversions for float and long double |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
58 */ |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
59 #define ISNAN(a) (a!=a) |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
60 |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
61 |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
62 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
63 inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
64 inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
65 inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
66 inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
67 inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
68 inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
69 inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6549 | 70 |
71 //#define NO_FLAG | |
72 | |
73 #ifdef tmwtypes_h | |
74 #if (MX_API_VER<=0x07020000) | |
75 typedef int mwSize; | |
76 #endif | |
77 #endif | |
78 | |
79 | |
80 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
81 { | |
82 const mwSize *SZ; | |
83 double* LInput; | |
84 double* LOutputSum; | |
85 double* LOutputCount; | |
86 double* LOutputSum2; | |
87 long double* LongOutputSum = NULL; | |
88 long double* LongOutputCount = NULL; | |
89 long double* LongOutputSum2 = NULL; | |
90 double x; | |
91 double* W = NULL; // weight vector | |
92 | |
93 mwSize DIM = 0; | |
94 mwSize D1, D2, D3; // NN; // | |
95 mwSize ND, ND2; // number of dimensions: input, output | |
96 mwSize ix0, ix1, ix2; // index to input and output | |
97 mwSize j, l; // running indices | |
98 mwSize *SZ2; // size of output | |
99 char flag_isNaN = 0; | |
100 | |
101 // check for proper number of input and output arguments | |
102 if ((PInputCount <= 0) || (PInputCount > 4)) | |
103 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); | |
104 if (POutputCount > 4) | |
105 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); | |
106 | |
107 // get 1st argument | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
12555
diff
changeset
|
108 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) |
6549 | 109 LInput = mxGetPr(PInputs[0]); |
110 else | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
12555
diff
changeset
|
111 mexErrMsgTxt("First argument must be and not sparse REAL/DOUBLE."); |
6549 | 112 |
113 // get 2nd argument | |
114 if (PInputCount > 1) { | |
115 switch (mxGetNumberOfElements(PInputs[1])) { | |
116 case 0: x = 0.0; // accept empty element | |
117 break; | |
118 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); | |
119 break; | |
120 default:x = -1.0; // invalid | |
121 } | |
122 if ((x < 0) || (x > 65535) || (x != floor(x))) | |
123 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); | |
124 | |
125 DIM = (unsigned)floor(x); | |
126 } | |
127 | |
128 // get size | |
129 ND = mxGetNumberOfDimensions(PInputs[0]); | |
130 // NN = mxGetNumberOfElements(PInputs[0]); | |
131 SZ = mxGetDimensions(PInputs[0]); | |
132 | |
133 // if DIM==0 (undefined), look for first dimension with more than 1 element. | |
134 for (j = 0; (DIM < 1) && (j < ND); j++) | |
135 if (SZ[j]>1) DIM = j+1; | |
136 | |
137 if (DIM < 1) DIM=1; // in case DIM is still undefined | |
138 | |
139 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output | |
140 | |
141 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size | |
142 | |
143 for (j=0; j<ND; j++) // copy size of input; | |
144 SZ2[j] = SZ[j]; | |
145 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 | |
146 SZ2[j] = 1; | |
147 | |
148 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM | |
149 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM | |
150 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements | |
151 | |
152 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; | |
153 | |
154 // get weight vector for weighted sumskipnan | |
155 if (PInputCount > 3) { | |
156 if (!mxGetNumberOfElements(PInputs[3])) | |
157 ; // empty weight vector - no weighting | |
158 else if (mxGetNumberOfElements(PInputs[3])==D2) | |
159 W = mxGetPr(PInputs[3]); | |
160 else | |
161 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); | |
162 } | |
163 | |
164 int ACC_LEVEL = 0; | |
165 { | |
166 mxArray *LEVEL = NULL; | |
167 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
168 if (!s) { | |
169 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
170 if ((D1>1) && (ACC_LEVEL>2)) | |
171 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); | |
172 } | |
173 mxDestroyArray(LEVEL); | |
174 } | |
175 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
176 | |
177 // create outputs | |
178 #define TYP mxDOUBLE_CLASS | |
179 | |
180 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
181 LOutputSum = mxGetPr(POutput[0]); | |
182 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
183 if (POutputCount >= 2) { | |
184 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
185 LOutputCount = mxGetPr(POutput[1]); | |
186 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
187 } | |
188 if (POutputCount >= 3) { | |
189 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
190 LOutputSum2 = mxGetPr(POutput[2]); | |
191 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
192 } | |
193 mxFree(SZ2); | |
194 | |
7992 | 195 |
8232 | 196 if (!D1 || !D2 || !D3) // zero size array |
6549 | 197 ; // do nothing |
8232 | 198 else if (D1==1) { |
199 if (ACC_LEVEL<1) { | |
6549 | 200 // double accuray, naive summation, error = N*2^-52 |
201 switch (POutputCount) { | |
8875 | 202 case 0: |
6549 | 203 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
204 #pragma omp parallel for schedule(dynamic) |
6549 | 205 for (l = 0; l<D3; l++) { |
206 double count; | |
207 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 208 } |
6549 | 209 break; |
210 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
211 #pragma omp parallel for schedule(dynamic) |
6549 | 212 for (l = 0; l<D3; l++) { |
213 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 214 } |
6549 | 215 break; |
216 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
217 #pragma omp parallel for schedule(dynamic) |
6549 | 218 for (l = 0; l<D3; l++) { |
219 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 220 } |
6549 | 221 break; |
222 } | |
8232 | 223 } |
224 else if (ACC_LEVEL==1) { | |
6549 | 225 // extended accuray, naive summation, error = N*2^-64 |
226 switch (POutputCount) { | |
8875 | 227 case 0: |
6549 | 228 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
229 #pragma omp parallel for schedule(dynamic) |
6549 | 230 for (l = 0; l<D3; l++) { |
231 double count; | |
232 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 233 } |
6549 | 234 break; |
235 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
236 #pragma omp parallel for schedule(dynamic) |
6549 | 237 for (l = 0; l<D3; l++) { |
238 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 239 } |
6549 | 240 break; |
241 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
242 #pragma omp parallel for schedule(dynamic) |
6549 | 243 for (l = 0; l<D3; l++) { |
244 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 245 } |
6549 | 246 break; |
247 } | |
8232 | 248 } |
249 else if (ACC_LEVEL==3) { | |
6549 | 250 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 |
251 switch (POutputCount) { | |
8875 | 252 case 0: |
6549 | 253 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
254 #pragma omp parallel for schedule(dynamic) |
6549 | 255 for (l = 0; l<D3; l++) { |
256 double count; | |
257 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 258 } |
6549 | 259 break; |
260 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
261 #pragma omp parallel for schedule(dynamic) |
6549 | 262 for (l = 0; l<D3; l++) { |
263 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 264 } |
6549 | 265 break; |
266 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
267 #pragma omp parallel for schedule(dynamic) |
6549 | 268 for (l = 0; l<D3; l++) { |
269 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 270 } |
6549 | 271 break; |
272 } | |
8232 | 273 } |
274 else if (ACC_LEVEL==2) { | |
6549 | 275 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 |
276 switch (POutputCount) { | |
8875 | 277 case 0: |
6549 | 278 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
279 #pragma omp parallel for schedule(dynamic) |
6549 | 280 for (l = 0; l<D3; l++) { |
281 double count; | |
282 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 283 } |
6549 | 284 break; |
285 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
286 #pragma omp parallel for schedule(dynamic) |
6549 | 287 for (l = 0; l<D3; l++) { |
288 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 289 } |
6549 | 290 break; |
291 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
292 #pragma omp parallel for schedule(dynamic) |
6549 | 293 for (l = 0; l<D3; l++) { |
294 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 295 } |
6549 | 296 break; |
297 } | |
8232 | 298 } |
6549 | 299 } |
300 else if (POutputCount <= 1) { | |
301 // OUTER LOOP: along dimensions > DIM | |
7992 | 302 for (l = 0; l<D3; l++) { |
6549 | 303 ix0 = l*D1; // index for output |
304 ix1 = ix0*D2; // index for input | |
305 for (j=0; j<D2; j++) { | |
306 // minimize cache misses | |
307 ix2 = ix0; // index for output | |
308 // Inner LOOP: along dimensions < DIM | |
309 if (W) do { | |
310 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
311 if (!ISNAN(x)) { |
6549 | 312 LongOutputSum[ix2] += W[j]*x; |
313 } | |
314 #ifndef NO_FLAG | |
315 else | |
316 flag_isNaN = 1; | |
317 #endif | |
318 LInput++; | |
319 ix2++; | |
320 } while (ix2 != (l+1)*D1); | |
321 else do { | |
322 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
323 if (!ISNAN(x)) { |
6549 | 324 LongOutputSum[ix2] += x; |
325 } | |
326 #ifndef NO_FLAG | |
327 else | |
328 flag_isNaN = 1; | |
329 #endif | |
330 LInput++; | |
331 ix2++; | |
332 } while (ix2 != (l+1)*D1); | |
333 } // end for (j= | |
334 | |
335 /* copy to output */ | |
336 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
337 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
6549 | 338 } |
339 } | |
340 } | |
341 | |
342 else if (POutputCount == 2) { | |
343 // OUTER LOOP: along dimensions > DIM | |
344 for (l = 0; l<D3; l++) { | |
345 ix0 = l*D1; | |
346 ix1 = ix0*D2; // index for input | |
347 for (j=0; j<D2; j++) { | |
348 // minimize cache misses | |
12306
6eb642de94a3
update docu of TSA and NaN package: push version to 4.2.7 and 2.5.8 for TSA and NaN package, resp.
schloegl
parents:
8875
diff
changeset
|
349 ix2 = ix0; // index for output |
6549 | 350 // Inner LOOP: along dimensions < DIM |
351 if (W) do { | |
352 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
353 if (!ISNAN(x)) { |
6549 | 354 LongOutputCount[ix2] += W[j]; |
355 LongOutputSum[ix2] += W[j]*x; | |
356 } | |
357 #ifndef NO_FLAG | |
358 else | |
359 flag_isNaN = 1; | |
360 #endif | |
361 LInput++; | |
362 ix2++; | |
363 } while (ix2 != (l+1)*D1); | |
364 else do { | |
365 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
366 if (!ISNAN(x)) { |
6549 | 367 LongOutputCount[ix2] += 1.0; |
368 LongOutputSum[ix2] += x; | |
369 } | |
370 #ifndef NO_FLAG | |
371 else | |
372 flag_isNaN = 1; | |
373 #endif | |
374 LInput++; | |
375 ix2++; | |
376 } while (ix2 != (l+1)*D1); | |
377 } // end for (j= | |
378 | |
379 /* copy to output */ | |
380 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
381 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
382 LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; |
6549 | 383 } // end else |
384 } | |
385 } | |
386 | |
387 else if (POutputCount == 3) { | |
388 // OUTER LOOP: along dimensions > DIM | |
389 for (l = 0; l<D3; l++) { | |
390 ix0 = l*D1; | |
391 ix1 = ix0*D2; // index for input | |
392 for (j=0; j<D2; j++) { | |
393 // minimize cache misses | |
394 ix2 = ix0; // index for output | |
395 // Inner LOOP: along dimensions < DIM | |
396 if (W) do { | |
397 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
398 if (!ISNAN(x)) { |
6549 | 399 LongOutputCount[ix2] += W[j]; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
400 long double t = W[j]*x; |
6549 | 401 LongOutputSum[ix2] += t; |
402 LongOutputSum2[ix2] += x*t; | |
403 } | |
404 #ifndef NO_FLAG | |
405 else | |
406 flag_isNaN = 1; | |
407 #endif | |
408 LInput++; | |
409 ix2++; | |
410 } while (ix2 != (l+1)*D1); | |
411 else do { | |
412 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
413 if (!ISNAN(x)) { |
6549 | 414 LongOutputCount[ix2] += 1.0; |
415 LongOutputSum[ix2] += x; | |
416 LongOutputSum2[ix2] += x*x; | |
417 } | |
418 #ifndef NO_FLAG | |
419 else | |
420 flag_isNaN = 1; | |
421 #endif | |
422 LInput++; | |
423 ix2++; | |
424 } while (ix2 != (l+1)*D1); | |
425 } // end for (j= | |
426 | |
427 /* copy to output */ | |
428 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
429 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
430 LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
431 LOutputSum2[ix0+j] = (typeof(*LOutputSum2))LongOutputSum2[ix0+j]; |
6549 | 432 } |
433 } | |
434 } | |
7992 | 435 |
6549 | 436 if (LongOutputSum) mxFree(LongOutputSum); |
437 if (LongOutputCount) mxFree(LongOutputCount); | |
438 if (LongOutputSum2) mxFree(LongOutputSum2); | |
439 | |
440 #ifndef NO_FLAG | |
441 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
442 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
443 this requires that the input argument is a non-empty variable | |
444 */ | |
445 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
446 // set FLAG_NANS_OCCURED | |
447 switch (mxGetClassID(PInputs[2])) { | |
448 case mxLOGICAL_CLASS: | |
449 case mxCHAR_CLASS: | |
450 case mxINT8_CLASS: | |
451 case mxUINT8_CLASS: | |
452 *(uint8_t*)mxGetData(PInputs[2]) = 1; | |
453 break; | |
454 case mxDOUBLE_CLASS: | |
455 *(double*)mxGetData(PInputs[2]) = 1.0; | |
456 break; | |
457 case mxSINGLE_CLASS: | |
458 *(float*)mxGetData(PInputs[2]) = 1.0; | |
459 break; | |
460 case mxINT16_CLASS: | |
461 case mxUINT16_CLASS: | |
462 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
463 break; | |
464 case mxINT32_CLASS: | |
465 case mxUINT32_CLASS: | |
466 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
467 break; | |
468 case mxINT64_CLASS: | |
469 case mxUINT64_CLASS: | |
470 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
471 break; | |
472 case mxFUNCTION_CLASS: | |
473 case mxUNKNOWN_CLASS: | |
474 case mxCELL_CLASS: | |
475 case mxSTRUCT_CLASS: | |
7301
485d1594d155
addresses undesired side-effect off in-place sorting of data
schloegl
parents:
6549
diff
changeset
|
476 default: |
6549 | 477 mexPrintf("Type of 3rd input argument not supported."); |
478 } | |
479 } | |
480 #endif | |
481 } | |
482 | |
483 #define stride 1 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
484 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 485 { |
486 long double sum=0; | |
487 char flag=0; | |
488 // LOOP along dimension DIM | |
489 | |
490 double *end = data + stride*Ni; | |
491 if (W) { | |
492 // with weight vector | |
493 long double count = 0.0; | |
494 do { | |
495 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
496 if (!ISNAN(x)) |
6549 | 497 { |
498 count += *W; | |
499 sum += *W*x; | |
500 } | |
501 #ifndef NO_FLAG | |
502 else | |
503 flag = 1; | |
504 #endif | |
505 | |
506 data++; // stride=1 | |
507 W++; | |
508 } | |
509 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
510 *No = (typeof(*No))count; |
6549 | 511 } else { |
512 // w/o weight vector | |
513 size_t countI = 0; | |
514 do { | |
515 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
516 if (!ISNAN(x)) |
6549 | 517 { |
518 countI++; | |
519 sum += x; | |
520 } | |
521 #ifndef NO_FLAG | |
522 else | |
523 flag = 1; | |
524 #endif | |
525 data++; // stride=1 | |
526 } | |
527 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
528 *No = (typeof(*No))countI; |
6549 | 529 } |
530 | |
531 #ifndef NO_FLAG | |
532 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
533 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
534 *s = (typeof(*s))sum; |
6549 | 535 |
536 } | |
537 | |
538 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
539 inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 540 { |
541 long double sum=0; | |
542 long double msq=0; | |
543 char flag=0; | |
544 // LOOP along dimension DIM | |
545 | |
546 double *end = data + stride*Ni; | |
547 if (W) { | |
548 // with weight vector | |
549 long double count = 0.0; | |
550 do { | |
551 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
552 if (!ISNAN(x)) { |
6549 | 553 count += *W; |
554 long double t = *W*x; | |
555 sum += t; | |
556 msq += x*t; | |
557 } | |
558 #ifndef NO_FLAG | |
559 else | |
560 flag = 1; | |
561 #endif | |
562 data++; // stride=1 | |
563 W++; | |
564 } | |
565 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
566 *No = (typeof(*No))count; |
6549 | 567 } else { |
568 // w/o weight vector | |
569 size_t countI = 0; | |
570 do { | |
571 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
572 if (!ISNAN(x)) { |
6549 | 573 countI++; |
574 sum += x; | |
575 msq += x*x; | |
576 } | |
577 #ifndef NO_FLAG | |
578 else | |
579 flag = 1; | |
580 #endif | |
581 data++; // stride=1 | |
582 } | |
583 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
584 *No = (typeof(*No))countI; |
6549 | 585 } |
586 | |
587 #ifndef NO_FLAG | |
588 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
589 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
590 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
591 *s2 = (typeof(*s2))msq; |
6549 | 592 } |
593 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
594 inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 595 { |
596 double sum=0; | |
597 char flag=0; | |
598 // LOOP along dimension DIM | |
599 | |
600 double *end = data + stride*Ni; | |
601 if (W) { | |
602 // with weight vector | |
603 double count = 0.0; | |
604 do { | |
605 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
606 if (!ISNAN(x)) |
6549 | 607 { |
608 count += *W; | |
609 sum += *W*x; | |
610 } | |
611 #ifndef NO_FLAG | |
612 else | |
613 flag = 1; | |
614 #endif | |
615 | |
616 data++; // stride=1 | |
617 W++; | |
618 } | |
619 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
620 *No = (typeof(*No))count; |
6549 | 621 } else { |
622 // w/o weight vector | |
623 size_t countI = 0; | |
624 do { | |
625 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
626 if (!ISNAN(x)) |
6549 | 627 { |
628 countI++; | |
629 sum += x; | |
630 } | |
631 #ifndef NO_FLAG | |
632 else | |
633 flag = 1; | |
634 #endif | |
635 data++; // stride=1 | |
636 } | |
637 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
638 *No = (typeof(*No))countI; |
6549 | 639 } |
640 | |
641 #ifndef NO_FLAG | |
642 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
643 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
644 *s = (typeof(*s))sum; |
6549 | 645 |
646 } | |
647 | |
648 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
649 inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 650 { |
651 double sum=0; | |
652 double msq=0; | |
653 char flag=0; | |
654 // LOOP along dimension DIM | |
655 | |
656 double *end = data + stride*Ni; | |
657 if (W) { | |
658 // with weight vector | |
659 double count = 0.0; | |
660 do { | |
661 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
662 if (!ISNAN(x)) { |
6549 | 663 count += *W; |
664 double t = *W*x; | |
665 sum += t; | |
666 msq += x*t; | |
667 } | |
668 #ifndef NO_FLAG | |
669 else | |
670 flag = 1; | |
671 #endif | |
672 data++; // stride=1 | |
673 W++; | |
674 } | |
675 while (data < end); | |
676 *No = count; | |
677 } else { | |
678 // w/o weight vector | |
679 size_t countI = 0; | |
680 do { | |
681 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
682 if (!ISNAN(x)) { |
6549 | 683 countI++; |
684 sum += x; | |
685 msq += x*x; | |
686 } | |
687 #ifndef NO_FLAG | |
688 else | |
689 flag = 1; | |
690 #endif | |
691 data++; // stride=1 | |
692 } | |
693 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
694 *No = (typeof(*No))countI; |
6549 | 695 } |
696 | |
697 #ifndef NO_FLAG | |
698 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
699 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
700 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
701 *s2 = (typeof(*s2))msq; |
6549 | 702 } |
703 | |
704 | |
705 | |
706 /*************************************** | |
707 using Kahan's summation formula [1] | |
708 this gives more accurate results while the computational effort within the loop is about 4x as high | |
709 First tests show a penalty of about 40% in terms of computational time. | |
710 | |
711 [1] David Goldberg, | |
712 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
713 ACM Computing Surveys, Vol 23, No 1, March 1991. | |
714 ****************************************/ | |
715 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
716 inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 717 { |
718 long double sum=0; | |
719 char flag=0; | |
720 // LOOP along dimension DIM | |
721 | |
722 double *end = data + stride*Ni; | |
723 if (W) { | |
724 // with weight vector | |
725 long double count = 0.0; | |
726 long double rc=0.0, rn=0.0; | |
727 do { | |
728 long double x = *data; | |
729 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
730 if (!ISNAN(x)) |
6549 | 731 { |
732 //count += *W; [1] | |
733 y = *W-rn; | |
734 t = count+y; | |
735 rn= (t-count)-y; | |
736 count= t; | |
737 | |
738 //sum += *W*x; [1] | |
739 y = *W*x-rc; | |
740 t = sum+y; | |
741 rc= (t-sum)-y; | |
742 sum= t; | |
743 } | |
744 #ifndef NO_FLAG | |
745 else | |
746 flag = 1; | |
747 #endif | |
748 | |
749 data++; // stride=1 | |
750 W++; | |
751 } | |
752 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
753 *No = (typeof(*No))count; |
6549 | 754 } else { |
755 // w/o weight vector | |
756 size_t countI = 0; | |
757 long double rc=0.0; | |
758 do { | |
759 long double x = *data; | |
760 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
761 if (!ISNAN(x)) |
6549 | 762 { |
763 countI++; | |
764 // sum += x; [1] | |
765 y = x-rc; | |
766 t = sum+y; | |
767 rc= (t-sum)-y; | |
768 sum= t; | |
769 } | |
770 #ifndef NO_FLAG | |
771 else | |
772 flag = 1; | |
773 #endif | |
774 data++; // stride=1 | |
775 } | |
776 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
777 *No = (typeof(*No))countI; |
6549 | 778 } |
779 | |
780 #ifndef NO_FLAG | |
781 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
782 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
783 *s = (typeof(*s))sum; |
6549 | 784 |
785 } | |
786 | |
787 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
788 inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 789 { |
790 long double sum=0; | |
791 long double msq=0; | |
792 char flag=0; | |
793 // LOOP along dimension DIM | |
794 | |
795 double *end = data + stride*Ni; | |
796 if (W) { | |
797 // with weight vector | |
798 long double count = 0.0; | |
799 long double rc=0.0, rn=0.0, rq=0.0; | |
800 do { | |
801 long double x = *data; | |
802 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
803 if (!ISNAN(x)) { |
6549 | 804 //count += *W; [1] |
805 y = *W-rn; | |
806 t = count+y; | |
807 rn= (t-count)-y; | |
808 count= t; | |
809 | |
810 long double w = *W*x; | |
811 //sum += *W*x; [1] | |
812 y = *W*x-rc; | |
813 t = sum+y; | |
814 rc= (t-sum)-y; | |
815 sum= t; | |
816 | |
817 // msq += x*w; | |
818 y = w*x-rq; | |
819 t = msq+y; | |
820 rq= (t-msq)-y; | |
821 msq= t; | |
822 } | |
823 #ifndef NO_FLAG | |
824 else | |
825 flag = 1; | |
826 #endif | |
827 data++; // stride=1 | |
828 W++; | |
829 } | |
830 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
831 *No = (typeof(*No))count; |
6549 | 832 } else { |
833 // w/o weight vector | |
834 size_t countI = 0; | |
835 long double rc=0.0, rq=0.0; | |
836 do { | |
837 long double x = *data; | |
838 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
839 if (!ISNAN(x)) { |
6549 | 840 countI++; |
841 //sum += x; [1] | |
842 y = x-rc; | |
843 t = sum+y; | |
844 rc= (t-sum)-y; | |
845 sum= t; | |
846 | |
847 // msq += x*x; | |
848 y = x*x-rq; | |
849 t = msq+y; | |
850 rq= (t-msq)-y; | |
851 msq= t; | |
852 } | |
853 #ifndef NO_FLAG | |
854 else | |
855 flag = 1; | |
856 #endif | |
857 data++; // stride=1 | |
858 } | |
859 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
860 *No = (typeof(*No))countI; |
6549 | 861 } |
862 | |
863 #ifndef NO_FLAG | |
864 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
865 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
866 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
867 *s2 = (typeof(*s))msq; |
6549 | 868 } |
869 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
870 inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 871 { |
872 double sum=0; | |
873 char flag=0; | |
874 // LOOP along dimension DIM | |
875 | |
876 double *end = data + stride*Ni; | |
877 if (W) { | |
878 // with weight vector | |
879 double count = 0.0; | |
880 double rc=0.0, rn=0.0; | |
881 do { | |
882 double x = *data; | |
883 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
884 if (!ISNAN(x)) |
6549 | 885 { |
886 //count += *W; [1] | |
887 y = *W-rn; | |
888 t = count+y; | |
889 rn= (t-count)-y; | |
890 count= t; | |
891 | |
892 //sum += *W*x; [1] | |
893 y = *W*x-rc; | |
894 t = sum+y; | |
895 rc= (t-sum)-y; | |
896 sum= t; | |
897 } | |
898 #ifndef NO_FLAG | |
899 else | |
900 flag = 1; | |
901 #endif | |
902 | |
903 data++; // stride=1 | |
904 W++; | |
905 } | |
906 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
907 *No = (typeof(*No))count; |
6549 | 908 } else { |
909 // w/o weight vector | |
910 size_t countI = 0; | |
911 double rc=0.0; | |
912 do { | |
913 double x = *data; | |
914 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
915 if (!ISNAN(x)) |
6549 | 916 { |
917 countI++; | |
918 // sum += x; [1] | |
919 y = x-rc; | |
920 t = sum+y; | |
921 rc= (t-sum)-y; | |
922 sum= t; | |
923 } | |
924 #ifndef NO_FLAG | |
925 else | |
926 flag = 1; | |
927 #endif | |
928 data++; // stride=1 | |
929 } | |
930 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
931 *No = (typeof(*No))countI; |
6549 | 932 } |
933 | |
934 #ifndef NO_FLAG | |
935 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
936 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
937 *s = (typeof(*s))sum; |
6549 | 938 |
939 } | |
940 | |
941 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
942 inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 943 { |
944 double sum=0; | |
945 double msq=0; | |
946 char flag=0; | |
947 // LOOP along dimension DIM | |
948 | |
949 double *end = data + stride*Ni; | |
950 if (W) { | |
951 // with weight vector | |
952 double count = 0.0; | |
953 double rc=0.0, rn=0.0, rq=0.0; | |
954 do { | |
955 double x = *data; | |
956 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
957 if (!ISNAN(x)) { |
6549 | 958 //count += *W; [1] |
959 y = *W-rn; | |
960 t = count+y; | |
961 rn= (t-count)-y; | |
962 count= t; | |
963 | |
964 double w = *W*x; | |
965 //sum += *W*x; [1] | |
966 y = *W*x-rc; | |
967 t = sum+y; | |
968 rc= (t-sum)-y; | |
969 sum= t; | |
970 | |
971 // msq += x*w; | |
972 y = w*x-rq; | |
973 t = msq+y; | |
974 rq= (t-msq)-y; | |
975 msq= t; | |
976 } | |
977 #ifndef NO_FLAG | |
978 else | |
979 flag = 1; | |
980 #endif | |
981 data++; // stride=1 | |
982 W++; | |
983 } | |
984 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
985 *No = (typeof(*No))count; |
6549 | 986 } else { |
987 // w/o weight vector | |
988 size_t countI = 0; | |
989 double rc=0.0, rq=0.0; | |
990 do { | |
991 double x = *data; | |
992 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
993 if (!ISNAN(x)) { |
6549 | 994 countI++; |
995 //sum += x; [1] | |
996 y = x-rc; | |
997 t = sum+y; | |
998 rc= (t-sum)-y; | |
999 sum= t; | |
1000 | |
1001 // msq += x*x; | |
1002 y = x*x-rq; | |
1003 t = msq+y; | |
1004 rq= (t-msq)-y; | |
1005 msq= t; | |
1006 } | |
1007 #ifndef NO_FLAG | |
1008 else | |
1009 flag = 1; | |
1010 #endif | |
1011 data++; // stride=1 | |
1012 } | |
1013 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1014 *No = (typeof(*No))countI; |
6549 | 1015 } |
1016 | |
1017 #ifndef NO_FLAG | |
1018 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
1019 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1020 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1021 *s2 = (typeof(*s))msq; |
6549 | 1022 } |
1023 |