Mercurial > forge
annotate extra/NaN/src/sumskipnan_mex.cpp @ 12702:29b7963bf748 octave-forge
define typeof() when missing; support for clang compiler added
author | schloegl |
---|---|
date | Tue, 22 Dec 2015 10:28:53 +0000 |
parents | 13815b367946 |
children |
rev | line source |
---|---|
7992 | 1 |
6549 | 2 //------------------------------------------------------------------- |
3 // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // sumskipnan: sums all non-NaN values | |
21 // usage: | |
22 // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); | |
23 // | |
24 // SUMSKIPNAN uses two techniques to reduce errors: | |
25 // 1) long double (80bit) instead of 64-bit double is used internally | |
26 // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps | |
27 // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). | |
28 // | |
29 // Input: | |
30 // - x data array | |
31 // - DIM (optional) dimension to sum | |
32 // - flag (optional) is actually an output argument telling whether some NaN was observed | |
33 // - W (optional) weight vector to compute weighted sum (default 1) | |
34 // | |
35 // Output: | |
36 // - o (weighted) sum along dimension DIM | |
37 // - count of valid elements | |
38 // - sums of squares | |
39 // | |
40 // | |
41 // $Id$ | |
8037 | 42 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 43 // This function is part of the NaN-toolbox |
7889 | 44 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 45 // |
46 //------------------------------------------------------------------- | |
47 | |
48 | |
49 | |
50 | |
51 #include <math.h> | |
7888 | 52 #include <stdint.h> |
6549 | 53 #include "mex.h" |
54 | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
55 /* |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
56 math.h has isnan() defined for all sizes of floating point numbers, |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
57 but c++ assumes isnan(double), causing possible conversions for float and long double |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
58 */ |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
59 #define ISNAN(a) (a!=a) |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
60 |
12702
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12692
diff
changeset
|
61 #ifndef typeof |
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12692
diff
changeset
|
62 #define typeof __typeof__ |
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12692
diff
changeset
|
63 #endif |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
64 |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
65 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
66 inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
67 inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
68 inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
69 inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
70 inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
71 inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); |
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
72 inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); |
6549 | 73 |
74 //#define NO_FLAG | |
75 | |
76 #ifdef tmwtypes_h | |
77 #if (MX_API_VER<=0x07020000) | |
78 typedef int mwSize; | |
79 #endif | |
80 #endif | |
81 | |
82 | |
83 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
84 { | |
85 const mwSize *SZ; | |
86 double* LInput; | |
87 double* LOutputSum; | |
88 double* LOutputCount; | |
89 double* LOutputSum2; | |
90 long double* LongOutputSum = NULL; | |
91 long double* LongOutputCount = NULL; | |
92 long double* LongOutputSum2 = NULL; | |
93 double x; | |
94 double* W = NULL; // weight vector | |
95 | |
12692
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
96 size_t DIM = 0; |
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
97 size_t D1, D2, D3; // NN; // |
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
98 size_t ND, ND2; // number of dimensions: input, output |
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
99 size_t ix0, ix1, ix2; // index to input and output |
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
100 size_t j, l; // running indices |
13815b367946
use size_t instead of mwSize/mwIndex whenever possible
schloegl
parents:
12691
diff
changeset
|
101 mwIndex *SZ2; // size of output |
6549 | 102 char flag_isNaN = 0; |
103 | |
104 // check for proper number of input and output arguments | |
105 if ((PInputCount <= 0) || (PInputCount > 4)) | |
106 mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); | |
107 if (POutputCount > 4) | |
108 mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); | |
109 | |
110 // get 1st argument | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
12555
diff
changeset
|
111 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) |
6549 | 112 LInput = mxGetPr(PInputs[0]); |
113 else | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
12555
diff
changeset
|
114 mexErrMsgTxt("First argument must be and not sparse REAL/DOUBLE."); |
6549 | 115 |
116 // get 2nd argument | |
117 if (PInputCount > 1) { | |
118 switch (mxGetNumberOfElements(PInputs[1])) { | |
119 case 0: x = 0.0; // accept empty element | |
120 break; | |
121 case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); | |
122 break; | |
123 default:x = -1.0; // invalid | |
124 } | |
125 if ((x < 0) || (x > 65535) || (x != floor(x))) | |
126 mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); | |
127 | |
128 DIM = (unsigned)floor(x); | |
129 } | |
130 | |
131 // get size | |
132 ND = mxGetNumberOfDimensions(PInputs[0]); | |
133 // NN = mxGetNumberOfElements(PInputs[0]); | |
134 SZ = mxGetDimensions(PInputs[0]); | |
135 | |
136 // if DIM==0 (undefined), look for first dimension with more than 1 element. | |
137 for (j = 0; (DIM < 1) && (j < ND); j++) | |
138 if (SZ[j]>1) DIM = j+1; | |
139 | |
140 if (DIM < 1) DIM=1; // in case DIM is still undefined | |
141 | |
142 ND2 = (ND>DIM ? ND : DIM); // number of dimensions of output | |
143 | |
144 SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size | |
145 | |
146 for (j=0; j<ND; j++) // copy size of input; | |
147 SZ2[j] = SZ[j]; | |
148 for (j=ND; j<ND2; j++) // in case DIM > ND, add extra elements 1 | |
149 SZ2[j] = 1; | |
150 | |
151 for (j=0, D1=1; j<DIM-1; D1=D1*SZ2[j++]); // D1 is the number of elements between two elements along dimension DIM | |
152 D2 = SZ2[DIM-1]; // D2 contains the size along dimension DIM | |
153 for (j=DIM, D3=1; j<ND; D3=D3*SZ2[j++]); // D3 is the number of blocks containing D1*D2 elements | |
154 | |
155 SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; | |
156 | |
157 // get weight vector for weighted sumskipnan | |
158 if (PInputCount > 3) { | |
159 if (!mxGetNumberOfElements(PInputs[3])) | |
160 ; // empty weight vector - no weighting | |
161 else if (mxGetNumberOfElements(PInputs[3])==D2) | |
162 W = mxGetPr(PInputs[3]); | |
163 else | |
164 mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); | |
165 } | |
166 | |
167 int ACC_LEVEL = 0; | |
168 { | |
169 mxArray *LEVEL = NULL; | |
170 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
171 if (!s) { | |
172 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
173 if ((D1>1) && (ACC_LEVEL>2)) | |
174 mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); | |
175 } | |
176 mxDestroyArray(LEVEL); | |
177 } | |
178 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
179 | |
180 // create outputs | |
181 #define TYP mxDOUBLE_CLASS | |
182 | |
183 POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
184 LOutputSum = mxGetPr(POutput[0]); | |
185 if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
186 if (POutputCount >= 2) { | |
187 POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
188 LOutputCount = mxGetPr(POutput[1]); | |
189 if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
190 } | |
191 if (POutputCount >= 3) { | |
192 POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); | |
193 LOutputSum2 = mxGetPr(POutput[2]); | |
194 if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); | |
195 } | |
196 mxFree(SZ2); | |
197 | |
7992 | 198 |
8232 | 199 if (!D1 || !D2 || !D3) // zero size array |
6549 | 200 ; // do nothing |
8232 | 201 else if (D1==1) { |
202 if (ACC_LEVEL<1) { | |
6549 | 203 // double accuray, naive summation, error = N*2^-52 |
204 switch (POutputCount) { | |
8875 | 205 case 0: |
6549 | 206 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
207 #pragma omp parallel for schedule(dynamic) |
6549 | 208 for (l = 0; l<D3; l++) { |
209 double count; | |
210 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 211 } |
6549 | 212 break; |
213 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
214 #pragma omp parallel for schedule(dynamic) |
6549 | 215 for (l = 0; l<D3; l++) { |
216 __sumskipnan2wr__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 217 } |
6549 | 218 break; |
219 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
220 #pragma omp parallel for schedule(dynamic) |
6549 | 221 for (l = 0; l<D3; l++) { |
222 __sumskipnan3wr__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 223 } |
6549 | 224 break; |
225 } | |
8232 | 226 } |
227 else if (ACC_LEVEL==1) { | |
6549 | 228 // extended accuray, naive summation, error = N*2^-64 |
229 switch (POutputCount) { | |
8875 | 230 case 0: |
6549 | 231 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
232 #pragma omp parallel for schedule(dynamic) |
6549 | 233 for (l = 0; l<D3; l++) { |
234 double count; | |
235 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 236 } |
6549 | 237 break; |
238 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
239 #pragma omp parallel for schedule(dynamic) |
6549 | 240 for (l = 0; l<D3; l++) { |
241 __sumskipnan2w__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 242 } |
6549 | 243 break; |
244 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
245 #pragma omp parallel for schedule(dynamic) |
6549 | 246 for (l = 0; l<D3; l++) { |
247 __sumskipnan3w__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 248 } |
6549 | 249 break; |
250 } | |
8232 | 251 } |
252 else if (ACC_LEVEL==3) { | |
6549 | 253 // ACC_LEVEL==3: extended accuracy and Kahan Summation, error = 2^-64 |
254 switch (POutputCount) { | |
8875 | 255 case 0: |
6549 | 256 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
257 #pragma omp parallel for schedule(dynamic) |
6549 | 258 for (l = 0; l<D3; l++) { |
259 double count; | |
260 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 261 } |
6549 | 262 break; |
263 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
264 #pragma omp parallel for schedule(dynamic) |
6549 | 265 for (l = 0; l<D3; l++) { |
266 __sumskipnan2we__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 267 } |
6549 | 268 break; |
269 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
270 #pragma omp parallel for schedule(dynamic) |
6549 | 271 for (l = 0; l<D3; l++) { |
272 __sumskipnan3we__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 273 } |
6549 | 274 break; |
275 } | |
8232 | 276 } |
277 else if (ACC_LEVEL==2) { | |
6549 | 278 // ACC_LEVEL==2: double accuracy and Kahan Summation, error = 2^-52 |
279 switch (POutputCount) { | |
8875 | 280 case 0: |
6549 | 281 case 1: |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
282 #pragma omp parallel for schedule(dynamic) |
6549 | 283 for (l = 0; l<D3; l++) { |
284 double count; | |
285 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, &count, &flag_isNaN, W); | |
7992 | 286 } |
6549 | 287 break; |
288 case 2: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
289 #pragma omp parallel for schedule(dynamic) |
6549 | 290 for (l = 0; l<D3; l++) { |
291 __sumskipnan2wer__(LInput+l*D2, D2, LOutputSum+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 292 } |
6549 | 293 break; |
294 case 3: | |
8035
f5040c012714
move #PRAGMA OMP PARALLEL into #PRAGMA OMP PARALLEL FOR - this fixes SegFaulting on ML7.11
schloegl
parents:
7992
diff
changeset
|
295 #pragma omp parallel for schedule(dynamic) |
6549 | 296 for (l = 0; l<D3; l++) { |
297 __sumskipnan3wer__(LInput+l*D2, D2, LOutputSum+l, LOutputSum2+l, LOutputCount+l, &flag_isNaN, W); | |
7992 | 298 } |
6549 | 299 break; |
300 } | |
8232 | 301 } |
6549 | 302 } |
303 else if (POutputCount <= 1) { | |
304 // OUTER LOOP: along dimensions > DIM | |
7992 | 305 for (l = 0; l<D3; l++) { |
6549 | 306 ix0 = l*D1; // index for output |
307 ix1 = ix0*D2; // index for input | |
308 for (j=0; j<D2; j++) { | |
309 // minimize cache misses | |
310 ix2 = ix0; // index for output | |
311 // Inner LOOP: along dimensions < DIM | |
312 if (W) do { | |
313 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
314 if (!ISNAN(x)) { |
6549 | 315 LongOutputSum[ix2] += W[j]*x; |
316 } | |
317 #ifndef NO_FLAG | |
318 else | |
319 flag_isNaN = 1; | |
320 #endif | |
321 LInput++; | |
322 ix2++; | |
323 } while (ix2 != (l+1)*D1); | |
324 else do { | |
325 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
326 if (!ISNAN(x)) { |
6549 | 327 LongOutputSum[ix2] += x; |
328 } | |
329 #ifndef NO_FLAG | |
330 else | |
331 flag_isNaN = 1; | |
332 #endif | |
333 LInput++; | |
334 ix2++; | |
335 } while (ix2 != (l+1)*D1); | |
336 } // end for (j= | |
337 | |
338 /* copy to output */ | |
339 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
340 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
6549 | 341 } |
342 } | |
343 } | |
344 | |
345 else if (POutputCount == 2) { | |
346 // OUTER LOOP: along dimensions > DIM | |
347 for (l = 0; l<D3; l++) { | |
348 ix0 = l*D1; | |
349 ix1 = ix0*D2; // index for input | |
350 for (j=0; j<D2; j++) { | |
351 // minimize cache misses | |
12306
6eb642de94a3
update docu of TSA and NaN package: push version to 4.2.7 and 2.5.8 for TSA and NaN package, resp.
schloegl
parents:
8875
diff
changeset
|
352 ix2 = ix0; // index for output |
6549 | 353 // Inner LOOP: along dimensions < DIM |
354 if (W) do { | |
355 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
356 if (!ISNAN(x)) { |
6549 | 357 LongOutputCount[ix2] += W[j]; |
358 LongOutputSum[ix2] += W[j]*x; | |
359 } | |
360 #ifndef NO_FLAG | |
361 else | |
362 flag_isNaN = 1; | |
363 #endif | |
364 LInput++; | |
365 ix2++; | |
366 } while (ix2 != (l+1)*D1); | |
367 else do { | |
368 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
369 if (!ISNAN(x)) { |
6549 | 370 LongOutputCount[ix2] += 1.0; |
371 LongOutputSum[ix2] += x; | |
372 } | |
373 #ifndef NO_FLAG | |
374 else | |
375 flag_isNaN = 1; | |
376 #endif | |
377 LInput++; | |
378 ix2++; | |
379 } while (ix2 != (l+1)*D1); | |
380 } // end for (j= | |
381 | |
382 /* copy to output */ | |
383 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
384 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
385 LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; |
6549 | 386 } // end else |
387 } | |
388 } | |
389 | |
390 else if (POutputCount == 3) { | |
391 // OUTER LOOP: along dimensions > DIM | |
392 for (l = 0; l<D3; l++) { | |
393 ix0 = l*D1; | |
394 ix1 = ix0*D2; // index for input | |
395 for (j=0; j<D2; j++) { | |
396 // minimize cache misses | |
397 ix2 = ix0; // index for output | |
398 // Inner LOOP: along dimensions < DIM | |
399 if (W) do { | |
400 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
401 if (!ISNAN(x)) { |
6549 | 402 LongOutputCount[ix2] += W[j]; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
403 long double t = W[j]*x; |
6549 | 404 LongOutputSum[ix2] += t; |
405 LongOutputSum2[ix2] += x*t; | |
406 } | |
407 #ifndef NO_FLAG | |
408 else | |
409 flag_isNaN = 1; | |
410 #endif | |
411 LInput++; | |
412 ix2++; | |
413 } while (ix2 != (l+1)*D1); | |
414 else do { | |
415 long double x = *LInput; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
416 if (!ISNAN(x)) { |
6549 | 417 LongOutputCount[ix2] += 1.0; |
418 LongOutputSum[ix2] += x; | |
419 LongOutputSum2[ix2] += x*x; | |
420 } | |
421 #ifndef NO_FLAG | |
422 else | |
423 flag_isNaN = 1; | |
424 #endif | |
425 LInput++; | |
426 ix2++; | |
427 } while (ix2 != (l+1)*D1); | |
428 } // end for (j= | |
429 | |
430 /* copy to output */ | |
431 for (j=0; j<D1; j++) { | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
432 LOutputSum[ix0+j] = (typeof(*LOutputSum))LongOutputSum[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
433 LOutputCount[ix0+j] = (typeof(*LOutputCount))LongOutputCount[ix0+j]; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
434 LOutputSum2[ix0+j] = (typeof(*LOutputSum2))LongOutputSum2[ix0+j]; |
6549 | 435 } |
436 } | |
437 } | |
7992 | 438 |
6549 | 439 if (LongOutputSum) mxFree(LongOutputSum); |
440 if (LongOutputCount) mxFree(LongOutputCount); | |
441 if (LongOutputSum2) mxFree(LongOutputSum2); | |
442 | |
443 #ifndef NO_FLAG | |
444 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
445 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
446 this requires that the input argument is a non-empty variable | |
447 */ | |
448 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
449 // set FLAG_NANS_OCCURED | |
450 switch (mxGetClassID(PInputs[2])) { | |
451 case mxLOGICAL_CLASS: | |
452 case mxCHAR_CLASS: | |
453 case mxINT8_CLASS: | |
454 case mxUINT8_CLASS: | |
455 *(uint8_t*)mxGetData(PInputs[2]) = 1; | |
456 break; | |
457 case mxDOUBLE_CLASS: | |
458 *(double*)mxGetData(PInputs[2]) = 1.0; | |
459 break; | |
460 case mxSINGLE_CLASS: | |
461 *(float*)mxGetData(PInputs[2]) = 1.0; | |
462 break; | |
463 case mxINT16_CLASS: | |
464 case mxUINT16_CLASS: | |
465 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
466 break; | |
467 case mxINT32_CLASS: | |
468 case mxUINT32_CLASS: | |
469 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
470 break; | |
471 case mxINT64_CLASS: | |
472 case mxUINT64_CLASS: | |
473 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
474 break; | |
475 case mxFUNCTION_CLASS: | |
476 case mxUNKNOWN_CLASS: | |
477 case mxCELL_CLASS: | |
478 case mxSTRUCT_CLASS: | |
7301
485d1594d155
addresses undesired side-effect off in-place sorting of data
schloegl
parents:
6549
diff
changeset
|
479 default: |
6549 | 480 mexPrintf("Type of 3rd input argument not supported."); |
481 } | |
482 } | |
483 #endif | |
484 } | |
485 | |
486 #define stride 1 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
487 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 488 { |
489 long double sum=0; | |
490 char flag=0; | |
491 // LOOP along dimension DIM | |
492 | |
493 double *end = data + stride*Ni; | |
494 if (W) { | |
495 // with weight vector | |
496 long double count = 0.0; | |
497 do { | |
498 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
499 if (!ISNAN(x)) |
6549 | 500 { |
501 count += *W; | |
502 sum += *W*x; | |
503 } | |
504 #ifndef NO_FLAG | |
505 else | |
506 flag = 1; | |
507 #endif | |
508 | |
509 data++; // stride=1 | |
510 W++; | |
511 } | |
512 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
513 *No = (typeof(*No))count; |
6549 | 514 } else { |
515 // w/o weight vector | |
516 size_t countI = 0; | |
517 do { | |
518 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
519 if (!ISNAN(x)) |
6549 | 520 { |
521 countI++; | |
522 sum += x; | |
523 } | |
524 #ifndef NO_FLAG | |
525 else | |
526 flag = 1; | |
527 #endif | |
528 data++; // stride=1 | |
529 } | |
530 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
531 *No = (typeof(*No))countI; |
6549 | 532 } |
533 | |
534 #ifndef NO_FLAG | |
535 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
536 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
537 *s = (typeof(*s))sum; |
6549 | 538 |
539 } | |
540 | |
541 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
542 inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 543 { |
544 long double sum=0; | |
545 long double msq=0; | |
546 char flag=0; | |
547 // LOOP along dimension DIM | |
548 | |
549 double *end = data + stride*Ni; | |
550 if (W) { | |
551 // with weight vector | |
552 long double count = 0.0; | |
553 do { | |
554 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
555 if (!ISNAN(x)) { |
6549 | 556 count += *W; |
557 long double t = *W*x; | |
558 sum += t; | |
559 msq += x*t; | |
560 } | |
561 #ifndef NO_FLAG | |
562 else | |
563 flag = 1; | |
564 #endif | |
565 data++; // stride=1 | |
566 W++; | |
567 } | |
568 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
569 *No = (typeof(*No))count; |
6549 | 570 } else { |
571 // w/o weight vector | |
572 size_t countI = 0; | |
573 do { | |
574 long double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
575 if (!ISNAN(x)) { |
6549 | 576 countI++; |
577 sum += x; | |
578 msq += x*x; | |
579 } | |
580 #ifndef NO_FLAG | |
581 else | |
582 flag = 1; | |
583 #endif | |
584 data++; // stride=1 | |
585 } | |
586 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
587 *No = (typeof(*No))countI; |
6549 | 588 } |
589 | |
590 #ifndef NO_FLAG | |
591 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
592 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
593 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
594 *s2 = (typeof(*s2))msq; |
6549 | 595 } |
596 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
597 inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 598 { |
599 double sum=0; | |
600 char flag=0; | |
601 // LOOP along dimension DIM | |
602 | |
603 double *end = data + stride*Ni; | |
604 if (W) { | |
605 // with weight vector | |
606 double count = 0.0; | |
607 do { | |
608 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
609 if (!ISNAN(x)) |
6549 | 610 { |
611 count += *W; | |
612 sum += *W*x; | |
613 } | |
614 #ifndef NO_FLAG | |
615 else | |
616 flag = 1; | |
617 #endif | |
618 | |
619 data++; // stride=1 | |
620 W++; | |
621 } | |
622 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
623 *No = (typeof(*No))count; |
6549 | 624 } else { |
625 // w/o weight vector | |
626 size_t countI = 0; | |
627 do { | |
628 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
629 if (!ISNAN(x)) |
6549 | 630 { |
631 countI++; | |
632 sum += x; | |
633 } | |
634 #ifndef NO_FLAG | |
635 else | |
636 flag = 1; | |
637 #endif | |
638 data++; // stride=1 | |
639 } | |
640 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
641 *No = (typeof(*No))countI; |
6549 | 642 } |
643 | |
644 #ifndef NO_FLAG | |
645 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
646 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
647 *s = (typeof(*s))sum; |
6549 | 648 |
649 } | |
650 | |
651 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
652 inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 653 { |
654 double sum=0; | |
655 double msq=0; | |
656 char flag=0; | |
657 // LOOP along dimension DIM | |
658 | |
659 double *end = data + stride*Ni; | |
660 if (W) { | |
661 // with weight vector | |
662 double count = 0.0; | |
663 do { | |
664 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
665 if (!ISNAN(x)) { |
6549 | 666 count += *W; |
667 double t = *W*x; | |
668 sum += t; | |
669 msq += x*t; | |
670 } | |
671 #ifndef NO_FLAG | |
672 else | |
673 flag = 1; | |
674 #endif | |
675 data++; // stride=1 | |
676 W++; | |
677 } | |
678 while (data < end); | |
679 *No = count; | |
680 } else { | |
681 // w/o weight vector | |
682 size_t countI = 0; | |
683 do { | |
684 double x = *data; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
685 if (!ISNAN(x)) { |
6549 | 686 countI++; |
687 sum += x; | |
688 msq += x*x; | |
689 } | |
690 #ifndef NO_FLAG | |
691 else | |
692 flag = 1; | |
693 #endif | |
694 data++; // stride=1 | |
695 } | |
696 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
697 *No = (typeof(*No))countI; |
6549 | 698 } |
699 | |
700 #ifndef NO_FLAG | |
701 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
702 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
703 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
704 *s2 = (typeof(*s2))msq; |
6549 | 705 } |
706 | |
707 | |
708 | |
709 /*************************************** | |
710 using Kahan's summation formula [1] | |
711 this gives more accurate results while the computational effort within the loop is about 4x as high | |
712 First tests show a penalty of about 40% in terms of computational time. | |
713 | |
714 [1] David Goldberg, | |
715 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
716 ACM Computing Surveys, Vol 23, No 1, March 1991. | |
717 ****************************************/ | |
718 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
719 inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 720 { |
721 long double sum=0; | |
722 char flag=0; | |
723 // LOOP along dimension DIM | |
724 | |
725 double *end = data + stride*Ni; | |
726 if (W) { | |
727 // with weight vector | |
728 long double count = 0.0; | |
729 long double rc=0.0, rn=0.0; | |
730 do { | |
731 long double x = *data; | |
732 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
733 if (!ISNAN(x)) |
6549 | 734 { |
735 //count += *W; [1] | |
736 y = *W-rn; | |
737 t = count+y; | |
738 rn= (t-count)-y; | |
739 count= t; | |
740 | |
741 //sum += *W*x; [1] | |
742 y = *W*x-rc; | |
743 t = sum+y; | |
744 rc= (t-sum)-y; | |
745 sum= t; | |
746 } | |
747 #ifndef NO_FLAG | |
748 else | |
749 flag = 1; | |
750 #endif | |
751 | |
752 data++; // stride=1 | |
753 W++; | |
754 } | |
755 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
756 *No = (typeof(*No))count; |
6549 | 757 } else { |
758 // w/o weight vector | |
759 size_t countI = 0; | |
760 long double rc=0.0; | |
761 do { | |
762 long double x = *data; | |
763 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
764 if (!ISNAN(x)) |
6549 | 765 { |
766 countI++; | |
767 // sum += x; [1] | |
768 y = x-rc; | |
769 t = sum+y; | |
770 rc= (t-sum)-y; | |
771 sum= t; | |
772 } | |
773 #ifndef NO_FLAG | |
774 else | |
775 flag = 1; | |
776 #endif | |
777 data++; // stride=1 | |
778 } | |
779 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
780 *No = (typeof(*No))countI; |
6549 | 781 } |
782 | |
783 #ifndef NO_FLAG | |
784 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
785 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
786 *s = (typeof(*s))sum; |
6549 | 787 |
788 } | |
789 | |
790 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
791 inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 792 { |
793 long double sum=0; | |
794 long double msq=0; | |
795 char flag=0; | |
796 // LOOP along dimension DIM | |
797 | |
798 double *end = data + stride*Ni; | |
799 if (W) { | |
800 // with weight vector | |
801 long double count = 0.0; | |
802 long double rc=0.0, rn=0.0, rq=0.0; | |
803 do { | |
804 long double x = *data; | |
805 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
806 if (!ISNAN(x)) { |
6549 | 807 //count += *W; [1] |
808 y = *W-rn; | |
809 t = count+y; | |
810 rn= (t-count)-y; | |
811 count= t; | |
812 | |
813 long double w = *W*x; | |
814 //sum += *W*x; [1] | |
815 y = *W*x-rc; | |
816 t = sum+y; | |
817 rc= (t-sum)-y; | |
818 sum= t; | |
819 | |
820 // msq += x*w; | |
821 y = w*x-rq; | |
822 t = msq+y; | |
823 rq= (t-msq)-y; | |
824 msq= t; | |
825 } | |
826 #ifndef NO_FLAG | |
827 else | |
828 flag = 1; | |
829 #endif | |
830 data++; // stride=1 | |
831 W++; | |
832 } | |
833 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
834 *No = (typeof(*No))count; |
6549 | 835 } else { |
836 // w/o weight vector | |
837 size_t countI = 0; | |
838 long double rc=0.0, rq=0.0; | |
839 do { | |
840 long double x = *data; | |
841 long double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
842 if (!ISNAN(x)) { |
6549 | 843 countI++; |
844 //sum += x; [1] | |
845 y = x-rc; | |
846 t = sum+y; | |
847 rc= (t-sum)-y; | |
848 sum= t; | |
849 | |
850 // msq += x*x; | |
851 y = x*x-rq; | |
852 t = msq+y; | |
853 rq= (t-msq)-y; | |
854 msq= t; | |
855 } | |
856 #ifndef NO_FLAG | |
857 else | |
858 flag = 1; | |
859 #endif | |
860 data++; // stride=1 | |
861 } | |
862 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
863 *No = (typeof(*No))countI; |
6549 | 864 } |
865 | |
866 #ifndef NO_FLAG | |
867 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
868 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
869 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
870 *s2 = (typeof(*s))msq; |
6549 | 871 } |
872 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
873 inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) |
6549 | 874 { |
875 double sum=0; | |
876 char flag=0; | |
877 // LOOP along dimension DIM | |
878 | |
879 double *end = data + stride*Ni; | |
880 if (W) { | |
881 // with weight vector | |
882 double count = 0.0; | |
883 double rc=0.0, rn=0.0; | |
884 do { | |
885 double x = *data; | |
886 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
887 if (!ISNAN(x)) |
6549 | 888 { |
889 //count += *W; [1] | |
890 y = *W-rn; | |
891 t = count+y; | |
892 rn= (t-count)-y; | |
893 count= t; | |
894 | |
895 //sum += *W*x; [1] | |
896 y = *W*x-rc; | |
897 t = sum+y; | |
898 rc= (t-sum)-y; | |
899 sum= t; | |
900 } | |
901 #ifndef NO_FLAG | |
902 else | |
903 flag = 1; | |
904 #endif | |
905 | |
906 data++; // stride=1 | |
907 W++; | |
908 } | |
909 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
910 *No = (typeof(*No))count; |
6549 | 911 } else { |
912 // w/o weight vector | |
913 size_t countI = 0; | |
914 double rc=0.0; | |
915 do { | |
916 double x = *data; | |
917 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
918 if (!ISNAN(x)) |
6549 | 919 { |
920 countI++; | |
921 // sum += x; [1] | |
922 y = x-rc; | |
923 t = sum+y; | |
924 rc= (t-sum)-y; | |
925 sum= t; | |
926 } | |
927 #ifndef NO_FLAG | |
928 else | |
929 flag = 1; | |
930 #endif | |
931 data++; // stride=1 | |
932 } | |
933 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
934 *No = (typeof(*No))countI; |
6549 | 935 } |
936 | |
937 #ifndef NO_FLAG | |
938 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
939 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
940 *s = (typeof(*s))sum; |
6549 | 941 |
942 } | |
943 | |
944 | |
12555
6dea7ba11ac7
address compiler warnings on functions with no return argument
schloegl
parents:
12306
diff
changeset
|
945 inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) |
6549 | 946 { |
947 double sum=0; | |
948 double msq=0; | |
949 char flag=0; | |
950 // LOOP along dimension DIM | |
951 | |
952 double *end = data + stride*Ni; | |
953 if (W) { | |
954 // with weight vector | |
955 double count = 0.0; | |
956 double rc=0.0, rn=0.0, rq=0.0; | |
957 do { | |
958 double x = *data; | |
959 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
960 if (!ISNAN(x)) { |
6549 | 961 //count += *W; [1] |
962 y = *W-rn; | |
963 t = count+y; | |
964 rn= (t-count)-y; | |
965 count= t; | |
966 | |
967 double w = *W*x; | |
968 //sum += *W*x; [1] | |
969 y = *W*x-rc; | |
970 t = sum+y; | |
971 rc= (t-sum)-y; | |
972 sum= t; | |
973 | |
974 // msq += x*w; | |
975 y = w*x-rq; | |
976 t = msq+y; | |
977 rq= (t-msq)-y; | |
978 msq= t; | |
979 } | |
980 #ifndef NO_FLAG | |
981 else | |
982 flag = 1; | |
983 #endif | |
984 data++; // stride=1 | |
985 W++; | |
986 } | |
987 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
988 *No = (typeof(*No))count; |
6549 | 989 } else { |
990 // w/o weight vector | |
991 size_t countI = 0; | |
992 double rc=0.0, rq=0.0; | |
993 do { | |
994 double x = *data; | |
995 double t,y; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
996 if (!ISNAN(x)) { |
6549 | 997 countI++; |
998 //sum += x; [1] | |
999 y = x-rc; | |
1000 t = sum+y; | |
1001 rc= (t-sum)-y; | |
1002 sum= t; | |
1003 | |
1004 // msq += x*x; | |
1005 y = x*x-rq; | |
1006 t = msq+y; | |
1007 rq= (t-msq)-y; | |
1008 msq= t; | |
1009 } | |
1010 #ifndef NO_FLAG | |
1011 else | |
1012 flag = 1; | |
1013 #endif | |
1014 data++; // stride=1 | |
1015 } | |
1016 while (data < end); | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1017 *No = (typeof(*No))countI; |
6549 | 1018 } |
1019 | |
1020 #ifndef NO_FLAG | |
1021 if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; | |
1022 #endif | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1023 *s = (typeof(*s))sum; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
1024 *s2 = (typeof(*s))msq; |
6549 | 1025 } |
1026 |