Mercurial > forge
annotate extra/NaN/src/covm_mex.cpp @ 12702:29b7963bf748 octave-forge
define typeof() when missing; support for clang compiler added
author | schloegl |
---|---|
date | Tue, 22 Dec 2015 10:28:53 +0000 |
parents | 79e7259c6ff1 |
children |
rev | line source |
---|---|
6549 | 1 /* |
2 //------------------------------------------------------------------- | |
3 // C-MEX implementation of COVM - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // covm: in-product of matrices, NaN are skipped. | |
21 // usage: | |
22 // [cc,nn] = covm_mex(X,Y,flag,W); | |
23 // | |
24 // Input: | |
25 // - X: | |
26 // - Y: [optional], if empty, Y=X; | |
27 // - flag: if not empty, it is set to 1 if some NaN was observed | |
28 // - W: weight vector to compute weighted correlation | |
29 // | |
30 // Output: | |
31 // - CC = X' * sparse(diag(W)) * Y while NaN's are skipped | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
32 // - NN = real(~ISNAN(X)')*sparse(diag(W))*real(~ISNAN(Y)) count of valid (non-NaN) elements |
6549 | 33 // computed more efficiently |
34 // | |
35 // $Id$ | |
8037 | 36 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 37 // This function is part of the NaN-toolbox |
7889 | 38 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 39 // |
40 //------------------------------------------------------------------- | |
41 */ | |
42 | |
6585 | 43 #ifdef __GNUC__ |
7888 | 44 #include <stdint.h> |
6585 | 45 #endif |
6549 | 46 #include <math.h> |
47 #include "mex.h" | |
48 | |
49 /*#define NO_FLAG*/ | |
50 | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
51 /* |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
52 math.h has isnan() defined for all sizes of floating point numbers, |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
53 but c++ assumes isnan(double), causing possible conversions for float and long double |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
54 */ |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
55 #define ISNAN(a) (a!=a) |
6549 | 56 |
12702
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12697
diff
changeset
|
57 #ifndef typeof |
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12697
diff
changeset
|
58 #define typeof __typeof__ |
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12697
diff
changeset
|
59 #endif |
29b7963bf748
define typeof() when missing; support for clang compiler added
schloegl
parents:
12697
diff
changeset
|
60 |
6549 | 61 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) |
62 { | |
7992 | 63 double *X0=NULL, *Y0=NULL, *W=NULL; |
64 double *CC; | |
65 double *NN = NULL; | |
6549 | 66 |
7992 | 67 size_t rX,cX,rY,cY; |
68 size_t i; | |
69 char flag_isNaN = 0; | |
6585 | 70 int ACC_LEVEL; |
6549 | 71 |
72 /*********** check input arguments *****************/ | |
73 | |
74 // check for proper number of input and output arguments | |
75 if ((PInputCount <= 0) || (PInputCount > 5)) { | |
76 mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); | |
77 mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n"); | |
78 /* | |
79 mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); | |
80 mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n"); | |
81 mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n"); | |
82 mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'*sparse(diag(W))*Y, number of rows of X and Y must match\n"); | |
83 mexPrintf("\t... = covm_mex(X,[], ...)\n\t\t computes CC=X'*sparse(diag(W))*X\n"); | |
84 mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n"); | |
85 mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n"); | |
86 mexPrintf("\t\t if isempty(W), all weights are 1\n"); | |
87 mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n"); | |
88 */ return; | |
89 } | |
90 if (POutputCount > 2) | |
91 mexErrMsgTxt("covm.MEX has 1 to 2 output arguments."); | |
92 | |
93 | |
94 // get 1st argument | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
8037
diff
changeset
|
95 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) |
6549 | 96 X0 = mxGetPr(PInputs[0]); |
97 else | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
8037
diff
changeset
|
98 mexErrMsgTxt("First argument must be non-sparse REAL/DOUBLE."); |
6549 | 99 rX = mxGetM(PInputs[0]); |
100 cX = mxGetN(PInputs[0]); | |
101 | |
102 // get 2nd argument | |
103 if (PInputCount > 1) { | |
104 if (!mxGetNumberOfElements(PInputs[1])) | |
105 ; // Y0 = NULL; | |
106 | |
107 else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1])) | |
108 Y0 = mxGetPr(PInputs[1]); | |
109 | |
110 else | |
111 mexErrMsgTxt("Second argument must be REAL/DOUBLE."); | |
112 } | |
113 | |
114 | |
115 // get weight vector for weighted sumskipnan | |
116 if (PInputCount > 3) { | |
117 // get 4th argument | |
7992 | 118 size_t nW = mxGetNumberOfElements(PInputs[3]); |
6549 | 119 if (!nW) |
120 ; | |
121 else if (nW == rX) | |
122 W = mxGetPr(PInputs[3]); | |
123 else | |
124 mexErrMsgTxt("number of elements in W must match numbers of rows in X"); | |
125 } | |
126 | |
6585 | 127 #ifdef __GNUC__ |
128 ACC_LEVEL = 0; | |
6549 | 129 { |
130 mxArray *LEVEL = NULL; | |
131 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
132 if (!s) { | |
133 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
134 } | |
135 mxDestroyArray(LEVEL); | |
136 } | |
137 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
6585 | 138 #endif |
6549 | 139 if (Y0==NULL) { |
140 Y0 = X0; | |
141 rY = rX; | |
142 cY = cX; | |
143 } | |
144 else { | |
145 rY = mxGetM(PInputs[1]); | |
146 cY = mxGetN(PInputs[1]); | |
147 } | |
148 if (rX != rY) | |
149 mexErrMsgTxt("number of rows in X and Y do not match"); | |
150 | |
151 /*********** create output arguments *****************/ | |
152 | |
153 POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL); | |
154 CC = mxGetPr(POutput[0]); | |
155 | |
156 if (POutputCount > 1) { | |
157 POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL); | |
158 NN = mxGetPr(POutput[1]); | |
159 } | |
160 | |
161 | |
162 /*********** compute covariance *****************/ | |
163 | |
164 #if 0 | |
165 /*------ version 1 --------------------- | |
166 this solution is slower than the alternative solution below | |
167 for transposed matrices, this might be faster. | |
168 */ | |
169 for (k=0; k<rX; k++) { | |
170 double w; | |
171 if (W) { | |
172 w = W[k]; | |
173 for (i=0; i<cX; i++) { | |
174 double x = X0[k+i*rX]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
175 if (ISNAN(x)) { |
6549 | 176 #ifndef NO_FLAG |
177 flag_isNaN = 1; | |
178 #endif | |
179 continue; | |
180 } | |
181 for (j=0; j<cY; j++) { | |
182 double y = Y0[k+j*rY]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
183 if (ISNAN(y)) { |
6549 | 184 #ifndef NO_FLAG |
185 flag_isNaN = 1; | |
186 #endif | |
187 continue; | |
188 } | |
189 CC[i+j*cX] += x*y*w; | |
190 if (NN != NULL) | |
191 NN[i+j*cX] += w; | |
192 } | |
193 } | |
194 } | |
195 else for (i=0; i<cX; i++) { | |
196 double x = X0[k+i*rX]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
197 if (ISNAN(x)) { |
6549 | 198 #ifndef NO_FLAG |
199 flag_isNaN = 1; | |
200 #endif | |
201 continue; | |
202 } | |
203 for (j=0; j<cY; j++) { | |
204 double y = Y0[k+j*rY]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
205 if (ISNAN(y)) { |
6549 | 206 #ifndef NO_FLAG |
207 flag_isNaN = 1; | |
208 #endif | |
209 continue; | |
210 } | |
211 CC[i+j*cX] += x*y; | |
212 if (NN != NULL) | |
213 NN[i+j*cX] += 1.0; | |
214 } | |
215 } | |
216 } | |
217 | |
218 #else | |
7992 | 219 |
220 #pragma omp parallel | |
221 { | |
6585 | 222 #ifdef __GNUC__ |
223 if (ACC_LEVEL == 0) | |
224 #endif | |
225 { | |
6549 | 226 /*------ version 2 --------------------- |
227 using naive summation with double accuracy [1] | |
228 */ | |
229 if ( (X0 != Y0) || (cX != cY) ) | |
230 /******** X!=Y, output is not symetric *******/ | |
231 if (W) /* weighted version */ | |
7992 | 232 #pragma omp for schedule(dynamic) nowait |
233 for (i = 0; i < cX * cY; i++) | |
234 { | |
235 double *X = X0 + (i%cX) * rX; | |
236 double *Y = Y0 + (i/cX) * rY; | |
237 double cc = 0.0; | |
238 double nw = 0.0; | |
239 size_t k; | |
6549 | 240 for (k=0; k<rX; k++) { |
241 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
242 if (ISNAN(z)) { |
6549 | 243 #ifndef NO_FLAG |
244 flag_isNaN = 1; | |
245 #endif | |
246 continue; | |
247 } | |
248 cc += z*W[k]; | |
6585 | 249 nw += W[k]; |
6549 | 250 } |
7992 | 251 CC[i] = cc; |
6549 | 252 if (NN != NULL) |
7992 | 253 NN[i] = nw; |
6549 | 254 } |
255 else /* no weights, all weights are 1 */ | |
7992 | 256 #pragma omp for schedule(dynamic) nowait |
257 for (i = 0; i < cX * cY; i++) | |
258 { | |
259 double *X = X0 + (i%cX) * rX; | |
260 double *Y = Y0 + (i/cX) * rY; | |
261 double cc = 0.0; | |
262 size_t nn = 0; | |
263 size_t k; | |
6549 | 264 for (k=0; k<rX; k++) { |
265 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
266 if (ISNAN(z)) { |
6549 | 267 #ifndef NO_FLAG |
268 flag_isNaN = 1; | |
269 #endif | |
270 continue; | |
271 } | |
272 cc += z; | |
273 nn++; | |
274 } | |
7992 | 275 CC[i] = cc; |
6549 | 276 if (NN != NULL) |
7992 | 277 NN[i] = (double)nn; |
6549 | 278 } |
279 else // if (X0==Y0) && (cX==cY) | |
280 /******** X==Y, output is symetric *******/ | |
281 if (W) /* weighted version */ | |
7992 | 282 #pragma omp for schedule(dynamic) nowait |
283 for (i = 0; i < cX * cY; i++) | |
284 { | |
285 size_t ii = i%cX; | |
286 size_t jj = i/cX; | |
287 if (ii < jj) continue; | |
288 double *X = X0 + ii * rX; | |
289 double *Y = Y0 + jj * rY; | |
290 double cc = 0.0; | |
291 double nw = 0.0; | |
292 size_t k; | |
6549 | 293 for (k=0; k<rX; k++) { |
294 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
295 if (ISNAN(z)) { |
6549 | 296 #ifndef NO_FLAG |
297 flag_isNaN = 1; | |
298 #endif | |
299 continue; | |
300 } | |
301 cc += z*W[k]; | |
6585 | 302 nw += W[k]; |
7992 | 303 } |
304 size_t j = jj + ii*cX; | |
305 CC[i] = cc; | |
306 CC[j] = cc; | |
6549 | 307 if (NN != NULL) { |
7992 | 308 NN[i] = nw; |
309 NN[j] = nw; | |
310 } | |
6549 | 311 } |
312 else /* no weights, all weights are 1 */ | |
7992 | 313 #pragma omp for schedule(dynamic) nowait |
314 for (i = 0; i < cX * cY; i++) | |
315 { | |
316 size_t ii = i%cX; | |
317 size_t jj = i/cX; | |
318 if (ii < jj) continue; | |
319 double *X = X0 + ii * rX; | |
320 double *Y = Y0 + jj * rY; | |
321 double cc = 0.0; | |
322 size_t nn = 0; | |
323 size_t k; | |
6549 | 324 for (k=0; k<rX; k++) { |
325 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
326 if (ISNAN(z)) { |
6549 | 327 #ifndef NO_FLAG |
328 flag_isNaN = 1; | |
329 #endif | |
330 continue; | |
331 } | |
332 cc += z; | |
333 nn++; | |
334 } | |
7992 | 335 size_t j = jj + ii*cX; |
336 CC[i] = cc; | |
337 CC[j] = cc; | |
6549 | 338 if (NN != NULL) { |
7992 | 339 NN[i] = (double)nn; |
340 NN[j] = (double)nn; | |
6549 | 341 } |
342 } | |
343 | |
344 } | |
6585 | 345 |
346 #ifdef __GNUC__ | |
347 | |
6549 | 348 else if (ACC_LEVEL == 1) { |
349 /*------ version 2 --------------------- | |
350 using naive summation with extended accuracy [1] | |
351 */ | |
352 if ( (X0 != Y0) || (cX != cY) ) | |
353 /******** X!=Y, output is not symetric *******/ | |
354 if (W) /* weighted version */ | |
7992 | 355 #pragma omp for schedule(dynamic) nowait |
356 for (i = 0; i < cX * cY; i++) | |
357 { | |
358 double *X = X0 + (i%cX) * rX; | |
359 double *Y = Y0 + (i/cX) * rY; | |
6549 | 360 long double cc=0.0; |
361 long double nn=0.0; | |
7992 | 362 size_t k; |
6549 | 363 for (k=0; k<rX; k++) { |
364 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
365 if (ISNAN(z)) { |
6549 | 366 #ifndef NO_FLAG |
367 flag_isNaN = 1; | |
368 #endif | |
369 continue; | |
370 } | |
371 cc += z*W[k]; | |
372 nn += W[k]; | |
373 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
374 CC[i] = (typeof(*CC))cc; |
6549 | 375 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
376 NN[i] = (typeof(*NN))nn; |
6549 | 377 } |
378 else /* no weights, all weights are 1 */ | |
7992 | 379 #pragma omp for schedule(dynamic) nowait |
380 for (i = 0; i < cX * cY; i++) | |
381 { | |
382 double *X = X0 + (i%cX) * rX; | |
383 double *Y = Y0 + (i/cX) * rY; | |
6549 | 384 long double cc=0.0; |
385 size_t nn=0; | |
7992 | 386 size_t k; |
6549 | 387 for (k=0; k<rX; k++) { |
388 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
389 if (ISNAN(z)) { |
6549 | 390 #ifndef NO_FLAG |
391 flag_isNaN = 1; | |
392 #endif | |
393 continue; | |
394 } | |
395 cc += z; | |
396 nn++; | |
397 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
398 CC[i] = (typeof(*CC))cc; |
6549 | 399 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
400 NN[i] = (typeof(*NN))nn; |
6549 | 401 } |
402 else // if (X0==Y0) && (cX==cY) | |
403 /******** X==Y, output is symetric *******/ | |
404 if (W) /* weighted version */ | |
7992 | 405 #pragma omp for schedule(dynamic) nowait |
406 for (i = 0; i < cX * cY; i++) | |
407 { | |
408 size_t ii = i%cX; | |
409 size_t jj = i/cX; | |
410 if (ii < jj) continue; | |
411 double *X = X0 + ii * rX; | |
412 double *Y = Y0 + jj * rY; | |
6549 | 413 long double cc=0.0; |
414 long double nn=0.0; | |
7992 | 415 size_t k; |
6549 | 416 for (k=0; k<rX; k++) { |
417 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
418 if (ISNAN(z)) { |
6549 | 419 #ifndef NO_FLAG |
420 flag_isNaN = 1; | |
421 #endif | |
422 continue; | |
423 } | |
424 cc += z*W[k]; | |
425 nn += W[k]; | |
426 } | |
7992 | 427 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
428 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
429 CC[j] = (typeof(*CC))cc; |
6549 | 430 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
431 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
432 NN[j] = (typeof(*NN))nn; |
6549 | 433 } |
434 } | |
435 else /* no weights, all weights are 1 */ | |
7992 | 436 #pragma omp for schedule(dynamic) nowait |
437 for (i = 0; i < cX * cY; i++) | |
438 { | |
439 size_t ii = i%cX; | |
440 size_t jj = i/cX; | |
441 if (ii < jj) continue; | |
442 double *X = X0 + ii * rX; | |
443 double *Y = Y0 + jj * rY; | |
6549 | 444 long double cc=0.0; |
445 size_t nn=0; | |
7992 | 446 size_t k; |
6549 | 447 for (k=0; k<rX; k++) { |
448 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
449 if (ISNAN(z)) { |
6549 | 450 #ifndef NO_FLAG |
451 flag_isNaN = 1; | |
452 #endif | |
453 continue; | |
454 } | |
455 cc += z; | |
456 nn++; | |
457 } | |
7992 | 458 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
459 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
460 CC[j] = (typeof(*CC))cc; |
6549 | 461 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
462 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
463 NN[j] = (typeof(*NN))nn; |
6549 | 464 } |
465 } | |
466 | |
467 } | |
468 else if (ACC_LEVEL == 3) { | |
469 /*------ version 3 --------------------- | |
470 using Kahan's summation with extended (long double) accuracy [1] | |
471 this gives more accurate results while the computational effort within the loop is about 4x as high | |
472 However, first test show an increase in computational time of only about 25 %. | |
473 | |
474 [1] David Goldberg, | |
475 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
476 ACM Computing Surveys, Vol 23, No 1, March 1991 | |
477 */ | |
478 if ( (X0 != Y0) || (cX != cY) ) | |
479 /******** X!=Y, output is not symetric *******/ | |
480 if (W) /* weighted version */ | |
7992 | 481 #pragma omp for schedule(dynamic) nowait |
482 for (i = 0; i < cX * cY; i++) | |
483 { | |
484 double *X = X0 + (i%cX) * rX; | |
485 double *Y = Y0 + (i/cX) * rY; | |
6549 | 486 long double cc=0.0; |
487 long double nn=0.0; | |
488 long double rc=0.0; | |
489 long double rn=0.0; | |
7992 | 490 size_t k; |
6549 | 491 for (k=0; k<rX; k++) { |
492 long double t,y; | |
493 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
494 if (ISNAN(z)) { |
6549 | 495 #ifndef NO_FLAG |
496 flag_isNaN = 1; | |
497 #endif | |
498 continue; | |
499 } | |
500 // cc += z*W[k]; [1] | |
501 y = z*W[k]-rc; | |
502 t = cc+y; | |
503 rc= (t-cc)-y; | |
504 cc= t; | |
505 | |
506 // nn += W[k]; [1] | |
507 y = z*W[k]-rn; | |
508 t = nn+y; | |
509 rn= (t-nn)-y; | |
510 nn= t; | |
511 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
512 CC[i] = (typeof(*CC))cc; |
6549 | 513 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
514 NN[i] = (typeof(*NN))nn; |
6549 | 515 } |
516 else /* no weights, all weights are 1 */ | |
7992 | 517 #pragma omp for schedule(dynamic) nowait |
518 for (i = 0; i < cX * cY; i++) | |
519 { | |
520 double *X = X0 + (i%cX) * rX; | |
521 double *Y = Y0 + (i/cX) * rY; | |
6549 | 522 long double cc=0.0; |
523 long double rc=0.0; | |
524 size_t nn=0; | |
7992 | 525 size_t k; |
6549 | 526 for (k=0; k<rX; k++) { |
527 long double t,y; | |
528 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
529 if (ISNAN(z)) { |
6549 | 530 #ifndef NO_FLAG |
531 flag_isNaN = 1; | |
532 #endif | |
533 continue; | |
534 } | |
535 // cc += z; [1] | |
536 y = z-rc; | |
537 t = cc+y; | |
538 rc= (t-cc)-y; | |
539 cc= t; | |
540 | |
541 nn++; | |
542 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
543 CC[i] = (typeof(*CC))cc; |
6549 | 544 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
545 NN[i] = (typeof(*NN))nn; |
6549 | 546 } |
547 else // if (X0==Y0) && (cX==cY) | |
548 /******** X==Y, output is symetric *******/ | |
549 if (W) /* weighted version */ | |
7992 | 550 #pragma omp for schedule(dynamic) nowait |
551 for (i = 0; i < cX * cY; i++) | |
552 { | |
553 size_t ii = i%cX; | |
554 size_t jj = i/cX; | |
555 if (ii < jj) continue; | |
556 double *X = X0 + ii * rX; | |
557 double *Y = Y0 + jj * rY; | |
6549 | 558 long double cc=0.0; |
559 long double nn=0.0; | |
560 long double rc=0.0; | |
561 long double rn=0.0; | |
7992 | 562 size_t k; |
6549 | 563 for (k=0; k<rX; k++) { |
564 long double t,y; | |
565 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
566 if (ISNAN(z)) { |
6549 | 567 #ifndef NO_FLAG |
568 flag_isNaN = 1; | |
569 #endif | |
570 continue; | |
571 } | |
572 // cc += z*W[k]; [1] | |
573 y = z*W[k]-rc; | |
574 t = cc+y; | |
575 rc= (t-cc)-y; | |
576 cc= t; | |
577 | |
578 // nn += W[k]; [1] | |
579 y = z*W[k]-rn; | |
580 t = nn+y; | |
581 rn= (t-nn)-y; | |
582 nn= t; | |
583 } | |
7992 | 584 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
585 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
586 CC[j] = (typeof(*CC))cc; |
6549 | 587 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
588 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
589 NN[j] = (typeof(*NN))nn; |
6549 | 590 } |
591 } | |
592 else /* no weights, all weights are 1 */ | |
7992 | 593 #pragma omp for schedule(dynamic) nowait |
594 for (i = 0; i < cX * cY; i++) | |
595 { | |
596 size_t ii = i%cX; | |
597 size_t jj = i/cX; | |
598 if (ii < jj) continue; | |
599 double *X = X0 + ii * rX; | |
600 double *Y = Y0 + jj * rY; | |
6549 | 601 long double cc=0.0; |
602 long double rc=0.0; | |
603 size_t nn=0; | |
7992 | 604 size_t k; |
6549 | 605 for (k=0; k<rX; k++) { |
606 long double t,y; | |
607 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
608 if (ISNAN(z)) { |
6549 | 609 #ifndef NO_FLAG |
610 flag_isNaN = 1; | |
611 #endif | |
612 continue; | |
613 } | |
614 // cc += z; [1] | |
615 y = z-rc; | |
616 t = cc+y; | |
617 rc= (t-cc)-y; | |
618 cc= t; | |
619 | |
620 nn++; | |
621 } | |
7992 | 622 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
623 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
624 CC[j] = (typeof(*CC))cc; |
6549 | 625 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
626 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
627 NN[j] = (typeof(*NN))nn; |
6549 | 628 } |
629 } | |
630 } | |
631 else if (ACC_LEVEL == 2) { | |
632 /*------ version 3 --------------------- | |
633 using Kahan's summation with double accuracy [1] | |
634 this gives more accurate results while the computational effort within the loop is about 4x as high | |
635 However, first test show an increase in computational time of only about 25 %. | |
636 | |
637 [1] David Goldberg, | |
638 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
639 ACM Computing Surveys, Vol 23, No 1, March 1991 | |
640 */ | |
641 if ( (X0 != Y0) || (cX != cY) ) | |
642 /******** X!=Y, output is not symetric *******/ | |
643 if (W) /* weighted version */ | |
7992 | 644 #pragma omp for schedule(dynamic) nowait |
645 for (i = 0; i < cX * cY; i++) | |
646 { | |
647 double *X = X0 + (i%cX) * rX; | |
648 double *Y = Y0 + (i/cX) * rY; | |
6549 | 649 double cc=0.0; |
650 double nn=0.0; | |
651 double rc=0.0; | |
652 double rn=0.0; | |
7992 | 653 size_t k; |
6549 | 654 for (k=0; k<rX; k++) { |
655 double t,y; | |
656 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
657 if (ISNAN(z)) { |
6549 | 658 #ifndef NO_FLAG |
659 flag_isNaN = 1; | |
660 #endif | |
661 continue; | |
662 } | |
663 // cc += z*W[k]; [1] | |
664 y = z*W[k]-rc; | |
665 t = cc+y; | |
666 rc= (t-cc)-y; | |
667 cc= t; | |
668 | |
669 // nn += W[k]; [1] | |
670 y = z*W[k]-rn; | |
671 t = nn+y; | |
672 rn= (t-nn)-y; | |
673 nn= t; | |
674 } | |
7992 | 675 CC[i] = cc; |
6549 | 676 if (NN != NULL) |
7992 | 677 NN[i] = nn; |
6549 | 678 } |
679 else /* no weights, all weights are 1 */ | |
7992 | 680 #pragma omp for schedule(dynamic) nowait |
681 for (i = 0; i < cX * cY; i++) | |
682 { | |
683 double *X = X0 + (i%cX) * rX; | |
684 double *Y = Y0 + (i/cX) * rY; | |
6549 | 685 double cc=0.0; |
686 double rc=0.0; | |
687 size_t nn=0; | |
7992 | 688 size_t k; |
6549 | 689 for (k=0; k<rX; k++) { |
690 double t,y; | |
691 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
692 if (ISNAN(z)) { |
6549 | 693 #ifndef NO_FLAG |
694 flag_isNaN = 1; | |
695 #endif | |
696 continue; | |
697 } | |
698 // cc += z; [1] | |
699 y = z-rc; | |
700 t = cc+y; | |
701 rc= (t-cc)-y; | |
702 cc= t; | |
703 | |
704 nn++; | |
705 } | |
7992 | 706 CC[i] = cc; |
6549 | 707 if (NN != NULL) |
7992 | 708 NN[i] = (double)nn; |
6549 | 709 } |
710 else // if (X0==Y0) && (cX==cY) | |
711 /******** X==Y, output is symetric *******/ | |
712 if (W) /* weighted version */ | |
7992 | 713 #pragma omp for schedule(dynamic) nowait |
714 for (i = 0; i < cX * cY; i++) | |
715 { | |
716 size_t ii = i%cX; | |
717 size_t jj = i/cX; | |
718 if (ii < jj) continue; | |
719 double *X = X0 + ii * rX; | |
720 double *Y = Y0 + jj * rY; | |
6549 | 721 double cc=0.0; |
722 double nn=0.0; | |
723 double rc=0.0; | |
724 double rn=0.0; | |
7992 | 725 size_t k; |
6549 | 726 for (k=0; k<rX; k++) { |
727 double t,y; | |
728 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
729 if (ISNAN(z)) { |
6549 | 730 #ifndef NO_FLAG |
731 flag_isNaN = 1; | |
732 #endif | |
733 continue; | |
734 } | |
735 // cc += z*W[k]; [1] | |
736 y = z*W[k]-rc; | |
737 t = cc+y; | |
738 rc= (t-cc)-y; | |
739 cc= t; | |
740 | |
741 // nn += W[k]; [1] | |
742 y = z*W[k]-rn; | |
743 t = nn+y; | |
744 rn= (t-nn)-y; | |
745 nn= t; | |
746 } | |
7992 | 747 size_t j = jj + ii*cX; |
748 CC[i] = cc; | |
749 CC[j] = cc; | |
6549 | 750 if (NN != NULL) { |
7992 | 751 NN[i] = nn; |
752 NN[j] = nn; | |
6549 | 753 } |
754 } | |
755 else /* no weights, all weights are 1 */ | |
7992 | 756 #pragma omp for schedule(dynamic) nowait |
757 for (i = 0; i < cX * cY; i++) | |
758 { | |
759 size_t ii = i%cX; | |
760 size_t jj = i/cX; | |
761 if (ii < jj) continue; | |
762 double *X = X0 + ii * rX; | |
763 double *Y = Y0 + jj * rY; | |
6549 | 764 double cc=0.0; |
765 double rc=0.0; | |
766 size_t nn=0; | |
7992 | 767 size_t k; |
6549 | 768 for (k=0; k<rX; k++) { |
769 double t,y; | |
770 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
771 if (ISNAN(z)) { |
6549 | 772 #ifndef NO_FLAG |
773 flag_isNaN = 1; | |
774 #endif | |
775 continue; | |
776 } | |
777 // cc += z; [1] | |
778 y = z-rc; | |
779 t = cc+y; | |
780 rc= (t-cc)-y; | |
781 cc= t; | |
782 | |
783 nn++; | |
784 } | |
7992 | 785 size_t j = jj + ii*cX; |
786 CC[i] = cc; | |
787 CC[j] = cc; | |
6549 | 788 if (NN != NULL) { |
7992 | 789 NN[i] = (double)nn; |
790 NN[j] = (double)nn; | |
6549 | 791 } |
792 } | |
793 } | |
6585 | 794 #endif |
7992 | 795 } // end pragma omg parallel |
796 | |
797 | |
6549 | 798 #ifndef NO_FLAG |
799 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
800 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
801 this requires that the input argument is a non-empty variable | |
802 */ | |
803 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
804 // set FLAG_NANS_OCCURED | |
805 switch (mxGetClassID(PInputs[2])) { | |
806 case mxDOUBLE_CLASS: | |
807 *(double*)mxGetData(PInputs[2]) = 1.0; | |
808 break; | |
809 case mxSINGLE_CLASS: | |
810 *(float*)mxGetData(PInputs[2]) = 1.0; | |
811 break; | |
6585 | 812 case mxLOGICAL_CLASS: |
813 case mxCHAR_CLASS: | |
814 case mxINT8_CLASS: | |
815 case mxUINT8_CLASS: | |
816 *(char*)mxGetData(PInputs[2]) = 1; | |
817 break; | |
818 #ifdef __GNUC__ | |
6549 | 819 case mxINT16_CLASS: |
820 case mxUINT16_CLASS: | |
821 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
822 break; | |
823 case mxINT32_CLASS: | |
824 case mxUINT32_CLASS: | |
825 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
826 break; | |
827 case mxINT64_CLASS: | |
828 case mxUINT64_CLASS: | |
829 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
830 break; | |
831 case mxFUNCTION_CLASS: | |
832 case mxUNKNOWN_CLASS: | |
833 case mxCELL_CLASS: | |
834 case mxSTRUCT_CLASS: | |
6585 | 835 #endif |
836 default: | |
12697
79e7259c6ff1
nan and tsa: Typo in MEX message - fixes bug #46568
schloegl
parents:
12691
diff
changeset
|
837 mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurrence."); |
6549 | 838 } |
839 } | |
840 #endif | |
841 #endif | |
842 } | |
843 |