Mercurial > forge
annotate extra/NaN/src/covm_mex.cpp @ 12691:6d6285a2a633 octave-forge
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
author | schloegl |
---|---|
date | Sat, 12 Sep 2015 14:16:39 +0000 |
parents | f26b1170ea90 |
children | 79e7259c6ff1 |
rev | line source |
---|---|
6549 | 1 /* |
2 //------------------------------------------------------------------- | |
3 // C-MEX implementation of COVM - this function is part of the NaN-toolbox. | |
4 // | |
5 // | |
6 // This program is free software; you can redistribute it and/or modify | |
7 // it under the terms of the GNU General Public License as published by | |
8 // the Free Software Foundation; either version 3 of the License, or | |
9 // (at your option) any later version. | |
10 // | |
11 // This program is distributed in the hope that it will be useful, | |
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 // GNU General Public License for more details. | |
15 // | |
16 // You should have received a copy of the GNU General Public License | |
17 // along with this program; if not, see <http://www.gnu.org/licenses/>. | |
18 // | |
19 // | |
20 // covm: in-product of matrices, NaN are skipped. | |
21 // usage: | |
22 // [cc,nn] = covm_mex(X,Y,flag,W); | |
23 // | |
24 // Input: | |
25 // - X: | |
26 // - Y: [optional], if empty, Y=X; | |
27 // - flag: if not empty, it is set to 1 if some NaN was observed | |
28 // - W: weight vector to compute weighted correlation | |
29 // | |
30 // Output: | |
31 // - CC = X' * sparse(diag(W)) * Y while NaN's are skipped | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
32 // - NN = real(~ISNAN(X)')*sparse(diag(W))*real(~ISNAN(Y)) count of valid (non-NaN) elements |
6549 | 33 // computed more efficiently |
34 // | |
35 // $Id$ | |
8037 | 36 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com> |
6549 | 37 // This function is part of the NaN-toolbox |
7889 | 38 // http://pub.ist.ac.at/~schloegl/matlab/NaN/ |
6549 | 39 // |
40 //------------------------------------------------------------------- | |
41 */ | |
42 | |
6585 | 43 #ifdef __GNUC__ |
7888 | 44 #include <stdint.h> |
6585 | 45 #endif |
6549 | 46 #include <math.h> |
47 #include "mex.h" | |
48 | |
49 /*#define NO_FLAG*/ | |
50 | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
51 /* |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
52 math.h has isnan() defined for all sizes of floating point numbers, |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
53 but c++ assumes isnan(double), causing possible conversions for float and long double |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
54 */ |
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
55 #define ISNAN(a) (a!=a) |
6549 | 56 |
57 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) | |
58 { | |
7992 | 59 double *X0=NULL, *Y0=NULL, *W=NULL; |
60 double *CC; | |
61 double *NN = NULL; | |
6549 | 62 |
7992 | 63 size_t rX,cX,rY,cY; |
64 size_t i; | |
65 char flag_isNaN = 0; | |
6585 | 66 int ACC_LEVEL; |
6549 | 67 |
68 /*********** check input arguments *****************/ | |
69 | |
70 // check for proper number of input and output arguments | |
71 if ((PInputCount <= 0) || (PInputCount > 5)) { | |
72 mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); | |
73 mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n"); | |
74 /* | |
75 mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); | |
76 mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n"); | |
77 mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n"); | |
78 mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'*sparse(diag(W))*Y, number of rows of X and Y must match\n"); | |
79 mexPrintf("\t... = covm_mex(X,[], ...)\n\t\t computes CC=X'*sparse(diag(W))*X\n"); | |
80 mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n"); | |
81 mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n"); | |
82 mexPrintf("\t\t if isempty(W), all weights are 1\n"); | |
83 mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n"); | |
84 */ return; | |
85 } | |
86 if (POutputCount > 2) | |
87 mexErrMsgTxt("covm.MEX has 1 to 2 output arguments."); | |
88 | |
89 | |
90 // get 1st argument | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
8037
diff
changeset
|
91 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) |
6549 | 92 X0 = mxGetPr(PInputs[0]); |
93 else | |
12640
de98e4cb9248
check for sparse matrices and and convert to full if needed
schloegl
parents:
8037
diff
changeset
|
94 mexErrMsgTxt("First argument must be non-sparse REAL/DOUBLE."); |
6549 | 95 rX = mxGetM(PInputs[0]); |
96 cX = mxGetN(PInputs[0]); | |
97 | |
98 // get 2nd argument | |
99 if (PInputCount > 1) { | |
100 if (!mxGetNumberOfElements(PInputs[1])) | |
101 ; // Y0 = NULL; | |
102 | |
103 else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1])) | |
104 Y0 = mxGetPr(PInputs[1]); | |
105 | |
106 else | |
107 mexErrMsgTxt("Second argument must be REAL/DOUBLE."); | |
108 } | |
109 | |
110 | |
111 // get weight vector for weighted sumskipnan | |
112 if (PInputCount > 3) { | |
113 // get 4th argument | |
7992 | 114 size_t nW = mxGetNumberOfElements(PInputs[3]); |
6549 | 115 if (!nW) |
116 ; | |
117 else if (nW == rX) | |
118 W = mxGetPr(PInputs[3]); | |
119 else | |
120 mexErrMsgTxt("number of elements in W must match numbers of rows in X"); | |
121 } | |
122 | |
6585 | 123 #ifdef __GNUC__ |
124 ACC_LEVEL = 0; | |
6549 | 125 { |
126 mxArray *LEVEL = NULL; | |
127 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); | |
128 if (!s) { | |
129 ACC_LEVEL = (int) mxGetScalar(LEVEL); | |
130 } | |
131 mxDestroyArray(LEVEL); | |
132 } | |
133 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); | |
6585 | 134 #endif |
6549 | 135 if (Y0==NULL) { |
136 Y0 = X0; | |
137 rY = rX; | |
138 cY = cX; | |
139 } | |
140 else { | |
141 rY = mxGetM(PInputs[1]); | |
142 cY = mxGetN(PInputs[1]); | |
143 } | |
144 if (rX != rY) | |
145 mexErrMsgTxt("number of rows in X and Y do not match"); | |
146 | |
147 /*********** create output arguments *****************/ | |
148 | |
149 POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL); | |
150 CC = mxGetPr(POutput[0]); | |
151 | |
152 if (POutputCount > 1) { | |
153 POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL); | |
154 NN = mxGetPr(POutput[1]); | |
155 } | |
156 | |
157 | |
158 /*********** compute covariance *****************/ | |
159 | |
160 #if 0 | |
161 /*------ version 1 --------------------- | |
162 this solution is slower than the alternative solution below | |
163 for transposed matrices, this might be faster. | |
164 */ | |
165 for (k=0; k<rX; k++) { | |
166 double w; | |
167 if (W) { | |
168 w = W[k]; | |
169 for (i=0; i<cX; i++) { | |
170 double x = X0[k+i*rX]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
171 if (ISNAN(x)) { |
6549 | 172 #ifndef NO_FLAG |
173 flag_isNaN = 1; | |
174 #endif | |
175 continue; | |
176 } | |
177 for (j=0; j<cY; j++) { | |
178 double y = Y0[k+j*rY]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
179 if (ISNAN(y)) { |
6549 | 180 #ifndef NO_FLAG |
181 flag_isNaN = 1; | |
182 #endif | |
183 continue; | |
184 } | |
185 CC[i+j*cX] += x*y*w; | |
186 if (NN != NULL) | |
187 NN[i+j*cX] += w; | |
188 } | |
189 } | |
190 } | |
191 else for (i=0; i<cX; i++) { | |
192 double x = X0[k+i*rX]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
193 if (ISNAN(x)) { |
6549 | 194 #ifndef NO_FLAG |
195 flag_isNaN = 1; | |
196 #endif | |
197 continue; | |
198 } | |
199 for (j=0; j<cY; j++) { | |
200 double y = Y0[k+j*rY]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
201 if (ISNAN(y)) { |
6549 | 202 #ifndef NO_FLAG |
203 flag_isNaN = 1; | |
204 #endif | |
205 continue; | |
206 } | |
207 CC[i+j*cX] += x*y; | |
208 if (NN != NULL) | |
209 NN[i+j*cX] += 1.0; | |
210 } | |
211 } | |
212 } | |
213 | |
214 #else | |
7992 | 215 |
216 #pragma omp parallel | |
217 { | |
6585 | 218 #ifdef __GNUC__ |
219 if (ACC_LEVEL == 0) | |
220 #endif | |
221 { | |
6549 | 222 /*------ version 2 --------------------- |
223 using naive summation with double accuracy [1] | |
224 */ | |
225 if ( (X0 != Y0) || (cX != cY) ) | |
226 /******** X!=Y, output is not symetric *******/ | |
227 if (W) /* weighted version */ | |
7992 | 228 #pragma omp for schedule(dynamic) nowait |
229 for (i = 0; i < cX * cY; i++) | |
230 { | |
231 double *X = X0 + (i%cX) * rX; | |
232 double *Y = Y0 + (i/cX) * rY; | |
233 double cc = 0.0; | |
234 double nw = 0.0; | |
235 size_t k; | |
6549 | 236 for (k=0; k<rX; k++) { |
237 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
238 if (ISNAN(z)) { |
6549 | 239 #ifndef NO_FLAG |
240 flag_isNaN = 1; | |
241 #endif | |
242 continue; | |
243 } | |
244 cc += z*W[k]; | |
6585 | 245 nw += W[k]; |
6549 | 246 } |
7992 | 247 CC[i] = cc; |
6549 | 248 if (NN != NULL) |
7992 | 249 NN[i] = nw; |
6549 | 250 } |
251 else /* no weights, all weights are 1 */ | |
7992 | 252 #pragma omp for schedule(dynamic) nowait |
253 for (i = 0; i < cX * cY; i++) | |
254 { | |
255 double *X = X0 + (i%cX) * rX; | |
256 double *Y = Y0 + (i/cX) * rY; | |
257 double cc = 0.0; | |
258 size_t nn = 0; | |
259 size_t k; | |
6549 | 260 for (k=0; k<rX; k++) { |
261 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
262 if (ISNAN(z)) { |
6549 | 263 #ifndef NO_FLAG |
264 flag_isNaN = 1; | |
265 #endif | |
266 continue; | |
267 } | |
268 cc += z; | |
269 nn++; | |
270 } | |
7992 | 271 CC[i] = cc; |
6549 | 272 if (NN != NULL) |
7992 | 273 NN[i] = (double)nn; |
6549 | 274 } |
275 else // if (X0==Y0) && (cX==cY) | |
276 /******** X==Y, output is symetric *******/ | |
277 if (W) /* weighted version */ | |
7992 | 278 #pragma omp for schedule(dynamic) nowait |
279 for (i = 0; i < cX * cY; i++) | |
280 { | |
281 size_t ii = i%cX; | |
282 size_t jj = i/cX; | |
283 if (ii < jj) continue; | |
284 double *X = X0 + ii * rX; | |
285 double *Y = Y0 + jj * rY; | |
286 double cc = 0.0; | |
287 double nw = 0.0; | |
288 size_t k; | |
6549 | 289 for (k=0; k<rX; k++) { |
290 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
291 if (ISNAN(z)) { |
6549 | 292 #ifndef NO_FLAG |
293 flag_isNaN = 1; | |
294 #endif | |
295 continue; | |
296 } | |
297 cc += z*W[k]; | |
6585 | 298 nw += W[k]; |
7992 | 299 } |
300 size_t j = jj + ii*cX; | |
301 CC[i] = cc; | |
302 CC[j] = cc; | |
6549 | 303 if (NN != NULL) { |
7992 | 304 NN[i] = nw; |
305 NN[j] = nw; | |
306 } | |
6549 | 307 } |
308 else /* no weights, all weights are 1 */ | |
7992 | 309 #pragma omp for schedule(dynamic) nowait |
310 for (i = 0; i < cX * cY; i++) | |
311 { | |
312 size_t ii = i%cX; | |
313 size_t jj = i/cX; | |
314 if (ii < jj) continue; | |
315 double *X = X0 + ii * rX; | |
316 double *Y = Y0 + jj * rY; | |
317 double cc = 0.0; | |
318 size_t nn = 0; | |
319 size_t k; | |
6549 | 320 for (k=0; k<rX; k++) { |
321 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
322 if (ISNAN(z)) { |
6549 | 323 #ifndef NO_FLAG |
324 flag_isNaN = 1; | |
325 #endif | |
326 continue; | |
327 } | |
328 cc += z; | |
329 nn++; | |
330 } | |
7992 | 331 size_t j = jj + ii*cX; |
332 CC[i] = cc; | |
333 CC[j] = cc; | |
6549 | 334 if (NN != NULL) { |
7992 | 335 NN[i] = (double)nn; |
336 NN[j] = (double)nn; | |
6549 | 337 } |
338 } | |
339 | |
340 } | |
6585 | 341 |
342 #ifdef __GNUC__ | |
343 | |
6549 | 344 else if (ACC_LEVEL == 1) { |
345 /*------ version 2 --------------------- | |
346 using naive summation with extended accuracy [1] | |
347 */ | |
348 if ( (X0 != Y0) || (cX != cY) ) | |
349 /******** X!=Y, output is not symetric *******/ | |
350 if (W) /* weighted version */ | |
7992 | 351 #pragma omp for schedule(dynamic) nowait |
352 for (i = 0; i < cX * cY; i++) | |
353 { | |
354 double *X = X0 + (i%cX) * rX; | |
355 double *Y = Y0 + (i/cX) * rY; | |
6549 | 356 long double cc=0.0; |
357 long double nn=0.0; | |
7992 | 358 size_t k; |
6549 | 359 for (k=0; k<rX; k++) { |
360 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
361 if (ISNAN(z)) { |
6549 | 362 #ifndef NO_FLAG |
363 flag_isNaN = 1; | |
364 #endif | |
365 continue; | |
366 } | |
367 cc += z*W[k]; | |
368 nn += W[k]; | |
369 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
370 CC[i] = (typeof(*CC))cc; |
6549 | 371 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
372 NN[i] = (typeof(*NN))nn; |
6549 | 373 } |
374 else /* no weights, all weights are 1 */ | |
7992 | 375 #pragma omp for schedule(dynamic) nowait |
376 for (i = 0; i < cX * cY; i++) | |
377 { | |
378 double *X = X0 + (i%cX) * rX; | |
379 double *Y = Y0 + (i/cX) * rY; | |
6549 | 380 long double cc=0.0; |
381 size_t nn=0; | |
7992 | 382 size_t k; |
6549 | 383 for (k=0; k<rX; k++) { |
384 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
385 if (ISNAN(z)) { |
6549 | 386 #ifndef NO_FLAG |
387 flag_isNaN = 1; | |
388 #endif | |
389 continue; | |
390 } | |
391 cc += z; | |
392 nn++; | |
393 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
394 CC[i] = (typeof(*CC))cc; |
6549 | 395 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
396 NN[i] = (typeof(*NN))nn; |
6549 | 397 } |
398 else // if (X0==Y0) && (cX==cY) | |
399 /******** X==Y, output is symetric *******/ | |
400 if (W) /* weighted version */ | |
7992 | 401 #pragma omp for schedule(dynamic) nowait |
402 for (i = 0; i < cX * cY; i++) | |
403 { | |
404 size_t ii = i%cX; | |
405 size_t jj = i/cX; | |
406 if (ii < jj) continue; | |
407 double *X = X0 + ii * rX; | |
408 double *Y = Y0 + jj * rY; | |
6549 | 409 long double cc=0.0; |
410 long double nn=0.0; | |
7992 | 411 size_t k; |
6549 | 412 for (k=0; k<rX; k++) { |
413 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
414 if (ISNAN(z)) { |
6549 | 415 #ifndef NO_FLAG |
416 flag_isNaN = 1; | |
417 #endif | |
418 continue; | |
419 } | |
420 cc += z*W[k]; | |
421 nn += W[k]; | |
422 } | |
7992 | 423 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
424 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
425 CC[j] = (typeof(*CC))cc; |
6549 | 426 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
427 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
428 NN[j] = (typeof(*NN))nn; |
6549 | 429 } |
430 } | |
431 else /* no weights, all weights are 1 */ | |
7992 | 432 #pragma omp for schedule(dynamic) nowait |
433 for (i = 0; i < cX * cY; i++) | |
434 { | |
435 size_t ii = i%cX; | |
436 size_t jj = i/cX; | |
437 if (ii < jj) continue; | |
438 double *X = X0 + ii * rX; | |
439 double *Y = Y0 + jj * rY; | |
6549 | 440 long double cc=0.0; |
441 size_t nn=0; | |
7992 | 442 size_t k; |
6549 | 443 for (k=0; k<rX; k++) { |
444 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
445 if (ISNAN(z)) { |
6549 | 446 #ifndef NO_FLAG |
447 flag_isNaN = 1; | |
448 #endif | |
449 continue; | |
450 } | |
451 cc += z; | |
452 nn++; | |
453 } | |
7992 | 454 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
455 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
456 CC[j] = (typeof(*CC))cc; |
6549 | 457 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
458 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
459 NN[j] = (typeof(*NN))nn; |
6549 | 460 } |
461 } | |
462 | |
463 } | |
464 else if (ACC_LEVEL == 3) { | |
465 /*------ version 3 --------------------- | |
466 using Kahan's summation with extended (long double) accuracy [1] | |
467 this gives more accurate results while the computational effort within the loop is about 4x as high | |
468 However, first test show an increase in computational time of only about 25 %. | |
469 | |
470 [1] David Goldberg, | |
471 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
472 ACM Computing Surveys, Vol 23, No 1, March 1991 | |
473 */ | |
474 if ( (X0 != Y0) || (cX != cY) ) | |
475 /******** X!=Y, output is not symetric *******/ | |
476 if (W) /* weighted version */ | |
7992 | 477 #pragma omp for schedule(dynamic) nowait |
478 for (i = 0; i < cX * cY; i++) | |
479 { | |
480 double *X = X0 + (i%cX) * rX; | |
481 double *Y = Y0 + (i/cX) * rY; | |
6549 | 482 long double cc=0.0; |
483 long double nn=0.0; | |
484 long double rc=0.0; | |
485 long double rn=0.0; | |
7992 | 486 size_t k; |
6549 | 487 for (k=0; k<rX; k++) { |
488 long double t,y; | |
489 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
490 if (ISNAN(z)) { |
6549 | 491 #ifndef NO_FLAG |
492 flag_isNaN = 1; | |
493 #endif | |
494 continue; | |
495 } | |
496 // cc += z*W[k]; [1] | |
497 y = z*W[k]-rc; | |
498 t = cc+y; | |
499 rc= (t-cc)-y; | |
500 cc= t; | |
501 | |
502 // nn += W[k]; [1] | |
503 y = z*W[k]-rn; | |
504 t = nn+y; | |
505 rn= (t-nn)-y; | |
506 nn= t; | |
507 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
508 CC[i] = (typeof(*CC))cc; |
6549 | 509 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
510 NN[i] = (typeof(*NN))nn; |
6549 | 511 } |
512 else /* no weights, all weights are 1 */ | |
7992 | 513 #pragma omp for schedule(dynamic) nowait |
514 for (i = 0; i < cX * cY; i++) | |
515 { | |
516 double *X = X0 + (i%cX) * rX; | |
517 double *Y = Y0 + (i/cX) * rY; | |
6549 | 518 long double cc=0.0; |
519 long double rc=0.0; | |
520 size_t nn=0; | |
7992 | 521 size_t k; |
6549 | 522 for (k=0; k<rX; k++) { |
523 long double t,y; | |
524 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
525 if (ISNAN(z)) { |
6549 | 526 #ifndef NO_FLAG |
527 flag_isNaN = 1; | |
528 #endif | |
529 continue; | |
530 } | |
531 // cc += z; [1] | |
532 y = z-rc; | |
533 t = cc+y; | |
534 rc= (t-cc)-y; | |
535 cc= t; | |
536 | |
537 nn++; | |
538 } | |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
539 CC[i] = (typeof(*CC))cc; |
6549 | 540 if (NN != NULL) |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
541 NN[i] = (typeof(*NN))nn; |
6549 | 542 } |
543 else // if (X0==Y0) && (cX==cY) | |
544 /******** X==Y, output is symetric *******/ | |
545 if (W) /* weighted version */ | |
7992 | 546 #pragma omp for schedule(dynamic) nowait |
547 for (i = 0; i < cX * cY; i++) | |
548 { | |
549 size_t ii = i%cX; | |
550 size_t jj = i/cX; | |
551 if (ii < jj) continue; | |
552 double *X = X0 + ii * rX; | |
553 double *Y = Y0 + jj * rY; | |
6549 | 554 long double cc=0.0; |
555 long double nn=0.0; | |
556 long double rc=0.0; | |
557 long double rn=0.0; | |
7992 | 558 size_t k; |
6549 | 559 for (k=0; k<rX; k++) { |
560 long double t,y; | |
561 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
562 if (ISNAN(z)) { |
6549 | 563 #ifndef NO_FLAG |
564 flag_isNaN = 1; | |
565 #endif | |
566 continue; | |
567 } | |
568 // cc += z*W[k]; [1] | |
569 y = z*W[k]-rc; | |
570 t = cc+y; | |
571 rc= (t-cc)-y; | |
572 cc= t; | |
573 | |
574 // nn += W[k]; [1] | |
575 y = z*W[k]-rn; | |
576 t = nn+y; | |
577 rn= (t-nn)-y; | |
578 nn= t; | |
579 } | |
7992 | 580 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
581 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
582 CC[j] = (typeof(*CC))cc; |
6549 | 583 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
584 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
585 NN[j] = (typeof(*NN))nn; |
6549 | 586 } |
587 } | |
588 else /* no weights, all weights are 1 */ | |
7992 | 589 #pragma omp for schedule(dynamic) nowait |
590 for (i = 0; i < cX * cY; i++) | |
591 { | |
592 size_t ii = i%cX; | |
593 size_t jj = i/cX; | |
594 if (ii < jj) continue; | |
595 double *X = X0 + ii * rX; | |
596 double *Y = Y0 + jj * rY; | |
6549 | 597 long double cc=0.0; |
598 long double rc=0.0; | |
599 size_t nn=0; | |
7992 | 600 size_t k; |
6549 | 601 for (k=0; k<rX; k++) { |
602 long double t,y; | |
603 long double z = ((long double)X[k])*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
604 if (ISNAN(z)) { |
6549 | 605 #ifndef NO_FLAG |
606 flag_isNaN = 1; | |
607 #endif | |
608 continue; | |
609 } | |
610 // cc += z; [1] | |
611 y = z-rc; | |
612 t = cc+y; | |
613 rc= (t-cc)-y; | |
614 cc= t; | |
615 | |
616 nn++; | |
617 } | |
7992 | 618 size_t j = jj + ii*cX; |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
619 CC[i] = (typeof(*CC))cc; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
620 CC[j] = (typeof(*CC))cc; |
6549 | 621 if (NN != NULL) { |
12685
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
622 NN[i] = (typeof(*NN))nn; |
f26b1170ea90
resulting values should be really converted to output data type
schloegl
parents:
12640
diff
changeset
|
623 NN[j] = (typeof(*NN))nn; |
6549 | 624 } |
625 } | |
626 } | |
627 else if (ACC_LEVEL == 2) { | |
628 /*------ version 3 --------------------- | |
629 using Kahan's summation with double accuracy [1] | |
630 this gives more accurate results while the computational effort within the loop is about 4x as high | |
631 However, first test show an increase in computational time of only about 25 %. | |
632 | |
633 [1] David Goldberg, | |
634 What Every Computer Scientist Should Know About Floating-Point Arithmetic | |
635 ACM Computing Surveys, Vol 23, No 1, March 1991 | |
636 */ | |
637 if ( (X0 != Y0) || (cX != cY) ) | |
638 /******** X!=Y, output is not symetric *******/ | |
639 if (W) /* weighted version */ | |
7992 | 640 #pragma omp for schedule(dynamic) nowait |
641 for (i = 0; i < cX * cY; i++) | |
642 { | |
643 double *X = X0 + (i%cX) * rX; | |
644 double *Y = Y0 + (i/cX) * rY; | |
6549 | 645 double cc=0.0; |
646 double nn=0.0; | |
647 double rc=0.0; | |
648 double rn=0.0; | |
7992 | 649 size_t k; |
6549 | 650 for (k=0; k<rX; k++) { |
651 double t,y; | |
652 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
653 if (ISNAN(z)) { |
6549 | 654 #ifndef NO_FLAG |
655 flag_isNaN = 1; | |
656 #endif | |
657 continue; | |
658 } | |
659 // cc += z*W[k]; [1] | |
660 y = z*W[k]-rc; | |
661 t = cc+y; | |
662 rc= (t-cc)-y; | |
663 cc= t; | |
664 | |
665 // nn += W[k]; [1] | |
666 y = z*W[k]-rn; | |
667 t = nn+y; | |
668 rn= (t-nn)-y; | |
669 nn= t; | |
670 } | |
7992 | 671 CC[i] = cc; |
6549 | 672 if (NN != NULL) |
7992 | 673 NN[i] = nn; |
6549 | 674 } |
675 else /* no weights, all weights are 1 */ | |
7992 | 676 #pragma omp for schedule(dynamic) nowait |
677 for (i = 0; i < cX * cY; i++) | |
678 { | |
679 double *X = X0 + (i%cX) * rX; | |
680 double *Y = Y0 + (i/cX) * rY; | |
6549 | 681 double cc=0.0; |
682 double rc=0.0; | |
683 size_t nn=0; | |
7992 | 684 size_t k; |
6549 | 685 for (k=0; k<rX; k++) { |
686 double t,y; | |
687 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
688 if (ISNAN(z)) { |
6549 | 689 #ifndef NO_FLAG |
690 flag_isNaN = 1; | |
691 #endif | |
692 continue; | |
693 } | |
694 // cc += z; [1] | |
695 y = z-rc; | |
696 t = cc+y; | |
697 rc= (t-cc)-y; | |
698 cc= t; | |
699 | |
700 nn++; | |
701 } | |
7992 | 702 CC[i] = cc; |
6549 | 703 if (NN != NULL) |
7992 | 704 NN[i] = (double)nn; |
6549 | 705 } |
706 else // if (X0==Y0) && (cX==cY) | |
707 /******** X==Y, output is symetric *******/ | |
708 if (W) /* weighted version */ | |
7992 | 709 #pragma omp for schedule(dynamic) nowait |
710 for (i = 0; i < cX * cY; i++) | |
711 { | |
712 size_t ii = i%cX; | |
713 size_t jj = i/cX; | |
714 if (ii < jj) continue; | |
715 double *X = X0 + ii * rX; | |
716 double *Y = Y0 + jj * rY; | |
6549 | 717 double cc=0.0; |
718 double nn=0.0; | |
719 double rc=0.0; | |
720 double rn=0.0; | |
7992 | 721 size_t k; |
6549 | 722 for (k=0; k<rX; k++) { |
723 double t,y; | |
724 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
725 if (ISNAN(z)) { |
6549 | 726 #ifndef NO_FLAG |
727 flag_isNaN = 1; | |
728 #endif | |
729 continue; | |
730 } | |
731 // cc += z*W[k]; [1] | |
732 y = z*W[k]-rc; | |
733 t = cc+y; | |
734 rc= (t-cc)-y; | |
735 cc= t; | |
736 | |
737 // nn += W[k]; [1] | |
738 y = z*W[k]-rn; | |
739 t = nn+y; | |
740 rn= (t-nn)-y; | |
741 nn= t; | |
742 } | |
7992 | 743 size_t j = jj + ii*cX; |
744 CC[i] = cc; | |
745 CC[j] = cc; | |
6549 | 746 if (NN != NULL) { |
7992 | 747 NN[i] = nn; |
748 NN[j] = nn; | |
6549 | 749 } |
750 } | |
751 else /* no weights, all weights are 1 */ | |
7992 | 752 #pragma omp for schedule(dynamic) nowait |
753 for (i = 0; i < cX * cY; i++) | |
754 { | |
755 size_t ii = i%cX; | |
756 size_t jj = i/cX; | |
757 if (ii < jj) continue; | |
758 double *X = X0 + ii * rX; | |
759 double *Y = Y0 + jj * rY; | |
6549 | 760 double cc=0.0; |
761 double rc=0.0; | |
762 size_t nn=0; | |
7992 | 763 size_t k; |
6549 | 764 for (k=0; k<rX; k++) { |
765 double t,y; | |
766 double z = X[k]*Y[k]; | |
12691
6d6285a2a633
use macro ISNAN() instead of C++'s isnan() - because it supports all floating point formats not just double
schloegl
parents:
12685
diff
changeset
|
767 if (ISNAN(z)) { |
6549 | 768 #ifndef NO_FLAG |
769 flag_isNaN = 1; | |
770 #endif | |
771 continue; | |
772 } | |
773 // cc += z; [1] | |
774 y = z-rc; | |
775 t = cc+y; | |
776 rc= (t-cc)-y; | |
777 cc= t; | |
778 | |
779 nn++; | |
780 } | |
7992 | 781 size_t j = jj + ii*cX; |
782 CC[i] = cc; | |
783 CC[j] = cc; | |
6549 | 784 if (NN != NULL) { |
7992 | 785 NN[i] = (double)nn; |
786 NN[j] = (double)nn; | |
6549 | 787 } |
788 } | |
789 } | |
6585 | 790 #endif |
7992 | 791 } // end pragma omg parallel |
792 | |
793 | |
6549 | 794 #ifndef NO_FLAG |
795 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); | |
796 /* this is a hack, the third input argument is used to return whether a NaN occured or not. | |
797 this requires that the input argument is a non-empty variable | |
798 */ | |
799 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { | |
800 // set FLAG_NANS_OCCURED | |
801 switch (mxGetClassID(PInputs[2])) { | |
802 case mxDOUBLE_CLASS: | |
803 *(double*)mxGetData(PInputs[2]) = 1.0; | |
804 break; | |
805 case mxSINGLE_CLASS: | |
806 *(float*)mxGetData(PInputs[2]) = 1.0; | |
807 break; | |
6585 | 808 case mxLOGICAL_CLASS: |
809 case mxCHAR_CLASS: | |
810 case mxINT8_CLASS: | |
811 case mxUINT8_CLASS: | |
812 *(char*)mxGetData(PInputs[2]) = 1; | |
813 break; | |
814 #ifdef __GNUC__ | |
6549 | 815 case mxINT16_CLASS: |
816 case mxUINT16_CLASS: | |
817 *(uint16_t*)mxGetData(PInputs[2]) = 1; | |
818 break; | |
819 case mxINT32_CLASS: | |
820 case mxUINT32_CLASS: | |
821 *(uint32_t*)mxGetData(PInputs[2])= 1; | |
822 break; | |
823 case mxINT64_CLASS: | |
824 case mxUINT64_CLASS: | |
825 *(uint64_t*)mxGetData(PInputs[2]) = 1; | |
826 break; | |
827 case mxFUNCTION_CLASS: | |
828 case mxUNKNOWN_CLASS: | |
829 case mxCELL_CLASS: | |
830 case mxSTRUCT_CLASS: | |
6585 | 831 #endif |
832 default: | |
6549 | 833 mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurence."); |
834 } | |
835 } | |
836 #endif | |
837 #endif | |
838 } | |
839 |