forge: extra/NaN/src/covm_mex.cpp annotate

annotate extra/NaN/src/covm_mex.cpp @ 12685:f26b1170ea90 octave-forge

resulting values should be really converted to output data type

author	schloegl
date	Sat, 12 Sep 2015 07:15:01 +0000
parents	de98e4cb9248
children	6d6285a2a633

rev	line source
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	1 /*
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	2 //-------------------------------------------------------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	3 // C-MEX implementation of COVM - this function is part of the NaN-toolbox.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	4 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	5 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	6 // This program is free software; you can redistribute it and/or modify
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	7 // it under the terms of the GNU General Public License as published by
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	8 // the Free Software Foundation; either version 3 of the License, or
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	9 // (at your option) any later version.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	10 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	11 // This program is distributed in the hope that it will be useful,
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	14 // GNU General Public License for more details.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	15 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	16 // You should have received a copy of the GNU General Public License
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	17 // along with this program; if not, see <http://www.gnu.org/licenses/>.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	18 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	19 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	20 // covm: inner product of matrices; NaNs are skipped.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	21 // usage:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	22 // [cc,nn] = covm_mex(X,Y,flag,W);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	23 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	24 // Input:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	25 // - X:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	26 // - Y: [optional], if empty, Y=X;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	27 // - flag: if not empty, it is set to 1 if some NaN was observed
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	28 // - W: weight vector to compute weighted correlation
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	29 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	30 // Output:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	31 // - CC = X' * sparse(diag(W)) * Y while NaN's are skipped
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	32 // - NN = real(~isnan(X)')*sparse(diag(W))*real(~isnan(Y)) count of valid (non-NaN) elements
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	33 // computed more efficiently
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	34 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	35 // $Id$
8037 6a419bec96bb update contact e-mail and www address schloegl parents: 7992 diff changeset	36 // Copyright (C) 2009,2010,2011 Alois Schloegl <alois.schloegl@gmail.com>
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	37 // This function is part of the NaN-toolbox
7889 c101c486d80a fix web address schloegl parents: 7888 diff changeset	38 // http://pub.ist.ac.at/~schloegl/matlab/NaN/
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	39 //
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	40 //-------------------------------------------------------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	41 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	42
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	43 #ifdef __GNUC__
7888 b9f35668b55e replace <inttypes.h> with <stdint.h> schloegl parents: 6585 diff changeset	44 #include <stdint.h>
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	45 #endif
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	46 #include <math.h>
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	47 #include "mex.h"
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	48
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	49 /*#define NO_FLAG*/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	50
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	51
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	52 void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[])
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	53 {
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	54 double X0=NULL, Y0=NULL, *W=NULL;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	55 double *CC;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	56 double *NN = NULL;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	57
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	58 size_t rX,cX,rY,cY;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	59 size_t i;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	60 char flag_isNaN = 0;
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	61 int ACC_LEVEL;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	62
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	63 /********* check input arguments ***************/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	64
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	65 // check for proper number of input and output arguments
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	66 if ((PInputCount <= 0) \|\| (PInputCount > 5)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	67 mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	68 mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	69 /*
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	70 mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	71 mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	72 mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	73 mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'sparse(diag(W))Y, number of rows of X and Y must match\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	74 mexPrintf("\t... = covm_mex(X,[], ...)\n\t\t computes CC=X'sparse(diag(W))X\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	75 mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	76 mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	77 mexPrintf("\t\t if isempty(W), all weights are 1\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	78 mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	79 */ return;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	80 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	81 if (POutputCount > 2)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	82 mexErrMsgTxt("covm.MEX has 1 to 2 output arguments.");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	83
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	84
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	85 // get 1st argument
12640 de98e4cb9248 check for sparse matrices and and convert to full if needed schloegl parents: 8037 diff changeset	86 if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) )
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	87 X0 = mxGetPr(PInputs[0]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	88 else
12640 de98e4cb9248 check for sparse matrices and and convert to full if needed schloegl parents: 8037 diff changeset	89 mexErrMsgTxt("First argument must be non-sparse REAL/DOUBLE.");
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	90 rX = mxGetM(PInputs[0]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	91 cX = mxGetN(PInputs[0]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	92
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	93 // get 2nd argument
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	94 if (PInputCount > 1) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	95 if (!mxGetNumberOfElements(PInputs[1]))
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	96 ; // Y0 = NULL;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	97
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	98 else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1]))
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	99 Y0 = mxGetPr(PInputs[1]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	100
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	101 else
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	102 mexErrMsgTxt("Second argument must be REAL/DOUBLE.");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	103 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	104
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	105
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	106 // get weight vector for weighted sumskipnan
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	107 if (PInputCount > 3) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	108 // get 4th argument
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	109 size_t nW = mxGetNumberOfElements(PInputs[3]);
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	110 if (!nW)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	111 ;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	112 else if (nW == rX)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	113 W = mxGetPr(PInputs[3]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	114 else
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	115 mexErrMsgTxt("number of elements in W must match numbers of rows in X");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	116 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	117
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	118 #ifdef __GNUC__
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	119 ACC_LEVEL = 0;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	120 {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	121 mxArray *LEVEL = NULL;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	122 int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	123 if (!s) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	124 ACC_LEVEL = (int) mxGetScalar(LEVEL);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	125 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	126 mxDestroyArray(LEVEL);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	127 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	128 // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL);
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	129 #endif
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	130 if (Y0==NULL) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	131 Y0 = X0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	132 rY = rX;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	133 cY = cX;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	134 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	135 else {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	136 rY = mxGetM(PInputs[1]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	137 cY = mxGetN(PInputs[1]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	138 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	139 if (rX != rY)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	140 mexErrMsgTxt("number of rows in X and Y do not match");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	141
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	142 /********* create output arguments ***************/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	143
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	144 POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	145 CC = mxGetPr(POutput[0]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	146
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	147 if (POutputCount > 1) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	148 POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	149 NN = mxGetPr(POutput[1]);
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	150 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	151
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	152
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	153 /********* compute covariance ***************/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	154
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	155 #if 0
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	156 /*------ version 1 ---------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	157 this solution is slower than the alternative solution below
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	158 for transposed matrices, this might be faster.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	159 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	160 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	161 double w;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	162 if (W) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	163 w = W[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	164 for (i=0; i<cX; i++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	165 double x = X0[k+i*rX];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	166 if (isnan(x)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	167 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	168 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	169 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	170 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	171 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	172 for (j=0; j<cY; j++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	173 double y = Y0[k+j*rY];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	174 if (isnan(y)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	175 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	176 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	177 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	178 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	179 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	180 CC[i+jcX] += xy*w;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	181 if (NN != NULL)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	182 NN[i+j*cX] += w;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	183 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	184 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	185 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	186 else for (i=0; i<cX; i++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	187 double x = X0[k+i*rX];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	188 if (isnan(x)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	189 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	190 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	191 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	192 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	193 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	194 for (j=0; j<cY; j++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	195 double y = Y0[k+j*rY];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	196 if (isnan(y)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	197 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	198 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	199 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	200 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	201 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	202 CC[i+jcX] += xy;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	203 if (NN != NULL)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	204 NN[i+j*cX] += 1.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	205 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	206 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	207 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	208
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	209 #else
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	210
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	211 #pragma omp parallel
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	212 {
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	213 #ifdef __GNUC__
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	214 if (ACC_LEVEL == 0)
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	215 #endif
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	216 {
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	217 /*------ version 2 ---------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	218 using naive summation with double accuracy [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	219 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	220 if ( (X0 != Y0) \|\| (cX != cY) )
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	221 /****** X!=Y, output is not symetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	222 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	223 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	224 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	225 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	226 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	227 double Y = Y0 + (i/cX) rY;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	228 double cc = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	229 double nw = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	230 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	231 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	232 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	233 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	234 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	235 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	236 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	237 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	238 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	239 cc += z*W[k];
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	240 nw += W[k];
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	241 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	242 CC[i] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	243 if (NN != NULL)
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	244 NN[i] = nw;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	245 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	246 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	247 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	248 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	249 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	250 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	251 double Y = Y0 + (i/cX) rY;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	252 double cc = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	253 size_t nn = 0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	254 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	255 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	256 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	257 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	258 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	259 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	260 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	261 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	262 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	263 cc += z;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	264 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	265 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	266 CC[i] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	267 if (NN != NULL)
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	268 NN[i] = (double)nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	269 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	270 else // if (X0==Y0) && (cX==cY)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	271 /****** X==Y, output is symetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	272 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	273 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	274 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	275 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	276 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	277 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	278 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	279 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	280 double Y = Y0 + jj rY;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	281 double cc = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	282 double nw = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	283 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	284 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	285 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	286 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	287 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	288 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	289 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	290 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	291 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	292 cc += z*W[k];
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	293 nw += W[k];
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	294 }
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	295 size_t j = jj + ii*cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	296 CC[i] = cc;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	297 CC[j] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	298 if (NN != NULL) {
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	299 NN[i] = nw;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	300 NN[j] = nw;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	301 }
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	302 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	303 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	304 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	305 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	306 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	307 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	308 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	309 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	310 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	311 double Y = Y0 + jj rY;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	312 double cc = 0.0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	313 size_t nn = 0;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	314 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	315 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	316 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	317 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	318 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	319 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	320 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	321 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	322 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	323 cc += z;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	324 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	325 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	326 size_t j = jj + ii*cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	327 CC[i] = cc;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	328 CC[j] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	329 if (NN != NULL) {
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	330 NN[i] = (double)nn;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	331 NN[j] = (double)nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	332 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	333 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	334
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	335 }
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	336
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	337 #ifdef __GNUC__
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	338
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	339 else if (ACC_LEVEL == 1) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	340 /*------ version 2 ---------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	341 using naive summation with extended accuracy [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	342 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	343 if ( (X0 != Y0) \|\| (cX != cY) )
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	344 /****** X!=Y, output is not symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	345 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	346 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	347 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	348 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	349 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	350 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	351 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	352 long double nn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	353 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	354 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	355 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	356 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	357 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	358 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	359 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	360 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	361 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	362 cc += z*W[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	363 nn += W[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	364 }
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	365 CC[i] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	366 if (NN != NULL)
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	367 NN[i] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	368 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	369 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	370 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	371 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	372 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	373 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	374 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	375 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	376 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	377 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	378 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	379 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	380 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	381 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	382 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	383 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	384 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	385 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	386 cc += z;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	387 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	388 }
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	389 CC[i] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	390 if (NN != NULL)
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	391 NN[i] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	392 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	393 else // if (X0==Y0) && (cX==cY)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	394 /****** X==Y, output is symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	395 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	396 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	397 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	398 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	399 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	400 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	401 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	402 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	403 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	404 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	405 long double nn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	406 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	407 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	408 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	409 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	410 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	411 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	412 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	413 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	414 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	415 cc += z*W[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	416 nn += W[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	417 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	418 size_t j = jj + ii*cX;
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	419 CC[i] = (typeof(*CC))cc;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	420 CC[j] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	421 if (NN != NULL) {
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	422 NN[i] = (typeof(*NN))nn;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	423 NN[j] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	424 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	425 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	426 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	427 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	428 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	429 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	430 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	431 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	432 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	433 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	434 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	435 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	436 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	437 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	438 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	439 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	440 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	441 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	442 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	443 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	444 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	445 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	446 cc += z;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	447 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	448 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	449 size_t j = jj + ii*cX;
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	450 CC[i] = (typeof(*CC))cc;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	451 CC[j] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	452 if (NN != NULL) {
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	453 NN[i] = (typeof(*NN))nn;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	454 NN[j] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	455 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	456 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	457
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	458 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	459 else if (ACC_LEVEL == 3) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	460 /*------ version 3 ---------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	461 using Kahan's summation with extended (long double) accuracy [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	462 this gives more accurate results while the computational effort within the loop is about 4x as high
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	463 However, first tests show an increase in computational time of only about 25 %.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	464
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	465 [1] David Goldberg,
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	466 What Every Computer Scientist Should Know About Floating-Point Arithmetic
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	467 ACM Computing Surveys, Vol 23, No 1, March 1991
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	468 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	469 if ( (X0 != Y0) \|\| (cX != cY) )
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	470 /****** X!=Y, output is not symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	471 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	472 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	473 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	474 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	475 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	476 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	477 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	478 long double nn=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	479 long double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	480 long double rn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	481 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	482 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	483 long double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	484 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	485 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	486 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	487 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	488 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	489 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	490 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	491 // cc += z*W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	492 y = z*W[k]-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	493 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	494 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	495 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	496
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	497 // nn += W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	498 y = z*W[k]-rn;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	499 t = nn+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	500 rn= (t-nn)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	501 nn= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	502 }
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	503 CC[i] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	504 if (NN != NULL)
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	505 NN[i] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	506 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	507 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	508 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	509 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	510 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	511 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	512 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	513 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	514 long double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	515 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	516 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	517 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	518 long double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	519 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	520 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	521 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	522 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	523 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	524 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	525 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	526 // cc += z; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	527 y = z-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	528 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	529 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	530 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	531
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	532 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	533 }
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	534 CC[i] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	535 if (NN != NULL)
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	536 NN[i] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	537 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	538 else // if (X0==Y0) && (cX==cY)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	539 /****** X==Y, output is symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	540 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	541 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	542 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	543 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	544 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	545 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	546 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	547 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	548 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	549 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	550 long double nn=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	551 long double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	552 long double rn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	553 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	554 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	555 long double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	556 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	557 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	558 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	559 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	560 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	561 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	562 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	563 // cc += z*W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	564 y = z*W[k]-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	565 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	566 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	567 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	568
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	569 // nn += W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	570 y = z*W[k]-rn;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	571 t = nn+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	572 rn= (t-nn)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	573 nn= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	574 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	575 size_t j = jj + ii*cX;
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	576 CC[i] = (typeof(*CC))cc;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	577 CC[j] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	578 if (NN != NULL) {
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	579 NN[i] = (typeof(*NN))nn;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	580 NN[j] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	581 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	582 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	583 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	584 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	585 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	586 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	587 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	588 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	589 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	590 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	591 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	592 long double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	593 long double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	594 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	595 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	596 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	597 long double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	598 long double z = ((long double)X[k])*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	599 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	600 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	601 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	602 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	603 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	604 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	605 // cc += z; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	606 y = z-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	607 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	608 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	609 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	610
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	611 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	612 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	613 size_t j = jj + ii*cX;
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	614 CC[i] = (typeof(*CC))cc;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	615 CC[j] = (typeof(*CC))cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	616 if (NN != NULL) {
12685 f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	617 NN[i] = (typeof(*NN))nn;
f26b1170ea90 resulting values should be really converted to output data type schloegl parents: 12640 diff changeset	618 NN[j] = (typeof(*NN))nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	619 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	620 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	621 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	622 else if (ACC_LEVEL == 2) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	623 /*------ version 3 ---------------------
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	624 using Kahan's summation with double accuracy [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	625 this gives more accurate results while the computational effort within the loop is about 4x as high
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	626 However, first tests show an increase in computational time of only about 25 %.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	627
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	628 [1] David Goldberg,
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	629 What Every Computer Scientist Should Know About Floating-Point Arithmetic
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	630 ACM Computing Surveys, Vol 23, No 1, March 1991
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	631 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	632 if ( (X0 != Y0) \|\| (cX != cY) )
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	633 /****** X!=Y, output is not symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	634 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	635 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	636 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	637 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	638 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	639 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	640 double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	641 double nn=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	642 double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	643 double rn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	644 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	645 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	646 double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	647 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	648 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	649 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	650 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	651 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	652 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	653 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	654 // cc += z*W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	655 y = z*W[k]-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	656 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	657 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	658 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	659
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	660 // nn += W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	661 y = z*W[k]-rn;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	662 t = nn+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	663 rn= (t-nn)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	664 nn= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	665 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	666 CC[i] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	667 if (NN != NULL)
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	668 NN[i] = nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	669 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	670 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	671 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	672 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	673 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	674 double X = X0 + (i%cX) rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	675 double Y = Y0 + (i/cX) rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	676 double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	677 double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	678 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	679 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	680 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	681 double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	682 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	683 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	684 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	685 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	686 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	687 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	688 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	689 // cc += z; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	690 y = z-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	691 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	692 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	693 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	694
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	695 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	696 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	697 CC[i] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	698 if (NN != NULL)
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	699 NN[i] = (double)nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	700 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	701 else // if (X0==Y0) && (cX==cY)
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	702 /****** X==Y, output is symmetric *****/
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	703 if (W) /* weighted version */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	704 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	705 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	706 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	707 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	708 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	709 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	710 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	711 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	712 double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	713 double nn=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	714 double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	715 double rn=0.0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	716 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	717 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	718 double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	719 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	720 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	721 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	722 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	723 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	724 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	725 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	726 // cc += z*W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	727 y = z*W[k]-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	728 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	729 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	730 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	731
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	732 // nn += W[k]; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	733 y = z*W[k]-rn;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	734 t = nn+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	735 rn= (t-nn)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	736 nn= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	737 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	738 size_t j = jj + ii*cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	739 CC[i] = cc;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	740 CC[j] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	741 if (NN != NULL) {
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	742 NN[i] = nn;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	743 NN[j] = nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	744 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	745 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	746 else /* no weights, all weights are 1 */
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	747 #pragma omp for schedule(dynamic) nowait
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	748 for (i = 0; i < cX * cY; i++)
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	749 {
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	750 size_t ii = i%cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	751 size_t jj = i/cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	752 if (ii < jj) continue;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	753 double X = X0 + ii rX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	754 double Y = Y0 + jj rY;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	755 double cc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	756 double rc=0.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	757 size_t nn=0;
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	758 size_t k;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	759 for (k=0; k<rX; k++) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	760 double t,y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	761 double z = X[k]*Y[k];
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	762 if (isnan(z)) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	763 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	764 flag_isNaN = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	765 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	766 continue;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	767 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	768 // cc += z; [1]
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	769 y = z-rc;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	770 t = cc+y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	771 rc= (t-cc)-y;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	772 cc= t;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	773
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	774 nn++;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	775 }
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	776 size_t j = jj + ii*cX;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	777 CC[i] = cc;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	778 CC[j] = cc;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	779 if (NN != NULL) {
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	780 NN[i] = (double)nn;
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	781 NN[j] = (double)nn;
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	782 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	783 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	784 }
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	785 #endif
7992 db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	786 } // end pragma omp parallel
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	787
db5092052107 OpenMP support included schloegl parents: 7889 diff changeset	788
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	789 #ifndef NO_FLAG
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	790 //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned.");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	791 /* this is a hack, the third input argument is used to return whether a NaN occurred or not.
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	792 this requires that the input argument is a non-empty variable
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	793 */
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	794 if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	795 // set FLAG_NANS_OCCURED
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	796 switch (mxGetClassID(PInputs[2])) {
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	797 case mxDOUBLE_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	798 (double)mxGetData(PInputs[2]) = 1.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	799 break;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	800 case mxSINGLE_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	801 (float)mxGetData(PInputs[2]) = 1.0;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	802 break;
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	803 case mxLOGICAL_CLASS:
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	804 case mxCHAR_CLASS:
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	805 case mxINT8_CLASS:
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	806 case mxUINT8_CLASS:
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	807 (char)mxGetData(PInputs[2]) = 1;
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	808 break;
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	809 #ifdef __GNUC__
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	810 case mxINT16_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	811 case mxUINT16_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	812 (uint16_t)mxGetData(PInputs[2]) = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	813 break;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	814 case mxINT32_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	815 case mxUINT32_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	816 (uint32_t)mxGetData(PInputs[2])= 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	817 break;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	818 case mxINT64_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	819 case mxUINT64_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	820 (uint64_t)mxGetData(PInputs[2]) = 1;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	821 break;
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	822 case mxFUNCTION_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	823 case mxUNKNOWN_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	824 case mxCELL_CLASS:
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	825 case mxSTRUCT_CLASS:
6585 ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	826 #endif
ae521dec5b54 partial support of lcc-win compiler schloegl parents: 6549 diff changeset	827 default:
6549 41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	828 mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurence.");
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	829 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	830 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	831 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	832 #endif
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	833 }
41e9854fe26d use .cpp instead of .c schloegl parents: diff changeset	834

Mercurial > forge

annotate extra/NaN/src/covm_mex.cpp @ 12685:f26b1170ea90 octave-forge