Mercurial > forge
view extra/NaN/src/covm_mex.cpp @ 6585:ae521dec5b54 octave-forge
partial support of lcc-win compiler
author | schloegl |
---|---|
date | Thu, 14 Jan 2010 17:50:16 +0000 |
parents | 41e9854fe26d |
children | b9f35668b55e |
line wrap: on
line source
/* //------------------------------------------------------------------- #pragma hdrstop //------------------------------------------------------------------- // C-MEX implementation of COVM - this function is part of the NaN-toolbox. // // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see <http://www.gnu.org/licenses/>. // // // covm: in-product of matrices, NaN are skipped. // usage: // [cc,nn] = covm_mex(X,Y,flag,W); // // Input: // - X: // - Y: [optional], if empty, Y=X; // - flag: if not empty, it is set to 1 if some NaN was observed // - W: weight vector to compute weighted correlation // // Output: // - CC = X' * sparse(diag(W)) * Y while NaN's are skipped // - NN = real(~isnan(X)')*sparse(diag(W))*real(~isnan(Y)) count of valid (non-NaN) elements // computed more efficiently // // $Id$ // Copyright (C) 2009 Alois Schloegl <a.schloegl@ieee.org> // This function is part of the NaN-toolbox // http://hci.tugraz.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- */ #ifdef __GNUC__ #include <inttypes.h> #endif #include <math.h> #include "mex.h" /*#define NO_FLAG*/ void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { double *X0,*Y0=NULL,*X,*Y,*W=NULL; double *CC,cc,nw; double *NN=NULL; size_t rX,cX,rY,cY,nW = 0; size_t i,j,k,nn; char flag_isNaN = 0, flag_speed=0; int ACC_LEVEL; /*********** check input arguments *****************/ // check for proper number of input and output arguments if ((PInputCount <= 0) || (PInputCount > 5)) { mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n"); /* mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n"); mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n"); mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'*sparse(diag(W))*Y, number of rows of X and Y must match\n"); mexPrintf("\t... = covm_mex(X,[], ...)\n\t\t computes CC=X'*sparse(diag(W))*X\n"); mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n"); mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n"); mexPrintf("\t\t if isempty(W), all weights are 1\n"); mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n"); */ return; } if (POutputCount > 2) mexErrMsgTxt("covm.MEX has 1 to 2 output arguments."); // get 1st argument if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0])) X0 = mxGetPr(PInputs[0]); else mexErrMsgTxt("First argument must be REAL/DOUBLE."); rX = mxGetM(PInputs[0]); cX = mxGetN(PInputs[0]); // get 2nd argument if (PInputCount > 1) { if (!mxGetNumberOfElements(PInputs[1])) ; // Y0 = NULL; else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1])) Y0 = mxGetPr(PInputs[1]); else mexErrMsgTxt("Second argument must be REAL/DOUBLE."); } // get weight vector for weighted sumskipnan if (PInputCount > 3) { // get 4th argument nW = mxGetNumberOfElements(PInputs[3]); if (!nW) ; else if (nW == rX) W = mxGetPr(PInputs[3]); else mexErrMsgTxt("number of elements in W must match numbers of rows in X"); } #ifdef __GNUC__ ACC_LEVEL = 0; { mxArray *LEVEL = NULL; int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); if (!s) { ACC_LEVEL = (int) mxGetScalar(LEVEL); } mxDestroyArray(LEVEL); } // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); #endif if (Y0==NULL) { Y0 = X0; rY = rX; cY = cX; } else { rY = mxGetM(PInputs[1]); cY = mxGetN(PInputs[1]); } if (rX != rY) mexErrMsgTxt("number of rows in X and Y do not match"); /*********** create output arguments *****************/ POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL); CC = mxGetPr(POutput[0]); if (POutputCount > 1) { POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL); NN = mxGetPr(POutput[1]); } /*********** compute covariance *****************/ #if 0 /*------ version 1 --------------------- this solution is slower than the alternative solution below for transposed matrices, this might be faster. */ for (k=0; k<rX; k++) { double w; if (W) { w = W[k]; for (i=0; i<cX; i++) { double x = X0[k+i*rX]; if (isnan(x)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } for (j=0; j<cY; j++) { double y = Y0[k+j*rY]; if (isnan(y)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } CC[i+j*cX] += x*y*w; if (NN != NULL) NN[i+j*cX] += w; } } } else for (i=0; i<cX; i++) { double x = X0[k+i*rX]; if (isnan(x)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } for (j=0; j<cY; j++) { double y = Y0[k+j*rY]; if (isnan(y)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } CC[i+j*cX] += x*y; if (NN != NULL) NN[i+j*cX] += 1.0; } } } #else #ifdef __GNUC__ if (ACC_LEVEL == 0) #endif { /*------ version 2 --------------------- using naive summation with double accuracy [1] */ if ( (X0 != Y0) || (cX != cY) ) /******** X!=Y, output is not symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; cc=0.0; nw=0.0; for (k=0; k<rX; k++) { double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z*W[k]; nw += W[k]; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = nw; } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; cc=0.0; nn=0; for (k=0; k<rX; k++) { double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z; nn++; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = (double)nn; } else // if (X0==Y0) && (cX==cY) /******** X==Y, output is symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; cc=0.0; nw=0.0; for (k=0; k<rX; k++) { double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z*W[k]; nw += W[k]; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = nw; NN[j+i*cX] = nw; } } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; cc=0.0; nn=0; for (k=0; k<rX; k++) { double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z; nn++; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = (double)nn; NN[j+i*cX] = (double)nn; } } } #ifdef __GNUC__ else if (ACC_LEVEL == 1) { /*------ version 2 --------------------- using naive summation with extended accuracy [1] */ if ( (X0 != Y0) || (cX != cY) ) /******** X!=Y, output is not symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double nn=0.0; for (k=0; k<rX; k++) { long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z*W[k]; nn += W[k]; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = nn; } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; size_t nn=0; for (k=0; k<rX; k++) { long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z; nn++; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = (double)nn; } else // if (X0==Y0) && (cX==cY) /******** X==Y, output is symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double nn=0.0; for (k=0; k<rX; k++) { long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z*W[k]; nn += W[k]; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = nn; NN[j+i*cX] = nn; } } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; size_t nn=0; for (k=0; k<rX; k++) { long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } cc += z; nn++; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = (double)nn; NN[j+i*cX] = (double)nn; } } } else if (ACC_LEVEL == 3) { /*------ version 3 --------------------- using Kahan's summation with extended (long double) accuracy [1] this gives more accurate results while the computational effort within the loop is about 4x as high However, first test show an increase in computational time of only about 25 %. [1] David Goldberg, What Every Computer Scientist Should Know About Floating-Point Arithmetic ACM Computing Surveys, Vol 23, No 1, March 1991 */ if ( (X0 != Y0) || (cX != cY) ) /******** X!=Y, output is not symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double nn=0.0; long double rc=0.0; long double rn=0.0; for (k=0; k<rX; k++) { long double t,y; long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z*W[k]; [1] y = z*W[k]-rc; t = cc+y; rc= (t-cc)-y; cc= t; // nn += W[k]; [1] y = z*W[k]-rn; t = nn+y; rn= (t-nn)-y; nn= t; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = nn; } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double rc=0.0; size_t nn=0; for (k=0; k<rX; k++) { long double t,y; long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z; [1] y = z-rc; t = cc+y; rc= (t-cc)-y; cc= t; nn++; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = (double)nn; } else // if (X0==Y0) && (cX==cY) /******** X==Y, output is symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double nn=0.0; long double rc=0.0; long double rn=0.0; for (k=0; k<rX; k++) { long double t,y; long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z*W[k]; [1] y = z*W[k]-rc; t = cc+y; rc= (t-cc)-y; cc= t; // nn += W[k]; [1] y = z*W[k]-rn; t = nn+y; rn= (t-nn)-y; nn= t; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = nn; NN[j+i*cX] = nn; } } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; long double cc=0.0; long double rc=0.0; size_t nn=0; for (k=0; k<rX; k++) { long double t,y; long double z = ((long double)X[k])*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z; [1] y = z-rc; t = cc+y; rc= (t-cc)-y; cc= t; nn++; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = (double)nn; NN[j+i*cX] = (double)nn; } } } else if (ACC_LEVEL == 2) { /*------ version 3 --------------------- using Kahan's summation with double accuracy [1] this gives more accurate results while the computational effort within the loop is about 4x as high However, first test show an increase in computational time of only about 25 %. [1] David Goldberg, What Every Computer Scientist Should Know About Floating-Point Arithmetic ACM Computing Surveys, Vol 23, No 1, March 1991 */ if ( (X0 != Y0) || (cX != cY) ) /******** X!=Y, output is not symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; double cc=0.0; double nn=0.0; double rc=0.0; double rn=0.0; for (k=0; k<rX; k++) { double t,y; double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z*W[k]; [1] y = z*W[k]-rc; t = cc+y; rc= (t-cc)-y; cc= t; // nn += W[k]; [1] y = z*W[k]-rn; t = nn+y; rn= (t-nn)-y; nn= t; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = nn; } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=0; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; double cc=0.0; double rc=0.0; size_t nn=0; for (k=0; k<rX; k++) { double t,y; double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z; [1] y = z-rc; t = cc+y; rc= (t-cc)-y; cc= t; nn++; } CC[i+j*cX] = cc; if (NN != NULL) NN[i+j*cX] = (double)nn; } else // if (X0==Y0) && (cX==cY) /******** X==Y, output is symetric *******/ if (W) /* weighted version */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; double cc=0.0; double nn=0.0; double rc=0.0; double rn=0.0; for (k=0; k<rX; k++) { double t,y; double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z*W[k]; [1] y = z*W[k]-rc; t = cc+y; rc= (t-cc)-y; cc= t; // nn += W[k]; [1] y = z*W[k]-rn; t = nn+y; rn= (t-nn)-y; nn= t; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = nn; NN[j+i*cX] = nn; } } else /* no weights, all weights are 1 */ for (i=0; i<cX; i++) for (j=i; j<cY; j++) { X = X0+i*rX; Y = Y0+j*rY; double cc=0.0; double rc=0.0; size_t nn=0; for (k=0; k<rX; k++) { double t,y; double z = X[k]*Y[k]; if (isnan(z)) { #ifndef NO_FLAG flag_isNaN = 1; #endif continue; } // cc += z; [1] y = z-rc; t = cc+y; rc= (t-cc)-y; cc= t; nn++; } CC[i+j*cX] = cc; CC[j+i*cX] = cc; if (NN != NULL) { NN[i+j*cX] = (double)nn; NN[j+i*cX] = (double)nn; } } } #endif #ifndef NO_FLAG //mexPrintf("Third argument must be not empty - otherwise status whether a NaN occured or not cannot be returned."); /* this is a hack, the third input argument is used to return whether a NaN occured or not. this requires that the input argument is a non-empty variable */ if (flag_isNaN && (PInputCount > 2) && mxGetNumberOfElements(PInputs[2])) { // set FLAG_NANS_OCCURED switch (mxGetClassID(PInputs[2])) { case mxDOUBLE_CLASS: *(double*)mxGetData(PInputs[2]) = 1.0; break; case mxSINGLE_CLASS: *(float*)mxGetData(PInputs[2]) = 1.0; break; case mxLOGICAL_CLASS: case mxCHAR_CLASS: case mxINT8_CLASS: case mxUINT8_CLASS: *(char*)mxGetData(PInputs[2]) = 1; break; #ifdef __GNUC__ case mxINT16_CLASS: case mxUINT16_CLASS: *(uint16_t*)mxGetData(PInputs[2]) = 1; break; case mxINT32_CLASS: case mxUINT32_CLASS: *(uint32_t*)mxGetData(PInputs[2])= 1; break; case mxINT64_CLASS: case mxUINT64_CLASS: *(uint64_t*)mxGetData(PInputs[2]) = 1; break; case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxCELL_CLASS: case mxSTRUCT_CLASS: #endif default: mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurence."); } } #endif #endif }