Mercurial > forge
changeset 5582:f6b16a46e7b7 octave-forge
NaN-toolbox: support weighting of samples
author | schloegl |
---|---|
date | Wed, 06 May 2009 11:08:58 +0000 |
parents | 0f5993c2eaef |
children | 24eba4e66040 |
files | extra/NaN/inst/covm.m extra/NaN/inst/sumskipnan.m extra/NaN/src/sumskipnan_mex.cpp |
diffstat | 3 files changed, 217 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/inst/covm.m Tue May 05 13:45:56 2009 +0000 +++ b/extra/NaN/inst/covm.m Wed May 06 11:08:58 2009 +0000 @@ -1,4 +1,4 @@ -function [CC,NN] = covm(X,Y,Mode); +function [CC,NN] = covm(X,Y,Mode,W); % COVM generates covariance matrix % X and Y can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. @@ -8,6 +8,8 @@ % calculates the (auto-)correlation matrix of X % COVM(X,Y,Mode); % calculates the crosscorrelation between X and Y +% COVM(...,W); +% weighted crosscorrelation % % Mode = 'M' minimum or standard mode [default] % C = X'*X; or X'*Y correlation matrix @@ -55,6 +57,7 @@ FLAG_NANS_OCCURED = logical(0); % default value end; +W = []; if nargin<3, if nargin==2, if isnumeric(Y), @@ -69,6 +72,16 @@ elseif nargin==0, error('Missing argument(s)'); end; + +elseif (nargin==3) && isnumeric(Mode) && ~isnumeric(Y); + W = Mode; + Mode = Y; + Y = 0; + +elseif (nargin==4) && ~isnumeric(Mode) && isnumeric(Y); + ; %% ok +else + error('invalid input arguments'); end; Mode = upper(Mode); @@ -88,19 +101,35 @@ warning('Covariance is ill-defined, because of too few observations (rows)'); end; +if ~isempty(W) + W = W(:); + if (r1~=numel(W)) + error('Error COVM: size of weight vector does not fit number of rows'); + end; + w = spdiags(W(:),0,numel(W),numel(W)); + nn = sum(W(:)); +else + w = 1; + nn = r1; +end; + if ~isempty(Y), if (~any(Mode=='D') & ~any(Mode=='E')), % if Mode == M - NN = real(X==X)'*real(Y==Y); + NN = (w*real(X==X))'*(w*real(Y==Y)); FLAG_NANS_OCCURED = any(NN(:)<r1); X(X~=X) = 0; % skip NaN's Y(Y~=Y) = 0; % skip NaN's - CC = X'*Y; + CC = (w*X)'*(w*Y); else % if any(Mode=='D') | any(Mode=='E'), - [S1,N1] = sumskipnan(X,1); - [S2,N2] = sumskipnan(Y,1); + %[S1,N1] = sumskipnan(X,1,W); + %[S2,N2] = sumskipnan(Y,1,W); + S1 = sumskipnan(w*X,1); + S2 = sumskipnan(w*Y,1); + N1 = sum(w*(~isnan(X)),1); + N2 = sum(w*(~isnan(Y)),1); - NN = real(X==X)'*real(Y==Y); - FLAG_NANS_OCCURED = any(NN(:)<r1); + NN = (w*real(X==X))'*(w*real(Y==Y)); + FLAG_NANS_OCCURED = any(NN(:)~=nn); if any(Mode=='D'), % detrending mode X = X - ones(r1,1)*(S1./N1); @@ -124,16 +153,19 @@ else if (~any(Mode=='D') & ~any(Mode=='E')), % if Mode == M - tmp = real(X==X); + tmp = w*real(X==X); NN = tmp'*tmp; X(X~=X) = 0; % skip NaN's - CC = X'*X; + tmp = w*X; + CC = tmp'*tmp; FLAG_NANS_OCCURED = any(NN(:)<r1); else % if any(Mode=='D') | any(Mode=='E'), - [S,N] = sumskipnan(X,1); - tmp = real(X==X); + %[S,N] = sumskipnan(X,1,W); + S = sumskipnan(w*X,1); + N = sum(w*real(~isnan(X)),1); + tmp = w*real(X==X); NN = tmp'*tmp; - FLAG_NANS_OCCURED = any(NN(:)<r1); + FLAG_NANS_OCCURED = any(NN(:)~=nn); if any(Mode=='D'), % detrending mode X = X - ones(r1,1)*(S./N); if any(Mode=='1'), % 'D1' @@ -144,7 +176,8 @@ end; X(X~=X) = 0; % skip NaN's - CC = X'*X; + tmp = w*X; + CC = tmp'*tmp; if any(Mode=='E'), % extended mode NN = [r1, N; N', NN];
--- a/extra/NaN/inst/sumskipnan.m Tue May 05 13:45:56 2009 +0000 +++ b/extra/NaN/inst/sumskipnan.m Wed May 06 11:08:58 2009 +0000 @@ -1,4 +1,4 @@ -function [o,count,SSQ] = sumskipnan(x,DIM) +function [o,count,SSQ] = sumskipnan(x, DIM, W) % SUMSKIPNAN adds all non-NaN values. % % All NaN's are skipped; NaN's are considered as missing values. @@ -11,8 +11,12 @@ % % Y = sumskipnan(x [,DIM]) % [Y,N,SSQ] = sumskipnan(x [,DIM]) +% [...] = sumskipnan(x, DIM, W) % -% DIM dimension +% x input data +% DIM dimension (default: []) +% empty DIM sets DIM to first non singleton dimension +% W weight vector for weighted sum, numel(W) must fit size(x,DIM) % Y resulting sum % N number of valid (not missing) elements % SSQ sum of squares @@ -23,6 +27,7 @@ % features: % - can deal with NaN's (missing values) % - implements dimension argument. +% - computes weighted sum % - compatible with Matlab and Octave % % see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ, @@ -31,7 +36,7 @@ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by -% the Free Software Foundation; either version 2 of the License, or +% the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, @@ -53,6 +58,9 @@ if nargin<2, DIM = []; end; +if nargin<3, + W = []; +end; % an efficient implementation in C of the following lines % could significantly increase performance @@ -75,11 +83,13 @@ end if (DIM<1) DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5 - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % non-float data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -if ~isa(x,'float') || ~flag_implicit_skip_nan(), %%% skip always NaN's +if (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's + if ~isempty(W) + error('SUMSKIPNAN: weighted sum of integers not supported, yet'); + end; x = double(x); o = sum(x,DIM); if nargout>1 @@ -95,6 +105,10 @@ return; end; +if ~isempty(W) && (size(x,DIM)~=numel(W)) + error('SUMSKIPNAN: size of weight vector does not match size(x,DIM)'); +end; + %% mex and oct files expect double x = double(x); @@ -112,16 +126,16 @@ end; if (nargout<2), - o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED); + o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) - io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED); + io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); o = o + i*io; end; return; elseif (nargout==2), - [o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED); + [o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) - [io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED); + [io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); if any(count(:)-icount(:)) error('Number of NaNs differ for REAL and IMAG part'); else @@ -130,9 +144,9 @@ end; return; elseif (nargout>=3), - [o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED); + [o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) - [io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED); + [io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); if any(count(:)-icount(:)) error('Number of NaNs differ for REAL and IMAG part'); else @@ -145,6 +159,10 @@ end; +if ~isempty(W) + error('weighted sumskipnan requires sumskipnan_mex'); +end; + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % count non-NaN's %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/extra/NaN/src/sumskipnan_mex.cpp Tue May 05 13:45:56 2009 +0000 +++ b/extra/NaN/src/sumskipnan_mex.cpp Wed May 06 11:08:58 2009 +0000 @@ -23,6 +23,8 @@ // Input: // - array to sum // - dimension to sum +// - flag (is actually an output argument telling whether some NaN was observed) +// - weight vector to compute weighted sum // // Output: // - sums @@ -44,6 +46,9 @@ inline int __sumskipnan2__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN); inline int __sumskipnan3__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN); +inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); + //#define NO_FLAG void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) @@ -54,6 +59,7 @@ double* LOutputCount; double* LOutputSum2; double x; + double* W = NULL; // weight vector //unsigned long LCount; mwSize DIM = 0; @@ -65,8 +71,8 @@ char flag_isNaN = 0; // check for proper number of input and output arguments - if ((PInputCount <= 0) || (PInputCount > 3)) - mexErrMsgTxt("SumSkipNan.MEX requires 1,2 or 3 arguments."); + if ((PInputCount <= 0) || (PInputCount > 4)) + mexErrMsgTxt("SumSkipNan.MEX requires between 1 and 4 arguments."); if (POutputCount > 4) mexErrMsgTxt("SumSkipNan.MEX has 1 to 3 output arguments."); @@ -117,6 +123,16 @@ SZ2[DIM-1] = 1; // size of output is same as size of input but SZ(DIM)=1; + // get weight vector for weighted sumskipnan + if (PInputCount > 3) { + if (!mxGetNumberOfElements(PInputs[3])) + ; // empty weight vector - no weighting + else if (mxGetNumberOfElements(PInputs[3])==D2) + W = mxGetPr(PInputs[3]); + else + mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); + } + // create outputs #define TYP mxDOUBLE_CLASS @@ -142,13 +158,28 @@ if (D1==1) { double count; - __sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN); + if (W) + __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W); + else + __sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN); } else for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM - do { + if (W) do { + register double x = *LInput; + if (!isnan(x)) { + LOutputSum[ix2] += W[j]*x; + } +#ifndef NO_FLAG + else + flag_isNaN = 1; +#endif + LInput++; + ix2++; + } while (ix2 != (l+1)*D1); + else do { register double x = *LInput; if (!isnan(x)) { LOutputSum[ix2] += x; @@ -171,13 +202,29 @@ ix1 = ix0*D2; // index for input if (D1==1) { - __sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN); + if (W) + __sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W); + else + __sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN); } else for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM - do { + if (W) do { + register double x = *LInput; + if (!isnan(x)) { + LOutputCount[ix2] += W[j]; + LOutputSum[ix2] += W[j]*x; + } +#ifndef NO_FLAG + else + flag_isNaN = 1; +#endif + LInput++; + ix2++; + } while (ix2 != (l+1)*D1); + else do { register double x = *LInput; if (!isnan(x)) { LOutputCount[ix2] += 1.0; @@ -202,13 +249,31 @@ if (D1==1) { size_t count; - __sumskipnan3__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN); + if (W) + __sumskipnan3w__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W); + else + __sumskipnan3__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN); } else for (j=0; j<D2; j++) { // minimize cache misses ix2 = ix0; // index for output // Inner LOOP: along dimensions < DIM - do { + if (W) do { + register double x = *LInput; + if (!isnan(x)) { + LOutputCount[ix2] += W[j]; + double t = W[j]*x; + LOutputSum[ix2] += t; + LOutputSum2[ix2] += t*t; + } +#ifndef NO_FLAG + else + flag_isNaN = 1; +#endif + LInput++; + ix2++; + } while (ix2 != (l+1)*D1); + else do { register double x = *LInput; if (!isnan(x)) { LOutputCount[ix2] += 1.0; @@ -331,3 +396,72 @@ *No = (double)count; } +#define stride 1 +inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + register double sum=0; + register double count=0; + register char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + do { + register double x = *data; + if (!isnan(x)) + { + count += *W; + sum += *W*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data +=stride; + W++; + } + while (data < end); + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + *No = count; + +} + +inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + register double sum=0; + register double msq=0; + register double count=0; + register char flag=0; + // LOOP along dimension DIM + + void *end = data + stride*Ni; + do { + register double x = *data; + if (!isnan(x)) { + count += *W; + double t = *W*x; + sum += t; + msq += t*t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = sum; + *s2 = msq; + *No = count; +} +