changeset 5582:f6b16a46e7b7 octave-forge

NaN-toolbox: support weighting of samples
author schloegl
date Wed, 06 May 2009 11:08:58 +0000
parents 0f5993c2eaef
children 24eba4e66040
files extra/NaN/inst/covm.m extra/NaN/inst/sumskipnan.m extra/NaN/src/sumskipnan_mex.cpp
diffstat 3 files changed, 217 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/extra/NaN/inst/covm.m	Tue May 05 13:45:56 2009 +0000
+++ b/extra/NaN/inst/covm.m	Wed May 06 11:08:58 2009 +0000
@@ -1,4 +1,4 @@
-function [CC,NN] = covm(X,Y,Mode);
+function [CC,NN] = covm(X,Y,Mode,W);
 % COVM generates covariance matrix
 % X and Y can contain missing values encoded with NaN.
 % NaN's are skipped, NaN do not result in a NaN output. 
@@ -8,6 +8,8 @@
 %      calculates the (auto-)correlation matrix of X
 % COVM(X,Y,Mode);
 %      calculates the crosscorrelation between X and Y
+% COVM(...,W);
+%	weighted crosscorrelation 
 %
 % Mode = 'M' minimum or standard mode [default]
 % 	C = X'*X; or X'*Y correlation matrix
@@ -55,6 +57,7 @@
 	FLAG_NANS_OCCURED = logical(0);  % default value 
 end;
 
+W = []; 
 if nargin<3,
         if nargin==2,
 		if isnumeric(Y),
@@ -69,6 +72,16 @@
         elseif nargin==0,
                 error('Missing argument(s)');
         end;
+
+elseif (nargin==3) && isnumeric(Mode) && ~isnumeric(Y);
+	W = Mode; 
+	Mode = Y;
+	Y = 0;
+
+elseif (nargin==4) && ~isnumeric(Mode) && isnumeric(Y);
+	; %% ok 
+else 
+	error('invalid input arguments');	
 end;        
 
 Mode = upper(Mode);
@@ -88,19 +101,35 @@
         warning('Covariance is ill-defined, because of too few observations (rows)');
 end;
 
+if ~isempty(W)
+	W = W(:);
+	if (r1~=numel(W))
+		error('Error COVM: size of weight vector does not fit number of rows');
+	end; 	
+	w = spdiags(W(:),0,numel(W),numel(W));
+	nn = sum(W(:)); 
+else
+	w = 1;
+	nn = r1;
+end; 
+	
 if ~isempty(Y),
         if (~any(Mode=='D') & ~any(Mode=='E')), % if Mode == M
-        	NN = real(X==X)'*real(Y==Y);
+        	NN = (w*real(X==X))'*(w*real(Y==Y));
 		FLAG_NANS_OCCURED = any(NN(:)<r1);
 	        X(X~=X) = 0; % skip NaN's
 	        Y(Y~=Y) = 0; % skip NaN's
-        	CC = X'*Y;
+        	CC = (w*X)'*(w*Y);
         else  % if any(Mode=='D') | any(Mode=='E'), 
-	        [S1,N1] = sumskipnan(X,1);
-                [S2,N2] = sumskipnan(Y,1);
+	        %[S1,N1] = sumskipnan(X,1,W);
+                %[S2,N2] = sumskipnan(Y,1,W);
+	        S1 = sumskipnan(w*X,1);
+                S2 = sumskipnan(w*Y,1);
+	        N1 = sum(w*(~isnan(X)),1);
+                N2 = sum(w*(~isnan(Y)),1);
                 
-                NN = real(X==X)'*real(Y==Y);
-		FLAG_NANS_OCCURED = any(NN(:)<r1);
+                NN = (w*real(X==X))'*(w*real(Y==Y));
+		FLAG_NANS_OCCURED = any(NN(:)~=nn);
         
 	        if any(Mode=='D'), % detrending mode
         		X  = X - ones(r1,1)*(S1./N1);
@@ -124,16 +153,19 @@
         
 else        
         if (~any(Mode=='D') & ~any(Mode=='E')), % if Mode == M
-        	tmp = real(X==X);
+        	tmp = w*real(X==X);
 		NN  = tmp'*tmp;
 		X(X~=X) = 0; % skip NaN's
-	        CC = X'*X;
+		tmp = w*X; 
+	        CC = tmp'*tmp;
 		FLAG_NANS_OCCURED = any(NN(:)<r1);
         else  % if any(Mode=='D') | any(Mode=='E'), 
-	        [S,N] = sumskipnan(X,1);
-        	tmp = real(X==X);
+	        %[S,N] = sumskipnan(X,1,W);
+	        S = sumskipnan(w*X,1);
+	        N = sum(w*real(~isnan(X)),1);
+        	tmp = w*real(X==X);
                 NN  = tmp'*tmp;
-		FLAG_NANS_OCCURED = any(NN(:)<r1);
+		FLAG_NANS_OCCURED = any(NN(:)~=nn);
                 if any(Mode=='D'), % detrending mode
 	                X  = X - ones(r1,1)*(S./N);
                         if any(Mode=='1'),  %  'D1'
@@ -144,7 +176,8 @@
                 end;
                 
                 X(X~=X) = 0; % skip NaN's
-                CC = X'*X;
+                tmp = w*X;
+                CC = tmp'*tmp;
                 
                 if any(Mode=='E'), % extended mode
                         NN = [r1, N; N', NN];
--- a/extra/NaN/inst/sumskipnan.m	Tue May 05 13:45:56 2009 +0000
+++ b/extra/NaN/inst/sumskipnan.m	Wed May 06 11:08:58 2009 +0000
@@ -1,4 +1,4 @@
-function [o,count,SSQ] = sumskipnan(x,DIM)
+function [o,count,SSQ] = sumskipnan(x, DIM, W)
 % SUMSKIPNAN adds all non-NaN values. 
 %
 % All NaN's are skipped; NaN's are considered as missing values. 
@@ -11,8 +11,12 @@
 % 
 % Y = sumskipnan(x [,DIM])
 % [Y,N,SSQ] = sumskipnan(x [,DIM])
+% [...] = sumskipnan(x, DIM, W)
 % 
-% DIM	dimension
+% x	input data 	
+% DIM	dimension (default: [])
+%	empty DIM sets DIM to first non singleton dimension	
+% W	weight vector for weighted sum, numel(W) must fit size(x,DIM)
 % Y	resulting sum
 % N	number of valid (not missing) elements
 % SSQ	sum of squares
@@ -23,6 +27,7 @@
 % features:
 % - can deal with NaN's (missing values)
 % - implements dimension argument. 
+% - computes weighted sum 
 % - compatible with Matlab and Octave
 %
 % see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ, 
@@ -31,7 +36,7 @@
 
 %    This program is free software; you can redistribute it and/or modify
 %    it under the terms of the GNU General Public License as published by
-%    the Free Software Foundation; either version 2 of the License, or
+%    the Free Software Foundation; either version 3 of the License, or
 %    (at your option) any later version.
 %
 %    This program is distributed in the hope that it will be useful,
@@ -53,6 +58,9 @@
 if nargin<2,
         DIM = [];
 end;
+if nargin<3,
+        W = [];
+end;
 
 % an efficient implementation in C of the following lines 
 % could significantly increase performance 
@@ -75,11 +83,13 @@
 end
 if (DIM<1) DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5
 
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % non-float data 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-if ~isa(x,'float') || ~flag_implicit_skip_nan(), %%% skip always NaN's
+if  (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's
+	if ~isempty(W)
+		error('SUMSKIPNAN: weighted sum of integers not supported, yet');
+	end; 
 	x = double(x); 
 	o = sum(x,DIM);
 	if nargout>1
@@ -95,6 +105,10 @@
 	return; 
 end; 	
 
+if ~isempty(W) && (size(x,DIM)~=numel(W))
+	error('SUMSKIPNAN: size of weight vector does not match size(x,DIM)');
+end; 
+
 %% mex and oct files expect double
 x = double(x); 
 
@@ -112,16 +126,16 @@
 	end;
 	
 	if (nargout<2),
-		o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED);
+		o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
 		if (~isreal(x))
-			io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED);
+			io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
 			o  = o + i*io;
 		end; 
 		return; 
 	elseif (nargout==2),
-		[o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED);
+		[o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
 		if (~isreal(x))
-			[io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED);
+			[io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
 			if any(count(:)-icount(:))
 				error('Number of NaNs differ for REAL and IMAG part');
 			else
@@ -130,9 +144,9 @@
 		end; 
 		return; 
 	elseif (nargout>=3),
-		[o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED);
+		[o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W);
 		if (~isreal(x))
-			[io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED);
+			[io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W);
 			if any(count(:)-icount(:))
 				error('Number of NaNs differ for REAL and IMAG part');
 			else
@@ -145,6 +159,10 @@
 end; 
 
 
+if ~isempty(W) 
+	error('weighted sumskipnan requires sumskipnan_mex');
+end; 	
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % count non-NaN's
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/extra/NaN/src/sumskipnan_mex.cpp	Tue May 05 13:45:56 2009 +0000
+++ b/extra/NaN/src/sumskipnan_mex.cpp	Wed May 06 11:08:58 2009 +0000
@@ -23,6 +23,8 @@
 // Input:
 // - array to sum
 // - dimension to sum
+// - flag (is actually an output argument telling whether some NaN was observed)
+// - weight vector to compute weighted sum 
 //
 // Output:
 // - sums
@@ -44,6 +46,9 @@
 inline int __sumskipnan2__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN);
 inline int __sumskipnan3__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN);
 
+inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W);
+inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W);
+
 //#define NO_FLAG
 
 void mexFunction(int POutputCount,  mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) 
@@ -54,6 +59,7 @@
     	double* 	LOutputCount;
     	double* 	LOutputSum2;
     	double  	x;
+    	double*		W = NULL;		// weight vector 
     	//unsigned long   LCount;
 
     	mwSize		DIM = 0; 
@@ -65,8 +71,8 @@
 	char	 	flag_isNaN = 0;
 
 	// check for proper number of input and output arguments
-	if ((PInputCount <= 0) || (PInputCount > 3))
-	        mexErrMsgTxt("SumSkipNan.MEX requires 1,2 or 3 arguments.");
+	if ((PInputCount <= 0) || (PInputCount > 4))
+	        mexErrMsgTxt("SumSkipNan.MEX requires between 1 and 4 arguments.");
 	if (POutputCount > 4)
 	        mexErrMsgTxt("SumSkipNan.MEX has 1 to 3 output arguments.");
 
@@ -117,6 +123,16 @@
 
 	SZ2[DIM-1] = 1;		// size of output is same as size of input but SZ(DIM)=1;
 
+    	// get weight vector for weighted sumskipnan 
+       	if  (PInputCount > 3)	{
+		if (!mxGetNumberOfElements(PInputs[3])) 
+			; // empty weight vector - no weighting 
+		else if (mxGetNumberOfElements(PInputs[3])==D2)
+			W = mxGetPr(PInputs[3]);
+		else
+			mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension");
+	}	
+
 	    // create outputs
 	#define TYP mxDOUBLE_CLASS
 
@@ -142,13 +158,28 @@
 			if (D1==1)  	
 			{
 				double count;
-				__sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN);
+				if (W) 
+					__sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN, W);
+				else 	
+					__sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, &count, &flag_isNaN);
  	       		}
 			else for (j=0; j<D2; j++) {
 				// minimize cache misses 
 				ix2 =   ix0;	// index for output 
 				// Inner LOOP: along dimensions < DIM
-				do {
+				if (W) do {
+					register double x = *LInput;
+        				if (!isnan(x)) {
+						LOutputSum[ix2]   += W[j]*x; 
+					}
+#ifndef NO_FLAG
+					else 
+						flag_isNaN = 1; 
+#endif 
+					LInput++;
+					ix2++;
+				} while (ix2 != (l+1)*D1);
+				else do {
 					register double x = *LInput;
         				if (!isnan(x)) {
 						LOutputSum[ix2]   += x; 
@@ -171,13 +202,29 @@
 			ix1 = ix0*D2;	// index for input 
 			if (D1==1)  	
 			{
-				__sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN);
+				if (W) 
+					__sumskipnan2w__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN, W);
+				else 	
+					__sumskipnan2__(LInput+ix1, D2, LOutputSum+ix0, LOutputCount+ix0, &flag_isNaN);
  	       		}
 			else for (j=0; j<D2; j++) {
 				// minimize cache misses 
 				ix2 =   ix0;	// index for output 
 				// Inner LOOP: along dimensions < DIM
-				do {
+				if (W) do {
+					register double x = *LInput;
+        				if (!isnan(x)) {
+						LOutputCount[ix2] += W[j]; 
+						LOutputSum[ix2]   += W[j]*x; 
+					}
+#ifndef NO_FLAG
+					else 
+						flag_isNaN = 1; 
+#endif
+					LInput++;
+					ix2++;
+				} while (ix2 != (l+1)*D1);
+				else do {
 					register double x = *LInput;
         				if (!isnan(x)) {
 						LOutputCount[ix2] += 1.0; 
@@ -202,13 +249,31 @@
 			if (D1==1)  	
 			{
 				size_t count;
-				__sumskipnan3__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN);
+				if (W) 
+					__sumskipnan3w__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN, W);
+				else 	
+					__sumskipnan3__(LInput+ix1, D2, LOutputSum+ix0, LOutputSum2+ix0, LOutputCount+ix0, &flag_isNaN);
  	       		}
 			else for (j=0; j<D2; j++) {
 				// minimize cache misses 
 				ix2 =   ix0;	// index for output 
 				// Inner LOOP: along dimensions < DIM
-				do {
+				if (W) do {
+					register double x = *LInput;
+        				if (!isnan(x)) {
+						LOutputCount[ix2] += W[j]; 
+						double t = W[j]*x;
+						LOutputSum[ix2]   += t; 
+						LOutputSum2[ix2]  += t*t; 
+					}
+#ifndef NO_FLAG
+					else 
+						flag_isNaN = 1; 
+#endif
+					LInput++;
+					ix2++;	
+				} while (ix2 != (l+1)*D1);
+				else do {
 					register double x = *LInput;
         				if (!isnan(x)) {
 						LOutputCount[ix2] += 1.0; 
@@ -331,3 +396,72 @@
         *No = (double)count;
 }
 
+#define stride 1 
+inline int __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W)
+{
+	register double sum=0; 
+	register double count=0; 
+	register char   flag=0; 
+	// LOOP  along dimension DIM
+	
+	void *end = data + stride*Ni; 
+	do {
+		register double x = *data;
+        	if (!isnan(x))
+		{
+			count += *W; 
+			sum   += *W*x; 
+		}
+#ifndef NO_FLAG
+		else 
+			flag = 1; 
+#endif
+
+		data +=stride;	
+		W++;
+	}
+	while (data < end);
+	
+#ifndef NO_FLAG
+	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; 
+#endif
+	*s  = sum;
+        *No = count;
+
+}
+
+inline int __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W)
+{
+	register double sum=0; 
+	register double msq=0; 
+	register double count=0; 
+	register char   flag=0; 
+	// LOOP  along dimension DIM
+	
+	void *end = data + stride*Ni; 
+	do {
+		register double x = *data;
+        	if (!isnan(x)) {
+			count += *W;
+			double t = *W*x; 
+			sum += t; 
+			msq += t*t; 
+		}
+#ifndef NO_FLAG
+		else 
+			flag = 1; 
+#endif
+
+		data++;	// stride=1
+		W++;
+	}
+	while (data < end);
+
+#ifndef NO_FLAG
+	if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; 
+#endif
+	*s  = sum;
+	*s2 = msq; 
+        *No = count;
+}
+