changeset 10269:d5042d4ccfaa octave-forge

added a new function csaps_sel
author nir-krakauer
date Fri, 18 May 2012 14:23:03 +0000
parents a98ba64657d4
children c40aea585f21
files main/splines/INDEX main/splines/inst/csaps_sel.m
diffstat 2 files changed, 164 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/main/splines/INDEX	Fri May 18 04:42:55 2012 +0000
+++ b/main/splines/INDEX	Fri May 18 14:23:03 2012 +0000
@@ -3,6 +3,7 @@
  csapi
  csape
  csaps
+ csaps_sel
  fnplt 
  fnder
  fnval
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/splines/inst/csaps_sel.m	Fri May 18 14:23:03 2012 +0000
@@ -0,0 +1,163 @@
+## Copyright (C) 2012 Nir Krakauer
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn{Function File}{[@var{yi} @var{p} @var{sigma2},@var{unc_y}] =} csaps_sel(@var{x}, @var{y}, @var{xi}, @var{w}=[], @var{crit}=[])
+## @deftypefnx{Function File}{[@var{pp} @var{p} @var{sigma2},@var{unc_y}] =} csaps_sel(@var{x}, @var{y}, [], @var{w}=[], @var{crit}=[])
+##
+## Cubic spline approximation with smoothing parameter estimation @*
+## approximate [@var{x},@var{y}], weighted by @var{w} (inverse variance; if not given, equal weighting is assumed), at @var{xi}.
+##
+## The chosen cubic spline with natural boundary conditions @var{pp}(@var{x}) minimizes @var{p} Sum_i @var{w}_i*(@var{y}_i - @var{pp}(@var{x}_i))^2  +  (1-@var{p}) Int @var{pp}''(@var{x}) d@var{x}.
+## A selection criterion @var{crit} is used to find a suitable value for @var{p} (between 0 and 1); possible values for @var{crit} are `aicc' (corrected Akaike information criterion, the default); `aic' (original Akaike information criterion); `gcv' (generalized cross validation)  
+##
+## @var{x} and @var{w} should be @var{n} by 1 in size; @var{y} should be @var{n} by @var{m}; @var{xi} should be @var{k} by 1; the values in @var{x} should be distinct; the values in @var{w} should be nonzero.
+##
+## returns the selected @var{p}, the estimated data scatter (variance from the smooth trend) @var{sigma2}, and the estimated uncertainty (SD) of the smoothing spline fit at each @var{x} value, @var{unc_y}.
+##
+## Note: The current evaluation of the effective number of model parameters uses singular value decomposition of an @var{n} by @var{n} matrix and is not computation or storage efficient for large @var{n} (thousands or greater). See Hutchinson (1985) for a more efficient method.
+##
+## References: 
+##
+## Carl de Boor (1978), A Practical Guide to Splines, Springer, Chapter XIV
+##
+## Clifford M. Hurvich, Jeffrey S. Simonoff, Chih-Ling Tsai (1998), Smoothing parameter selection in nonparametric regression using an improved Akaike information criterion, J. Royal Statistical Society, 60B:271-293
+##
+## M. F. Hutchinson and F. R. de Hoog (1985), Smoothing noisy data with spline functions, Numerische Mathematik, 47:99-106
+##
+## M. F. Hutchinson (1986), Algorithm 642: A fast procedure for calculating minimum cross-validation cubic smoothing splines, ACM Transactions on Mathematical Software, 12:150-153
+##
+## Grace Wahba (1983), Bayesian ``confidence intervals'' for the cross-validated smoothing spline, J Royal Statistical Society, 45B:133-150
+##
+## @end deftypefn
+## @seealso{csaps, spline, csapi, ppval, gcvspl}
+
+## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
+
+function [ret,p,sigma2,unc_y]=csaps_sel(x,y,xi,w,crit)
+
+  if (nargin < 5)
+    crit = [];
+    if(nargin < 4)
+      w = [];
+      if(nargin < 3)
+        xi = [];
+      endif
+    endif
+  endif
+
+  if(columns(x) > 1)
+    x = x.';
+    y = y.';
+    w = w.';
+  endif
+
+  [x,i] = sort(x);
+  y = y(i, :);
+
+  n = numel(x);
+  
+  if isempty(w)
+    w = ones(n, 1);
+  end
+
+  if isempty(crit)
+    crit = 'aicc';
+  end  
+
+  h = diff(x);
+
+  R = spdiags([h(1:end-1) 2*(h(1:end-1) + h(2:end)) h(2:end)], [-1 0 1], n-2, n-2);
+
+  QT = spdiags([1 ./ h(1:end-1) -(1 ./ h(1:end-1) + 1 ./ h(2:end)) 1 ./ h(2:end)], [0 1 2], n-2, n);
+  
+##determine influence matrix for different p without repeated inversion
+  [U D V] = svd(diag(1 ./ sqrt(w))*QT'*sqrtm(inv(R)), 0); D = diag(D).^2;
+
+
+##choose p by minimizing the penalty function
+  penalty_function = @(p) penalty_compute(p, U, D, y, w, n, crit);
+
+  p = fminbnd(penalty_function, 0, 1);
+
+
+
+  H = influence_matrix(p, U, D, n);
+  [MSR, Ht] = penalty_terms(H, y, w);
+  sigma2 = MSR * (n / (n-Ht)); #estimated data error variance (wahba83)
+  unc_y = sqrt(sigma2 * diag(H) ./ w); #uncertainty (SD) of fitted curve at each input x-value (hutchinson86)
+  
+
+## solve for the scaled second derivatives u and for the function values a at the knots (if p = 1, a = y) 
+  u = (6*(1-p)*QT*diag(1 ./ w)*QT' + p*R) \ (QT*y);
+  a = y - 6*(1-p)*diag(1 ./ w)*QT'*u;
+
+## derivatives at all but the last knot for the piecewise cubic spline
+  aa = a(1:(end-1), :);
+  cc = zeros(size(y)); 
+  cc(2:(n-1), :) = 6*p*u; #cc([1 n], :) = 0 [natural spline]
+  dd = diff(cc) ./ h;
+  cc = cc(1:(end-1), :);
+  bb = diff(a) ./ h - (cc/2).*h - (dd/6).*(h.^2);
+
+  ret = mkpp (x, cat (2, dd'(:)/6, cc'(:)/2, bb'(:), aa'(:)), size(y, 2));
+
+  if ~isempty(xi)
+    ret = ppval (ret, xi);
+  endif
+
+endfunction
+
+
+
+function H = influence_matrix(p, U, D, n) #returns influence matrix for given p
+	H = speye(n) - U * diag(D ./ (D + (p / (6*(1-p))))) * U';
+endfunction	
+
+function [MSR, Ht] = penalty_terms(H, y, w)
+	MSR = mean(w .* (y - H*y) .^ 2); #mean square residual
+	Ht = trace(H); #effective number of fitted parameters
+endfunction
+
+function J = aicc(MSR, Ht, n)
+	J = mean(log(MSR)(:)) + 2 * (Ht + 1) / max(n - Ht - 2, 0); #hurvich98, taking the average if there are multiple data sets as in woltring86 
+endfunction
+
+function J = aic(MSR, Ht, n)
+	J = mean(log(MSR)(:)) + 2 * Ht / n;
+endfunction
+
+function J = gcv(MSR, Ht, n)
+	J = mean(log(MSR)(:)) - 2 * log(1 - Ht / n);
+endfunction
+
+function J = penalty_compute(p, U, D, y, w, n, crit) #evaluates a user-supplied penalty function at given p
+	H = influence_matrix(p, U, D, n);
+	[MSR, Ht] = penalty_terms(H, y, w);
+	J = feval(crit, MSR, Ht, n);
+	if ~isfinite(J)
+		J = Inf;
+	endif 	
+endfunction
+
+
+%!shared x,y,ret,p,sigma2,unc_y
+%! x = [0:0.01:1]'; y = sin(x);
+%! [ret,p,sigma2,unc_y]=csaps_sel(x,y,x);
+%!assert (1-p, 0, 1E-6);
+%!assert (sigma2, 0, 1E-10);
+%!assert (ret-y, zeros(size(y)), 1E-4);
+%!assert (unc_y, zeros(size(unc_y)), 1E-5);
+