changeset 4885:28ab079d8f0e

[project @ 2004-04-30 04:21:33 by jwe]
author jwe
date Fri, 30 Apr 2004 04:21:33 +0000
parents a9f67193e3a0
children 54b076a24718
files scripts/ChangeLog scripts/statistics/base/iqr.m scripts/statistics/base/kurtosis.m scripts/statistics/base/ranks.m scripts/statistics/base/run_count.m scripts/statistics/base/skewness.m scripts/statistics/base/statistics.m scripts/statistics/base/studentize.m
diffstat 8 files changed, 319 insertions(+), 126 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/ChangeLog	Fri Apr 30 04:21:33 2004 +0000
@@ -1,3 +1,18 @@
+2004-04-29  David Bateman  <dbateman@free.fr>
+
+	* statistics/base/ranks.m, statistics/base/run_count.m,
+	statistics/base/studentize.m, statistics/base/kurtosis.m
+	statistics/base/statistics.m, statistics/base/skewness.m
+	statistics/base/iqr.m:
+	Make N-d array aware.  Allow optional argument to define the
+	dimension along which to operate.  Update the documentation.
+
+	* statistics/base/ranks.m: Change algorithm to use sort,
+	and adjust for the ties after.
+
+	* statistics/base/run_counts.m: Change algorithm to use
+	the a combination of diff and find, rather than a for-loop.
+
 2004-04-23  Paul Kienzle  <pkienzle@users.sf.net>
 
 	* plot/hist.m: Correctly determine cutoffs.  New tests.
--- a/scripts/statistics/base/iqr.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/iqr.m	Fri Apr 30 04:21:33 2004 +0000
@@ -18,31 +18,60 @@
 ## 02111-1307, USA.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} iqr (@var{x})
+## @deftypefn {Function File} {} iqr (@var{x}, @var{dim})
 ## If @var{x} is a vector, return the interquartile range, i.e., the
 ## difference between the upper and lower quartile, of the input data.
 ##
-## If @var{x} is a matrix, do the above for each column of @var{x}.
+## If @var{x} is a matrix, do the above for first non singleton
+## dimension of @var{x}.. If the option @var{dim} argument is given,
+## then operate along this dimension.
 ## @end deftypefn
 
 ## Author KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Description: Interquartile range
 
-function y = iqr (x)
+function y = iqr (x, dim)
 
-  if (nargin != 1)
-    usage ("iqr (x)");
+  if (nargin != 1 && nargin != 2)
+    usage ("iqr (x, dim)");
   endif
 
-  if (rows (x) == 1)
-    x = x.';
+  nd = ndims (x);
+  sz = size (x);
+  nel = numel (x);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
+    endif
+  else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("iqr: dim must be an integer and valid dimension");
+    endif
   endif
 
-  [r, c] = size (x);
-  y = zeros (1, c);
+  ## This code is a bit heavy, but is needed until empirical_inv 
+  ## takes other than vector arguments.
+  c = sz (dim);
+  sz (dim) = 1;
+  y = zeros (sz);
+  stride = prod (sz (1:dim-1));
+  for i = 1 : nel / c;
+    offset = i;
+    offset2 = 0;
+    while (offset > stride)
+      offset -= stride;
+      offset2++;
+    endwhile
+    offset += offset2 * stride * c;
+    rng = [0 : c-1] * stride + offset;
 
-  for i = 1:c;
-    y(i) = empirical_inv (3/4, x(:,i)) - empirical_inv (1/4, x(:,i));
+    y (i) = empirical_inv (3/4, x(rng)) - empirical_inv (1/4, x(rng));
   endfor
 
 endfunction
--- a/scripts/statistics/base/kurtosis.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/kurtosis.m	Fri Apr 30 04:21:33 2004 +0000
@@ -18,7 +18,7 @@
 ## 02111-1307, USA.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} kurtosis (@var{x})
+## @deftypefn {Function File} {} kurtosis (@var{x}, @var{dim})
 ## If @var{x} is a vector of length @math{N}, return the kurtosis
 ## @iftex
 ## @tex
@@ -35,36 +35,53 @@
 ## @end ifinfo
 ##
 ## @noindent
-## of @var{x}.  If @var{x} is a matrix, return the row vector containing
-## the kurtosis of each column.
+## of @var{x}.  If @var{x} is a matrix, return the kurtosis over the
+## first non-singleton dimension. The optional argument @var{dim}
+## can be given to force the kurtosis to be given over that 
+## dimension.
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Created: 29 July 1994
 ## Adapted-By: jwe
 
-function retval = kurtosis (x)
+function retval = kurtosis (x, dim)
 
-  if (nargin != 1)
-    usage ("kurtosis (x)");
+  if (nargin != 1 && nargin != 2)
+    usage ("kurtosis (x, dim)");
   endif
 
-  if (isvector (x))
-    x = x - mean (x);
-    if (! any (x))
-      retval = 0;
-    else
-      retval = sum (x .^ 4) / (length (x) * std (x) ^ 4) - 3;
+  nd = ndims (x);
+  sz = size (x);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
     endif
-  elseif (ismatrix (x))
-    [nr, nc] = size (x);
-    x = x - ones (nr, 1) * mean (x);
-    retval = zeros (1, nc);
-    s      = std (x);
-    ind    = find (s > 0);
-    retval (ind) = sum (x (:, ind) .^ 4) ./ (nr * s (ind) .^ 4) - 3;
   else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("kurtosis: dim must be an integer and valid dimension");
+    endif
+  endif
+  
+  if (! ismatrix (x))
     error ("kurtosis: x has to be a matrix or a vector");
   endif
 
+  c = sz (dim);
+  sz (dim) = 1;
+  idx = ones (1, nd);
+  idx (dim) = c;
+  x = x - repmat (mean (x, dim), idx);
+  retval = zeros (sz);
+  s = std (x, [], dim);
+  x = sum(x.^4, dim);
+  ind = find (s > 0);
+  retval (ind) = x (ind) ./ (c * s (ind) .^ 4) - 3;
+
 endfunction
--- a/scripts/statistics/base/ranks.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/ranks.m	Fri Apr 30 04:21:33 2004 +0000
@@ -18,37 +18,78 @@
 ## 02111-1307, USA.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} ranks (@var{x})
+## @deftypefn {Function File} {} ranks (@var{x}, @var{dim})
 ## If @var{x} is a vector, return the (column) vector of ranks of
 ## @var{x} adjusted for ties.
 ##
-## If @var{x} is a matrix, do the above for each column of @var{x}.
+## If @var{x} is a matrix, do the above for along the first 
+## non-singleton dimension. If the optional argument @var{dim} is
+## given, operate along this dimension.
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Description: Compute ranks
 
-## This code is rather ugly, but is there an easy way to get the ranks
-## adjusted for ties from sort?
+## This code was rather ugly, since it didn't use sort due to the
+## fact of how to deal with ties. Now it does use sort and its
+## even uglier!!! At least it handles NDArrays..
+
+function y = ranks (x, dim)
+
+  if (nargin != 1 && nargin != 2)
+    usage ("ranks (x, dim)");
+  endif
 
-function y = ranks (x)
-
-  if (nargin != 1)
-    usage ("ranks (x)");
+  nd = ndims (x);
+  sz = size (x);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
+    endif
+  else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("ranks: dim must be an integer and valid dimension");
+    endif
   endif
 
-  y = [];
-
-  [r, c] = size (x);
-  if ((r == 1) && (c > 0))
-    p = x' * ones (1, c);
-    y = sum (p < p') + (sum (p == p') + 1) / 2;
-  elseif (r > 1)
-    o = ones (1, r);
-    for i = 1 : c;
-      p = x (:, i) * o;
-      y = [y, (sum (p < p') + (sum (p == p') + 1) / 2)'];
-    endfor
+  if (sz (dim) == 1)
+    y = ones(sz);
+  else
+    ## The algorithm works only on dim=1, so permute if necesary
+    if (dim != 1)
+      perm = [1 : nd];
+      perm (1) = dim;
+      perm (dim) = 1;
+      x = permute (x, perm);
+    endif
+    sz  = size (x);
+    infvec = -Inf * ones ([1, sz(2 : end)]);
+    [xs, y] = sort (x);
+    eq_el = find (diff ([xs; infvec]) == 0);
+    if (isempty (eq_el))
+      [eq_el, y] = sort (y);  
+    else
+      runs = complement (eq_el+1, eq_el);
+      runs = reshape (y (runs), size (runs)) + 
+             floor (runs ./ sz (1)) * sz(1);
+      len = diff (find (diff ([Inf; eq_el; -Inf]) != 1)) + 1;
+      [eq_el, y] = sort (y);  
+      for i = 1 : length(runs)
+	p = y (runs (i)) + (len (i) - 1) / 2;
+	for j = 0 : len (i) - 1
+	  y (runs (i) + j) = p;
+	endfor
+      endfor
+    endif  
+    if (dim != 1)
+      y = permute (y, perm);
+    endif
   endif
 
 endfunction
--- a/scripts/statistics/base/run_count.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/run_count.m	Fri Apr 30 04:21:33 2004 +0000
@@ -19,43 +19,75 @@
 
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {} run_count (@var{x}, @var{n})
-## Count the upward runs in the columns of @var{x} of length 1, 2, ...,
-## @var{n}-1 and greater than or equal to @var{n}.
+## Count the upward runs along the first non-singleton dimension of
+## @var{x} of length 1, 2, ..., @var{n}-1 and greater than or equal 
+## to @var{n}. If the optional argument @var{dim} is given operate
+## along this dimension
 ## @end deftypefn
 
 ## Author: FL <Friedrich.Leisch@ci.tuwien.ac.at>
 ## Description: Count upward runs
 
-function retval = run_count (x, n)
+function retval = run_count (x, n, dim)
 
-  [xr, xc] = size(x);
+  if (nargin != 2 && nargin != 3)
+    usage ("run_count (x, n) or run_count (x, n, dim)");
+  endif
 
-  tmp = zeros (xr,xc);
-  retval = zeros (n, xc);
-
-  for j = 1 : xc
-    run = 1;
-    count = 1;
-
-    for k = 2 : xr
+  nd = ndims (x);
+  sz = size (x);
+  if (nargin != 3)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
+    endif
+  else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("run_count: dim must be an integer and valid dimension");
+    endif
+  endif
 
-      if x(k, j) < x(k-1, j)
-        tmp(run, j) = count;
-        run = run + 1;
-        count = 0;
-      endif
-
-      count = count + 1;
+  if (! (isscalar (n) && n == round (n)) && n > 0 )
+    error ("run_count: n must be a positive integer");
+  endif
+  
+  nd = ndims (x);
+  if (dim != 1)
+    perm = [1 : nd];
+    perm (1) = dim;
+    perm (dim) = 1;
+    x = permute (x, perm);
+  endif
 
-    endfor
-
-    tmp(run, j) = count;
-
+  sz = size (x);
+  idx = cell ();
+  for i = 1 : nd
+    idx {i} = 1 : sz(i);
   endfor
+  c = sz (1); 
+  tmp = zeros ([c + 1, sz(2 : end)]);
+  infvec = Inf * ones ([1, sz(2 : end)]);
 
+  ind = find (diff ([infvec; x; -infvec]) < 0);
+  tmp (ind (2 : end) - 1) = diff (ind);
+  tmp = tmp (idx {:});
+
+  sz (1) = n;
+  retval = zeros (sz);
   for k=1 : (n-1)
-    retval(k, :) = sum (tmp == k);
+    idx {1} = k;
+    retval (idx {:}) = sum (tmp == k);
   endfor
-  retval(n, :) = sum (tmp >= n);
+  idx {1} = n;
+  retval (idx {:}) = sum (tmp >= n);
+
+  if (dim != 1)
+    retval = ipermute (retval, perm);
+  endif
 
 endfunction
--- a/scripts/statistics/base/skewness.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/skewness.m	Fri Apr 30 04:21:33 2004 +0000
@@ -18,7 +18,7 @@
 ## 02111-1307, USA.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} skewness (@var{x})
+## @deftypefn {Function File} {} skewness (@var{x}, @var{dim})
 ## If @var{x} is a vector of length @math{n}, return the skewness
 ## @iftex
 ## @tex
@@ -35,36 +35,52 @@
 ## @end ifinfo
 ##
 ## @noindent
-## of @var{x}.  If @var{x} is a matrix, return the row vector containing
-## the skewness of each column.
+## of @var{x}.  If @var{x} is a matrix, return the skewness along the
+## first non-singleton dimension of the matrix. If the optional
+## @var{dim} argument is given, operate along this dimension.
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Created: 29 July 1994
 ## Adapted-By: jwe
 
-function retval = skewness (x)
+function retval = skewness (x, dim)
 
-  if (nargin != 1)
-    usage ("skewness (x)");
+  if (nargin != 1 && nargin != 2)
+    usage ("skewness (x, dim)");
   endif
 
-  if (isvector (x))
-    x = x - mean (x);
-    if (! any (x))
-      retval = 0;
-    else
-      retval = sum (x .^ 3) / (length (x) * std (x) ^ 3);
+  nd = ndims (x);
+  sz = size (x);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
     endif
-  elseif (ismatrix (x))
-    [nr, nc] = size (x);
-    x = x - ones (nr, 1) * mean (x);
-    retval = zeros (1, nc);
-    s      = std (x);
-    ind    = find (s > 0);
-    retval (ind) = sum (x (:, ind) .^ 3) ./ (nr * s (ind) .^ 3);
   else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("skewness: dim must be an integer and valid dimension");
+    endif
+  endif
+
+  if (! ismatrix (x))
     error ("skewness: x has to be a matrix or a vector");
   endif
 
+  c = sz (dim);
+  idx = ones (1, nd);
+  idx (dim) = c;
+  x = x - repmat (mean (x, dim), idx);
+  sz (dim) = 1;
+  retval = zeros (sz);
+  s = std (x, [], dim);
+  ind = find (s > 0);
+  x = sum (x .^ 3, dim);
+  retval (ind) = x (ind) ./ (c * s (ind) .^ 3);
+  
 endfunction
--- a/scripts/statistics/base/statistics.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/statistics.m	Fri Apr 30 04:21:33 2004 +0000
@@ -29,27 +29,54 @@
 ## Author: KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Description: Compute basic statistics
 
-function S = statistics (X)
+function S = statistics (X, dim)
 
-  if (nargin != 1)
-    usage ("S = statistics (X)");
+  if (nargin != 1 && nargin != 2)
+    usage ("S = statistics (X, dim)");
   endif
 
-  if (prod (size (X)) > 1)
-    if (isvector (X))
-      X = reshape (X, length (X), 1);
+  nd = ndims (X);
+  sz = size (X);
+  nel = numel (X);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
+    endif
+  else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("statistics: dim must be an integer and valid dimension");
     endif
-    for k=1:columns(X)
-      S(:,k) = [(min (X(:,k)));
-                (empirical_inv ([0.25;0.5;0.75], X(:,k)));
-                (max (X(:,k)));
-                (mean (X(:,k)));
-                (std (X(:,k)));
-                (skewness (X(:,k)));
-                (kurtosis (X(:,k)))];
-    endfor
-  else
+  endif
+  
+  if (! ismatrix (X) || sz (dim) < 2)
     error ("statistics: invalid argument");
-  endif
+  endif    
+
+  ## This code is a bit heavy, but is needed until empirical_inv 
+  ## takes other than vector arguments.
+  c = sz (dim);
+  stride = prod (sz (1:dim-1));
+  sz (dim) = 3;
+  emp_inv = zeros (sz);
+  for i = 1 : nel / c;
+    offset = i;
+    offset2 = 0;
+    while (offset > stride)
+      offset -= stride;
+      offset2++;
+    endwhile
+    rng = [0 : c-1] * stride + offset + offset2 * stride * c;
+    rng2 = [0 : 2] * stride + offset + offset2 * stride * 3;
+    emp_inv(rng2) = empirical_inv ([0.25; 0.5; 0.75], X(rng));
+  endfor
+
+  S = cat (dim, min (X, [], dim), emp_inv, max (X, [], dim), mean (X, dim),
+	   std (X, [], dim), skewness (X, dim), kurtosis (X, dim));
 
 endfunction
--- a/scripts/statistics/base/studentize.m	Fri Apr 30 04:12:37 2004 +0000
+++ b/scripts/statistics/base/studentize.m	Fri Apr 30 04:21:33 2004 +0000
@@ -18,34 +18,50 @@
 ## 02111-1307, USA.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} studentize (@var{x})
+## @deftypefn {Function File} {} studentize (@var{x}, @var{dim})
 ## If @var{x} is a vector, subtract its mean and divide by its standard
 ## deviation.
 ##
-## If @var{x} is a matrix, do the above for each column.
+## If @var{x} is a matrix, do the above along the first non-singleton
+## dimension. If the optional argument @var{dim} is given then operate
+## along this dimension.
 ## @end deftypefn
 
 ## Author: KH <Kurt.Hornik@ci.tuwien.ac.at>
 ## Description: Subtract mean and divide by standard deviation
 
-function t = studentize (x)
+function t = studentize (x, dim)
 
-  if (nargin != 1)
-    usage ("studentize (x)");
+  if (nargin != 1 && nargin != 2)
+    usage ("studentize (x, dim)");
   endif
 
-  if isvector (x)
-    if (std (x) == 0)
-      t = zeros (size (x));
-    else
-      t = (x - mean (x)) / std (x);
+  nd = ndims (x);
+  sz = size (x);
+  if (nargin != 2)
+    %% Find the first non-singleton dimension
+    dim  = 1;
+    while (dim < nd + 1 && sz (dim) == 1)
+      dim = dim + 1;
+    endwhile
+    if (dim > nd)
+      dim = 1;
     endif
-  elseif ismatrix (x)
-    l = ones (rows (x), 1);
-    t = x - l * mean (x);
-    t = t ./ (l * max ([(std (t)); (! any (t))]));
   else
+    if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && 
+	dim < (nd + 1))
+      error ("studentize: dim must be an integer and valid dimension");
+    endif
+  endif
+
+  if (! ismatrix (x))
     error ("studentize: x must be a vector or a matrix");
   endif
 
-endfunction
\ No newline at end of file
+  c = sz (dim);
+  idx = ones (1, nd);
+  idx (dim) = c;
+  t = x - repmat (mean (x, dim), idx);
+  t = t ./ repmat (max (cat (dim, std(t, [], dim), ! any (t, dim)), [], dim), idx);
+
+endfunction