changeset 32011:ce36dddf5427

normalize.m: Enable compatible NaN handling (bug #50571) * scripts/statistics/normalize.m: Add 'omitnan' option to internal calls to std, mean, and median. Correct 'norm' processing to temporarily replace NaN values with 0 before calling sum. Add FIXME note detailing code simplification after NANFLAG gets implemented in sum. Remove Matlab NaN incompatibility note from docstring. Add BISTs for each method option to verify correct NaN handling. * etc/NEWS.9.md: Add note to Matlab Compatibility section about improved NaN handling.
author Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
date Thu, 13 Apr 2023 16:59:16 -0400
parents d790c977abb5
children a00c7e103041
files etc/NEWS.9.md scripts/statistics/normalize.m
diffstat 2 files changed, 42 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/etc/NEWS.9.md	Thu Apr 13 20:04:56 2023 +0200
+++ b/etc/NEWS.9.md	Thu Apr 13 16:59:16 2023 -0400
@@ -49,6 +49,9 @@
 
 - `mode` now produces Matlab compatible output for empty inputs (bug #50583).
 
+- `normalize` now produces Matlab compatible output for inputs containing NaN
+values (bug #50571).
+
 - `cov` now processes the input form cov(x,y) with two separate data arrays
 x and y, as cov(x(:),y(:)) to maintain Matlab compatibility.  It also accepts
 a NANFLAG option to allow ignoring NaN entries in input data (bug #50571)
--- a/scripts/statistics/normalize.m	Thu Apr 13 20:04:56 2023 +0200
+++ b/scripts/statistics/normalize.m	Thu Apr 13 16:59:16 2023 -0400
@@ -30,7 +30,6 @@
 ## @deftypefnx {} {@var{z} =} normalize (@dots{}, @var{method}, @var{option})
 ## @deftypefnx {} {@var{z} =} normalize (@dots{}, @var{scale}, @var{scaleoption}, @var{center}, @var{centeroption})
 ## @deftypefnx {} {[@var{z}, @var{c}, @var{s}] =} normalize (@dots{})
-##
 ## Return a normalization of the data in @var{x} using one of several available
 ## scaling and centering methods.
 ##
@@ -61,6 +60,9 @@
 ## If the optional second argument @var{dim} is given, operate along this
 ## dimension.
 ##
+## @code{normalize} ignores NaN values in @var{x} similar to the behavior of
+## the omitnan option in @code{std}, @code{mean}, and @code{median}.
+##
 ## The optional inputs @var{method} and @var{option} can be used to specify the
 ## type of normalization performed on @var{x}.  Note that only the
 ## @option{scale} and @option{center} options may be specified together using
@@ -156,10 +158,6 @@
 ## @item
 ## The option @option{DataVariables} is not yet implemented for Table class
 ## @var{x} inputs.
-##
-## @item
-## Certain arrays containing NaN elements may not return @sc{matlab} compatible
-## output.
 ## @end enumerate
 ##
 ## @seealso{zscore, iqr, norm, rescale, std, median, mean, mad}
@@ -167,9 +165,8 @@
 
 function [z, c, s] = normalize (x, varargin)
 
-  ## FIXME: Until NANFLAG/OMITNAN option is implemented in std, mean, median,
-  ## etc., normalize cannot efficiently reproduce some behavior with NaNs in
-  ## x.  xtests added to capture this.  (See bug #50571)
+  ## FIXME: Until NANFLAG/OMITNAN option is implemented in sum, inefficient
+  ##        workaround is used for method "norm" option 1  (See bug #50571)
 
   ## FIXME: When table class is implemented, remove DataVariables error line in
   ## option checking section and add DataVariables data handling switch
@@ -186,7 +183,7 @@
 
   if (nargin == 1)
     ## Directly handle simple 1 input case.
-    [s, c] = std (x);
+    [s, c] = std (x, "omitnan");
 
   else
     ## Parse input options
@@ -385,20 +382,27 @@
       case "zscore"
         switch (methodoption)
           case "std"
-            [s, c] = std (x, [], dim);
+            [s, c] = std (x, [], dim, "omitnan");
           case "robust"
             ## center/median to zero and MAD = 1
-            c = median (x, dim);
+            c = median (x, dim, "omitnan");
             ## FIXME: Use bsxfun, rather than broadcasting, until broadcasting
-            ##        supports diagonal and sparse matrices (Bugs #41441, #35787).
-            s = median (abs (bsxfun (@minus, x , c)), dim);
-            ## s = median (abs (x - c), dim);   # Automatic broadcasting
+            ##        supports diagonal and sparse matrices.
+            ##        (Bugs #41441, #35787).
+            s = median (abs (bsxfun (@minus, x , c)), dim, "omitnan");
+            ## s = median (abs (x - c), dim, "omitnan");# Broadcasting.
         endswitch
 
       case "norm"
         switch (methodoption)
           case 1
+            ## FIXME:  when sum supports omitnan option replace entire case
+            ## with single line:
+            ## s = sum (abs (x), dim, "omitnan");
+            xnan = isnan (x);
+            x(xnan) = 0;
             s = sum (abs (x), dim);
+            x(xnan) = NaN;
           case Inf
             s = max (abs (x), [], dim);
           otherwise
@@ -439,7 +443,7 @@
         c = process_center_option (x, dim, center_option);
 
       case "medianiqr"
-        c = median (x, dim);
+        c = median (x, dim, "omitnan");
         s = iqr (x, dim);
 
     endswitch
@@ -462,9 +466,9 @@
   else
     switch (center_option)
       case "mean"
-        c = mean (x, dim);
+        c = mean (x, dim, "omitnan");
       case "median"
-        c = median (x, dim);
+        c = median (x, dim, "omitnan");
     endswitch
   endif
 
@@ -479,7 +483,7 @@
   else
     switch (scale_option)
       case "std"
-        s = std (x, [], dim);
+        s = std (x, [], dim, "omitnan");
       case "mad"
         s = mad (x, 1, dim);
       case "first"
@@ -644,10 +648,23 @@
 %! assert (issparse (c));
 %! assert (issparse (s));
 
-## Matlab ignores NaNs, operating as if the vector had one less element, then
-## returns the result retaining the NaN in the solution.
-%!assert <50571> (normalize ([1 2 NaN], 2), [-1, 1, NaN]*sqrt(2)/2)
-%!assert <50571> (normalize ([1 2 NaN; 1 2 3], 2), [[-1 1 NaN]*sqrt(2)/2; -1 0 1], eps)
+## Test that normalize ignores NaN values
+%!assert <*50571> (normalize ([1 2 NaN], 2), [-1, 1, NaN]*sqrt(2)/2, eps)
+%!assert <*50571> (normalize ([1 2 NaN; 1 2 3], 2), [[-1 1 NaN]*sqrt(2)/2; -1 0 1], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 1 2 NaN], 1), NaN (2, 3))
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2), [sqrt(2)/2*[-1 1 NaN]; -1 0 1], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "zscore", "robust"), [-1 1 NaN; -1 0 1])
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "norm", 1), [1/3 2/3 NaN; 2/9 1/3 4/9], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "norm", Inf), [0.5 1 NaN; 0.5 0.75 1], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "range", [1 2]), [1 2 NaN; 1 1.5 2], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", 2), [0.5 1 NaN; 1 1.5 2], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "mad"), [2 4 NaN; 2 3 4], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "first"), [1 2 NaN; 1 1.5 2], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "iqr"), [1 2 NaN; 4/3 2 8/3], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", "mean"), [-0.5 0.5 NaN; -1 0 1], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", "median"), [-0.5 0.5 NaN; -1 0 1], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", -1), [2 3 NaN; 3 4 5], eps)
+%!assert <*50571> (normalize ([1 2 NaN; 2 3 NaN], 2, "center", "mean", "scale", "std"), sqrt(2)/2*[-1 1 NaN; -1 1 NaN], eps)
 
 ## Test input validation
 %!error <Invalid call> normalize ()