Mercurial > octave
changeset 32011:ce36dddf5427
normalize.m: Enable compatible NaN handling (bug #50571)
* scripts/statistics/normalize.m: Add 'omitnan' option to internal calls
to std, mean, and median. Correct 'norm' processing to temporarily replace
NaN values with 0 before calling sum. Add FIXME note detailing code
simplification after NANFLAG gets implemented in sum. Remove Matlab
NaN incompatibility note from docstring. Add BISTs for each method option
to verify correct NaN handling.
* etc/NEWS.9.md: Add note to Matlab Compatibility section about improved
NaN handling.
author | Nicholas R. Jankowski <jankowski.nicholas@gmail.com> |
---|---|
date | Thu, 13 Apr 2023 16:59:16 -0400 |
parents | d790c977abb5 |
children | a00c7e103041 |
files | etc/NEWS.9.md scripts/statistics/normalize.m |
diffstat | 2 files changed, 42 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/etc/NEWS.9.md Thu Apr 13 20:04:56 2023 +0200 +++ b/etc/NEWS.9.md Thu Apr 13 16:59:16 2023 -0400 @@ -49,6 +49,9 @@ - `mode` now produces Matlab compatible output for empty inputs (bug #50583). +- `normalize` now produces Matlab compatible output for inputs containing NaN +values (bug #50571). + - `cov` now processes the input form cov(x,y) with two separate data arrays x and y, as cov(x(:),y(:)) to maintain Matlab compatibility. It also accepts a NANFLAG option to allow ignoring NaN entries in input data (bug #50571)
--- a/scripts/statistics/normalize.m Thu Apr 13 20:04:56 2023 +0200 +++ b/scripts/statistics/normalize.m Thu Apr 13 16:59:16 2023 -0400 @@ -30,7 +30,6 @@ ## @deftypefnx {} {@var{z} =} normalize (@dots{}, @var{method}, @var{option}) ## @deftypefnx {} {@var{z} =} normalize (@dots{}, @var{scale}, @var{scaleoption}, @var{center}, @var{centeroption}) ## @deftypefnx {} {[@var{z}, @var{c}, @var{s}] =} normalize (@dots{}) -## ## Return a normalization of the data in @var{x} using one of several available ## scaling and centering methods. ## @@ -61,6 +60,9 @@ ## If the optional second argument @var{dim} is given, operate along this ## dimension. ## +## @code{normalize} ignores NaN values in @var{x}, similar to the behavior of +## the omitnan option in @code{std}, @code{mean}, and @code{median}. +## ## The optional inputs @var{method} and @var{option} can be used to specify the ## type of normalization performed on @var{x}. Note that only the ## @option{scale} and @option{center} options may be specified together using @@ -156,10 +158,6 @@ ## @item ## The option @option{DataVariables} is not yet implemented for Table class ## @var{x} inputs. -## -## @item -## Certain arrays containing NaN elements may not return @sc{matlab} compatible -## output. ## @end enumerate ## ## @seealso{zscore, iqr, norm, rescale, std, median, mean, mad} @@ -167,9 +165,8 @@ function [z, c, s] = normalize (x, varargin) - ## FIXME: Until NANFLAG/OMITNAN option is implemented in std, mean, median, - ## etc., normalize cannot efficiently reproduce some behavior with NaNs in - ## x. xtests added to capture this. (See bug #50571) + ## FIXME: Until NANFLAG/OMITNAN option is implemented in sum, inefficient + ## workaround is used for method "norm" option 1 (See bug #50571) ## FIXME: When table class is implemented, remove DataVariables error line in ## option checking section and add DataVariables data handling switch @@ -186,7 +183,7 @@ if (nargin == 1) ## Directly handle simple 1 input case. 
- [s, c] = std (x); + [s, c] = std (x, "omitnan"); else ## Parse input options @@ -385,20 +382,27 @@ case "zscore" switch (methodoption) case "std" - [s, c] = std (x, [], dim); + [s, c] = std (x, [], dim, "omitnan"); case "robust" ## center/median to zero and MAD = 1 - c = median (x, dim); + c = median (x, dim, "omitnan"); ## FIXME: Use bsxfun, rather than broadcasting, until broadcasting - ## supports diagonal and sparse matrices (Bugs #41441, #35787). - s = median (abs (bsxfun (@minus, x , c)), dim); - ## s = median (abs (x - c), dim); # Automatic broadcasting + ## supports diagonal and sparse matrices. + ## (Bugs #41441, #35787). + s = median (abs (bsxfun (@minus, x , c)), dim, "omitnan"); + ## s = median (abs (x - c), dim, "omitnan");# Broadcasting. endswitch case "norm" switch (methodoption) case 1 + ## FIXME: when sum supports omitnan option replace entire case + ## with single line: + ## s = sum (abs (x), dim, "omitnan"); + xnan = isnan (x); + x(xnan) = 0; s = sum (abs (x), dim); + x(xnan) = NaN; case Inf s = max (abs (x), [], dim); otherwise @@ -439,7 +443,7 @@ c = process_center_option (x, dim, center_option); case "medianiqr" - c = median (x, dim); + c = median (x, dim, "omitnan"); s = iqr (x, dim); endswitch @@ -462,9 +466,9 @@ else switch (center_option) case "mean" - c = mean (x, dim); + c = mean (x, dim, "omitnan"); case "median" - c = median (x, dim); + c = median (x, dim, "omitnan"); endswitch endif @@ -479,7 +483,7 @@ else switch (scale_option) case "std" - s = std (x, [], dim); + s = std (x, [], dim, "omitnan"); case "mad" s = mad (x, 1, dim); case "first" @@ -644,10 +648,23 @@ %! assert (issparse (c)); %! assert (issparse (s)); -## Matlab ignores NaNs, operating as if the vector had one less element, then -## returns the result retaining the NaN in the solution. 
-%!assert <50571> (normalize ([1 2 NaN], 2), [-1, 1, NaN]*sqrt(2)/2) -%!assert <50571> (normalize ([1 2 NaN; 1 2 3], 2), [[-1 1 NaN]*sqrt(2)/2; -1 0 1], eps) +## Test that normalize ignores NaN values +%!assert <*50571> (normalize ([1 2 NaN], 2), [-1, 1, NaN]*sqrt(2)/2, eps) +%!assert <*50571> (normalize ([1 2 NaN; 1 2 3], 2), [[-1 1 NaN]*sqrt(2)/2; -1 0 1], eps) +%!assert <*50571> (normalize ([1 2 NaN; 1 2 NaN], 1), NaN (2, 3)) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2), [sqrt(2)/2*[-1 1 NaN]; -1 0 1], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "zscore", "robust"), [-1 1 NaN; -1 0 1]) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "norm", 1), [1/3 2/3 NaN; 2/9 1/3 4/9], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "norm", Inf), [0.5 1 NaN; 0.5 0.75 1], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "range", [1 2]), [1 2 NaN; 1 1.5 2], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", 2), [0.5 1 NaN; 1 1.5 2], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "mad"), [2 4 NaN; 2 3 4], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "first"), [1 2 NaN; 1 1.5 2], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "scale", "iqr"), [1 2 NaN; 4/3 2 8/3], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", "mean"), [-0.5 0.5 NaN; -1 0 1], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", "median"), [-0.5 0.5 NaN; -1 0 1], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 4], 2, "center", -1), [2 3 NaN; 3 4 5], eps) +%!assert <*50571> (normalize ([1 2 NaN; 2 3 NaN], 2, "center", "mean", "scale", "std"), sqrt(2)/2*[-1 1 NaN; -1 1 NaN], eps) ## Test input validation %!error <Invalid call> normalize ()