changeset 31124:df030ac26390

uniquetol.m: improve Matlab compatibility and add byrows sorting (bug #59850) * scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to 'byrows' output, ensure ia and ic outputs have column orientation for arrays and cells, verify consistent single class handling, add BISTs for aforementioned cases, and update docstring to note non-complex input requirement.
author Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
date Tue, 05 Jul 2022 15:22:46 -0400
parents 18b8f73595e0
children 4581402b1c5b
files scripts/set/uniquetol.m
diffstat 1 files changed, 168 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/set/uniquetol.m	Tue Jul 05 10:38:41 2022 -0700
+++ b/scripts/set/uniquetol.m	Tue Jul 05 15:22:46 2022 -0400
@@ -33,7 +33,8 @@
 ## Two values, @var{x} and @var{y}, are within relative tolerance if
 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
 ##
-## The input @var{A} must be a floating point type (double or single).
+## The input @var{A} must be a non-complex floating point type (double or
+## single).
 ##
 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
 ## precision input or 1e-6 for single precision input.
@@ -98,20 +99,12 @@
 ## @seealso{unique, union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 
-
 function [c, ia, ic] = uniquetol (A, varargin)
 
   if (nargin < 1)
     print_usage ();
   endif
 
-  if (isempty (A))
-    c = A;
-    ia = [];
-    ic = [];
-    return;
-  endif
-
   if (! isfloat (A) || iscomplex (A))
     error ("Octave:uniquetol:unsupported-type",
            "uniquetol: A must be a double or single precision non-complex array");
@@ -163,6 +156,29 @@
     endif
   endfor
 
+  if (isempty (A))
+    sz_A = size (A);
+    ## hack for Matlab empty input compatibility
+    if (by_rows)
+      c = A;
+      sz_A(2) = 1;
+      ia = ones (sz_A);
+      ic = ones (sz_A);
+    else
+      c = ones (0,1);
+        if (sz_A(1) == 1)
+          c = c.';
+        endif
+      ia = ones (0,1);
+      ic = ones (0,1);
+    endif
+    if (isa (A, "single"))
+      ## c follows class of A, ia and ic are always class "double".
+      c = single (c);
+    endif
+    return;
+  endif
+
   if (isempty (data_scale))
     data_scale = max (abs (A(! isinf (A))(:)));
   endif
@@ -171,8 +187,11 @@
 
   if (by_rows)
 
-    nr = rows (A);
-    nc = columns (A);
+    ## Start with the matrix in sorted order; retain sorting and inverting indices.
+    [A, srtA] = sortrows (A);
+    [~, inv_srtA] = sort (srtA);
+
+    [nr, nc] = size (A);
     Iall = zeros (nr, 1);
     I = NaN (nc, 1);
     ia = {};
@@ -189,7 +208,7 @@
         sumeq = sum (equ);
         ia_tmp = find (equ);
         if (output_all_indices)
-          ia{end+1} = ia_tmp;
+          ia{end+1,1} = sort (srtA(ia_tmp));
         endif
         Iall(ii+(1:sumeq)) = ia_tmp;
         I(j) = ia_tmp(1);
@@ -204,9 +223,10 @@
     c = A(I,:);
 
     if (! output_all_indices)
-      ia = I(1:j-1);
+      ia = srtA(I(1:j-1));
     endif
-    ic = J;
+
+    ic = J(inv_srtA);
 
   else
     isrowvec = isrow (A);
@@ -247,7 +267,7 @@
     endif
     if (output_all_indices)
       nu = cumsumue(end);
-      ia = cell (1, nu);
+      ia = cell (nu, 1);
       for k = 1:nu
         ia{k} = setdiff (sAi(cumsumue==k), findisnanA);
       endfor
@@ -256,35 +276,42 @@
     endif
 
     if (anyisnanA)
-      rowsc1 = rows (c) + (1:sum (isnanA));
+      rowsc1 = [1:sum(isnanA(:))]';
+      if (~all (isnanA))
+        rowsc1 += rows (c);
+      endif
       c(rowsc1) = NaN;
-      ia(rowsc1) = findisnanA;
       ic(isnanA) = rowsc1;
+      if (output_all_indices)
+        ia(rowsc1) = num2cell (findisnanA);
+      else
+        ia(rowsc1) = findisnanA;
+      endif
+
+      ## if numel(c) was 1, appending NaNs creates a row vector instead of
+      ## expected column vector.
+      if (isrow (c))
+        c = c.';
+      endif
     endif
 
-    ## FIXME: Matlab-compatible orientation of output
-    ## Actually, Matlab prefers row vectors (2021/03/24), but this is different
-    ## from all the other set functions which prefer column vectors.  Assume
-    ## that this is a bug in Matlab's implementation and prefer column vectors.
+    ## Matlab compatibility - outputs are column vectors unless the input
+    ## is a row vector, in which case the output c is also a row vector.
+    ## ia and ic are always column vectors. (verified Matlab 2022a)
     if (isrowvec)
       c = c.';
     endif
-
   endif
-
 endfunction
 
 
 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
-%!        [1 1 2; 1 0 1])
-%!assert (uniquetol ([]), [])
+%!        [1 0 1; 1 1 2])
 %!assert (uniquetol ([1]), [1])
 %!assert (uniquetol ([2, 1]), [1, 2]);
 %!assert (uniquetol ([1; 2]), [1; 2])
 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]);
-%!assert (uniquetol (zeros (1, 0)), zeros (1, 0));
-%!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0))
 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4])
 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4])
 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4])
@@ -296,6 +323,12 @@
 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true),
 %!        single ([1;2;3;4]))
 
+## Test 2D array sorting
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! assert (uniquetol (a), [1:10,12,14,16,18]')
+%! assert (uniquetol (a, "byrows", true), sortrows (a))
+
 ## Matlab compatibility of output
 %!test
 %! x = 1:0.045:3;
@@ -314,8 +347,8 @@
 %! A = [2, 3, 4; 2, 3, 4];
 %! [c, ia, ic] = uniquetol (A, "byrows", true);
 %! assert (c, [2, 3, 4]);
-%! assert (A(ia,:), c);
-%! assert (c(ic,:), A);
+%! assert (ia, 1);
+%! assert (ic, [1;1]);
 
 %!test
 %! x = (2:7)'*pi;
@@ -328,7 +361,7 @@
 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95];
 %! B = log (exp (A));
 %! C = uniquetol ([A; B], "ByRows", true);
-%! assert (C, A);
+%! assert (C, sortrows(A), 10*eps);
 
 ## Test "DataScale" Property
 %!test
@@ -341,9 +374,110 @@
 %! A = [.1 .2 .3 10];
 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true);
 %! assert (C, [.1, 10]);
-%! assert (ia, {(1:3)', 4});
+%! assert (ia, {(1:3)'; 4});
 %! assert (ic, [1; 1; 1; 2]);
 
+## Test NaN inputs
+%!assert (uniquetol (NaN), NaN)
+%!assert (uniquetol ([NaN NaN]), [NaN NaN])
+%!assert (uniquetol ([NaN NaN]'), [NaN NaN]')
+%!assert (uniquetol (NaN(2,2)), NaN(4,1))
+
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! a(4:5) = NaN;
+%! [c, ia, ic] = uniquetol (a);
+%! assert (c, [1:10,12,14,18, NaN, NaN]');
+%! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]');
+%! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]');
+%! [c, ia, ic] = uniquetol (single (a));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "ByRows", true);
+%! assert (c, sortrows (a));
+%! assert (ia, [2,3,1,6,4,5]');
+%! assert (ic, [3,1,2,5,6,4]');
+%! [c, ia, ic] = uniquetol (single (a), "ByRows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true);
+%! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
+%! assert (ia, {2;3;1;6;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+
+## Test empty input compatibility
+%!test
+%! [c, ia, ic] = uniquetol ([]);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol ([], "byrows", true);
+%! assert (c, []);
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true);
+%! assert (c, ones (1,0));
+%! assert (ia, 1);
+%! assert (ic, 1);
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0,2));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1,2));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,2,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (single ([]));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+
+
 ## Test input validation
 %!error <Invalid call> uniquetol ()
 %!error <A must be a double or single precision> uniquetol (int8 (1))
@@ -354,6 +488,9 @@
 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)