# HG changeset patch
# User Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
# Date 1657048966 14400
# Node ID df030ac26390199ab1fccc003fca248288dcefe1
# Parent  18b8f73595e0d77a3b6ff786ea39c146c0900982
uniquetol.m: improve matlab compatibility and add byrows sorting (bug #59850)

* /scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to
'byrows' output, ensure ia and ic outputs have column orientation for arrays
and cells, verify consistent single class handling, add BISTs for
aforementioned cases, and update docstring to note non-complex input
requirement.

diff -r 18b8f73595e0 -r df030ac26390 scripts/set/uniquetol.m
--- a/scripts/set/uniquetol.m	Tue Jul 05 10:38:41 2022 -0700
+++ b/scripts/set/uniquetol.m	Tue Jul 05 15:22:46 2022 -0400
@@ -33,7 +33,8 @@
 ## Two values, @var{x} and @var{y}, are within relative tolerance if
 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
 ##
-## The input @var{A} must be a floating point type (double or single).
+## The input @var{A} must be a non-complex floating point type (double or
+## single).
 ##
 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
 ## precision input or 1e-6 for single precision input.
@@ -98,20 +99,12 @@
 ## @seealso{unique, union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 
-
 function [c, ia, ic] = uniquetol (A, varargin)
 
   if (nargin < 1)
     print_usage ();
   endif
 
-  if (isempty (A))
-    c = A;
-    ia = [];
-    ic = [];
-    return;
-  endif
-
   if (! isfloat (A) || iscomplex (A))
     error ("Octave:uniquetol:unsupported-type",
            "uniquetol: A must be a double or single precision non-complex array");
@@ -163,6 +156,29 @@
     endif
   endfor
 
+  if (isempty (A))
+    sz_A = size (A);
+    ## hack for Matlab empty input compatibility
+    if (by_rows)
+      c = A;
+      sz_A(2) = 1;
+      ia = ones (sz_A);
+      ic = ones (sz_A);
+    else
+      c = ones (0,1);
+        if (sz_A(1) == 1)
+          c = c.';
+        endif
+      ia = ones (0,1);
+      ic = ones (0,1);
+    endif
+    if (isa (A, "single"))
+      ## c follows class of A, ia and ic are always class "double".
+      c = single (c);
+    endif
+    return;
+  endif
+
   if (isempty (data_scale))
     data_scale = max (abs (A(! isinf (A))(:)));
   endif
@@ -171,8 +187,11 @@
 
   if (by_rows)
 
-    nr = rows (A);
-    nc = columns (A);
+    ##start matrix in sorted order, retain sorting and inverting indices
+    [A, srtA] = sortrows (A);
+    [~, inv_srtA] = sort (srtA);
+
+    [nr, nc] = size (A);
     Iall = zeros (nr, 1);
     I = NaN (nc, 1);
     ia = {};
@@ -189,7 +208,7 @@
         sumeq = sum (equ);
         ia_tmp = find (equ);
         if (output_all_indices)
-          ia{end+1} = ia_tmp;
+          ia{end+1,1} = sort (srtA(ia_tmp));
         endif
         Iall(ii+(1:sumeq)) = ia_tmp;
         I(j) = ia_tmp(1);
@@ -204,9 +223,10 @@
     c = A(I,:);
 
     if (! output_all_indices)
-      ia = I(1:j-1);
+      ia = srtA(I(1:j-1));
     endif
-    ic = J;
+
+    ic = J(inv_srtA);
 
   else
     isrowvec = isrow (A);
@@ -247,7 +267,7 @@
     endif
     if (output_all_indices)
       nu = cumsumue(end);
-      ia = cell (1, nu);
+      ia = cell (nu, 1);
       for k = 1:nu
         ia{k} = setdiff (sAi(cumsumue==k), findisnanA);
       endfor
@@ -256,35 +276,42 @@
     endif
 
     if (anyisnanA)
-      rowsc1 = rows (c) + (1:sum (isnanA));
+      rowsc1 = [1:sum(isnanA(:))]';
+      if (~all (isnanA))
+        rowsc1 += rows (c);
+      endif
       c(rowsc1) = NaN;
-      ia(rowsc1) = findisnanA;
       ic(isnanA) = rowsc1;
+      if (output_all_indices)
+        ia(rowsc1) = num2cell (findisnanA);
+      else
+        ia(rowsc1) = findisnanA;
+      endif
+
+      ## if numel(c) was 1, appending NaNs creates a row vector instead of
+      ## expected column vector.
+      if (isrow (c))
+        c = c.';
+      endif
     endif
 
-    ## FIXME: Matlab-compatible orientation of output
-    ## Actually, Matlab prefers row vectors (2021/03/24), but this is different
-    ## from all the other set functions which prefer column vectors.  Assume
-    ## that this is a bug in Matlab's implementation and prefer column vectors.
+    ## Matlab compatibility - outputs are column vectors unless the input
+    ## is a row vector, in which case the output c is also a row vector.
+    ## ia and ic are always column vectors. (verified Matlab 2022a)
     if (isrowvec)
       c = c.';
     endif
-
   endif
-
 endfunction
 
 
 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
-%!        [1 1 2; 1 0 1])
-%!assert (uniquetol ([]), [])
+%!        [1 0 1; 1 1 2])
 %!assert (uniquetol ([1]), [1])
 %!assert (uniquetol ([2, 1]), [1, 2]);
 %!assert (uniquetol ([1; 2]), [1; 2])
 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]);
-%!assert (uniquetol (zeros (1, 0)), zeros (1, 0));
-%!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0))
 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4])
 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4])
 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4])
@@ -296,6 +323,12 @@
 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true),
 %!        single ([1;2;3;4]))
 
+## Test 2D array sorting
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! assert (uniquetol (a), [1:10,12,14,16,18]')
+%! assert (uniquetol (a, "byrows", true), sortrows (a))
+
 ## Matlab compatibility of output
 %!test
 %! x = 1:0.045:3;
@@ -314,8 +347,8 @@
 %! A = [2, 3, 4; 2, 3, 4];
 %! [c, ia, ic] = uniquetol (A, "byrows", true);
 %! assert (c, [2, 3, 4]);
-%! assert (A(ia,:), c);
-%! assert (c(ic,:), A);
+%! assert (ia, 1);
+%! assert (ic, [1;1]);
 
 %!test
 %! x = (2:7)'*pi;
@@ -328,7 +361,7 @@
 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95];
 %! B = log (exp (A));
 %! C = uniquetol ([A; B], "ByRows", true);
-%! assert (C, A);
+%! assert (C, sortrows(A), 10*eps);
 
 ## Test "DataScale" Property
 %!test
@@ -341,9 +374,110 @@
 %! A = [.1 .2 .3 10];
 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true);
 %! assert (C, [.1, 10]);
-%! assert (ia, {(1:3)', 4});
+%! assert (ia, {(1:3)'; 4});
 %! assert (ic, [1; 1; 1; 2]);
 
+## Test NaN inputs
+%!assert (uniquetol (NaN), NaN)
+%!assert (uniquetol ([NaN NaN]), [NaN NaN])
+%!assert (uniquetol ([NaN NaN]'), [NaN NaN]')
+%!assert (uniquetol (NaN(2,2)), NaN(4,1))
+
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! a(4:5) = NaN;
+%! [c, ia, ic] = uniquetol (a);
+%! assert (c, [1:10,12,14,18, NaN, NaN]');
+%! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]');
+%! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]');
+%! [c, ia, ic] = uniquetol (single (a));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "ByRows", true);
+%! assert (c, sortrows (a));
+%! assert (ia, [2,3,1,6,4,5]');
+%! assert (ic, [3,1,2,5,6,4]');
+%! [c, ia, ic] = uniquetol (single (a), "ByRows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true);
+%! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
+%! assert (ia, {2;3;1;6;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+
+## Test empty input compatibility
+%!test
+%! [c, ia, ic] = uniquetol ([]);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol ([], "byrows", true);
+%! assert (c, []);
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true);
+%! assert (c, ones (1,0));
+%! assert (ia, 1);
+%! assert (ic, 1);
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0,2));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1,2));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,2,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (single ([]));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+
+
 ## Test input validation
 %!error <Invalid call> uniquetol ()
 %!error <A must be a double or single precision> uniquetol (int8 (1))
@@ -354,6 +488,9 @@
 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)