Mercurial > jwe > octave

--- a/scripts/set/uniquetol.m	Tue Jul 05 10:38:41 2022 -0700
+++ b/scripts/set/uniquetol.m	Tue Jul 05 15:22:46 2022 -0400
@@ -33,7 +33,8 @@
 ## Two values, @var{x} and @var{y}, are within relative tolerance if
 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
 ##
-## The input @var{A} must be a floating point type (double or single).
+## The input @var{A} must be a non-complex floating point type (double or
+## single).
 ##
 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
 ## precision input or 1e-6 for single precision input.
@@ -98,20 +99,12 @@
 ## @seealso{unique, union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn

-
 function [c, ia, ic] = uniquetol (A, varargin)

   if (nargin < 1)
     print_usage ();
   endif

-  if (isempty (A))
-    c = A;
-    ia = [];
-    ic = [];
-    return;
-  endif
-
   if (! isfloat (A) || iscomplex (A))
     error ("Octave:uniquetol:unsupported-type",
            "uniquetol: A must be a double or single precision non-complex array");
@@ -163,6 +156,29 @@
     endif
   endfor

+  if (isempty (A))
+    sz_A = size (A);
+    ## hack for Matlab empty input compatibility
+    if (by_rows)
+      c = A;
+      sz_A(2) = 1;
+      ia = ones (sz_A);
+      ic = ones (sz_A);
+    else
+      c = ones (0,1);
+        if (sz_A(1) == 1)
+          c = c.';
+        endif
+      ia = ones (0,1);
+      ic = ones (0,1);
+    endif
+    if (isa (A, "single"))
+      ## c follows class of A, ia and ic are always class "double".
+      c = single (c);
+    endif
+    return;
+  endif
+
   if (isempty (data_scale))
     data_scale = max (abs (A(! isinf (A))(:)));
   endif
@@ -171,8 +187,11 @@

   if (by_rows)

-    nr = rows (A);
-    nc = columns (A);
+    ##start matrix in sorted order, retain sorting and inverting indices
+    [A, srtA] = sortrows (A);
+    [~, inv_srtA] = sort (srtA);
+
+    [nr, nc] = size (A);
     Iall = zeros (nr, 1);
     I = NaN (nc, 1);
     ia = {};
@@ -189,7 +208,7 @@
         sumeq = sum (equ);
         ia_tmp = find (equ);
         if (output_all_indices)
-          ia{end+1} = ia_tmp;
+          ia{end+1,1} = sort (srtA(ia_tmp));
         endif
         Iall(ii+(1:sumeq)) = ia_tmp;
         I(j) = ia_tmp(1);
@@ -204,9 +223,10 @@
     c = A(I,:);

     if (! output_all_indices)
-      ia = I(1:j-1);
+      ia = srtA(I(1:j-1));
     endif
-    ic = J;
+
+    ic = J(inv_srtA);

   else
     isrowvec = isrow (A);
@@ -247,7 +267,7 @@
     endif
     if (output_all_indices)
       nu = cumsumue(end);
-      ia = cell (1, nu);
+      ia = cell (nu, 1);
       for k = 1:nu
         ia{k} = setdiff (sAi(cumsumue==k), findisnanA);
       endfor
@@ -256,35 +276,42 @@
     endif

     if (anyisnanA)
-      rowsc1 = rows (c) + (1:sum (isnanA));
+      rowsc1 = [1:sum(isnanA(:))]';
+      if (~all (isnanA))
+        rowsc1 += rows (c);
+      endif
       c(rowsc1) = NaN;
-      ia(rowsc1) = findisnanA;
       ic(isnanA) = rowsc1;
+      if (output_all_indices)
+        ia(rowsc1) = num2cell (findisnanA);
+      else
+        ia(rowsc1) = findisnanA;
+      endif
+
+      ## if numel(c) was 1, appending NaNs creates a row vector instead of
+      ## expected column vector.
+      if (isrow (c))
+        c = c.';
+      endif
     endif

-    ## FIXME: Matlab-compatible orientation of output
-    ## Actually, Matlab prefers row vectors (2021/03/24), but this is different
-    ## from all the other set functions which prefer column vectors.  Assume
-    ## that this is a bug in Matlab's implementation and prefer column vectors.
+    ## Matlab compatibility - outputs are column vectors unless the input
+    ## is a row vector, in which case the output c is also a row vector.
+    ## ia and ic are always column vectors. (verified Matlab 2022a)
     if (isrowvec)
       c = c.';
     endif
-
   endif
-
 endfunction


 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
-%!        [1 1 2; 1 0 1])
-%!assert (uniquetol ([]), [])
+%!        [1 0 1; 1 1 2])
 %!assert (uniquetol ([1]), [1])
 %!assert (uniquetol ([2, 1]), [1, 2]);
 %!assert (uniquetol ([1; 2]), [1; 2])
 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]);
-%!assert (uniquetol (zeros (1, 0)), zeros (1, 0));
-%!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0))
 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4])
 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4])
 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4])
@@ -296,6 +323,12 @@
 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true),
 %!        single ([1;2;3;4]))

+## Test 2D array sorting
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! assert (uniquetol (a), [1:10,12,14,16,18]')
+%! assert (uniquetol (a, "byrows", true), sortrows (a))
+
 ## Matlab compatibility of output
 %!test
 %! x = 1:0.045:3;
@@ -314,8 +347,8 @@
 %! A = [2, 3, 4; 2, 3, 4];
 %! [c, ia, ic] = uniquetol (A, "byrows", true);
 %! assert (c, [2, 3, 4]);
-%! assert (A(ia,:), c);
-%! assert (c(ic,:), A);
+%! assert (ia, 1);
+%! assert (ic, [1;1]);

 %!test
 %! x = (2:7)'*pi;
@@ -328,7 +361,7 @@
 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95];
 %! B = log (exp (A));
 %! C = uniquetol ([A; B], "ByRows", true);
-%! assert (C, A);
+%! assert (C, sortrows(A), 10*eps);

 ## Test "DataScale" Property
 %!test
@@ -341,9 +374,110 @@
 %! A = [.1 .2 .3 10];
 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true);
 %! assert (C, [.1, 10]);
-%! assert (ia, {(1:3)', 4});
+%! assert (ia, {(1:3)'; 4});
 %! assert (ic, [1; 1; 1; 2]);

+## Test NaN inputs
+%!assert (uniquetol (NaN), NaN)
+%!assert (uniquetol ([NaN NaN]), [NaN NaN])
+%!assert (uniquetol ([NaN NaN]'), [NaN NaN]')
+%!assert (uniquetol (NaN(2,2)), NaN(4,1))
+
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! a(4:5) = NaN;
+%! [c, ia, ic] = uniquetol (a);
+%! assert (c, [1:10,12,14,18, NaN, NaN]');
+%! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]');
+%! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]');
+%! [c, ia, ic] = uniquetol (single (a));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "ByRows", true);
+%! assert (c, sortrows (a));
+%! assert (ia, [2,3,1,6,4,5]');
+%! assert (ic, [3,1,2,5,6,4]');
+%! [c, ia, ic] = uniquetol (single (a), "ByRows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true);
+%! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
+%! assert (ia, {2;3;1;6;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+
+## Test empty input compatibility
+%!test
+%! [c, ia, ic] = uniquetol ([]);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol ([], "byrows", true);
+%! assert (c, []);
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true);
+%! assert (c, ones (1,0));
+%! assert (ia, 1);
+%! assert (ic, 1);
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0,2));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1,2));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,2,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (single ([]));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+
+
 ## Test input validation
 %!error <Invalid call> uniquetol ()
 %!error <A must be a double or single precision> uniquetol (int8 (1))
@@ -354,6 +488,9 @@
 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)