# HG changeset patch
# User Rik <rik@octave.org>
# Date 1657066484 25200
# Node ID 4581402b1c5b785b41497e9ab18982c0f4e1b57b
# Parent  df030ac26390199ab1fccc003fca248288dcefe1
uniquetol.m: Simplify code for "ByRows" option (bug #59850).

Use standard form for error() messages and update input validation BIST tests
to pass.

* uniquetol.m: Use "real" instead of "non-complex" in documentation and in
error() messages.  Update input validation BIST tests to pass with new
messages.  Update documentation example so that text matches actual output
of Octave.  Use isreal() rather than iscomplex() in input validation.
New variable "calc_indices" which indicates whether outputs ia, ic should
be calculated.  Use "calc_indices" to reduce running unnecessary code.
In ByRows code, eliminate Iall variable and use J for the same purpose.
Eliminate linear search ("any (Iall == i)") with direct lookup ("if (J(i))").
Eliminate variables sumeq, ii.  Introduce intermediate variable "Arow_i" for
clarity.  Rename "equ" to "eq_rows" for clarity.  Use '!' instead of '~'
for logical negation.

diff -r df030ac26390 -r 4581402b1c5b scripts/set/uniquetol.m
--- a/scripts/set/uniquetol.m	Tue Jul 05 15:22:46 2022 -0400
+++ b/scripts/set/uniquetol.m	Tue Jul 05 17:14:44 2022 -0700
@@ -33,8 +33,8 @@
 ## Two values, @var{x} and @var{y}, are within relative tolerance if
 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
 ##
-## The input @var{A} must be a non-complex floating point type (double or
-## single).
+## The input @var{A} must be a real (non-complex) floating point type (double
+## or single).
 ##
 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
 ## precision input or 1e-6 for single precision input.
@@ -90,7 +90,7 @@
 ## ## Inverse_Function (Function (x)) should return exactly x
 ## y = exp (log (x));
 ## D = unique ([x, y])
-## @result{} [1.0000   2.0000   3.0000   3.0000   4.0000   5.0000   5.0000]
+## @result{} [1   2   3   3   4   5   5]
 ## C = uniquetol ([x, y])
 ## @result{} [1   2   3   4   5]
 ## @end group
@@ -105,29 +105,30 @@
     print_usage ();
   endif
 
-  if (! isfloat (A) || iscomplex (A))
+  if (! (isfloat (A) && isreal (A)))
     error ("Octave:uniquetol:unsupported-type",
-           "uniquetol: A must be a double or single precision non-complex array");
+           "uniquetol: A must be a real floating point array");
   endif
 
   if (nargin == 1 || ischar (varargin{1}))
     tol = ifelse (isa (A, "double"), 1e-12, 1e-6);
-  elseif (! (isfloat (varargin{1}) && isscalar (varargin{1}))
-          || iscomplex (varargin{1}))
-    error ("Octave:uniquetol:unsupported-type",
-           "uniquetol: TOL must be a double or single precision non-complex scalar");
   else
     tol = varargin{1};
     varargin(1) = [];
+    if (! (isfloat (tol) && isreal (tol) && isscalar (tol)))
+      error ("Octave:uniquetol:unsupported-type",
+             "uniquetol: TOL must be a real floating point scalar");
+     endif
   endif
 
   if (mod (numel (varargin), 2))
-    error ("uniquetol: PROPERTY/VALUE arguments must be passed in pairs");
+    error ("uniquetol: PROPERTY/VALUE arguments must occur in pairs");
   endif
 
   by_rows = false;
   output_all_indices = false;
   data_scale = [];
+  calc_indices = nargout > 1;
 
   for k = 1:2:numel (varargin)
     if (! ischar (varargin{k}))
@@ -140,12 +141,12 @@
         error ('uniquetol: A must be a 2-D array when "ByRows" is true');
       endif
     elseif (strcmpi (varargin{k}, "OutputAllIndices"))
-      output_all_indices = logical (varargin{k+1});
+      output_all_indices = logical (varargin{k+1}) & calc_indices;
     elseif (strcmpi (varargin{k}, "DataScale"))
       data_scale = varargin{k+1}(:).';
       if (! isfloat (data_scale) || iscomplex (data_scale)
           || any (data_scale(:) < 0) || any (isnan (data_scale(:))))
-        error ("uniquetol: DataScale must be a non-NaN, positive floating point scalar or vector");
+        error ("uniquetol: DataScale must be a positive floating point scalar or vector, without NaNs");
       endif
       cols_data_scale = columns (data_scale);
       if (cols_data_scale != 1 && cols_data_scale != columns (A))
@@ -157,24 +158,20 @@
   endfor
 
   if (isempty (A))
+    ## hack for Matlab empty input compatibility
     sz_A = size (A);
-    ## hack for Matlab empty input compatibility
     if (by_rows)
       c = A;
       sz_A(2) = 1;
       ia = ones (sz_A);
       ic = ones (sz_A);
     else
-      c = ones (0,1);
-        if (sz_A(1) == 1)
-          c = c.';
-        endif
-      ia = ones (0,1);
-      ic = ones (0,1);
-    endif
-    if (isa (A, "single"))
-      ## c follows class of A, ia and ic are always class "double".
-      c = single (c);
+      c = ones (0, 1, class (A));
+      if (sz_A(1) == 1)
+        c = c.';
+      endif
+      ia = ones (0, 1);
+      ic = ones (0, 1);
     endif
     return;
   endif
@@ -186,48 +183,49 @@
   tol *= data_scale;
 
   if (by_rows)
-
-    ##start matrix in sorted order, retain sorting and inverting indices
-    [A, srtA] = sortrows (A);
-    [~, inv_srtA] = sort (srtA);
+    ## Start matrix in sorted order, retain sorting and inverting indices.
+    if (calc_indices)
+      [A, srtA] = sortrows (A);
+      [~, inv_srtA] = sort (srtA);
+    else
+      A = sortrows (A);
+    endif
 
     [nr, nc] = size (A);
-    Iall = zeros (nr, 1);
-    I = NaN (nc, 1);
+    I = zeros (nr, 1);
     ia = {};
-    J = NaN (nc, 1);
+    J = zeros (nr, 1);
     j = 1;
-    ii = 0;
 
     for i = 1:nr
-      if (any (Iall == i))
-        continue;
+      if (J(i))
+        continue;  # row previously compared equal
+      endif
+
+      Arow_i = A(i,:);
+      eq_rows = all (abs (A - Arow_i) <= tol, 2);
+      eq_rows(i,1) = eq_rows(i,1) || any (! isfinite (Arow_i), 2);
+      if (output_all_indices)
+        ia_tmp = find (eq_rows);
+        ia{end+1,1} = sort (srtA(ia_tmp));
       else
-        equ = all (abs (A - A(i,:)) <= tol, 2);
-        equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2);
-        sumeq = sum (equ);
-        ia_tmp = find (equ);
-        if (output_all_indices)
-          ia{end+1,1} = sort (srtA(ia_tmp));
-        endif
-        Iall(ii+(1:sumeq)) = ia_tmp;
-        I(j) = ia_tmp(1);
-        J(equ) = j;
-        ii += sumeq;
-        j += 1;
+        ia_tmp = find (eq_rows, 1);
       endif
+      I(j) = ia_tmp(1);
+      J(eq_rows) = j;
+      j += 1;
     endfor
 
-    I(isnan (I)) = [];
-    J(isnan (J)) = [];
+    I = I(1:j-1);
     c = A(I,:);
 
-    if (! output_all_indices)
-      ia = srtA(I(1:j-1));
+    if (calc_indices)
+      if (! output_all_indices)
+        ia = srtA(I(1:j-1));
+      endif
+      ic = J(inv_srtA);
     endif
 
-    ic = J(inv_srtA);
-
   else
     isrowvec = isrow (A);
     A = A(:);
@@ -277,7 +275,7 @@
 
     if (anyisnanA)
       rowsc1 = [1:sum(isnanA(:))]';
-      if (~all (isnanA))
+      if (! all (isnanA))
         rowsc1 += rows (c);
       endif
       c(rowsc1) = NaN;
@@ -288,20 +286,21 @@
         ia(rowsc1) = findisnanA;
       endif
 
-      ## if numel(c) was 1, appending NaNs creates a row vector instead of
+      ## if numel (c) was 1, appending NaNs creates a row vector instead of
       ## expected column vector.
       if (isrow (c))
         c = c.';
       endif
     endif
 
-    ## Matlab compatibility - outputs are column vectors unless the input
+    ## Matlab compatibility: Outputs are column vectors unless the input
     ## is a row vector, in which case the output c is also a row vector.
     ## ia and ic are always column vectors. (verified Matlab 2022a)
     if (isrowvec)
       c = c.';
     endif
   endif
+
 endfunction
 
 
@@ -408,9 +407,10 @@
 %! assert (class (c), "single");
 %! assert (class (ia{1}), "double");
 %! assert (class (ic), "double");
-%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "ByRows", true);
 %! assert (ia, {2;3;1;6;4;5});
-%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
+%! [c, ia, ic] = uniquetol (single (a),
+%!                          "OutputAllIndices", true, "ByRows", true);
 %! assert (class (c), "single");
 %! assert (class (ia{1}), "double");
 %! assert (class (ic), "double");
@@ -480,21 +480,21 @@
 
 ## Test input validation
 %!error <Invalid call> uniquetol ()
-%!error <A must be a double or single precision> uniquetol (int8 (1))
-%!error <A must be .* non-complex> uniquetol (1i)
-%!error <TOL must be a double .* precision> uniquetol (1, int8 (1))
-%!error <TOL must be a .* scalar> uniquetol (1, [1, 2])
-%!error <TOL must be .* non-complex> uniquetol (1, 1i)
-%!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
+%!error <A must be a real floating point array> uniquetol (int8 (1))
+%!error <A must be a real floating point array> uniquetol (1i)
+%!error <TOL must be a real floating point scalar> uniquetol (1, int8 (1))
+%!error <TOL must be a real floating point scalar> uniquetol (1, [1, 2])
+%!error <TOL must be a real floating point scalar> uniquetol (1, 1i)
+%!error <arguments must occur in pairs> uniquetol (1, 2, "byrows")
 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
 %!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
 %!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
 %!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
+%!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
-%!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)
-%!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN)
+%!error <DataScale must be .* without NaNs> uniquetol (1, "DataScale", NaN)
 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2])
 %!error <unknown property 'foo'> uniquetol (1, "foo", "bar")
 %!error <unknown property 'foo'> uniquetol (1, 2, "foo", "bar")