jwe/octave: scripts/set/uniquetol.m comparison

comparison scripts/set/uniquetol.m @ 31124:df030ac26390

uniquetol.m: improve matlab compatibility and add byrows sorting (bug #59850) * /scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to 'byrows' output, ensure ia and ic outputs have column orientation for arrays and cells, verify consistent single class handling, add BISTs for aforementioned cases, and update docstring to note non-complex input requirement.

author	Nicholas R. Jankowski <jankowski.nicholas@gmail.com>
date	Tue, 05 Jul 2022 15:22:46 -0400
parents	796f54d4ddbf
children	4581402b1c5b

comparison

equal deleted inserted replaced

-:18b8f73595e0
+:df030ac26390
 ## Return the unique elements of @var{A} within tolerance @var{tol}.
 ##
 ## Two values, @var{x} and @var{y}, are within relative tolerance if
 ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}.
 ##
-## The input @var{A} must be a floating point type (double or single).
+## The input @var{A} must be a non-complex floating point type (double or
+## single).
 ##
 ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double
 ## precision input or 1e-6 for single precision input.
 ##
 ## The function may also be called with the following optional property/value
 ## @end example
 ##
 ## @seealso{unique, union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 function [c, ia, ic] = uniquetol (A, varargin)
 if (nargin < 1)
 print_usage ();
-endif
-if (isempty (A))
-c = A;
-ia = [];
-ic = [];
-return;
 endif
 if (! isfloat (A) || iscomplex (A))
 error ("Octave:uniquetol:unsupported-type",
 "uniquetol: A must be a double or single precision non-complex array");
 else
 error ("uniquetol: unknown property '%s'", varargin{k});
 endif
 endfor
+if (isempty (A))
+sz_A = size (A);
+## hack for Matlab empty input compatibility
+if (by_rows)
+c = A;
+sz_A(2) = 1;
+ia = ones (sz_A);
+ic = ones (sz_A);
+else
+c = ones (0,1);
+if (sz_A(1) == 1)
+c = c.';
+endif
+ia = ones (0,1);
+ic = ones (0,1);
+endif
+if (isa (A, "single"))
+## c follows class of A, ia and ic are always class "double".
+c = single (c);
+endif
+return;
+endif
 if (isempty (data_scale))
 data_scale = max (abs (A(! isinf (A))(:)));
 endif
 tol *= data_scale;
 if (by_rows)
-nr = rows (A);
+##start matrix in sorted order, retain sorting and inverting indices
-nc = columns (A);
+[A, srtA] = sortrows (A);
+[~, inv_srtA] = sort (srtA);
+[nr, nc] = size (A);
 Iall = zeros (nr, 1);
 I = NaN (nc, 1);
 ia = {};
 J = NaN (nc, 1);
 j = 1;
 equ = all (abs (A - A(i,:)) <= tol, 2);
 equ(i,1) = equ(i,1) || any (! isfinite (A(i,:)), 2);
 sumeq = sum (equ);
 ia_tmp = find (equ);
 if (output_all_indices)
-ia{end+1} = ia_tmp;
+ia{end+1,1} = sort (srtA(ia_tmp));
 endif
 Iall(ii+(1:sumeq)) = ia_tmp;
 I(j) = ia_tmp(1);
 J(equ) = j;
 ii += sumeq;
 I(isnan (I)) = [];
 J(isnan (J)) = [];
 c = A(I,:);
 if (! output_all_indices)
-ia = I(1:j-1);
+ia = srtA(I(1:j-1));
 endif
-ic = J;
+ic = J(inv_srtA);
 else
 isrowvec = isrow (A);
 A = A(:);
 nr = rows (A);
 else
 findisnanA = [];
 endif
 if (output_all_indices)
 nu = cumsumue(end);
-ia = cell (1, nu);
+ia = cell (nu, 1);
 for k = 1:nu
 ia{k} = setdiff (sAi(cumsumue==k), findisnanA);
 endfor
 else
 ia = sAi(ue);
 endif
 if (anyisnanA)
-rowsc1 = rows (c) + (1:sum (isnanA));
+rowsc1 = [1:sum(isnanA(:))]';
+if (~all (isnanA))
+rowsc1 += rows (c);
+endif
 c(rowsc1) = NaN;
-ia(rowsc1) = findisnanA;
 ic(isnanA) = rowsc1;
-endif
+if (output_all_indices)
+ia(rowsc1) = num2cell (findisnanA);
-## FIXME: Matlab-compatible orientation of output
+else
-## Actually, Matlab prefers row vectors (2021/03/24), but this is different
+ia(rowsc1) = findisnanA;
-## from all the other set functions which prefer column vectors.  Assume
+endif
-## that this is a bug in Matlab's implementation and prefer column vectors.
+## if numel(c) was 1, appending NaNs creates a row vector instead of
+## expected column vector.
+if (isrow (c))
+c = c.';
+endif
+endif
+## Matlab compatibility - outputs are column vectors unless the input
+## is a row vector, in which case the output c is also a row vector.
+## ia and ic are always column vectors. (verified Matlab 2022a)
 if (isrowvec)
 c = c.';
 endif
+endif
-endif
 endfunction
 %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2])
 %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true),
-%!        [1 1 2; 1 0 1])
+%!        [1 0 1; 1 1 2])
-%!assert (uniquetol ([]), [])
 %!assert (uniquetol ([1]), [1])
 %!assert (uniquetol ([2, 1]), [1, 2]);
 %!assert (uniquetol ([1; 2]), [1; 2])
 %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]);
-%!assert (uniquetol (zeros (1, 0)), zeros (1, 0));
-%!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0))
 %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4])
 %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4])
 %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4])
 %!assert (uniquetol (sparse ([2,0;2,0])), sparse ([0;2]))
 %!assert (uniquetol (sparse ([1,2;2,3])), sparse ([1;2;3]))
 %!        single ([1,2,2,3,2,4]))
 %!assert (uniquetol (single ([1,2,2,3,2,4])), single ([1,2,3,4]))
 %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true),
 %!        single ([1;2;3;4]))
+## Test 2D array sorting
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! assert (uniquetol (a), [1:10,12,14,16,18]')
+%! assert (uniquetol (a, "byrows", true), sortrows (a))
 ## Matlab compatibility of output
 %!test
 %! x = 1:0.045:3;
 %! y = uniquetol (x, 0.1, "datascale", 1);
 %! assert (y(1:4), [1, 1.135, 1.27, 1.405]);
 ## Test index vector return arguments with "ByRows"
 %!test
 %! A = [2, 3, 4; 2, 3, 4];
 %! [c, ia, ic] = uniquetol (A, "byrows", true);
 %! assert (c, [2, 3, 4]);
-%! assert (A(ia,:), c);
+%! assert (ia, 1);
-%! assert (c(ic,:), A);
+%! assert (ic, [1;1]);
 %!test
 %! x = (2:7)'*pi;
 %! y = exp (log (x));
 %! C = uniquetol ([x; y]);
 ## Test "ByRows" Property
 %!test
 %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95];
 %! B = log (exp (A));
 %! C = uniquetol ([A; B], "ByRows", true);
-%! assert (C, A);
+%! assert (C, sortrows(A), 10*eps);
 ## Test "DataScale" Property
 %!test
 %! x = 10^11;
 %! C = uniquetol ([x, exp(log(x))], 1e-6, "DataScale", 1);
 ## Test "OutputAllIndices" Property
 %!test
 %! A = [.1 .2 .3 10];
 %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true);
 %! assert (C, [.1, 10]);
-%! assert (ia, {(1:3)', 4});
+%! assert (ia, {(1:3)'; 4});
 %! assert (ic, [1; 1; 1; 2]);
+## Test NaN inputs
+%!assert (uniquetol (NaN), NaN)
+%!assert (uniquetol ([NaN NaN]), [NaN NaN])
+%!assert (uniquetol ([NaN NaN]'), [NaN NaN]')
+%!assert (uniquetol (NaN(2,2)), NaN(4,1))
+%!test
+%! a = [magic(3); 2 * magic(3)];
+%! a(4:5) = NaN;
+%! [c, ia, ic] = uniquetol (a);
+%! assert (c, [1:10,12,14,18, NaN, NaN]');
+%! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]');
+%! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]');
+%! [c, ia, ic] = uniquetol (single (a));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "ByRows", true);
+%! assert (c, sortrows (a));
+%! assert (ia, [2,3,1,6,4,5]');
+%! assert (ic, [3,1,2,5,6,4]');
+%! [c, ia, ic] = uniquetol (single (a), "ByRows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true);
+%! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true);
+%! assert (ia, {2;3;1;6;4;5});
+%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia{1}), "double");
+%! assert (class (ic), "double");
+## Test empty input compatibility
+%!test
+%! [c, ia, ic] = uniquetol ([]);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol ([], "byrows", true);
+%! assert (c, []);
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true);
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true);
+%! assert (c, ones (1,0));
+%! assert (ia, 1);
+%! assert (ic, 1);
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,0,2));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (0,1,2));
+%! assert (c, ones (0,1));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (ones (1,2,0));
+%! assert (c, ones (1,0));
+%! assert (ia, ones (0,1));
+%! assert (ic, ones (0,1));
+%!test
+%! [c, ia, ic] = uniquetol (single ([]));
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "byrows", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
+%!test
+%! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true);
+%! assert (class (c), "single");
+%! assert (class (ia), "double");
+%! assert (class (ic), "double");
 ## Test input validation
 %!error <Invalid call> uniquetol ()
 %!error <A must be a double or single precision> uniquetol (int8 (1))
 %!error <A must be .* non-complex> uniquetol (1i)
 %!error <TOL must be a .* scalar> uniquetol (1, [1, 2])
 %!error <TOL must be .* non-complex> uniquetol (1, 1i)
 %!error <arguments must be passed in pairs> uniquetol (1, 2, "byrows")
 %!error <PROPERTY must be a string> uniquetol (1, 2, 3, "bar")
 %!error <A must be a 2-D array> uniquetol (ones (2,2,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (0,1,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,0,2), "byrows", true)
+%!error <A must be a 2-D array> uniquetol (ones (1,2,0), "byrows", true)
 %!error <DataScale must be a .* floating point> uniquetol (1, "DataScale", '1')
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", -1)
 %!error <DataScale must be .* positive> uniquetol (1, "DataScale", 1i)
 %!error <DataScale must be a non-NaN> uniquetol (1, "DataScale", NaN)
 %!error <invalid DataScale size> uniquetol (1, "DataScale", [1 2])

Mercurial > jwe > octave

comparison scripts/set/uniquetol.m @ 31124:df030ac26390