# HG changeset patch # User Nicholas R. Jankowski # Date 1657048966 14400 # Node ID df030ac26390199ab1fccc003fca248288dcefe1 # Parent 18b8f73595e0d77a3b6ff786ea39c146c0900982 uniquetol.m: improve matlab compatibility and add byrows sorting (bug #59850) * /scripts/set/uniquetol.m: improve empty and NaN handling, add sorting to 'byrows' output, ensure ia and ic outputs have column orientation for arrays and cells, verify consistent single class handling, add BISTs for aforementioned cases, and update docstring to note non-complex input requirement. diff -r 18b8f73595e0 -r df030ac26390 scripts/set/uniquetol.m --- a/scripts/set/uniquetol.m Tue Jul 05 10:38:41 2022 -0700 +++ b/scripts/set/uniquetol.m Tue Jul 05 15:22:46 2022 -0400 @@ -33,7 +33,8 @@ ## Two values, @var{x} and @var{y}, are within relative tolerance if ## @code{abs (@var{x} - @var{y}) <= @var{tol} * max (abs (@var{A}(:)))}. ## -## The input @var{A} must be a floating point type (double or single). +## The input @var{A} must be a non-complex floating point type (double or +## single). ## ## If @var{tol} is unspecified, the default tolerance is 1e-12 for double ## precision input or 1e-6 for single precision input. @@ -98,20 +99,12 @@ ## @seealso{unique, union, intersect, setdiff, setxor, ismember} ## @end deftypefn - function [c, ia, ic] = uniquetol (A, varargin) if (nargin < 1) print_usage (); endif - if (isempty (A)) - c = A; - ia = []; - ic = []; - return; - endif - if (! isfloat (A) || iscomplex (A)) error ("Octave:uniquetol:unsupported-type", "uniquetol: A must be a double or single precision non-complex array"); @@ -163,6 +156,29 @@ endif endfor + if (isempty (A)) + sz_A = size (A); + ## hack for Matlab empty input compatibility + if (by_rows) + c = A; + sz_A(2) = 1; + ia = ones (sz_A); + ic = ones (sz_A); + else + c = ones (0,1); + if (sz_A(1) == 1) + c = c.'; + endif + ia = ones (0,1); + ic = ones (0,1); + endif + if (isa (A, "single")) + ## c follows class of A, ia and ic are always class "double". + c = single (c); + endif + return; + endif + if (isempty (data_scale)) data_scale = max (abs (A(! isinf (A))(:))); endif @@ -171,8 +187,11 @@ if (by_rows) - nr = rows (A); - nc = columns (A); + ##start matrix in sorted order, retain sorting and inverting indices + [A, srtA] = sortrows (A); + [~, inv_srtA] = sort (srtA); + + [nr, nc] = size (A); Iall = zeros (nr, 1); I = NaN (nc, 1); ia = {}; @@ -189,7 +208,7 @@ sumeq = sum (equ); ia_tmp = find (equ); if (output_all_indices) - ia{end+1} = ia_tmp; + ia{end+1,1} = sort (srtA(ia_tmp)); endif Iall(ii+(1:sumeq)) = ia_tmp; I(j) = ia_tmp(1); @@ -204,9 +223,10 @@ c = A(I,:); if (! output_all_indices) - ia = I(1:j-1); + ia = srtA(I(1:j-1)); endif - ic = J; + + ic = J(inv_srtA); else isrowvec = isrow (A); @@ -247,7 +267,7 @@ endif if (output_all_indices) nu = cumsumue(end); - ia = cell (1, nu); + ia = cell (nu, 1); for k = 1:nu ia{k} = setdiff (sAi(cumsumue==k), findisnanA); endfor @@ -256,35 +276,42 @@ endif if (anyisnanA) - rowsc1 = rows (c) + (1:sum (isnanA)); + rowsc1 = [1:sum(isnanA(:))]'; + if (~all (isnanA)) + rowsc1 += rows (c); + endif c(rowsc1) = NaN; - ia(rowsc1) = findisnanA; ic(isnanA) = rowsc1; + if (output_all_indices) + ia(rowsc1) = num2cell (findisnanA); + else + ia(rowsc1) = findisnanA; + endif + + ## if numel(c) was 1, appending NaNs creates a row vector instead of + ## expected column vector. + if (isrow (c)) + c = c.'; + endif endif - ## FIXME: Matlab-compatible orientation of output - ## Actually, Matlab prefers row vectors (2021/03/24), but this is different - ## from all the other set functions which prefer column vectors. Assume - ## that this is a bug in Matlab's implementation and prefer column vectors. + ## Matlab compatibility - outputs are column vectors unless the input + ## is a row vector, in which case the output c is also a row vector. + ## ia and ic are always column vectors. (verified Matlab 2022a) if (isrowvec) c = c.'; endif - endif - endfunction %!assert (uniquetol ([1 1 2; 1 2 1; 1 1 2+10*eps]), [1;2]) %!assert (uniquetol ([1 1 2; 1 0 1; 1 1 2+10*eps], "byrows", true), -%! [1 1 2; 1 0 1]) -%!assert (uniquetol ([]), []) +%! [1 0 1; 1 1 2]) %!assert (uniquetol ([1]), [1]) %!assert (uniquetol ([2, 1]), [1, 2]); %!assert (uniquetol ([1; 2]), [1; 2]) %!assert (uniquetol ([-Inf, 1, NaN, Inf, NaN, Inf]), [-Inf, 1, Inf, NaN, NaN]); -%!assert (uniquetol (zeros (1, 0)), zeros (1, 0)); -%!assert (uniquetol (zeros (1, 0), "byrows", true), zeros (1, 0)) %!assert (uniquetol ([1,2,2,3,2,4], "byrows", true), [1,2,2,3,2,4]) %!assert (uniquetol ([1,2,2,3,2,4]), [1,2,3,4]) %!assert (uniquetol ([1,2,2,3,2,4].', "byrows", true), [1;2;3;4]) @@ -296,6 +323,12 @@ %!assert (uniquetol (single ([1,2,2,3,2,4].'), "byrows", true), %! single ([1;2;3;4])) +## Test 2D array sorting +%!test +%! a = [magic(3); 2 * magic(3)]; +%! assert (uniquetol (a), [1:10,12,14,16,18]') +%! assert (uniquetol (a, "byrows", true), sortrows (a)) + ## Matlab compatibility of output %!test %! x = 1:0.045:3; @@ -314,8 +347,8 @@ %! A = [2, 3, 4; 2, 3, 4]; %! [c, ia, ic] = uniquetol (A, "byrows", true); %! assert (c, [2, 3, 4]); -%! assert (A(ia,:), c); -%! assert (c(ic,:), A); +%! assert (ia, 1); +%! assert (ic, [1;1]); %!test %! x = (2:7)'*pi; @@ -328,7 +361,7 @@ %! A = [0.06, 0.21, 0.38; 0.38, 0.21, 0.39; 0.54, 0.56, 0.41; 0.46, 0.52, 0.95]; %! B = log (exp (A)); %! C = uniquetol ([A; B], "ByRows", true); -%! assert (C, A); +%! assert (C, sortrows(A), 10*eps); ## Test "DataScale" Property %!test @@ -341,9 +374,110 @@ %! A = [.1 .2 .3 10]; %! [C, ia, ic] = uniquetol (A, .1, "OutputAllIndices", true); %! assert (C, [.1, 10]); -%! assert (ia, {(1:3)', 4}); +%! assert (ia, {(1:3)'; 4}); %! assert (ic, [1; 1; 1; 2]); +## Test NaN inputs +%!assert (uniquetol (NaN), NaN) +%!assert (uniquetol ([NaN NaN]), [NaN NaN]) +%!assert (uniquetol ([NaN NaN]'), [NaN NaN]') +%!assert (uniquetol (NaN(2,2)), NaN(4,1)) + +%!test +%! a = [magic(3); 2 * magic(3)]; +%! a(4:5) = NaN; +%! [c, ia, ic] = uniquetol (a); +%! assert (c, [1:10,12,14,18, NaN, NaN]'); +%! assert (ia, [7,10,2,3,8,13,14,1,9,11,16,17,12,4,5]'); +%! assert (ic, [8,3,4,14,15,8,1,5,9,2,10,13,6,7,2,11,12,4]'); +%! [c, ia, ic] = uniquetol (single (a)); +%! assert (class (c), "single"); +%! assert (class (ia), "double"); +%! assert (class (ic), "double"); +%! [c, ia, ic] = uniquetol (a, "ByRows", true); +%! assert (c, sortrows (a)); +%! assert (ia, [2,3,1,6,4,5]'); +%! assert (ic, [3,1,2,5,6,4]'); +%! [c, ia, ic] = uniquetol (single (a), "ByRows", true); +%! assert (class (c), "single"); +%! assert (class (ia), "double"); +%! assert (class (ic), "double"); +%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true); +%! assert (ia, {7;[10;15];2;[3;18];8;13;14;[1;6];9;11;16;17;12;4;5}); +%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true); +%! assert (class (c), "single"); +%! assert (class (ia{1}), "double"); +%! assert (class (ic), "double"); +%! [c, ia, ic] = uniquetol (a, "OutputAllIndices", true, "byrows", true); +%! assert (ia, {2;3;1;6;4;5}); +%! [c, ia, ic] = uniquetol (single (a), "OutputAllIndices", true, "byrows", true); +%! assert (class (c), "single"); +%! assert (class (ia{1}), "double"); +%! assert (class (ic), "double"); + +## Test empty input compatibility +%!test +%! [c, ia, ic] = uniquetol ([]); +%! assert (c, ones (0,1)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol ([], "byrows", true); +%! assert (c, []); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (0,1)); +%! assert (c, ones (0,1)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (0,1), "byrows", true); +%! assert (c, ones (0,1)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (1,0)); +%! assert (c, ones (1,0)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (1,0), "byrows", true); +%! assert (c, ones (1,0)); +%! assert (ia, 1); +%! assert (ic, 1); +%!test +%! [c, ia, ic] = uniquetol (ones (1,0,2)); +%! assert (c, ones (1,0)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (0,1,2)); +%! assert (c, ones (0,1)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (ones (1,2,0)); +%! assert (c, ones (1,0)); +%! assert (ia, ones (0,1)); +%! assert (ic, ones (0,1)); +%!test +%! [c, ia, ic] = uniquetol (single ([])); +%! assert (class (c), "single"); +%! assert (class (ia), "double"); +%! assert (class (ic), "double"); +%!test +%! [c, ia, ic] = uniquetol (single ([]), "byrows", true); +%! assert (class (c), "single"); +%! assert (class (ia), "double"); +%! assert (class (ic), "double"); +%!test +%! [c, ia, ic] = uniquetol (single ([]), "OutputAllIndices", true); +%! assert (class (c), "single"); +%! assert (class (ia), "double"); +%! assert (class (ic), "double"); + + ## Test input validation %!error uniquetol () %!error uniquetol (int8 (1)) @@ -354,6 +488,9 @@ %!error uniquetol (1, 2, "byrows") %!error uniquetol (1, 2, 3, "bar") %!error uniquetol (ones (2,2,2), "byrows", true) +%!error uniquetol (ones (0,1,2), "byrows", true) +%!error uniquetol (ones (1,0,2), "byrows", true) +%!error uniquetol (ones (1,2,0), "byrows", true) %!error uniquetol (1, "DataScale", '1') %!error uniquetol (1, "DataScale", -1) %!error uniquetol (1, "DataScale", 1i)