Mercurial > octave
view scripts/set/unique.m @ 33559:62fca924fe85 default tip @
doc: Update NEWS.10.md file.
* NEWS.10.md: Indent NEWS.10.md for clarity.
Add note about changes to colormap() functionality.
author | Rik <rik@octave.org> |
---|---|
date | Thu, 09 May 2024 18:23:33 -0700 |
parents | ffc7bb75ea3e |
children |
line wrap: on
line source
######################################################################## ## ## Copyright (C) 2000-2024 The Octave Project Developers ## ## See the file COPYRIGHT.md in the top-level directory of this ## distribution or <https://octave.org/copyright/>. ## ## This file is part of Octave. ## ## Octave is free software: you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## <https://www.gnu.org/licenses/>. ## ######################################################################## ## -*- texinfo -*- ## @deftypefn {} {@var{y} =} unique (@var{x}) ## @deftypefnx {} {@var{y} =} unique (@var{x}, "rows") ## @deftypefnx {} {@var{y} =} unique (@dots{}, "sorted") ## @deftypefnx {} {@var{y} =} unique (@dots{}, "stable") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}) ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "first") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "last") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "legacy") ## Return the unique elements of @var{x}. ## ## If the input @var{x} is a column vector then return a column vector; ## Otherwise, return a row vector. @var{x} may also be a cell array of ## strings. ## ## If the optional argument @qcode{"rows"} is given then return the unique ## rows of @var{x}. The input must be a 2-D numeric matrix to use this option. ## ## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order ## in which unique values appear in the output. The default is ## @qcode{"sorted"} and values in the output are placed in ascending order. ## The alternative @qcode{"stable"} preserves the order found in the input ## @var{x}. ## ## If requested, return column index vectors @var{i} and @var{j} such that ## @code{@var{y} = @var{x}(@var{i})} and @code{@var{x} = @var{y}(@var{j})}. ## ## Additionally, if @var{i} is a requested output then one of the flags ## @qcode{"first"} or @qcode{"last"} may be given. If @qcode{"last"} is ## specified, return the highest possible indices in @var{i}, otherwise, if ## @qcode{"first"} is specified, return the lowest. The default is ## @qcode{"first"}. ## ## Example 1 : sort order ## ## @example ## @group ## unique ([3, 1, 1, 2]) ## @result{} [1, 2, 3] ## unique ([3, 1, 1, 2], "stable") ## @result{} [3, 1, 2] ## @end group ## @end example ## ## Example 2 : index selection ## ## @example ## @group ## [~, @var{i}] = unique ([3, 1, 1, 2], "first") ## @result{} @var{i} = [2; 4; 1] ## [~, @var{i}] = unique ([3, 1, 1, 2], "last") ## @result{} @var{i} = [3; 4; 1] ## @end group ## @end example ## ## Programming Notes: The input flag @qcode{"legacy"} changes the algorithm ## to be compatible with @sc{matlab} releases prior to R2012b. Specifically, ## The index ordering flag is changed to @qcode{"last"}, and the shape of the ## outputs @var{i}, @var{j} will follow the shape of the input @var{x} rather ## than always being column vectors. ## ## @seealso{uniquetol, union, intersect, setdiff, setxor, ismember} ## @end deftypefn function [y, i, j] = unique (x, varargin) if (nargin < 1) print_usage (); elseif (! (isnumeric (x) || islogical (x) || ischar (x) || iscellstr (x))) error ("unique: X must be an array or cell array of strings"); endif if (nargin > 1) ## parse options if (! iscellstr (varargin)) error ("unique: options must be strings"); endif optrows = any (strcmp ("rows", varargin)); optfirst = any (strcmp ("first", varargin)); optlast = any (strcmp ("last", varargin)); optsorted = any (strcmp ("sorted", varargin)); optstable = any (strcmp ("stable", varargin)); optlegacy = any (strcmp ("legacy", varargin)); if (optfirst && optlast) error ('unique: cannot specify both "first" and "last"'); elseif (optsorted && optstable) error ('unique: cannot specify both "sorted" and "stable"'); elseif ((optfirst || optlast) && (optsorted || optstable)) error ('unique: cannot specify "first"/"last" with "sorted"/"stable"'); elseif (optlegacy && (optsorted || optstable)) error ('unique: cannot specify "sorted" or "stable" with "legacy"'); elseif (optrows + optfirst + optlast + optsorted + optstable + optlegacy != nargin-1) error ("unique: invalid option"); endif ## Set defaults if not set earlier. if (! optfirst && ! optlast) optfirst = true; endif if (! optsorted && ! optstable) optsorted = true; endif if (optrows && iscellstr (x)) warning ('unique: "rows" is ignored for cell arrays'); optrows = false; endif else optrows = false; optfirst = true; optsorted = true; optlegacy = false; endif ## FIXME: The operations ## ## match = (y(1:n-1) == y(2:n)); ## y(idx) = []; ## ## are very slow on sparse matrices. Until they are fixed to be as ## fast as for full matrices, operate on the nonzero elements of the ## sparse array as long as we are not operating on rows. if (issparse (x) && ! optrows && nargout <= 1) if (nnz (x) < numel (x)) y = unique ([0; nonzeros(x)], varargin{:}); else ## Corner case where sparse matrix is actually full y = unique (full (x), varargin{:}); endif return; endif if (optrows) n = rows (x); isrowvec = false; else n = numel (x); isrowvec = isrow (x); endif ## Special cases 0 and 1 if (n == 0) y = x; if (! optrows && any (size (x))) if (iscellstr (x)) y = cell (0, 1); else y = zeros (0, 1, class (x)); endif endif i = j = []; return; elseif (n == 1) y = x; i = j = 1; return; endif ## Calculate y output if (optrows) if (nargout > 1 || ! optsorted) [y, j] = sortrows (x); j = j(:); else y = sortrows (x); endif match = all (y(1:n-1,:) == y(2:n,:), 2); if (optsorted) y(match,:) = []; else y = x; y(j([false; match]), :) = []; endif else if (isvector (x)) y = x; else y = x(:); endif if (nargout > 1 || ! optsorted) [y, j] = sort (y); j = j(:); else y = sort (y); endif if (iscellstr (y)) match = strcmp (y(1:n-1), y(2:n)); else match = (y(1:n-1) == y(2:n)); endif if (optsorted) y(match) = []; else if (isvector (x)) y = x; else y = x(:); endif y(j([false; match(:)])) = []; endif endif ## Calculate i and j outputs (2nd and 3rd outputs) if (nargout > 1) if (optsorted) idx = find (match); if (! optlegacy && optfirst) idx += 1; endif i = j; i(idx) = []; if (nargout > 2) j(j) = cumsum (! [false; match(:)]); endif else ## Get inverse of sort index j so that sort(x)(k) = x(j)(k) = x. k = j; # cheap way to copy dimensions k(j) = 1:n; ## Generate logical index of sorted unique value locations. uniquex = ! [false; match(:)]; ## Remap unique locations to unsorted x, such that y = x(i). i = find (uniquex(k)); if (nargout > 2) ## Example of index mappings to obtain i and j ('stable'). ## x = [40,20,40,20,20,30,10]' # input data, n = 7, m = 4 ## x(j) = [10,20,20,20,30,40,40]' # sorted x ## j = [7,2,4,5,6,1,3]' # sort index, x(j) = sort(x) ## k = [6,2,7,3,4,5,1]' # inverse idx of j, sort(x)(k) = x ## y = [40,20,30,10]' # unique x preserving ordering ## uniquex = [1,1,0,0,1,1,0]' # logical sorted idx of unique x vals ## i = [1,2,6,7]' # unique output index, y = x(i) ## u = [1,2,5,6]' # linear idx of unique x(j) elems. ## l = [1,2,2,2,5,6,6]' # unique elem. in full sort(x) ## l(k) = [6,2,6,2,2,5,1]' # l mapped back to unsorted x ## j(l(k)) = [1,2,1,2,2,6,7]' # unique elem. mapped to x idx ## p(i) = [1,2,#,#,#,3,4]' # map between i and j(l(k)) ni = numel (i); u = find (uniquex); # Linear index of unique elements of sort(x) l = u(cumsum (uniquex)); # Expand u for all elements in sort(x) p = j; # cheap way to copy dimensions p(i) = 1:ni; # set p to contain the vector positions of i. j = p(j(l(k))); # Replace j with 3rd output mapping y->x. endif endif if (optlegacy && isrowvec) i = i.'; if (nargout > 2) j = j.'; endif endif endif endfunction %!assert (unique ([1 1 2; 1 2 1; 1 1 2]), [1;2]) %!assert (unique ([1 1 2; 1 0 1; 1 1 2],"rows"), [1 0 1; 1 1 2]) %!assert (unique ([]), []) %!assert (unique ([1]), [1]) %!assert (unique ([1 2]), [1 2]) %!assert (unique ([1;2]), [1;2]) %!assert (unique ([1,NaN,Inf,NaN,Inf]), [1,Inf,NaN,NaN]) %!assert (unique ([1,NaN,Inf,NaN,Inf], "stable"), [1,NaN,Inf,NaN]) %!assert (unique ({"Foo","Bar","Foo"}), {"Bar","Foo"}) %!assert (unique ({"Foo","Bar","Foo"}, "stable"), {"Foo", "Bar"}) %!assert (unique ({"Foo","Bar","FooBar"}'), {"Bar","Foo","FooBar"}') %!assert (unique (zeros (1,0)), zeros (0,1)) %!assert (unique (zeros (1,0), "rows"), zeros (1,0)) %!assert (unique (cell (1,0)), cell (0,1)) %!assert (unique ({}), {}) %!assert (unique ([1,2,2,3,2,4], "rows"), [1,2,2,3,2,4]) %!assert (unique ([1,2,2,3,2,4]), [1,2,3,4]) %!assert (unique ([1,2,2,3,2,4]', "rows"), [1;2;3;4]) %!assert (unique (sparse ([2,0;2,0])), [0;2]) %!assert (unique (sparse ([1,2;2,3])), [1;2;3]) %!assert (unique ([1,2,2,3,2,4]', "rows"), [1;2;3;4]) %!assert (unique (single ([1,2,2,3,2,4]), "rows"), single ([1,2,2,3,2,4])) %!assert (unique (single ([1,2,2,3,2,4])), single ([1,2,3,4])) %!assert (unique (single ([1,2,2,3,2,4]'), "rows"), single ([1;2;3;4])) %!assert (unique (uint8 ([1,2,2,3,2,4]), "rows"), uint8 ([1,2,2,3,2,4])) %!assert (unique (uint8 ([1,2,2,3,2,4])), uint8 ([1,2,3,4])) %!assert (unique (uint8 ([1,2,2,3,2,4]'), "rows"), uint8 ([1;2;3;4])) ## Test options with numeric inputs %!test %! [y,i,j] = unique ([1,1,2,3,3,3,4], "sorted"); %! assert (y, [1,2,3,4]); %! assert (i, [1;3;4;7]); %! assert (j, [1;1;2;3;3;3;4]); %!test %! [y,i,j] = unique ([4,4,2,2,2,3,1], "stable"); %! assert (y, [4,2,3,1]); %! assert (i, [1;3;6;7]); %! assert (j, [1;1;2;2;2;3;4]); %!test %! [y,i,j] = unique ([1,1,2,3,3,3,4]', "last"); %! assert (y, [1,2,3,4]'); %! assert (i, [2;3;6;7]); %! assert (j, [1;1;2;3;3;3;4]); ## Test options with cellstr inputs %!test %! [y,i,j] = unique ({"z"; "z"; "z"}); %! assert (y, {"z"}); %! assert (i, [1]); %! assert (j, [1;1;1]); %!test %! [y,i,~] = unique ({"B"; "A"; "B"}, "stable"); %! assert (y, {"B"; "A"}); %! assert (i, [1; 2]); %!test %! A = [1,2,3; 1,2,3]; %! [y,i,j] = unique (A, "rows"); %! assert (y, [1,2,3]); %! assert (A(i,:), y); %! assert (y(j,:), A); %!test %! A = [4,5,6; 1,2,3; 4,5,6]; %! [y,i,j] = unique (A, "rows", "stable"); %! assert (y, [4,5,6; 1,2,3]); %! assert (A(i,:), y); %! assert (y(j,:), A); ## Test "legacy" option %!test %! [y,i,j] = unique ([1,1,2,3,3,3,4], "legacy"); %! assert (y, [1,2,3,4]); %! assert (i, [2,3,6,7]); %! assert (j, [1,1,2,3,3,3,4]); %!test %! A = [7 9 7; 0 0 0; 7 9 7; 5 5 5; 1 4 5]; %! [y,i,j] = unique (A, "rows", "legacy"); %! assert (y, [0 0 0; 1 4 5; 5 5 5; 7 9 7]); %! assert (i, [2; 5; 4; 3]); %! assert (j, [4; 1; 4; 3; 2]); %!test <*65176> %! a = [3 2 1 2; 1 2 2 1]; %! [o1, o2, o3] = unique (a); %! assert ({o1, o2, o3}, {[1;2;3], [2;3;1], [3;1;2;2;1;2;2;1]}); %! [o1, o2, o3] = unique (a, "stable"); %! assert ({o1, o2, o3}, {[3;1;2], [1;2;3], [1;2;3;3;2;3;3;2]}) %!test <*65176> %! a = [4,2,4,2,2,3,1]; %! [o1, o2, o3] = unique (a); %! assert ({o1, o2, o3}, {[1,2,3,4], [7;2;6;1], [4;2;4;2;2;3;1]}); %! [o1, o2, o3] = unique (a, "stable"); %! assert ({o1, o2, o3}, {[4,2,3,1], [1;2;6;7], [1;2;1;2;2;3;4]}) %!test <*65176> %! a = [3 2 1 2; 2 1 2 1]; %! [o1, o2, o3] = unique (a(1,:), "rows"); %! assert ({o1, o2, o3}, {a(1,:), 1, 1}); %! [o1, o2, o3] = unique (a(1,:), "rows", "stable"); %! assert ({o1, o2, o3}, {a(1,:), 1, 1}); %! [o1, o2, o3] = unique (a, "rows"); %! assert ({o1, o2, o3}, {[a(2,:); a(1,:)], [2;1], [2;1]}); %! [o1, o2, o3] = unique (a, "rows", "stable"); %! assert ({o1, o2, o3}, {a, [1;2], [1;2]}); %! [o1, o2, o3] = unique ([a;a], "rows"); %! assert ({o1, o2, o3}, {[a(2,:); a(1,:)], [2;1], [2;1;2;1]}); %! [o1, o2, o3] = unique ([a;a], "rows", "stable"); %! assert ({o1, o2, o3}, {a, [1;2], [1;2;1;2]}); %!test <*65176> %! a = gallery ("integerdata", [-100, 100], 6, 6); %! a = [a(2,:); a(1:5,:); a(2:6,:)]; %! [o1, o2, o3] = unique (a); %! assert ({o1, o1(o3), o2, o3}, {a(:)(o2), a(:), ... %! [26;22;34;45;57; 6;11;17;33;28;35;15;56; 2;59; 4;66; ... %! 16;50;49;27;24;37;44;48;39;38;13;23; 5;12;46;55; 1], ... %! [34;14;34;16;30; 6;34;16;30; 6; 7;31;28;31;12;18; 8;31;12;18; 8; 2;29; ... %! 22;29; 1;21;10;29; 1;21;10; 9; 3;11; 3;23;27;26; 3;23;27;26;24; 4;32; ... %! 4; 25;20;19; 4;25;20;19;33;13; 5;13;15; 2;24;13;15; 2;24;17]}); %! [o1, o2, o3] = unique (a, "stable"); %! assert ({o1, o1(o3), o2, o3}, {a(:)(o2), a(:), ... %! [ 1; 2; 4; 5; 6;11;12;13;15;16;17;22;23;24;26;27;28; ... %! 33;34;35;37;38;39;44;45;46;48;49;50;55;56;57;59;66], ... %! [ 1; 2; 1; 3; 4; 5; 1; 3; 4; 5; 6; 7; 8; 7; 9;10;11; 7; 9;10;11;12;13; ... %! 14;13;15;16;17;13;15;16;17;18;19;20;19;21;22;23;19;21;22;23;24;25;26;... %! 25;27;28;29;25;27;28;29;30;31;32;31;33;12;24;31;33;12;24;34]}); %! [o1, o2, o3] = unique (a, "rows"); %! assert ({o1, o1(o3,:), o2, o3}, {a(o2,:), a, ... %! [6;11;2;4;5;1], [6;3;6;4;5;1;6;4;5;1;2]}); %! [o1, o2, o3] = unique (a, "rows", "stable"); %! assert ({o1, o1(o3,:), o2, o3}, {a(o2,:), a, ... %! [1;2;4;5;6;11], [1;2;1;3;4;5;1;3;4;5;6]}); ## Test input validation %!error <Invalid call> unique () %!error <X must be an array or cell array of strings> unique ({1}) %!error <options must be strings> unique (1, 2) %!error <cannot specify both "first" and "last"> unique (1, "first", "last") %!error <cannot specify both "sorted" and "stable"> %! unique (1, "sorted", "stable"); %!error <cannot specify "first"/"last" with "sorted"/"stable"> %! unique (1, "first", "sorted"); %!error <cannot specify "first"/"last" with "sorted"/"stable"> %! unique (1, "last", "stable"); %!error <cannot specify "sorted" or "stable" with "legacy"> %! unique (1, "sorted", "legacy"); %!error <cannot specify "sorted" or "stable" with "legacy"> %! unique (1, "stable", "legacy"); %!error <invalid option> unique (1, "middle") %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption") %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption1", "UnknownOption2") %!error <invalid option> unique ({"a", "b", "c"}, "rows", "UnknownOption2") %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption1", "last") %!warning <"rows" is ignored for cell arrays> unique ({"1"}, "rows");