# HG changeset patch # User Rik # Date 1564070834 25200 # Node ID c4f9a0f097a3eb19083d55a2160d8a92ef49aec6 # Parent 9b053ba971b221b7f3f1467a2e39b1bd779305f7 unique.m: Add new option "stable" to control output ordering. * NEWS: Announce new option. * unique.m: Rewrite docstring to include "stable" and "sorted" options and documentation. Add two examples to docstring for clarification. Implement "stable" algorithm. Add BIST tests for behavior and input validation. diff -r 9b053ba971b2 -r c4f9a0f097a3 NEWS --- a/NEWS Wed Jul 24 12:37:21 2019 -0400 +++ b/NEWS Thu Jul 25 09:07:14 2019 -0700 @@ -9,6 +9,10 @@ behavior can be restored by setting `"editinplace"` to `false` and `"home"` to `"~/octave"`. +- The `unique` function accepts a new sorting option `"stable"` which + will return output values in the same order as the input, rather + than in ascending order. + #### Graphics backend - Graphic primitives now accept a color property value of `"none"` @@ -58,10 +62,10 @@ from releases prior to R2012b, can be obtained by using the `"legacy"` flag. -- The functions `intersect`, `setxor`, `union`, now accept a `"legacy"` - flag which changes the index values (second and third outputs) as well - as the orientation of all outputs to match Matlab releases prior to - R2012b. +- The functions `intersect`, `setxor`, and `union` now accept a + `"legacy"` flag which changes the index values (second and third + outputs) as well as the orientation of all outputs to match Matlab + releases prior to R2012b. - Complex RESTful web services can now be accessed by the `webread` and `webwrite` functions alongside with the `weboptions` structure. One diff -r 9b053ba971b2 -r c4f9a0f097a3 scripts/set/unique.m --- a/scripts/set/unique.m Wed Jul 24 12:37:21 2019 -0400 +++ b/scripts/set/unique.m Thu Jul 25 09:07:14 2019 -0700 @@ -20,19 +20,26 @@ ## -*- texinfo -*- ## @deftypefn {} {} unique (@var{x}) ## @deftypefnx {} {} unique (@var{x}, "rows") +## @deftypefnx {} {} unique (@dots{}, "sorted") +## @deftypefnx {} {} unique (@dots{}, "stable") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}) ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "first") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "last") ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "legacy") -## Return the unique elements of @var{x} sorted in ascending order. +## Return the unique elements of @var{x}. ## ## If the input @var{x} is a column vector then return a column vector; ## Otherwise, return a row vector. @var{x} may also be a cell array of ## strings. ## ## If the optional argument @qcode{"rows"} is given then return the unique -## rows of @var{x} sorted in ascending order. The input must be a 2-D matrix -## to use this option. +## rows of @var{x}. The input must be a 2-D numeric matrix to use this option. +## +## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order +## in which unique values appear in the output. The default is +## @qcode{"sorted"} and values in the output are placed in ascending order. +## The alternative @qcode{"stable"} preserves the order found in the input +## @var{x}. ## ## If requested, return column index vectors @var{i} and @var{j} such that ## @code{@var{y} = @var{x}(@var{i})} and @code{@var{x} = @var{y}(@var{j})}. @@ -43,12 +50,37 @@ ## @qcode{"first"} is specified, return the lowest. The default is ## @qcode{"first"}. ## -## Programming Note: The input flag @qcode{"legacy"} changes the algorithm +## Example 1 : sort order +## +## @example +## @group +## unique ([3, 1, 1, 2]) +## @result{} [1, 2, 3] +## unique ([3, 1, 1, 2], "stable") +## @result{} [3, 1, 2] +## @end group +## @end example +## +## Example 2 : index selection +## +## @example +## @group +## [~, @var{i}] = unique ([3, 1, 1, 2], "first") +## @result{} @var{i} = [2; 4; 1] +## [~, @var{i}] = unique ([3, 1, 1, 2], "last") +## @result{} @var{i} = [3; 4; 1] +## @end group +## @end example +## +## Programming Notes: The input flag @qcode{"legacy"} changes the algorithm ## to be compatible with @sc{matlab} releases prior to R2012b. Specifically, ## The index ordering flag is changed to @qcode{"last"}, and the shape of the ## outputs @var{i}, @var{j} will follow the shape of the input @var{x} rather ## than always being column vectors. ## +## The third output, @var{j}, has not been implemented yet when the sort +## order is @qcode{"stable"}. +## ## @seealso{union, intersect, setdiff, setxor, ismember} ## @end deftypefn @@ -69,17 +101,29 @@ optrows = any (strcmp ("rows", varargin)); optfirst = any (strcmp ("first", varargin)); optlast = any (strcmp ("last", varargin)); + optsorted = any (strcmp ("sorted", varargin)); + optstable = any (strcmp ("stable", varargin)); optlegacy = any (strcmp ("legacy", varargin)); if (optfirst && optlast) error ('unique: cannot specify both "first" and "last"'); - elseif (optfirst + optlast + optrows + optlegacy != nargin-1) + elseif (optsorted && optstable) + error ('unique: cannot specify both "sorted" and "stable"'); + elseif ((optfirst || optlast) && (optsorted || optstable)) + error ('unique: cannot specify "first"/"last" with "sorted"/"stable"'); + elseif (optlegacy && (optsorted || optstable)) + error ('unique: cannot specify "sorted" or "stable" with "legacy"'); + elseif (optrows + optfirst + optlast + optsorted + optstable + optlegacy + != nargin-1) error ("unique: invalid option"); endif - ## Set default to "first" if not set earlier. + ## Set defaults if not set earlier. if (! optfirst && ! optlast) optfirst = true; endif + if (! optsorted && ! optstable) + optsorted = true; + endif if (optrows && iscellstr (x)) warning ('unique: "rows" is ignored for cell arrays'); @@ -88,6 +132,7 @@ else optrows = false; optfirst = true; + optsorted = true; optlegacy = false; endif @@ -99,7 +144,6 @@ ## are very slow on sparse matrices. Until they are fixed to be as ## fast as for full matrices, operate on the nonzero elements of the ## sparse array as long as we are not operating on rows. - if (issparse (x) && ! optrows && nargout <= 1) if (nnz (x) < numel (x)) y = unique ([0; nonzeros(x)], varargin{:}); @@ -118,37 +162,46 @@ isrowvec = isrow (x); endif - y = x; ## Special cases 0 and 1 if (n == 0) - if (! optrows && isempty (x) && any (size (x))) - if (iscellstr (y)) + y = x; + if (! optrows && any (size (x))) + if (iscellstr (x)) y = cell (0, 1); else - y = zeros (0, 1, class (y)); + y = zeros (0, 1, class (x)); endif endif i = j = []; return; elseif (n == 1) + y = x; i = j = 1; return; endif + ## Calculate y output if (optrows) - if (nargout > 1) - [y, i] = sortrows (y); + if (nargout > 1 || ! optsorted) + [y, i] = sortrows (x); i = i(:); else - y = sortrows (y); + y = sortrows (x); endif match = all (y(1:n-1,:) == y(2:n,:), 2); - y(match,:) = []; + if (optsorted) + y(match,:) = []; + else + y = x; + y(i([false; match]), :) = []; + endif else - if (! isvector (y)) - y = y(:); + if (isvector (x)) + y = x; + else + y = x(:); endif - if (nargout > 1) + if (nargout > 1 || ! optsorted) [y, i] = sort (y); i = i(:); else @@ -159,23 +212,46 @@ else match = (y(1:n-1) == y(2:n)); endif - y(match) = []; + if (optsorted) + y(match) = []; + else + if (isvector (x)) + y = x; + else + y = x(:); + endif + y(i([false; match(:)])) = []; + endif endif + ## Calculate j output (3rd output) if (isargout (3)) - j = i; + j = i; # cheap way to copy dimensions j(i) = cumsum ([1; ! match(:)]); + if (! optsorted) + warning ("unique: third output J is not yet implemented"); + j = []; + endif + if (optlegacy && isrowvec) j = j.'; endif endif + ## Calculate i output (2nd output) if (isargout (2)) - idx = find (match); - if (! optlegacy && optfirst) - idx += 1; # in-place is faster than other forms of increment + if (optsorted) + idx = find (match); + if (! optlegacy && optfirst) + idx += 1; # in-place is faster than other forms of increment + endif + i(idx) = []; + else + i([false; match(:)]) = []; + ## FIXME: Is there a way to avoid a call to sort? + i = sort (i); endif - i(idx) = []; + if (optlegacy && isrowvec) i = i.'; endif @@ -191,7 +267,9 @@ %!assert (unique ([1 2]), [1 2]) %!assert (unique ([1;2]), [1;2]) %!assert (unique ([1,NaN,Inf,NaN,Inf]), [1,Inf,NaN,NaN]) +%!assert (unique ([1,NaN,Inf,NaN,Inf], "stable"), [1,NaN,Inf,NaN]) %!assert (unique ({"Foo","Bar","Foo"}), {"Bar","Foo"}) +%!assert (unique ({"Foo","Bar","Foo"}, "stable"), {"Foo", "Bar"}) %!assert (unique ({"Foo","Bar","FooBar"}'), {"Bar","Foo","FooBar"}') %!assert (unique (zeros (1,0)), zeros (0,1)) %!assert (unique (zeros (1,0), "rows"), zeros (1,0)) @@ -217,6 +295,12 @@ %! assert (j, [1;1;2;3;3;3;4]); %!test +%! [y,i,~] = unique ([4,4,2,2,2,3,1], "stable"); +%! assert (y, [4,2,3,1]); +%! assert (i, [1;3;6;7]); +%! ##assert (j, []); + +%!test %! [y,i,j] = unique ([1,1,2,3,3,3,4]', "last"); %! assert (y, [1,2,3,4]'); %! assert (i, [2;3;6;7]); @@ -229,6 +313,11 @@ %! assert (j, [1;1;1]); %!test +%! [y,i,~] = unique ({"B"; "A"; "B"}, "stable"); +%! assert (y, {"B"; "A"}); +%! assert (i, [1; 2]); + +%!test %! A = [1,2,3; 1,2,3]; %! [y,i,j] = unique (A, "rows"); %! assert (y, [1,2,3]); @@ -236,6 +325,13 @@ %! assert (y(j,:), A); %!test +%! A = [4,5,6; 1,2,3; 4,5,6]; +%! [y,i,~] = unique (A, "rows", "stable"); +%! assert (y, [4,5,6; 1,2,3]); +%! assert (A(i,:), y); +%! ##assert (y(j,:), A); + +%!test %! [y,i,j] = unique ([1,1,2,3,3,3,4], "legacy"); %! assert (y, [1,2,3,4]); %! assert (i, [2,3,6,7]); @@ -253,9 +349,22 @@ %!error unique ({1}) %!error unique (1, 2) %!error unique (1, "first", "last") +%!error +%! unique (1, "sorted", "stable"); +%!error +%! unique (1, "first", "sorted"); +%!error +%! unique (1, "last", "stable"); +%!error +%! unique (1, "sorted", "legacy"); +%!error +%! unique (1, "stable", "legacy"); %!error unique (1, "middle") %!error unique ({"a", "b", "c"}, "UnknownOption") %!error unique ({"a", "b", "c"}, "UnknownOption1", "UnknownOption2") %!error unique ({"a", "b", "c"}, "rows", "UnknownOption2") %!error unique ({"a", "b", "c"}, "UnknownOption1", "last") %!warning <"rows" is ignored for cell arrays> unique ({"1"}, "rows"); +%!warning +%! [y,i,j] = unique ([2,1], "stable"); +%! assert (j, []);