Mercurial > octave
changeset 27295:7ec25367bdc5
intersect.m: Add new option "stable" to control output ordering.
* NEWS: Announce new option.
* intersect.m: Rewrite docstring to include "stable" and "sorted" options and
documentation. Process special case of empty matrices and return immediately.
Re-indent code. Implement "stable" algorithm. Add BIST tests for behavior.
author | Rik <rik@octave.org> |
---|---|
date | Sun, 28 Jul 2019 07:54:21 -0700 |
parents | aa4147476138 |
children | 538468f901dd |
files | NEWS scripts/set/intersect.m |
diffstat | 2 files changed, 145 insertions(+), 80 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Sat Jul 27 18:28:45 2019 -0700 +++ b/NEWS Sun Jul 28 07:54:21 2019 -0700 @@ -9,9 +9,9 @@ behavior can be restored by setting `"editinplace"` to `false` and `"home"` to `"~/octave"`. -- The `setdiff', `setxor`, `union`, `unique` functions accept a new - sorting option `"stable"` which will return output values in the same - order as the input, rather than in ascending order. +- The `intersect`, `setdiff', `setxor`, `union`, and `unique` functions + accept a new sorting option `"stable"` which will return output values + in the same order as the input, rather than in ascending order. #### Graphics backend
--- a/scripts/set/intersect.m Sat Jul 27 18:28:45 2019 -0700 +++ b/scripts/set/intersect.m Sun Jul 28 07:54:21 2019 -0700 @@ -20,18 +20,25 @@ ## -*- texinfo -*- ## @deftypefn {} {@var{c} =} intersect (@var{a}, @var{b}) ## @deftypefnx {} {@var{c} =} intersect (@var{a}, @var{b}, "rows") +## @deftypefnx {} {@var{c} =} intersect (@dots{}, "sorted") +## @deftypefnx {} {@var{c} =} intersect (@dots{}, "stable") ## @deftypefnx {} {@var{c} =} intersect (@dots{}, "legacy") ## @deftypefnx {} {[@var{c}, @var{ia}, @var{ib}] =} intersect (@dots{}) ## -## Return the unique elements common to both @var{a} and @var{b} sorted in -## ascending order. +## Return the unique elements common to both @var{a} and @var{b}. ## ## If @var{a} and @var{b} are both row vectors then return a row vector; ## Otherwise, return a column vector. The inputs may also be cell arrays of ## strings. ## ## If the optional input @qcode{"rows"} is given then return the common rows of -## @var{a} and @var{b}. The inputs must be 2-D matrices to use this option. +## @var{a} and @var{b}. The inputs must be 2-D numeric matrices to use this +## option. +## +## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order +## in which unique values appear in the output. The default is +## @qcode{"sorted"} and values in the output are placed in ascending order. +## The alternative @qcode{"stable"} preserves the order found in the input. ## ## If requested, return column index vectors @var{ia} and @var{ib} such that ## @code{@var{c} = @var{a}(@var{ia})} and @code{@var{c} = @var{b}(@var{ib})}. @@ -50,8 +57,8 @@ [a, b] = validsetargs ("intersect", a, b, varargin{:}); + ## Special case of empty matrices if (isempty (a) || isempty (b)) - ## Special case shortcuts algorithm. ## Lots of type checking required for Matlab compatibility. if (isnumeric (a) && isnumeric (b)) c = []; @@ -61,88 +68,94 @@ c = ""; endif ia = ib = []; - else - by_rows = any (strcmp ("rows", varargin)); - optlegacy = any (strcmp ("legacy", varargin)); + return; + endif + + by_rows = any (strcmp ("rows", varargin)); + optsorted = ! any (strcmp ("stable", varargin)); + optlegacy = any (strcmp ("legacy", varargin)); - if (optlegacy) - isrowvec = ! iscolumn (a) || ! iscolumn (b); - else - isrowvec = isrow (a) && isrow (b); - endif + if (optlegacy) + isrowvec = ! iscolumn (a) || ! iscolumn (b); + else + isrowvec = isrow (a) && isrow (b); + endif + + ## Form A and B into sets + if (nargout > 1 || ! optsorted) + [a, ia] = unique (a, varargin{:}); + ia = ia(:); + [b, ib] = unique (b, varargin{:}); + ib = ib(:); + else + a = unique (a, varargin{:}); + b = unique (b, varargin{:}); + endif - ## Form A and B into sets - if (nargout > 1) - [a, ja] = unique (a, varargin{:}); - ja = ja(:); - [b, jb] = unique (b, varargin{:}); - jb = jb(:); + if (by_rows) + c = [a; b]; + if (nargout > 1 || ! optsorted) + [c, ic] = sortrows (c); else - a = unique (a, varargin{:}); - b = unique (b, varargin{:}); + c = sortrows (c); + endif + match = find (all (c(1:end-1,:) == c(2:end,:), 2)); + if (optsorted) + c = c(match, :); + else + c = [a; b]; + ## FIXME: Is there a way to avoid a call to sort? + c = c(sort (ic(match)), :); endif - - if (by_rows) - c = [a; b]; - if (nargout > 1) - [c, ic] = sortrows (c); - else - c = sortrows (c); - endif - ii = find (all (c(1:end-1,:) == c(2:end,:), 2)); - c = c(ii,:); - len_a = rows (a); + len_a = rows (a); + else + c = [a(:); b(:)]; + if (nargout > 1 || ! optsorted) + [c, ic] = sort (c); + else + c = sort (c); + endif + if (iscellstr (c)) + match = find (strcmp (c(1:end-1), c(2:end))); + else + match = find (c(1:end-1) == c(2:end)); + endif + len_a = length (a); + if (optsorted) + c = c(match); else c = [a(:); b(:)]; - if (nargout > 1) - [c, ic] = sort (c); # [a(:);b(:)](ic) == c - else - c = sort (c); - endif - if (iscellstr (c)) - ii = find (strcmp (c(1:end-1), c(2:end))); - else - ii = find (c(1:end-1) == c(2:end)); - endif - c = c(ii); - len_a = length (a); + ## FIXME: Is there a way to avoid a call to sort? + c = c(sort (ic(match))); endif + endif + + ## Adjust output orientation for Matlab compatibility + if (isrowvec) + c = c.'; + endif - ## Adjust output orientation for Matlab compatibility - if (isrowvec) - c = c.'; + if (nargout > 1) + ia = ia(ic(match)); # a(ia) == c + ib = ib(ic(match+1) - len_a); # b(ib) == c + if (! optsorted) + ## FIXME: Is there a way to avoid a call to sort? + ia = sort (ia); + [~, idx] = min (ib); + ib = [ib(idx:end); ib(1:idx-1)]; endif - - if (nargout > 1) - ia = ja(ic(ii)); # a(ia) == c - ib = jb(ic(ii+1) - len_a); # b(ib) == c - if (optlegacy && isrowvec) - ia = ia.'; - ib = ib.'; - endif + if (optlegacy && isrowvec) + ia = ia.'; + ib = ib.'; endif - endif endfunction -## Test orientation of output -%!shared a,b -%! a = 1:4; -%! b = 2:5; - -%!assert (size (intersect (a, b)), [1, 3]) -%!assert (size (intersect (a', b)), [3, 1]) -%!assert (size (intersect (a, b')), [3, 1]) -%!assert (size (intersect (a', b')), [3, 1]) -%!assert (size (intersect (a, b, "legacy")), [1, 3]) -%!assert (size (intersect (a', b, "legacy")), [1, 3]) -%!assert (size (intersect (a, b', "legacy")), [1, 3]) -%!assert (size (intersect (a', b', "legacy")), [3, 1]) - -## Clear shared variables -%!shared +%!assert (intersect ([1 2 3 4], [9 8 4 2]), [2, 4]) +%!assert (intersect ([1 2; 2 3; 4 5], [2 3; 3 4; 5 6], "rows"), [2 3]) +%!assert (intersect ([1 NaN], [NaN NaN 5]), zeros (1,0)) ## Test multi-dimensional arrays %!test @@ -155,12 +168,20 @@ %!test %! a = [3 2 4 5 7 6 5 1 0 13 13]; %! b = [3 5 12 1 1 7]; -%! [c,ia,ib] = intersect (a, b); +%! [c, ia, ib] = intersect (a, b); %! assert (c, [1, 3, 5, 7]); %! assert (ia, [8; 1; 4; 5]); %! assert (ib, [4; 1; 2; 6]); %! assert (a(ia), c); %! assert (b(ib), c); + +%!test +%! a = [1 1 1 2 2 2]; +%! b = [1 2 3 4 5 6]; +%! c = intersect (a, b); +%! assert(c, [1,2]); + +## Test "rows" argument %!test %! a = [1,1,2;1,4,5;2,1,7]; %! b = [1,4,5;2,3,4;1,1,2;9,8,7]; @@ -170,11 +191,7 @@ %! assert (ib, [3;1]); %! assert (a(ia,:), c); %! assert (b(ib,:), c); -%!test -%! a = [1 1 1 2 2 2]; -%! b = [1 2 3 4 5 6]; -%! c = intersect (a, b); -%! assert(c, [1,2]); + %!test %! a = [1 2 3 4; 5 6 7 8; 9 10 11 12]; %! [b, ia, ib] = intersect (a, a, "rows"); @@ -182,6 +199,40 @@ %! assert (ia, [1:3]'); %! assert (ib, [1:3]'); +## Test "stable" argument +%!test +%! a = [3 2 4 5 7 6 5 1 0 13 13]; +%! b = [3 5 12 1 1 7]; +%! [c, ia, ib] = intersect (a, b, "stable"); +%! assert (c, [3, 5, 7, 1]); +%! assert (ia, [1; 4; 5; 8]); +%! assert (ib, [1; 2; 6; 4]); +%! assert (a(ia), c); +%! assert (b(ib), c); + +%!test +%! a = [2 2 2 1 1 1]; +%! b = [1 2 3 4 5 6]; +%! c = intersect (a, b, "stable"); +%! assert(c, [2,1]); + +%!test +%! a = [1,4,5;1,1,2;2,1,7]; +%! b = [1,4,5;2,3,4;1,1,2;9,8,7]; +%! [c, ia, ib] = intersect (a, b, "rows", "stable"); +%! assert (c, [1,4,5; 1,1,2]); +%! assert (ia, [1;2]); +%! assert (ib, [1;3]); +%! assert (a(ia,:), c); +%! assert (b(ib,:), c); + +%!test +%! a = [1 2 3 4; 5 6 7 8; 9 10 11 12]; +%! [b, ia, ib] = intersect (a, a, "rows", "stable"); +%! assert (b, a); +%! assert (ia, [1:3]'); +%! assert (ib, [1:3]'); + ## Test "legacy" argument %!test %! a = [7 1 7 7 4]; @@ -195,6 +246,20 @@ %! assert (ia, [5, 4]); %! assert (ib, [4, 1]); +## Test orientation of output +%!shared a,b +%! a = 1:4; +%! b = 2:5; + +%!assert (size (intersect (a, b)), [1, 3]) +%!assert (size (intersect (a', b)), [3, 1]) +%!assert (size (intersect (a, b')), [3, 1]) +%!assert (size (intersect (a', b')), [3, 1]) +%!assert (size (intersect (a, b, "legacy")), [1, 3]) +%!assert (size (intersect (a', b, "legacy")), [1, 3]) +%!assert (size (intersect (a, b', "legacy")), [1, 3]) +%!assert (size (intersect (a', b', "legacy")), [3, 1]) + ## Test return type of empty intersections %!assert (intersect (['a', 'b'], {}), {}) %!assert (intersect ([], {'a', 'b'}), {})