changeset 27295:7ec25367bdc5

intersect.m: Add new option "stable" to control output ordering. * NEWS: Announce new option. * intersect.m: Rewrite docstring to include "stable" and "sorted" options and documentation. Process special case of empty matrices and return immediately. Re-indent code. Implement "stable" algorithm. Add BIST tests for behavior.
author Rik <rik@octave.org>
date Sun, 28 Jul 2019 07:54:21 -0700
parents aa4147476138
children 538468f901dd
files NEWS scripts/set/intersect.m
diffstat 2 files changed, 145 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Sat Jul 27 18:28:45 2019 -0700
+++ b/NEWS	Sun Jul 28 07:54:21 2019 -0700
@@ -9,9 +9,9 @@
   behavior can be restored by setting `"editinplace"` to `false` and
   `"home"` to `"~/octave"`.
 
-- The `setdiff', `setxor`, `union`, `unique` functions accept a new
-  sorting option `"stable"` which will return output values in the same
-  order as the input, rather than in ascending order.
+- The `intersect`, `setdiff', `setxor`, `union`, and `unique` functions
+  accept a new sorting option `"stable"` which will return output values
+  in the same order as the input, rather than in ascending order.
 
 #### Graphics backend
 
--- a/scripts/set/intersect.m	Sat Jul 27 18:28:45 2019 -0700
+++ b/scripts/set/intersect.m	Sun Jul 28 07:54:21 2019 -0700
@@ -20,18 +20,25 @@
 ## -*- texinfo -*-
 ## @deftypefn  {} {@var{c} =} intersect (@var{a}, @var{b})
 ## @deftypefnx {} {@var{c} =} intersect (@var{a}, @var{b}, "rows")
+## @deftypefnx {} {@var{c} =} intersect (@dots{}, "sorted")
+## @deftypefnx {} {@var{c} =} intersect (@dots{}, "stable")
 ## @deftypefnx {} {@var{c} =} intersect (@dots{}, "legacy")
 ## @deftypefnx {} {[@var{c}, @var{ia}, @var{ib}] =} intersect (@dots{})
 ##
-## Return the unique elements common to both @var{a} and @var{b} sorted in
-## ascending order.
+## Return the unique elements common to both @var{a} and @var{b}.
 ##
 ## If @var{a} and @var{b} are both row vectors then return a row vector;
 ## Otherwise, return a column vector.  The inputs may also be cell arrays of
 ## strings.
 ##
 ## If the optional input @qcode{"rows"} is given then return the common rows of
-## @var{a} and @var{b}.  The inputs must be 2-D matrices to use this option.
+## @var{a} and @var{b}.  The inputs must be 2-D numeric matrices to use this
+## option.
+##
+## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order
+## in which unique values appear in the output.  The default is
+## @qcode{"sorted"} and values in the output are placed in ascending order.
+## The alternative @qcode{"stable"} preserves the order found in the input.
 ##
 ## If requested, return column index vectors @var{ia} and @var{ib} such that
 ## @code{@var{c} = @var{a}(@var{ia})} and @code{@var{c} = @var{b}(@var{ib})}.
@@ -50,8 +57,8 @@
 
   [a, b] = validsetargs ("intersect", a, b, varargin{:});
 
+  ## Special case of empty matrices
   if (isempty (a) || isempty (b))
-    ## Special case shortcuts algorithm.
     ## Lots of type checking required for Matlab compatibility.
     if (isnumeric (a) && isnumeric (b))
       c = [];
@@ -61,88 +68,94 @@
       c = "";
     endif
     ia = ib = [];
-  else
-    by_rows = any (strcmp ("rows", varargin));
-    optlegacy = any (strcmp ("legacy", varargin));
+    return;
+  endif
+
+  by_rows = any (strcmp ("rows", varargin));
+  optsorted = ! any (strcmp ("stable", varargin));
+  optlegacy = any (strcmp ("legacy", varargin));
 
-    if (optlegacy)
-      isrowvec = ! iscolumn (a) || ! iscolumn (b);
-    else
-      isrowvec = isrow (a) && isrow (b);
-    endif
+  if (optlegacy)
+    isrowvec = ! iscolumn (a) || ! iscolumn (b);
+  else
+    isrowvec = isrow (a) && isrow (b);
+  endif
+
+  ## Form A and B into sets
+  if (nargout > 1 || ! optsorted)
+    [a, ia] = unique (a, varargin{:});
+    ia = ia(:);
+    [b, ib] = unique (b, varargin{:});
+    ib = ib(:);
+  else
+    a = unique (a, varargin{:});
+    b = unique (b, varargin{:});
+  endif
 
-    ## Form A and B into sets
-    if (nargout > 1)
-      [a, ja] = unique (a, varargin{:});
-      ja = ja(:);
-      [b, jb] = unique (b, varargin{:});
-      jb = jb(:);
+  if (by_rows)
+    c = [a; b];
+    if (nargout > 1 || ! optsorted)
+      [c, ic] = sortrows (c);
     else
-      a = unique (a, varargin{:});
-      b = unique (b, varargin{:});
+      c = sortrows (c);
+    endif
+    match = find (all (c(1:end-1,:) == c(2:end,:), 2));
+    if (optsorted)
+      c = c(match, :);
+    else
+      c = [a; b];
+      ## FIXME: Is there a way to avoid a call to sort?
+      c = c(sort (ic(match)), :);
     endif
-
-    if (by_rows)
-      c = [a; b];
-      if (nargout > 1)
-        [c, ic] = sortrows (c);
-      else
-        c = sortrows (c);
-      endif
-      ii = find (all (c(1:end-1,:) == c(2:end,:), 2));
-      c = c(ii,:);
-      len_a = rows (a);
+    len_a = rows (a);
+  else
+    c = [a(:); b(:)];
+    if (nargout > 1 || ! optsorted)
+      [c, ic] = sort (c);
+    else
+      c = sort (c);
+    endif
+    if (iscellstr (c))
+      match = find (strcmp (c(1:end-1), c(2:end)));
+    else
+      match = find (c(1:end-1) == c(2:end));
+    endif
+    len_a = length (a);
+    if (optsorted)
+      c = c(match);
     else
       c = [a(:); b(:)];
-      if (nargout > 1)
-        [c, ic] = sort (c);         # [a(:);b(:)](ic) == c
-      else
-        c = sort (c);
-      endif
-      if (iscellstr (c))
-        ii = find (strcmp (c(1:end-1), c(2:end)));
-      else
-        ii = find (c(1:end-1) == c(2:end));
-      endif
-      c = c(ii);
-      len_a = length (a);
+      ## FIXME: Is there a way to avoid a call to sort?
+      c = c(sort (ic(match)));
     endif
+  endif
+
+  ## Adjust output orientation for Matlab compatibility
+  if (isrowvec)
+    c = c.';
+  endif
 
-    ## Adjust output orientation for Matlab compatibility
-    if (isrowvec)
-      c = c.';
+  if (nargout > 1)
+    ia = ia(ic(match));            # a(ia) == c
+    ib = ib(ic(match+1) - len_a);  # b(ib) == c
+    if (! optsorted)
+      ## FIXME: Is there a way to avoid a call to sort?
+      ia = sort (ia);
+      [~, idx] = min (ib);
+      ib = [ib(idx:end); ib(1:idx-1)];
     endif
-
-    if (nargout > 1)
-      ia = ja(ic(ii));            # a(ia) == c
-      ib = jb(ic(ii+1) - len_a);  # b(ib) == c
-      if (optlegacy && isrowvec)
-        ia = ia.';
-        ib = ib.';
-      endif
+    if (optlegacy && isrowvec)
+      ia = ia.';
+      ib = ib.';
     endif
-
   endif
 
 endfunction
 
 
-## Test orientation of output
-%!shared a,b
-%! a = 1:4;
-%! b = 2:5;
-
-%!assert (size (intersect (a, b)), [1, 3])
-%!assert (size (intersect (a', b)), [3, 1])
-%!assert (size (intersect (a, b')), [3, 1])
-%!assert (size (intersect (a', b')), [3, 1])
-%!assert (size (intersect (a, b, "legacy")), [1, 3])
-%!assert (size (intersect (a', b, "legacy")), [1, 3])
-%!assert (size (intersect (a, b', "legacy")), [1, 3])
-%!assert (size (intersect (a', b', "legacy")), [3, 1])
-
-## Clear shared variables
-%!shared
+%!assert (intersect ([1 2 3 4], [9 8 4 2]), [2, 4])
+%!assert (intersect ([1 2; 2 3; 4 5], [2 3; 3 4; 5 6], "rows"), [2 3])
+%!assert (intersect ([1 NaN], [NaN NaN 5]), zeros (1,0))
 
 ## Test multi-dimensional arrays
 %!test
@@ -155,12 +168,20 @@
 %!test
 %! a = [3 2 4 5 7 6 5 1 0 13 13];
 %! b = [3 5 12 1 1 7];
-%! [c,ia,ib] = intersect (a, b);
+%! [c, ia, ib] = intersect (a, b);
 %! assert (c, [1, 3, 5, 7]);
 %! assert (ia, [8; 1; 4; 5]);
 %! assert (ib, [4; 1; 2; 6]);
 %! assert (a(ia), c);
 %! assert (b(ib), c);
+
+%!test
+%! a = [1 1 1 2 2 2];
+%! b = [1 2 3 4 5 6];
+%! c = intersect (a, b);
+%! assert(c, [1,2]);
+
+## Test "rows" argument
 %!test
 %! a = [1,1,2;1,4,5;2,1,7];
 %! b = [1,4,5;2,3,4;1,1,2;9,8,7];
@@ -170,11 +191,7 @@
 %! assert (ib, [3;1]);
 %! assert (a(ia,:), c);
 %! assert (b(ib,:), c);
-%!test
-%! a = [1 1 1 2 2 2];
-%! b = [1 2 3 4 5 6];
-%! c = intersect (a, b);
-%! assert(c, [1,2]);
+
 %!test
 %! a = [1 2 3 4; 5 6 7 8; 9 10 11 12];
 %! [b, ia, ib] = intersect (a, a, "rows");
@@ -182,6 +199,40 @@
 %! assert (ia, [1:3]');
 %! assert (ib, [1:3]');
 
+## Test "stable" argument
+%!test
+%! a = [3 2 4 5 7 6 5 1 0 13 13];
+%! b = [3 5 12 1 1 7];
+%! [c, ia, ib] = intersect (a, b, "stable");
+%! assert (c, [3, 5, 7, 1]);
+%! assert (ia, [1; 4; 5; 8]);
+%! assert (ib, [1; 2; 6; 4]);
+%! assert (a(ia), c);
+%! assert (b(ib), c);
+
+%!test
+%! a = [2 2 2 1 1 1];
+%! b = [1 2 3 4 5 6];
+%! c = intersect (a, b, "stable");
+%! assert(c, [2,1]);
+
+%!test
+%! a = [1,4,5;1,1,2;2,1,7];
+%! b = [1,4,5;2,3,4;1,1,2;9,8,7];
+%! [c, ia, ib] = intersect (a, b, "rows", "stable");
+%! assert (c, [1,4,5; 1,1,2]);
+%! assert (ia, [1;2]);
+%! assert (ib, [1;3]);
+%! assert (a(ia,:), c);
+%! assert (b(ib,:), c);
+
+%!test
+%! a = [1 2 3 4; 5 6 7 8; 9 10 11 12];
+%! [b, ia, ib] = intersect (a, a, "rows", "stable");
+%! assert (b, a);
+%! assert (ia, [1:3]');
+%! assert (ib, [1:3]');
+
 ## Test "legacy" argument
 %!test
 %! a = [7 1 7 7 4]; 
@@ -195,6 +246,20 @@
 %! assert (ia, [5, 4]);
 %! assert (ib, [4, 1]);
 
+## Test orientation of output
+%!shared a,b
+%! a = 1:4;
+%! b = 2:5;
+
+%!assert (size (intersect (a, b)), [1, 3])
+%!assert (size (intersect (a', b)), [3, 1])
+%!assert (size (intersect (a, b')), [3, 1])
+%!assert (size (intersect (a', b')), [3, 1])
+%!assert (size (intersect (a, b, "legacy")), [1, 3])
+%!assert (size (intersect (a', b, "legacy")), [1, 3])
+%!assert (size (intersect (a, b', "legacy")), [1, 3])
+%!assert (size (intersect (a', b', "legacy")), [3, 1])
+
 ## Test return type of empty intersections
 %!assert (intersect (['a', 'b'], {}), {})
 %!assert (intersect ([], {'a', 'b'}), {})