changeset 27226:6eb32f0aea87

Implement "legacy" flag for intersect function (bug #56592). * NEWS: Announce change to default intersect behavior. * intersect.m: Update documentation. Parse input options looking for "legacy" and set variable optlegacy if found. Record shape of input in variable isrowvec. When processing outputs 2,3 look for combination of optlegacy and isrowvec to decide if output should be transposed. Update BIST tests to pass with new behavior and add additional test for "legacy" option. * union.m, unique.m: Update BIST tests to pass.
author Rik <rik@octave.org>
date Wed, 10 Jul 2019 13:48:01 -0700
parents 57bb8f531707
children c90648dde5cf
files NEWS scripts/set/intersect.m scripts/set/union.m scripts/set/unique.m
diffstat 4 files changed, 54 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Tue Jul 09 17:50:56 2019 -0700
+++ b/NEWS	Wed Jul 10 13:48:01 2019 -0700
@@ -47,6 +47,12 @@
 
 ### Matlab compatibility
 
+- The function `unique` now returns column index vectors for the second
+  and third outputs.  When duplicate values are present, the default
+  index to return is now the `"first"` occurrence.  The previous Octave
+  behavior, or Matlab behavior from releases prior to R2012b, can be
+  obtained by using the `"legacy"` flag.
+
 - Complex RESTful web services can now be accessed by the `webread` and
   `webwrite` functions alongside with the `weboptions` structure.  One
   major feature is the support for cookies to enable RESTful
--- a/scripts/set/intersect.m	Tue Jul 09 17:50:56 2019 -0700
+++ b/scripts/set/intersect.m	Wed Jul 10 13:48:01 2019 -0700
@@ -124,9 +124,9 @@
 %! a = [3 2 4 5 7 6 5 1 0 13 13];
 %! b = [3 5 12 1 1 7];
 %! [c,ia,ib] = intersect (a, b);
-%! assert (c, [1 3 5 7]);
-%! assert (ia, [8 1 7 5]);
-%! assert (ib, [5 1 2 6]);
+%! assert (c, [1, 3, 5, 7]);
+%! assert (ia, [8; 1; 4; 5]);
+%! assert (ib, [4; 1; 2; 6]);
 %! assert (a(ia), c);
 %! assert (b(ib), c);
 %!test
--- a/scripts/set/union.m	Tue Jul 09 17:50:56 2019 -0700
+++ b/scripts/set/union.m	Wed Jul 10 13:48:01 2019 -0700
@@ -100,7 +100,7 @@
 %! assert (y, [1; 2; 3; 4; 5]);
 %! assert (y, sort ([a(ia)'; b(ib)']));
 
-%!assert (nthargout (2:3, @union, [1, 2, 4], [2, 3, 5]), {[1, 3], [1, 2, 3]})
+%!assert (nthargout (2:3, @union, [1, 2, 4], [2, 3, 5]), {[1; 2; 3], [2; 3]})
 %!assert (nthargout (2:3, @union, [1 2; 2 3; 4 5], [2 3; 3 4; 5 6], "rows"),
 %!        {[1; 3], [1; 2; 3]})
 
--- a/scripts/set/unique.m	Tue Jul 09 17:50:56 2019 -0700
+++ b/scripts/set/unique.m	Wed Jul 10 13:48:01 2019 -0700
@@ -23,6 +23,7 @@
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{})
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "first")
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "last")
+## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "legacy")
 ## Return the unique elements of @var{x} sorted in ascending order.
 ##
 ## If the input @var{x} is a column vector then return a column vector;
@@ -33,14 +34,19 @@
 ## rows of @var{x} sorted in ascending order.  The input must be a 2-D matrix
 ## to use this option.
 ##
-## If requested, return index vectors @var{i} and @var{j} such that
+## If requested, return column index vectors @var{i} and @var{j} such that
 ## @code{@var{y} = @var{x}(@var{i})} and @code{@var{x} = @var{y}(@var{j})}.
 ##
-## Additionally, if @var{i} is a requested output then one of
-## @qcode{"first"} or @qcode{"last"} may be given as an input.  If
-## @qcode{"last"} is specified, return the highest possible indices in
-## @var{i}, otherwise, if @qcode{"first"} is specified, return the lowest.
-## The default is @qcode{"last"}.
+## Additionally, if @var{i} is a requested output then one of the flags
+## @qcode{"first"} or @qcode{"last"} may be given.  If @qcode{"last"} is
+## specified, return the highest possible indices in @var{i}, otherwise, if
+## @qcode{"first"} is specified, return the lowest.  The default is
+## @qcode{"first"}.
+##
+## Programming Note: The input flag @qcode{"legacy"} changes the default
+## to @qcode{"last"}, and the shape of the outputs @var{i}, @var{j} will follow
+## the shape of the input @var{x} rather than always being column vectors.
+##
 ## @seealso{union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 
@@ -58,12 +64,13 @@
       error ("unique: options must be strings");
     endif
 
-    optrows  = any (strcmp ("rows", varargin));
-    optfirst = any (strcmp ("first", varargin));
-    optlast  = any (strcmp ("last", varargin));
+    optrows   = any (strcmp ("rows", varargin));
+    optfirst  = any (strcmp ("first", varargin));
+    optlast   = any (strcmp ("last", varargin));
+    optlegacy = any (strcmp ("legacy", varargin));
     if (optfirst && optlast)
       error ('unique: cannot specify both "first" and "last"');
-    elseif (optfirst + optlast + optrows != nargin-1)
+    elseif (optfirst + optlast + optrows + optlegacy != nargin-1)
       error ("unique: invalid option");
     endif
 
@@ -73,7 +80,8 @@
     endif
   else
     optrows = false;
-    optfirst = false;
+    optfirst = true;
+    optlegacy = false;
   endif
 
   ## FIXME: The operations
@@ -97,10 +105,9 @@
 
   if (optrows)
     n = rows (x);
-    dim = 1;
   else
     n = numel (x);
-    dim = (rows (x) == 1) + 1;
+    isrowvec = isrow (x);
   endif
 
   y = x;
@@ -123,6 +130,7 @@
   if (optrows)
     if (nargout > 1)
       [y, i] = sortrows (y);
+      i = i(:);
     else
       y = sortrows (y);
     endif
@@ -134,6 +142,7 @@
     endif
     if (nargout > 1)
       [y, i] = sort (y);
+      i = i(:);
     else
       y = sort (y);
     endif
@@ -147,10 +156,9 @@
 
   if (isargout (3))
     j = i;
-    if (dim == 1)
-      j(i) = cumsum ([1; ! match]);
-    else
-      j(i) = cumsum ([1, ! match]);
+    j(i) = cumsum ([1; ! match(:)]);
+    if (optlegacy && isrowvec)
+      j = j.';
     endif
   endif
 
@@ -160,6 +168,9 @@
       idx += 1;   # in-place is faster than other forms of increment
     endif
     i(idx) = [];
+    if (optlegacy && isrowvec)
+      i = i.';
+    endif
   endif
 
 endfunction
@@ -194,28 +205,34 @@
 %!test
 %! [a,i,j] = unique ([1,1,2,3,3,3,4]);
 %! assert (a, [1,2,3,4]);
-%! assert (i, [2,3,6,7]);
-%! assert (j, [1,1,2,3,3,3,4]);
-%!
+%! assert (i, [1;3;4;7]);
+%! assert (j, [1;1;2;3;3;3;4]);
+
 %!test
-%! [a,i,j] = unique ([1,1,2,3,3,3,4]', "first");
+%! [a,i,j] = unique ([1,1,2,3,3,3,4]', "last");
 %! assert (a, [1,2,3,4]');
-%! assert (i, [1,3,4,7]');
-%! assert (j, [1,1,2,3,3,3,4]');
-%!
+%! assert (i, [2;3;6;7]);
+%! assert (j, [1;1;2;3;3;3;4]);
+
 %!test
 %! [a,i,j] = unique ({"z"; "z"; "z"});
 %! assert (a, {"z"});
-%! assert (i, [3]');
+%! assert (i, [1]);
 %! assert (j, [1;1;1]);
-%!
+
 %!test
-%! A = [1,2,3;1,2,3];
+%! A = [1,2,3; 1,2,3];
 %! [a,i,j] = unique (A, "rows");
 %! assert (a, [1,2,3]);
 %! assert (A(i,:), a);
 %! assert (a(j,:), A);
 
+%!test
+%! [a,i,j] = unique ([1,1,2,3,3,3,4], "legacy");
+%! assert (a, [1,2,3,4]);
+%! assert (i, [2,3,6,7]);
+%! assert (j, [1,1,2,3,3,3,4]);
+
 ## Test input validation
 %!error unique ()
 %!error <X must be an array or cell array of strings> unique ({1})