changeset 27291:c4f9a0f097a3

unique.m: Add new option "stable" to control output ordering. * NEWS: Announce new option. * unique.m: Rewrite docstring to include "stable" and "sorted" options and documentation. Add two examples to docstring for clarification. Implement "stable" algorithm. Add BIST tests for behavior and input validation.
author Rik <rik@octave.org>
date Thu, 25 Jul 2019 09:07:14 -0700
parents 9b053ba971b2
children e2ba5f061806
files NEWS scripts/set/unique.m
diffstat 2 files changed, 141 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Wed Jul 24 12:37:21 2019 -0400
+++ b/NEWS	Thu Jul 25 09:07:14 2019 -0700
@@ -9,6 +9,10 @@
   behavior can be restored by setting `"editinplace"` to `false` and
   `"home"` to `"~/octave"`.
 
+- The `unique` function accepts a new sorting option `"stable"` which
+  will return output values in the same order as the input, rather
+  than in ascending order.
+
 #### Graphics backend
 
 - Graphic primitives now accept a color property value of `"none"`
@@ -58,10 +62,10 @@
   from releases prior to R2012b, can be obtained by using the `"legacy"`
   flag.
 
-- The functions `intersect`, `setxor`, `union`, now accept a `"legacy"`
-  flag which changes the index values (second and third outputs) as well
-  as the orientation of all outputs to match Matlab releases prior to
-  R2012b.
+- The functions `intersect`, `setxor`, and `union` now accept a
+  `"legacy"` flag which changes the index values (second and third
+  outputs) as well as the orientation of all outputs to match Matlab
+  releases prior to R2012b.
 
 - Complex RESTful web services can now be accessed by the `webread` and
   `webwrite` functions alongside with the `weboptions` structure.  One
--- a/scripts/set/unique.m	Wed Jul 24 12:37:21 2019 -0400
+++ b/scripts/set/unique.m	Thu Jul 25 09:07:14 2019 -0700
@@ -20,19 +20,26 @@
 ## -*- texinfo -*-
 ## @deftypefn  {} {} unique (@var{x})
 ## @deftypefnx {} {} unique (@var{x}, "rows")
+## @deftypefnx {} {} unique (@dots{}, "sorted")
+## @deftypefnx {} {} unique (@dots{}, "stable")
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{})
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "first")
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "last")
 ## @deftypefnx {} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "legacy")
-## Return the unique elements of @var{x} sorted in ascending order.
+## Return the unique elements of @var{x}.
 ##
 ## If the input @var{x} is a column vector then return a column vector;
 ## Otherwise, return a row vector.  @var{x} may also be a cell array of
 ## strings.
 ##
 ## If the optional argument @qcode{"rows"} is given then return the unique
-## rows of @var{x} sorted in ascending order.  The input must be a 2-D matrix
-## to use this option.
+## rows of @var{x}.  The input must be a 2-D numeric matrix to use this option.
+##
+## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order
+## in which unique values appear in the output.  The default is
+## @qcode{"sorted"} and values in the output are placed in ascending order.
+## The alternative @qcode{"stable"} preserves the order found in the input
+## @var{x}.
 ##
 ## If requested, return column index vectors @var{i} and @var{j} such that
 ## @code{@var{y} = @var{x}(@var{i})} and @code{@var{x} = @var{y}(@var{j})}.
@@ -43,12 +50,37 @@
 ## @qcode{"first"} is specified, return the lowest.  The default is
 ## @qcode{"first"}.
 ##
-## Programming Note: The input flag @qcode{"legacy"} changes the algorithm
+## Example 1 : sort order
+##
+## @example
+## @group
+## unique ([3, 1, 1, 2])
+## @result{} [1, 2, 3] 
+## unique ([3, 1, 1, 2], "stable")
+## @result{} [3, 1, 2] 
+## @end group
+## @end example
+##
+## Example 2 : index selection
+##
+## @example
+## @group
+## [~, @var{i}] = unique ([3, 1, 1, 2], "first")
+## @result{} @var{i} = [2; 4; 1] 
+## [~, @var{i}] = unique ([3, 1, 1, 2], "last")
+## @result{} @var{i} = [3; 4; 1] 
+## @end group
+## @end example
+##
+## Programming Notes: The input flag @qcode{"legacy"} changes the algorithm
 ## to be compatible with @sc{matlab} releases prior to R2012b.  Specifically,
 ## The index ordering flag is changed to @qcode{"last"}, and the shape of the
 ## outputs @var{i}, @var{j} will follow the shape of the input @var{x} rather
 ## than always being column vectors.
 ##
+## The third output, @var{j}, has not been implemented yet when the sort
+## order is @qcode{"stable"}.
+##
 ## @seealso{union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 
@@ -69,17 +101,29 @@
     optrows   = any (strcmp ("rows", varargin));
     optfirst  = any (strcmp ("first", varargin));
     optlast   = any (strcmp ("last", varargin));
+    optsorted = any (strcmp ("sorted", varargin));
+    optstable = any (strcmp ("stable", varargin));
     optlegacy = any (strcmp ("legacy", varargin));
     if (optfirst && optlast)
       error ('unique: cannot specify both "first" and "last"');
-    elseif (optfirst + optlast + optrows + optlegacy != nargin-1)
+    elseif (optsorted && optstable)
+      error ('unique: cannot specify both "sorted" and "stable"');
+    elseif ((optfirst || optlast) && (optsorted || optstable))
+      error ('unique: cannot specify "first"/"last" with "sorted"/"stable"');
+    elseif (optlegacy && (optsorted || optstable))
+      error ('unique: cannot specify "sorted" or "stable" with "legacy"');
+    elseif (optrows + optfirst + optlast + optsorted + optstable + optlegacy
+            != nargin-1)
       error ("unique: invalid option");
     endif
 
-    ## Set default to "first" if not set earlier.
+    ## Set defaults if not set earlier.
     if (! optfirst && ! optlast)
       optfirst = true;
     endif
+    if (! optsorted && ! optstable)
+      optsorted = true;
+    endif
 
     if (optrows && iscellstr (x))
       warning ('unique: "rows" is ignored for cell arrays');
@@ -88,6 +132,7 @@
   else
     optrows = false;
     optfirst = true;
+    optsorted = true;
     optlegacy = false;
   endif
 
@@ -99,7 +144,6 @@
   ## are very slow on sparse matrices.  Until they are fixed to be as
   ## fast as for full matrices, operate on the nonzero elements of the
   ## sparse array as long as we are not operating on rows.
-
   if (issparse (x) && ! optrows && nargout <= 1)
     if (nnz (x) < numel (x))
       y = unique ([0; nonzeros(x)], varargin{:});
@@ -118,37 +162,46 @@
     isrowvec = isrow (x);
   endif
 
-  y = x;
   ## Special cases 0 and 1
   if (n == 0)
-    if (! optrows && isempty (x) && any (size (x)))
-      if (iscellstr (y))
+    y = x;
+    if (! optrows && any (size (x)))
+      if (iscellstr (x))
         y = cell (0, 1);
       else
-        y = zeros (0, 1, class (y));
+        y = zeros (0, 1, class (x));
       endif
     endif
     i = j = [];
     return;
   elseif (n == 1)
+    y = x;
     i = j = 1;
     return;
   endif
 
+  ## Calculate y output
   if (optrows)
-    if (nargout > 1)
-      [y, i] = sortrows (y);
+    if (nargout > 1 || ! optsorted)
+      [y, i] = sortrows (x);
       i = i(:);
     else
-      y = sortrows (y);
+      y = sortrows (x);
     endif
     match = all (y(1:n-1,:) == y(2:n,:), 2);
-    y(match,:) = [];
+    if (optsorted)
+      y(match,:) = [];
+    else
+      y = x;
+      y(i([false; match]), :) = [];
+    endif
   else
-    if (! isvector (y))
-      y = y(:);
+    if (isvector (x))
+      y = x;
+    else
+      y = x(:);
     endif
-    if (nargout > 1)
+    if (nargout > 1 || ! optsorted)
       [y, i] = sort (y);
       i = i(:);
     else
@@ -159,23 +212,46 @@
     else
       match = (y(1:n-1) == y(2:n));
     endif
-    y(match) = [];
+    if (optsorted)
+      y(match) = [];
+    else
+      if (isvector (x))
+        y = x;
+      else
+        y = x(:);
+      endif
+      y(i([false; match(:)])) = [];
+    endif
   endif
 
+  ## Calculate j output (3rd output)
   if (isargout (3))
-    j = i;
+    j = i;  # cheap way to copy dimensions
     j(i) = cumsum ([1; ! match(:)]);
+    if (! optsorted)
+      warning ("unique: third output J is not yet implemented");
+      j = [];
+    endif
+
     if (optlegacy && isrowvec)
       j = j.';
     endif
   endif
 
+  ## Calculate i output (2nd output)
   if (isargout (2))
-    idx = find (match);
-    if (! optlegacy && optfirst)
-      idx += 1;   # in-place is faster than other forms of increment
+    if (optsorted)
+      idx = find (match);
+      if (! optlegacy && optfirst)
+        idx += 1;   # in-place is faster than other forms of increment
+      endif
+      i(idx) = [];
+    else
+      i([false; match(:)]) = [];
+      ## FIXME: Is there a way to avoid a call to sort?
+      i = sort (i);
     endif
-    i(idx) = [];
+
     if (optlegacy && isrowvec)
       i = i.';
     endif
@@ -191,7 +267,9 @@
 %!assert (unique ([1 2]), [1 2])
 %!assert (unique ([1;2]), [1;2])
 %!assert (unique ([1,NaN,Inf,NaN,Inf]), [1,Inf,NaN,NaN])
+%!assert (unique ([1,NaN,Inf,NaN,Inf], "stable"), [1,NaN,Inf,NaN])
 %!assert (unique ({"Foo","Bar","Foo"}), {"Bar","Foo"})
+%!assert (unique ({"Foo","Bar","Foo"}, "stable"), {"Foo", "Bar"})
 %!assert (unique ({"Foo","Bar","FooBar"}'), {"Bar","Foo","FooBar"}')
 %!assert (unique (zeros (1,0)), zeros (0,1))
 %!assert (unique (zeros (1,0), "rows"), zeros (1,0))
@@ -217,6 +295,12 @@
 %! assert (j, [1;1;2;3;3;3;4]);
 
 %!test
+%! [y,i,~] = unique ([4,4,2,2,2,3,1], "stable");
+%! assert (y, [4,2,3,1]);
+%! assert (i, [1;3;6;7]);
+%! ##assert (j, []);
+
+%!test
 %! [y,i,j] = unique ([1,1,2,3,3,3,4]', "last");
 %! assert (y, [1,2,3,4]');
 %! assert (i, [2;3;6;7]);
@@ -229,6 +313,11 @@
 %! assert (j, [1;1;1]);
 
 %!test
+%! [y,i,~] = unique ({"B"; "A"; "B"}, "stable");
+%! assert (y, {"B"; "A"});
+%! assert (i, [1; 2]);
+
+%!test
 %! A = [1,2,3; 1,2,3];
 %! [y,i,j] = unique (A, "rows");
 %! assert (y, [1,2,3]);
@@ -236,6 +325,13 @@
 %! assert (y(j,:), A);
 
 %!test
+%! A = [4,5,6; 1,2,3; 4,5,6];
+%! [y,i,~] = unique (A, "rows", "stable");
+%! assert (y, [4,5,6; 1,2,3]);
+%! assert (A(i,:), y);
+%! ##assert (y(j,:), A);
+
+%!test
 %! [y,i,j] = unique ([1,1,2,3,3,3,4], "legacy");
 %! assert (y, [1,2,3,4]);
 %! assert (i, [2,3,6,7]);
@@ -253,9 +349,22 @@
 %!error <X must be an array or cell array of strings> unique ({1})
 %!error <options must be strings> unique (1, 2)
 %!error <cannot specify both "first" and "last"> unique (1, "first", "last")
+%!error <cannot specify both "sorted" and "stable">
+%! unique (1, "sorted", "stable");
+%!error <cannot specify "first"/"last" with "sorted"/"stable">
+%! unique (1, "first", "sorted");
+%!error <cannot specify "first"/"last" with "sorted"/"stable">
+%! unique (1, "last", "stable");
+%!error <cannot specify "sorted" or "stable" with "legacy">
+%! unique (1, "sorted", "legacy");
+%!error <cannot specify "sorted" or "stable" with "legacy">
+%! unique (1, "stable", "legacy");
 %!error <invalid option> unique (1, "middle")
 %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption")
 %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption1", "UnknownOption2")
 %!error <invalid option> unique ({"a", "b", "c"}, "rows", "UnknownOption2")
 %!error <invalid option> unique ({"a", "b", "c"}, "UnknownOption1", "last")
 %!warning <"rows" is ignored for cell arrays> unique ({"1"}, "rows");
+%!warning <third output J is not yet implemented>
+%! [y,i,j] = unique ([2,1], "stable");
+%! assert (j, []);