changeset 27292:e2ba5f061806

union: Add "stable" sorting algorithm. * NEWS: Announce addition. * union.m: Add new calling forms to docstring. Add documentation about "stable"/"sorted" options to control output ordering. Add new BIST tests and rewrite others to be less than 80 characters long. * validsetargs.m: Add input validation to ensure that only one of the options "stable", "sorted", or "legacy" is given.
author Rik <rik@octave.org>
date Fri, 26 Jul 2019 19:33:54 -0700
parents c4f9a0f097a3
children 6525b3fe3cf9
files NEWS scripts/set/private/validsetargs.m scripts/set/union.m
diffstat 3 files changed, 79 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Thu Jul 25 09:07:14 2019 -0700
+++ b/NEWS	Fri Jul 26 19:33:54 2019 -0700
@@ -9,8 +9,8 @@
   behavior can be restored by setting `"editinplace"` to `false` and
   `"home"` to `"~/octave"`.
 
-- The `unique` function accepts a new sorting option `"stable"` which
-  will return output values in the same order as the input, rather
+- The `unique`, `union` functions accept a new sorting option `"stable"`
+  which will return output values in the same order as the input, rather
   than in ascending order.
 
 #### Graphics backend
--- a/scripts/set/private/validsetargs.m	Thu Jul 25 09:07:14 2019 -0700
+++ b/scripts/set/private/validsetargs.m	Fri Jul 26 19:33:54 2019 -0700
@@ -42,10 +42,14 @@
       error ("%s: A and B must be arrays or cell arrays of strings", caller);
     endif
   else
+    optlegacy = false;
+    optsorted = false;
+    optstable = false;
+
     for arg = varargin
       switch (arg{1})
         case "legacy"
-          ## Accepted option, do nothing.
+          optlegacy = true;
 
         case "rows"
           if (iscell (x) || iscell (y))
@@ -60,11 +64,23 @@
             endif
           endif
 
+        case "sorted"
+          optsorted = true;
+
+        case "stable"
+          optstable = true;
+
         otherwise
           error ("%s: invalid option: %s", caller, arg{1});
 
       endswitch
     endfor
+
+    if (optsorted + optstable + optlegacy > 1)
+      error ('%s: only one of "sorted", "stable", or "legacy" may be specified',
+             caller);
+    endif
+
   endif
 
 endfunction
--- a/scripts/set/union.m	Thu Jul 25 09:07:14 2019 -0700
+++ b/scripts/set/union.m	Fri Jul 26 19:33:54 2019 -0700
@@ -20,19 +20,25 @@
 ## -*- texinfo -*-
 ## @deftypefn  {} {@var{c} =} union (@var{a}, @var{b})
 ## @deftypefnx {} {@var{c} =} union (@var{a}, @var{b}, "rows")
+## @deftypefnx {} {@var{c} =} union (@dots{}, "sorted")
+## @deftypefnx {} {@var{c} =} union (@dots{}, "stable")
 ## @deftypefnx {} {@var{c} =} union (@dots{}, "legacy")
 ## @deftypefnx {} {[@var{c}, @var{ia}, @var{ib}] =} union (@dots{})
 ##
-## Return the unique elements that are in either @var{a} or @var{b} sorted in
-## ascending order.
+## Return the unique elements that are in either @var{a} or @var{b}.
 ##
 ## If @var{a} and @var{b} are both row vectors then return a row vector;
 ## Otherwise, return a column vector.  The inputs may also be cell arrays of
 ## strings.
 ##
 ## If the optional input @qcode{"rows"} is given then return rows that are in
-## either @var{a} or @var{b}.  The inputs must be 2-D matrices to use this
-## option.
+## either @var{a} or @var{b}.  The inputs must be 2-D numeric matrices to use
+## this option.
+##
+## The optional argument @qcode{"sorted"}/@qcode{"stable"} controls the order
+## in which unique values appear in the output.  The default is
+## @qcode{"sorted"} and values in the output are placed in ascending order.
+## The alternative @qcode{"stable"} preserves the order found in the input.
 ##
 ## The optional outputs @var{ia} and @var{ib} are column index vectors such
 ## that @code{@var{a}(@var{ia})} and @code{@var{b}(@var{ib})} are disjoint sets
@@ -105,14 +111,44 @@
 
 %!test
 %! a = [3, 1, 4, 1, 5];
-%! b = [1, 2, 3, 4];
-%! [y, ia, ib] = union (a, b.');
+%! b = [1; 2; 3; 4];
+%! [y, ia, ib] = union (a, b);
 %! assert (y, [1; 2; 3; 4; 5]);
 %! assert (y, sort ([a(ia)'; b(ib)']));
 
-%!assert (nthargout (2:3, @union, [1, 2, 4], [2, 3, 5]), {[1; 2; 3], [2; 3]})
-%!assert (nthargout (2:3, @union, [1 2; 2 3; 4 5], [2 3; 3 4; 5 6], "rows"),
-%!        {[1; 2; 3], [2; 3]})
+## Test "stable" sorting order
+%!assert (union ([1, 2, 4], [2, 3, 5], "stable"), [1, 2, 4, 3, 5])
+%!assert (union ([1, 2, 4]', [2, 3, 5], "stable"), [1; 2; 4; 3; 5])
+%!assert (union ([1, 2, 4], [2, 3, 5]', "stable"), [1; 2; 4; 3; 5])
+
+%!test
+%! a = [3, 1, 4, 1, 5];
+%! b = [1; 2; 3; 4];
+%! [y, ia, ib] = union (a, b, "stable");
+%! assert (y, [3; 1; 4; 5; 2]);
+%! assert (ia, [1; 2; 3; 5]);
+%! assert (ib, [2]);
+
+## Test indexing outputs
+%!test
+%! a = [1, 4, 2];
+%! b = [2, 3, 5];
+%! [~, ia, ib] = union (a, b);
+%! assert (ia, [1; 3; 2]);
+%! assert (ib, [2; 3]);
+%! [~, ia, ib] = union (a, b, "stable");
+%! assert (ia, [1; 2; 3]);
+%! assert (ib, [2; 3]);
+
+%!test
+%! a = [1 2; 4 5; 2 3];
+%! b = [2 3; 3 4; 5 6];
+%! [~, ia, ib] = union (a, b, "rows");
+%! assert (ia, [1; 3; 2]);
+%! assert ([2; 3]);
+%! [~, ia, ib] = union (a, b, "rows", "stable");
+%! assert (ia, [1; 2; 3]);
+%! assert ([2; 3]);
 
 ## Test "legacy" option
 %!test
@@ -138,17 +174,25 @@
 ## Test common input validation for set routines contained in validsetargs
 %!error <cell array of strings cannot be combined> union ({"a"}, 1)
 %!error <A and B must be arrays or cell arrays> union (@sin, 1)
-%!error <invalid option: columns> union (1, 2, "columns")
 %!error <cells not supported with "rows"> union ({"a"}, {"b"}, "rows")
+%!error <cells not supported with "rows"> union ({"a"}, {"b"}, "rows","legacy")
 %!error <A and B must be arrays or cell arrays> union (@sin, 1, "rows")
+%!error <A and B must be arrays or cell arrays> union (@sin,1,"rows","legacy")
 %!error <A and B must be 2-dimensional matrices> union (rand(2,2,2), 1, "rows")
 %!error <A and B must be 2-dimensional matrices> union (1, rand(2,2,2), "rows")
+%!error <A and B must be 2-dimensional matrices>
+%! union (rand(2,2,2), 1, "rows", "legacy");
+%!error <A and B must be 2-dimensional matrices>
+%! union (1, rand(2,2,2), "rows", "legacy");
 %!error <number of columns in A and B must match> union ([1 2], 1, "rows")
 %!error <number of columns in A and B must match> union (1, [1 2], "rows")
+%!error <number of columns in A and B must match>
+%! union ([1 2], 1, "rows", "legacy");
+%!error <number of columns in A and B must match>
+%! union (1, [1 2], "rows", "legacy");
+%!error <invalid option: columns> union (1, 2, "columns")
 %!error <invalid option: columns> union (1, 2, "legacy", "columns")
-%!error <cells not supported with "rows"> union ({"a"}, {"b"}, "rows", "legacy")
-%!error <A and B must be arrays or cell arrays> union (@sin, 1, "rows", "legacy")
-%!error <A and B must be 2-dimensional matrices> union (rand(2,2,2), 1, "rows", "legacy")
-%!error <A and B must be 2-dimensional matrices> union (1, rand(2,2,2), "rows", "legacy")
-%!error <number of columns in A and B must match> union ([1 2], 1, "rows", "legacy")
-%!error <number of columns in A and B must match> union (1, [1 2], "rows", "legacy")
+%!error <only one of "sorted", "stable", or "legacy" may be specified>
+%! union (1, 2, "sorted", "stable");
+%!error <only one of "sorted", "stable", or "legacy" may be specified>
+%! union (1, 2, "sorted", "legacy");