# HG changeset patch # User Rik # Date 1407775185 25200 # Node ID d00f6b09258fa5632ff45aa345bf6594d9f232a8 # Parent 391e080ae81061e48d29edeff9b2dff170101d9b Overhaul functions in scripts/set directory. * set.txi: Rewrite documentation for set functions. * intersect.m: Rewrite docstring. Use by_rows variable for code clarity. Return output orientation which is compatible with Matlab. Add %!tests for output orientation and N-dimensional inputs. * setdiff.m: Rewrite docstring. Use by_rows variable for code clarity. Rename output i to ia to clarify it is an index into the set a. Return output orientation which is compatible with Matlab. Add %!tests for N-dimensional inputs. * setxor.m: Rewrite docstring. Use by_rows variable for code clarity. Return output orientation which is compatible with Matlab. Add %!tests for output orientation and N-dimensional inputs. * union.m: Rewrite docstring. Use by_rows variable for code clarity. Return output orientation which is compatible with Matlab. Add %!tests for output orientation and N-dimensional inputs. Add %!tests for validsetargs which are common to all set functions. * unique.m: Rewrite docstring. Verify that input is numeric or cell array of strings. Avoid computing idx for optional i,j outputs unless required. Add %!error tests for input validation. * ismember.m: Rewrite docstring. Use input variable 'a' instead of 'A' for conformance with rest of set functions. Rename output index variable to s_idx for clarity that it is an index into the set s. * powerset.m: Rewrite docstring. Add input validation on nargin. Add %!error input validation tests. * module.mk: Include validsetargs.m in build system. * validsetargs.m: Function renamed from validargs which was too general. * validargs.m: Function renamed to validsetargs. 
diff -r 391e080ae810 -r d00f6b09258f doc/interpreter/set.txi --- a/doc/interpreter/set.txi Sun Aug 10 08:18:18 2014 -0700 +++ b/doc/interpreter/set.txi Mon Aug 11 09:39:45 2014 -0700 @@ -19,9 +19,12 @@ @node Sets @chapter Sets -Octave has a limited number of functions for managing sets of data, where a -set is defined as a collection of unique elements. In Octave a set is -represented as a vector of numbers. +Octave has a number of functions for managing sets of data. A set is defined +as a collection of unique elements and is typically represented by a vector of +numbers sorted in ascending order. Any vector or matrix can be converted to a +set by removing duplicates through the use of the @code{unique} function. +However, it isn't necessary to explicitly create a set as all of the functions +which operate on sets will convert their input to a set before proceeding. @DOCSTRING(unique) @@ -32,28 +35,34 @@ @node Set Operations @section Set Operations -Octave supports the basic set operations. That is, Octave can compute -the union, intersection, and difference of two sets. -Octave also supports the @emph{Exclusive Or} set operation, and -membership determination. The functions for set operations all work in -pretty much the same way. As an example, assume that @code{x} and -@code{y} contains two sets, then +Octave supports several basic set operations. Octave can compute the union, +intersection, and difference of two sets. Octave also supports the +@emph{Exclusive Or} set operation. + +The functions for set operations all work in the same way by accepting two +input sets and returning a third set. As an example, assume that @code{a} and +@code{b} contain two sets, then @example -union (x, y) +union (a, b) @end example @noindent computes the union of the two sets. -@DOCSTRING(ismember) +Finally, determining whether elements belong to a set can be done with the +@code{ismember} function. 
Because sets are ordered this operation is very +efficient and is of order O(log2(n)) which is preferable to the @code{find} +function which is of order O(n). + +@DOCSTRING(intersect) @DOCSTRING(union) -@DOCSTRING(intersect) - @DOCSTRING(setdiff) @DOCSTRING(setxor) +@DOCSTRING(ismember) + @DOCSTRING(powerset) diff -r 391e080ae810 -r d00f6b09258f scripts/set/intersect.m --- a/scripts/set/intersect.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/intersect.m Mon Aug 11 09:39:45 2014 -0700 @@ -18,19 +18,25 @@ ## . ## -*- texinfo -*- -## @deftypefn {Function File} {} intersect (@var{a}, @var{b}) -## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} intersect (@var{a}, @var{b}) +## @deftypefn {Function File} {@var{c} =} intersect (@var{a}, @var{b}) +## @deftypefnx {Function File} {@var{c} =} intersect (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} intersect (@dots{}) +## +## Return the elements common to both @var{a} and @var{b} sorted in ascending +## order. ## -## Return the elements in both @var{a} and @var{b}, sorted in ascending -## order. If @var{a} and @var{b} are both column vectors return a column -## vector, otherwise return a row vector. -## @var{a}, @var{b} may be cell arrays of string(s). +## If @var{a} and @var{b} are both column vectors then return a column vector; +## Otherwise, return a row vector. The inputs may also be cell arrays of +## strings. ## -## Return index vectors @var{ia} and @var{ib} such that @code{a(ia)==c} and -## @code{b(ib)==c}. +## If the optional input @qcode{"rows"} is given then return the common rows of +## @var{a} and @var{b}. The inputs must be 2-D matrices to use this option. +## +## If requested, return index vectors @var{ia} and @var{ib} such that +## @code{@var{c} = @var{a}(@var{ia})} and @code{@var{c} = @var{b}(@var{ib})}. 
## ## @end deftypefn -## @seealso{unique, union, setxor, setdiff, ismember} +## @seealso{unique, union, setdiff, setxor, ismember} function [c, ia, ib] = intersect (a, b, varargin) @@ -38,12 +44,15 @@ print_usage (); endif - [a, b] = validargs ("intersect", a, b, varargin{:}); + [a, b] = validsetargs ("intersect", a, b, varargin{:}); if (isempty (a) || isempty (b)) c = ia = ib = []; else - ## form a and b into sets + by_rows = nargin == 3; + iscol = isvector (a) && isvector (b) && iscolumn (a) && iscolumn (b); + + ## Form A and B into sets if (nargout > 1) [a, ja] = unique (a, varargin{:}); [b, jb] = unique (b, varargin{:}); @@ -52,7 +61,7 @@ b = unique (b, varargin{:}); endif - if (nargin > 2) + if (by_rows) c = [a; b]; [c, ic] = sortrows (c); ii = find (all (c(1:end-1,:) == c(2:end,:), 2)); @@ -60,7 +69,7 @@ len_a = rows (a); else c = [a(:); b(:)]; - [c, ic] = sort (c); # [a(:);b(:)](ic) == c + [c, ic] = sort (c); # [a(:);b(:)](ic) == c if (iscellstr (c)) ii = find (strcmp (c(1:end-1), c(2:end))); else @@ -71,11 +80,12 @@ endif if (nargout > 1) - ia = ja(ic(ii)); # a(ia) == c - ib = jb(ic(ii+1) - len_a); # b(ib) == c + ia = ja(ic(ii)); # a(ia) == c + ib = jb(ic(ii+1) - len_a); # b(ib) == c endif - if (nargin == 2 && (rows (b) == 1 || rows (a) == 1)) + ## Adjust output orientation for Matlab compatibility + if (! by_rows && ! iscol) c = c.'; endif endif @@ -83,7 +93,24 @@ endfunction -%!# Test the routine for index vectors ia and ib +## Test orientation of output +%!shared a,b +%! a = 1:4; +%! b = 2:5; + +%!assert (size (intersect (a, b)), [1 3]) +%!assert (size (intersect (a', b)), [1 3]) +%!assert (size (intersect (a, b')), [1 3]) +%!assert (size (intersect (a', b')), [3 1]) + +## Test multi-dimensional arrays +%!test +%! a = rand (3,3,3); +%! b = a; +%! b(1,1,1) = 2; +%! assert (intersect (a, b), sort (a(2:end))); + +## Test the routine for index vectors ia and ib %!test %! a = [3 2 4 5 7 6 5 1 0 13 13]; %! 
b = [3 5 12 1 1 7]; diff -r 391e080ae810 -r d00f6b09258f scripts/set/ismember.m --- a/scripts/set/ismember.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/ismember.m Mon Aug 11 09:39:45 2014 -0700 @@ -18,13 +18,16 @@ ## . ## -*- texinfo -*- -## @deftypefn {Function File} {@var{tf} =} ismember (@var{A}, @var{s}) -## @deftypefnx {Function File} {[@var{tf}, @var{S_idx}] =} ismember (@var{A}, @var{s}) -## @deftypefnx {Function File} {[@var{tf}, @var{S_idx}] =} ismember (@var{A}, @var{s}, "rows") -## Return a logical matrix @var{tf} with the same shape as @var{A} which is -## true (1) if @code{A(i,j)} is in @var{s} and false (0) if it is not. If a -## second output argument is requested, the index into @var{s} of each of the -## matching elements is also returned. +## @deftypefn {Function File} {@var{tf} =} ismember (@var{a}, @var{s}) +## @deftypefnx {Function File} {@var{tf} =} ismember (@var{a}, @var{s}, "rows") +## @deftypefnx {Function File} {[@var{tf}, @var{s_idx}] =} ismember (@dots{}) +## +## Return a logical matrix @var{tf} with the same shape as @var{a} which is +## true (1) if the element in @var{a} is found in @var{s} and false (0) if it +## is not. +## +## If a second output argument is requested then the index into @var{s} of each +## matching element is also returned. ## ## @example ## @group @@ -36,7 +39,7 @@ ## @end group ## @end example ## -## The inputs, @var{A} and @var{s}, may also be cell arrays. +## The inputs @var{a} and @var{s} may also be cell arrays. ## ## @example ## @group @@ -48,9 +51,9 @@ ## @end group ## @end example ## -## With the optional third argument @qcode{"rows"}, and matrices -## @var{A} and @var{s} with the same number of columns, compare rows in -## @var{A} with the rows in @var{s}. +## If the optional third argument @qcode{"rows"} is given then compare rows +## in @var{a} with rows in @var{s}. The inputs must be 2-D matrices with the +## same number of columns to use this option. 
## ## @example ## @group @@ -62,7 +65,7 @@ ## @end group ## @end example ## -## @seealso{unique, union, intersect, setxor, setdiff} +## @seealso{lookup, unique, union, intersect, setdiff, setxor} ## @end deftypefn ## Author: Paul Kienzle @@ -71,68 +74,67 @@ ## Adapted-by: jwe ## Reimplemented using lookup & unique: Jaroslav Hajek -function [tf, a_idx] = ismember (A, s, varargin) +function [tf, s_idx] = ismember (a, s, varargin) if (nargin < 2 || nargin > 3) print_usage (); endif ## lookup() does not handle logical values - if (islogical (A)) - A = uint8 (A); + if (islogical (a)) + a = uint8 (a); endif if (islogical (s)) s = uint8 (s); endif - [A, s] = validargs ("ismember", A, s, varargin{:}); + [a, s] = validsetargs ("ismember", a, s, varargin{:}); + + by_rows = nargin == 3; - if (nargin == 2) + if (! by_rows) s = s(:); - ## We do it this way, because we expect the array to be often sorted. + ## Check sort status, because we expect the array will often be sorted. if (issorted (s)) is = []; else [s, is] = sort (s); endif - ## sort out NaNs in table + ## Remove NaNs from table because lookup can't handle them if (isreal (s) && ! isempty (s) && isnan (s(end))) - s = s(1:end - sum (isnan (s))); + s = s(1:end - sum (isnan (s))); endif if (nargout > 1) - a_idx = lookup (s, A, "m"); - tf = logical (a_idx); + s_idx = lookup (s, a, "m"); + tf = logical (s_idx); if (! isempty (is)) - a_idx(tf) = is (a_idx(tf)); + s_idx(tf) = is(s_idx(tf)); endif else - tf = lookup (s, A, "b"); + tf = lookup (s, a, "b"); endif - else - - if (isempty (A) || isempty (s)) - tf = false (rows (A), 1); - a_idx = zeros (rows (A), 1); + else # "rows" argument + if (isempty (a) || isempty (s)) + tf = false (rows (a), 1); + s_idx = zeros (rows (a), 1); else - if (rows (s) == 1) - tf = all (bsxfun (@eq, A, s), 2); - a_idx = double (tf); + tf = all (bsxfun (@eq, a, s), 2); + s_idx = double (tf); else ## FIXME: lookup does not support "rows", so we just use unique. 
- [~, ii, jj] = unique ([A; s], "rows", "last"); - na = rows (A); + [~, ii, jj] = unique ([a; s], "rows", "last"); + na = rows (a); jj = ii(jj(1:na)); tf = jj > na; if (nargout > 1) - a_idx = max (0, jj - na); + s_idx = max (0, jj - na); endif endif - endif endif @@ -160,67 +162,67 @@ %!assert (ismember ("1", "0123456789."), true) %!test -%! [result, a_idx] = ismember ([1, 2], []); +%! [result, s_idx] = ismember ([1, 2], []); %! assert (result, [false false]) -%! assert (a_idx, [0, 0]); +%! assert (s_idx, [0, 0]); %!test -%! [result, a_idx] = ismember ([], [1, 2]); +%! [result, s_idx] = ismember ([], [1, 2]); %! assert (result, logical ([])) -%! assert (a_idx, []); +%! assert (s_idx, []); %!test -%! [result, a_idx] = ismember ({"a", "b"}, ""); +%! [result, s_idx] = ismember ({"a", "b"}, ""); %! assert (result, [false false]) -%! assert (a_idx, [0, 0]); +%! assert (s_idx, [0, 0]); %!test -%! [result, a_idx] = ismember ({"a", "b"}, {}); +%! [result, s_idx] = ismember ({"a", "b"}, {}); %! assert (result, [false false]) -%! assert (a_idx, [0, 0]); +%! assert (s_idx, [0, 0]); %!test -%! [result, a_idx] = ismember ("", {"a", "b"}); +%! [result, s_idx] = ismember ("", {"a", "b"}); %! assert (result, false) -%! assert (a_idx, 0); +%! assert (s_idx, 0); %!test -%! [result, a_idx] = ismember ({}, {"a", "b"}); +%! [result, s_idx] = ismember ({}, {"a", "b"}); %! assert (result, logical ([])) -%! assert (a_idx, []); +%! assert (s_idx, []); %!test -%! [result, a_idx] = ismember ([1 2 3 4 5], [3]); +%! [result, s_idx] = ismember ([1 2 3 4 5], [3]); %! assert (result, logical ([0 0 1 0 0])) -%! assert (a_idx , [0 0 1 0 0]); +%! assert (s_idx , [0 0 1 0 0]); %!test -%! [result, a_idx] = ismember ([1 6], [1 2 3 4 5 1 6 1]); +%! [result, s_idx] = ismember ([1 6], [1 2 3 4 5 1 6 1]); %! assert (result, [true true]); -%! assert (a_idx(2), 7); +%! assert (s_idx(2), 7); %!test -%! [result, a_idx] = ismember ([3,10,1], [0,1,2,3,4,5,6,7,8,9]); +%! 
[result, s_idx] = ismember ([3,10,1], [0,1,2,3,4,5,6,7,8,9]); %! assert (result, [true false true]); -%! assert (a_idx, [4, 0, 2]); +%! assert (s_idx, [4, 0, 2]); %!test -%! [result, a_idx] = ismember ("1.1", "0123456789.1"); +%! [result, s_idx] = ismember ("1.1", "0123456789.1"); %! assert (result, [true true true]); -%! assert (a_idx, [12, 11, 12]); +%! assert (s_idx, [12, 11, 12]); %!test -%! [result, a_idx] = ismember ([1:3; 5:7; 4:6], [0:2; 1:3; 2:4; 3:5; 4:6], "rows"); +%! [result, s_idx] = ismember ([1:3; 5:7; 4:6], [0:2; 1:3; 2:4; 3:5; 4:6], "rows"); %! assert (result, [true; false; true]); -%! assert (a_idx, [2; 0; 5]); +%! assert (s_idx, [2; 0; 5]); %!test -%! [result, a_idx] = ismember ([1.1,1.2,1.3; 2.1,2.2,2.3; 10,11,12], [1.1,1.2,1.3; 10,11,12; 2.12,2.22,2.32], "rows"); +%! [result, s_idx] = ismember ([1.1,1.2,1.3; 2.1,2.2,2.3; 10,11,12], [1.1,1.2,1.3; 10,11,12; 2.12,2.22,2.32], "rows"); %! assert (result, [true; false; true]); -%! assert (a_idx, [1; 0; 2]); +%! assert (s_idx, [1; 0; 2]); %!test -%! [result, a_idx] = ismember ([1:3; 5:7; 4:6; 0:2; 1:3; 2:4], [1:3], "rows"); +%! [result, s_idx] = ismember ([1:3; 5:7; 4:6; 0:2; 1:3; 2:4], [1:3], "rows"); %! assert (result, logical ([1 0 0 0 1 0]')); -%! assert (a_idx, [1 0 0 0 1 0]'); +%! assert (s_idx, [1 0 0 0 1 0]'); diff -r 391e080ae810 -r d00f6b09258f scripts/set/module.mk --- a/scripts/set/module.mk Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/module.mk Mon Aug 11 09:39:45 2014 -0700 @@ -8,7 +8,7 @@ set/setxor.m \ set/union.m \ set/unique.m \ - set/private/validargs.m + set/private/validsetargs.m FCN_FILES += $(set_FCN_FILES) diff -r 391e080ae810 -r d00f6b09258f scripts/set/powerset.m --- a/scripts/set/powerset.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/powerset.m Mon Aug 11 09:39:45 2014 -0700 @@ -25,33 +25,35 @@ ## output will always be a cell array of either vectors or strings. 
## ## With the optional second argument @qcode{"rows"}, each row of the set @var{a} -## is considered one element of the set. As a result, @var{a} must then be a -## numerical 2-D matrix. +## is considered one element of the set. The input must be a 2-D numeric +## matrix to use this argument. ## -## @seealso{unique, union, setxor, setdiff, ismember} +## @seealso{unique, union, intersect, setdiff, setxor, ismember} ## @end deftypefn function p = powerset (a, byrows_arg) + if (nargin < 1 || nargin > 2) + print_usage (); + endif + byrows = false; - if (nargin == 2) if (! strcmpi (byrows_arg, "rows")) error ('powerset: expecting second argument to be "rows"'); elseif (iscell (a)) - warning ('powerset: "rows" not valid for cell arrays'); + error ('powerset: "rows" not valid for cell arrays'); else byrows = true; endif - elseif (nargin != 1) - print_usage (); endif + if (iscell (a) && ! iscellstr (a)) - error ("powerset: cell arrays can only used for character strings"); + error ("powerset: cell arrays can only be used for character strings"); endif if (byrows) - a = unique (a, byrows_arg); + a = unique (a, "rows"); n = rows (a); else a = unique (a); @@ -86,12 +88,23 @@ endfunction -%!shared c, p -%! c = sort (cellstr ({ [], [1], [2], [3], [1, 2], [1, 3], [2, 3], [1, 2, 3]})); +%!test +%! c = sort (cellstr ({[], [1], [2], [3], [1, 2], [1, 3], [2, 3], [1, 2, 3]})); %! p = sort (cellstr (powerset ([1, 2, 3]))); -%!assert (p, c); +%! assert (p, c); + +%!test %! c = sort (cellstr ({ [], [1:3], [2:4], [3:5], [1:3; 2:4], [1:3; 3:5], [2:4; 3:5], [1:3; 2:4; 3:5]})); %! p = sort (cellstr (powerset ([1:3;2:4;3:5], "rows"))); -%!assert (p,c); +%! 
assert (p,c); + %!assert (powerset([]), {}); # always return a cell array +%% Test input validation +%!error powerset () +%!error powerset (1,2,3) +%!error powerset (1, "cols") +%!error <"rows" not valid for cell arrays> powerset ({1}, "rows") +%!error powerset ({1}) +%!error powerset (1:33) + diff -r 391e080ae810 -r d00f6b09258f scripts/set/private/validargs.m --- a/scripts/set/private/validargs.m Sun Aug 10 08:18:18 2014 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -## Copyright (C) 2000-2013 Paul Kienzle -## Copyright (C) 2009-2010 Jaroslav Hajek -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. If not, see -## . - -## Validate arguments for binary set operation. -function [x, y] = validargs (caller, x, y, byrows_arg) - - if (nargin == 3) - icx = iscellstr (x); - icy = iscellstr (y); - if (icx || icy) - if (icx && ischar (y)) - y = cellstr (y); - elseif (icy && ischar (x)) - x = cellstr (x); - elseif (! (icx && icy)) - error ("%s: cell array of strings cannot be combined with a nonstring value", caller); - endif - elseif (! (ismatrix (x) && ismatrix (y))) - error ("%s: input arguments must be arrays or cell arrays of strings", caller); - endif - elseif (nargin == 4) - if (strcmpi (byrows_arg, "rows")) - if (iscell (x) || iscell (y)) - error ('%s: cells not supported with "rows"', caller); - elseif (! 
(ismatrix (x) && ismatrix (y))) - error ("%s: input arguments must be arrays or cell arrays of strings", caller); - else - if (ndims (x) > 2 || ndims (y) > 2) - error ('%s: need 2-dimensional matrices for "rows"', caller); - elseif (columns (x) != columns (y) && ! (isempty (x) || isempty (y))) - error ("%s: number of columns must match", caller); - endif - endif - else - error ("%s: invalid option: %s", caller, byrows_arg); - endif - else - print_usage (caller); - endif - -endfunction - diff -r 391e080ae810 -r d00f6b09258f scripts/set/private/validsetargs.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/set/private/validsetargs.m Mon Aug 11 09:39:45 2014 -0700 @@ -0,0 +1,59 @@ +## Copyright (C) 2000-2013 Paul Kienzle +## Copyright (C) 2009-2010 Jaroslav Hajek +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or (at +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## . + +## Validate arguments for binary set operation. + +function [x, y] = validsetargs (caller, x, y, byrows_arg) + + if (nargin == 3) + icx = iscellstr (x); + icy = iscellstr (y); + if (icx || icy) + if (icx && ischar (y)) + y = cellstr (y); + elseif (icy && ischar (x)) + x = cellstr (x); + elseif (! (icx && icy)) + error ("%s: cell array of strings cannot be combined with a nonstring value", caller); + endif + elseif (! 
(ismatrix (x) && ismatrix (y))) + error ("%s: A and B must be arrays or cell arrays of strings", caller); + endif + elseif (nargin == 4) + if (! strcmpi (byrows_arg, "rows")) + error ("%s: invalid option: %s", caller, byrows_arg); + endif + + if (iscell (x) || iscell (y)) + error ('%s: cells not supported with "rows"', caller); + elseif (! (ismatrix (x) && ismatrix (y))) + error ("%s: A and B must be arrays or cell arrays of strings", caller); + else + if (ndims (x) > 2 || ndims (y) > 2) + error ('%s: A and B must be 2-dimensional matrices for "rows"', caller); + elseif (columns (x) != columns (y) && ! (isempty (x) || isempty (y))) + error ("%s: number of columns in A and B must match", caller); + endif + endif + endif + +endfunction + + +## %!tests for function are in union.m diff -r 391e080ae810 -r d00f6b09258f scripts/set/setdiff.m --- a/scripts/set/setdiff.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/setdiff.m Mon Aug 11 09:39:45 2014 -0700 @@ -18,72 +18,81 @@ ## . ## -*- texinfo -*- -## @deftypefn {Function File} {} setdiff (@var{a}, @var{b}) -## @deftypefnx {Function File} {} setdiff (@var{a}, @var{b}, "rows") -## @deftypefnx {Function File} {[@var{c}, @var{i}] =} setdiff (@var{a}, @var{b}) -## Return the elements in @var{a} that are not in @var{b}, sorted in -## ascending order. If @var{a} and @var{b} are both column vectors -## return a column vector, otherwise return a row vector. -## @var{a}, @var{b} may be cell arrays of string(s). +## @deftypefn {Function File} {@var{c} =} setdiff (@var{a}, @var{b}) +## @deftypefnx {Function File} {@var{c} =} setdiff (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{ia}] =} setdiff (@dots{}) +## Return the elements in @var{a} that are not in @var{b} sorted in +## ascending order. ## -## Given the optional third argument @qcode{"rows"}, return the rows in -## @var{a} that are not in @var{b}, sorted in ascending order by rows. 
+## If @var{a} and @var{b} are both column vectors return a column vector; +## Otherwise, return a row vector. The inputs may also be cell arrays of +## strings. ## -## If requested, return @var{i} such that @code{c = a(i)}. +## If the optional input @qcode{"rows"} is given then return the rows in +## @var{a} that are not in @var{b}. The inputs must be 2-D matrices to use +## this option. +## +## If requested, return the index vector @var{ia} such that +## @code{@var{c} = @var{a}(@var{ia})}. ## @seealso{unique, union, intersect, setxor, ismember} ## @end deftypefn ## Author: Paul Kienzle ## Adapted-by: jwe -function [c, i] = setdiff (a, b, varargin) +function [c, ia] = setdiff (a, b, varargin) if (nargin < 2 || nargin > 3) print_usage (); endif - [a, b] = validargs ("setdiff", a, b, varargin{:}); + [a, b] = validsetargs ("setdiff", a, b, varargin{:}); - if (nargin > 2) + by_rows = nargin == 3; + iscol = isvector (a) && isvector (b) && iscolumn (a) && iscolumn (b); + + if (by_rows) if (nargout > 1) - [c, i] = unique (a, "rows"); + [c, ia] = unique (a, "rows"); else c = unique (a, "rows"); endif if (! isempty (c) && ! isempty (b)) - ## Form a and b into combined set. + ## Form A and B into combined set. b = unique (b, "rows"); - [dummy, idx] = sortrows ([c; b]); - ## Eliminate those elements of a that are the same as in b. - dups = find (all (dummy(1:end-1,:) == dummy(2:end,:), 2)); + [tmp, idx] = sortrows ([c; b]); + ## Eliminate those elements of A that are the same as in B. + dups = find (all (tmp(1:end-1,:) == tmp(2:end,:), 2)); c(idx(dups),:) = []; if (nargout > 1) - i(idx(dups),:) = []; + ia(idx(dups),:) = []; endif endif else if (nargout > 1) - [c, i] = unique (a); + [c, ia] = unique (a); else c = unique (a); endif if (! isempty (c) && ! isempty (b)) ## Form a and b into combined set. b = unique (b); - [dummy, idx] = sort ([c(:); b(:)]); + [tmp, idx] = sort ([c(:); b(:)]); ## Eliminate those elements of a that are the same as in b. 
- if (iscellstr (dummy)) - dups = find (strcmp (dummy(1:end-1), dummy(2:end))); + if (iscellstr (tmp)) + dups = find (strcmp (tmp(1:end-1), tmp(2:end))); else - dups = find (dummy(1:end-1) == dummy(2:end)); + dups = find (tmp(1:end-1) == tmp(2:end)); endif c(idx(dups)) = []; if (nargout > 1) - i(idx(dups)) = []; + ia(idx(dups)) = []; endif ## Reshape if necessary for Matlab compatibility. - if (iscolumn (c) && ! iscolumn (b)) - c = c.'; + if (iscol) + c = c(:); + else + c = c(:).'; endif endif endif @@ -100,10 +109,11 @@ %!assert (setdiff ({"one","two";"three","four"}, {"one","two";"three","six"}), {"four"}) %!test -%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; -%! [y, i] = setdiff (a, b.'); -%! assert (y, [5]); -%! assert (y, a(i)); +%! a = [3, 1, 4, 1, 5]; +%! b = [1, 2, 3, 4]; +%! [c, ia] = setdiff (a, b'); +%! assert (c, [5]); +%! assert (c, a(ia)); %% Test output orientation compatibility (bug #42577) %!assert (setdiff ([1:5], 2), [1,3,4,5]) @@ -113,8 +123,8 @@ %!assert (setdiff ([1:5]', [2:3]), [1,4,5]) %!assert (setdiff ([1:5]', [2:3]'), [1;4;5]) -%% Test input validation -%!error setdiff () -%!error setdiff (1) -%!error setdiff (1,2,3,4) +%!test +%! a = rand (3,3,3); +%! b = a(1); +%! assert (setdiff (a, b), sort (a(2:end))); diff -r 391e080ae810 -r d00f6b09258f scripts/set/setxor.m --- a/scripts/set/setxor.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/setxor.m Mon Aug 11 09:39:45 2014 -0700 @@ -18,18 +18,24 @@ ## . 
## -*- texinfo -*- -## @deftypefn {Function File} {} setxor (@var{a}, @var{b}) -## @deftypefnx {Function File} {} setxor (@var{a}, @var{b}, "rows") -## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@var{a}, @var{b}) +## @deftypefn {Function File} {@var{c} =} setxor (@var{a}, @var{b}) +## @deftypefnx {Function File} {@var{c} =} setxor (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@dots{}) +## +## Return the elements exclusive to sets @var{a} or @var{b} sorted in +## ascending order. ## -## Return the elements exclusive to @var{a} or @var{b}, sorted in ascending -## order. If @var{a} and @var{b} are both column vectors return a column -## vector, otherwise return a row vector. -## @var{a}, @var{b} may be cell arrays of string(s). +## If @var{a} and @var{b} are both column vectors return a column vector; +## Otherwise, return a row vector. The inputs may also be cell arrays of +## strings. ## -## With three output arguments, return index vectors @var{ia} and @var{ib} -## such that @code{a(ia)} and @code{b(ib)} are disjoint sets whose union -## is @var{c}. +## If the optional input @qcode{"rows"} is given then return the rows exclusive +## to sets @var{a} and @var{b}. The inputs must be 2-D matrices to use this +## option. +## +## If requested, return index vectors @var{ia} and @var{ib} such that +## @code{@var{a}(@var{ia})} and @code{@var{b}(@var{ib})} are disjoint sets +## whose union is @var{c}. ## ## @seealso{unique, union, intersect, setdiff, ismember} ## @end deftypefn @@ -40,7 +46,10 @@ print_usage (); endif - [a, b] = validargs ("setxor", a, b, varargin{:}); + [a, b] = validsetargs ("setxor", a, b, varargin{:}); + + by_rows = nargin == 3; + iscol = isvector (a) && isvector (b) && iscolumn (a) && iscolumn (b); ## Form A and B into sets. if (nargout > 1) @@ -57,8 +66,8 @@ c = a; else ## Reject duplicates. 
- if (nargin > 2) - na = rows (a); nb = rows (b); + if (by_rows) + na = rows (a); nb = rows (b); [c, i] = sortrows ([a; b]); n = rows (c); idx = find (all (c(1:n-1, :) == c(2:n, :), 2)); @@ -67,7 +76,7 @@ i([idx, idx+1],:) = []; endif else - na = numel (a); nb = numel (b); + na = numel (a); nb = numel (b); [c, i] = sort ([a(:); b(:)]); n = length (c); if (iscell (c)) @@ -79,11 +88,14 @@ c([idx, idx+1]) = []; i([idx, idx+1]) = []; endif - if (rows (a) == 1 || rows (b) == 1) + + ## Adjust output orientation for Matlab compatibility + if (! iscol) c = c.'; endif endif endif + if (nargout > 1) ia = ia(i(i <= na)); ib = ib(i(i > na) - na); @@ -92,18 +104,37 @@ endfunction -%!assert (setxor ([1,2,3],[2,3,4]),[1,4]) +%!assert (setxor ([1,2,3], [2,3,4]), [1,4]) %!assert (setxor ({'a'}, {'a', 'b'}), {'b'}) %!test -%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! a = [3, 1, 4, 1, 5]; +%! b = [1, 2, 3, 4]; %! [c, ia, ib] = setxor (a, b.'); %! assert (c, [2, 5]); %! assert (c, sort ([a(ia), b(ib)])); %!test -%! a = [1 2; 4 5; 1 3]; b = [1 1; 1 2; 4 5; 2 10]; +%! a = [1 2; 4 5; 1 3]; +%! b = [1 1; 1 2; 4 5; 2 10]; %! [c, ia, ib] = setxor (a, b, "rows"); %! assert (c, [1 1; 1 3; 2 10]); %! assert (c, sortrows ([a(ia,:); b(ib,:)])); +## Test orientation of output +%!shared x,y +%! x = 1:3; +%! y = 2:5; + +%!assert (size (setxor (x, y)), [1 3]) +%!assert (size (setxor (x', y)), [1 3]) +%!assert (size (setxor (x, y')), [1 3]) +%!assert (size (setxor (x', y')), [3 1]) + +## Test multi-dimensional arrays +%!test +%! a = rand (3,3,3); +%! b = a; +%! b(1,1,1) = 2; +%! assert (intersect (a, b), sort (a(2:end))); + diff -r 391e080ae810 -r d00f6b09258f scripts/set/union.m --- a/scripts/set/union.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/union.m Mon Aug 11 09:39:45 2014 -0700 @@ -18,38 +18,26 @@ ## . 
## -*- texinfo -*- -## @deftypefn {Function File} {} union (@var{a}, @var{b}) -## @deftypefnx {Function File} {} union (@var{a}, @var{b}, "rows") -## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} union (@var{a}, @var{b}) +## @deftypefn {Function File} {@var{c} =} union (@var{a}, @var{b}) +## @deftypefnx {Function File} {@var{c} =} union (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} union (@dots{}) ## -## Return the set of elements that are in either of the sets @var{a} and -## @var{b}. @var{a}, @var{b} may be cell arrays of strings. -## For example: -## -## @example -## @group -## union ([1, 2, 4], [2, 3, 5]) -## @result{} [1, 2, 3, 4, 5] -## @end group -## @end example +## Return the elements that are in either @var{a} or @var{b} sorted in +## ascending order with duplicates removed. ## -## If the optional third input argument is the string @qcode{"rows"} then -## each row of the matrices @var{a} and @var{b} will be considered as a -## single set element. For example: +## If @var{a} and @var{b} are both column vectors return a column vector; +## Otherwise, return a row vector. The inputs may also be cell arrays of +## strings. ## -## @example -## @group -## union ([1, 2; 2, 3], [1, 2; 3, 4], "rows") -## @result{} 1 2 -## 2 3 -## 3 4 -## @end group -## @end example +## If the optional input @qcode{"rows"} is given then return rows that are in +## either @var{a} or @var{b}. The inputs must be 2-D matrices to use this +## option. +## +## The optional outputs @var{ia} and @var{ib} are index vectors such that +## @code{@var{a}(@var{ia})} and @code{@var{b}(@var{ib})} are disjoint sets +## whose union is @var{c}. ## -## The optional outputs @var{ia} and @var{ib} are index vectors such that -## @code{a(ia)} and @code{b(ib)} are disjoint sets whose union is @var{c}. 
-## -## @seealso{intersect, setdiff, unique} +## @seealso{unique, intersect, setdiff, setxor, ismember} ## @end deftypefn ## Author: jwe @@ -60,40 +48,59 @@ print_usage (); endif - [a, b] = validargs ("union", a, b, varargin{:}); + [a, b] = validsetargs ("union", a, b, varargin{:}); + + by_rows = nargin == 3; + iscol = isvector (a) && isvector (b) && iscolumn (a) && iscolumn (b); - if (nargin == 2) + if (by_rows) + y = [a; b]; + else y = [a(:); b(:)]; - na = numel (a); nb = numel (b); - if (rows (a) == 1 || rows (b) == 1) + ## Adjust output orientation for Matlab compatibility + if (! iscol) y = y.'; endif - else - y = [a; b]; - na = rows (a); nb = rows (b); endif - if (nargout == 1) + if (nargout <= 1) y = unique (y, varargin{:}); else - [y, i] = unique (y, varargin{:}); - ia = i(i <= na); - ib = i(i > na) - na; + [y, idx] = unique (y, varargin{:}); + na = numel (a); + ia = idx(idx <= na); + ib = idx(idx > na) - na; endif endfunction -%!assert (union ([1, 2, 4], [2, 3, 5]), [1, 2, 3, 4, 5]); -%!assert (union ([1; 2; 4], [2, 3, 5]), [1, 2, 3, 4, 5]); -%!assert (union ([1, 2, 3], [5; 7; 9]), [1, 2, 3, 5, 7, 9]); +%!assert (union ([1, 2, 4], [2, 3, 5]), [1, 2, 3, 4, 5]) +%!assert (union ([1; 2; 4], [2, 3, 5]), [1, 2, 3, 4, 5]) +%!assert (union ([1; 2; 4], [2; 3; 5]), [1; 2; 3; 4; 5]) +%!assert (union ([1, 2, 3], [5; 7; 9]), [1, 2, 3, 5, 7, 9]) + +## Test multi-dimensional arrays +%!test +%! a = rand (3,3,3); +%! b = a; +%! b(1,1,1) = 2; +%! assert (union (a, b), sort ([a(1:end), 2])); %!test -%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! a = [3, 1, 4, 1, 5]; +%! b = [1, 2, 3, 4]; %! [y, ia, ib] = union (a, b.'); %! assert (y, [1, 2, 3, 4, 5]); %! 
assert (y, sort ([a(ia), b(ib)])); -%!error union (1) -%!error union (1, 2, 3) +%% Test common input validation for set routines contained in validsetargs +%!error union ({"a"}, 1) +%!error union (@sin, 1) +%!error union (1, 2, "columns") +%!error union ({"a"}, {"b"}, "rows") +%!error union (@sin, 1, "rows") +%!error union (rand(2,2,2), 1, "rows") +%!error union ([1 2], 1, "rows") + diff -r 391e080ae810 -r d00f6b09258f scripts/set/unique.m --- a/scripts/set/unique.m Sun Aug 10 08:18:18 2014 -0700 +++ b/scripts/set/unique.m Mon Aug 11 09:39:45 2014 -0700 @@ -20,20 +20,20 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} unique (@var{x}) ## @deftypefnx {Function File} {} unique (@var{x}, "rows") -## @deftypefnx {Function File} {} unique (@dots{}, "first") -## @deftypefnx {Function File} {} unique (@dots{}, "last") ## @deftypefnx {Function File} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}) -## Return the unique elements of @var{x}, sorted in ascending order. -## If the input @var{x} is a vector then the output is also a vector with the -## same orientation (row or column) as the input. For a matrix input the -## output is always a column vector. @var{x} may also be a cell array of -## strings. +## @deftypefnx {Function File} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "first") +## @deftypefnx {Function File} {[@var{y}, @var{i}, @var{j}] =} unique (@dots{}, "last") +## Return the unique elements of @var{x} sorted in ascending order. ## -## If the optional argument @qcode{"rows"} is supplied, return the unique -## rows of @var{x}, sorted in ascending order. +## If the input @var{x} is a column vector then return a column vector; +## Otherwise, return a row vector. @var{x} may also be a cell array of strings. +## +## If the optional argument @qcode{"rows"} is given then return the unique +## rows of @var{x} sorted in ascending order. The input must be a 2-D matrix +## to use this option. 
## ## If requested, return index vectors @var{i} and @var{j} such that -## @code{x(i)==y} and @code{y(j)==x}. +## @code{@var{y} = @var{x}(@var{i})} and @code{@var{x} = @var{y}(@var{j})}. ## ## Additionally, if @var{i} is a requested output then one of @qcode{"first"} or ## @qcode{"last"} may be given as an input. If @qcode{"last"} is specified, @@ -46,34 +46,32 @@ if (nargin < 1) print_usage (); + elseif (! (ismatrix (x) || iscellstr (x))) + error ("unique: X must be a matrix or cell array of strings"); endif if (nargin > 1) ## parse options - if (iscellstr (varargin)) - optfirst = strcmp ("first", varargin); - optlast = strcmp ("last", varargin); - optrows = strcmp ("rows", varargin); - if (! all (optfirst | optlast | optrows)) - error ("unique: invalid option"); - endif - optfirst = any (optfirst); - optlast = any (optlast); - optrows = any (optrows); - if (optfirst && optlast) - error ('unique: cannot specify both "last" and "first"'); - endif - else + if (! iscellstr (varargin)) error ("unique: options must be strings"); endif - if (optrows && iscell (x)) + optrows = any (strcmp ("rows", varargin)); + optfirst = any (strcmp ("first", varargin)); + optlast = any (strcmp ("last", varargin)); + if (optfirst && optlast) + error ('unique: cannot specify both "first" and "last"'); + elseif (optfirst + optlast + optrows != nargin-1) + error ("unique: invalid option"); + endif + + if (optrows && iscellstr (x)) warning ('unique: "rows" is ignored for cell arrays'); optrows = false; endif else + optrows = false; optfirst = false; - optrows = false; endif ## FIXME: The operations @@ -87,7 +85,7 @@ if (issparse (x) && ! optrows && nargout <= 1) if (nnz (x) < numel (x)) - y = unique ([0; (full (nonzeros (x)))], varargin{:}); + y = unique ([0; nonzeros(x)], varargin{:}); else ## Corner case where sparse matrix is actually full y = unique (full (x), varargin{:}); @@ -107,7 +105,7 @@ ## Special cases 0 and 1 if (n == 0) if (! 
optrows && isempty (x) && any (size (x))) - if (iscell (y)) + if (iscellstr (y)) y = cell (0, 1); else y = zeros (0, 1, class (y)); @@ -127,8 +125,7 @@ y = sortrows (y); endif match = all (y(1:n-1,:) == y(2:n,:), 2); - idx = find (match); - y(idx,:) = []; + y(match,:) = []; else if (! isvector (y)) y = y(:); @@ -138,13 +135,12 @@ else y = sort (y); endif - if (iscell (y)) + if (iscellstr (y)) match = strcmp (y(1:n-1), y(2:n)); else match = (y(1:n-1) == y(2:n)); endif - idx = find (match); - y(idx) = []; + y(match) = []; endif if (isargout (3)) @@ -157,25 +153,25 @@ endif if (isargout (2)) + idx = find (match); if (optfirst) - i(idx+1) = []; - else - i(idx) = []; + idx += 1; # in-place is faster than other forms of increment endif + i(idx) = []; endif endfunction -%!assert (unique ([1 1 2; 1 2 1; 1 1 2]),[1;2]) -%!assert (unique ([1 1 2; 1 0 1; 1 1 2],"rows"),[1 0 1; 1 1 2]) -%!assert (unique ([]),[]) -%!assert (unique ([1]),[1]) -%!assert (unique ([1 2]),[1 2]) -%!assert (unique ([1;2]),[1;2]) -%!assert (unique ([1,NaN,Inf,NaN,Inf]),[1,Inf,NaN,NaN]) -%!assert (unique ({"Foo","Bar","Foo"}),{"Bar","Foo"}) -%!assert (unique ({"Foo","Bar","FooBar"}'),{"Bar","Foo","FooBar"}') +%!assert (unique ([1 1 2; 1 2 1; 1 1 2]), [1;2]) +%!assert (unique ([1 1 2; 1 0 1; 1 1 2],"rows"), [1 0 1; 1 1 2]) +%!assert (unique ([]), []) +%!assert (unique ([1]), [1]) +%!assert (unique ([1 2]), [1 2]) +%!assert (unique ([1;2]), [1;2]) +%!assert (unique ([1,NaN,Inf,NaN,Inf]), [1,Inf,NaN,NaN]) +%!assert (unique ({"Foo","Bar","Foo"}), {"Bar","Foo"}) +%!assert (unique ({"Foo","Bar","FooBar"}'), {"Bar","Foo","FooBar"}') %!assert (unique (zeros (1,0)), zeros (0,1)) %!assert (unique (zeros (1,0), "rows"), zeros (1,0)) %!assert (unique (cell (1,0)), cell (0,1)) @@ -192,6 +188,7 @@ %!assert (unique (uint8 ([1,2,2,3,2,4]), "rows"), uint8 ([1,2,2,3,2,4])) %!assert (unique (uint8 ([1,2,2,3,2,4])), uint8 ([1,2,3,4])) %!assert (unique (uint8 ([1,2,2,3,2,4]'), "rows"), uint8 ([1,2,3,4]')) + %!test %! 
[a,i,j] = unique ([1,1,2,3,3,3,4]); %! assert (a, [1,2,3,4]); @@ -217,8 +214,15 @@ %! assert (A(i,:), a); %! assert (a(j,:), A); -%!error unique({"a", "b", "c"}, "UnknownOption") -%!error unique({"a", "b", "c"}, "UnknownOption1", "UnknownOption2") -%!error unique({"a", "b", "c"}, "rows", "UnknownOption2") -%!error unique({"a", "b", "c"}, "UnknownOption1", "last") +%% Test input validation +%!error unique () +%!error unique ({1}) +%!error unique (1, 2) +%!error unique (1, "first", "last") +%!error unique (1, "middle") +%!error unique ({"a", "b", "c"}, "UnknownOption") +%!error unique ({"a", "b", "c"}, "UnknownOption1", "UnknownOption2") +%!error unique ({"a", "b", "c"}, "rows", "UnknownOption2") +%!error unique ({"a", "b", "c"}, "UnknownOption1", "last") +%!warning <"rows" is ignored for cell arrays> unique ({"1"}, "rows");