# HG changeset patch # User Jaroslav Hajek # Date 1214424667 -7200 # Node ID e56bb65186f63d4bf82f8a9308fbef45548df372 # Parent 9d080df0c84355b8ecaef4eef3b27308d9daf20b improve set functions for Matlab compatibility diff -r 9d080df0c843 -r e56bb65186f6 scripts/ChangeLog --- a/scripts/ChangeLog Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/ChangeLog Wed Jun 25 22:11:07 2008 +0200 @@ -1,3 +1,12 @@ +2008-07-10 Jaroslav Hajek + + * set/unique.m: Implement 'first' and 'last', some simplifications. + * set/union.m: Implement output indices. + * set/intersect.m: Implement 'rows'. + * set/setdiff.m: Implement output indices. + * set/setxor.m: Implement 'rows' and output indices. + Add tests and adjust docs in all of the above. + 2008-06-11 John W. Eaton * set/ismember.m: Fix fail tests. diff -r 9d080df0c843 -r e56bb65186f6 scripts/set/intersect.m --- a/scripts/set/intersect.m Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/set/intersect.m Wed Jun 25 22:11:07 2008 +0200 @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -30,33 +31,50 @@ ## @end deftypefn ## @seealso{unique, union, setxor, setdiff, ismember} -function [c, ia, ib] = intersect (a, b) - if (nargin != 2) +function [c, ia, ib] = intersect (a, b, varargin) + + if (nargin < 2 || nargin > 3) print_usage (); endif + if (nargin == 3 && ! strcmpi (varargin{1}, "rows")) + error ("intersect: if a third input argument is present, it must be the string 'rows'"); + endif + + if (isempty (a) || isempty (b)) c = ia = ib = []; else ## form a and b into sets - [a, ja] = unique (a); - [b, jb] = unique (b); - - c = [a(:); b(:)]; - [c, ic] = sort (c); ## [a(:);b(:)](ic) == c - - if (iscellstr (c)) - ii = find (strcmp (c(1:end-1), c(2:end))); - else - ii = find (c(1:end-1) == c(2:end)); + if (nargout > 1) + [a, ja] = unique (a, varargin{:}); + [b, jb] = unique (b, varargin{:}); endif - c = c(ii); ## The answer - ia = ja(ic(ii)); ## a(ia) == c - ib = jb(ic(ii+1) - length (a)); ## b(ib) == c + if (nargin > 2) + c = [a; b]; + [c, ic] = sortrows (c); + ii = find (all (c(1:end-1,:) == c(2:end,:), 2)); + c = c(ii,:); + else + c = [a(:); b(:)]; + [c, ic] = sort (c); ## [a(:);b(:)](ic) == c + if (iscellstr (c)) + ii = find (strcmp (c(1:end-1), c(2:end))); + else + ii = find (c(1:end-1) == c(2:end)); + endif + c = c(ii); + endif - if (size (b, 1) == 1 || size (a, 1) == 1) + if (nargout > 1) + ia = ja(ic(ii)); ## a(ia) == c + ib = jb(ic(ii+1) - length (a)); ## b(ib) == c + endif + + + if (nargin == 2 && (size (b, 1) == 1 || size (a, 1) == 1)) c = c.'; endif endif @@ -74,3 +92,12 @@ %! assert(ib,[5 1 2 6]); %! assert(a(ia),c); %! assert(b(ib),c); +%!test +%! a = [1,1,2;1,4,5;2,1,7]; +%! b = [1,4,5;2,3,4;1,1,2;9,8,7]; +%! [c,ia,ib] = intersect(a,b,'rows'); +%! assert(c,[1,1,2;1,4,5]); +%! assert(ia,[1;2]); +%! assert(ib,[3;1]); +%! assert(a(ia,:),c); +%! assert(b(ib,:),c); diff -r 9d080df0c843 -r e56bb65186f6 scripts/set/setdiff.m --- a/scripts/set/setdiff.m Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/set/setdiff.m Wed Jun 25 22:11:07 2008 +0200 @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2005, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -19,19 +20,22 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} setdiff (@var{a}, @var{b}) ## @deftypefnx {Function File} {} setdiff (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{i}] = } setdiff (@var{a}, @var{b}) ## Return the elements in @var{a} that are not in @var{b}, sorted in ## ascending order. If @var{a} and @var{b} are both column vectors ## return a column vector, otherwise return a row vector. ## ## Given the optional third argument @samp{"rows"}, return the rows in ## @var{a} that are not in @var{b}, sorted in ascending order by rows. +## +## If requested, return @var{i} such that @code{c = a(i)}. ## @seealso{unique, union, intersect, setxor, ismember} ## @end deftypefn ## Author: Paul Kienzle ## Adapted-by: jwe -function c = setdiff (a, b, byrows_arg) +function [c, i] = setdiff (a, b, byrows_arg) if (nargin < 2 || nargin > 3) print_usage (); @@ -50,7 +54,11 @@ endif if (byrows) - c = unique (a, "rows"); + if (nargout > 1) + [c, i] = unique (a, "rows"); + else + c = unique (a, "rows"); + endif if (! isempty (c) && ! isempty (b)) ## Form a and b into combined set. b = unique (b, "rows"); @@ -58,9 +66,16 @@ ## Eliminate those elements of a that are the same as in b. dups = find (all (dummy(1:end-1,:) == dummy(2:end,:), 2)); c(idx(dups),:) = []; + if (nargout > 1) + i(idx(dups),:) = []; + endif endif else - c = unique (a); + if (nargout > 1) + [c, i] = unique (a); + else + c = unique (a); + endif if (! isempty (c) && ! isempty (b)) ## Form a and b into combined set. b = unique (b); @@ -72,6 +87,9 @@ dups = find (dummy(1:end-1) == dummy(2:end)); endif c(idx(dups)) = []; + if (nargout > 1) + i(idx(dups)) = []; + endif ## Reshape if necessary. if (size (c, 1) != 1 && size (b, 1) == 1) c = c.'; @@ -88,3 +106,9 @@ %!assert(setdiff([1; 2; 3; 4], [1; 2; 4], "rows"), 3) %!assert(setdiff([1, 2; 3, 4], [1, 2; 3, 6], "rows"), [3, 4]) %!assert(setdiff({"one","two";"three","four"},{"one","two";"three","six"}), {"four"}) + +%!test +%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! [y, i] = setdiff (a, b.'); +%! assert(y, [5]); +%! assert(y, a(i)); diff -r 9d080df0c843 -r e56bb65186f6 scripts/set/setxor.m --- a/scripts/set/setxor.m Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/set/setxor.m Wed Jun 25 22:11:07 2008 +0200 @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -18,22 +19,38 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} setxor (@var{a}, @var{b}) +## @deftypefnx {Function File} {} setxor (@var{a}, @var{b}, 'rows') ## ## Return the elements exclusive to @var{a} or @var{b}, sorted in ascending ## order. If @var{a} and @var{b} are both column vectors return a column ## vector, otherwise return a row vector. ## +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@var{a}, @var{b}) +## +## Return index vectors @var{ia} and @var{ib} such that @code{a==c(ia)} and +## @code{b==c(ib)}. +## ## @seealso{unique, union, intersect, setdiff, ismember} ## @end deftypefn -function c = setxor (a, b) - if (nargin != 2) +function [c, ia, ib] = setxor (a, b, varargin) + + if (nargin < 2 || nargin > 3) print_usage (); endif + if (nargin == 3 && ! strcmpi (varargin{1}, "rows")) + error ("setxor: if a third input argument is present, it must be the string 'rows'"); + endif + ## Form A and B into sets. - a = unique (a); - b = unique (b); + if (nargout > 1) + [a, ia] = unique (a, varargin{:}); + [b, ib] = unique (b, varargin{:}); + else + a = unique (a, varargin{:}); + b = unique (b, varargin{:}); + endif if (isempty (a)) c = b; @@ -41,16 +58,39 @@ c = a; else ## Reject duplicates. - c = sort ([a(:); b(:)]); - n = length (c); - idx = find (c(1:n-1) == c(2:n)); - if (! isempty (idx)) - c([idx, idx+1]) = []; - endif - if (size (a, 1) == 1 || size (b, 1) == 1) - c = c.'; + if (nargin > 2) + na = rows (a); nb = rows (b); + [c, i] = sortrows ([a; b]); + n = rows (c); + idx = find (all (c(1:n-1) == c(2:n), 2)); + if (! isempty (idx)) + c([idx, idx+1],:) = []; + i([idx, idx+1],:) = []; + endif + else + na = numel (a); nb = numel (b); + [c, i] = sort ([a(:); b(:)]); + n = length (c); + idx = find (c(1:n-1) == c(2:n)); + if (! isempty (idx)) + c([idx, idx+1]) = []; + i([idx, idx+1]) = []; + endif + if (size (a, 1) == 1 || size (b, 1) == 1) + c = c.'; + endif endif endif + if (nargout > 1) + ia = ia(i(i <= na)); + ib = ib(i(i > na) - na); + endif + endfunction %!assert(setxor([1,2,3],[2,3,4]),[1,4]) +%!test +%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! [y, ia, ib] = setxor (a, b.'); +%! assert(y, [2, 5]); +%! assert(y, sort([a(ia), b(ib)])); diff -r 9d080df0c843 -r e56bb65186f6 scripts/set/union.m --- a/scripts/set/union.m Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/set/union.m Wed Jun 25 22:11:07 2008 +0200 @@ -1,5 +1,6 @@ ## Copyright (C) 1994, 1996, 1997, 1999, 2000, 2003, 2004, 2005, 2006, ## 2007, 2008 John W. Eaton +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -18,10 +19,10 @@ ## . ## -*- texinfo -*- -## @deftypefn {Function File} {} union (@var{x}, @var{y}) -## @deftypefnx{Function File} {} union (@var{x}, @var{y}, "rows") -## Return the set of elements that are in either of the sets @var{x} and -## @var{y}. For example, +## @deftypefn {Function File} {} union (@var{a}, @var{b}) +## @deftypefnx{Function File} {} union (@var{a}, @var{b}, "rows") +## Return the set of elements that are in either of the sets @var{a} and +## @var{b}. For example, ## ## @example ## @group @@ -31,7 +32,7 @@ ## @end example ## ## If the optional third input argument is the string "rows" each row of -## the matrices @var{x} and @var{y} will be considered an element of sets. +## the matrices @var{a} and @var{b} will be considered an element of sets. ## For example, ## @example ## @group @@ -41,12 +42,18 @@ ## 3 4 ## @end group ## @end example +## +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} union (@var{a}, @var{b}) +## +## Return index vectors @var{ia} and @var{ib} such that @code{a==c(ia)} and +## @code{b==c(ib)}. +## ## @seealso{create_set, intersection, complement} ## @end deftypefn ## Author: jwe -function y = union (a, b, varargin) +function [y, ia, ib] = union (a, b, varargin) if (nargin < 2 || nargin > 3) print_usage (); @@ -56,23 +63,27 @@ error ("union: if a third input argument is present, it must be the string 'rows'"); endif - if (isempty (a)) - y = create_set (b, varargin{:}); - elseif (isempty (b)) - y = create_set (a, varargin{:}); - elseif (nargin == 2) - y = create_set ([a(:); b(:)]); + if (nargin == 2) + y = [a(:); b(:)]; + na = numel (a); nb = numel (b); if (size (a, 1) == 1 || size (b, 1) == 1) - y = y(:).'; - else - y = y(:); + y = y.'; endif elseif (ndims (a) == 2 && ndims (b) == 2 && columns (a) == columns (b)) - y = create_set ([a; b], "rows"); + y = [a; b]; + na = rows (a); nb = rows (b); else error ("union: input arguments must contain the same number of columns when \"rows\" is specified"); endif + if (nargout == 1) + y = unique (y, varargin{:}); + else + [y, i] = unique (y, varargin{:}); + ia = i(i <= na); + ib = i(i > na) - na; + endif + endfunction %!assert(all (all (union ([1, 2, 4], [2, 3, 5]) == [1, 2, 3, 4, 5]))); @@ -85,3 +96,8 @@ %!error union (1, 2, 3); +%!test +%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! [y, ia, ib] = union (a, b.'); +%! assert(y, [1, 2, 3, 4, 5]); +%! assert(y, sort([a(ia), b(ib)])); diff -r 9d080df0c843 -r e56bb65186f6 scripts/set/unique.m --- a/scripts/set/unique.m Mon Jun 30 15:51:31 2008 +0200 +++ b/scripts/set/unique.m Wed Jun 25 22:11:07 2008 +0200 @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2001, 2005, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -31,19 +32,45 @@ ## ## Return index vectors @var{i} and @var{j} such that @code{x(i)==y} and ## @code{y(j)==x}. +## +## Additionally, one of 'first' or 'last' can be given as an argument. +## 'last' (default) specifies that the highest possible indices are returned +## in @var{i}, while 'first' means the lowest. ## @seealso{union, intersect, setdiff, setxor, ismember} ## @end deftypefn -function [y, i, j] = unique (x, r) +function [y, i, j] = unique (x, varargin) - if (nargin < 1 || nargin > 2 || (nargin == 2 && ! strcmp (r, "rows"))) + if (nargin < 1) print_usage (); endif - if (nargin == 1) + ## parse options + if (iscellstr (varargin)) + optfirst = strmatch ('first', varargin) > 0; + optlast = strmatch ('last', varargin) > 0; + optrows = strmatch ('rows', varargin) > 0 && size (x, 2) > 1; + if (optfirst && optlast) + error ("unique: cannot specify both 'last' and 'first'."); + elseif (optfirst + optlast + optrows != nargin-1) + error ("unique: invalid option."); + endif + optlast = ! optfirst; + else + error ("unique: options must be strings"); + endif + + if (iscell (x)) + if (optrows) + warning ("unique: 'rows' is ignored for cell arrays"); + optrows = false; + endif + endif + + if (optrows) + n = size (x, 1); + else n = numel (x); - else - n = size (x, 1); endif y = x; @@ -55,20 +82,9 @@ return; endif - if (ischar (x)) - y = toascii (y); - endif - - if (nargin == 2 && size (y, 2) > 1) + if (optrows) [y, i] = sortrows (y); - if (iscell (y)) - match = cellfun ("size", y(1:n-1,:), 1) == cellfun ("size", y(2:n,:), 1); - idx = find (match); - match(idx) = all (char (y(idx)) == char (y(idx+1)), 2); - match = all (match'); - else - match = all ([y(1:n-1,:) == y(2:n,:)]'); - endif + match = all (y(1:n-1,:) == y(2:n,:), 2); idx = find (match); y(idx,:) = []; else @@ -77,11 +93,9 @@ endif [y, i] = sort (y); if (iscell (y)) - match = cellfun ("length", y(1:n-1)) == cellfun ("length", y(2:n)); - idx = find(match); - match(idx) = all (char (y(idx)) == char (y(idx+1)), 2); + match = strcmp (y(1:n-1), y(2:n)); else - match = [y(1:n-1) == y(2:n)]; + match = (y(1:n-1) == y(2:n)); endif idx = find (match); y(idx) = []; @@ -93,11 +107,12 @@ j = i; j(i) = cumsum (prepad (! match, n, 1)); endif - i(idx) = []; + if (optfirst) + i(idx+1) = []; + else + i(idx) = []; + endif - if (ischar (x)) - y = char (y); - endif endfunction @@ -110,3 +125,15 @@ %!assert(unique([1,NaN,Inf,NaN,Inf]),[1,Inf,NaN,NaN]) %!assert(unique({'Foo','Bar','Foo'}),{'Bar','Foo'}) %!assert(unique({'Foo','Bar','FooBar'}),{'Bar','Foo','FooBar'}) + +%!test +%! [a,i,j] = unique([1,1,2,3,3,3,4]); +%! assert(a,[1,2,3,4]) +%! assert(i,[2,3,6,7]) +%! assert(j,[1,1,2,3,3,3,4]) +%! +%!test +%! [a,i,j] = unique([1,1,2,3,3,3,4],'first'); +%! assert(a,[1,2,3,4]) +%! assert(i,[1,3,4,7]) +%! assert(j,[1,1,2,3,3,3,4])