changeset 10088:5edee330d4cb

better argument checking and handling in set functions
author Jaroslav Hajek <highegg@gmail.com>
date Tue, 12 Jan 2010 16:50:25 +0100
parents 090173f2db40
children dd70982c81a3
files scripts/ChangeLog scripts/set/intersect.m scripts/set/ismember.m scripts/set/module.mk scripts/set/private/validargs.m scripts/set/setdiff.m scripts/set/setxor.m scripts/set/union.m scripts/set/unique.m
diffstat 9 files changed, 95 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/ChangeLog	Tue Jan 12 16:50:25 2010 +0100
@@ -1,3 +1,13 @@
+2010-01-12  Jaroslav Hajek  <highegg@gmail.com>, Ben Barrowes  <benjamin.e.barrowes@usace.army.mil>
+
+	* set/private/validargs.m: New function.
+	* set/module.mk: Add it here.
+	* set/ismember.m: Use it here.
+	* set/intersect.m: Ditto.
+	* set/union.m: Ditto.
+	* set/setdiff.m: Ditto.
+	* set/setxor.m: Ditto.
+
 2010-01-11  Rik <octave@nomad.inbox5.com>
 
 	* Makefile.am: Update DOCSTRINGS rules to avoid simple_move_if_change_rule
@@ -25,6 +35,10 @@
 
 	* /optimization/qp.m: Add missing semicolon.
 
+2010-01-11  Ben Barrowes  <benjamin.e.barrowes@usace.army.mil>
+
+	* set/: allow set scripts to handle cell inputs
+
 2010-01-05  Jaroslav Hajek  <highegg@gmail.com>
 
 	* optimization/qp.m: Supply default values on request and register
--- a/scripts/set/intersect.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/intersect.m	Tue Jan 12 16:50:25 2010 +0100
@@ -24,6 +24,7 @@
 ## Return the elements in both @var{a} and @var{b}, sorted in ascending
 ## order.  If @var{a} and @var{b} are both column vectors return a column
 ## vector, otherwise return a row vector.
+## @var{a}, @var{b} may be cell arrays of string(s).
 ##
 ## Return index vectors @var{ia} and @var{ib} such that @code{a(ia)==c} and
 ## @code{b(ib)==c}.
@@ -37,10 +38,7 @@
     print_usage ();
   endif
 
-  if (nargin == 3 && ! strcmpi (varargin{1}, "rows"))
-    error ("intersect: if a third input argument is present, it must be the string 'rows'");
-  endif
-
+  [a, b] = validargs ("intersect", a, b, varargin{:});
 
   if (isempty (a) || isempty (b))
     c = ia = ib = [];
@@ -70,13 +68,11 @@
       c = c(ii);
     endif
 
-
     if (nargout > 1)
       ia = ja(ic(ii));                  ## a(ia) == c
       ib = jb(ic(ii+1) - length (a));   ## b(ib) == c
     endif
 
-
     if (nargin == 2 && (size (b, 1) == 1 || size (a, 1) == 1))
       c = c.';
     endif
--- a/scripts/set/ismember.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/ismember.m	Tue Jan 12 16:50:25 2010 +0100
@@ -71,25 +71,15 @@
 ## Adapted-by: jwe
 ## Reimplemented using lookup & unique: Jaroslav Hajek <highegg@gmail.com>
 
-function [tf, a_idx] = ismember (a, s, rows_opt) 
+function [tf, a_idx] = ismember (a, s, varargin) 
+
+  if (nargin < 2 || nargin > 3)
+    print_usage ();
+  endif
+
+  [a, s] = validargs ("ismember", a, s, varargin{:});
 
   if (nargin == 2)
-    ica = iscellstr (a);
-    ics = iscellstr (s);
-    if (ica || ics)
-      if (ica && ischar (s))
-        s = cellstr (s);
-      elseif (ics && ischar (a))
-        a = cellstr (a);
-      elseif (! (ica && ics))
-        error ("ismember: invalid argument types");
-      endif
-    elseif (! isa (a, class (s))) 
-      error ("ismember: both input arguments must be the same type");
-    elseif (! ischar (a) && ! isnumeric (a))
-      error ("ismember: input arguments must be arrays, cell arrays, or strings"); 
-    endif
-
     s = s(:);
     ## We do it this way, because we expect the array to be often sorted.
     if (issorted (s))
@@ -113,19 +103,11 @@
       tf = lookup (s, a, "b");
     endif
 
-  elseif (nargin == 3 && strcmpi (rows_opt, "rows"))
-    if (iscell (a) || iscell (s))
-      error ("ismember: cells not supported with ""rows""");
-    elseif (! isa (a, class (s))) 
-      error ("ismember: both input arguments must be the same type");
-    elseif (! ischar (a) && ! isnumeric (a))
-      error ("ismember: input arguments must be arrays, cell arrays, or strings"); 
-    endif
+  else
+
     if (isempty (a) || isempty (s))
       tf = false (rows (a), 1);
       a_idx = zeros (rows (a), 1);
-    elseif (columns (a) != columns (s))
-      error ("ismember: number of columns must match");
     else
 
       ## FIXME: lookup does not support "rows", so we just use unique.
@@ -139,8 +121,6 @@
       endif
 
     endif
-  else
-    print_usage ();
   endif
 
 endfunction
--- a/scripts/set/module.mk	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/module.mk	Tue Jan 12 16:50:25 2010 +0100
@@ -7,7 +7,8 @@
   set/setdiff.m \
   set/setxor.m \
   set/union.m \
-  set/unique.m
+  set/unique.m \
+  set/private/validargs.m
 
 FCN_FILES += $(set_FCN_FILES)
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/set/private/validargs.m	Tue Jan 12 16:50:25 2010 +0100
@@ -0,0 +1,57 @@
+## Copyright (C) 2000, 2005, 2006, 2007, 2008, 2009 Paul Kienzle
+## Copyright (C) 2009, 2010 Jaroslav Hajek
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## Validate arguments for a binary set operation: validargs (CALLER, X, Y) or validargs (CALLER, X, Y, "rows").  Returns X and Y (a char operand paired with a cellstr is converted by cellstr); problems are reported under the name CALLER.
+function [x, y] = validargs (caller, x, y, byrows_arg)
+
+  if (nargin == 3)  ## element-wise mode: no option argument was given
+    icx = iscellstr (x);
+    icy = iscellstr (y);
+    if (icx || icy)
+      if (icx && ischar (y))
+        y = cellstr (y);  ## make both operands cell arrays of strings
+      elseif (icy && ischar (x))
+        x = cellstr (x);  ## make both operands cell arrays of strings
+      elseif (! (icx && icy))
+        error ("%s: cell array of strings cannot be combined with a nonstring value", caller);
+      endif
+    elseif (! (ismatrix (x) && ismatrix (y)))
+      error ("%s: input arguments must be arrays or cell arrays of strings", caller);
+    endif
+  elseif (nargin == 4)  ## an option was given; only "rows" (any case) is accepted
+    if (strcmpi (byrows_arg, "rows"))
+      if (iscell (x) || iscell (y))  ## any cell input is rejected in "rows" mode
+        error ("%s: cells not supported with ""rows""", caller);
+      elseif (! (ismatrix (x) && ismatrix (y)))
+        error ("%s: input arguments must be arrays or cell arrays of strings", caller);
+      else
+        if (ndims (x) > 2 || ndims (y) > 2)
+          error ("%s: need 2-dimensional matrices for ""rows""", caller);
+        elseif (columns (x) != columns (y) && ! (isempty (x) || isempty (y)))  ## an empty operand is exempt from the column check
+          error ("%s: number of columns must match", caller);
+        endif
+      endif
+    else
+      error ("%s: invalid option: %s", caller, byrows_arg);
+    endif
+  else
+    print_usage (caller);  ## any other argument count: show CALLER's usage message
+  endif
+
+endfunction
--- a/scripts/set/setdiff.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/setdiff.m	Tue Jan 12 16:50:25 2010 +0100
@@ -24,6 +24,7 @@
 ## Return the elements in @var{a} that are not in @var{b}, sorted in
 ## ascending order.  If @var{a} and @var{b} are both column vectors
 ## return a column vector, otherwise return a row vector.
+## @var{a}, @var{b} may be cell arrays of string(s).
 ##
 ## Given the optional third argument @samp{"rows"}, return the rows in
 ## @var{a} that are not in @var{b}, sorted in ascending order by rows.
@@ -35,25 +36,15 @@
 ## Author: Paul Kienzle
 ## Adapted-by: jwe
 
-function [c, i] = setdiff (a, b, byrows_arg)
+function [c, i] = setdiff (a, b, varargin)
 
   if (nargin < 2 || nargin > 3)
     print_usage ();
   endif
 
-  byrows = false;
+  [a, b] = validargs ("setdiff", a, b, varargin{:});
 
-  if (nargin == 3)
-    if (! strcmpi (byrows_arg, "rows"))
-      error ("expecting third argument to be \"rows\"");
-    elseif (iscell (a) || iscell (b))
-      warning ("setdiff: \"rows\" not valid for cell arrays");
-    else
-      byrows = true;
-    endif
-  endif
-
-  if (byrows)
+  if (nargin > 2)
     if (nargout > 1)
       [c, i] = unique (a, "rows");
     else
--- a/scripts/set/setxor.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/setxor.m	Tue Jan 12 16:50:25 2010 +0100
@@ -24,6 +24,7 @@
 ## Return the elements exclusive to @var{a} or @var{b}, sorted in ascending
 ## order.  If @var{a} and @var{b} are both column vectors return a column
 ## vector, otherwise return a row vector.
+## @var{a}, @var{b} may be cell arrays of string(s).
 ##
 ## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@var{a}, @var{b})
 ##
@@ -39,9 +40,7 @@
     print_usage ();
   endif
 
-  if (nargin == 3 && ! strcmpi (varargin{1}, "rows"))
-    error ("setxor: if a third input argument is present, it must be the string 'rows'");
-  endif
+  [a, b] = validargs ("setxor", a, b, varargin{:});
 
   ## Form A and B into sets.
   if (nargout > 1)
--- a/scripts/set/union.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/union.m	Tue Jan 12 16:50:25 2010 +0100
@@ -22,7 +22,8 @@
 ## @deftypefn {Function File} {} union (@var{a}, @var{b})
 ## @deftypefnx{Function File} {} union (@var{a}, @var{b}, "rows")
 ## Return the set of elements that are in either of the sets @var{a} and
-## @var{b}.  For example,
+## @var{b}. @var{a}, @var{b} may be cell arrays of string(s).
+## For example,
 ##
 ## @example
 ## @group
@@ -59,9 +60,7 @@
     print_usage ();
   endif
 
-  if (nargin == 3 && ! strcmpi (varargin{1}, "rows"))
-    error ("union: if a third input argument is present, it must be the string 'rows'");
-  endif
+  [a, b] = validargs ("union", a, b, varargin{:});
 
   if (nargin == 2)
     y = [a(:); b(:)];
@@ -69,11 +68,9 @@
     if (size (a, 1) == 1 || size (b, 1) == 1)
       y = y.';
     endif
-  elseif (ndims (a) == 2 && ndims (b) == 2 && columns (a) == columns (b))
+  else
     y = [a; b];
     na = rows (a); nb = rows (b);
-  else
-    error ("union: input arguments must contain the same number of columns when \"rows\" is specified");
   endif
 
   if (nargout == 1)
--- a/scripts/set/unique.m	Tue Jan 12 13:20:17 2010 +0100
+++ b/scripts/set/unique.m	Tue Jan 12 16:50:25 2010 +0100
@@ -26,6 +26,7 @@
 ## Return the unique elements of @var{x}, sorted in ascending order.
 ## If @var{x} is a row vector, return a row vector, but if @var{x}
 ## is a column vector or a matrix return a column vector.
+## @var{x} can be a cell array of strings.
 ##
 ## If the optional argument @code{"rows"} is supplied, return the unique
 ## rows of @var{x}, sorted in ascending order.