changeset 8877:2c8b2399247b

implement strsplit; deprecate split
author Jaroslav Hajek <highegg@gmail.com>
date Thu, 26 Feb 2009 10:29:59 +0100
parents a017b920530e
children ebb8c1dcf4d3
files scripts/ChangeLog scripts/deprecated/Makefile.in scripts/deprecated/split.m scripts/general/int2str.m scripts/general/num2str.m scripts/help/__makeinfo__.m scripts/help/lookfor.m scripts/miscellaneous/compare_versions.m scripts/miscellaneous/tar.m scripts/miscellaneous/unpack.m scripts/miscellaneous/what.m scripts/miscellaneous/zip.m scripts/pkg/pkg.m scripts/strings/Makefile.in scripts/strings/split.m scripts/strings/strsplit.m scripts/strings/strtok.m scripts/testfun/rundemos.m
diffstat 18 files changed, 220 insertions(+), 167 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/ChangeLog	Thu Feb 26 10:29:59 2009 +0100
@@ -1,3 +1,23 @@
+2009-02-26  Jaroslav Hajek  <highegg@gmail.com>
+
+	* strings/strsplit.m: New function.
+	* strings/split.m: Move to deprecated/.
+	* strings/Makefile.in: Update.
+	* deprecated/Makefile.in: Update.
+
+	* general/int2str.m: Use strsplit instead of split.
+	* general/num2str.m: Ditto.
+	* help/__makeinfo__.m: Ditto.
+	* help/lookfor.m: Ditto.
+	* miscellaneous/compare_versions.m: Ditto.
+	* miscellaneous/tar.m: Ditto.
+	* miscellaneous/unpack.m: Ditto.
+	* miscellaneous/what.m: Ditto.
+	* miscellaneous/zip.m: Ditto.
+	* pkg/pkg.m: Ditto.
+	* strings/strtok.m: Ditto.
+	* testfun/rundemos.m: Ditto.
+
 2009-02-25  John W. Eaton  <jwe@octave.org>
 
 	* Makefile.in (distclean maintainer-clean): Remove tags and TAGS
--- a/scripts/deprecated/Makefile.in	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/deprecated/Makefile.in	Thu Feb 26 10:29:59 2009 +0100
@@ -50,7 +50,7 @@
   pascal_rnd.m poisson_cdf.m poisson_inv.m poisson_pdf.m \
   poisson_rnd.m polyinteg.m setstr.m spatan2.m spchol2inv.m \
   spcholinv.m spcumprod.m spcumsum.m spchol.m spdet.m spdiag.m \
-  spfind.m spinv.m spkron.m splchol.m splu.m spmax.m spmin.m \
+  spfind.m spinv.m spkron.m splchol.m split.m splu.m spmax.m spmin.m \
   spprod.m spqr.m spsum.m spsumsq.m struct_contains.m \
   struct_elements.m t_cdf.m t_inv.m t_pdf.m t_rnd.m uniform_cdf.m \
   uniform_inv.m uniform_pdf.m uniform_rnd.m unmark_command.m \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/deprecated/split.m	Thu Feb 26 10:29:59 2009 +0100
@@ -0,0 +1,121 @@
+## Copyright (C) 1996, 1999, 2000, 2005, 2006, 2007 Kurt Hornik
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {} split (@var{s}, @var{t}, @var{n})
+## This function has been deprecated.  For a single-character @var{t}, use
+## @code{char (strsplit (s, t))} instead.
+## @end deftypefn
+
+## Divides the string @var{s} into pieces separated by @var{t}, returning
+## the result in a string array (padded with blanks to form a valid
+## matrix).  If the optional input @var{n} is supplied, split @var{s}
+## into at most @var{n} different pieces.
+##
+## For example,
+##
+## @example
+## split ("Test string", "t")
+##      @result{}
+##         "Tes "
+##         " s  "
+##         "ring"
+## @end example
+##
+## @example
+## split ("Test string", "t s", 2)
+##      @result{}
+##         "Tes  "
+##         "tring"
+## @end example
+## @seealso{strtok, index}
+## @end deftypefn
+
+## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at>
+## Adapted-By: jwe
+
+function m = split (s, t, n)
+  ## Deprecated: warn on the first call only, then behave exactly as before.
+  persistent warned = false;
+  if (! warned)
+    warned = true;
+    warning ("Octave:deprecated-function",
+             "split is obsolete and will be removed from a future version of Octave; please use strsplit instead");
+  endif
+
+  if (nargin != 2 && nargin != 3)
+    print_usage ();
+  elseif (! (ischar (s) && ischar (t)))
+    error ("split: both s and t must be strings");
+  endif
+
+  ## Without an explicit N, allow as many pieces as S has characters.
+  if (nargin == 2)
+    n = length (s);
+  endif
+
+  l_s = length (s);
+  l_t = length (t);
+
+  ## Degenerate inputs, in this order: empty S, empty T, T longer than S.
+  if (l_s == 0)
+    m = "";
+    return;
+  elseif (l_t == 0)
+    m = s';
+    return;
+  elseif (l_s < l_t)
+    error ("split: s must not be shorter than t");
+  endif
+
+  if (min (size (s)) != 1 || min (size (t)) != 1)
+    error("split: multi-line strings are not supported");
+  endif
+
+  ## Positions where T occurs in S; only the first N-1 are used as cuts.
+  ind = findstr (s, t, 0);
+  if (length (ind) == 0)
+    m = s;
+    return;
+  elseif (n - 1 < length(ind))
+    ind = ind(1:n-1);
+  endif
+  ind2 = [1, ind+l_t];  # first character of each piece
+  ind  = [ind, l_s+1];  # one past the last character of each piece
+
+  ## Blank-filled matrix: one row per piece, as wide as the longest piece.
+  m = repmat (" ", length (ind), max (ind - ind2));
+  for i = 1:length (ind)
+    tmp = ind2(i):(ind(i)-1);
+    m(i,1:length(tmp)) = s(tmp);
+  endfor
+endfunction
+
+%!assert(all (all (split ("Test string", "t") == ["Tes "; " s  "; "ring"])));
+
+%!error split ();
+
+%!assert(all (strcmp (split ("foo bar baz", " ", 2), ["foo"; "bar baz"])));
+
+%!error split ("foo", "bar", 3, 4);
+
+%!assert (all (strcmp (split("road//to/hell","/"), ["road"; "    "; "to  "; "hell"])))
+
+%!assert (all (strcmp (split("/road/to/hell/","/"), ["    "; "road"; "to  "; "hell"; "    "])))
+
+
--- a/scripts/general/int2str.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/general/int2str.m	Thu Feb 26 10:29:59 2009 +0100
@@ -70,7 +70,7 @@
     endif
     tmp = sprintf (fmt, permute (x, [2, 1, 3 : nd]));
     tmp(end) = "";
-    retval = split (tmp, "\n");
+    retval = char (strsplit (tmp, "\n"));
   else
     print_usage ();
   endif
--- a/scripts/general/num2str.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/general/num2str.m	Thu Feb 26 10:29:59 2009 +0100
@@ -142,7 +142,7 @@
     endwhile
 
     tmp(length (tmp)) = "";
-    retval = strtrim (split (tmp, "\n"));
+    retval = char (strtrim (strsplit (tmp, "\n")));
   else
     if (nargin == 2)
       if (ischar (arg))
--- a/scripts/help/__makeinfo__.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/help/__makeinfo__.m	Thu Feb 26 10:29:59 2009 +0100
@@ -107,7 +107,7 @@
         endif
       endif
       see_also_args = text (bracket_start+1:(stop-1));
-      see_also_args = strtrim (cellstr (split (see_also_args, ",")));
+      see_also_args = strtrim (strsplit (see_also_args, ","));
       expanded = see_also (see_also_args);
       text = strcat (text (1:start-1), expanded, text (stop+1:end));
     endif
--- a/scripts/help/lookfor.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/help/lookfor.m	Thu Feb 26 10:29:59 2009 +0100
@@ -63,10 +63,10 @@
   endif
   
   ## Search functions in new path dirs.
-  orig_path = split_str (__pathorig__ (), pathsep ());
+  orig_path = strsplit (__pathorig__ (), pathsep ());
 
   ## ditto for path.
-  new_path = split_str (path (), pathsep ());
+  new_path = strsplit (path (), pathsep ());
 
   ## scratch out directories already covered by orig_path.
   if (had_core_cache)
@@ -183,30 +183,3 @@
   endif
 endfunction
 
-## split string using a separator (or more separators)
-## FIXME: maybe this function should be available to users?
-function s = split_str (p, sep)
-  if (isempty (p))
-    s = cell (size (p));
-  else
-    ## split p according to delimiter.
-    if (isscalar (sep))
-      ## single separator
-      idx = find (p == sep);
-    else
-      ## multiple separators
-      idx = strchr (p, sep);
-    endif
-
-    ## get substring sizes.
-    if (isempty (idx))
-      sizes = numel (p);
-    else
-      sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)];
-    endif
-    ## remove separators.
-    p(idx) = []; 
-    ## convert!
-    s = mat2cell (p, 1, sizes);
-  endif
-endfunction
--- a/scripts/miscellaneous/compare_versions.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/miscellaneous/compare_versions.m	Thu Feb 26 10:29:59 2009 +0100
@@ -109,8 +109,8 @@
     v2nochar = v2;
   endif
 
-  v1n = str2num (split (v1nochar, "."));
-  v2n = str2num (split (v2nochar, "."));
+  v1n = str2num (char (strsplit (v1nochar, ".")));
+  v2n = str2num (char (strsplit (v2nochar, ".")));
   if ((isempty (v1n) && isempty (v1c)) || (isempty (v2n) && isempty(v2c)))
     error ("compare_versions: given version strings are not valid: %s %s",
 	   v1, v2);
--- a/scripts/miscellaneous/tar.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/miscellaneous/tar.m	Thu Feb 26 10:29:59 2009 +0100
@@ -56,7 +56,7 @@
 	  if (output(end) == "\n")
 	    output(end) = [];
 	  endif
-          entries = cellstr (split (output, "\n"));
+          entries = strsplit (output, "\n");
 	  entries = entries';
 	endif
       else
--- a/scripts/miscellaneous/unpack.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/miscellaneous/unpack.m	Thu Feb 26 10:29:59 2009 +0100
@@ -194,7 +194,7 @@
     if (output(length (output)) == "\n")
       output(length (output)) = [];
     endif
-    files = parser (cellstr (split (output, "\n")))';
+    files = parser (strsplit (output, "\n"))';
 
     ## Move files if necessary
     if (needmove)
--- a/scripts/miscellaneous/what.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/miscellaneous/what.m	Thu Feb 26 10:29:59 2009 +0100
@@ -33,9 +33,7 @@
     d = pwd ();
   elseif (isempty (strfind (d, filesep ())))
     ## Find the appropriate directory on the path.
-    p = split (path (), pathsep());
-    p = cellfun (@(x) deblank (x), mat2cell (p, ones (1, size (p, 1)), ...
-		size (p, 2)), "UniformOutput", false);
+    p = strtrim (strsplit (path (), pathsep()));
     d = p{find (cellfun (@(x) ! isempty (strfind (x, d)), p))(end)};
   else
     [status, msg, msgid] = fileattrib (d);
--- a/scripts/miscellaneous/zip.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/miscellaneous/zip.m	Thu Feb 26 10:29:59 2009 +0100
@@ -56,7 +56,7 @@
 	    if (entries(end) == "\n")
 	      entries(end) = [];
 	    endif
-            entries = cellstr (split (entries, "\n"))';
+            entries = strsplit (entries, "\n");
 	  else
 	    error ("zip: zipinfo failed with exit status = %d", status);
 	  endif
--- a/scripts/pkg/pkg.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/pkg/pkg.m	Thu Feb 26 10:29:59 2009 +0100
@@ -1754,12 +1754,7 @@
 ## Split the text into a cell array of strings by sep.
 ## Example: "A, B" => {"A", "B"} (with sep = ",")
 function out = split_by (text, sep)
-  text_matrix = split (text, sep);
-  num_words = size (text_matrix, 1);
-  out = cell (num_words, 1);
-  for i = 1:num_words
-    out{i} = strip (text_matrix(i, :));
-  endfor
+  out = strtrim (strsplit (text, sep));
 endfunction
 
 ## Create an INDEX file for a package that doesn't provide one.
--- a/scripts/strings/Makefile.in	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/strings/Makefile.in	Thu Feb 26 10:29:59 2009 +0100
@@ -35,7 +35,7 @@
 
 SOURCES = base2dec.m bin2dec.m blanks.m deblank.m dec2base.m \
   dec2bin.m dec2hex.m findstr.m hex2dec.m index.m isletter.m isstrprop.m \
-  mat2str.m regexptranslate.m rindex.m split.m str2double.m \
+  mat2str.m regexptranslate.m rindex.m strsplit.m str2double.m \
   str2num.m strcat.m cstrcat.m strcmpi.m strchr.m strfind.m strjust.m strmatch.m \
   strncmpi.m strrep.m strtok.m strtrim.m strtrunc.m \
   substr.m validatestring.m
--- a/scripts/strings/split.m	Thu Feb 26 03:53:37 2009 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-## Copyright (C) 1996, 1999, 2000, 2005, 2006, 2007 Kurt Hornik
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn {Function File} {} split (@var{s}, @var{t}, @var{n})
-## Divides the string @var{s} into pieces separated by @var{t}, returning
-## the result in a string array (padded with blanks to form a valid
-## matrix).  If the optional input @var{n} is supplied, split @var{s}
-## into at most @var{n} different pieces.
-##
-## For example,
-##
-## @example
-## split ("Test string", "t")
-##      @result{}
-##         "Tes "
-##         " s  "
-##         "ring"
-## @end example
-##
-## @example
-## split ("Test string", "t s", 2)
-##      @result{}
-##         "Tes  "
-##         "tring"
-## @end example
-## @seealso{strtok, index}
-## @end deftypefn
-
-## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at>
-## Adapted-By: jwe
-
-function m = split (s, t, n)
-
-  if (nargin == 2 || nargin == 3)
-    if (nargin == 2)
-      n = length (s);
-    endif
-
-    if (ischar (s) && ischar (t))
-
-      l_s = length (s);
-      l_t = length (t);
-
-      if (l_s == 0)
-	m = "";
-	return;
-      elseif (l_t == 0)
-	m = s';
-	return;
-      elseif (l_s < l_t)
-	error ("split: s must not be shorter than t");
-      endif
-
-      if (min (size (s)) != 1 || min (size (t)) != 1)
-	error("split: multi-line strings are not supported");
-      endif
-
-      ind = findstr (s, t, 0);
-      if (length (ind) == 0)
-	m = s;
-	return;
-      elseif (n - 1 < length(ind))
-	ind = ind(1:n-1);
-      endif
-      ind2 = [1, ind+l_t];
-      ind  = [ind, l_s+1];
-
-      ind_diff = ind-ind2;
-
-      ## Create a matrix of the correct size that's filled with spaces.
-      m_rows = length (ind);
-      m_cols = max (ind_diff);
-      m = repmat (" ", m_rows, m_cols);
-
-      ## Copy the strings to the matrix.
-      for i = 1:length (ind)
-	tmp = ind2(i):(ind(i)-1);
-	m(i,1:length(tmp)) = s(tmp);
-      endfor
-    else
-      error ("split: both s and t must be strings");
-    endif
-  else
-    print_usage ();
-  endif
-
-endfunction
-
-%!assert(all (all (split ("Test string", "t") == ["Tes "; " s  "; "ring"])));
-
-%!error split ();
-
-%!assert(all (strcmp (split ("foo bar baz", " ", 2), ["foo"; "bar baz"])));
-
-%!error split ("foo", "bar", 3, 4);
-
-%!assert (all (strcmp (split("road//to/hell","/"), ["road"; "    "; "to  "; "hell"])))
-
-%!assert (all (strcmp (split("/road/to/hell/","/"), ["    "; "road"; "to  "; "hell"; "    "])))
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/strings/strsplit.m	Thu Feb 26 10:29:59 2009 +0100
@@ -0,0 +1,63 @@
+## Copyright (C) 2009 Jaroslav Hajek
+##
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{s}] =} strsplit (@var{p}, @var{sep}, @var{strip_empty})
+## Splits a single string using one or more single-character delimiters.
+## The result is returned as a cell array of strings. Consecutive delimiters
+## and delimiters at boundaries result in empty strings, unless @var{strip_empty} is true.
+## The default value of @var{strip_empty} is false.
+## @end deftypefn
+
+function s = strsplit (p, sep, strip_empty = false)
+  ## Test nargin first: with SEP missing, ischar (sep) would raise an
+  ## "undefined" error instead of showing the usage message.
+  if (nargin < 2 || ! ischar (p) || rows (p) > 1 || ! ischar (sep) || ! islogical (strip_empty))
+    print_usage ();
+  endif
+
+  if (isempty (p))
+    ## Nothing to split: the result is an empty cell array shaped like P.
+    s = cell (size (p));
+  else
+    ## Positions in P of every character that is one of the delimiters.
+    if (isscalar (sep))
+      idx = find (p == sep);
+    else
+      idx = strchr (p, sep);
+    endif
+
+    ## Length of each piece between consecutive delimiters (may be zero).
+    if (isempty (idx))
+      sizes = numel (p);
+    else
+      sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)];
+    endif
+    ## Drop the delimiters so that P holds just the pieces, back to back.
+    p(idx) = [];
+    if (strip_empty)
+      ## Zero-length pieces are the empty strings; leave them out.
+      sizes = sizes (sizes != 0);
+    endif
+    s = mat2cell (p, 1, sizes);
+  endif
+endfunction
+
+%!assert (all (strcmp (strsplit ("road to hell", " "), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road to^hell", " ^"), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road   to--hell", " -", true), {"road", "to", "hell"})))
--- a/scripts/strings/strtok.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/strings/strtok.m	Thu Feb 26 10:29:59 2009 +0100
@@ -36,7 +36,7 @@
 ##         rem = *27+31
 ## @end group
 ## @end example
-## @seealso{index, split}
+## @seealso{index, strsplit}
 ## @end deftypefn
 
 ## FIXME: check what to do for a null delimiter
--- a/scripts/testfun/rundemos.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/testfun/rundemos.m	Thu Feb 26 10:29:59 2009 +0100
@@ -25,7 +25,7 @@
 function rundemos (directory)
 
   if (nargin == 0)
-    dirs = cellstr (split (path (), pathsep ()));
+    dirs = strsplit (path (), pathsep ());
   elseif (nargin == 1)
     if (is_absolute_filename (directory))
       dirs = {directory};