# HG changeset patch # User Jaroslav Hajek # Date 1235640599 -3600 # Node ID 2c8b2399247beb0e23ed8185af2341a534abf6a5 # Parent a017b920530ebc74db0fcd4a59ac01d1885bc2f1 implement strsplit; deprecate split diff -r a017b920530e -r 2c8b2399247b scripts/ChangeLog --- a/scripts/ChangeLog Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/ChangeLog Thu Feb 26 10:29:59 2009 +0100 @@ -1,3 +1,23 @@ +2009-02-26 Jaroslav Hajek + + * strings/strsplit.m: New function. + * strings/split.m: Move to deprecated/. + * strings/Makefile.in: Update. + * deprecated/Makefile.in: Update. + + * general/int2str.m: Use strsplit instead of split. + * general/num2str.m: Ditto. + * help/__makeinfo__.m: Ditto. + * help/lookfor.m: Ditto. + * miscellaneous/compare_versions.m: Ditto. + * miscellaneous/tar.m: Ditto. + * miscellaneous/unpack.m: Ditto. + * miscellaneous/what.m: Ditto. + * miscellaneous/zip.m: Ditto. + * pkg/pkg.m: Ditto. + * strings/strtok.m: Ditto. + * testfun/rundemos.m: Ditto. + 2009-02-25 John W. Eaton * Makefile.in (distclean maintainer-clean): Remove tags and TAGS diff -r a017b920530e -r 2c8b2399247b scripts/deprecated/Makefile.in --- a/scripts/deprecated/Makefile.in Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/deprecated/Makefile.in Thu Feb 26 10:29:59 2009 +0100 @@ -50,7 +50,7 @@ pascal_rnd.m poisson_cdf.m poisson_inv.m poisson_pdf.m \ poisson_rnd.m polyinteg.m setstr.m spatan2.m spchol2inv.m \ spcholinv.m spcumprod.m spcumsum.m spchol.m spdet.m spdiag.m \ - spfind.m spinv.m spkron.m splchol.m splu.m spmax.m spmin.m \ + spfind.m spinv.m spkron.m splchol.m split.m splu.m spmax.m spmin.m \ spprod.m spqr.m spsum.m spsumsq.m struct_contains.m \ struct_elements.m t_cdf.m t_inv.m t_pdf.m t_rnd.m uniform_cdf.m \ uniform_inv.m uniform_pdf.m uniform_rnd.m unmark_command.m \ diff -r a017b920530e -r 2c8b2399247b scripts/deprecated/split.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/deprecated/split.m Thu Feb 26 10:29:59 2009 +0100 @@ -0,0 +1,121 @@ +## Copyright (C) 1996, 1999, 
2000, 2005, 2006, 2007 Kurt Hornik +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or (at +## your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## <http://www.gnu.org/licenses/>. + +## -*- texinfo -*- +## @deftypefn {Function File} {} split (@var{s}, @var{t}, @var{n}) +## This function has been deprecated. Use @code{char (strsplit (s, t))} +## instead. +## @end deftypefn + +## Divides the string @var{s} into pieces separated by @var{t}, returning +## the result in a string array (padded with blanks to form a valid +## matrix). If the optional input @var{n} is supplied, split @var{s} +## into at most @var{n} different pieces. 
+## +## For example, +## +## @example +## split ("Test string", "t") +## @result{} +## "Tes " +## " s " +## "ring" +## @end example +## +## @example +## split ("Test string", "t s", 2) +## @result{} +## "Tes " +## "tring" +## @end example +## @seealso{strtok, index} +## @end deftypefn + +## Author: Kurt Hornik +## Adapted-By: jwe + +function m = split (s, t, n) + + if (nargin == 2 || nargin == 3) + if (nargin == 2) + n = length (s); + endif + + if (ischar (s) && ischar (t)) + + l_s = length (s); + l_t = length (t); + + if (l_s == 0) + m = ""; + return; + elseif (l_t == 0) + m = s'; + return; + elseif (l_s < l_t) + error ("split: s must not be shorter than t"); + endif + + if (min (size (s)) != 1 || min (size (t)) != 1) + error("split: multi-line strings are not supported"); + endif + + ind = findstr (s, t, 0); + if (length (ind) == 0) + m = s; + return; + elseif (n - 1 < length(ind)) + ind = ind(1:n-1); + endif + ind2 = [1, ind+l_t]; + ind = [ind, l_s+1]; + + ind_diff = ind-ind2; + + ## Create a matrix of the correct size that's filled with spaces. + m_rows = length (ind); + m_cols = max (ind_diff); + m = repmat (" ", m_rows, m_cols); + + ## Copy the strings to the matrix. 
+ for i = 1:length (ind) + tmp = ind2(i):(ind(i)-1); + m(i,1:length(tmp)) = s(tmp); + endfor + else + error ("split: both s and t must be strings"); + endif + else + print_usage (); + endif + +endfunction + +%!assert(all (all (split ("Test string", "t") == ["Tes "; " s "; "ring"]))); + +%!error split (); + +%!assert(all (strcmp (split ("foo bar baz", " ", 2), ["foo"; "bar baz"]))); + +%!error split ("foo", "bar", 3, 4); + +%!assert (all (strcmp (split("road//to/hell","/"), ["road"; " "; "to "; "hell"]))) + +%!assert (all (strcmp (split("/road/to/hell/","/"), [" "; "road"; "to "; "hell"; " "]))) + + diff -r a017b920530e -r 2c8b2399247b scripts/general/int2str.m --- a/scripts/general/int2str.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/general/int2str.m Thu Feb 26 10:29:59 2009 +0100 @@ -70,7 +70,7 @@ endif tmp = sprintf (fmt, permute (x, [2, 1, 3 : nd])); tmp(end) = ""; - retval = split (tmp, "\n"); + retval = char (strsplit (tmp, "\n")); else print_usage (); endif diff -r a017b920530e -r 2c8b2399247b scripts/general/num2str.m --- a/scripts/general/num2str.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/general/num2str.m Thu Feb 26 10:29:59 2009 +0100 @@ -142,7 +142,7 @@ endwhile tmp(length (tmp)) = ""; - retval = strtrim (split (tmp, "\n")); + retval = char (strtrim (strsplit (tmp, "\n"))); else if (nargin == 2) if (ischar (arg)) diff -r a017b920530e -r 2c8b2399247b scripts/help/__makeinfo__.m --- a/scripts/help/__makeinfo__.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/help/__makeinfo__.m Thu Feb 26 10:29:59 2009 +0100 @@ -107,7 +107,7 @@ endif endif see_also_args = text (bracket_start+1:(stop-1)); - see_also_args = strtrim (cellstr (split (see_also_args, ","))); + see_also_args = strtrim (strsplit (see_also_args, ",")); expanded = see_also (see_also_args); text = strcat (text (1:start-1), expanded, text (stop+1:end)); endif diff -r a017b920530e -r 2c8b2399247b scripts/help/lookfor.m --- a/scripts/help/lookfor.m Thu Feb 26 03:53:37 2009 -0500 +++ 
b/scripts/help/lookfor.m Thu Feb 26 10:29:59 2009 +0100 @@ -63,10 +63,10 @@ endif ## Search functions in new path dirs. - orig_path = split_str (__pathorig__ (), pathsep ()); + orig_path = strsplit (__pathorig__ (), pathsep ()); ## ditto for path. - new_path = split_str (path (), pathsep ()); + new_path = strsplit (path (), pathsep ()); ## scratch out directories already covered by orig_path. if (had_core_cache) @@ -183,30 +183,3 @@ endif endfunction -## split string using a separator (or more separators) -## FIXME: maybe this function should be available to users? -function s = split_str (p, sep) - if (isempty (p)) - s = cell (size (p)); - else - ## split p according to delimiter. - if (isscalar (sep)) - ## single separator - idx = find (p == sep); - else - ## multiple separators - idx = strchr (p, sep); - endif - - ## get substring sizes. - if (isempty (idx)) - sizes = numel (p); - else - sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)]; - endif - ## remove separators. - p(idx) = []; - ## convert! 
- s = mat2cell (p, 1, sizes); - endif -endfunction diff -r a017b920530e -r 2c8b2399247b scripts/miscellaneous/compare_versions.m --- a/scripts/miscellaneous/compare_versions.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/miscellaneous/compare_versions.m Thu Feb 26 10:29:59 2009 +0100 @@ -109,8 +109,8 @@ v2nochar = v2; endif - v1n = str2num (split (v1nochar, ".")); - v2n = str2num (split (v2nochar, ".")); + v1n = str2num (char (strsplit (v1nochar, "."))); + v2n = str2num (char (strsplit (v2nochar, "."))); if ((isempty (v1n) && isempty (v1c)) || (isempty (v2n) && isempty(v2c))) error ("compare_versions: given version strings are not valid: %s %s", v1, v2); diff -r a017b920530e -r 2c8b2399247b scripts/miscellaneous/tar.m --- a/scripts/miscellaneous/tar.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/miscellaneous/tar.m Thu Feb 26 10:29:59 2009 +0100 @@ -56,7 +56,7 @@ if (output(end) == "\n") output(end) = []; endif - entries = cellstr (split (output, "\n")); + entries = strsplit (output, "\n"); entries = entries'; endif else diff -r a017b920530e -r 2c8b2399247b scripts/miscellaneous/unpack.m --- a/scripts/miscellaneous/unpack.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/miscellaneous/unpack.m Thu Feb 26 10:29:59 2009 +0100 @@ -194,7 +194,7 @@ if (output(length (output)) == "\n") output(length (output)) = []; endif - files = parser (cellstr (split (output, "\n")))'; + files = parser (strsplit (output, "\n"))'; ## Move files if necessary if (needmove) diff -r a017b920530e -r 2c8b2399247b scripts/miscellaneous/what.m --- a/scripts/miscellaneous/what.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/miscellaneous/what.m Thu Feb 26 10:29:59 2009 +0100 @@ -33,9 +33,7 @@ d = pwd (); elseif (isempty (strfind (d, filesep ()))) ## Find the appropriate directory on the path. - p = split (path (), pathsep()); - p = cellfun (@(x) deblank (x), mat2cell (p, ones (1, size (p, 1)), ... 
- size (p, 2)), "UniformOutput", false); + p = strtrim (strsplit (path (), pathsep())); d = p{find (cellfun (@(x) ! isempty (strfind (x, d)), p))(end)}; else [status, msg, msgid] = fileattrib (d); diff -r a017b920530e -r 2c8b2399247b scripts/miscellaneous/zip.m --- a/scripts/miscellaneous/zip.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/miscellaneous/zip.m Thu Feb 26 10:29:59 2009 +0100 @@ -56,7 +56,7 @@ if (entries(end) == "\n") entries(end) = []; endif - entries = cellstr (split (entries, "\n"))'; + entries = strsplit (entries, "\n"); else error ("zip: zipinfo failed with exit status = %d", status); endif diff -r a017b920530e -r 2c8b2399247b scripts/pkg/pkg.m --- a/scripts/pkg/pkg.m Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/pkg/pkg.m Thu Feb 26 10:29:59 2009 +0100 @@ -1754,12 +1754,7 @@ ## Split the text into a cell array of strings by sep. ## Example: "A, B" => {"A", "B"} (with sep = ",") function out = split_by (text, sep) - text_matrix = split (text, sep); - num_words = size (text_matrix, 1); - out = cell (num_words, 1); - for i = 1:num_words - out{i} = strip (text_matrix(i, :)); - endfor + out = strtrim (strsplit (text, sep)); endfunction ## Create an INDEX file for a package that doesn't provide one. 
diff -r a017b920530e -r 2c8b2399247b scripts/strings/Makefile.in --- a/scripts/strings/Makefile.in Thu Feb 26 03:53:37 2009 -0500 +++ b/scripts/strings/Makefile.in Thu Feb 26 10:29:59 2009 +0100 @@ -35,7 +35,7 @@ SOURCES = base2dec.m bin2dec.m blanks.m deblank.m dec2base.m \ dec2bin.m dec2hex.m findstr.m hex2dec.m index.m isletter.m isstrprop.m \ - mat2str.m regexptranslate.m rindex.m split.m str2double.m \ + mat2str.m regexptranslate.m rindex.m strsplit.m str2double.m \ str2num.m strcat.m cstrcat.m strcmpi.m strchr.m strfind.m strjust.m strmatch.m \ strncmpi.m strrep.m strtok.m strtrim.m strtrunc.m \ substr.m validatestring.m diff -r a017b920530e -r 2c8b2399247b scripts/strings/split.m --- a/scripts/strings/split.m Thu Feb 26 03:53:37 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -## Copyright (C) 1996, 1999, 2000, 2005, 2006, 2007 Kurt Hornik -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. If not, see -## <http://www.gnu.org/licenses/>. - -## -*- texinfo -*- -## @deftypefn {Function File} {} split (@var{s}, @var{t}, @var{n}) -## Divides the string @var{s} into pieces separated by @var{t}, returning -## the result in a string array (padded with blanks to form a valid -## matrix). If the optional input @var{n} is supplied, split @var{s} -## into at most @var{n} different pieces. 
-## -## For example, -## -## @example -## split ("Test string", "t") -## @result{} -## "Tes " -## " s " -## "ring" -## @end example -## -## @example -## split ("Test string", "t s", 2) -## @result{} -## "Tes " -## "tring" -## @end example -## @seealso{strtok, index} -## @end deftypefn - -## Author: Kurt Hornik -## Adapted-By: jwe - -function m = split (s, t, n) - - if (nargin == 2 || nargin == 3) - if (nargin == 2) - n = length (s); - endif - - if (ischar (s) && ischar (t)) - - l_s = length (s); - l_t = length (t); - - if (l_s == 0) - m = ""; - return; - elseif (l_t == 0) - m = s'; - return; - elseif (l_s < l_t) - error ("split: s must not be shorter than t"); - endif - - if (min (size (s)) != 1 || min (size (t)) != 1) - error("split: multi-line strings are not supported"); - endif - - ind = findstr (s, t, 0); - if (length (ind) == 0) - m = s; - return; - elseif (n - 1 < length(ind)) - ind = ind(1:n-1); - endif - ind2 = [1, ind+l_t]; - ind = [ind, l_s+1]; - - ind_diff = ind-ind2; - - ## Create a matrix of the correct size that's filled with spaces. - m_rows = length (ind); - m_cols = max (ind_diff); - m = repmat (" ", m_rows, m_cols); - - ## Copy the strings to the matrix. 
-      for i = 1:length (ind)
-        tmp = ind2(i):(ind(i)-1);
-        m(i,1:length(tmp)) = s(tmp);
-      endfor
-    else
-      error ("split: both s and t must be strings");
-    endif
-  else
-    print_usage ();
-  endif
-
-endfunction
-
-%!assert(all (all (split ("Test string", "t") == ["Tes "; " s  "; "ring"])));
-
-%!error split ();
-
-%!assert(all (strcmp (split ("foo bar baz", " ", 2), ["foo    "; "bar baz"])));
-
-%!error split ("foo", "bar", 3, 4);
-
-%!assert (all (strcmp (split("road//to/hell","/"), ["road"; "    "; "to  "; "hell"])))
-
-%!assert (all (strcmp (split("/road/to/hell/","/"), ["    "; "road"; "to  "; "hell"; "    "])))
-
-
diff -r a017b920530e -r 2c8b2399247b scripts/strings/strsplit.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/strings/strsplit.m	Thu Feb 26 10:29:59 2009 +0100
@@ -0,0 +1,79 @@
+## Copyright (C) 2009 Jaroslav Hajek
+##
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; see the file COPYING. If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{s}] =} strsplit (@var{p}, @var{sep}, @var{strip_empty})
+## Splits a single string using one or more delimiters.
+## The result is returned as a cell array of strings. Consecutive delimiters
+## and delimiters at boundaries result in empty strings, unless @var{strip_empty} is true.
+## The default value of @var{strip_empty} is false.
+##
+## Every character of @var{sep} acts as a separate one-character delimiter;
+## @var{sep} is not matched as a substring. @var{p} must be a string with
+## at most one row and @var{strip_empty} must be a logical value, otherwise
+## @code{print_usage} is called. An empty @var{p} gives an empty cell array.
+##
+## @example
+## @group
+## strsplit ("road to^hell", " ^")
+##      @result{} @{"road", "to", "hell"@}
+## @end group
+## @end example
+## @end deftypefn
+
+function s = strsplit (p, sep, strip_empty = false)
+  ## Check nargin first: evaluating ischar (sep) when sep was not passed
+  ## raises an undefined-variable error, so print_usage was never reached.
+  if (nargin < 2 || ! ischar (p) || rows (p) > 1 || ! ischar (sep) || ! islogical (strip_empty))
+    print_usage ();
+  endif
+
+  if (isempty (p))
+    s = cell (size (p));
+  else
+    ## split p according to delimiter.
+    if (isscalar (sep))
+      ## single separator
+      idx = find (p == sep);
+    else
+      ## multiple separators
+      idx = strchr (p, sep);
+    endif
+
+    ## get substring sizes.
+    if (isempty (idx))
+      sizes = numel (p);
+    else
+      sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)];
+    endif
+    ## remove separators.
+    p(idx) = [];
+    if (strip_empty)
+      ## omit zero lengths.
+      sizes = sizes (sizes != 0);
+    endif
+    ## convert!
+    s = mat2cell (p, 1, sizes);
+  endif
+endfunction
+
+%!assert (all (strcmp (strsplit ("road to hell", " "), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road to^hell", " ^"), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road to--hell", " -", true), {"road", "to", "hell"})))
+
+%!error <Invalid call to strsplit> strsplit ("road to hell")
diff -r a017b920530e -r 2c8b2399247b scripts/strings/strtok.m
--- a/scripts/strings/strtok.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/strings/strtok.m	Thu Feb 26 10:29:59 2009 +0100
@@ -36,7 +36,7 @@
 ## rem = *27+31
 ## @end group
 ## @end example
-## @seealso{index, split}
+## @seealso{index, strsplit}
 ## @end deftypefn
 
 ## FIXME: check what to do for a null delimiter
diff -r a017b920530e -r 2c8b2399247b scripts/testfun/rundemos.m
--- a/scripts/testfun/rundemos.m	Thu Feb 26 03:53:37 2009 -0500
+++ b/scripts/testfun/rundemos.m	Thu Feb 26 10:29:59 2009 +0100
@@ -25,7 +25,7 @@
 function rundemos (directory)
 
   if (nargin == 0)
-    dirs = cellstr (split (path (), pathsep ()));
+    dirs = strsplit (path (), pathsep ());
   elseif (nargin == 1)
     if (is_absolute_filename (directory))
       dirs = {directory};