view scripts/strings/strsplit.m @ 17338:1c89599167a6

maint: End m-files with 1 blank line. Simplifies automated grammarchecking script. * scripts/@ftp/ascii.m, scripts/@ftp/binary.m, scripts/@ftp/cd.m, scripts/@ftp/close.m, scripts/@ftp/delete.m, scripts/@ftp/dir.m, scripts/@ftp/display.m, scripts/@ftp/ftp.m, scripts/@ftp/loadobj.m, scripts/@ftp/mget.m, scripts/@ftp/mkdir.m, scripts/@ftp/mput.m, scripts/@ftp/rename.m, scripts/@ftp/rmdir.m, scripts/@ftp/saveobj.m, scripts/audio/lin2mu.m, scripts/audio/loadaudio.m, scripts/audio/mu2lin.m, scripts/audio/record.m, scripts/audio/saveaudio.m, scripts/audio/setaudio.m, scripts/deprecated/__error_text__.m, scripts/deprecated/cut.m, scripts/deprecated/error_text.m, scripts/deprecated/isstr.m, scripts/deprecated/polyderiv.m, scripts/deprecated/studentize.m, scripts/deprecated/sylvester_matrix.m, scripts/general/bicubic.m, scripts/general/celldisp.m, scripts/general/colon.m, scripts/general/cplxpair.m, scripts/general/del2.m, scripts/general/display.m, scripts/general/isdir.m, scripts/general/isequaln.m, scripts/general/loadobj.m, scripts/general/private/__isequal__.m, scripts/general/private/__splinen__.m, scripts/general/profexplore.m, scripts/general/quadgk.m, scripts/general/randi.m, scripts/general/repmat.m, scripts/general/saveobj.m, scripts/geometry/delaunay.m, scripts/help/__unimplemented__.m, scripts/help/doc_cache_create.m, scripts/help/get_first_help_sentence.m, scripts/help/help.m, scripts/help/print_usage.m, scripts/help/private/__additional_help_message__.m, scripts/help/private/__strip_html_tags__.m, scripts/help/type.m, scripts/image/imfinfo.m, scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imwrite.m, scripts/image/private/__imfinfo__.m, scripts/image/private/__imread__.m, scripts/image/private/__imwrite__.m, scripts/image/private/imageIO.m, scripts/image/private/imwrite_filename.m, scripts/image/private/ind2x.m, scripts/io/beep.m, scripts/io/strread.m, scripts/io/textread.m, scripts/io/textscan.m, scripts/linear-algebra/krylov.m, 
scripts/linear-algebra/subspace.m, scripts/miscellaneous/bug_report.m, scripts/miscellaneous/bunzip2.m, scripts/miscellaneous/cast.m, scripts/miscellaneous/copyfile.m, scripts/miscellaneous/debug.m, scripts/miscellaneous/dir.m, scripts/miscellaneous/dump_prefs.m, scripts/miscellaneous/error_ids.m, scripts/miscellaneous/fileattrib.m, scripts/miscellaneous/gunzip.m, scripts/miscellaneous/isdeployed.m, scripts/miscellaneous/ismac.m, scripts/miscellaneous/mex.m, scripts/miscellaneous/mexext.m, scripts/miscellaneous/mkoctfile.m, scripts/miscellaneous/movefile.m, scripts/miscellaneous/namelengthmax.m, scripts/miscellaneous/news.m, scripts/miscellaneous/pack.m, scripts/miscellaneous/perl.m, scripts/miscellaneous/private/display_info_file.m, scripts/miscellaneous/python.m, scripts/miscellaneous/rmappdata.m, scripts/miscellaneous/run.m, scripts/miscellaneous/tar.m, scripts/miscellaneous/tempname.m, scripts/miscellaneous/untar.m, scripts/miscellaneous/unzip.m, scripts/miscellaneous/what.m, scripts/miscellaneous/zip.m, scripts/optimization/fminunc.m, scripts/optimization/fsolve.m, scripts/optimization/fzero.m, scripts/optimization/glpk.m, scripts/optimization/optimget.m, scripts/optimization/optimset.m, scripts/optimization/qp.m, scripts/optimization/sqp.m, scripts/path/pathdef.m, scripts/pkg/pkg.m, scripts/pkg/private/build.m, scripts/pkg/private/describe.m, scripts/pkg/private/dirempty.m, scripts/pkg/private/get_forge_download.m, scripts/pkg/private/get_forge_pkg.m, scripts/pkg/private/get_unsatisfied_deps.m, scripts/pkg/private/install.m, scripts/pkg/private/is_architecture_dependent.m, scripts/pkg/private/list_forge_packages.m, scripts/pkg/private/rebuild.m, scripts/pkg/private/shell.m, scripts/pkg/private/uninstall.m, scripts/plot/axes.m, scripts/plot/box.m, scripts/plot/closereq.m, scripts/plot/diffuse.m, scripts/plot/ezpolar.m, scripts/plot/findfigs.m, scripts/plot/gco.m, scripts/plot/guidata.m, scripts/plot/guihandles.m, scripts/plot/hdl2struct.m, 
scripts/plot/linkprop.m, scripts/plot/peaks.m, scripts/plot/print.m, scripts/plot/private/__add_datasource__.m, scripts/plot/private/__axis_label__.m, scripts/plot/private/__clabel__.m, scripts/plot/private/__color_str_rgb__.m, scripts/plot/private/__contour__.m, scripts/plot/private/__default_plot_options__.m, scripts/plot/private/__errcomm__.m, scripts/plot/private/__file_filter__.m, scripts/plot/private/__fltk_file_filter__.m, scripts/plot/private/__getlegenddata__.m, scripts/plot/private/__gnuplot_open_stream__.m, scripts/plot/private/__gnuplot_print__.m, scripts/plot/private/__go_draw_axes__.m, scripts/plot/private/__interp_cube__.m, scripts/plot/private/__is_function__.m, scripts/plot/private/__line__.m, scripts/plot/private/__marching_cube__.m, scripts/plot/private/__next_line_style__.m, scripts/plot/private/__patch__.m, scripts/plot/private/__pie__.m, scripts/plot/private/__pltopt__.m, scripts/plot/private/__quiver__.m, scripts/plot/private/__scatter__.m, scripts/plot/private/__stem__.m, scripts/plot/private/__uigetdir_fltk__.m, scripts/plot/private/__uigetfile_fltk__.m, scripts/plot/private/__uiobject_split_args__.m, scripts/plot/private/__uiputfile_fltk__.m, scripts/plot/refresh.m, scripts/plot/saveas.m, scripts/plot/shg.m, scripts/plot/specular.m, scripts/plot/sphere.m, scripts/plot/struct2hdl.m, scripts/plot/subplot.m, scripts/plot/uicontextmenu.m, scripts/plot/uicontrol.m, scripts/plot/uipanel.m, scripts/plot/uipushtool.m, scripts/plot/uiresume.m, scripts/plot/uitoggletool.m, scripts/plot/uitoolbar.m, scripts/plot/uiwait.m, scripts/plot/waitforbuttonpress.m, scripts/polynomial/pchip.m, scripts/polynomial/polyeig.m, scripts/polynomial/ppval.m, scripts/prefs/addpref.m, scripts/prefs/getpref.m, scripts/prefs/ispref.m, scripts/prefs/private/loadprefs.m, scripts/prefs/private/prefsfile.m, scripts/prefs/private/saveprefs.m, scripts/prefs/setpref.m, scripts/set/private/validargs.m, scripts/set/unique.m, scripts/signal/arch_fit.m, scripts/signal/arch_rnd.m, 
scripts/signal/arch_test.m, scripts/signal/arma_rnd.m, scripts/signal/durbinlevinson.m, scripts/signal/fractdiff.m, scripts/signal/freqz.m, scripts/signal/freqz_plot.m, scripts/signal/hurst.m, scripts/signal/periodogram.m, scripts/signal/private/rectangle_lw.m, scripts/signal/private/rectangle_sw.m, scripts/signal/private/triangle_sw.m, scripts/signal/spectral_adf.m, scripts/signal/spectral_xdf.m, scripts/signal/stft.m, scripts/signal/synthesis.m, scripts/signal/yulewalker.m, scripts/sparse/colperm.m, scripts/sparse/eigs.m, scripts/sparse/etreeplot.m, scripts/sparse/gmres.m, scripts/sparse/private/__sprand_impl__.m, scripts/sparse/spdiags.m, scripts/sparse/sprandn.m, scripts/specfun/bessel.m, scripts/specfun/betaln.m, scripts/specfun/expint.m, scripts/special-matrix/gallery.m, scripts/startup/__finish__.m, scripts/statistics/base/qqplot.m, scripts/statistics/distributions/tcdf.m, scripts/statistics/distributions/wienrnd.m, scripts/statistics/models/logistic_regression.m, scripts/statistics/models/private/logistic_regression_derivatives.m, scripts/statistics/models/private/logistic_regression_likelihood.m, scripts/statistics/tests/anova.m, scripts/statistics/tests/bartlett_test.m, scripts/statistics/tests/chisquare_test_homogeneity.m, scripts/statistics/tests/chisquare_test_independence.m, scripts/statistics/tests/cor_test.m, scripts/statistics/tests/f_test_regression.m, scripts/statistics/tests/hotelling_test.m, scripts/statistics/tests/hotelling_test_2.m, scripts/statistics/tests/kolmogorov_smirnov_test_2.m, scripts/statistics/tests/kruskal_wallis_test.m, scripts/statistics/tests/manova.m, scripts/statistics/tests/mcnemar_test.m, scripts/statistics/tests/prop_test_2.m, scripts/statistics/tests/run_test.m, scripts/statistics/tests/sign_test.m, scripts/statistics/tests/t_test.m, scripts/statistics/tests/t_test_2.m, scripts/statistics/tests/t_test_regression.m, scripts/statistics/tests/u_test.m, scripts/statistics/tests/var_test.m, 
scripts/statistics/tests/welch_test.m, scripts/statistics/tests/wilcoxon_test.m, scripts/statistics/tests/z_test.m, scripts/statistics/tests/z_test_2.m, scripts/strings/strcat.m, scripts/strings/strjoin.m, scripts/strings/strsplit.m, scripts/testfun/__have_feature__.m, scripts/testfun/__printf_assert__.m, scripts/testfun/__prog_output_assert__.m, scripts/testfun/__run_test_suite__.m, scripts/time/clock.m, scripts/time/datenum.m, scripts/ui/errordlg.m, scripts/ui/private/message_dialog.m: End m-files with 1 blank line.
author Rik <rik@octave.org>
date Wed, 28 Aug 2013 08:33:02 -0700
parents b81b9d079515
children 6dbc866379e2
line wrap: on
line source

## Copyright (C) 2009-2012 Jaroslav Hajek
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {[@var{cstr}] =} strsplit (@var{s})
## @deftypefnx {Function File} {[@var{cstr}] =} strsplit (@var{s}, @var{del})
## @deftypefnx {Function File} {[@var{cstr}] =} strsplit (@dots{}, @var{name}, @var{value})
## @deftypefnx {Function File} {[@var{cstr}, @var{matches}] =} strsplit (@dots{})
## Split the string @var{s} using the delimiters specified by @var{del}
## and return a cell-string array of sub-strings.  If a delimiter is not
## specified the string, @var{s}, is split at whitespace.  The delimiter,
## @var{del}, may be a string, a scalar cell-string, or a cell-string array.
## By default, consecutive delimiters in the input string, @var{s}, are
## collapsed into one.
##
## The second output, @var{matches}, returns the delimiters which were matched
## in the original string.
##
## Example:
##
## @example
## strsplit ("a b c")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,b,c", ",")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a foo b,bar c", @{",", " ", "foo", "bar"@})
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,,b, c", @{",", " "@}, false)
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = 
##             [1,3] = b
##             [1,4] = 
##             [1,5] = c
##           @}
##
## @end example
##
## Supported @var{name}/@var{value} pair arguments are:
##
## @itemize
## @item @var{collapsedelimiters} may take the value of @var{true} or
## @var{false} with the default being @var{true}.
##
## @item @var{delimitertype} may take the value of @code{simple} or
## @code{regularexpression}.  The default for @var{delimitertype} is
## @code{simple}.
## @end itemize
## 
## Example:
##
## @example
## strsplit ("a foo b,bar c", ",|\\s|foo|bar", "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
## 
## strsplit ("a,,b, c", "[, ]", false, "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = 
##             [1,3] = b
##             [1,4] = 
##             [1,5] = c
##           @}
## 
## strsplit ("a,\t,b, c", @{',', '\s'@}, "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
## 
## strsplit ("a,\t,b, c", @{',', ' ', '\t'@}, "collapsedelimiters", false)
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = 
##             [1,3] = 
##             [1,4] = b
##             [1,5] = 
##             [1,6] = c
##           @}
## @end example
## 
## @seealso{ostrsplit, strjoin, strtok, regexp}
## @end deftypefn

function [result, matches] = strsplit (str, del, varargin)

  ## Split the char array STR at the delimiters DEL.
  ##
  ## Inputs:
  ##   str      - string to split.
  ##   del      - delimiter(s) as a string or cell array of strings.  If it
  ##              is logical/numeric instead, it is taken as the value of
  ##              "collapsedelimiters" and whitespace delimiters are used.
  ##   varargin - at most one positional logical/numeric value for
  ##              "collapsedelimiters", followed by NAME/VALUE pairs
  ##              ("collapsedelimiters", "delimitertype").
  ##
  ## Outputs:
  ##   result  - cell array holding the pieces of STR.
  ##   matches - cell array holding the delimiter text matched between pieces.

  ## Option defaults.  "default" is a placeholder resolved to "simple" below.
  args.collapsedelimiters = true;
  args.delimitertype = "default";

  ## Separate the leading positional argument(s) from the NAME/VALUE pairs.
  [reg, params] = parseparams (varargin);

  if (nargin < 1)
    print_usage ();
  elseif (numel (reg) > 1)
    print_usage ();
  elseif (numel (reg) == 1)
    ## A single positional argument is the "collapsedelimiters" value.
    if (islogical (reg{1}) || isnumeric (reg{1}))
      args.collapsedelimiters = reg{1};
    else
      print_usage ();
    endif
  endif

  fields = fieldnames (args);
  for n = 1:2:numel (params)
    if (any (strcmpi (params{n}, fields)))
      ## Report a dangling option name instead of an index-out-of-bound error.
      if (n == numel (params))
        error ("strsplit:missing_parameter_value",
               "strsplit: missing value for parameter '%s'", params{n});
      endif
      args.(lower (params{n})) = params{n+1};
    elseif (ischar (params{n}))
      ## Index PARAMS here (not VARARGIN): N counts within the NAME/VALUE
      ## list, which is offset from VARARGIN by any positional argument.
      error ("strsplit:invalid_parameter_name",
             "strsplit: invalid parameter name, '%s'", params{n});
    else
      print_usage ();
    endif
  endfor

  if (strcmpi (args.delimitertype, "default"))
    args.delimitertype = "simple";
  endif

  ## Save the length of the "delimitertype" parameter.  It is used with
  ## strncmpi so that abbreviations such as "s" or "r" are accepted.
  length_deltype = numel (args.delimitertype);

  if (nargin == 1 || (nargin > 1 && (islogical (del) || isnumeric (del))))
    if (nargin > 1)
      ## Second input is the "collapsedelimiters" parameter
      args.collapsedelimiters = del;
    endif
    ## Set proper default for the delimiter type
    if (strncmpi (args.delimitertype, "simple", length_deltype))
      del = {" ","\f","\n","\r","\t","\v"};
    else
      del = "\\s";
    endif
  endif

  if (! ischar (str) || (! ischar (del) && ! iscellstr (del)))
    error ("strsplit: S and DEL must be string values");
  elseif (! isscalar (args.collapsedelimiters))
    error ("strsplit: COLLAPSEDELIMITERS must be a scalar value");
  endif

  if (strncmpi (args.delimitertype, "simple", length_deltype))
    ## Simple delimiters are literal text: expand escape sequences, then
    ## backslash-escape every non-word character so that regexp treats the
    ## delimiter literally.
    if (iscellstr (del))
      del = cellfun (@do_string_escapes, del, "uniformoutput", false);
    else
      del = do_string_escapes (del);
    endif
    ## This is clumsy, but needed for multi-row strings
    del = regexprep (del, '([^\w])', '\\$1');
  endif

  if (isempty (str))
    result = {str};
    ## Nothing can match in an empty string; define the second output so
    ## that "[s, m] = strsplit ("", ...)" does not fail.
    matches = cell (1, 0);
  elseif (strncmpi (args.delimitertype, "regularexpression", length_deltype)
          || strncmpi (args.delimitertype, "simple", length_deltype))
    if (iscellstr (del))
      ## Join the alternatives into a single pattern "d1|d2|...".
      del = sprintf ('%s|', del{:});
      del(end) = [];
    endif
    if (args.collapsedelimiters)
      ## Treat a run of consecutive delimiters as one match.
      del = ["(", del, ")+"];
    endif
    [result, ~, ~, ~, matches] = regexp (str, del, "split");
  else
    error ("strsplit:invalid_delimitertype",
           "strsplit: Invalid DELIMITERTYPE");
  endif
endfunction


%!shared str
%! str = "The rain in Spain stays mainly in the plain.";
% Split on all whitespace.
%!assert (strsplit (str), {"The", "rain", "in", "Spain", "stays", ...
%! "mainly", "in", "the", "plain."})
% Split on "ain".
%!assert (strsplit (str, "ain"), {"The r", " in Sp", " stays m", ...
%!  "ly in the pl", "."})
% Split on " " and "ain" (treating multiple delimiters as one).
%!test
%! s = strsplit (str, '\s|ain', true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", "m", "ly", "in", "the", "pl", "."})
%!test
%! s = strsplit (str, "\\s|ain", true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", "m", "ly", "in", "the", "pl", "."})
%!test
%! [s, m] = strsplit (str, {"\\s", "ain"}, true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", "m", "ly", "in", "the", "pl", "."})
%! assert (m, {" ", "ain ", " ", "ain ", " ", "ain", " ", " ", " ", "ain"})
% Split on " " and "ain", and treat multiple delimiters separately.
%!test
%! [s, m] = strsplit (str, {" ", "ain"}, "collapsedelimiters", false);
%! assert (s, {"The", "r", "", "in", "Sp", "", "stays", "m", "ly", "in", "the", "pl", "."})
%! assert (m, {" ", "ain", " ", " ", "ain", " ", " ", "ain", " ", " ", " ", "ain"})

%!assert (strsplit ("road to hell"), {"road", "to", "hell"})
%!assert (strsplit ("road to hell", " "), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", {" ","^"}), {"road", "to", "hell"})
%!assert (strsplit ("road   to--hell", {" ","-"}, true), {"road", "to", "hell"})
%!assert (strsplit (["a,bc,,de"], ",", false, "delimitertype", "s"), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,,de"], ",", false), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,de"], ",", true), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,de"], {","," "}, true), {"a", "bc", "de"})

%!assert (strsplit ("road to hell", " ", "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", '\^| ', "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", "[ ^]", "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road   to--hell", "[ -]", false, "delimitertype", "r"), {"road", "", "", "to", "", "hell"})
%!assert (strsplit (["a,bc,de"], ",", "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,,de"], ",", false, "delimitertype", "r"), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,de"], ",", true, "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,de"], "[, ]", true, "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit ("hello \t world", 1, "delimitertype", "r"), {"hello", "world"});

%!assert (strsplit ("foo\tbar", '\t', "delimitertype", "r"), {"foo", "bar"})
%!assert (strsplit ("foo\tbar", '\t', "delimitertype", "s"), {"foo", "bar"})

## Test "match" for consecutive delimiters
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', 'delimitertype', 'regularexpression',
%!   'collapsedelimiters', false);
%! assert (a, {"a", "", "", "b"})
%! assert (m, {"\t", " ", "\n"})
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', false, 'delimitertype', 'regularexpression');
%! assert (a, {"a", "", "", "b"})
%! assert (m, {"\t", " ", "\n"})
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', "delimitertype", "regularexpression");
%! assert (a, {"a", "b"})
%! assert (m, {"\t \n"})
%!test
%! [a, m] = strsplit ("a\t \nb", {"\t", " ", "\n"}, "delimitertype", "simple");
%! assert (a, {"a", "b"})
%! assert (m, {"\t \n"})
%!test
%! [s, m] = strsplit ("hello \t world", 1);
%! assert (s, {"hello", "world"});
%! assert (m, {" \t "});

%% Compatibility
%! assert (strsplit ("", "a"), {""})
%! assert (strsplit ("a", "a"), {"", ""})
%! assert (strsplit ("aa", "a"), {"", ""})
%! assert (strsplit ("aaa", "a"), {"", ""})

%% Test input validation
%!error strsplit ()
%!error strsplit ("abc", "b", true, 4)
%!error <S and DEL must be string values> strsplit (123, "b")
%!error <COLLAPSEDELIMITERS must be a scalar value> strsplit ("abc", "def", ones (3,3))